blob: 5eec30592a5713baa4fdf047527e4740acb22226 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.BiMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableBiMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types.NestedField;
import org.apache.iceberg.types.Types.StructType;
/**
 * The schema of a data table.
 * <p>
 * A schema wraps a {@link StructType} and answers lookups by field id, by full field name
 * (case sensitive or case insensitive), and by alias. The lookup indexes are transient and are
 * built on first use, so they are not part of the serialized form.
 */
public class Schema implements Serializable {
  private static final Joiner NEWLINE = Joiner.on('\n');
  private static final String ALL_COLUMNS = "*";

  private final StructType struct;

  // Lookup indexes; each one is populated on demand by the matching lazy* method below.
  private transient BiMap<String, Integer> aliasToId = null;
  private transient Map<Integer, NestedField> idToField = null;
  private transient Map<String, Integer> nameToId = null;
  private transient Map<String, Integer> lowerCaseNameToId = null;
  private transient Map<Integer, Accessor<StructLike>> idToAccessor = null;
  private transient Map<Integer, String> idToName = null;

  /**
   * Creates a schema from a list of columns and an optional alias map.
   *
   * @param columns the top-level columns of the schema
   * @param aliases a Map of column aliases to field ids, or null if there are no aliases
   */
  public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
    this.struct = StructType.of(columns);
    if (aliases != null) {
      this.aliasToId = ImmutableBiMap.copyOf(aliases);
    }

    // building the id-to-name index up front validates the schema through the IndexByName visitor
    lazyIdToName();
  }

  /**
   * Creates a schema from a list of columns, without aliases.
   *
   * @param columns the top-level columns of the schema
   */
  public Schema(List<NestedField> columns) {
    this(columns, null);
  }

  /**
   * Creates a schema from the given columns, without aliases.
   *
   * @param columns the top-level columns of the schema
   */
  public Schema(NestedField... columns) {
    this(Arrays.asList(columns));
  }

  /** Returns the field-id to field index, building it on first use. */
  private Map<Integer, NestedField> lazyIdToField() {
    if (idToField != null) {
      return idToField;
    }

    this.idToField = TypeUtil.indexById(struct);
    return idToField;
  }

  /** Returns the name to field-id index, building it on first use. */
  private Map<String, Integer> lazyNameToId() {
    if (nameToId != null) {
      return nameToId;
    }

    this.nameToId = ImmutableMap.copyOf(TypeUtil.indexByName(struct));
    return nameToId;
  }

  /** Returns the field-id to name index, building it on first use. */
  private Map<Integer, String> lazyIdToName() {
    if (idToName != null) {
      return idToName;
    }

    this.idToName = ImmutableMap.copyOf(TypeUtil.indexNameById(struct));
    return idToName;
  }

  /** Returns the lower-case name to field-id index, building it on first use. */
  private Map<String, Integer> lazyLowerCaseNameToId() {
    if (lowerCaseNameToId != null) {
      return lowerCaseNameToId;
    }

    this.lowerCaseNameToId = ImmutableMap.copyOf(TypeUtil.indexByLowerCaseName(struct));
    return lowerCaseNameToId;
  }

  /** Returns the field-id to accessor index, building it on first use. */
  private Map<Integer, Accessor<StructLike>> lazyIdToAccessor() {
    if (idToAccessor != null) {
      return idToAccessor;
    }

    idToAccessor = Accessors.forSchema(this);
    return idToAccessor;
  }

  /**
   * Returns an alias map for this schema, if set.
   * <p>
   * Alias maps are created when translating an external schema, like an Avro Schema, to this
   * format. The original column names can be provided in a Map when constructing this Schema.
   *
   * @return a Map of column aliases to field ids, or null if no aliases were provided
   */
  public Map<String, Integer> getAliases() {
    return aliasToId;
  }

  /**
   * Returns the underlying {@link StructType struct type} for this schema.
   *
   * @return the StructType version of this schema.
   */
  public StructType asStruct() {
    return struct;
  }

  /**
   * Returns a List of the {@link NestedField columns} in this Schema.
   *
   * @return the fields of the underlying struct type
   */
  public List<NestedField> columns() {
    return struct.fields();
  }

  /**
   * Returns the {@link Type} of a sub-field identified by the field name.
   *
   * @param name a field name; must not be empty
   * @return a Type for the sub-field or null if it is not found
   */
  public Type findType(String name) {
    Preconditions.checkArgument(!name.isEmpty(), "Invalid column name: (empty)");
    Integer id = lazyNameToId().get(name);
    // a null id means the name could not be found
    return id == null ? null : findType(id);
  }

  /**
   * Returns the {@link Type} of a sub-field identified by the field id.
   *
   * @param id a field id
   * @return a Type for the sub-field or null if it is not found
   */
  public Type findType(int id) {
    NestedField field = lazyIdToField().get(id);
    return field == null ? null : field.type();
  }

  /**
   * Returns the sub-field identified by the field id as a {@link NestedField}.
   *
   * @param id a field id
   * @return the sub-field or null if it is not found
   */
  public NestedField findField(int id) {
    return lazyIdToField().get(id);
  }

  /**
   * Returns a sub-field by name as a {@link NestedField}.
   * <p>
   * The result may be a top-level or a nested field.
   *
   * @param name a String name; must not be empty
   * @return the sub-field or null if it is not found
   */
  public NestedField findField(String name) {
    Preconditions.checkArgument(!name.isEmpty(), "Invalid column name: (empty)");
    Integer id = lazyNameToId().get(name);
    return id == null ? null : lazyIdToField().get(id);
  }

  /**
   * Returns a sub-field by name as a {@link NestedField}, matching the name without regard to
   * case.
   * <p>
   * The result may be a top-level or a nested field.
   *
   * @param name a String name; must not be empty
   * @return the sub-field or null if it is not found
   */
  public NestedField caseInsensitiveFindField(String name) {
    Preconditions.checkArgument(!name.isEmpty(), "Invalid column name: (empty)");
    String lowerCaseName = name.toLowerCase(Locale.ROOT);
    Integer id = lazyLowerCaseNameToId().get(lowerCaseName);
    return id == null ? null : lazyIdToField().get(id);
  }

  /**
   * Returns the full column name for the given id.
   *
   * @param id a field id
   * @return the full column name in this schema that resolves to the id
   */
  public String findColumnName(int id) {
    return lazyIdToName().get(id);
  }

  /**
   * Returns the column id for the given column alias. Column aliases are set
   * by conversions from Parquet or Avro to this Schema type.
   *
   * @param alias a full column name in the unconverted data schema
   * @return the column id in this schema, or null if the column wasn't found
   */
  public Integer aliasToId(String alias) {
    return aliasToId == null ? null : aliasToId.get(alias);
  }

  /**
   * Returns the full column name in the unconverted data schema for the given column id.
   * Column aliases are set by conversions from Parquet or Avro to this Schema type.
   *
   * @param fieldId a column id in this schema
   * @return the full column name in the unconverted data schema, or null if one wasn't found
   */
  public String idToAlias(Integer fieldId) {
    return aliasToId == null ? null : aliasToId.inverse().get(fieldId);
  }

  /**
   * Returns an accessor for retrieving the data from {@link StructLike}.
   * <p>
   * Accessors do not retrieve data contained in lists or maps.
   *
   * @param id a column id in this schema
   * @return an {@link Accessor} to retrieve values from a {@link StructLike} row
   */
  public Accessor<StructLike> accessorForField(int id) {
    return lazyIdToAccessor().get(id);
  }

  /**
   * Creates a projection schema for a subset of columns, selected by name.
   * <p>
   * Names that identify nested fields will select part or all of the field's top-level column.
   *
   * @param names String names for selected columns
   * @return a projection schema from this schema, by name
   */
  public Schema select(String... names) {
    return select(Arrays.asList(names));
  }

  /**
   * Creates a projection schema for a subset of columns, selected by name.
   * <p>
   * Names that identify nested fields will select part or all of the field's top-level column.
   *
   * @param names a Collection of String names for selected columns
   * @return a projection schema from this schema, by name
   */
  public Schema select(Collection<String> names) {
    return internalSelect(names, true);
  }

  /**
   * Creates a projection schema for a subset of columns, selected by case insensitive names
   * <p>
   * Names that identify nested fields will select part or all of the field's top-level column.
   *
   * @param names String names for selected columns
   * @return a projection schema from this schema, by names
   */
  public Schema caseInsensitiveSelect(String... names) {
    return caseInsensitiveSelect(Arrays.asList(names));
  }

  /**
   * Creates a projection schema for a subset of columns, selected by case insensitive names
   * <p>
   * Names that identify nested fields will select part or all of the field's top-level column.
   *
   * @param names a Collection of String names for selected columns
   * @return a projection schema from this schema, by names
   */
  public Schema caseInsensitiveSelect(Collection<String> names) {
    return internalSelect(names, false);
  }

  /**
   * Shared implementation of the select methods.
   * <p>
   * Returns this schema unchanged when the names contain {@code "*"}. Otherwise each name is
   * resolved to a field id and the ids that were found are passed to {@link TypeUtil#select};
   * names that do not resolve to an id are skipped.
   *
   * @param names names of the columns to select
   * @param caseSensitive whether names are matched exactly or after lower-casing
   * @return this schema, or the projection produced by TypeUtil.select
   */
  private Schema internalSelect(Collection<String> names, boolean caseSensitive) {
    if (names.contains(ALL_COLUMNS)) {
      return this;
    }

    Set<Integer> selected = Sets.newHashSet();
    for (String name : names) {
      // the index is looked up per name so that no index is built when names is empty
      Integer id = caseSensitive ?
          lazyNameToId().get(name) :
          lazyLowerCaseNameToId().get(name.toLowerCase(Locale.ROOT));
      if (id == null) {
        continue;
      }

      selected.add(id);
    }

    return TypeUtil.select(this, selected);
  }

  /**
   * Returns a multi-line description of this schema: one line per top-level field, wrapped in
   * {@code table { ... }}.
   */
  @Override
  public String toString() {
    List<String> fieldLines = struct.fields().stream()
        .map(field -> " " + field)
        .collect(Collectors.toList());
    return String.format("table {\n%s\n}", NEWLINE.join(fieldLines));
  }
}