IMPALA-15118: Copy ByteBuffers for partitionList_ IcebergContentFileStore.fromThrift() didn't copy the contents of ByteBuffers when filling partitionList_. Thrift's binary handling in Java is special as the ByteBuffer object in deserialized Thrift object can still reference the transport buffer. Keeping the reference to that can almost double the size of IcebergContentFileStore in the coordinator. Another issue fixed is that partitionMap_ and partitionList_ did not share the ByteBuffers after fromThrift() Improvement for test table with 1M files, 25K partitions: 665MB->389MB Change-Id: Ib39940dd64aadcb094b099e3863dd42c22afc2b8 Reviewed-on: http://gerrit.cloudera.org:8080/24515 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java index 18dec4c..ae2f7ec 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergContentFileStore.java
@@ -695,10 +695,13 @@ ret.hasParquet_ = tFileStore.isSetHas_parquet() ? tFileStore.isHas_parquet() : false; ret.missingFiles_ = tFileStore.isSetMissing_files() ? new HashSet<>(tFileStore.getMissing_files()) : Collections.emptySet(); + // Call copyBinary to avoid referencing the whole transport buffer in ByteBuffers. ret.partitionList_ = tFileStore.isSetPartitions() ? - tFileStore.getPartitions() : new ArrayList<>(); - ret.partitionMap_ = tFileStore.isSetPartitions() ? - convertPartitionListToMap(tFileStore.getPartitions()) : new HashMap<>(); + tFileStore.getPartitions().stream() + .map(org.apache.thrift.TBaseHelper::copyBinary) + .collect(Collectors.toList()) + : new ArrayList<>(); + ret.partitionMap_ = convertPartitionListToMap(ret.partitionList_); return ret; } @@ -720,7 +723,7 @@ Preconditions.checkState(partitionList != null); ImmutableMap.Builder<ByteBuffer, Integer> builder = ImmutableMap.builder(); for (int i = 0; i < partitionList.size(); ++i) { - builder.put(org.apache.thrift.TBaseHelper.copyBinary(partitionList.get(i)), i); + builder.put(partitionList.get(i), i); } return builder.build(); }