blob: f500ab09f0b1deba27a47c6183c4e35ad8a85e84 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.AnalysisInfo.JobType;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.gson.annotations.SerializedName;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
public class TableStatsMeta implements Writable {
@SerializedName("tblId")
public final long tblId;
@SerializedName("idxId")
public final long idxId;
@SerializedName("updatedRows")
public final AtomicLong updatedRows = new AtomicLong();
// We would like to analyze tables which queried frequently with higher priority in the future.
@SerializedName("queriedTimes")
public final AtomicLong queriedTimes = new AtomicLong();
// Used for external table.
@SerializedName("rowCount")
public long rowCount;
@SerializedName("updateTime")
public long updatedTime;
@SerializedName("colNameToColStatsMeta")
private ConcurrentMap<String, ColStatsMeta> colNameToColStatsMeta = new ConcurrentHashMap<>();
@SerializedName("trigger")
public JobType jobType;
@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
idxId = 0;
}
// It's necessary to store these fields separately from AnalysisInfo, since the lifecycle between AnalysisInfo
// and TableStats is quite different.
public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) {
this.tblId = table.getId();
this.idxId = -1;
this.rowCount = rowCount;
update(analyzedJob, table);
}
@Override
public void write(DataOutput out) throws IOException {
String json = GsonUtils.GSON.toJson(this);
Text.writeString(out, json);
}
public static TableStatsMeta read(DataInput dataInput) throws IOException {
String json = Text.readString(dataInput);
TableStatsMeta tableStats = GsonUtils.GSON.fromJson(json, TableStatsMeta.class);
// Might be null counterintuitively, for compatible
if (tableStats.colNameToColStatsMeta == null) {
tableStats.colNameToColStatsMeta = new ConcurrentHashMap<>();
}
return tableStats;
}
public long findColumnLastUpdateTime(String colName) {
ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(colName);
if (colStatsMeta == null) {
return 0;
}
return colStatsMeta.updatedTime;
}
public ColStatsMeta findColumnStatsMeta(String colName) {
return colNameToColStatsMeta.get(colName);
}
public void removeColumn(String colName) {
colNameToColStatsMeta.remove(colName);
}
public Set<String> analyzeColumns() {
return colNameToColStatsMeta.keySet();
}
public void reset() {
updatedTime = 0;
colNameToColStatsMeta.values().forEach(ColStatsMeta::clear);
}
public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
updatedTime = analyzedJob.tblUpdateTime;
String colNameStr = analyzedJob.colName;
// colName field AnalyzeJob's format likes: "[col1, col2]", we need to remove brackets here
// TODO: Refactor this later
if (analyzedJob.colName.startsWith("[") && analyzedJob.colName.endsWith("]")) {
colNameStr = colNameStr.substring(1, colNameStr.length() - 1);
}
List<String> cols = Arrays.stream(colNameStr.split(",")).map(String::trim).collect(Collectors.toList());
for (String col : cols) {
ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(col);
if (colStatsMeta == null) {
colNameToColStatsMeta.put(col, new ColStatsMeta(updatedTime,
analyzedJob.analysisMethod, analyzedJob.analysisType, analyzedJob.jobType, 0));
} else {
colStatsMeta.updatedTime = updatedTime;
colStatsMeta.analysisType = analyzedJob.analysisType;
colStatsMeta.analysisMethod = analyzedJob.analysisMethod;
colStatsMeta.jobType = analyzedJob.jobType;
}
}
jobType = analyzedJob.jobType;
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
rowCount = tableIf.getRowCount();
}
if (analyzedJob.colToPartitions.keySet()
.containsAll(tableIf.getBaseSchema().stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.toSet()))) {
updatedRows.set(0);
}
}
}
}