blob: e25026ed9839ff63bd7c63b3f1a853a5ef8509ae [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.stats;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.lang.invoke.MethodHandles;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.lucene.index.Term;
import org.apache.solr.common.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Various utilities for de/serialization of term stats and collection stats.
 *
 * <p>Wire format, as implemented by the methods of this class:
 * <ul>
 *   <li>a list of entries is joined with {@link #ENTRY_SEPARATOR};</li>
 *   <li>a term is written as {@code field:encodedText}, see {@link #encode(String)};</li>
 *   <li>term stats are written as {@code term,docFreq,totalTermFreq};</li>
 *   <li>collection stats are written as
 *       {@code field,maxDoc,docCount,sumTotalTermFreq,sumDocFreq}.</li>
 * </ul>
 *
 * <p>Only the text part of a term is escaped. Field names are written verbatim, so a field name
 * that contains {@code '!'}, {@code ','} or {@code ':'} does not survive a round trip.
 *
 * <p>TODO: serialization format is very simple and does nothing to compress the data.</p>
 */
public class StatsUtil {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /** Separator between the entries of a serialized list or map. */
  public static final String ENTRY_SEPARATOR = "!";
  /** Same separator as {@link #ENTRY_SEPARATOR}, as a {@code char}. */
  public static final char ENTRY_SEPARATOR_CHAR = '!';

  /** Escape character used by {@link #encode(String)} and {@link #decode(String)}. */
  public static final char ESCAPE = '_';
  /** Character that, when preceded by {@link #ESCAPE}, stands for {@link #ENTRY_SEPARATOR_CHAR}. */
  public static final char ESCAPE_ENTRY_SEPARATOR = '0';

  /**
   * Parse a list of urls separated by "|" in order to retrieve a shard name.
   *
   * <p>Only the first url is inspected. Its last path segment is taken as the core name, which
   * is expected to look like {@code <collection>_<shard>_<replica>}.
   *
   * @param collectionName collection name
   * @param shardUrls list of urls
   * @return shard name, or shardUrl if no shard info is present,
   *         or null if impossible to determine (eg. empty string)
   */
  public static String shardUrlToShard(String collectionName, String shardUrls) {
    if (shardUrls == null) {
      return null;
    }
    // we may get multiple replica urls
    String[] urls = shardUrls.split("\\|");
    // "".split(..) yields [""] and not an empty array, so the first url has to be checked as well
    if (urls.length == 0 || urls[0].isEmpty()) {
      return null;
    }
    String url = urls[0];
    String[] urlParts = url.split("/");
    if (urlParts.length == 0 || collectionName == null) {
      // either the url consists only of '/' characters (split drops all trailing empty parts),
      // or there is no collection name to strip from the core name: no shard info available
      return url;
    }
    String coreName = urlParts[urlParts.length - 1];
    String replicaName = Utils.parseMetricsReplicaName(collectionName, coreName);
    // the shard name starts after "<collection>" and one separator character
    int shardStart = collectionName.length() + 1;
    if (replicaName != null) {
      // the shard name ends before one separator character and "<replica>"
      int shardEnd = coreName.length() - replicaName.length() - 1;
      if (shardStart <= shardEnd) {
        return coreName.substring(shardStart, shardEnd);
      }
      // NOTE(review): parseMetricsReplicaName is not visible from this file. If it reports a
      // replica name that leaves no room for a shard name, the original substring arithmetic
      // threw StringIndexOutOfBoundsException; fall back to the prefix-only handling below.
    }
    if (coreName.length() > collectionName.length() && coreName.startsWith(collectionName)) {
      String shard = coreName.substring(shardStart);
      return shard.isEmpty() ? url : shard;
    }
    return url;
  }

  /**
   * Serialize a collection of terms into a single string.
   *
   * <p>Elements that are {@link Term} instances are encoded directly; any other element is
   * converted with {@link String#valueOf(Object)} and must have the form {@code field:text}.
   * Elements that cannot be encoded are skipped (a warning is logged by
   * {@link #termToEncodedString(String)}).
   *
   * @param terms terms to serialize, may be null
   * @return encoded terms joined with {@link #ENTRY_SEPARATOR}, possibly empty
   */
  public static String termsToEncodedString(Collection<?> terms) {
    StringBuilder sb = new StringBuilder();
    if (terms == null) {
      return sb.toString();
    }
    for (Object o : terms) {
      String encoded;
      if (o instanceof Term) {
        encoded = termToEncodedString((Term) o);
      } else {
        encoded = termToEncodedString(String.valueOf(o));
      }
      if (encoded == null) {
        // appending null would put the literal text "null" on the wire
        continue;
      }
      if (sb.length() > 0) {
        sb.append(ENTRY_SEPARATOR);
      }
      sb.append(encoded);
    }
    return sb.toString();
  }

  /**
   * Inverse of {@link #termsToEncodedString(Collection)}.
   *
   * @param data serialized terms, may be null or blank
   * @return the set of terms that could be parsed; never null, invalid entries are skipped
   */
  public static Set<Term> termsFromEncodedString(String data) {
    Set<Term> terms = new HashSet<>();
    if (data == null || data.trim().isEmpty()) {
      return terms;
    }
    for (String item : data.split(ENTRY_SEPARATOR)) {
      Term t = termFromEncodedString(item);
      if (t != null) {
        terms.add(t);
      }
    }
    return terms;
  }

  /**
   * Inverse of {@link #fieldsToString(Collection)}.
   *
   * @param data field names joined with {@link #ENTRY_SEPARATOR}, may be null or blank
   * @return the set of non-blank field names; never null
   */
  public static Set<String> fieldsFromString(String data) {
    Set<String> fields = new HashSet<>();
    if (data == null || data.trim().isEmpty()) {
      return fields;
    }
    for (String item : data.split(ENTRY_SEPARATOR)) {
      if (!item.trim().isEmpty()) {
        fields.add(item);
      }
    }
    return fields;
  }

  /**
   * Serialize a collection of field names. Field names are written verbatim (no escaping);
   * null and blank names are skipped.
   *
   * @param fields field names, may be null
   * @return field names joined with {@link #ENTRY_SEPARATOR}, possibly empty
   */
  public static String fieldsToString(Collection<String> fields) {
    StringBuilder sb = new StringBuilder();
    if (fields == null) {
      return sb.toString();
    }
    for (String field : fields) {
      if (field == null || field.trim().isEmpty()) {
        continue;
      }
      if (sb.length() > 0) {
        sb.append(ENTRY_SEPARATOR);
      }
      sb.append(field);
    }
    return sb.toString();
  }

  /**
   * Make a String representation of {@link CollectionStats}
   *
   * @param colStats stats to serialize
   * @return {@code field,maxDoc,docCount,sumTotalTermFreq,sumDocFreq}
   */
  public static String colStatsToString(CollectionStats colStats) {
    StringBuilder sb = new StringBuilder();
    sb.append(colStats.field);
    sb.append(',');
    sb.append(colStats.maxDoc);
    sb.append(',');
    sb.append(colStats.docCount);
    sb.append(',');
    sb.append(colStats.sumTotalTermFreq);
    sb.append(',');
    sb.append(colStats.sumDocFreq);
    return sb.toString();
  }

  /**
   * Parse a single entry produced by {@link #colStatsToString(CollectionStats)}.
   *
   * @return the parsed stats, or null (after logging a warning) if the entry is blank, does not
   *         have exactly five comma-separated values, or cannot be converted
   */
  private static CollectionStats colStatsFromString(String data) {
    if (data == null || data.trim().length() == 0) {
      log.warn("Invalid empty collection stats string");
      return null;
    }
    String[] vals = data.split(",");
    if (vals.length != 5) {
      log.warn("Invalid collection stats string, num fields {} != 5 '{}'", vals.length, data);
      return null;
    }
    String field = vals[0];
    try {
      long maxDoc = Long.parseLong(vals[1]);
      long docCount = Long.parseLong(vals[2]);
      long sumTotalTermFreq = Long.parseLong(vals[3]);
      long sumDocFreq = Long.parseLong(vals[4]);
      return new CollectionStats(field, maxDoc, docCount, sumTotalTermFreq,
          sumDocFreq);
    } catch (Exception e) {
      log.warn("Invalid collection stats string '{}', ", data, e);
      return null;
    }
  }

  /**
   * Serialize a term as {@code field:encodedText}. Only the text is passed through
   * {@link #encode(String)}; the field name is written verbatim.
   */
  public static String termToEncodedString(Term t) {
    StringBuilder sb = new StringBuilder();
    sb.append(t.field()).append(':');
    sb.append(encode(t.text()));
    return sb.toString();
  }

  /**
   * Encode a value so that it can be embedded in a serialized entry.
   *
   * <p>First {@link #ESCAPE} is doubled and {@link #ENTRY_SEPARATOR_CHAR} is replaced with
   * {@link #ESCAPE} followed by {@link #ESCAPE_ENTRY_SEPARATOR}; then the result is URL-encoded
   * as UTF-8.
   *
   * @param value value to encode, must not be null
   * @return encoded value
   */
  public static String encode(String value) {
    StringBuilder output = new StringBuilder(value.length() + 2);
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      switch (c) {
        case ESCAPE :
          output.append(ESCAPE).append(ESCAPE);
          break;
        case ENTRY_SEPARATOR_CHAR :
          output.append(ESCAPE).append(ESCAPE_ENTRY_SEPARATOR);
          break;
        default :
          output.append(c);
      }
    }
    try {
      return URLEncoder.encode(output.toString(), "UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException("Apparently your JVM doesn't support UTF-8 encoding?", e);
    }
  }

  /**
   * Inverse of {@link #encode(String)}: URL-decode the value as UTF-8, then resolve the
   * {@link #ESCAPE} sequences.
   *
   * <p>An {@link #ESCAPE} that is the very last character is copied through unchanged, although
   * {@link #encode(String)} never produces such input.
   *
   * @param value encoded value, must not be null
   * @return decoded value
   * @throws IOException if the value is not valid URL encoding or contains an unknown escape
   *         sequence
   */
  public static String decode(String value) throws IOException {
    String unescaped;
    try {
      unescaped = URLDecoder.decode(value, "UTF-8");
    } catch (IllegalArgumentException e) {
      // URLDecoder reports malformed %-sequences with an unchecked exception; surface them
      // through the declared checked exception like every other kind of invalid input
      throw new IOException("invalid URL encoding in " + value, e);
    }
    StringBuilder output = new StringBuilder(unescaped.length());
    for (int i = 0; i < unescaped.length(); i++) {
      char c = unescaped.charAt(i);
      // escaped char follows
      if (c == ESCAPE && i < unescaped.length() - 1) {
        i++;
        char next = unescaped.charAt(i);
        if (next == ESCAPE) {
          output.append(ESCAPE);
        } else if (next == ESCAPE_ENTRY_SEPARATOR) {
          output.append(ENTRY_SEPARATOR_CHAR);
        } else {
          throw new IOException("invalid escape sequence in " + unescaped);
        }
      } else {
        output.append(c);
      }
    }
    return output.toString();
  }

  /**
   * Encode a term given as {@code field:text}. The input is split at the first {@code ':'};
   * everything after it is passed through {@link #encode(String)}.
   *
   * @param term term in {@code field:text} form
   * @return {@code field:encodedText}, or null (after logging a warning) if the input contains
   *         no {@code ':'}
   */
  public static String termToEncodedString(String term) {
    int idx = term.indexOf(':');
    if (idx == -1) {
      log.warn("Invalid term data without ':': '{}'", term);
      return null;
    }
    String prefix = term.substring(0, idx + 1);
    String value = term.substring(idx + 1);
    return prefix + encode(value);
  }

  /**
   * Parse a term written by one of the {@code termToEncodedString} methods.
   *
   * @param data term in {@code field:encodedText} form
   * @return the term, or null (after logging a warning) if the input is blank, contains no
   *         {@code ':'}, or its text cannot be decoded
   */
  public static Term termFromEncodedString(String data) {
    if (data == null || data.trim().length() == 0) {
      log.warn("Invalid empty term value");
      return null;
    }
    int idx = data.indexOf(':');
    if (idx == -1) {
      log.warn("Invalid term data without ':': '{}'", data);
      return null;
    }
    String field = data.substring(0, idx);
    String value = data.substring(idx + 1);
    try {
      return new Term(field, decode(value));
    } catch (Exception e) {
      // keep the cause: it tells a bad escape sequence from bad URL encoding
      log.warn("Invalid term value '{}'", value, e);
      return null;
    }
  }

  /**
   * Serialize term stats as {@code term,docFreq,totalTermFreq}.
   *
   * @param termStats stats to serialize
   * @param encode if true the term is passed through {@link #termToEncodedString(String)},
   *        otherwise it is written verbatim. Note that an un-encodable term (one without
   *        {@code ':'}) is written as the text {@code null}.
   */
  public static String termStatsToString(TermStats termStats, boolean encode) {
    StringBuilder sb = new StringBuilder();
    sb.append(encode ? termToEncodedString(termStats.term) : termStats.term).append(',');
    sb.append(termStats.docFreq);
    sb.append(',');
    sb.append(termStats.totalTermFreq);
    return sb.toString();
  }

  /**
   * Parse a single entry produced by {@link #termStatsToString(TermStats, boolean)} with
   * encoding enabled. Values after the third one are ignored.
   *
   * @return the parsed stats, or null (after logging a warning) if the entry is blank, has
   *         fewer than three comma-separated values, or cannot be converted
   */
  private static TermStats termStatsFromString(String data) {
    if (data == null || data.trim().length() == 0) {
      log.warn("Invalid empty term stats string");
      return null;
    }
    String[] vals = data.split(",");
    if (vals.length < 3) {
      log.warn("Invalid term stats string, num fields {} < 3, '{}'", vals.length, data);
      return null;
    }
    Term term = termFromEncodedString(vals[0]);
    if (term == null) {
      // explicit check instead of relying on a caught NullPointerException further down
      log.warn("Invalid termStats string '{}'", data);
      return null;
    }
    try {
      long docFreq = Long.parseLong(vals[1]);
      long totalTermFreq = Long.parseLong(vals[2]);
      return new TermStats(term.toString(), docFreq, totalTermFreq);
    } catch (Exception e) {
      log.warn("Invalid termStats string '{}'", data);
      return null;
    }
  }

  /**
   * Inverse of {@link #colStatsMapToString(Map)}.
   *
   * @param data serialized collection stats
   * @return map of field name to stats, with invalid entries skipped; null (not an empty map)
   *         if the input is null or blank
   */
  public static Map<String,CollectionStats> colStatsMapFromString(String data) {
    if (data == null || data.trim().length() == 0) {
      return null;
    }
    Map<String,CollectionStats> map = new HashMap<>();
    for (String es : data.split(ENTRY_SEPARATOR)) {
      CollectionStats stats = colStatsFromString(es);
      if (stats != null) {
        map.put(stats.field, stats);
      }
    }
    return map;
  }

  /**
   * Serialize the values of a collection stats map; the keys are not written.
   *
   * @param stats map to serialize, may be null
   * @return entries joined with {@link #ENTRY_SEPARATOR}, or an empty string for a null or
   *         empty map
   */
  public static String colStatsMapToString(Map<String,CollectionStats> stats) {
    if (stats == null || stats.isEmpty()) {
      return "";
    }
    StringBuilder sb = new StringBuilder();
    for (Entry<String,CollectionStats> e : stats.entrySet()) {
      if (sb.length() > 0) {
        sb.append(ENTRY_SEPARATOR);
      }
      sb.append(colStatsToString(e.getValue()));
    }
    return sb.toString();
  }

  /**
   * Inverse of {@link #termStatsMapToString(Map)}.
   *
   * @param data serialized term stats
   * @return map of term (in {@code field:text} form) to stats, with invalid entries skipped;
   *         null (not an empty map) if the input is null or blank
   */
  public static Map<String,TermStats> termStatsMapFromString(String data) {
    if (data == null || data.trim().length() == 0) {
      return null;
    }
    Map<String,TermStats> map = new HashMap<>();
    for (String es : data.split(ENTRY_SEPARATOR)) {
      TermStats termStats = termStatsFromString(es);
      if (termStats != null) {
        map.put(termStats.term, termStats);
      }
    }
    return map;
  }

  /**
   * Serialize the values of a term stats map with term encoding enabled; the keys are not
   * written.
   *
   * @param stats map to serialize, may be null
   * @return entries joined with {@link #ENTRY_SEPARATOR}, or an empty string for a null or
   *         empty map
   */
  public static String termStatsMapToString(Map<String,TermStats> stats) {
    if (stats == null || stats.isEmpty()) {
      return "";
    }
    StringBuilder sb = new StringBuilder();
    for (Entry<String,TermStats> e : stats.entrySet()) {
      if (sb.length() > 0) {
        sb.append(ENTRY_SEPARATOR);
      }
      sb.append(termStatsToString(e.getValue(), true));
    }
    return sb.toString();
  }
}