blob: dc28adef75d3be37fe996e5e0d797442cb663a7c [file] [log] [blame]
/*
* Copyright 2016, Yahoo! Inc.
* Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
*/
package com.yahoo.sketches.cmd;
import static com.yahoo.sketches.Util.LS;
import static com.yahoo.sketches.Util.TAB;
import static java.lang.Math.*;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import com.yahoo.sketches.theta.Sketches;
import com.yahoo.sketches.theta.UpdateSketch;
import com.yahoo.sketches.theta.UpdateSketchBuilder;
import com.yahoo.sketches.quantiles.QuantilesSketchBuilder;
import com.yahoo.sketches.quantiles.QuantilesSketch;
import com.yahoo.sketches.frequencies.FrequentItemsSketch;
import com.yahoo.sketches.frequencies.FrequentLongsSketch.Row;
import com.yahoo.sketches.frequencies.ErrorType;
/**
* Command line access to the basic sketch functions.
*/
public class CommandLine {
private static final String BOLD = "\033[1m";
private static final String OFF = "\033[0m";
public static void main(String[] args) {
if (args.length == 0) help();
else parseType(args);
}
static void parseType(String[] args) {
String token1 = args[0].toLowerCase();
switch (token1) {
case "uniq": parseUniq(args); break;
case "rank": parseRank(args); break;
case "hist": parseHist(args); break;
case "loghist": parseLogHist(args); break;
case "freq": parseFreq(args); break;
case "help": help(); break;
default: {
printlnErr("Unrecognized TYPE: "+token1);
help();
}
}
}
private static int parseArgsCase(String[] args) { //we already know type, args[0] is valid
int len = args.length;
int ret = 0;
switch (len) {
case 1: ret = 1; break; //only type, assume default k, System.in
case 2: {
String token2 = args[1]; //2nd arg could be help, k (numeric) or a fileName
if (token2.equalsIgnoreCase("help")) { ret = 2; break; } //help
if (!isNumeric(token2)) { ret = 3; break; } //2nd arg not numeric, must be a filename
ret = 4; //2nd arg must be numeric, assume System.in
break;
}
default: { //3 or more
String token2 = args[1]; //2nd arg could be help, k (numeric) or a fileName
if (token2.equalsIgnoreCase("help")) { ret = 2; break; } //help
if (!isNumeric(token2)) { ret = 3; break; } //2nd arg not numeric, must be a filename
//2nd arg is numeric, 3rd arg must be filename
ret = 5;
break;
}
}
return ret;
}
private static void parseUniq(String[] args) {
UpdateSketchBuilder bldr = Sketches.updateSketchBuilder();
UpdateSketch sketch;
int argsCase = parseArgsCase(args);
switch (argsCase) {
case 1:
doUniq(getBR(null), bldr.build()); break; //[default k], [System.in]
case 2:
uniqHelp(); break; //help
case 3: //2nd arg not numeric, must be a filename
doUniq(getBR(args[1]), bldr.build()); break; //[default k], file
case 4: //2nd arg is numeric, no filename
sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
doUniq(getBR(null), sketch); //user k, [System.in]
break;
case 5: //3 valid args
sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
doUniq(getBR(args[2]), sketch);
}
}
private static void doUniq(BufferedReader br, UpdateSketch sketch) {
String itemStr = "";
try {
while ((itemStr = br.readLine()) != null) {
sketch.update(itemStr);
}
} catch (IOException e) {
printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
System.exit(1);
}
println(sketch.toString());
}
private static void parseRank(String[] args) {
QuantilesSketchBuilder bldr = new QuantilesSketchBuilder();
QuantilesSketch sketch;
int argsCase = parseArgsCase(args);
switch (argsCase) {
case 1:
doRank(getBR(null), bldr.build()); break; //[default k], [System.in]
case 2:
rankHelp(); break; //help
case 3: //2nd arg not numeric, must be a filename
doRank(getBR(args[1]), bldr.build()); break; //[default k], file
case 4: //2nd arg is numeric, no filename
sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
doRank(getBR(null), sketch); //user k, [System.in]
break;
case 5: //3 valid args
sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
doRank(getBR(args[2]), sketch);
}
}
private static void doRank(BufferedReader br, QuantilesSketch sketch) {
String itemStr = "";
try {
while ((itemStr = br.readLine()) != null) {
double item = Double.parseDouble(itemStr);
sketch.update(item);
}
} catch (IOException | NumberFormatException e ) {
printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
System.exit(1);
}
int ranks = 101;
double[] valArr = sketch.getQuantiles(ranks);
println("Rank"+TAB+ "Value");
for (int i=0; i<ranks; i++) {
String r = String.format("%.2f",(double)i/ranks);
println(r + TAB + valArr[i]);
}
}
private static void parseHist(String[] args) {
QuantilesSketchBuilder bldr = new QuantilesSketchBuilder();
QuantilesSketch sketch;
int argsCase = parseArgsCase(args);
switch (argsCase) {
case 1:
doHist(getBR(null), bldr.build()); break; //[default k], [System.in]
case 2:
histHelp(); break; //help
case 3: //2nd arg not numeric, must be a filename
doHist(getBR(args[1]), bldr.build()); break; //[default k], file
case 4: //2nd arg is numeric, no filename
sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
doHist(getBR(null), sketch); //user k, [System.in]
break;
case 5: //3 valid args
sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
doHist(getBR(args[2]), sketch);
}
}
private static void doHist(BufferedReader br, QuantilesSketch sketch) {
String itemStr = "";
try {
while ((itemStr = br.readLine()) != null) {
double item = Double.parseDouble(itemStr);
sketch.update(item);
}
} catch (IOException | NumberFormatException e ) {
printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
System.exit(1);
}
int splitPoints = 30;
long n = sketch.getN();
double[] splitsArr = getEvenSplits(sketch, splitPoints);
double[] histArr = sketch.getPMF(splitsArr);
println("Value"+TAB+ "Freq");
//int histArrLen = histArr.length; //one larger than splitsArr
double min = sketch.getMinValue();
String splitVal = String.format("%,f", min);
String freqVal = String.format("%,d", (long)(histArr[0] * n));
println(splitVal+TAB+freqVal);
for (int i=0; i<splitsArr.length; i++) {
splitVal = String.format("%,f", splitsArr[i] * n);
freqVal = String.format("%,d", (long)(histArr[i+1] * n));
println(splitVal+TAB+freqVal);
}
}
private static void parseLogHist(String[] args) {
QuantilesSketchBuilder bldr = new QuantilesSketchBuilder();
QuantilesSketch sketch;
int argsCase = parseArgsCase(args);
switch (argsCase) {
case 1:
doLogHist(getBR(null), bldr.build()); break; //[default k], [System.in]
case 2:
logHistHelp(); break; //help
case 3: //2nd arg not numeric, must be a filename
doLogHist(getBR(args[1]), bldr.build()); break; //[default k], file
case 4: //2nd arg is numeric, no filename
sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
doLogHist(getBR(null), sketch); //user k, [System.in]
break;
case 5: //3 valid args
sketch = bldr.build(Integer.parseInt(args[1])); //args[1] is numeric = k
doLogHist(getBR(args[2]), sketch);
}
}
private static void doLogHist(BufferedReader br, QuantilesSketch sketch) {
String itemStr = "";
try {
while ((itemStr = br.readLine()) != null) {
double item = Double.parseDouble(itemStr);
if (Double.isNaN(item) || (item <= 0.0)) continue;
sketch.update(item);
}
} catch (IOException | NumberFormatException e ) {
printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
System.exit(1);
}
int splitPoints = 30;
long n = sketch.getN();
double[] splitsArr = getLogSplits(sketch, splitPoints);
double[] histArr = sketch.getPMF(splitsArr);
println("Value"+TAB+ "Freq");
//int histArrLen = histArr.length; //one larger than splitsArr
double min = sketch.getMinValue();
String splitVal = String.format("%,f", min);
String freqVal = String.format("%,d", (long)(histArr[0] * n));
println(splitVal+TAB+freqVal);
for (int i=0; i<splitsArr.length; i++) {
splitVal = String.format("%,f", splitsArr[i] * n);
freqVal = String.format("%,d", (long)(histArr[i+1] * n));
println(splitVal+TAB+freqVal);
}
}
private static void parseFreq(String[] args) {
FrequentItemsSketch<String> sketch;
int defaultSize = 1 << 17; //128K
int argsCase = parseArgsCase(args);
switch (argsCase) {
case 1:
sketch = new FrequentItemsSketch<String>(defaultSize);
doFreq(getBR(null), sketch); break; //[default k], [System.in]
case 2:
freqHelp(); break; //help
case 3: //2nd arg not numeric, must be a filename
sketch = new FrequentItemsSketch<String>(defaultSize);
doFreq(getBR(args[1]), sketch); break; //[default k], file
case 4: //2nd arg is numeric, no filename
sketch = new FrequentItemsSketch<String>(Integer.parseInt(args[1])); //args[1] is numeric = k
doFreq(getBR(null), sketch); //user k, [System.in]
break;
case 5: //3 valid args
sketch = new FrequentItemsSketch<String>(Integer.parseInt(args[1])); //args[1] is numeric = k
doFreq(getBR(args[2]), sketch);
}
}
private static void doFreq(BufferedReader br, FrequentItemsSketch<String> sketch) {
String itemStr = "";
try {
while ((itemStr = br.readLine()) != null) {
sketch.update(itemStr);
}
} catch (IOException e ) {
printlnErr("Read Error: Item: "+itemStr +", "+br.toString());
System.exit(1);
}
//NFP is a subset of NFN
FrequentItemsSketch<String>.Row[] rowArr = sketch.getFrequentItems(ErrorType.NO_FALSE_POSITIVES);
int len = rowArr.length;
println("Qualifying Rows: "+len);
println(Row.getRowHeader());
for (int i=0; i<len; i++) {
println((i+1) + rowArr[i].toString());
}
}
private static double[] getEvenSplits(QuantilesSketch sketch, int splitPoints) {
double min = sketch.getMinValue();
double max = sketch.getMaxValue();
return getSplits(min, max, splitPoints);
}
private static double[] getLogSplits(QuantilesSketch sketch, int splitPoints) {
double min = sketch.getMinValue();
double max = sketch.getMaxValue();
double logMin = log10(min);
double logMax = log10(max);
double[] logArr = getSplits(logMin, logMax, splitPoints);
double[] expArr = new double[logArr.length];
for (int i= 0; i<logArr.length; i++) {
expArr[i] = pow(10.0, logArr[i]);
}
return expArr;
}
private static double[] getSplits(double min, double max, int splitPoints) {
double range = max - min;
double delta = range/(splitPoints + 1);
double[] splits = new double[splitPoints];
for (int i = 0; i < splitPoints; i++) {
splits[i] = delta * (i+1);
}
return splits;
}
private static boolean isNumeric(String token) {
for (char c : token.toCharArray()) {
if (!Character.isDigit(c)) return false;
}
return true;
}
private static BufferedReader getBR(String token) {
BufferedReader br = null;
try {
if ((token == null) || (token.length() == 0)) {
br = new BufferedReader(new InputStreamReader(System.in));
} else {
br = new BufferedReader(new InputStreamReader(new FileInputStream(token)));
}
} catch (FileNotFoundException e) {
printlnErr("File Not Found: "+token);
System.exit(1);
}
return br;
}
private static void uniqHelp() {
StringBuilder sb = new StringBuilder();
sb.append(BOLD+"UNIQ SYNOPSIS"+OFF).append(LS);
sb.append(" sketch uniq help").append(LS);
sb.append(" sketch uniq [SIZE] [FILE]");
println(sb.toString());
}
private static void rankHelp() {
StringBuilder sb = new StringBuilder();
sb.append(BOLD+"RANK SYNOPSIS"+OFF).append(LS);
sb.append(" sketch rank help").append(LS);
sb.append(" sketch rank [SIZE] [FILE]");
println(sb.toString());
}
private static void histHelp() {
StringBuilder sb = new StringBuilder();
sb.append(BOLD+"HIST SYNOPSIS"+OFF).append(LS);
sb.append(" sketch hist help").append(LS);
sb.append(" sketch hist [SIZE] [FILE]");
println(sb.toString());
}
private static void logHistHelp() {
StringBuilder sb = new StringBuilder();
sb.append(BOLD+"LOGHIST SYNOPSIS"+OFF).append(LS);
sb.append(" sketch loghist help").append(LS);
sb.append(" sketch loghist [SIZE] [FILE]");
println(sb.toString());
}
private static void freqHelp() {
StringBuilder sb = new StringBuilder();
sb.append(BOLD+"FREQ SYNOPSIS"+OFF).append(LS);
sb.append(" sketch freq help").append(LS);
sb.append(" sketch freq [SIZE] [FILE]");
println(sb.toString());
}
static void help() {
StringBuilder sb = new StringBuilder();
sb.append(BOLD+"NAME"+OFF).append(LS);
sb.append(" sketch - sketch Uniques, Quantiles, Histograms, or Frequent Items.").append(LS);
sb.append(BOLD+"SYNOPSIS"+OFF).append(LS);
sb.append(" sketch (this help)").append(LS);
sb.append(" sketch TYPE help").append(LS);
sb.append(" sketch TYPE [SIZE] [FILE]").append(LS);
sb.append(BOLD+"DESCRIPTION"+OFF).append(LS);
sb.append(" Write a sketch(TYPE, SIZE) of FILE to standard output.").append(LS);
sb.append(" TYPE is required.").append(LS);
sb.append(" If SIZE is omitted, internal defaults are used.").append(LS);
sb.append(" If FILE is omitted, Standard In is assumed.").append(LS);
sb.append(BOLD+"TYPE DESCRIPTION"+OFF).append(LS);
sb.append(" sketch uniq : Sketch the unique string items of a stream.").append(LS);
sb.append(" sketch rank : Sketch the rank-value distribution of a numeric value stream.").
append(LS);
sb.append(" sketch hist : "+
"Sketch the linear-axis value-frequency distribution of numeric value stream.").append(LS);
sb.append(" sketch loghist : "+
"Sketch the log-axis value-frequency distribution of numeric value stream.").append(LS);
sb.append(" sketch freq : Sketch the Heavy Hitters of a string item stream.");
println(sb.toString());
uniqHelp();
rankHelp();
histHelp();
logHistHelp();
freqHelp();
}
private static void printlnErr(String s) { System.err.println(s); }
private static void println(String s) { System.out. println(s); }
}