blob: abfa924d92ed79ae80b4690a6a195f3d2dc9eaec [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.pherf.rules;
import com.google.common.base.Preconditions;
import org.apache.commons.math3.random.RandomDataGenerator;
import org.apache.phoenix.pherf.PherfConstants;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.RandomUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.phoenix.pherf.configuration.*;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Applies the data-generation rules declared in a Pherf data model / scenario XML to
 * produce {@link DataValue}s for Phoenix columns. Rules are matched by
 * {@link DataTypeMapping} and optionally refined by per-scenario overrides.
 * <p>
 * Thread-safety: the sequential counter is process-wide ({@code static}); the random
 * generators are instance state and are not synchronized except where noted.
 */
public class RulesApplier {
    private static final Logger LOGGER = LoggerFactory.getLogger(RulesApplier.class);

    // Shared across all instances so SEQUENTIAL values stay unique process-wide.
    private static final AtomicLong COUNTER = new AtomicLong(0);

    // Used to bail out of random distribution if it takes too long.
    // This should never happen when distributions add up to 100.
    private static final int OH_SHIT_LIMIT = 1000;

    // Joda formatters are immutable and thread-safe, so build the default one once
    // instead of re-creating it on every date operation.
    private static final DateTimeFormatter DATE_FORMATTER =
            DateTimeFormat.forPattern(PherfConstants.DEFAULT_DATE_PATTERN)
                    .withZone(DateTimeZone.UTC);

    private final Random rndNull;
    private final Random rndVal;
    private final RandomDataGenerator randomDataGenerator;
    private final XMLConfigParser parser;
    private final List<Map> modelList;
    private final Map<String, Column> columnMap;
    private String cachedScenarioOverrideName;
    private Map<DataTypeMapping, List> scenarioOverrideMap;

    public RulesApplier(XMLConfigParser parser) {
        this(parser, EnvironmentEdgeManager.currentTimeMillis());
    }

    /**
     * @param parser source of data models and scenarios
     * @param seed   seed for the null-chance and value-pick RNGs; supplying a fixed
     *               seed makes generated data reproducible
     */
    public RulesApplier(XMLConfigParser parser, long seed) {
        this.parser = parser;
        this.modelList = new ArrayList<Map>();
        this.columnMap = new HashMap<String, Column>();
        this.rndNull = new Random(seed);
        this.rndVal = new Random(seed);
        this.randomDataGenerator = new RandomDataGenerator();
        this.cachedScenarioOverrideName = null;
        populateModelList();
    }

    /** @return read-only view of the per-model rule maps (type -> rule columns). */
    public List<Map> getModelList() {
        return Collections.unmodifiableList(this.modelList);
    }

    /**
     * Lazily builds (and caches, keyed by scenario name) the type -> override-column
     * map for the given scenario's {@code <dataOverride>} section.
     */
    private Map<DataTypeMapping, List> getCachedScenarioOverrides(Scenario scenario) {
        // BUG FIX: the cached name was compared with != (reference identity), which
        // made the cache miss on virtually every call and rebuild the map each time.
        if (this.cachedScenarioOverrideName == null
                || !this.cachedScenarioOverrideName.equals(scenario.getName())) {
            this.cachedScenarioOverrideName = scenario.getName();
            this.scenarioOverrideMap = new HashMap<DataTypeMapping, List>();
            if (scenario.getDataOverride() != null) {
                for (Column column : scenario.getDataOverride().getColumn()) {
                    DataTypeMapping type = column.getType();
                    List<Column> cols = this.scenarioOverrideMap.get(type);
                    if (cols == null) {
                        cols = new LinkedList<Column>();
                        this.scenarioOverrideMap.put(type, cols);
                    }
                    cols.add(column);
                }
            }
        }
        return scenarioOverrideMap;
    }

    /**
     * Get a data value based on rules.
     *
     * @param scenario {@link org.apache.phoenix.pherf.configuration.Scenario} We are getting data for
     * @param phxMetaColumn {@link org.apache.phoenix.pherf.configuration.Column}
     *                      From Phoenix MetaData that we are generating data for.
     *                      It defines the type we are trying to match.
     * @return the generated value, or {@code null} if no matching rule was found
     * @throws Exception on invalid rule configuration
     */
    public DataValue getDataForRule(Scenario scenario, Column phxMetaColumn) throws Exception {
        // TODO Make a Set of Rules that have already been applied so that we don't generate for every value
        List<Scenario> scenarios = parser.getScenarios();
        DataValue value = null;
        if (scenarios.contains(scenario)) {
            LOGGER.debug("We found a correct Scenario");
            // Scenario-level overrides take precedence over the model-level rules.
            Map<DataTypeMapping, List> overrideRuleMap = this.getCachedScenarioOverrides(scenario);
            if (overrideRuleMap != null) {
                List<Column> overrideRuleList = overrideRuleMap.get(phxMetaColumn.getType());
                if (overrideRuleList != null && overrideRuleList.contains(phxMetaColumn)) {
                    LOGGER.debug("We found a correct override column rule");
                    Column columnRule = getColumnForRuleOverride(overrideRuleList, phxMetaColumn);
                    if (columnRule != null) {
                        return getDataValue(columnRule);
                    }
                }
            }
            // Assume the first rule map
            Map<DataTypeMapping, List> ruleMap = modelList.get(0);
            List<Column> ruleList = ruleMap.get(phxMetaColumn.getType());
            // Make sure Column from Phoenix Metadata matches a rule column.
            // BUG FIX: guard against a null ruleList (no rule declared for this type),
            // which previously threw an NPE instead of reaching the warning below.
            if (ruleList != null && ruleList.contains(phxMetaColumn)) {
                // Generate some random data based on this rule
                LOGGER.debug("We found a correct column rule");
                Column columnRule = getColumnForRule(ruleList, phxMetaColumn);
                value = getDataValue(columnRule);
            } else {
                LOGGER.warn("Attempted to apply rule to data, but could not find a rule to match type:"
                        + phxMetaColumn.getType()
                );
            }
        }
        return value;
    }

    /**
     * Get data value based on the supplied rule
     *
     * @param column {@link org.apache.phoenix.pherf.configuration.Column} Column rule to get data for
     * @return {@link org.apache.phoenix.pherf.rules.DataValue} Container Type --> Value mapping
     * @throws Exception if the rule configuration is invalid for the column type
     */
    public DataValue getDataValue(Column column) throws Exception {
        DataValue data = null;
        String prefix = "";
        int length = column.getLength();
        int nullChance = column.getNullChance();
        List<DataValue> dataValues = column.getDataValues();

        // Return an empty value if we fall within the configured probability of null
        if ((nullChance != Integer.MIN_VALUE) && (isValueNull(nullChance))) {
            return new DataValue(column.getType(), "");
        }

        if (column.getPrefix() != null) {
            prefix = column.getPrefix();
        }

        if ((prefix.length() >= length) && (length > 0)) {
            LOGGER.warn("You are attempting to generate data with a prefix (" + prefix + ") "
                    + "That is longer than expected overall field length (" + length + "). "
                    + "This will certainly lead to unexpected data values.");
        }

        switch (column.getType()) {
        case VARCHAR:
        case VARBINARY:
        case CHAR:
            // Use the specified data values from configs if they exist
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
            } else {
                Preconditions.checkArgument(length > 0, "length needs to be > 0");
                if (column.getDataSequence() == DataSequence.SEQUENTIAL) {
                    data = getSequentialDataValue(column);
                } else {
                    data = getRandomDataValue(column);
                }
            }
            break;
        case VARCHAR_ARRAY:
            // Only list datavalues are supported; join them into a comma-separated string.
            StringJoiner joiner = new StringJoiner(",");
            for (DataValue dv : dataValues) {
                joiner.add(dv.getValue());
            }
            data = new DataValue(column.getType(), joiner.toString());
            break;
        case DECIMAL:
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
            } else {
                int precision = column.getPrecision();
                double minDbl = column.getMinValue();
                Preconditions.checkArgument((precision > 0) && (precision <= 18),
                        "Precision must be between 0 and 18");
                Preconditions.checkArgument(minDbl >= 0,
                        "minvalue must be set in configuration for decimal");
                Preconditions.checkArgument(column.getMaxValue() > 0,
                        "maxValue must be set in configuration decimal");
                // Cap the max at the largest value representable with this precision
                // (precision repetitions of the digit 9).
                StringBuilder maxValueStr = new StringBuilder();
                for (int i = 0; i < precision; i++) {
                    maxValueStr.append(9);
                }
                double maxDbl = Math.min(column.getMaxValue(), Double.parseDouble(maxValueStr.toString()));
                final double dbl = RandomUtils.nextDouble(minDbl, maxDbl);
                data = new DataValue(column.getType(), String.valueOf(dbl));
            }
            break;
        case TINYINT:
        case INTEGER:
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
            } else {
                int minInt = (int) column.getMinValue();
                int maxInt = (int) column.getMaxValue();
                if (column.getType() == DataTypeMapping.TINYINT) {
                    // NOTE(review): the accepted upper bound of 128 looks off by one
                    // (TINYINT max is 127) but is preserved to avoid rejecting
                    // previously-valid configurations — confirm intent.
                    Preconditions.checkArgument((minInt >= -128) && (minInt <= 128),
                            "min value need to be set in configuration for tinyints " + column.getName());
                    Preconditions.checkArgument((maxInt >= -128) && (maxInt <= 128),
                            "max value need to be set in configuration for tinyints " + column.getName());
                }
                // nextInt's upper bound is exclusive, so +1 makes maxInt inclusive.
                int intVal = ThreadLocalRandom.current().nextInt(minInt, maxInt + 1);
                data = new DataValue(column.getType(), String.valueOf(intVal));
            }
            break;
        case BIGINT:
        case UNSIGNED_LONG:
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
            } else {
                long minLong = column.getMinValue();
                long maxLong = column.getMaxValue();
                if (column.getType() == DataTypeMapping.UNSIGNED_LONG) {
                    Preconditions.checkArgument((minLong > 0) && (maxLong > 0),
                            "min and max values need to be set in configuration for unsigned_longs "
                                    + column.getName());
                }
                long longVal = RandomUtils.nextLong(minLong, maxLong);
                data = new DataValue(column.getType(), String.valueOf(longVal));
            }
            break;
        case DATE:
        case TIMESTAMP:
            if ((column.getDataValues() != null) && (column.getDataValues().size() > 0)) {
                data = pickDataValueFromList(dataValues);
                // Check if date has right format or not
                data.setValue(checkDatePattern(data.getValue()));
            } else if (!column.getUseCurrentDate()) {
                int minYear = (int) column.getMinValue();
                int maxYear = (int) column.getMaxValue();
                Preconditions.checkArgument((minYear > 0) && (maxYear > 0),
                        "min and max values need to be set in configuration for date/timestamps "
                                + column.getName());
                String dt = generateRandomDate(minYear, maxYear);
                data = new DataValue(column.getType(), dt);
                // BUG FIX: min/max were previously swapped (min year stored as max and
                // vice versa).
                data.setMinValue(String.valueOf(minYear));
                data.setMaxValue(String.valueOf(maxYear));
            } else {
                String dt = getCurrentDate();
                data = new DataValue(column.getType(), dt);
            }
            break;
        default:
            break;
        }
        Preconditions.checkArgument(data != null,
                "Data value could not be generated for some reason. Please check configs");
        return data;
    }

    /**
     * Convert a year range into a random date in the standard format
     * yyyy-MM-dd HH:mm:ss.SSS z, spanning from the start of {@code min} to the end
     * of {@code max}.
     */
    public String generateRandomDate(int min, int max) throws Exception {
        String mindt = min + "-01-01 00:00:00.000"; // set min date as starting of min year
        String maxdt = max + "-12-31 23:59:59.999"; // set max date as end of max year
        return generateRandomDate(mindt, maxdt);
    }

    /**
     * @return a random date strictly between {@code min} and {@code max}
     *         (both exclusive), formatted with the default Pherf date pattern
     */
    public String generateRandomDate(String min, String max) throws Exception {
        DateTime minDt = DATE_FORMATTER.parseDateTime(checkDatePattern(min));
        DateTime maxDt = DATE_FORMATTER.parseDateTime(checkDatePattern(max));
        DateTime dt;
        // Get Ms Date between min and max. RandomDataGenerator is not thread-safe,
        // hence the synchronization.
        synchronized (randomDataGenerator) {
            // Make sure date generated is exclusively between the passed limits
            long rndLong = randomDataGenerator.nextLong(minDt.getMillis() + 1, maxDt.getMillis() - 1);
            dt = new DateTime(rndLong, PherfConstants.DEFAULT_TIME_ZONE);
        }
        return DATE_FORMATTER.print(dt);
    }

    /** @return the current time formatted with the default Pherf date pattern (UTC). */
    public String getCurrentDate() {
        DateTime dt = new DateTime(PherfConstants.DEFAULT_TIME_ZONE);
        return DATE_FORMATTER.print(dt);
    }

    /**
     * Given an int chance [0-100] inclusive, this method will return true if a winner is selected, otherwise false.
     *
     * @param chance Percentage as an int whole number.
     * @return boolean if we pick a number within range
     */
    private boolean isValueNull(int chance) {
        return (rndNull.nextInt(100) < chance);
    }

    /**
     * Picks one of the configured data values, honoring each value's distribution
     * percentage. Distributions must sum to 100, or all be absent (sum 0).
     */
    private DataValue pickDataValueFromList(List<DataValue> values) throws Exception {
        DataValue generatedDataValue = null;
        int sum = 0, count = 0;

        // Verify distributions add up to 100 if they exist
        for (DataValue value : values) {
            sum += value.getDistribution();
        }
        Preconditions.checkArgument((sum == 100) || (sum == 0),
                "Distributions need to add up to 100 or not exist.");

        // Spin the wheel until we get a value.
        while (generatedDataValue == null) {
            // Give an equal chance at picking any one rule to test.
            // This prevents rules at the beginning of the list from getting more chances to get picked
            int rndIndex = rndVal.nextInt(values.size());
            DataValue valueRule = values.get(rndIndex);
            generatedDataValue = pickDataValueFromList(valueRule);

            // While it's possible to get here if you have a bunch of really small distributions,
            // it's just really unlikely. This is just a safety net so we actually pick a value.
            if (count++ >= OH_SHIT_LIMIT) {
                LOGGER.info("We generated a value from hitting our OH_SHIT_LIMIT: {}", OH_SHIT_LIMIT);
                generatedDataValue = valueRule;
            }
        }
        return generatedDataValue;
    }

    /**
     * Evaluates a single value rule: returns a copy of the rule with probability equal
     * to its distribution percentage (or always, when no distribution is set), or
     * {@code null} when the probabilistic pick misses. Ranged (min/max) rules are
     * currently supported for DATE only.
     */
    private DataValue pickDataValueFromList(final DataValue valueRule) throws Exception {
        DataValue retValue = new DataValue(valueRule);

        // Path taken when configuration specifies a specific value to be taken with the <value> tag
        if (valueRule.getValue() != null) {
            int chance = (valueRule.getDistribution() == 0) ? 100 : valueRule.getDistribution();
            // BUG FIX: was "<= chance", which selected with probability (chance+1)/100.
            return (rndVal.nextInt(100) < chance) ? retValue : null;
        }

        // Path taken when configuration specifies to use current date
        if (valueRule.getUseCurrentDate()) {
            int chance = (valueRule.getDistribution() == 0) ? 100 : valueRule.getDistribution();
            retValue.setValue(getCurrentDate());
            return (rndVal.nextInt(100) < chance) ? retValue : null;
        }

        // Later we can add support for other data types if needed. Right now, we just do this for dates.
        Preconditions.checkArgument((retValue.getMinValue() != null) || (retValue.getMaxValue() != null),
                "Both min/maxValue tags must be set if value tag is not used");
        Preconditions.checkArgument((retValue.getType() == DataTypeMapping.DATE),
                "Currently only DATE is supported for ranged random values");
        // BUG FIX: generateRandomDate() was previously invoked twice; the first
        // result was discarded (wasted work and wasted RNG draws).
        retValue.setValue(generateRandomDate(retValue.getMinValue(), retValue.getMaxValue()));
        retValue.setMinValue(checkDatePattern(valueRule.getMinValue()));
        retValue.setMaxValue(checkDatePattern(valueRule.getMaxValue()));
        return retValue;
    }

    /**
     * Normalizes a date string to the default Pherf pattern.
     *
     * @throws IllegalArgumentException (from Joda) if the date cannot be parsed
     */
    public String checkDatePattern(String date) {
        DateTime parsedDate = DATE_FORMATTER.parseDateTime(date);
        return DATE_FORMATTER.print(parsedDate);
    }

    /**
     * Top level {@link java.util.List} {@link java.util.Map}. This will likely only have one entry until we have
     * multiple files.
     * <p/>
     * <p/>
     * Each Map entry in the List is:
     * {@link java.util.Map} of
     * {@link org.apache.phoenix.pherf.configuration.DataTypeMapping} -->
     * List of {@link org.apache.phoenix.pherf.configuration.Column}
     * Build the initial Map with all the general rules.
     * These are contained in:
     * <datamode><datamapping><column>...</column></datamapping></datamode>
     * <p/>
     * <p/>
     * Unsupported until V2
     * Build the overrides by appending them to the list of rules that match the column type
     */
    private void populateModelList() {
        if (!modelList.isEmpty()) {
            return;
        }

        // Support for multiple models, but rules are only relevant within each model
        for (DataModel model : parser.getDataModels()) {
            // Step 1
            final Map<DataTypeMapping, List> ruleMap = new HashMap<DataTypeMapping, List>();
            for (Column column : model.getDataMappingColumns()) {
                columnMap.put(column.getName(), column);
                DataTypeMapping type = column.getType();
                List<Column> cols = (List<Column>) ruleMap.get(type);
                if (cols == null) {
                    cols = new LinkedList<Column>();
                    ruleMap.put(type, cols);
                }
                cols.add(column);
            }
            this.modelList.add(ruleMap);
        }
    }

    /** @return the fully-mutated rule column for the given Phoenix metadata column. */
    public Column getRule(Column phxMetaColumn) {
        // Assume the first rule map
        Map<DataTypeMapping, List> ruleMap = modelList.get(0);
        List<Column> ruleList = ruleMap.get(phxMetaColumn.getType());
        return getColumnForRule(ruleList, phxMetaColumn);
    }

    public Column getRule(String columnName) {
        return getRule(columnName, null);
    }

    /**
     * Looks up a rule column by name, preferring a scenario-level override when one
     * exists for that name.
     */
    public Column getRule(String columnName, Scenario scenario) {
        if (null != scenario && null != scenario.getDataOverride()) {
            for (Column column : scenario.getDataOverride().getColumn()) {
                if (column.getName().equals(columnName)) {
                    return column;
                }
            }
        }
        return columnMap.get(columnName);
    }

    /** @return a defensive copy of the override rule matching the column's name, or null. */
    private Column getColumnForRuleOverride(List<Column> ruleList, Column phxMetaColumn) {
        for (Column columnRule : ruleList) {
            if (columnRule.getName().equals(phxMetaColumn.getName())) {
                return new Column(columnRule);
            }
        }
        return null;
    }

    /**
     * Starts from the first rule for the type and layers every applicable rule on top
     * of it as a mutation. User-defined rules only apply when their name matches the
     * Phoenix column's name.
     */
    private Column getColumnForRule(List<Column> ruleList, Column phxMetaColumn) {
        // Column pointer to head of list
        Column ruleAppliedColumn = new Column(ruleList.get(0));

        // Then we apply each rule override as a mutation to the column
        for (Column columnRule : ruleList) {
            // Check if user defined column rules match the column data type we are generating.
            // We don't want to apply the rule if name doesn't match the column from Phoenix.
            if (columnRule.isUserDefined()
                    && !columnRule.getName().equals(phxMetaColumn.getName())) {
                continue;
            }
            ruleAppliedColumn.mutate(columnRule);
        }
        return ruleAppliedColumn;
    }

    /**
     * Add a numerically increasing counter onto the end of a random string.
     * Incremented counter is thread safe (backed by an {@link AtomicLong}).
     *
     * @param column {@link org.apache.phoenix.pherf.configuration.Column}
     * @return {@link org.apache.phoenix.pherf.rules.DataValue}
     */
    private DataValue getSequentialDataValue(Column column) {
        long inc = COUNTER.getAndIncrement();
        int paddedLength = column.getLengthExcludingPrefix();
        // Left-pad with zeros to a fixed width, then keep only the rightmost digits
        // so the counter never overflows its allotted width.
        String padded = StringUtils.leftPad(String.valueOf(inc), paddedLength, "0");
        String suffix = StringUtils.right(padded, paddedLength);
        String varchar = (column.getPrefix() != null) ? column.getPrefix() + suffix : suffix;

        // Truncate string back down if it exceeds length
        varchar = StringUtils.left(varchar, column.getLength());
        return new DataValue(column.getType(), varchar);
    }

    /** Generates a random alphanumeric value, honoring the column's prefix and length. */
    private DataValue getRandomDataValue(Column column) {
        String varchar = RandomStringUtils.randomAlphanumeric(column.getLength());
        varchar = (column.getPrefix() != null) ? column.getPrefix() + varchar : varchar;

        // Truncate string back down if it exceeds length
        varchar = StringUtils.left(varchar, column.getLength());
        return new DataValue(column.getType(), varchar);
    }
}