blob: 7907da80d0567d481168334fc7f45578606a6011 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kylin.dict;
import static org.apache.kylin.dict.global.GlobalDictHDFSStore.V2_INDEX_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.fail;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.common.util.RandomUtil;
import org.apache.kylin.dict.global.AppendDictSliceKey;
import org.apache.kylin.dict.global.AppendTrieDictionaryBuilder;
import org.apache.kylin.dict.global.GlobalDictHDFSStore;
import org.apache.kylin.dict.global.GlobalDictMetadata;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import com.google.common.collect.Lists;
public class AppendTrieDictionaryTest extends LocalFileMetadataTestCase {
// Resource sub-path with a random UUID suffix, computed once when the class is initialized.
private static final String RESOURCE_DIR = "/dict/append_dict_test/" + RandomUtil.randomUUID();
// Dictionary directory under the configured HDFS working directory; (re)assigned in beforeTest().
private static String BASE_DIR;
// Dictionary directory under the local working directory; (re)assigned in beforeTest() and
// inspected through java.io.File by the version/format tests.
private static String LOCAL_BASE_DIR;
/**
 * Creates the test metadata, sets the append-dictionary entry size to 50000 and derives the
 * dictionary directories (HDFS-style and local) used by every test.
 */
@Before
public void beforeTest() {
    staticCreateTestMetadata();

    KylinConfig config = KylinConfig.getInstanceFromEnv();
    config.setProperty("kylin.dictionary.append-entry-size", "50000");

    // Both directories share the same relative layout below their working directory.
    String dictSubPath = "/resources/GlobalDict" + RESOURCE_DIR + "/";
    BASE_DIR = config.getHdfsWorkingDirectory() + dictSubPath;
    LOCAL_BASE_DIR = getLocalWorkingDirectory() + dictSubPath;
}
/**
 * Runs after each test: deletes the dictionary directory via cleanup(), then calls
 * staticCleanupTestMetadata().
 */
@After
public void afterTest() {
cleanup();
staticCleanupTestMetadata();
}
/**
 * Recursively deletes the dictionary directory {@code BASE_DIR}.
 *
 * <p>Deletion is best effort: an {@link IOException} does not fail the calling test, but it is
 * reported on stderr so that leftover state from a failed cleanup is not silently hidden.
 */
private void cleanup() {
    Path basePath = new Path(BASE_DIR);
    try {
        HadoopUtil.getFileSystem(basePath).delete(basePath, true);
    } catch (IOException e) {
        // Deliberately not rethrown: cleanup must never turn a passing test into a failure.
        System.err.println("Failed to clean up " + basePath + ": " + e);
    }
}
private static final String[] words = new String[] { "paint", "par", "part", "parts", "partition", "partitions",
"party", "partie", "parties", "patient", "taste", "tar", "trie", "try", "tries", "字典", "字典树", "字母", // non-ascii characters
"", // empty
"paiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii",
"paiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiipaiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii",
"paintjkjdfklajkdljfkdsajklfjklsadjkjekjrklewjrklewjklrjklewjkljkljkljkljweklrjewkljrklewjrlkjewkljrkljkljkjlkjjkljkljkljkljlkjlkjlkjljdfadfads"
+ "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk"
+ "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk"
+ "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk"
+ "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk"
+ "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk"
+ "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk"
+ "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk",
"paint", "tar", "try", // some dup
};
/**
 * Opens a new builder on {@code BASE_DIR}, taking the per-slice entry limit from the current
 * Kylin configuration (so tests can change it via {@code kylin.dictionary.append-entry-size}).
 *
 * @return a fresh builder over the shared dictionary directory
 * @throws IOException if the builder cannot be created
 */
private AppendTrieDictionaryBuilder createBuilder() throws IOException {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    return new AppendTrieDictionaryBuilder(BASE_DIR, config.getAppendDictEntrySize(), true);
}
/**
 * Runs the staged append check 50 times over the shuffled {@code words} fixture, also verifying
 * that a few values that were never added are reported as missing.
 */
@Test
public void testStringRepeatly() throws IOException {
    ArrayList<String> allWords = new ArrayList<>(Arrays.asList(words));
    ArrayList<String> absent = new ArrayList<>(Arrays.asList("pa", "pars", "tri", "字"));

    int round = 0;
    while (round < 50) {
        testStringDictAppend(allWords, absent, true);
        // drop the dictionary directory between rounds to speed up the test
        cleanup();
        round++;
    }
}
/**
 * Runs the staged append check over the English word list resource, in file order (no shuffle).
 */
@Test
public void testEnglishWords() throws Exception {
    String wordFile = "src/test/resources/dict/english-words.80 (scowl-2015.05.18).txt";
    // loadStrings closes the stream it is given.
    ArrayList<String> englishWords = loadStrings(new FileInputStream(wordFile));
    testStringDictAppend(englishWords, null, false);
}
/**
 * Runs the staged append check over the category-names resource, in shuffled order.
 */
@Test
public void testCategoryNames() throws Exception {
    String namesFile = "src/test/resources/dict/dw_category_grouping_names.dat";
    // loadStrings closes the stream it is given.
    ArrayList<String> categoryNames = loadStrings(new FileInputStream(namesFile));
    testStringDictAppend(categoryNames, null, true);
}
/**
 * Reads the stream as UTF-8 text and returns every non-blank line, trimmed, in file order.
 *
 * <p>The stream is always closed before this method returns, including when the reader cannot
 * be created or reading fails part-way through.
 *
 * @param is the stream to read; closed by this method
 * @return the trimmed, non-empty lines
 * @throws Exception if reading or closing the stream fails
 */
private static ArrayList<String> loadStrings(InputStream is) throws Exception {
    ArrayList<String> r = new ArrayList<String>();
    // try-with-resources closes the reader first, then the raw stream, and keeps the primary
    // exception if a close() fails as well.
    try (InputStream in = is; BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
        String word;
        while ((word = reader.readLine()) != null) {
            word = word.trim();
            if (!word.isEmpty()) {
                r.add(word);
            }
        }
    }
    return r;
}
/**
 * Builds a dictionary from every non-blank, trimmed line of the {@code huge_key} resource and
 * dumps it to stdout. Ignored by default because the resource is a huge key set.
 */
@Ignore("need huge key set")
@Test
public void testHugeKeySet() throws IOException {
    AppendTrieDictionaryBuilder builder = createBuilder();
    // try-with-resources guarantees both the reader and the file stream are closed, even if the
    // reader cannot be created or a line fails to be added.
    try (InputStream is = new FileInputStream("src/test/resources/dict/huge_key");
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"))) {
        String word;
        while ((word = reader.readLine()) != null) {
            word = word.trim();
            if (!word.isEmpty()) {
                builder.addValue(word);
            }
        }
    }
    AppendTrieDictionary<String> dict = builder.build(0);
    dict.dump(System.out);
}
/**
 * Appends {@code list} to the dictionary in three stages (each stage reopens the dictionary with
 * a new builder) and checks after every stage that
 * <ul>
 * <li>every value appended so far resolves to an id other than -1,</li>
 * <li>values from earlier stages keep the id they were first seen with, and</li>
 * <li>newly appended values never take an id already owned by a different value.</li>
 * </ul>
 * Finally the dictionary is round-tripped through {@code testSerialize} and all ids are
 * re-checked.
 *
 * @param list values to append; must contain at least two elements because the stage
 *        boundaries are drawn with {@code Random.nextInt(size / 2)}
 * @param notfound values that must resolve to -1 once everything is appended; may be null
 * @param shuffleList whether to shuffle a copy of {@code list} before appending
 * @throws IOException if building the dictionary fails
 */
private void testStringDictAppend(ArrayList<String> list, ArrayList<String> notfound, boolean shuffleList)
        throws IOException {
    // Report the seed so a failure caused by a particular shuffle/split can be replayed.
    long seed = System.currentTimeMillis();
    System.out.println("testStringDictAppend random seed: " + seed);
    Random rnd = new Random(seed);

    ArrayList<String> strList = new ArrayList<String>(list);
    if (shuffleList) {
        Collections.shuffle(strList, rnd);
    }

    BytesConverter converter = new StringBytesConverter();
    TreeMap<Integer, String> checkMap = new TreeMap<>();
    int firstAppend = rnd.nextInt(strList.size() / 2);
    int secondAppend = firstAppend + rnd.nextInt((strList.size() - firstAppend) / 2);

    // stage 1: initial build
    AppendTrieDictionary<String> dict = appendAndBuild(strList, 0, firstAppend);
    dict.dump(System.out);
    verifyIds(dict, converter, strList, 0, firstAppend, checkMap);

    // stage 2: reopen dict and append
    AppendTrieDictionary<String> newDict = appendAndBuild(strList, firstAppend, secondAppend);
    // JUnit assertion instead of the `assert` keyword, which is skipped when the JVM runs without -ea.
    assertEquals("Reopened dictionary should equal the previous one", dict, newDict);
    dict = newDict;
    dict.dump(System.out);
    verifyIds(dict, converter, strList, firstAppend, secondAppend, checkMap);

    // stage 3: reopen dict and append rest str
    newDict = appendAndBuild(strList, secondAppend, strList.size());
    assertEquals("Reopened dictionary should equal the previous one", dict, newDict);
    dict = newDict;
    dict.dump(System.out);
    verifyIds(dict, converter, strList, secondAppend, strList.size(), checkMap);

    if (notfound != null) {
        for (String s : notfound) {
            byte[] bytes = converter.convertToBytes(s);
            int id = dict.getIdFromValueBytesWithoutCache(bytes, 0, bytes.length, 0);
            assertEquals(-1, id);
        }
    }

    // After a serialization round trip every value must still map to its recorded id.
    dict = testSerialize(dict, converter);
    verifyIds(dict, converter, strList, strList.size(), strList.size(), checkMap);
}

/** Opens a new builder, appends {@code values[from, to)} and returns the built dictionary. */
private AppendTrieDictionary<String> appendAndBuild(List<String> values, int from, int to) throws IOException {
    AppendTrieDictionaryBuilder builder = createBuilder();
    for (int i = from; i < to; i++) {
        builder.addValue(values.get(i));
    }
    return builder.build(0);
}

/**
 * Looks up {@code values[0, checkCount)}: the first {@code knownCount} must match the ids already
 * recorded in {@code checkMap}; the rest must not collide with another value and are recorded.
 */
private static void verifyIds(AppendTrieDictionary<String> dict, BytesConverter converter, List<String> values,
        int knownCount, int checkCount, TreeMap<Integer, String> checkMap) throws IOException {
    for (int i = 0; i < checkCount; i++) {
        String str = values.get(i);
        byte[] bytes = converter.convertToBytes(str);
        int id = dict.getIdFromValueBytesWithoutCache(bytes, 0, bytes.length, 0);
        assertNotEquals(String.format(Locale.ROOT, "Value %s not exist", str), -1, id);
        if (i < knownCount) {
            assertEquals("Except id " + id + " for " + str + " but " + checkMap.get(id), str, checkMap.get(id));
        } else {
            // A duplicate of an earlier value may legitimately reuse that value's id.
            assertFalse(String.format(Locale.ROOT, "Id %d for %s should be empty, but is %s", id, str,
                    checkMap.get(id)), checkMap.containsKey(id) && !str.equals(checkMap.get(id)));
            checkMap.put(id, str);
        }
    }
}
/**
 * Writes {@code dict} to an in-memory buffer and reads it back into a new dictionary instance.
 *
 * @param dict the dictionary to round-trip
 * @param converter not used by this helper
 * @return the dictionary restored from the serialized bytes
 * @throws RuntimeException wrapping any {@link IOException} raised while writing or reading
 */
private static AppendTrieDictionary<String> testSerialize(AppendTrieDictionary<String> dict,
        BytesConverter converter) {
    try {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buffer);
        dict.write(out);
        out.close();

        DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
        AppendTrieDictionary<String> restored = new AppendTrieDictionary<String>();
        restored.readFields(in);
        in.close();
        return restored;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Starts the builder's max id at {@code Integer.MAX_VALUE - 2} and checks that the four assigned
 * ids run through {@code Integer.MAX_VALUE} and continue at the negative end of the int range.
 */
@Test
public void testMaxInteger() throws IOException {
    AppendTrieDictionaryBuilder builder = createBuilder();
    builder.setMaxId(Integer.MAX_VALUE - 2);
    for (String value : new String[] { "a", "ab", "acd", "ac" }) {
        builder.addValue(value);
    }

    AppendTrieDictionary<String> dict = builder.build(0);
    assertEquals(Integer.MAX_VALUE - 1, dict.getIdFromValue("a", 0));
    assertEquals(Integer.MAX_VALUE, dict.getIdFromValue("ab", 0));
    assertEquals(Integer.MIN_VALUE + 1, dict.getIdFromValue("ac", 0));
    assertEquals(Integer.MIN_VALUE, dict.getIdFromValue("acd", 0));
}
/**
 * Adds 10000 values of growing length ("aa", "aaa", ...) and builds the dictionary; a
 * {@link StackOverflowError} is logged with the failing round and rethrown. Ignored by default.
 */
@Ignore("Only occurred when value is very long (>8000 bytes)")
@Test
public void testSuperLongValue() throws IOException {
    AppendTrieDictionaryBuilder builder = createBuilder();
    StringBuilder growing = new StringBuilder("a");
    for (int round = 0; round < 10000; round++) {
        growing.append("a");
        try {
            builder.addValue(growing.toString());
        } catch (StackOverflowError e) {
            System.out.println("\nstack overflow " + round);
            throw e;
        }
    }
    AppendTrieDictionary<String> dictionary = builder.build(0);
    dictionary.getMaxId();
}
/**
 * Builds a dictionary from "a" plus one long percent-encoded value through
 * createAppendTrieDict, which sets the append entry size to 1. The test passes when the build
 * completes without throwing.
 */
@Test
public void testSplitContainSuperLongValue() throws IOException {
String superLongValue = "%5Cx1A%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%7E%29%5CxEF%5CxBF%5CxBD%5Cx1B+%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5Cx13%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5B";
createAppendTrieDict(Arrays.asList("a", superLongValue));
}
/**
 * Builds a dictionary from "a" plus one long percent-encoded value through
 * createAppendTrieDict (append entry size 1). The test passes when the build completes without
 * throwing.
 * NOTE(review): judging by the name this guards against the long value ending up in a slice
 * file name - confirm against the store implementation.
 */
@Test
public void testSuperLongValueAsFileName() throws IOException {
String superLongValue = "%5Cx1A%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%7E%29%5CxEF%5CxBF%5CxBD%5Cx1B+%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5Cx13%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5CxEF%5CxBF%5CxBD%5B";
createAppendTrieDict(Arrays.asList("a", superLongValue));
}
/**
 * Builds a dictionary from the values "::" and ":" through createAppendTrieDict (append entry
 * size 1). The test passes when the build completes without throwing.
 */
@Test
public void testIllegalFileNameValue() throws IOException {
createAppendTrieDict(Arrays.asList("::", ":"));
}
/**
 * Builds a dictionary without adding any value (empty list passed to createAppendTrieDict).
 * The test passes when the build completes without throwing.
 */
@Test
public void testSkipAddValue() throws IOException {
createAppendTrieDict(new ArrayList<String>());
}
/**
 * Serializes an empty dictionary and checks that the first UTF string written, read back as a
 * path, carries no URI scheme.
 */
@Test
public void testSerialize() throws IOException {
    AppendTrieDictionary<String> emptyDict = createBuilder().build(0);

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(buffer);
    emptyDict.write(out);
    out.close();

    DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
    String storedPath = in.readUTF();
    assertNull(new Path(storedPath).toUri().getScheme());
    in.close();
}
/**
 * Records the ids of "a" and "ab", serializes the dictionary with
 * {@code setSaveAbsolutePath(true)}, and checks that the deserialized copy resolves both values
 * to the same ids.
 */
@Test
public void testDeserialize() throws IOException {
    List<String> strList = Lists.newArrayList("a", "ab");
    AppendTrieDictionaryBuilder builder = createBuilder();
    builder.setMaxId(Integer.MAX_VALUE - 2);
    for (String value : strList) {
        builder.addValue(value);
    }
    AppendTrieDictionary<String> dict = builder.build(0);

    // Remember which id each value had before serialization.
    BytesConverter converter = new StringBytesConverter();
    TreeMap<Integer, String> checkMap = new TreeMap<Integer, String>();
    for (String value : strList) {
        byte[] raw = converter.convertToBytes(value);
        checkMap.put(dict.getIdFromValueBytesWithoutCache(raw, 0, raw.length, 0), value);
    }

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(buffer);
    dict.setSaveAbsolutePath(true);
    dict.write(out);
    out.close();

    DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
    AppendTrieDictionary<String> restored = new AppendTrieDictionary<String>();
    restored.readFields(in);
    in.close();

    for (String str : strList) {
        byte[] raw = converter.convertToBytes(str);
        int id = restored.getIdFromValueBytesWithoutCache(raw, 0, raw.length, 0);
        assertNotEquals(String.format(Locale.ROOT, "Value %s not exist", str), -1, id);
        assertEquals("Except id " + id + " for " + str + " but " + checkMap.get(id), str, checkMap.get(id));
    }
}
/**
 * Sets the append entry size to 1 and builds a dictionary from the given values, in order.
 *
 * @param valueList values to add; may be empty
 * @throws IOException if adding a value or building fails
 */
private void createAppendTrieDict(List<String> valueList) throws IOException {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    config.setProperty("kylin.dictionary.append-entry-size", "1");

    AppendTrieDictionaryBuilder dictBuilder = createBuilder();
    for (String each : valueList) {
        dictBuilder.addValue(each);
    }
    dictBuilder.build(0);
}
/** Accepts files whose name starts with {@code "cached_"}. */
private static class CachedFileFilter implements FileFilter {
    @Override
    public boolean accept(File candidate) {
        String fileName = candidate.getName();
        return fileName.startsWith("cached_");
    }
}
/** Accepts files whose name starts with {@code GlobalDictHDFSStore.VERSION_PREFIX}. */
private static class VersionFilter implements FileFilter {
    @Override
    public boolean accept(File candidate) {
        String fileName = candidate.getName();
        return fileName.startsWith(GlobalDictHDFSStore.VERSION_PREFIX);
    }
}
/**
 * Checks version isolation: a dictionary that was already built does not see values added by a
 * later builder, the dictionary built by that later builder does, and two version directories
 * exist afterwards.
 */
@Test
public void testMultiVersions() throws IOException, InterruptedException {
    KylinConfig.getInstanceFromEnv().setProperty("kylin.dictionary.append-entry-size", "4");

    AppendTrieDictionaryBuilder builder = createBuilder();
    builder.addValue("a");
    builder.addValue("b");
    builder.addValue("c");
    builder.addValue("d");
    builder.addValue("e");
    builder.addValue("f");
    AppendTrieDictionary<String> dict = builder.build(0);

    assertEquals(2, dict.getIdFromValue("b"));

    // re-open dict, append new data
    builder = createBuilder();
    builder.addValue("g");

    // new data is not visible
    try {
        dict.getIdFromValue("g");
        fail("Value 'g' (g) not exists!");
    } catch (IllegalArgumentException e) {
        // expected: the lookup of a value unknown to this version is rejected
    }

    // append data, and be visible for new immutable map
    builder.addValue("h");

    AppendTrieDictionary<String> newDict = builder.build(0);
    // JUnit assertion instead of the `assert` keyword, which is skipped when the JVM runs without -ea.
    assertEquals("Reopened dictionary should equal the previous one", dict, newDict);

    assertEquals(7, newDict.getIdFromValue("g"));
    assertEquals(8, newDict.getIdFromValue("h"));

    // Check versions retention
    File dir = new File(LOCAL_BASE_DIR);
    assertEquals(2, dir.listFiles(new VersionFilter()).length);
}
/**
 * With {@code append-max-versions} = 1 and {@code append-version-ttl} = 1000, builds one version,
 * waits 1200 ms, builds a second version, and expects a single version directory after each
 * build.
 */
@Test
public void testVersionRetention() throws IOException, InterruptedException {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    config.setProperty("kylin.dictionary.append-entry-size", "4");
    config.setProperty("kylin.dictionary.append-max-versions", "1");
    config.setProperty("kylin.dictionary.append-version-ttl", "1000");

    File dictDir = new File(LOCAL_BASE_DIR);
    FileFilter versionDirs = new VersionFilter();

    // version 1
    AppendTrieDictionaryBuilder firstBuilder = createBuilder();
    firstBuilder.addValue("a");
    firstBuilder.build(0);
    assertEquals(1, dictDir.listFiles(versionDirs).length);

    // sleep to make version 1 expired
    Thread.sleep(1200);

    // version 2
    AppendTrieDictionaryBuilder secondBuilder = createBuilder();
    secondBuilder.addValue("");
    secondBuilder.build(0);
    assertEquals(1, dictDir.listFiles(versionDirs).length);
}
/**
 * Builds six values with entry size 4, rewrites the directory into the older layout (no version
 * directories, three {@code cached_*} files), and checks that opening a new builder and building
 * again leaves exactly one version directory.
 */
@Test
public void testOldDirFormat() throws IOException {
    KylinConfig.getInstanceFromEnv().setProperty("kylin.dictionary.append-entry-size", "4");

    AppendTrieDictionaryBuilder builder = createBuilder();
    for (String value : Arrays.asList("a", "b", "c", "d", "e", "f")) {
        builder.addValue(value);
    }
    builder.build(0);

    convertDirToOldFormat(BASE_DIR);

    File dictDir = new File(LOCAL_BASE_DIR);
    File[] versionDirs = dictDir.listFiles(new VersionFilter());
    assertEquals(0, versionDirs.length);
    File[] sliceFiles = dictDir.listFiles(new CachedFileFilter());
    assertEquals(3, sliceFiles.length);

    // convert older format to new format when builder init
    createBuilder().build(0);

    assertEquals(1, dictDir.listFiles(new VersionFilter()).length);
}
/**
 * Simulates the older directory layout by replacing {@code baseDir} with the contents of its
 * latest version directory.
 *
 * @param baseDir the dictionary base directory to rewrite
 * @throws IOException if a file system operation fails
 */
private void convertDirToOldFormat(String baseDir) throws IOException {
    Path dictDir = new Path(baseDir);
    FileSystem fs = HadoopUtil.getFileSystem(dictDir);

    GlobalDictHDFSStore store = new GlobalDictHDFSStore(baseDir);
    Long[] allVersions = store.listAllVersions();
    Long latestVersion = allVersions[allVersions.length - 1];
    Path latestVersionDir = store.getVersionDir(latestVersion);

    // Park the version dir next to the base dir, drop the base dir, then move the parked dir
    // into the base dir's place.
    Path parkedDir = new Path(latestVersionDir.getParent().getParent(), latestVersionDir.getName());
    fs.rename(latestVersionDir, parkedDir);
    fs.delete(dictDir, true);
    fs.rename(parkedDir, dictDir);
}
/**
 * Builds six values, rewrites the latest version to the v1 index and file-name format, appends
 * three more values with a new builder, and checks that old ids are kept ("a" is 1) and new ids
 * continue after them ("g" is 7).
 */
@Test
public void testOldIndexFormat() throws IOException {
    KylinConfig.getInstanceFromEnv().setProperty("kylin.dictionary.append-entry-size", "4");

    AppendTrieDictionaryBuilder firstBuilder = createBuilder();
    for (String value : Arrays.asList("a", "b", "c", "d", "e", "f")) {
        firstBuilder.addValue(value);
    }
    firstBuilder.build(0);

    convertIndexToOldFormat(BASE_DIR);

    AppendTrieDictionaryBuilder secondBuilder = createBuilder();
    for (String value : Arrays.asList("g", "h", "i")) {
        secondBuilder.addValue(value);
    }
    AppendTrieDictionary<String> dict = secondBuilder.build(0);

    assertEquals(1, dict.getIdFromValue("a"));
    assertEquals(7, dict.getIdFromValue("g"));
}
/**
 * Rewrites the latest version under {@code baseDir} into the v1 format: the v2 index file is
 * deleted, a v1 index is written from the same metadata, and every slice file is renamed to
 * {@code "cached_" + sliceKey}.
 *
 * @param baseDir the dictionary base directory
 * @throws IOException if a file system operation fails
 */
private void convertIndexToOldFormat(String baseDir) throws IOException {
    FileSystem fs = HadoopUtil.getFileSystem(new Path(baseDir));

    GlobalDictHDFSStore store = new GlobalDictHDFSStore(baseDir);
    Long[] allVersions = store.listAllVersions();
    Long latestVersion = allVersions[allVersions.length - 1];
    // Read the metadata before the v2 index is removed.
    GlobalDictMetadata metadata = store.getMetadata(latestVersion);

    //convert v2 index to v1 index
    Path latestVersionDir = store.getVersionDir(latestVersion);
    fs.delete(new Path(latestVersionDir, V2_INDEX_NAME), true);
    GlobalDictHDFSStore.IndexFormat v1Format = new GlobalDictHDFSStore.IndexFormatV1(fs,
            HadoopUtil.getCurrentConfiguration());
    v1Format.writeIndexFile(latestVersionDir, metadata);

    //convert v2 fileName format to v1 fileName format
    for (Map.Entry<AppendDictSliceKey, String> slice : metadata.sliceFileMap.entrySet()) {
        Path v2File = new Path(latestVersionDir, slice.getValue());
        Path v1File = new Path(latestVersionDir, "cached_" + slice.getKey());
        fs.rename(v2File, v1File);
    }
}
/**
 * With {@code kylin.dictionary.max-cache-size} = 3 and 100000 values (4 slices), checks the
 * slice cache statistics: one load for a single lookup, 4 loads / 1 eviction after looking up
 * all keys in sorted order, and a RuntimeException starting with "Too many dict slice evictions"
 * when the keys are looked up in shuffled order.
 *
 * <p>The cache-size property is reset to -1 in a {@code finally} block so that a failing
 * assertion cannot leave the modified setting behind.
 */
@Test
public void testTooManySliceEvictions() throws IOException {
    final int valueCount = 100000;
    final String expectedPrefix = "Too many dict slice evictions";

    KylinConfig.getInstanceFromEnv().setProperty("kylin.dictionary.max-cache-size", "3");
    try {
        AppendTrieDictionaryBuilder builder = createBuilder();
        List<String> keys = new ArrayList<>(valueCount);
        for (int i = 0; i < valueCount; i++) {
            String key = Integer.toString(i);
            builder.addValue(key);
            keys.add(key);
        }
        AppendTrieDictionary<String> dict = builder.build(0);

        assertEquals(4, dict.getDictMetadata().sliceFileMap.size());
        assertEquals(1, dict.getIdFromValue("0", 0));
        assertEquals(0, dict.getCacheStats().evictionCount());
        assertEquals(1, dict.getCacheStats().loadCount());

        // in (lexicographic) order
        Collections.sort(keys);
        for (String key : keys) {
            assertEquals(Integer.parseInt(key) + 1, dict.getIdFromValue(key, 0));
        }
        assertEquals(1, dict.getCacheStats().evictionCount());
        assertEquals(4, dict.getCacheStats().loadCount());

        // out of order
        Collections.shuffle(keys);
        try {
            for (String key : keys) {
                assertEquals(Integer.parseInt(key) + 1, dict.getIdFromValue(key, 0));
            }
            fail("Should throw RuntimeException for too many dict slice evictions");
        } catch (RuntimeException e) {
            String message = e.getMessage();
            if (message == null || !message.startsWith(expectedPrefix)) {
                // Not the eviction guard: surface the real failure with its own stack trace.
                throw e;
            }
        }
        assertEquals(22, dict.getCacheStats().evictionCount());
        assertEquals(25, dict.getCacheStats().loadCount());
    } finally {
        KylinConfig.getInstanceFromEnv().setProperty("kylin.dictionary.max-cache-size", "-1");
    }
}
}