// blob: bf021ead8b70fb44e33839ee0b3c011aa741f7dd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.suggest;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
/**
 * Tests {@link FileDictionary} against randomly generated, line-oriented input.
 *
 * <p>Each generated line holds a term, optionally followed by a weight and a payload, with the
 * fields separated by a delimiter and the line terminated by {@code "\n"}. The tests feed that
 * text to a {@link FileDictionary} and check that its {@link InputIterator} reports the same
 * terms, weights and payloads, in the same order.
 */
public class FileDictionaryTest extends LuceneTestCase {

  /**
   * Generates a single random input line.
   *
   * @param fieldDelimiter separator written between the fields of the line
   * @param hasWeight whether a random {@code long} weight follows the term
   * @param hasPayload whether a random payload string is appended as the last field
   * @return an entry whose key lists the generated field values in order (term, then weight and
   *     payload when present) and whose value is the newline-terminated line of text
   */
  private Map.Entry<List<String>, String> generateFileEntry(
      String fieldDelimiter, boolean hasWeight, boolean hasPayload) {
    List<String> entryValues = new ArrayList<>();
    StringBuilder sb = new StringBuilder();

    String term = TestUtil.randomSimpleString(random(), 1, 300);
    sb.append(term);
    entryValues.add(term);

    if (hasWeight) {
      long weight = TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE);
      sb.append(fieldDelimiter).append(weight);
      entryValues.add(String.valueOf(weight));
    }

    if (hasPayload) {
      String payload = TestUtil.randomSimpleString(random(), 1, 300);
      sb.append(fieldDelimiter).append(payload);
      entryValues.add(payload);
    }

    sb.append("\n");
    return new SimpleEntry<>(entryValues, sb.toString());
  }

  /**
   * Generates {@code count} random input lines.
   *
   * <p>When {@code hasPayloads} is set, every line carries a weight (provided {@code hasWeights}
   * is set), the first line always carries a payload and each later line carries one at random.
   * When only {@code hasWeights} is set, each line carries a weight at random.
   *
   * @param count number of lines to generate
   * @param fieldDelimiter separator written between the fields of each line
   * @param hasWeights whether lines may contain a weight
   * @param hasPayloads whether lines may contain a payload
   * @return an entry whose key holds the expected field values of every line, in order, and whose
   *     value is the concatenated text of all lines
   */
  private Map.Entry<List<List<String>>, String> generateFileInput(
      int count, String fieldDelimiter, boolean hasWeights, boolean hasPayloads) {
    List<List<String>> entries = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    boolean hasPayload = hasPayloads;
    for (int i = 0; i < count; i++) {
      if (hasPayloads) {
        // NOTE(review): the first line is forced to have a payload, presumably because
        // FileDictionary decides from the first line whether payloads exist - confirm.
        hasPayload = (i == 0) ? true : random().nextBoolean();
      }
      // Weights are only optional per line when no payloads are generated.
      boolean hasWeight = (!hasPayloads && hasWeights) ? random().nextBoolean() : hasWeights;
      Map.Entry<List<String>, String> entrySet =
          generateFileEntry(fieldDelimiter, hasWeight, hasPayload);
      entries.add(entrySet.getKey());
      sb.append(entrySet.getValue());
    }
    return new SimpleEntry<>(entries, sb.toString());
  }

  /** Wraps the given text in an in-memory stream of its UTF-8 bytes. */
  private InputStream toInputStream(String content) {
    return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
  }

  /**
   * Iterates the dictionary and checks it against entries generated without payloads.
   *
   * <p>Each expected entry is a term optionally followed by a weight; an entry without a weight
   * is expected to be reported with weight {@code 1}. The iterator must report no payloads.
   *
   * @param dictionary dictionary under test
   * @param entries expected field values per line, in input order
   */
  private void assertEntriesWithoutPayloads(FileDictionary dictionary, List<List<String>> entries)
      throws IOException {
    InputIterator inputIter = dictionary.getEntryIterator();
    assertFalse(inputIter.hasPayloads());
    BytesRef term;
    int count = 0;
    while ((term = inputIter.next()) != null) {
      assertTrue(entries.size() > count);
      List<String> entry = entries.get(count);
      assertTrue(entry.size() >= 1); // at least a term
      assertEquals(entry.get(0), term.utf8ToString());
      assertEquals((entry.size() == 2) ? Long.parseLong(entry.get(1)) : 1, inputIter.weight());
      assertNull(inputIter.payload());
      count++;
    }
    // Every expected entry must have been returned by the iterator.
    assertEquals(entries.size(), count);
  }

  /**
   * Iterates the dictionary and checks it against entries generated with weights and payloads.
   *
   * <p>Each expected entry is a term and a weight, optionally followed by a payload; an entry
   * without a payload is expected to be reported with an empty payload.
   *
   * @param dictionary dictionary under test
   * @param entries expected field values per line, in input order
   */
  private void assertEntriesWithPayloads(FileDictionary dictionary, List<List<String>> entries)
      throws IOException {
    InputIterator inputIter = dictionary.getEntryIterator();
    assertTrue(inputIter.hasPayloads());
    BytesRef term;
    int count = 0;
    while ((term = inputIter.next()) != null) {
      assertTrue(entries.size() > count);
      List<String> entry = entries.get(count);
      assertTrue(entry.size() >= 2); // at least term and weight
      assertEquals(entry.get(0), term.utf8ToString());
      assertEquals(Long.parseLong(entry.get(1)), inputIter.weight());
      if (entry.size() == 3) {
        assertEquals(entry.get(2), inputIter.payload().utf8ToString());
      } else {
        assertEquals(0, inputIter.payload().length);
      }
      count++;
    }
    // Every expected entry must have been returned by the iterator.
    assertEquals(entries.size(), count);
  }

  /** Lines that contain only a term: weight defaults to 1 and there is no payload. */
  @Test
  public void testFileWithTerm() throws IOException {
    Map.Entry<List<List<String>>, String> fileInput =
        generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, false, false);
    InputStream inputReader = toInputStream(fileInput.getValue());
    FileDictionary dictionary = new FileDictionary(inputReader);
    assertEntriesWithoutPayloads(dictionary, fileInput.getKey());
  }

  /** Lines that contain a term and, at random, a weight; there is no payload. */
  @Test
  public void testFileWithWeight() throws IOException {
    Map.Entry<List<List<String>>, String> fileInput =
        generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, true, false);
    InputStream inputReader = toInputStream(fileInput.getValue());
    FileDictionary dictionary = new FileDictionary(inputReader);
    assertEntriesWithoutPayloads(dictionary, fileInput.getKey());
  }

  /** Lines that contain a term, a weight and, after the first line, a payload at random. */
  @Test
  public void testFileWithWeightAndPayload() throws IOException {
    Map.Entry<List<List<String>>, String> fileInput =
        generateFileInput(atLeast(100), FileDictionary.DEFAULT_FIELD_DELIMITER, true, true);
    InputStream inputReader = toInputStream(fileInput.getValue());
    FileDictionary dictionary = new FileDictionary(inputReader);
    assertEntriesWithPayloads(dictionary, fileInput.getKey());
  }

  /** A single line that contains a term, a weight and a payload. */
  @Test
  public void testFileWithOneEntry() throws IOException {
    Map.Entry<List<List<String>>, String> fileInput =
        generateFileInput(1, FileDictionary.DEFAULT_FIELD_DELIMITER, true, true);
    InputStream inputReader = toInputStream(fileInput.getValue());
    FileDictionary dictionary = new FileDictionary(inputReader);
    assertEntriesWithPayloads(dictionary, fileInput.getKey());
  }

  /** Same as the weight-and-payload case, but with {@code " , "} as the field delimiter. */
  @Test
  public void testFileWithDifferentDelimiter() throws IOException {
    Map.Entry<List<List<String>>, String> fileInput =
        generateFileInput(atLeast(100), " , ", true, true);
    InputStream inputReader = toInputStream(fileInput.getValue());
    FileDictionary dictionary = new FileDictionary(inputReader, " , ");
    assertEntriesWithPayloads(dictionary, fileInput.getKey());
  }
}