// blob: b5c365eb33e57c3b65adce8ecd983bf00ecd554b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.atlas.data;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File ;
import java.util.*;
import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.datatypes.xsd.XSDDatatype ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.query.SortCondition ;
import org.apache.jena.riot.system.SerializationFactoryFinder ;
import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.engine.binding.Binding ;
import org.apache.jena.sparql.engine.binding.BindingComparator ;
import org.apache.jena.sparql.engine.binding.BindingFactory ;
import org.apache.jena.sparql.engine.binding.BindingMap ;
import org.apache.jena.sparql.resultset.ResultSetCompare ;
import org.apache.jena.sparql.sse.Item ;
import org.apache.jena.sparql.sse.SSE ;
import org.apache.jena.sparql.sse.builders.BuilderBinding ;
import org.apache.jena.sparql.util.NodeUtils ;
import org.junit.Test ;
/**
 * Tests for {@link DistinctDataNet} and its nested {@code SortedDiffIterator}.
 *
 * <p>The distinct tests feed a list containing duplicate bindings into a
 * {@code DistinctDataNet} and compare what comes back against
 * {@code Iter.distinct} applied to the same list. A further test checks that
 * the files reported by {@code getSpillFiles()} are gone after {@code close()}.
 */
public class TestDistinctDataNet
{
    /** Alphabet that {@link #randomString(int)} draws its characters from. */
    private static final String LETTERS = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM";

    /** Source of randomness for the generated bindings. */
    Random random = new Random();

    // Fixture bindings, parsed from SSE once when the class is loaded.
    static Binding b12 = build("(?a 1) (?b 2)") ;
    static Binding b19 = build("(?a 1) (?b 9)") ;
    static Binding b02 = build("(?b 2)") ;
    static Binding b10 = build("(?a 1)") ;
    static Binding b0 = build("") ;
    static Binding bb1 = build("(?a _:XYZ) (?b 1)");
    static Binding x10 = build("(?x <http://example/abc>)") ;

    /**
     * Adds a list with duplicates via {@code addAll} and checks that
     * {@code iterator()} yields the same bindings as {@code Iter.distinct}.
     */
    @Test
    public void testDistinct()
    {
        List<Binding> undistinct = sampleBindings();
        List<Binding> control = Iter.toList(Iter.distinct(undistinct.iterator()));
        List<Binding> distinct = new ArrayList<>();

        // NOTE(review): the threshold of 2 is presumably chosen so that the
        // eight inputs exceed the in-memory limit - confirm against ThresholdPolicyCount.
        DistinctDataNet<Binding> db = newDistinctDataNet(2);
        try
        {
            db.addAll(undistinct);
            drainInto(db.iterator(), distinct);
        }
        finally
        {
            db.close();
        }

        assertEquals(control.size(), distinct.size());
        assertTrue(ResultSetCompare.equalsByTest(control, distinct, NodeUtils.sameNode));
    }

    /**
     * Same expectation as {@link #testDistinct()}, but assembled from the
     * bindings for which {@code netAdd} returned {@code true} followed by
     * everything {@code netIterator()} yields.
     */
    @Test
    public void testDistinct2()
    {
        List<Binding> undistinct = sampleBindings();
        List<Binding> control = Iter.toList(Iter.distinct(undistinct.iterator()));
        List<Binding> distinct = new ArrayList<>();

        DistinctDataNet<Binding> db = newDistinctDataNet(2);
        try
        {
            for (Binding b : undistinct)
            {
                if (db.netAdd(b))
                {
                    distinct.add(b);
                }
            }
            drainInto(db.netIterator(), distinct);
        }
        finally
        {
            db.close();
        }

        assertEquals(control.size(), distinct.size());
        assertTrue(ResultSetCompare.equalsByTest(control, distinct, NodeUtils.sameNode));
    }

    /**
     * Adds 500 random bindings with a threshold of 10, checks how many spill
     * files exist, and then verifies that none of them exist once the bag has
     * been iterated and closed.
     */
    @Test
    public void testTemporaryFilesAreCleanedUpAfterCompletion()
    {
        List<Binding> undistinct = new ArrayList<>();
        random = new Random();

        Var[] vars = new Var[]{
            Var.alloc("1"), Var.alloc("2"), Var.alloc("3"),
            Var.alloc("4"), Var.alloc("5"), Var.alloc("6"),
            Var.alloc("7"), Var.alloc("8"), Var.alloc("9"), Var.alloc("0")
        };
        for (int i = 0; i < 500; i++)
        {
            undistinct.add(randomBinding(vars));
        }

        DistinctDataNet<Binding> db = newDistinctDataNet(10);
        List<File> spillFiles = new ArrayList<>();
        try
        {
            db.addAll(undistinct);
            spillFiles.addAll(db.getSpillFiles());

            // 500 bindings divided into 50 chunks (49 in files, and 1 in memory)
            assertEquals(49, countExisting(spillFiles));

            // Consume the whole iterator; the bindings themselves are not inspected.
            drainInto(db.iterator(), new ArrayList<Binding>());
        }
        finally
        {
            db.close();
        }

        assertEquals(0, countExisting(spillFiles));
    }

    /**
     * Checks the sorted difference of two space-separated, sorted item lists.
     *
     * <p>Delegates to {@link #testDiff(String[], String[], String)} so that the
     * iterator is actually consumed and compared against {@code expected}.
     *
     * @param first    space-separated items to keep unless they occur in {@code second};
     *                 the empty string means "no items"
     * @param second   space-separated items to remove; the empty string means "no items"
     * @param expected the surviving items joined by single spaces
     */
    private void testDiff(String first, String second, String expected)
    {
        testDiff(tokens(first), tokens(second), expected);
    }

    /**
     * Runs {@code SortedDiffIterator} over the two arrays and asserts that the
     * items it yields, joined by single spaces, equal {@code expected}.
     * A {@code null} item is rendered as the text {@code "null"}.
     *
     * @param first    items for the first iterator
     * @param second   items for the second iterator
     * @param expected the expected items joined by single spaces
     */
    private void testDiff(String[] first, String[] second, String expected)
    {
        DistinctDataNet.SortedDiffIterator<String> sdi = DistinctDataNet.SortedDiffIterator.create(
            Arrays.asList(first).iterator(),
            Arrays.asList(second).iterator());

        StringBuilder sb = new StringBuilder();
        boolean firstTime = true;
        while (sdi.hasNext())
        {
            if (!firstTime)
            {
                sb.append(" ");
            }
            firstTime = false;

            String s = sdi.next();
            if (null == s)
            {
                s = "null";
            }
            sb.append(s);
        }

        assertEquals(expected, sb.toString());
    }

    /** Exercises {@code SortedDiffIterator} with overlapping, disjoint, empty and null-containing inputs. */
    @Test
    public void testSortedDiffIterator()
    {
        testDiff("a b e g i j", "b g h", "a e i j");
        testDiff("a b e g i j", "", "a b e g i j");
        testDiff("", "b g h", "");
        testDiff("", "", "");
        testDiff("a", "a", "");
        testDiff("a", "b", "a");
        testDiff("b", "a", "b");
        testDiff("a b e g i j", "b g h z", "a e i j");
        testDiff("a b c", "a b c", "");
        testDiff(new String[] {null, "a", "b", "e", "g", "i", "j", }, new String[] { "b", "g", "h", }, "null a e i j");
        testDiff(new String[] {"a", "b", "e", "g", "i", "j", }, new String[] { null, "b", "g", "h", }, "a e i j");
        testDiff(new String[] {null, "a", "b", "e", "g", "i", "j", }, new String[] { null, "b", "g", "h", }, "a e i j");
    }

    /**
     * Splits a space-separated item list into its items.
     *
     * <p>{@code "".split(" ")} returns a one-element array holding the empty
     * string, so the empty string is mapped to an empty array explicitly.
     */
    private static String[] tokens(String items)
    {
        return items.isEmpty() ? new String[0] : items.split(" ");
    }

    /** Returns the eight-element fixture list (with duplicates) shared by the distinct tests. */
    private static List<Binding> sampleBindings()
    {
        List<Binding> undistinct = new ArrayList<>();
        undistinct.add(b12);
        undistinct.add(b19);
        undistinct.add(b02);
        undistinct.add(b12);
        undistinct.add(b19);
        undistinct.add(b12);
        undistinct.add(b02);
        undistinct.add(x10);
        return undistinct;
    }

    /**
     * Creates the bag under test with a count-based threshold policy, the
     * binding serialization factory and a comparator with no sort conditions.
     *
     * @param threshold the value handed to {@code ThresholdPolicyCount}
     */
    private static DistinctDataNet<Binding> newDistinctDataNet(int threshold)
    {
        return new DistinctDataNet<Binding>(
            new ThresholdPolicyCount<Binding>(threshold),
            SerializationFactoryFinder.bindingSerializationFactory(),
            new BindingComparator(new ArrayList<SortCondition>()));
    }

    /** Appends every remaining element of {@code iter} to {@code sink}, closing the iterator even on failure. */
    private static void drainInto(Iterator<Binding> iter, List<Binding> sink)
    {
        try
        {
            while (iter.hasNext())
            {
                sink.add(iter.next());
            }
        }
        finally
        {
            Iter.close(iter);
        }
    }

    /** Counts how many of the given files currently exist on disk. */
    private static int countExisting(List<File> files)
    {
        int count = 0;
        for (File file : files)
        {
            if (file.exists())
            {
                count++;
            }
        }
        return count;
    }

    /** Parses {@code string} as the body of an SSE {@code (binding ...)} form. */
    private static Binding build(String string)
    {
        Item item = SSE.parse("(binding "+string+")") ;
        return BuilderBinding.build(item) ;
    }

    /**
     * Builds a binding that assigns a freshly generated node to each of the
     * first ten entries of {@code vars}: blank nodes, random URIs, random
     * string literals and one random {@code xsd:integer} literal.
     *
     * @param vars at least ten variables; indexes 0-9 are used
     */
    private Binding randomBinding(Var[] vars)
    {
        BindingMap binding = BindingFactory.create();
        binding.add(vars[0], NodeFactory.createBlankNode());
        binding.add(vars[1], NodeFactory.createURI(randomURI()));
        binding.add(vars[2], NodeFactory.createURI(randomURI()));
        binding.add(vars[3], NodeFactory.createLiteral(randomString(20)));
        binding.add(vars[4], NodeFactory.createBlankNode());
        binding.add(vars[5], NodeFactory.createURI(randomURI()));
        binding.add(vars[6], NodeFactory.createURI(randomURI()));
        binding.add(vars[7], NodeFactory.createLiteral(randomString(5)));
        binding.add(vars[8], NodeFactory.createLiteral("" + random.nextInt(), XSDDatatype.XSDinteger));
        binding.add(vars[9], NodeFactory.createBlankNode());
        return binding;
    }

    /** Returns an {@code http://<10 letters>.example.com/<10 letters>} URI string. */
    public String randomURI()
    {
        return String.format("http://%s.example.com/%s", randomString(10), randomString(10));
    }

    /**
     * Returns a string of {@code length} characters drawn at random from {@link #LETTERS}.
     *
     * @param length number of characters to generate
     */
    public String randomString(int length)
    {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < length; i++)
        {
            builder.append(LETTERS.charAt(random.nextInt(LETTERS.length())));
        }
        return builder.toString();
    }
}