// blob: 5ffa1db7b466e477ac46a8d9fd3fb317df80b6ae [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.query.text;
import static org.junit.Assert.assertTrue;
import java.io.Reader ;
import java.io.StringReader ;
import org.apache.jena.assembler.Assembler ;
import org.apache.jena.atlas.lib.StrUtils ;
import org.apache.jena.query.Dataset ;
import org.apache.jena.query.ReadWrite ;
import org.apache.jena.query.text.assembler.TextAssembler ;
import org.apache.jena.rdf.model.Model ;
import org.apache.jena.rdf.model.ModelFactory ;
import org.apache.jena.rdf.model.Resource ;
import org.junit.After ;
import org.junit.Before ;
import org.junit.Test ;
/**
 * Checks that an assembler specification which uses {@code text:defineAnalyzers}
 * to declare named analyzers, tokenizers and filters can be assembled into a
 * dataset, and that the resulting dataset accepts writes.
 *
 * <p>Only the assembler path is covered here; as the note in the test method
 * says, query-time behaviour is tested elsewhere.
 */
public class TestTextDefineAnalyzers extends AbstractTestDatasetWithTextIndexBase {
    /** Namespace of the resources declared in the assembler specification. */
    private static final String SPEC_BASE = "http://example.org/spec#";

    /** Local name of the root {@code text:TextDataset} resource in the specification. */
    private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";

    /** Full URI of the root resource handed to the assembler. */
    private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;

    /**
     * Assembler specification, in Turtle, for the dataset under test.
     *
     * <p>It declares two configurable analyzers. The first ({@code :configuredAnalyzer})
     * uses a tokenizer and a filter whose parameters carry explicit
     * {@code text:paramName} / {@code text:paramType} entries. The second
     * ({@code :configuredAnalyzer2}) uses {@code :ngram2} and {@code :asciiff2},
     * whose parameters omit the name and/or the type.
     * NOTE(review): the reduced forms presumably exercise the assembler's
     * defaulting of missing parameter names and types - confirm against the
     * text assembler implementation.
     */
    private static final String SPEC = StrUtils.strjoinNL(
        "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
        "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
        "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
        "prefix text: <http://jena.apache.org/text#>",
        "prefix : <" + SPEC_BASE + ">",
        "",
        "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
        "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
        "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
        ":" + SPEC_ROOT_LOCAL,
        " a text:TextDataset ;",
        " text:dataset :dataset ;",
        " text:index :indexLucene ;",
        " .",
        "",
        ":dataset",
        " a tdb:DatasetTDB ;",
        " tdb:location \"--mem--\" ;",
        " tdb:unionDefaultGraph true ;",
        ".",
        "",
        ":indexLucene",
        " a text:TextIndexLucene ;",
        " text:directory \"mem\" ;",
        " text:storeValues true ;",
        " text:analyzer [",
        " a text:DefinedAnalyzer ;",
        " text:useAnalyzer :configuredAnalyzer ] ;",
        " text:defineAnalyzers (",
        " [ text:defineAnalyzer :configuredAnalyzer ;",
        " text:analyzer [",
        " a text:ConfigurableAnalyzer ;",
        " text:tokenizer :ngram ;",
        " text:filters ( :asciiff text:LowerCaseFilter ) ] ]",
        " [ text:defineAnalyzer :configuredAnalyzer2 ;",
        " text:analyzer [",
        " a text:ConfigurableAnalyzer ;",
        " text:tokenizer :ngram2 ;",
        " text:filters ( :asciiff2 text:LowerCaseFilter ) ] ]",
        " [ text:defineTokenizer :ngram ;",
        " text:tokenizer [",
        " a text:GenericTokenizer ;",
        " text:class \"org.apache.lucene.analysis.ngram.NGramTokenizer\" ;",
        " text:params (",
        " [ text:paramName \"minGram\" ;",
        " text:paramType text:TypeInt ;",
        " text:paramValue 3 ]",
        " [ text:paramName \"maxGram\" ;",
        " text:paramType text:TypeInt ;",
        " text:paramValue 7 ]",
        " ) ] ]",
        " [ text:defineFilter :asciiff ;",
        " text:filter [",
        " a text:GenericFilter ;",
        " text:class \"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter\" ;",
        " text:params (",
        " [ text:paramName \"preserveOriginal\" ;",
        " text:paramType text:TypeBoolean ;",
        " text:paramValue true ]",
        " ) ] ]",
        " [ text:defineTokenizer :ngram2 ;",
        " text:tokenizer [",
        " a text:GenericTokenizer ;",
        " text:class \"org.apache.lucene.analysis.ngram.NGramTokenizer\" ;",
        " text:params (",
        " [ text:paramValue 3 ]",
        " [ text:paramValue 7 ]",
        " ) ] ]",
        " [ text:defineFilter :asciiff2 ;",
        " text:filter [",
        " a text:GenericFilter ;",
        " text:class \"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter\" ;",
        " text:params (",
        " [ text:paramName \"preserveOriginal\" ;",
        " text:paramValue true ]",
        " ) ] ]",
        " ) ;",
        " text:entityMap :entMap ;",
        " .",
        "",
        ":entMap",
        " a text:EntityMap ;",
        " text:entityField \"uri\" ;",
        " text:defaultField \"label\" ;",
        " text:langField \"lang\" ;",
        " text:graphField \"graph\" ;",
        " text:map (",
        " [ text:field \"label\" ; text:predicate rdfs:label ]",
        " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
        " ) ."
    );

    /**
     * Parses {@link #SPEC} into a model and asks the general assembler to build
     * the dataset rooted at {@link #SPEC_ROOT_URI}, storing it in {@code dataset}.
     */
    @Before
    public void before() {
        Reader specReader = new StringReader(SPEC);
        Model specModel = ModelFactory.createDefaultModel();
        specModel.read(specReader, "", "TURTLE");
        TextAssembler.init();
        Resource root = specModel.getResource(SPEC_ROOT_URI);
        dataset = (Dataset) Assembler.general.open(root);
    }

    /**
     * Closes the dataset built by {@link #before()}, if there is one.
     */
    @After
    public void after() {
        // JUnit runs @After methods even when @Before threw, in which case the
        // dataset may never have been assigned. Guard against that so teardown
        // does not pile a NullPointerException on top of the real failure.
        if (dataset != null) {
            dataset.close();
        }
    }

    /**
     * Reads Turtle content into one of the dataset's models inside a write
     * transaction.
     *
     * @param turtle    the Turtle document to load
     * @param modelName URI of the named model to load into, or {@code null}
     *                  to load into the default model
     */
    private void putTurtleInModel(String turtle, String modelName) {
        final Model target;
        if (modelName == null) {
            target = dataset.getDefaultModel();
        } else {
            target = dataset.getNamedModel(modelName);
        }
        Reader turtleReader = new StringReader(turtle);
        dataset.begin(ReadWrite.WRITE);
        try {
            target.read(turtleReader, "", "TURTLE");
            dataset.commit();
        } finally {
            // Always end the transaction, including when the read failed
            // before commit() was reached.
            dataset.end();
        }
    }

    /**
     * Loads labelled resources into two named models of the assembled dataset.
     * The test passes if assembly and both loads complete without throwing.
     */
    @Test
    public void testTextQueryDefAnalyzers1() {
        final String turtleA = StrUtils.strjoinNL(
            TURTLE_PROLOG,
            "<" + RESOURCE_BASE + "testResultOneInModelA>",
            " rdfs:label 'bar testResultOne barfoo foo'",
            ".",
            "<" + RESOURCE_BASE + "testResultTwoInModelA>",
            " rdfs:label 'bar testResultTwo barfoo foo'",
            ".",
            "<" + RESOURCE_BASE + "testResultThreeInModelA>",
            " rdfs:label 'bar testResultThree barfoo foo'",
            "."
        );
        putTurtleInModel(turtleA, "http://example.org/modelA");

        final String turtleB = StrUtils.strjoinNL(
            TURTLE_PROLOG,
            "<" + RESOURCE_BASE + "testResultOneInModelB>",
            " rdfs:label 'bar testResultOne barfoo foo'",
            "."
        );
        putTurtleInModel(turtleB, "http://example.org/modelB");

        // execution reaches here in the event that the assembler machinery
        // has executed without errors and generated a usable dataset
        // usage of the runtime machinery is tested elsewhere
        assertTrue(true);
    }
}