| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.query.text; |
| |
| import static org.junit.Assert.assertTrue; |
| |
| import java.io.Reader ; |
| import java.io.StringReader ; |
| |
| import org.apache.jena.assembler.Assembler ; |
| import org.apache.jena.atlas.lib.StrUtils ; |
| import org.apache.jena.query.Dataset ; |
| import org.apache.jena.query.ReadWrite ; |
| import org.apache.jena.query.text.assembler.TextAssembler ; |
| import org.apache.jena.rdf.model.Model ; |
| import org.apache.jena.rdf.model.ModelFactory ; |
| import org.apache.jena.rdf.model.Resource ; |
| import org.junit.After ; |
| import org.junit.Before ; |
| import org.junit.Test ; |
| |
| public class TestTextDefineAnalyzers extends AbstractTestDatasetWithTextIndexBase { |
| |
| private static final String SPEC_BASE = "http://example.org/spec#"; |
| private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset"; |
| private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL; |
| private static final String SPEC; |
| static { |
| SPEC = StrUtils.strjoinNL( |
| "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ", |
| "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ", |
| "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>", |
| "prefix text: <http://jena.apache.org/text#>", |
| "prefix : <" + SPEC_BASE + ">", |
| "", |
| "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .", |
| "text:TextDataset rdfs:subClassOf ja:RDFDataset .", |
| "text:TextIndexLucene rdfs:subClassOf text:TextIndex .", |
| |
| ":" + SPEC_ROOT_LOCAL, |
| " a text:TextDataset ;", |
| " text:dataset :dataset ;", |
| " text:index :indexLucene ;", |
| " .", |
| "", |
| ":dataset", |
| " a tdb:DatasetTDB ;", |
| " tdb:location \"--mem--\" ;", |
| " tdb:unionDefaultGraph true ;", |
| ".", |
| "", |
| ":indexLucene", |
| " a text:TextIndexLucene ;", |
| " text:directory \"mem\" ;", |
| " text:storeValues true ;", |
| " text:analyzer [", |
| " a text:DefinedAnalyzer ;", |
| " text:useAnalyzer :configuredAnalyzer ] ;", |
| " text:defineAnalyzers (", |
| " [ text:defineAnalyzer :configuredAnalyzer ;", |
| " text:analyzer [", |
| " a text:ConfigurableAnalyzer ;", |
| " text:tokenizer :ngram ;", |
| " text:filters ( :asciiff text:LowerCaseFilter ) ] ]", |
| " [ text:defineAnalyzer :configuredAnalyzer2 ;", |
| " text:analyzer [", |
| " a text:ConfigurableAnalyzer ;", |
| " text:tokenizer :ngram2 ;", |
| " text:filters ( :asciiff2 text:LowerCaseFilter ) ] ]", |
| " [ text:defineTokenizer :ngram ;", |
| " text:tokenizer [", |
| " a text:GenericTokenizer ;", |
| " text:class \"org.apache.lucene.analysis.ngram.NGramTokenizer\" ;", |
| " text:params (", |
| " [ text:paramName \"minGram\" ;", |
| " text:paramType text:TypeInt ;", |
| " text:paramValue 3 ]", |
| " [ text:paramName \"maxGram\" ;", |
| " text:paramType text:TypeInt ;", |
| " text:paramValue 7 ]", |
| " ) ] ]", |
| " [ text:defineFilter :asciiff ;", |
| " text:filter [", |
| " a text:GenericFilter ;", |
| " text:class \"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter\" ;", |
| " text:params (", |
| " [ text:paramName \"preserveOriginal\" ;", |
| " text:paramType text:TypeBoolean ;", |
| " text:paramValue true ]", |
| " ) ] ]", |
| " [ text:defineTokenizer :ngram2 ;", |
| " text:tokenizer [", |
| " a text:GenericTokenizer ;", |
| " text:class \"org.apache.lucene.analysis.ngram.NGramTokenizer\" ;", |
| " text:params (", |
| " [ text:paramValue 3 ]", |
| " [ text:paramValue 7 ]", |
| " ) ] ]", |
| " [ text:defineFilter :asciiff2 ;", |
| " text:filter [", |
| " a text:GenericFilter ;", |
| " text:class \"org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter\" ;", |
| " text:params (", |
| " [ text:paramName \"preserveOriginal\" ;", |
| " text:paramValue true ]", |
| " ) ] ]", |
| " ) ;", |
| " text:entityMap :entMap ;", |
| " .", |
| "", |
| ":entMap", |
| " a text:EntityMap ;", |
| " text:entityField \"uri\" ;", |
| " text:defaultField \"label\" ;", |
| " text:langField \"lang\" ;", |
| " text:graphField \"graph\" ;", |
| " text:map (", |
| " [ text:field \"label\" ; text:predicate rdfs:label ]", |
| " [ text:field \"comment\" ; text:predicate rdfs:comment ]", |
| " ) ." |
| ); |
| } |
| |
| @Before |
| public void before() { |
| Reader reader = new StringReader(SPEC); |
| Model specModel = ModelFactory.createDefaultModel(); |
| specModel.read(reader, "", "TURTLE"); |
| TextAssembler.init(); |
| Resource root = specModel.getResource(SPEC_ROOT_URI); |
| dataset = (Dataset) Assembler.general.open(root); |
| } |
| |
| @After |
| public void after() { |
| dataset.close(); |
| } |
| |
| private void putTurtleInModel(String turtle, String modelName) { |
| Model model = modelName != null ? dataset.getNamedModel(modelName) : dataset.getDefaultModel() ; |
| Reader reader = new StringReader(turtle) ; |
| dataset.begin(ReadWrite.WRITE) ; |
| try { |
| model.read(reader, "", "TURTLE") ; |
| dataset.commit() ; |
| } |
| finally { |
| dataset.end(); |
| } |
| } |
| |
| @Test |
| public void testTextQueryDefAnalyzers1() { |
| final String turtleA = StrUtils.strjoinNL( |
| TURTLE_PROLOG, |
| "<" + RESOURCE_BASE + "testResultOneInModelA>", |
| " rdfs:label 'bar testResultOne barfoo foo'", |
| ".", |
| "<" + RESOURCE_BASE + "testResultTwoInModelA>", |
| " rdfs:label 'bar testResultTwo barfoo foo'", |
| ".", |
| "<" + RESOURCE_BASE + "testResultThreeInModelA>", |
| " rdfs:label 'bar testResultThree barfoo foo'", |
| "." |
| ); |
| putTurtleInModel(turtleA, "http://example.org/modelA") ; |
| final String turtleB = StrUtils.strjoinNL( |
| TURTLE_PROLOG, |
| "<" + RESOURCE_BASE + "testResultOneInModelB>", |
| " rdfs:label 'bar testResultOne barfoo foo'", |
| "." |
| ); |
| putTurtleInModel(turtleB, "http://example.org/modelB") ; |
| |
| // execution reaches here in the event that the assembler machinery |
| // has executed without errors and generated a usable dataset |
| // usage of the runtime machinery is tested elsewhere |
| assertTrue(true); |
| } |
| } |