Merge pull request #770 from strangepleasures/JENA-1932
JENA-1932 Elasticsearch text index incorrectly maps properties to fields
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java
index 56d4035..c0461ff 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java
@@ -36,7 +36,7 @@
import org.apache.jena.riot.RDFLanguages;
import org.apache.jena.riot.system.*;
import org.apache.jena.riot.tokens.Tokenizer;
-import org.apache.jena.riot.tokens.TokenizerFactory;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.apache.jena.sparql.core.Quad;
/** Use RDFDataMgr operations.
@@ -53,7 +53,7 @@
Tokenizer tokenizer = new TokenizerJSON(PeekReader.makeUTF8(input));
return createParserRdfJson(tokenizer, dest, profile);
}
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input, profile.getErrorHandler());
+ Tokenizer tokenizer = TokenizerText.create().source(input).errorHandler(profile.getErrorHandler()).build();
if ( RDFLanguages.sameLang(TURTLE, lang) || RDFLanguages.sameLang(N3, lang) )
return createParserTurtle(tokenizer, dest, profile);
if ( RDFLanguages.sameLang(NTRIPLES, lang) )
@@ -72,8 +72,7 @@
return createParserRdfJson(tokenizer, dest, profile);
}
- @SuppressWarnings("deprecation")
- Tokenizer tokenizer = TokenizerFactory.makeTokenizer(input, profile.getErrorHandler());
+ Tokenizer tokenizer = TokenizerText.create().source(input).errorHandler(profile.getErrorHandler()).build();
if ( RDFLanguages.sameLang(TURTLE, lang) || RDFLanguages.sameLang(N3, lang) )
return createParserTurtle(tokenizer, dest, profile);
if ( RDFLanguages.sameLang(NTRIPLES, lang) )
@@ -120,7 +119,7 @@
/** Create an iterator for parsing N-Triples. */
public static Iterator<Triple> createIteratorNTriples(InputStream input, StreamRDF dest, ParserProfile profile) {
// LangNTriples supports iterator use.
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input, profile.getErrorHandler());
+ Tokenizer tokenizer = TokenizerText.create().source(input).errorHandler(profile.getErrorHandler()).build();
return createParserNTriples(tokenizer, null, profile);
}
@@ -132,7 +131,7 @@
/** Create an iterator for parsing N-Quads. */
public static Iterator<Quad> createIteratorNQuads(InputStream input, StreamRDF dest, ParserProfile profile) {
// LangNQuads supports iterator use.
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input, profile.getErrorHandler());
+ Tokenizer tokenizer = TokenizerText.create().source(input).errorHandler(profile.getErrorHandler()).build();
return createParserNQuads(tokenizer, null, profile);
}
}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/IteratorStreamRDFText.java b/jena-arq/src/main/java/org/apache/jena/riot/system/IteratorStreamRDFText.java
index 78e62c1..5db9592 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/IteratorStreamRDFText.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/IteratorStreamRDFText.java
@@ -32,7 +32,7 @@
import org.apache.jena.riot.RiotException ;
import org.apache.jena.riot.tokens.Token ;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
@@ -44,8 +44,8 @@
private Node[] previousTuple = null ;
private /*public*/ IteratorStreamRDFText(InputStream input) {
- Tokenizer t = TokenizerFactory.makeTokenizerUTF8(input) ;
- in = new TokenInputStream(null, t) ;
+ Tokenizer tokenizer = TokenizerText.create().source(input).build();
+ in = new TokenInputStream(null, tokenizer) ;
}
@Override
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/RiotLib.java b/jena-arq/src/main/java/org/apache/jena/riot/system/RiotLib.java
index a897a2a..d97e67e 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/RiotLib.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/RiotLib.java
@@ -34,7 +34,6 @@
import org.apache.jena.atlas.io.IndentedWriter;
import org.apache.jena.atlas.iterator.Iter;
-import org.apache.jena.atlas.logging.Log;
import org.apache.jena.graph.Graph;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
@@ -42,15 +41,13 @@
import org.apache.jena.query.ARQ;
import org.apache.jena.riot.*;
import org.apache.jena.riot.lang.LabelToNode;
-import org.apache.jena.riot.tokens.Token;
-import org.apache.jena.riot.tokens.Tokenizer;
-import org.apache.jena.riot.tokens.TokenizerFactory;
import org.apache.jena.riot.writer.WriterGraphRIOTBase;
import org.apache.jena.sparql.ARQConstants;
import org.apache.jena.sparql.core.DatasetGraph;
import org.apache.jena.sparql.core.DatasetGraphFactory;
import org.apache.jena.sparql.core.Quad;
import org.apache.jena.sparql.util.Context;
+import org.apache.jena.sparql.util.NodeFactoryExtra;
import org.apache.jena.util.iterator.ExtendedIterator;
/** Misc RIOT code */
@@ -150,16 +147,8 @@
private static ParserProfile profile = setupInternalParserProfile();
/** Parse a string to get one Node (the first token in the string) */
- public static Node parse(String string)
- {
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(string);
- if ( ! tokenizer.hasNext() )
- return null;
- Token t = tokenizer.next();
- Node n = profile.create(null, t);
- if ( tokenizer.hasNext() )
- Log.warn(RiotLib.class, "String has more than one token in it: "+string);
- return n;
+ public static Node parse(String string) {
+ return NodeFactoryExtra.parseNode(string, null);
}
public static ParserProfile profile(Lang lang, String baseIRI)
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/SerializationFactoryFinder.java b/jena-arq/src/main/java/org/apache/jena/riot/system/SerializationFactoryFinder.java
index a38567d..ef39772 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/SerializationFactoryFinder.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/SerializationFactoryFinder.java
@@ -32,7 +32,7 @@
import org.apache.jena.riot.out.SinkQuadOutput ;
import org.apache.jena.riot.out.SinkTripleOutput ;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.apache.jena.sparql.core.Quad ;
import org.apache.jena.sparql.engine.binding.Binding ;
import org.apache.jena.sparql.engine.binding.BindingInputStream ;
@@ -78,11 +78,11 @@
@Override
public Iterator<Triple> createDeserializer(InputStream in)
{
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerASCII(in);
+ Tokenizer tokenizer = TokenizerText.create().source(in).build();
ParserProfile profile = RiotLib.createParserProfile(RiotLib.factoryRDF(LabelToNode.createUseLabelEncoded()),
- ErrorHandlerFactory.errorHandlerNoWarnings,
- IRIResolver.createNoResolve(),
- false);
+ ErrorHandlerFactory.errorHandlerNoWarnings,
+ IRIResolver.createNoResolve(),
+ false);
LangNTriples parser = new LangNTriples(tokenizer, profile, null);
return parser ;
}
@@ -109,11 +109,11 @@
@Override
public Iterator<Quad> createDeserializer(InputStream in)
{
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerASCII(in);
+ Tokenizer tokenizer = TokenizerText.create().source(in).build();
ParserProfile profile = RiotLib.createParserProfile(RiotLib.factoryRDF(LabelToNode.createUseLabelEncoded()),
- ErrorHandlerFactory.errorHandlerNoWarnings,
- IRIResolver.createNoResolve(),
- false);
+ ErrorHandlerFactory.errorHandlerNoWarnings,
+ IRIResolver.createNoResolve(),
+ false);
LangNQuads parser = new LangNQuads(tokenizer, profile, null) ;
return parser ;
}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerFactory.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerFactory.java
index ef2566ab..f1a0224 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerFactory.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerFactory.java
@@ -19,59 +19,64 @@
package org.apache.jena.riot.tokens;
import java.io.InputStream;
-import java.io.Reader;
-import java.io.StringReader;
-import org.apache.jena.riot.system.ErrorHandler;
-
+/** @deprecated Use {@code TokenizerText.create()...} */
+@Deprecated
public class TokenizerFactory {
-
- private static ErrorHandler dftErrorHandler = null;
-
- /** Discouraged - be careful about character sets */
    // Just in case anyone is using this operation, keep the only proper old-world operation.
+ /** @deprecated Use {@code TokenizerText.create().source(in).build();} */
@Deprecated
- public static Tokenizer makeTokenizer(Reader reader) {
- return TokenizerText.create().source(reader).build();
- }
-
- /** Discouraged - be careful about character sets */
- @Deprecated
- public static Tokenizer makeTokenizer(Reader reader, ErrorHandler errorHandler) {
- return TokenizerText.create().source(reader).errorHandler(errorHandler).build();
- }
-
- /** Safe use of a StringReader */
- public static Tokenizer makeTokenizer(StringReader reader) {
- return TokenizerText.create().source(reader).build();
- }
-
- /** Safe use of a StringReader */
- public static Tokenizer makeTokenizer(StringReader reader, ErrorHandler errorHandler) {
- return TokenizerText.create().source(reader).errorHandler(errorHandler).build();
- }
-
public static Tokenizer makeTokenizerUTF8(InputStream in) {
- return makeTokenizerUTF8(in, dftErrorHandler);
+ return TokenizerText.create().source(in).build();
}
-
- public static Tokenizer makeTokenizerUTF8(InputStream input, ErrorHandler errorHandler) {
- // BOM will be removed
- return TokenizerText.create().source(input).errorHandler(errorHandler).build();
- }
-
- public static Tokenizer makeTokenizerASCII(InputStream input) {
- return TokenizerText.create().source(input).asciiOnly(true).build();
- }
-
- public static Tokenizer makeTokenizerASCII(InputStream input, ErrorHandler errorHandler) {
- return TokenizerText.create().source(input).asciiOnly(true).errorHandler(errorHandler).build();
- }
-
- public static Tokenizer makeTokenizerString(String str) {
- return TokenizerText.create().fromString(str).build();
- }
-
- public static Tokenizer makeTokenizerString(String str, ErrorHandler errorHandler) {
- return TokenizerText.create().fromString(str).errorHandler(errorHandler).build();
- }
+
+//
+// private static ErrorHandler dftErrorHandler = null;
+//
+// /** Discouraged - be careful about character sets */
+// @Deprecated
+// public static Tokenizer makeTokenizer(Reader reader) {
+// return TokenizerText.create().source(reader).build();
+// }
+//
+// /** Discouraged - be careful about character sets */
+// @Deprecated
+// public static Tokenizer makeTokenizer(Reader reader, ErrorHandler errorHandler) {
+// return TokenizerText.create().source(reader).errorHandler(errorHandler).build();
+// }
+//
+// /** Safe use of a StringReader */
+// public static Tokenizer makeTokenizer(StringReader reader) {
+// return TokenizerText.create().source(reader).build();
+// }
+//
+// /** Safe use of a StringReader */
+// public static Tokenizer makeTokenizer(StringReader reader, ErrorHandler errorHandler) {
+// return TokenizerText.create().source(reader).errorHandler(errorHandler).build();
+// }
+//
+// public static Tokenizer makeTokenizerUTF8(InputStream in) {
+// return makeTokenizerUTF8(in, dftErrorHandler);
+// }
+//
+// public static Tokenizer makeTokenizerUTF8(InputStream input, ErrorHandler errorHandler) {
+// // BOM will be removed
+// return TokenizerText.create().source(input).errorHandler(errorHandler).build();
+// }
+//
+// public static Tokenizer makeTokenizerASCII(InputStream input) {
+// return TokenizerText.create().source(input).asciiOnly(true).build();
+// }
+//
+// public static Tokenizer makeTokenizerASCII(InputStream input, ErrorHandler errorHandler) {
+// return TokenizerText.create().source(input).asciiOnly(true).errorHandler(errorHandler).build();
+// }
+//
+// public static Tokenizer makeTokenizerString(String str) {
+// return TokenizerText.create().fromString(str).build();
+// }
+//
+// public static Tokenizer makeTokenizerString(String str, ErrorHandler errorHandler) {
+// return TokenizerText.create().fromString(str).errorHandler(errorHandler).build();
+// }
}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
index a862978..50b7596 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
@@ -40,7 +40,7 @@
// Care with <=
// Policy driven for CURIES?
- public static final int CTRL_CHAR = CH_STAR;
+ private static final int CTRL_CHAR = CH_STAR;
// The code has the call points for checking tokens but it is generally better to
// do the check later in the parsing process. In case a need arises, the code
@@ -59,7 +59,9 @@
// The code assumes that errors throw exception and so stop parsing.
private final ErrorHandler errorHandler;
- public static TokenizeTextBuilder create() { return new TokenizeTextBuilder() ; }
+ public static TokenizeTextBuilder create() { return new TokenizeTextBuilder() ; }
+
+ public static Tokenizer fromString(String string) { return create().fromString(string).build(); }
/*package*/ static TokenizerText internal(PeekReader reader, boolean lineMode, ErrorHandler errorHandler) {
return new TokenizerText(reader, lineMode, errorHandler);
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/binding/BindingInputStream.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/binding/BindingInputStream.java
index 7316b62..5335eef 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/binding/BindingInputStream.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/binding/BindingInputStream.java
@@ -36,10 +36,7 @@
import org.apache.jena.riot.lang.LangEngine ;
import org.apache.jena.riot.out.NodeFmtLib ;
import org.apache.jena.riot.system.* ;
-import org.apache.jena.riot.tokens.Token ;
-import org.apache.jena.riot.tokens.TokenType ;
-import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.*;
import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.graph.NodeConst ;
@@ -69,7 +66,7 @@
public BindingInputStream(InputStream in)
{
- this(TokenizerFactory.makeTokenizerUTF8(in)) ;
+ this(TokenizerText.create().source(in).build()) ;
}
public BindingInputStream(Tokenizer tokenizer)
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/util/NodeFactoryExtra.java b/jena-arq/src/main/java/org/apache/jena/sparql/util/NodeFactoryExtra.java
index 546092f..7cf1edb 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/util/NodeFactoryExtra.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/util/NodeFactoryExtra.java
@@ -33,7 +33,7 @@
import org.apache.jena.riot.system.PrefixMapFactory ;
import org.apache.jena.riot.tokens.Token ;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.apache.jena.sparql.sse.SSE ;
/**
@@ -69,7 +69,7 @@
* @throws RiotException Thrown if a valid node cannot be parsed
*/
public static Node parseNode(String nodeString, PrefixMap pmap) {
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(nodeString) ;
+ Tokenizer tokenizer = TokenizerText.create().fromString(nodeString).build();
if ( !tokenizer.hasNext() )
throw new RiotException("Empty RDF term") ;
Token token = tokenizer.next() ;
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
index 3e6584e..bf3217c 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
@@ -31,16 +31,16 @@
import org.apache.jena.riot.ErrorHandlerTestLib.ExError;
import org.apache.jena.riot.ErrorHandlerTestLib.ExFatal;
import org.apache.jena.riot.ErrorHandlerTestLib.ExWarning;
-import org.apache.jena.riot.system.*;
import org.apache.jena.riot.Lang ;
import org.apache.jena.riot.RIOT;
+import org.apache.jena.riot.system.*;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.junit.AfterClass ;
import org.junit.BeforeClass ;
import org.junit.Test ;
-/** Test of syntax by a tuples parser (does not include node validitiy checking) */
+/** Test of syntax by a tuples parser (does not include node validity checking) */
abstract public class AbstractTestLangNTuples
{
// Test streaming interface.
@@ -175,7 +175,8 @@
byte b[] = StrUtils.asUTF8bytes(string);
ByteArrayInputStream in = new ByteArrayInputStream(b);
Tokenizer tokenizer = charSpace == CharSpace.ASCII
- ? TokenizerFactory.makeTokenizerASCII(in) : TokenizerFactory.makeTokenizerUTF8(in);
+ ? TokenizerText.create().asciiOnly(true).source(in).build()
+ : TokenizerText.create().source(in).build();
return tokenizer;
}
@@ -183,7 +184,7 @@
// UTF-8
byte b[] = StrUtils.asUTF8bytes(string);
ByteArrayInputStream in = new ByteArrayInputStream(b);
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in);
+ Tokenizer tokenizer = TokenizerText.create().source(in).build() ;
return tokenizer;
}
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangRdfJson.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangRdfJson.java
index 2f7827c..c9ea511 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangRdfJson.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangRdfJson.java
@@ -35,7 +35,7 @@
import org.apache.jena.riot.system.RiotLib;
import org.apache.jena.riot.system.StreamRDFLib ;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.junit.Test ;
public class TestLangRdfJson
@@ -447,7 +447,7 @@
public void rdfjson_invalid_tokenizer() {
byte b[] = StrUtils.asUTF8bytes("") ;
ByteArrayInputStream in = new ByteArrayInputStream(b);
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in) ;
+ Tokenizer tokenizer = TokenizerText.create().source(in).build() ;
StreamRDFCounting sink = StreamRDFLib.count() ;
LangRDFJSON parser = RiotParsers.createParserRdfJson(tokenizer, sink, RiotLib.dftProfile()) ;
}
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestParserFactory.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestParserFactory.java
index 01d7197..7803faa 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestParserFactory.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestParserFactory.java
@@ -29,7 +29,7 @@
import org.apache.jena.riot.RIOT;
import org.apache.jena.riot.system.*;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.apache.jena.sparql.core.Quad ;
import org.apache.jena.sparql.sse.SSE ;
import org.junit.Test ;
@@ -52,7 +52,7 @@
// Old style, direct to LangRIOT -- very deprecated.
// NQ version tests that relative URIs remain relative.
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString("<x> <p> <q> .") ;
+ Tokenizer tokenizer = TokenizerText.create().fromString("<x> <p> <q> .").build();
CatchParserOutput sink = new CatchParserOutput() ;
ParserProfile profile = makeParserProfile(IRIResolver.createNoResolve(), null, false);
LangRIOT parser = RiotParsers.createParserNTriples(tokenizer, sink, profile) ;
@@ -67,28 +67,14 @@
@Test public void turtle_01()
{
// Verify the expected output works.
- {
- String s = "<x> <p> <q> ." ;
- CatchParserOutput sink = parseCapture(s, Lang.TTL) ;
- assertEquals(1, sink.startCalled) ;
- assertEquals(1, sink.finishCalled) ;
- assertEquals(1, sink.triples.size()) ;
- assertEquals(0, sink.quads.size()) ;
- Triple t = SSE.parseTriple("(<http://base/x> <http://base/p> <http://base/q>)") ;
- assertEquals(t, last(sink.triples)) ;
- }
-
- // Old style, deprecated.
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString("<x> <p> <q> .") ;
- CatchParserOutput sink = new CatchParserOutput() ;
- ParserProfile maker = makeParserProfile(IRIResolver.create("http://base/"), null, true);
- LangRIOT parser = RiotParsers.createParserTurtle(tokenizer, sink, maker) ;
- parser.parse();
+ String s = "<x> <p> <q> ." ;
+ CatchParserOutput sink = parseCapture(s, Lang.TTL) ;
assertEquals(1, sink.startCalled) ;
assertEquals(1, sink.finishCalled) ;
assertEquals(1, sink.triples.size()) ;
assertEquals(0, sink.quads.size()) ;
- assertEquals(SSE.parseTriple("(<http://base/x> <http://base/p> <http://base/q>)"), last(sink.triples)) ;
+ Triple t = SSE.parseTriple("(<http://base/x> <http://base/p> <http://base/q>)") ;
+ assertEquals(t, last(sink.triples)) ;
}
private ParserProfile makeParserProfile(IRIResolver resolver, ErrorHandler errorHandler, boolean checking) {
@@ -104,23 +90,8 @@
@Test public void nquads_01()
{
- {
- String s = "<x> <p> <q> <g> ." ;
- CatchParserOutput sink = parseCapture(s, Lang.NQ) ;
- assertEquals(1, sink.startCalled) ;
- assertEquals(1, sink.finishCalled) ;
- assertEquals(0, sink.triples.size()) ;
- assertEquals(1, sink.quads.size()) ;
- Quad q = SSE.parseQuad("(<g> <x> <p> <q>)") ;
- assertEquals(q, last(sink.quads)) ;
- }
-
- // Old style, deprecated.
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString("<x> <p> <q> <g>.") ;
- CatchParserOutput sink = new CatchParserOutput() ;
- ParserProfile x = makeParserProfile(IRIResolver.createNoResolve(), null, false);
- LangRIOT parser = RiotParsers.createParserNQuads(tokenizer, sink, x) ;
- parser.parse();
+ String s = "<x> <p> <q> <g> ." ;
+ CatchParserOutput sink = parseCapture(s, Lang.NQ) ;
assertEquals(1, sink.startCalled) ;
assertEquals(1, sink.finishCalled) ;
assertEquals(0, sink.triples.size()) ;
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestTurtleTerms.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestTurtleTerms.java
index ca8db6f..ddc84f2 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestTurtleTerms.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestTurtleTerms.java
@@ -19,9 +19,11 @@
package org.apache.jena.riot.lang;
import org.apache.jena.atlas.lib.StrUtils;
-import org.apache.jena.riot.system.*;
+import org.apache.jena.riot.system.RiotLib;
+import org.apache.jena.riot.system.StreamRDF;
+import org.apache.jena.riot.system.StreamRDFLib;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.junit.Test ;
public class TestTurtleTerms
@@ -250,7 +252,7 @@
public static void parse(String testString)
{
// Need a prefix mapping.
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(prefixMap+"\n"+testString) ;
+ Tokenizer tokenizer = TokenizerText.create().fromString(prefixMap+"\n"+testString).build() ;
StreamRDF sink = StreamRDFLib.sinkNull() ;
LangTurtle parser = RiotParsers.createParserTurtle(tokenizer, sink, RiotLib.dftProfile()) ;
parser.parse();
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
index 9621ddd..97c44cc 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
@@ -912,7 +912,7 @@
@Test
public void tokenizer_charset_1() {
ByteArrayInputStream in = bytes("'abc'") ;
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerASCII(in) ;
+ Tokenizer tokenizer = TokenizerText.create().asciiOnly(true).source(in).build() ;
Token t = tokenizer.next() ;
assertFalse(tokenizer.hasNext()) ;
}
@@ -920,7 +920,7 @@
@Test(expected = RiotParseException.class)
public void tokenizer_charset_2() {
ByteArrayInputStream in = bytes("'abcdé'") ;
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerASCII(in) ;
+ Tokenizer tokenizer = TokenizerText.create().asciiOnly(true).source(in).build() ;
Token t = tokenizer.next() ;
assertFalse(tokenizer.hasNext()) ;
}
@@ -928,7 +928,7 @@
@Test(expected = RiotParseException.class)
public void tokenizer_charset_3() {
ByteArrayInputStream in = bytes("<http://example/abcdé>") ;
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerASCII(in) ;
+ Tokenizer tokenizer = TokenizerText.create().asciiOnly(true).source(in).build() ;
Token t = tokenizer.next() ;
assertFalse(tokenizer.hasNext()) ;
}
@@ -937,7 +937,7 @@
public void tokenizer_BOM_1() {
// BOM
ByteArrayInputStream in = bytes("\uFEFF'abc'") ;
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in) ;
+ Tokenizer tokenizer = TokenizerText.create().source(in).build() ;
assertTrue(tokenizer.hasNext()) ;
Token token = tokenizer.next() ;
assertNotNull(token) ;
@@ -945,7 +945,7 @@
assertEquals("abc", token.getImage()) ;
assertFalse(tokenizer.hasNext()) ;
}
-
+
// First symbol from the stream.
private static void testSymbol(String string, TokenType expected) {
Tokenizer tokenizer = tokenizeAndTestFirst(string, expected, null) ;
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/engine/binding/TestBindingStreams.java b/jena-arq/src/test/java/org/apache/jena/sparql/engine/binding/TestBindingStreams.java
index b37c7d0..2078075 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/engine/binding/TestBindingStreams.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/engine/binding/TestBindingStreams.java
@@ -33,7 +33,7 @@
import org.apache.jena.riot.system.PrefixMap ;
import org.apache.jena.riot.system.PrefixMapFactory ;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.graph.NodeConst ;
import org.apache.jena.sparql.resultset.ResultSetCompare ;
@@ -120,7 +120,7 @@
static void testRead(String x, Binding ... bindings)
{
- Tokenizer t = TokenizerFactory.makeTokenizerString(x) ;
+        Tokenizer t = TokenizerText.create().fromString(x).build() ;
BindingInputStream inStream = new BindingInputStream(t) ;
if ( bindings.length == 0 )
diff --git a/jena-cmds/src/main/java/riotcmd/CmdLangParse.java b/jena-cmds/src/main/java/riotcmd/CmdLangParse.java
index 50c1e10..88a91f2 100644
--- a/jena-cmds/src/main/java/riotcmd/CmdLangParse.java
+++ b/jena-cmds/src/main/java/riotcmd/CmdLangParse.java
@@ -47,7 +47,7 @@
import org.apache.jena.riot.system.StreamRDFLib;
import org.apache.jena.riot.system.StreamRDFWriter;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.apache.jena.sparql.core.DatasetGraph ;
import org.apache.jena.sparql.core.DatasetGraphFactory ;
import org.apache.jena.sys.JenaSystem ;
@@ -351,7 +351,7 @@
}
protected Tokenizer makeTokenizer(InputStream in) {
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in) ;
+ Tokenizer tokenizer = TokenizerText.create().source(in).build() ;
return tokenizer ;
}
diff --git a/jena-cmds/src/main/java/riotcmd/CmdTokens.java b/jena-cmds/src/main/java/riotcmd/CmdTokens.java
index d7c1340..210ddb3 100644
--- a/jena-cmds/src/main/java/riotcmd/CmdTokens.java
+++ b/jena-cmds/src/main/java/riotcmd/CmdTokens.java
@@ -24,7 +24,7 @@
import org.apache.jena.atlas.lib.Timer ;
import org.apache.jena.riot.tokens.Token ;
import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
+import org.apache.jena.riot.tokens.TokenizerText;
public class CmdTokens
{
@@ -47,18 +47,18 @@
for ( String filename : args )
{
InputStream in = IO.openFile(filename) ;
- Tokenizer tokenize = TokenizerFactory.makeTokenizerUTF8(in) ;
+ Tokenizer tokenizer = TokenizerText.create().source(in).build() ;
Timer timer = new Timer() ;
long count = 0 ;
timer.startTimer() ;
- for ( ; tokenize.hasNext() ; )
+ for ( ; tokenizer.hasNext() ; )
{
- Token t = tokenize.next() ;
+ Token t = tokenizer.next() ;
if ( print )
System.out.println(t) ;
count++ ;
}
- tokenize.close();
+ tokenizer.close();
long millis = timer.endTimer() ;
if ( timing )
{
diff --git a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractLineBasedNodeTupleReader.java b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractLineBasedNodeTupleReader.java
index 7828dad..e0c2fb1 100644
--- a/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractLineBasedNodeTupleReader.java
+++ b/jena-elephas/jena-elephas-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/AbstractLineBasedNodeTupleReader.java
@@ -41,7 +41,7 @@
import org.apache.jena.riot.lang.LabelToNode;
import org.apache.jena.riot.system.*;
import org.apache.jena.riot.tokens.Tokenizer;
-import org.apache.jena.riot.tokens.TokenizerFactory;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -155,7 +155,7 @@
* @return Tokenizer
*/
protected Tokenizer getTokenizer(String line) {
- return TokenizerFactory.makeTokenizerString(line);
+ return TokenizerText.fromString(line);
}
diff --git a/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/DatasetClauseTest.java b/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/DatasetClauseTest.java
index abb80f5..813a39d 100644
--- a/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/DatasetClauseTest.java
+++ b/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/DatasetClauseTest.java
@@ -23,7 +23,6 @@
import java.util.Arrays;
import org.apache.jena.arq.querybuilder.AbstractQueryBuilder;
-import org.apache.jena.arq.querybuilder.clauses.DatasetClause;
import org.apache.jena.arq.querybuilder.handlers.DatasetHandler;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.sparql.core.Var ;
diff --git a/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/PrologClauseTest.java b/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/PrologClauseTest.java
index 8abf6d8..4d77117 100644
--- a/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/PrologClauseTest.java
+++ b/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/PrologClauseTest.java
@@ -17,13 +17,13 @@
*/
package org.apache.jena.arq.querybuilder.clauses;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
import java.util.HashMap;
import java.util.Map;
import org.apache.jena.arq.querybuilder.AbstractQueryBuilder;
-import org.apache.jena.arq.querybuilder.clauses.PrologClause;
import org.apache.jena.arq.querybuilder.handlers.PrologHandler;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.query.Query;
diff --git a/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/SolutionModifierTest.java b/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/SolutionModifierTest.java
index 2cbc76e..68b9663 100644
--- a/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/SolutionModifierTest.java
+++ b/jena-extras/jena-querybuilder/src/test/java/org/apache/jena/arq/querybuilder/clauses/SolutionModifierTest.java
@@ -23,7 +23,6 @@
import org.apache.jena.arq.querybuilder.AbstractQueryBuilder;
import org.apache.jena.arq.querybuilder.Order;
-import org.apache.jena.arq.querybuilder.clauses.SolutionModifierClause;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.query.Query;
diff --git a/jena-permissions/src/test/java/org/apache/jena/permissions/MockSecurityEvaluator.java b/jena-permissions/src/test/java/org/apache/jena/permissions/MockSecurityEvaluator.java
index 8e2fc09..21e45e1 100644
--- a/jena-permissions/src/test/java/org/apache/jena/permissions/MockSecurityEvaluator.java
+++ b/jena-permissions/src/test/java/org/apache/jena/permissions/MockSecurityEvaluator.java
@@ -23,7 +23,6 @@
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
-import org.apache.jena.permissions.SecurityEvaluator;
import org.apache.jena.rdf.model.Resource ;
public class MockSecurityEvaluator implements SecurityEvaluator
diff --git a/jena-permissions/src/test/java/org/apache/jena/permissions/ModelBasedSecurityEvaluator.java b/jena-permissions/src/test/java/org/apache/jena/permissions/ModelBasedSecurityEvaluator.java
index ca2f3cc..65f083c 100644
--- a/jena-permissions/src/test/java/org/apache/jena/permissions/ModelBasedSecurityEvaluator.java
+++ b/jena-permissions/src/test/java/org/apache/jena/permissions/ModelBasedSecurityEvaluator.java
@@ -21,7 +21,6 @@
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
-import org.apache.jena.permissions.SecurityEvaluator;
import org.apache.jena.rdf.model.Model ;
public class ModelBasedSecurityEvaluator implements SecurityEvaluator {
diff --git a/jena-permissions/src/test/java/org/apache/jena/permissions/StaticSecurityEvaluator.java b/jena-permissions/src/test/java/org/apache/jena/permissions/StaticSecurityEvaluator.java
index 37c9fed..6d29504 100644
--- a/jena-permissions/src/test/java/org/apache/jena/permissions/StaticSecurityEvaluator.java
+++ b/jena-permissions/src/test/java/org/apache/jena/permissions/StaticSecurityEvaluator.java
@@ -22,7 +22,6 @@
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
-import org.apache.jena.permissions.SecurityEvaluator;
public class StaticSecurityEvaluator implements SecurityEvaluator {
diff --git a/jena-permissions/src/test/java/org/apache/jena/permissions/model/SecuredContainerTest.java b/jena-permissions/src/test/java/org/apache/jena/permissions/model/SecuredContainerTest.java
index 7553c9b..0ab2609 100644
--- a/jena-permissions/src/test/java/org/apache/jena/permissions/model/SecuredContainerTest.java
+++ b/jena-permissions/src/test/java/org/apache/jena/permissions/model/SecuredContainerTest.java
@@ -21,9 +21,8 @@
import org.apache.jena.permissions.MockSecurityEvaluator;
import org.apache.jena.permissions.SecurityEvaluator;
-import org.apache.jena.permissions.SecurityEvaluatorParameters;
import org.apache.jena.permissions.SecurityEvaluator.Action;
-import org.apache.jena.permissions.model.SecuredContainer;
+import org.apache.jena.permissions.SecurityEvaluatorParameters;
import org.apache.jena.permissions.model.impl.SecuredContainerImpl;
import org.apache.jena.rdf.model.Container;
import org.apache.jena.rdf.model.ResourceFactory;
diff --git a/jena-shacl/src/main/java/org/apache/jena/shacl/compact/ShaclCompactParser.java b/jena-shacl/src/main/java/org/apache/jena/shacl/compact/ShaclCompactParser.java
index 7603d22..aff8310 100644
--- a/jena-shacl/src/main/java/org/apache/jena/shacl/compact/ShaclCompactParser.java
+++ b/jena-shacl/src/main/java/org/apache/jena/shacl/compact/ShaclCompactParser.java
@@ -28,7 +28,7 @@
import org.apache.jena.riot.system.PrefixMap;
import org.apache.jena.riot.system.PrefixMapFactory;
import org.apache.jena.riot.system.StreamRDF;
-import org.apache.jena.riot.tokens.TokenizerFactory;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.apache.jena.shacl.ShaclException;
import org.apache.jena.shacl.engine.ShaclPaths;
import org.apache.jena.shacl.vocabulary.SHACL;
@@ -464,7 +464,7 @@
return x;
String s = x.getLiteralLexicalForm();
PrefixMap pmap = PrefixMapFactory.create(getPrologue().getPrefixMapping());
- Node n = TokenizerFactory.makeTokenizerString(s).next().asNode(pmap);
+ Node n = TokenizerText.create().fromString(s).build().next().asNode(pmap);
return n;
}
diff --git a/jena-tdb/src/main/java/org/apache/jena/tdb/store/nodetable/NodecSSE.java b/jena-tdb/src/main/java/org/apache/jena/tdb/store/nodetable/NodecSSE.java
index 7752a52..13e879f 100644
--- a/jena-tdb/src/main/java/org/apache/jena/tdb/store/nodetable/NodecSSE.java
+++ b/jena-tdb/src/main/java/org/apache/jena/tdb/store/nodetable/NodecSSE.java
@@ -33,7 +33,7 @@
import org.apache.jena.riot.system.PrefixMapNull;
import org.apache.jena.riot.tokens.Token;
import org.apache.jena.riot.tokens.Tokenizer;
-import org.apache.jena.riot.tokens.TokenizerFactory;
+import org.apache.jena.riot.tokens.TokenizerText;
import org.apache.jena.riot.web.LangTag;
import org.apache.jena.shared.PrefixMapping;
import org.apache.jena.sparql.sse.SSE;
@@ -137,7 +137,7 @@
return NodeFactory.createURI(str);
}
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(str);
+ Tokenizer tokenizer = createTokenizer(str);
if ( ! tokenizer.hasNext() )
throw new TDBException("Failed to tokenize: "+str);
Token t = tokenizer.next();
@@ -152,6 +152,10 @@
}
}
+ private static Tokenizer createTokenizer(String string) {
+ return TokenizerText.create().fromString(string).build();
+ }
+
// Over-estimate the length of the encoding.
private static int maxLength(Node node)
{