blob: 78e62c1ca8f16c3aac6f5db854d6684b3c11deff [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.riot.system;
import static org.apache.jena.riot.tokens.TokenType.DIRECTIVE ;
import static org.apache.jena.riot.tokens.TokenType.DOT ;
import static org.apache.jena.riot.tokens.TokenType.IRI ;
import static org.apache.jena.riot.tokens.TokenType.PREFIXED_NAME ;
import java.io.InputStream ;
import java.util.* ;
import org.apache.jena.atlas.lib.Closeable ;
import org.apache.jena.atlas.lib.NotImplemented ;
import org.apache.jena.graph.Node ;
import org.apache.jena.riot.RiotException ;
import org.apache.jena.riot.tokens.Token ;
import org.apache.jena.riot.tokens.Tokenizer ;
import org.apache.jena.riot.tokens.TokenizerFactory ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
/**
 * Testing/development convenience.
 * <p>
 * Iterator of StreamRowRDF (always a tuple) for an input stream of tokenized RDF terms.
 * The input is read as a sequence of "lines", each being 3 or 4 terms terminated by a
 * DOT token. Within a line, the word token {@code R} means "repeat the term found at the
 * same position of the previous line". Prefix directives are consumed while tokenizing
 * and are used to expand prefixed names into IRIs.
 * <p>
 * NOTE: row construction is unfinished. {@code line2row} validates and resolves the terms
 * of a line and then throws {@link NotImplemented}.
 */
public class IteratorStreamRDFText extends IteratorStreamRDF implements Iterator<StreamRowRDF> {
    /** Source of lines (lists of tokens) with prefixes already resolved. */
    private final TokenInputStream in ;
    /** Terms of the most recently processed line; consulted when a line uses the "R" repeat marker. */
    private Node[] previousTuple = null ;

    /**
     * Create an iterator over UTF-8 encoded, tokenized RDF terms.
     * The constructor is private: the class is currently not instantiable from outside.
     *
     * @param input the byte stream to tokenize
     */
    private /*public*/ IteratorStreamRDFText(InputStream input) {
        Tokenizer t = TokenizerFactory.makeTokenizerUTF8(input) ;
        in = new TokenInputStream(null, t) ;
    }

    /**
     * Always answers {@code true}; the end of input is signalled by
     * {@link #moveToNext()} returning {@code null} instead.
     */
    @Override
    protected boolean hasMore() {
        return true ;
    }

    /**
     * Read the next line of tokens and convert it to a row.
     *
     * @return the next row, or {@code null} when the input is exhausted
     */
    @Override
    protected StreamRowRDF moveToNext() {
        if ( ! in.hasNext() )
            return null ;
        List<Token> line = in.next() ;
        return line2row(line) ;
    }

    /**
     * Convert one line of tokens into a row.
     * <p>
     * Every token must either be the word {@code R} (repeat the term at the same position
     * of the previous line) or a token that is an RDF node.
     *
     * @param line the tokens of one line, without the terminating DOT
     * @return never returns normally at present - see below
     * @throws RiotException if the line is not 3 or 4 tokens long, if a repeat marker has
     *         no corresponding term in the previous line, or if a token is not recognized
     * @throws NotImplemented always, once the line has been validated: building the
     *         resulting row has not been decided yet
     */
    private StreamRowRDF line2row(List<Token> line) {
        if ( line.size() != 3 && line.size() != 4 )
            throw new RiotException("Input line is not 3 or 4 items long") ;

        Node[] tuple = new Node[line.size()] ;
        int idx = 0 ;
        for ( Token token : line ) {
            Node n = null ;
            if ( ( token.isWord() && token.getImage().equals("R") )
                 //|| ( token.isCtlCode() && token.getCntrlCode() == -1 )     // *
               ) {
                // Repeat marker: take the term from the same slot of the previous line.
                if ( previousTuple == null )
                    throw new RiotException("Repeat without previous data row") ;
                if ( idx >= previousTuple.length )
                    throw new RiotException("Repeat position beyond previous data row") ;
                n = previousTuple[idx] ;
            } else if ( token.isNode() ) {
                n = asNode(token) ;
            }
            if ( n == null )
                throw new RiotException("Unrecognized token : "+token ) ;
            tuple[idx] = n ;
            idx++ ;
        }
        // Remember this line so that a following line may use the repeat marker.
        previousTuple = tuple ;

        // Needs rethink.
        throw new NotImplemented() ;
//        if ( line.size() == 3 )
//            return new StreamRowRDFBase(Triple.create(tuple[0], tuple[1], tuple[2])) ;
//        else
//            return new StreamRowRDFBase(Quad.create(tuple[0], tuple[1], tuple[2], tuple[3])) ;
//        return new StreamRowRDFBase(Tuple.create(tuple)) ;
    }

    /**
     * Convert a token to a node; IRI tokens go via {@code RiotLib.createIRIorBNode}
     * so that the {@code <_:...>} form can denote a blank node.
     */
    private static Node asNode(Token t) {
        // <_:...> bnodes.
        if ( t.isIRI() )
            return RiotLib.createIRIorBNode(t.getImage()) ;
        return t.asNode() ;
    }

    /** Tokenizer that sorts out prefixes and groups into sequences of token */
    private static class TokenInputStream implements Iterator<List<Token>>, Iterable<List<Token>>, Closeable {
        private static final Logger log = LoggerFactory.getLogger(TokenInputStream.class) ;
        /** Set once the input is exhausted or reading has failed. */
        private boolean finished = false ;
        private final Tokenizer tokens ;
        /** The line read ahead by {@link #hasNext()} and not yet handed out by {@link #next()}. */
        private List<Token> list ;
        /** Prefix to namespace IRI string, filled in by prefix directives. */
        private final Map<String, String> map = new HashMap<>() ;
        /** Optional label used in debug output; may be null. */
        private final String label ;

        /**
         * @param label  optional label for debug logging, may be {@code null}
         * @param tokens the token source
         */
        public TokenInputStream(String label, Tokenizer tokens) {
            this.tokens = tokens ;
            this.label = label ;
        }

        /**
         * Read ahead one line if necessary.
         * Any exception raised while reading ends the input; it is logged, not rethrown.
         */
        @Override
        public boolean hasNext() {
            if ( finished )
                return false ;
            if ( list != null ) // Already got the reply.
                return true ;
            try {
                if ( !tokens.hasNext() ) {
                    finished = true ;
                    return false ;
                }
                list = buildOneLine() ;
            } catch (Exception ex) {
                // Best effort: stop iterating, but leave a trace of why the input ended early.
                log.warn("Exception while reading tokens - treating as end of input", ex) ;
                finished = true ;
                return false ;
            }
            if ( list == null )
                finished = true ;
            return list != null ;
        }

        /**
         * Collect tokens up to the next DOT. Directive lines are processed and skipped;
         * prefixed names in data lines are rewritten in place to IRI tokens.
         *
         * @return the tokens of the next data line (without the DOT), or {@code null} if
         *         the input ended with no further tokens to report
         */
        private List<Token> buildOneLine() {
            List<Token> tuple = new ArrayList<>() ;
            boolean isDirective = false ;
            while ( tokens.hasNext() ) {
                Token token = tokens.next() ;

                if ( token.hasType(DIRECTIVE) )
                    isDirective = true ;

                if ( token.hasType(DOT) ) {
                    if ( !tuple.isEmpty() && tuple.get(0).hasType(DIRECTIVE) ) {
                        directive(tuple) ;
                        tuple.clear() ;
                        isDirective = false ;
                        // Start again.
                        continue ;
                    }
                    return tuple ;
                }

                // Fixup prefix names. (Not inside a directive: the prefix being declared must stay raw.)
                if ( !isDirective && token.hasType(PREFIXED_NAME) ) {
                    String ns = map.get(token.getImage()) ;
                    String iri ;
                    if ( ns == null ) {
                        log.warn("Can't resolve '{}'", token.toString(false)) ;
                        iri = "unresolved:" + token.getImage() + ":" + token.getImage2() ;
                    } else
                        iri = ns + token.getImage2() ;
                    token.setType(IRI) ;
                    token.setImage(iri) ;
                    token.setImage2(null) ;
                }

                tuple.add(token) ;
            }

            // No final DOT.
            // If nothing was collected (e.g. the input ended right after a directive)
            // there is no line to report: signal end of input, not an empty line.
            return tuple.isEmpty() ? null : tuple ;
        }

        /**
         * Process a directive line. Only the prefix directive is supported; it records
         * the prefix-to-IRI mapping used to expand later prefixed names.
         *
         * @param tuple the directive token followed by its arguments (no DOT)
         * @throws RiotException if the directive is malformed or not recognized
         */
        private void directive(List<Token> tuple) {
            if ( tuple.size() != 3 )
                throw new RiotException("Bad directive: " + tuple) ;

            String x = tuple.get(0).getImage() ;
            if ( x.equals("prefix") ) {
                Token prefixToken = tuple.get(1) ;
                Token iriToken = tuple.get(2) ;
                // Raw - unresolved prefix name.
                if ( !prefixToken.hasType(PREFIXED_NAME) )
                    throw new RiotException("@prefix requires a prefix (found '" + prefixToken + "')") ;
                if ( prefixToken.getImage2().length() != 0 )
                    throw new RiotException("@prefix requires a prefix and no suffix (found '" + prefixToken + "')") ;
                if ( !iriToken.hasType(IRI) )
                    throw new RiotException("@prefix requires an IRI (found '" + iriToken + "')") ;
                map.put(prefixToken.getImage(), iriToken.getImage()) ;
                return ;
            }
            throw new RiotException("Unrecognized directive: " + x) ;
        }

        /**
         * @return the next line of tokens
         * @throws NoSuchElementException if there are no more lines
         */
        @Override
        public List<Token> next() {
            if ( !hasNext() )
                throw new NoSuchElementException() ;
            List<Token> r = list ;
            if ( log.isDebugEnabled() ) {
                if ( label != null )
                    log.debug("<< " + label + ": " + r) ;
                else
                    log.debug("<< " + r.toString()) ;
            }
            list = null ;
            return r ;
        }

        /** Removal is not supported. */
        @Override
        public void remove() {
            throw new UnsupportedOperationException() ;
        }

        /** This object is its own (single-use) iterator. */
        @Override
        public Iterator<List<Token>> iterator() {
            return this ;
        }

        /** No-op. */
        @Override
        public void close() {}
    }
}