blob: 06b5adb0ec742208c2ccc5cb8c38f9f653e0cee9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.graph.impl;
import static org.apache.jena.atlas.lib.Lib.isEmpty;
import java.util.Arrays;
import java.util.Objects;
import org.apache.jena.atlas.lib.EscapeStr;
import org.apache.jena.datatypes.DatatypeFormatException;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.datatypes.xsd.impl.RDFhtml;
import org.apache.jena.datatypes.xsd.impl.RDFjson;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.TextDirection;
import org.apache.jena.shared.JenaException;
import org.apache.jena.shared.PrefixMapping;
import org.apache.jena.shared.impl.JenaParameters;
import org.apache.jena.vocabulary.RDF;
/**
* Represents the "contents" of a Node_Literal.
* These contents comprise a lexical form, an optional language tag,
* an optional datatype structure and a value.
* <p>
* Create via LiteralLabelFactory which does the checking and adjustments.
*/
final public class LiteralLabel {
/**
* The lexical form of the literal.
*/
private String lexicalForm;
/**
* The language tag. The empty string is not a valid language tag; {@code ""} is used to mean "no language" so that this field is always set.
*/
private String lang;
/**
* The initial text direction of a language literal.
* Datatype rdf:dirLangString.
*/
private TextDirection textDir;
/**
* The type of the literal. A null type indicates a classic "plain" literal.
* The type of a literal is fixed when it is created.
*/
private RDFDatatype dtype;
/**
* The value form of the literal. It will be null only if the value
* has not been parsed or if it is an illegal value.
*/
private Object value;
private enum ValueMode { EAGER , LAZY }
// LAZY does not completely pass the test suite - the point where bad literals
// cause exceptions has changed.
// Whether this is the fact the tests are over sensitive or there is going to be
// unexpected behaviour needs investigation.
private static ValueMode valueMode = ValueMode.EAGER;
/**
* Indicates whether this literal has a valid lexical form for the datatype.
*/
private boolean wellformed = true;
private Exception exception = null;
private final int hash;
//=======================================================================
// Constructors
// -- LiteralLabel by RDF term.
/**
 * Build a LiteralLabel from a lexical form and a datatype.
 * Delegates to the four-argument constructor with the empty language tag
 * ({@code ""}) and no initial text direction ({@code null}); any parsing and
 * validation of the lexical form happens there.
 *
 * @param lex the lexical form of the literal
 * @param dtype the type of the literal
 */
LiteralLabel(String lex, RDFDatatype dtype) {
this(lex, "", null, dtype);
}
/**
 * Build a LiteralLabel with lexical form, language tag, initial text direction and datatype.
 * <p>
 * In {@code EAGER} value mode the lexical form is parsed immediately: the parsed
 * value and the well-formedness flag are recorded, and the datatype is replaced by
 * whatever {@link #normalize(Object, RDFDatatype)} returns for the parsed value.
 * In {@code LAZY} mode the value is left unset and calculated on first use.
 *
 * @param lex the lexical form of the literal
 * @param lang the language tag, only relevant for rdf:langString and rdf:dirLangString;
 *             {@code null} is stored as {@code ""}
 * @param textDir the initial text direction, only relevant for rdf:dirLangString;
 *                {@code null} when there is none
 * @param dtype the type of the literal; must not be null
 * @throws NullPointerException if {@code dtype} is null
 * @throws DatatypeFormatException if the lexical form is ill-formed and
 *         {@code JenaParameters.enableEagerLiteralValidation} is set
 */
/*package*/ LiteralLabel(String lex, String lang, TextDirection textDir, RDFDatatype dtype) {
    this.lexicalForm = lex;
    this.dtype = Objects.requireNonNull(dtype);
    // "" is the in-class marker for "no language"; code in this class calls lang.equals("").
    this.lang = (lang == null) ? "" : lang;
    this.textDir = textDir;
    if ( valueMode == ValueMode.EAGER ) {
        this.wellformed = setValue(lex, dtype);
        // Store the normalized datatype in the field. Assigning it to the parameter
        // (as was done before) silently discarded the result, unlike the lazy path.
        this.dtype = normalize(value, dtype);
    } else {
        // Lazy value calculation.
        value = null;
    }
    // Hash last, so that it is computed from the same datatype that equals() compares.
    hash = calcHashCode();
}
/**
 * Calculate the indexing form for a language tag.
 * Currently the identity function: the tag is returned unchanged.
 *
 * @param lang the language tag
 * @return {@code lang} itself
 */
private static String indexingLang(String lang) {
return lang;
}
/**
 * Build a typed literal label from its value form using
 * whatever datatype is currently registered as the default
 * representation for this Java class.
 * The datatype is obtained from {@code LiteralValue.datatypeForValueAny(value)} and
 * construction is delegated to the (value, datatype) constructor.
 *
 * @param value the literal value to encapsulate
 */
/*package*/ LiteralLabel( Object value ) {
this(value, LiteralValue.datatypeForValueAny(value));
}
/**
 * Build a typed literal label from its value form.
 * <p>
 * If the value is a string, it is taken to be the intended lexical form and is
 * parsed against the datatype, in line with {@link #LiteralLabel(String, RDFDatatype)}.
 * Otherwise the value is canonicalised by the datatype, checked for validity, and the
 * lexical form is produced by unparsing it. In both cases the stored datatype is the
 * result of {@link #normalize(Object, RDFDatatype)}, and the language tag is {@code ""}.
 *
 * @param value the value of the literal, or its lexical form if a {@code String}
 * @param dtype the type of the literal; must not be null
 * @throws NullPointerException if {@code dtype} is null
 * @throws DatatypeFormatException if the value is not valid for the datatype and
 *         {@code JenaParameters.enableEagerLiteralValidation} is set
 */
/*package*/ LiteralLabel(Object value, RDFDatatype dtype) throws DatatypeFormatException {
    // Every path below dereferences the datatype, so null has always ended in a
    // NullPointerException. Fail up front instead of part-way through construction.
    Objects.requireNonNull(dtype, "dtype");
    this.lang = "";
    if ( value instanceof String ) {
        // Treat as "lex ^^ datatype"
        String lex = (String)value;
        this.lexicalForm = lex;
        this.wellformed = setValue(lex, dtype);
        // Normalize against the parsed value (this.value), not the raw lexical string,
        // and keep the result in the field rather than in the parameter.
        this.dtype = normalize(this.value, dtype);
        hash = calcHashCode();
        return;
    }
    // No lexical form yet.
    this.value = dtype.cannonicalise(value);
    // This can change the datatype
    this.dtype = normalize(value, dtype);
    this.wellformed = this.dtype.isValidValue(value);
    // Eager
    if ( JenaParameters.enableEagerLiteralValidation && !wellformed )
        throw new DatatypeFormatException(value.toString(), dtype, "in literal creation");
    this.lexicalForm = dtype.unparse(value);
    hash = calcHashCode();
}
/**
 * Parse the lexical form with the given datatype and store the result in {@code value}.
 * <p>
 * On a parse failure the outcome depends on
 * {@code JenaParameters.enableEagerLiteralValidation} (normally false): when set, the
 * exception is rethrown with a refreshed stack trace; otherwise it is remembered in
 * {@code exception} and {@code false} is returned.
 *
 * @param lex the lexical form to parse
 * @param dtype the datatype used for parsing
 * @return true if the lexical form parsed, false if it is ill-formed
 * @throws DatatypeFormatException if the value is ill-formed and eager checking is on
 */
private boolean setValue(String lex, RDFDatatype dtype) throws DatatypeFormatException {
    try {
        value = dtype.parse(lex);
    } catch (DatatypeFormatException parseFailure) {
        if ( ! JenaParameters.enableEagerLiteralValidation ) {
            // The usual case: note the failure and report "not well-formed".
            exception = parseFailure;
            return false;
        }
        parseFailure.fillInStackTrace();
        throw parseFailure;
    }
    return true;
}
// -- Thread safe delayed initialization at the cost of "volatile" incurred in getValueLazy()
// Used by set-by-term.
// set-by-value is always eager.
// Result of the delayed calculation: null until calcValue has run, then either the
// parsed value or the invalidValue marker.
private volatile Object value1 = null;
// Marker object stored in value1 when the lexical form does not parse; compared by identity.
private static Object invalidValue = new Object();
/**
 * Return the value of this literal, calculating it from the lexical form if it has
 * not been set yet.
 * <p>
 * If {@code value} is already non-null it is returned directly. Otherwise the result
 * of {@code calcValue(lexicalForm)} is computed at most once under the object lock,
 * cached in {@code value1}, and copied into {@code value} unless it is the
 * {@code invalidValue} marker. Note that {@code value} is also null after a failed
 * eager parse, in which case this method parses the lexical form again.
 * <p>
 * Does not return null - returns "invalidValue" when the lexical form is ill-formed.
 */
private Object getValueLazy() {
// Eager value processing.
if ( value != null )
return value;
if ( value1 != null ) {
// value1 only goes from null to Object, and not back to null.
return value1;
}
synchronized(this) {
// Re-check under the lock: another thread may have calculated it in the meantime.
if ( value1 == null )
value1 = calcValue(lexicalForm);
}
// Object assignment is atomic.
// Synchronized ensured the value object is properly constructed.
// Publish to the non-volatile field so later calls take the first return above;
// the invalid marker is never stored in "value".
value = (value1 != invalidValue ) ? value1 : null;
return value1;
}
/**
 * Parse the lexical form with the current datatype for the delayed-value path.
 * <p>
 * On success, marks the literal well-formed, replaces {@code dtype} with the result
 * of {@code normalizeSubType} for the parsed value, and returns that value.
 * On a {@link DatatypeFormatException}, marks the literal as not well-formed and
 * returns the {@code invalidValue} marker; this method does not return null for
 * an ill-formed literal.
 *
 * @param lex the lexical form to parse
 * @return the parsed value, or {@code invalidValue}
 */
private Object calcValue(String lex) {
    try {
        final Object parsed = dtype.parse(lex);
        wellformed = true;
        dtype = dtype.normalizeSubType(parsed, dtype);
        return parsed;
    } catch (DatatypeFormatException notLegal) {
        wellformed = false;
        return invalidValue;
    }
}
/**
 * Normalize the datatype of a literal against its value. If the value is narrower
 * than the current datatype (e.g. value is xsd:date but the type is xsd:dateTime)
 * the datatype may be narrowed. If the type is narrower than the value then the
 * datatype may normalize the value (e.g. set the mask of an XSDDateTime).
 * The decision is delegated to {@code datatype.normalizeSubType}.
 *
 * @param value the value of the literal; may be null
 * @param datatype the current datatype; may be null
 * @return the datatype to use; {@code datatype} unchanged if either argument is null
 */
protected static RDFDatatype normalize(Object value, RDFDatatype datatype) {
    if ( datatype == null || value == null )
        return datatype;
    return datatype.normalizeSubType(value, datatype);
}
//=======================================================================
// Methods
/**
 * Answer true iff this is a well-formed literal (the lexical form conforms to the datatype).
 * String literals (xsd:string, rdf:langString, rdf:dirLangString) are always well-formed.
 * A literal without a datatype is reported as not well-formed.
 */
public boolean isWellFormed() {
    if ( dtype == null )
        return false;
    return isWellFormedRaw();
}
/**
 * Well-formedness test without the datatype null check.
 * If the flag still says "well-formed", the value calculation is forced first,
 * because a delayed parse can clear the flag.
 */
private boolean isWellFormedRaw() {
    if ( wellformed ) {
        // Force initialization.
        getValueInternal();
        return wellformed;
    }
    return false;
}
/**
 * Format this literal as a string, abbreviating the datatype URI with
 * {@link PrefixMapping#Standard}.
 *
 * @param quoting passed through to {@link #toString(PrefixMapping, boolean)}
 */
public String toString(boolean quoting) {
return toString(PrefixMapping.Standard, quoting);
}
/**
 * Format this literal as a string.
 * <p>
 * An xsd:string literal is returned as its bare lexical form when {@code quoting}
 * is false. In every other case the escaped lexical form is written in double
 * quotes, followed by {@code @lang} if there is a language tag, or otherwise by
 * {@code ^^datatype} unless the datatype is xsd:string.
 *
 * @param pmap prefix mapping used to abbreviate the datatype URI; if null the
 *             full URI is written
 * @param quoting whether an xsd:string literal is quoted; all other literals are
 *                always quoted
 * @return the string form of this literal
 */
public String toString(PrefixMapping pmap, boolean quoting) {
    if ( ! quoting && simpleLiteral() )
        return getLexicalForm();
    // Always quoted for language strings and datatypes (not xsd:string).
    StringBuilder b = new StringBuilder();
    b.append('"').append(EscapeStr.stringEsc(getLexicalForm())).append('"');
    if ( lang != null && !lang.isEmpty() ) {
        // NOTE(review): the initial text direction is not rendered here - confirm
        // whether it should follow the language tag.
        b.append("@").append(lang);
    } else if ( ! dtype.equals(XSDDatatype.XSDstring) ) {
        // Abbreviate with the caller's mapping; previously the argument was only
        // null-checked and PrefixMapping.Standard was always used.
        String dtStr = (pmap != null)
                ? pmap.shortForm(dtype.getURI())
                : dtype.getURI();
        b.append("^^").append(dtStr);
    }
    return b.toString();
}
/** Answer true iff the datatype of this literal equals xsd:string. */
private boolean simpleLiteral() {
return dtype.equals(XSDDatatype.XSDstring);
}
/**
 * Answer the string form of this literal with quoting enabled;
 * same as {@code toString(true)}.
 */
@Override
public String toString() {
return toString(true);
}
/**
 * Answer the lexical form of this literal.
 *
 * @return the stored lexical form
 */
public String getLexicalForm() {
return lexicalForm;
}
/**
 * Answer an object used to index this literal. This object must provide
 * {@link Object#equals} and {@link Object#hashCode} based on values, not object
 * instance identity.
 * <p>
 * The result is, in order of preference:
 * <ul>
 * <li>this label itself, for the datatypes selected by {@code indexingValueIsSelf()};</li>
 * <li>{@code lexicalForm + "@" + lang} if there is a language tag;</li>
 * <li>the value if the literal is well-formed, with a {@code byte[]} value wrapped
 *     in a {@link ByteArray};</li>
 * <li>the lexical form otherwise.</li>
 * </ul>
 * NOTE(review): the initial text direction is not part of the indexing value -
 * confirm this is intended.
 *
 * @return the indexing object
 */
public Object getIndexingValue() {
    if ( indexingValueIsSelf() )
        return this;
    // Null-safe, as in toString: a missing language tag is treated like "".
    if ( lang != null && !lang.isEmpty() )
        // Assumed formatted/case-insensitive language tags.
        return getLexicalForm() + "@" + indexingLang(lang);
    // isWellFormedRaw() forces any delayed parse first, so an ill-formed literal
    // falls through to its lexical form instead of failing inside getValue().
    if ( isWellFormedRaw() ) {
        Object val = getValue();
        // JENA-1936
        // byte[] does not provide hashCode/equals based on the contents of the array.
        if ( val instanceof byte[] )
            return new ByteArray((byte[])val);
        return val;
    }
    return getLexicalForm();
}
/**
 * Return true for the datatypes with large values (XML, JSON, HTML), for which
 * the indexing value is this object itself.
 * For these, getValueHashCode is the same as hashCode().
 * The datatype is compared by identity against the three datatype constants.
 */
private boolean indexingValueIsSelf() {
    if ( dtype == XMLLiteralType.theXMLLiteralType )
        return true;
    if ( dtype == RDFjson.rdfJSON )
        return true;
    return dtype == RDFhtml.rdfHTML;
}
/**
 * Wrapper around a {@code byte[]} giving it {@code hashCode} and {@code equals}
 * defined by the array contents rather than by object identity. The wrapped array
 * is not copied, so it is assumed not to change afterwards (which is the case for
 * literals with a binary value).
 */
static class ByteArray {
    private final byte[] bytes;
    // Cached hash code; 0 means "not calculated yet".
    private int hashCode = 0;

    /*package*/ ByteArray(byte[] bytes) {
        this.bytes = bytes;
    }

    /** Content-based hash, calculated on first use and then cached. */
    @Override
    public int hashCode() {
        int h = hashCode;
        if ( h == 0 ) {
            h = 31 + Arrays.hashCode(bytes);
            hashCode = h;
        }
        return h;
    }

    /** Equal iff the other object is exactly a ByteArray with the same contents. */
    @Override
    public boolean equals(Object obj) {
        if ( obj == this )
            return true;
        if ( obj == null || obj.getClass() != getClass() )
            return false;
        return Arrays.equals(bytes, ((ByteArray)obj).bytes);
    }
}
/**
 * Answer the language associated with this literal (the empty string if there's
 * no language).
 *
 * @return the stored language tag
 */
public String language() {
return lang;
}
/**
 * Answer the initial text direction associated with this literal
 * ({@code null} if there's no text direction).
 *
 * @return the stored text direction, possibly null
 */
public TextDirection initialTextDirection() {
return textDir;
}
/**
 * Answer a suitable instance of a Java class representing this literal's value.
 * Forces the value calculation if it has been delayed.
 *
 * @return the value; {@code null} only when no value is available for a literal
 *         flagged well-formed and {@code JenaParameters.enableEagerLiteralValidation}
 *         is set
 * @throws DatatypeFormatException if the literal is ill-formed, or if no value is
 *         available and eager literal validation is off
 */
public Object getValue() throws DatatypeFormatException {
    Object val = getValueInternal();
    if ( ! wellformed )
        // Chain the parse failure recorded at construction time, if any, so the
        // reason the lexical form was rejected is not lost.
        throw new DatatypeFormatException(lexicalForm, dtype, exception);
    if ( val != null )
        // Value is good.
        return val;
    if ( ! JenaParameters.enableEagerLiteralValidation )
        throw new DatatypeFormatException();
    return null;
}
/**
 * Answer the value, forcing the delayed calculation, with the internal
 * {@code invalidValue} marker translated to {@code null}.
 */
private Object getValueInternal() {
    Object result = getValueLazy();
    if ( result == invalidValue )
        return null;
    return result;
}
/**
 * Answer the datatype of this literal, null if it is untyped.
 *
 * @return the stored datatype
 */
public RDFDatatype getDatatype() {
return dtype;
}
/**
 * Answer the datatype URI of this literal, null if it is untyped.
 *
 * @return the URI of the datatype, or null when there is no datatype
 */
public String getDatatypeURI() {
    return (dtype == null) ? null : dtype.getURI();
}
/**
 * Answer true iff this literal is syntactically equal to <code>other</code>:
 * same datatype, lexical form, language tag and initial text direction.
 * Note: this is <i>not</i> <code>sameValueAs</code>.
 * <p>
 * The value is not compared because it is a function of lexical form and datatype.
 */
@Override
public boolean equals(Object other) {
    if ( this == other )
        return true;
    // instanceof is false for null, so no separate null test is needed.
    if ( !(other instanceof LiteralLabel) )
        return false;
    LiteralLabel otherLiteral = (LiteralLabel) other;
    // The text direction is part of the term: without it "x"@en--ltr would equal
    // "x"@en--rtl, which sameValueAs treats as different.
    return Objects.equals(dtype, otherLiteral.getDatatype())
        && Objects.equals(getLexicalForm(), otherLiteral.getLexicalForm())
        && Objects.equals(lang, otherLiteral.language())
        && Objects.equals(textDir, otherLiteral.initialTextDirection());
}
/**
 * Answer true iff this literal represents the same (abstract) value as the other
 * one. Delegates to the static two-argument form with this label as the first argument.
 *
 * @param other the label to compare with
 */
public boolean sameValueAs( LiteralLabel other ) {
return sameValueAs(this, other);
}
/**
 * Two literal labels are the "same value" if they are the same string,
 * or same language string, or same language string with text direction,
 * or same value-by-datatype, or .equals (= same RDF term) when both are ill-formed.
 * Literals of different kinds are never the same value. Language tags are
 * compared ignoring case.
 *
 * @param lit1 the first label; must not be null
 * @param lit2 the second label; must not be null
 * @return true iff the two labels denote the same value
 * @throws NullPointerException if either argument is null
 */
private static boolean sameValueAs(LiteralLabel lit1, LiteralLabel lit2) {
    Objects.requireNonNull(lit1);
    Objects.requireNonNull(lit2);

    // -- Strings.
    final boolean string1 = isStringValue(lit1);
    final boolean string2 = isStringValue(lit2);
    if ( string1 || string2 ) {
        if ( !(string1 && string2) )
            return false;
        return lit1.getLexicalForm().equals(lit2.getLexicalForm());
    }

    // -- Language tag strings
    final boolean langString1 = isLangString(lit1);
    final boolean langString2 = isLangString(lit2);
    if ( langString1 || langString2 ) {
        if ( !(langString1 && langString2) )
            return false;
        // Normalized language tags.
        return lit1.getLexicalForm().equals(lit2.getLexicalForm())
            && lit1.language().equalsIgnoreCase(lit2.language());
    }

    // -- Language tag strings with initial text direction
    final boolean dirLangString1 = isLangStringDir(lit1);
    final boolean dirLangString2 = isLangStringDir(lit2);
    if ( dirLangString1 || dirLangString2 ) {
        if ( !(dirLangString1 && dirLangString2) )
            return false;
        return lit1.getLexicalForm().equals(lit2.getLexicalForm())
            && lit1.language().equalsIgnoreCase(lit2.language())
            && lit1.initialTextDirection().equals(lit2.initialTextDirection());
    }

    // -- datatypes.
    // Both not strings, not lang strings and not dirlang strings.
    // Datatype set.
    final boolean wellFormed1 = lit1.isWellFormedRaw();
    final boolean wellFormed2 = lit2.isWellFormedRaw();
    if ( wellFormed1 && wellFormed2 )
        // Both well-formed: the datatype decides.
        return lit1.getDatatype().isEqual(lit1, lit2);
    if ( !wellFormed1 && !wellFormed2 )
        // Both ill-formed: same term.
        return lit1.equals(lit2);
    // One is well formed, the other is not.
    return false;
}
/**
 * Return true if the literal label is a string value (RDF 1.0 and RDF 1.1):
 * either its datatype equals xsd:string or, when it has no datatype (RDF 1.0),
 * it is not a language string.
 */
private static boolean isStringValue(LiteralLabel lit) {
    RDFDatatype datatype = lit.getDatatype();
    if ( datatype != null )
        return datatype.equals(XSDDatatype.XSDstring);
    // RDF 1.0
    return ! isLangString(lit);
}
/**
 * Return true if the literal label is a well-formed language string (rdf:langString):
 * it has a non-empty language tag and no initial text direction.
 * This test excludes "abc"^^rdf:langString (not well-formed).
 *
 * @throws JenaException if the label has a language tag, no text direction, and a
 *         datatype other than rdf:langString (internal consistency check)
 */
private static boolean isLangString(LiteralLabel lit) {
    // Duplicate of Util.isLangString except for the additional consistency check.
    boolean hasLanguage = ! isEmpty(lit.language());
    if ( ! hasLanguage )
        return false;
    boolean hasTextDirection = lit.initialTextDirection() != null;
    if ( hasTextDirection )
        // With an initial text direction it is not a plain language string.
        return false;
    // Internal check.
    if ( Objects.equals(lit.getDatatype(), RDF.dtLangString) )
        return true;
    throw new JenaException("Literal with language string which is not rdf:langString: "+lit);
}
/**
 * Return true if the literal label is a language string with an initial text direction.
 * This excludes "abc"^^rdf:dirLangString.
 * <p>
 * Only the text direction is tested; the language tag is deliberately not required
 * to be non-empty, which allows "abc"@--rtl.
 *
 * @throws JenaException if the label has a text direction and a datatype other than
 *         rdf:dirLangString (internal consistency check)
 */
private static boolean isLangStringDir(LiteralLabel lit) {
    if ( lit.initialTextDirection() == null )
        return false;
    // Internal check.
    if ( ! Objects.equals(lit.getDatatype(), RDF.dtDirLangString) )
        throw new JenaException("Literal with language string and text direction which is not rdf:dirLangString: "+lit);
    return true;
}
/**
 * Calculate the hash code of this label from its lexical form, language tag and
 * datatype. The initial text direction and the value are not included.
 */
private int calcHashCode() {
return Objects.hash(lexicalForm, lang, dtype);
}
/**
 * Answer the hashcode of this literal. This is the value stored in {@code hash}
 * during construction, which the constructors obtain from {@code calcHashCode()}.
 * It does not depend on the parsed value; see {@link #getValueHashCode()} for a
 * value-based hash.
 */
@Override
public int hashCode() {
return hash;
}
/**
 * Answer the default hash value, suitable for datatypes which have values which
 * support hashCode() naturally: it is derived from the value if the literal is
 * well-formed and a value is available, and is otherwise {@link #hashCode()}.
 * For the datatypes whose indexing value is the label itself it is always
 * {@link #hashCode()}.
 * <p>
 * NOTE(review): a {@code byte[]} value hashes by identity here, whereas
 * getIndexingValue wraps it for content-based hashing - confirm callers handle this.
 *
 * @return the value-based hash code
 */
public int getValueHashCode() {
    if ( indexingValueIsSelf() )
        return hashCode();
    // Forces any delayed parse, which may clear the well-formed flag.
    Object v = getValueInternal();
    // The null test replaces a duplicated well-formedness check and guards v.hashCode().
    if ( ! wellformed || v == null )
        return hashCode();
    return v.hashCode();
}
}