blob: 570b35d2e2d3e8e362b008717afb8e953737ac8b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sis.storage.base;
import java.time.LocalDate;
import java.util.Date;
import java.util.Collections;
import org.opengis.metadata.citation.Role;
import org.opengis.metadata.citation.DateType;
import org.opengis.metadata.constraint.Restriction;
import org.apache.sis.util.CharSequences;
import org.apache.sis.metadata.iso.citation.AbstractParty;
import org.apache.sis.metadata.iso.citation.DefaultCitation;
import org.apache.sis.metadata.iso.citation.DefaultCitationDate;
import org.apache.sis.metadata.iso.constraint.DefaultLegalConstraints;
import static org.apache.sis.util.privy.Constants.MILLISECONDS_PER_DAY;
// Specific to the geoapi-4.0 branch:
import org.apache.sis.metadata.iso.citation.DefaultResponsibility;
/**
* Elements to omit in the legal notice to be parsed by {@link MetadataBuilder#parseLegalNotice(String)}.
* Some of those elements are implied by the metadata were the legal notice will be stored.
*
* @author Martin Desruisseaux (Geomatys)
*/
final class LegalSymbols {
/**
* Symbols associated to restrictions.
*/
private static final LegalSymbols[] VALUES = {
new LegalSymbols(Restriction.COPYRIGHT, "COPYRIGHT", "(C)", "©", "All rights reserved"),
new LegalSymbols(Restriction.TRADEMARK, "TRADEMARK", "(TM)", "™", "(R)", "®")
};
/**
* The restriction to use if an item in the {@linkplain #symbols} list is found.
*/
private final Restriction restriction;
/**
* Symbols to use as an indication that the {@linkplain #restriction} applies.
*/
private final String[] symbols;
/**
* Creates a new enumeration value for the given symbol.
*/
private LegalSymbols(final Restriction restriction, final String... symbols) {
this.restriction = restriction;
this.symbols = symbols;
}
/**
* Returns {@code true} if the given character is a space or a punctuation of category "other".
* The punctuation characters include coma, dot, semi-colon, <i>etc.</i> but do not include
* parenthesis or connecting punctuation.
*
* @param c the Unicode code point of the character to test.
*/
private static boolean isSpaceOrPunctuation(final int c) {
switch (Character.getType(c)) {
case Character.LINE_SEPARATOR:
case Character.SPACE_SEPARATOR:
case Character.PARAGRAPH_SEPARATOR:
case Character.OTHER_PUNCTUATION: return true;
default: return false;
}
}
/**
* Implementation of {@link MetadataBuilder#parseLegalNotice(String)}, provided here for reducing
* the number of class loading in the common case where there is no legal notice to parse.
*/
static void parse(final String notice, final DefaultLegalConstraints constraints) {
final int length = notice.length();
final var buffer = new StringBuilder(length);
int year = 0; // The copyright year, or 0 if none.
int quoteLevel = 0; // Incremented on ( [ « characters, decremented on ) ] » characters.
boolean isCopyright = false; // Whether the word parsed by previous iteration was "Copyright" or "(C)".
boolean wasSeparator = true; // Whether the caracter parsed by the previous iteration was a word separator.
boolean wasPunctuation = true; // Whether the previous character was a punctuation of Unicode category "other".
boolean skipNextChars = true; // Whether the next spaces and some punction characters should be ignored.
parse: for (int i = 0; i < length;) {
final int c = notice.codePointAt(i);
final int n = Character.charCount(c);
int quoteChange = 0;
boolean isSeparator = false;
boolean isPunctuation;
switch (Character.getType(c)) {
case Character.INITIAL_QUOTE_PUNCTUATION:
case Character.START_PUNCTUATION: {
quoteChange = +1; // ( [ « etc.
skipNextChars = false;
isPunctuation = false;
break;
}
case Character.FINAL_QUOTE_PUNCTUATION:
case Character.END_PUNCTUATION: {
quoteChange = -1; // ) ] » etc.
skipNextChars = false;
isPunctuation = false;
break;
}
default: { // Letter, digit, hyphen, etc.
skipNextChars = false;
isPunctuation = false;
break;
}
case Character.OTHER_PUNCTUATION: { // , . : ; / " etc. but not -.
isPunctuation = true;
isSeparator = true;
break;
}
case Character.LINE_SEPARATOR:
case Character.SPACE_SEPARATOR:
case Character.PARAGRAPH_SEPARATOR: {
isPunctuation = wasPunctuation;
isSeparator = true;
break;
}
}
if (wasSeparator && !isSeparator && quoteLevel == 0) {
/*
* Found the beginning of a new word. Ignore textes like "(C)" or "All rights reserved".
* Some of those textes are implied by the metadata where the legal notice will be stored.
*/
for (final LegalSymbols r : VALUES) {
for (final String symbol : r.symbols) {
if (notice.regionMatches(true, i, symbol, 0, symbol.length())) {
final int after = i + symbol.length();
if (after >= length || isSpaceOrPunctuation(notice.codePointAt(after))) {
isCopyright |= (r.restriction == Restriction.COPYRIGHT);
constraints.getUseConstraints().add(r.restriction);
wasPunctuation = true; // Pretend that "Copyright" was followed by a coma.
skipNextChars = true; // Ignore spaces and punctuations until the next word.
i = after; // Skip the "Copyright" (or other) word.
continue parse;
}
}
}
}
/*
* If a copyright notice is followed by digits, assume that those digits are the copyright year.
* We require the year is followed by punctuations or non-breaking space in order to reduce the
* risk of confusion with postal addresses. So this block should accept "John, 1992." but not
* "1992-1 Nowhere road".
*/
if (isCopyright && wasPunctuation && year == 0 && c >= '0' && c <= '9') {
int endOfDigits = i + n; // After the last digit in sequence.
while (endOfDigits < length) {
final int d = notice.codePointAt(endOfDigits);
if (d < '0' || d > '9') break;
endOfDigits++; // No need to use Character.charCount(s) here.
}
// Verify if the digits are followed by a punctuation.
final int endOfToken = CharSequences.skipLeadingWhitespaces(notice, endOfDigits, length);
if (endOfToken > endOfDigits || isSpaceOrPunctuation(notice.codePointAt(endOfToken))) try {
year = Integer.parseInt(notice.substring(i, endOfDigits));
if (year >= 1800 && year <= 9999) { // Those limits are arbitrary.
skipNextChars = true;
i = endOfToken;
continue;
}
year = 0; // Reject as not a copyright year.
} catch (NumberFormatException e) {
// Not an integer - ignore, will be handled as text.
}
}
}
/*
* End of the block that was executed at the beginning of each new word.
* Following is executed for every characters, except if the above block
* skipped a portion of the input string.
*/
wasPunctuation = isPunctuation;
wasSeparator = isSeparator;
quoteLevel += quoteChange;
if (!skipNextChars && !Character.isIdentifierIgnorable(c)) {
buffer.appendCodePoint(c);
}
i += n;
}
/*
* End of parsing. Omit trailing spaces and some punctuations if any, then store the result.
*/
int i = buffer.length();
while (i > 0) {
final int c = buffer.codePointBefore(i);
if (!isSpaceOrPunctuation(c)) break;
i -= Character.charCount(c);
}
final var c = new DefaultCitation(notice);
if (year != 0) {
final Date date = new Date(LocalDate.of(year, 1, 1).toEpochDay() * MILLISECONDS_PER_DAY);
c.setDates(Collections.singleton(new DefaultCitationDate(date, DateType.IN_FORCE)));
}
if (i != 0) {
buffer.setLength(i);
// Same limitation as MetadataBuilder.party().
final var party = new AbstractParty(buffer, null);
final var r = new DefaultResponsibility(Role.OWNER, null, party);
c.setCitedResponsibleParties(Collections.singleton(r));
}
constraints.getReferences().add(c);
}
}