endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/LegalSymbols.java - sis - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.sis.storage.base;

 import java.time.LocalDate;
 import java.util.Date;
 import java.util.Collections;
 import org.opengis.metadata.citation.Role;
 import org.opengis.metadata.citation.DateType;
 import org.opengis.metadata.constraint.Restriction;
 import org.apache.sis.util.CharSequences;
 import org.apache.sis.metadata.iso.citation.AbstractParty;
 import org.apache.sis.metadata.iso.citation.DefaultCitation;
 import org.apache.sis.metadata.iso.citation.DefaultCitationDate;
 import org.apache.sis.metadata.iso.constraint.DefaultLegalConstraints;
 import static org.apache.sis.util.privy.Constants.MILLISECONDS_PER_DAY;

 // Specific to the geoapi-4.0 branch:
 import org.apache.sis.metadata.iso.citation.DefaultResponsibility;


 /**
  * Elements to omit in the legal notice to be parsed by {@link MetadataBuilder#parseLegalNotice(String)}.
  * Some of those elements are implied by the metadata where the legal notice will be stored.
  *
  * @author  Martin Desruisseaux (Geomatys)
  */
 final class LegalSymbols {
     /**
      * Symbols associated to restrictions. Each entry pairs one {@link Restriction} with the texts
      * which, when found at the beginning of a word of the legal notice, imply that restriction.
      * Texts are compared ignoring case.
      */
     private static final LegalSymbols[] VALUES = {
         new LegalSymbols(Restriction.COPYRIGHT, "COPYRIGHT", "(C)", "©", "All rights reserved"),
         new LegalSymbols(Restriction.TRADEMARK, "TRADEMARK", "(TM)", "™", "(R)", "®")
     };

     /**
      * The restriction to use if an item in the {@linkplain #symbols} list is found.
      */
     private final Restriction restriction;

     /**
      * Symbols to use as an indication that the {@linkplain #restriction} applies.
      */
     private final String[] symbols;

     /**
      * Creates a new enumeration value for the given symbols.
      *
      * @param restriction  the restriction implied by any of the given symbols.
      * @param symbols      the texts to search in the legal notice. The array is stored without copy.
      */
     private LegalSymbols(final Restriction restriction, final String... symbols) {
         this.restriction = restriction;
         this.symbols = symbols;
     }

     /**
      * Returns {@code true} if the given character is a space or a punctuation of category "other".
      * The punctuation characters include comma, dot, semi-colon, <i>etc.</i> but do not include
      * parenthesis or connecting punctuation.
      *
      * @param  c  the Unicode code point of the character to test.
      * @return whether the Unicode category of the given character is a line, space or paragraph
      *         separator, or "other punctuation".
      */
     private static boolean isSpaceOrPunctuation(final int c) {
         switch (Character.getType(c)) {
             case Character.LINE_SEPARATOR:
             case Character.SPACE_SEPARATOR:
             case Character.PARAGRAPH_SEPARATOR:
             case Character.OTHER_PUNCTUATION: return true;
             default: return false;
         }
     }

     /**
      * Implementation of {@link MetadataBuilder#parseLegalNotice(String)}, provided here for reducing
      * the number of class loading in the common case where there is no legal notice to parse.
      *
      * <p>The notice is scanned word by word. Words matching one of the {@link #VALUES} symbols are removed
      * from the text and their restriction is added to the use constraints. After a copyright symbol has
      * been found, the first number between 1800 and 9999 which starts a word, is preceded by punctuation
      * and is followed by a space, a punctuation or the end of the text is taken as the copyright year.
      * Whatever text remains, with trailing spaces and punctuations removed, is used for building a party
      * having the {@link Role#OWNER} role. Finally a citation created from the full notice, with above year
      * and party if any, is added to the references of the given constraints.</p>
      *
      * @param  notice       the legal notice to parse. Shall not be {@code null}.
      * @param  constraints  where to add the restrictions and the citation inferred from the notice.
      */
     static void parse(final String notice, final DefaultLegalConstraints constraints) {
         final int length = notice.length();
         final var buffer = new StringBuilder(length);
         int     year           = 0;         // The copyright year, or 0 if none.
         int     quoteLevel     = 0;         // Incremented on ( [ « characters, decremented on ) ] » characters.
         boolean isCopyright    = false;     // Whether a copyright symbol ("Copyright", "(C)", etc.) has been found so far.
         boolean wasSeparator   = true;      // Whether the character parsed by the previous iteration was a word separator.
         boolean wasPunctuation = true;      // Whether the previous character was a punctuation of Unicode category "other".
         boolean skipNextChars  = true;      // Whether the next spaces and some punctuation characters should be ignored.
 parse:  for (int i = 0; i < length;) {
             final int c = notice.codePointAt(i);
             final int n = Character.charCount(c);
             int     quoteChange = 0;
             boolean isSeparator = false;
             final boolean isPunctuation;
             switch (Character.getType(c)) {
                 case Character.INITIAL_QUOTE_PUNCTUATION:
                 case Character.START_PUNCTUATION: {
                     quoteChange   = +1;                     //  ( [ «  etc.
                     skipNextChars = false;
                     isPunctuation = false;
                     break;
                 }
                 case Character.FINAL_QUOTE_PUNCTUATION:
                 case Character.END_PUNCTUATION: {
                     quoteChange   = -1;                     //  ) ] »  etc.
                     skipNextChars = false;
                     isPunctuation = false;
                     break;
                 }
                 case Character.OTHER_PUNCTUATION: {         //  , . : ; / " etc. but not -.
                     isPunctuation = true;
                     isSeparator   = true;
                     break;
                 }
                 case Character.LINE_SEPARATOR:
                 case Character.SPACE_SEPARATOR:
                 case Character.PARAGRAPH_SEPARATOR: {
                     isPunctuation = wasPunctuation;         // Spaces after a punctuation do not cancel it.
                     isSeparator   = true;
                     break;
                 }
                 default: {                                  // Letter, digit, hyphen, etc.
                     skipNextChars = false;
                     isPunctuation = false;
                     break;
                 }
             }
             if (wasSeparator && !isSeparator && quoteLevel == 0) {
                 /*
                  * Found the beginning of a new word. Ignore texts like "(C)" or "All rights reserved".
                  * Some of those texts are implied by the metadata where the legal notice will be stored.
                  */
                 for (final LegalSymbols r : VALUES) {
                     for (final String symbol : r.symbols) {
                         if (notice.regionMatches(true, i, symbol, 0, symbol.length())) {
                             final int after = i + symbol.length();
                             if (after >= length || isSpaceOrPunctuation(notice.codePointAt(after))) {
                                 isCopyright |= (r.restriction == Restriction.COPYRIGHT);
                                 constraints.getUseConstraints().add(r.restriction);
                                 wasPunctuation = true;      // Pretend that "Copyright" was followed by a comma.
                                 skipNextChars  = true;      // Ignore spaces and punctuations until the next word.
                                 i = after;                  // Skip the "Copyright" (or other) word.
                                 continue parse;
                             }
                         }
                     }
                 }
                 /*
                  * If a copyright notice is followed by digits, assume that those digits are the copyright year.
                  * We require the year to be followed by spaces, punctuations or the end of the text in order to
                  * reduce the risk of confusion with postal addresses. So this block should accept "John, 1992."
                  * but not "1992-1 Nowhere road".
                  */
                 if (isCopyright && wasPunctuation && year == 0 && c >= '0' && c <= '9') {
                     int endOfDigits = i + n;            // After the last digit in sequence.
                     while (endOfDigits < length) {
                         final int d = notice.codePointAt(endOfDigits);
                         if (d < '0' || d > '9') break;
                         endOfDigits++;                  // ASCII digits always use exactly one char.
                     }
                     /*
                      * Verify if the digits are followed by spaces, a punctuation or the end of the text.
                      * The end of text must be tested before to read the next character, otherwise
                      * a notice ending with the year would cause an index out of bounds exception.
                      */
                     final int endOfToken = CharSequences.skipLeadingWhitespaces(notice, endOfDigits, length);
                     final boolean isDelimited = (endOfToken > endOfDigits) || (endOfToken >= length)
                             || isSpaceOrPunctuation(notice.codePointAt(endOfToken));
                     if (isDelimited) {
                         int candidate = 0;
                         try {
                             candidate = Integer.parseInt(notice.substring(i, endOfDigits));
                         } catch (NumberFormatException ignored) {
                             // Too many digits for an integer: not a year, will be handled as text.
                         }
                         if (candidate >= 1800 && candidate <= 9999) {       // Those limits are arbitrary.
                             year = candidate;
                             skipNextChars = true;
                             i = endOfToken;
                             continue parse;
                         }
                     }
                 }
             }
             /*
              * End of the block that was executed at the beginning of each new word.
              * Following is executed for every characters, except if the above block
              * skipped a portion of the input string.
              */
             wasPunctuation = isPunctuation;
             wasSeparator   = isSeparator;
             quoteLevel    += quoteChange;
             if (!skipNextChars && !Character.isIdentifierIgnorable(c)) {
                 buffer.appendCodePoint(c);
             }
             i += n;
         }
         /*
          * End of parsing. Omit trailing spaces and some punctuations if any, then store the result.
          */
         int end = buffer.length();
         while (end > 0) {
             final int last = buffer.codePointBefore(end);
             if (!isSpaceOrPunctuation(last)) break;
             end -= Character.charCount(last);
         }
         final var citation = new DefaultCitation(notice);
         if (year != 0) {
             // First day of the copyright year, as milliseconds since the epoch.
             final Date date = new Date(LocalDate.of(year, 1, 1).toEpochDay() * MILLISECONDS_PER_DAY);
             citation.setDates(Collections.singleton(new DefaultCitationDate(date, DateType.IN_FORCE)));
         }
         if (end != 0) {
             buffer.setLength(end);
             // Same limitation as MetadataBuilder.party().
             final var party = new AbstractParty(buffer, null);
             final var owner = new DefaultResponsibility(Role.OWNER, null, party);
             citation.setCitedResponsibleParties(Collections.singleton(owner));
         }
         constraints.getReferences().add(citation);
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.sis.storage.base;

	import java.time.LocalDate;
	import java.util.Date;
	import java.util.Collections;
	import org.opengis.metadata.citation.Role;
	import org.opengis.metadata.citation.DateType;
	import org.opengis.metadata.constraint.Restriction;
	import org.apache.sis.util.CharSequences;
	import org.apache.sis.metadata.iso.citation.AbstractParty;
	import org.apache.sis.metadata.iso.citation.DefaultCitation;
	import org.apache.sis.metadata.iso.citation.DefaultCitationDate;
	import org.apache.sis.metadata.iso.constraint.DefaultLegalConstraints;
	import static org.apache.sis.util.privy.Constants.MILLISECONDS_PER_DAY;

	// Specific to the geoapi-4.0 branch:
	import org.apache.sis.metadata.iso.citation.DefaultResponsibility;


	/**
	* Elements to omit in the legal notice to be parsed by {@link MetadataBuilder#parseLegalNotice(String)}.
	* Some of those elements are implied by the metadata where the legal notice will be stored.
	*
	* @author Martin Desruisseaux (Geomatys)
	*/
	final class LegalSymbols {
	/**
	* Symbols associated to restrictions.
	*/
	private static final LegalSymbols[] VALUES = {
	new LegalSymbols(Restriction.COPYRIGHT, "COPYRIGHT", "(C)", "©", "All rights reserved"),
	new LegalSymbols(Restriction.TRADEMARK, "TRADEMARK", "(TM)", "™", "(R)", "®")
	};

	/**
	* The restriction to use if an item in the {@linkplain #symbols} list is found.
	*/
	private final Restriction restriction;

	/**
	* Symbols to use as an indication that the {@linkplain #restriction} applies.
	*/
	private final String[] symbols;

	/**
	* Creates a new enumeration value for the given symbol.
	*/
	private LegalSymbols(final Restriction restriction, final String... symbols) {
	this.restriction = restriction;
	this.symbols = symbols;
	}

	/**
	* Returns {@code true} if the given character is a space or a punctuation of category "other".
	* The punctuation characters include coma, dot, semi-colon, <i>etc.</i> but do not include
	* parenthesis or connecting punctuation.
	*
	* @param c the Unicode code point of the character to test.
	*/
	private static boolean isSpaceOrPunctuation(final int c) {
	switch (Character.getType(c)) {
	case Character.LINE_SEPARATOR:
	case Character.SPACE_SEPARATOR:
	case Character.PARAGRAPH_SEPARATOR:
	case Character.OTHER_PUNCTUATION: return true;
	default: return false;
	}
	}

	/**
	* Implementation of {@link MetadataBuilder#parseLegalNotice(String)}, provided here for reducing
	* the number of class loading in the common case where there is no legal notice to parse.
	*/
	static void parse(final String notice, final DefaultLegalConstraints constraints) {
	final int length = notice.length();
	final var buffer = new StringBuilder(length);
	int year = 0; // The copyright year, or 0 if none.
	int quoteLevel = 0; // Incremented on ( [ « characters, decremented on ) ] » characters.
	boolean isCopyright = false; // Whether the word parsed by previous iteration was "Copyright" or "(C)".
	boolean wasSeparator = true; // Whether the caracter parsed by the previous iteration was a word separator.
	boolean wasPunctuation = true; // Whether the previous character was a punctuation of Unicode category "other".
	boolean skipNextChars = true; // Whether the next spaces and some punction characters should be ignored.
	parse: for (int i = 0; i < length;) {
	final int c = notice.codePointAt(i);
	final int n = Character.charCount(c);
	int quoteChange = 0;
	boolean isSeparator = false;
	boolean isPunctuation;
	switch (Character.getType(c)) {
	case Character.INITIAL_QUOTE_PUNCTUATION:
	case Character.START_PUNCTUATION: {
	quoteChange = +1; // ( [ « etc.
	skipNextChars = false;
	isPunctuation = false;
	break;
	}
	case Character.FINAL_QUOTE_PUNCTUATION:
	case Character.END_PUNCTUATION: {
	quoteChange = -1; // ) ] » etc.
	skipNextChars = false;
	isPunctuation = false;
	break;
	}
	default: { // Letter, digit, hyphen, etc.
	skipNextChars = false;
	isPunctuation = false;
	break;
	}
	case Character.OTHER_PUNCTUATION: { // , . : ; / " etc. but not -.
	isPunctuation = true;
	isSeparator = true;
	break;
	}
	case Character.LINE_SEPARATOR:
	case Character.SPACE_SEPARATOR:
	case Character.PARAGRAPH_SEPARATOR: {
	isPunctuation = wasPunctuation;
	isSeparator = true;
	break;
	}
	}
	if (wasSeparator && !isSeparator && quoteLevel == 0) {
	/*
	* Found the beginning of a new word. Ignore textes like "(C)" or "All rights reserved".
	* Some of those textes are implied by the metadata where the legal notice will be stored.
	*/
	for (final LegalSymbols r : VALUES) {
	for (final String symbol : r.symbols) {
	if (notice.regionMatches(true, i, symbol, 0, symbol.length())) {
	final int after = i + symbol.length();
	if (after >= length \|\| isSpaceOrPunctuation(notice.codePointAt(after))) {
	isCopyright \|= (r.restriction == Restriction.COPYRIGHT);
	constraints.getUseConstraints().add(r.restriction);
	wasPunctuation = true; // Pretend that "Copyright" was followed by a coma.
	skipNextChars = true; // Ignore spaces and punctuations until the next word.
	i = after; // Skip the "Copyright" (or other) word.
	continue parse;
	}
	}
	}
	}
	/*
	* If a copyright notice is followed by digits, assume that those digits are the copyright year.
	* We require the year is followed by punctuations or non-breaking space in order to reduce the
	* risk of confusion with postal addresses. So this block should accept "John, 1992." but not
	* "1992-1 Nowhere road".
	*/
	if (isCopyright && wasPunctuation && year == 0 && c >= '0' && c <= '9') {
	int endOfDigits = i + n; // After the last digit in sequence.
	while (endOfDigits < length) {
	final int d = notice.codePointAt(endOfDigits);
	if (d < '0' \|\| d > '9') break;
	endOfDigits++; // No need to use Character.charCount(s) here.
	}
	// Verify if the digits are followed by a punctuation.
	final int endOfToken = CharSequences.skipLeadingWhitespaces(notice, endOfDigits, length);
	if (endOfToken > endOfDigits \|\| isSpaceOrPunctuation(notice.codePointAt(endOfToken))) try {
	year = Integer.parseInt(notice.substring(i, endOfDigits));
	if (year >= 1800 && year <= 9999) { // Those limits are arbitrary.
	skipNextChars = true;
	i = endOfToken;
	continue;
	}
	year = 0; // Reject as not a copyright year.
	} catch (NumberFormatException e) {
	// Not an integer - ignore, will be handled as text.
	}
	}
	}
	/*
	* End of the block that was executed at the beginning of each new word.
	* Following is executed for every characters, except if the above block
	* skipped a portion of the input string.
	*/
	wasPunctuation = isPunctuation;
	wasSeparator = isSeparator;
	quoteLevel += quoteChange;
	if (!skipNextChars && !Character.isIdentifierIgnorable(c)) {
	buffer.appendCodePoint(c);
	}
	i += n;
	}
	/*
	* End of parsing. Omit trailing spaces and some punctuations if any, then store the result.
	*/
	int i = buffer.length();
	while (i > 0) {
	final int c = buffer.codePointBefore(i);
	if (!isSpaceOrPunctuation(c)) break;
	i -= Character.charCount(c);
	}
	final var c = new DefaultCitation(notice);
	if (year != 0) {
	final Date date = new Date(LocalDate.of(year, 1, 1).toEpochDay() * MILLISECONDS_PER_DAY);
	c.setDates(Collections.singleton(new DefaultCitationDate(date, DateType.IN_FORCE)));
	}
	if (i != 0) {
	buffer.setLength(i);
	// Same limitation as MetadataBuilder.party().
	final var party = new AbstractParty(buffer, null);
	final var r = new DefaultResponsibility(Role.OWNER, null, party);
	c.setCitedResponsibleParties(Collections.singleton(r));
	}
	constraints.getReferences().add(c);
	}
	}