blob: 3c9d7f516c51f71ff4b899140fce241ec45cd1a4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
PACKAGE uima.ruta.example;
// import the types of this type system:
TYPESYSTEM types.BibtexTypeSystem;
// Load the external word list 'FirstNames.txt' under the name FirstNameList;
// it serves as the dictionary of known first names for the lookup below.
WORDLIST FirstNameList = 'FirstNames.txt';
// Declare the annotation types used for the intermediate results of this script.
DECLARE FirstName, FirstNameInitial, Name, NameListPart;
// Find the first names: one dictionary lookup over the whole document that
// annotates each occurrence of a FirstNameList entry as FirstName.
Document{-> MARKFAST(FirstName, FirstNameList)};
// Tokens that may link two names in a list: the word "and", a comma, or "&".
DECLARE NameLinker;
// NOTE(review): the second REGEXP argument is the ignore-case flag; with false
// only the exact lower-case word "and" is marked -- confirm this is intended.
W{REGEXP("and", false) -> MARK(NameLinker)};
COMMA{ -> MARK(NameLinker)};
SPECIAL{REGEXP("&") -> MARK(NameLinker)};
// First name initials: a capitalized word consisting of a single character
// (regex ".") that is directly followed by a period. The indexes 1,2 make the
// FirstNameInitial annotation span both the letter and the period, e.g. "J.".
CW{REGEXP(".") -> MARK(FirstNameInitial,1,2)} PERIOD;
// Candidate person names. Each rule marks the span of all its elements as Name.
// The rules run in order, so the later ones use -PARTOF(Name) to skip text that
// an earlier rule has already covered.
// Pattern 1: one or more known first names, optional initials, then a
// capitalized word, e.g. "John F. Smith".
FirstName+ FirstNameInitial* CW{-> MARK(Name, 1, 2, 3)};
// Pattern 2: one or more initials not yet inside a Name, then a capitalized
// word, e.g. "J. F. Smith". Only two rule elements exist here, so index 3 in
// MARK has no element to refer to.
FirstNameInitial+{-PARTOF(Name)} CW{-> MARK(Name, 1, 2, 3)};
// Pattern 3: a capitalized word of more than one character that is not yet
// inside a Name, an optional comma, then one or more initials,
// e.g. "Smith, J. F.".
CW{-PARTOF(Name), -REGEXP(".")} COMMA? FirstNameInitial+{-> MARK(Name, 1, 2, 3)};
// List of names, built in three steps.
// Step 1: a Name that is not yet inside a NameListPart, followed by one or two
// NameLinker tokens (e.g. ", and"), is marked as one NameListPart.
Name{-PARTOF(NameListPart)} NameLinker[1,2]{-> MARK(NameListPart,1,2)};
// Step 2: a run of NameListParts closed by a final Name that is neither inside
// an Author nor inside a NameListPart is marked as Author.
// Author is not declared in this script; presumably it is defined in
// types.BibtexTypeSystem -- verify.
// NOTE(review): this rule has only two elements, so index 3 in MARK has no
// element to refer to -- confirm it is harmless or reduce it to 1,2.
NameListPart+ Name{-PARTOF(Author),-PARTOF(NameListPart) -> MARK(Author,1,2,3)} ;
// Step 3: a run of NameListParts that is still not inside an Author (no closing
// Name was found by the previous rule) is marked as Author on its own.
NameListPart+{-PARTOF(Author) -> MARK(Author)};
// Expand the Author annotation to include the punctuation mark (PM) that
// directly follows it: SHIFT moves the annotation to span rule elements 1 to 2.
Author{-> SHIFT(Author,1,2)} PM;