blob: 2dcde967ce011ed2ce0342ac5c89606c2a8066f2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
PACKAGE uima.ruta.example;
IMPORT PACKAGE de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos FROM GeneratedDKProCoreTypes AS pos;
IMPORT de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma FROM GeneratedDKProCoreTypes;
SCRIPT uima.ruta.example.DirectSpeech;
// External word lists, each loaded from a resource file.
// All of them except ObjectWordList and VerbList are turned into annotations
// via MARKFAST further down; VerbList is only queried with INLIST.
// NOTE(review): ObjectWordList is not referenced by any rule visible in this script.
WORDLIST AnimalList = 'Animals.txt';
WORDLIST FirstNameList = 'FirstNames.txt';
WORDLIST GeneralTitleList = 'GeneralTitles.txt';
WORDLIST PhraseList = 'IMPhrases.txt';
WORDLIST JobList = 'Jobs.txt';
WORDLIST MilitaryTitleList = 'MilitaryTitles.txt';
WORDLIST NobleTitleList = 'NobleTitles.txt';
WORDLIST ObjectWordList = 'ObjectWords.txt';
WORDLIST RelativesTitleList = 'RelativesTitles.txt';
WORDLIST VerbList = 'Verbs.txt';
// String lists filled while the rules run; they collect the covered text of
// names found by context rules so that further occurrences can be marked.
STRINGLIST AddNames;
STRINGLIST FirstNames;
STRINGLIST LastNames;
STRINGLIST PersNames;
// Scratch variable that receives the matched text (MATCHEDTEXT) before it is
// added to one of the string lists above.
STRING Match;
// Annotation types created by this script.
// NOTE(review): BodyPart is declared but not used by any rule visible in this script.
DECLARE PersName, AddName, FirstName, LastName, AnAnimal,
RelativesTitle, GeneralTitle, MilitaryTitle, NobleTitle,
Job, BodyPart, IMPhrase;
// Run the DirectSpeech script first; the DirectSpeech annotations it is
// expected to create are used as context by the speaker rules below.
Document{-> CALL(DirectSpeech)};
// Make SPACE annotations visible during the dictionary lookup
// (presumably so that multi-word list entries can be matched - TODO confirm).
Document{-> RETAINTYPE(SPACE)};
// Dictionary lookup: annotate every occurrence of a word list entry with the
// corresponding type.
Document{-> MARKFAST(AnAnimal, AnimalList)};
Document{-> MARKFAST(FirstName, FirstNameList)};
Document{-> MARKFAST(GeneralTitle, GeneralTitleList)};
Document{-> MARKFAST(IMPhrase, PhraseList)};
Document{-> MARKFAST(Job, JobList)};
Document{-> MARKFAST(MilitaryTitle, MilitaryTitleList)};
Document{-> MARKFAST(NobleTitle, NobleTitleList)};
Document{-> MARKFAST(RelativesTitle, RelativesTitleList)};
// Reset the retained types again so the following rules use the default filtering.
Document{-> RETAINTYPE};
//-------------------------------------- FIRST NAMES --------------------------------------------\\
// Titles that are probably followed by a first name:
// a noun after a relatives title or an animal that is not yet part of a
// FirstName and not part of a SPECIAL is marked as FirstName, and its text is
// remembered in FirstNames.
(RelativesTitle|AnAnimal) pos.N{-PARTOF(FirstName), -PARTOF(SPECIAL)
-> MARK(FirstName), MATCHEDTEXT(Match), ADD(FirstNames, Match)};
// For every first name, also store the variant with an appended "s"
// (presumably the genitive form - TODO confirm).
FirstName{-> MATCHEDTEXT(Match), ADD(FirstNames, Match + "s")};
// Mark all nouns whose text is contained in the list FirstNames.
pos.N{-PARTOF(FirstName), INLIST(FirstNames) -> MARK(FirstName)};
// first name + first name = first name:
// stretch the first FirstName over both rule elements and remove the second one.
FirstName{-> SHIFT(FirstName, 1, 2)} FirstName{-> UNMARK(FirstName)};
//-------------------------------------- LAST NAMES ---------------------------------------------\\
// First name + CW (-> last name):
// a capitalized word following a word from FirstNameList (with an optional
// "von" in between) becomes a LastName. The annotation spans rule elements
// 2 to 3, i.e. the optional "von" plus the capitalized word; Match receives
// the text matched by the last rule element.
CW{INLIST(FirstNameList)} "von"? CW{-PARTOF(FirstName), -PARTOF(LastName), -PARTOF(pos.PP)
-> MARK(LastName, 2, 3), MATCHEDTEXT(Match), ADD(LastNames, Match)};
// Titles that are probably followed by a last name:
// same pattern as above, but triggered by a general, military or noble title
// or a job; the capitalized word must not itself be a title, job or name.
(GeneralTitle|MilitaryTitle|NobleTitle|Job) "von"? CW{-PARTOF(GeneralTitle), -PARTOF(Job),
-PARTOF(NobleTitle), -PARTOF(MilitaryTitle), -PARTOF(RelativesTitle), -PARTOF(FirstName),
-PARTOF(LastName) -> MARK(LastName, 2, 3), MATCHEDTEXT(Match), ADD(LastNames, Match)};
// For every last name, also store the variant with an appended "s"
// (presumably the genitive form - TODO confirm).
LastName{-> MATCHEDTEXT(Match), ADD(LastNames, Match + "s")};
// Mark all nouns whose text is contained in the list LastNames.
pos.N{-PARTOF(FirstName), -PARTOF(LastName), INLIST(LastNames) -> MARK(LastName)};
//-------------------------------------- ADDITIONAL NAMES ---------------------------------------\\
// Combine the different kinds of titles (animals, relatives, general, military
// and noble titles, jobs) into the single type AddName and remember their text.
(AnAnimal|RelativesTitle|GeneralTitle|MilitaryTitle|NobleTitle|Job)
{-> MARK(AddName), MATCHEDTEXT(Match), ADD(AddNames, Match)};
// Mark all nouns whose lemma (Lemma.value) is contained in the list AddNames.
pos.N{-PARTOF(AddName), INLIST(AddNames, Lemma.value) -> MARK(AddName)};
//-------------------------------------- PERSONS ------------------------------------------------\\
// AddName + FirstName + LastName = PersName.
// The "@" makes the rule start matching at the FirstName (first rule) or at
// the LastName (second rule); the whole sequence is marked unless it is
// already part of a PersName.
(AddName* @FirstName LastName*){-PARTOF(PersName) -> MARK(PersName)};
(AddName* FirstName? @LastName){-PARTOF(PersName) -> MARK(PersName)};
// Mark the speaker following direct speech as a person:
// direct speech, an optional comma, a verb whose lemma is in VerbList, an
// optional article and then one or more capitalized words.
DirectSpeech COMMA? pos.V{INLIST(VerbList, Lemma.value)} pos.ART? CW+{-PARTOF(PersName)
-> MARK(PersName), MATCHEDTEXT(Match), ADD(PersNames, Match)};
// Mark all nouns whose lemma (Lemma.value) is contained in the list PersNames.
pos.N{-PARTOF(PersName), INLIST(PersNames, Lemma.value) -> MARK(PersName)};
//-------------------------------------- CORRECTION ---------------------------------------------\\
// Proper names that were not recognized as first names or last names.
// Empty the list first, so the list lookup at the end of this section only
// considers names collected by the two rules below.
Document{-> CLEAR(FirstNames)};
// Capitalized words after direct speech and a verb from VerbList that are
// neither a FirstName, LastName nor AddName are treated as first names.
DirectSpeech COMMA? pos.V{INLIST(VerbList, Lemma.value)} CW+{-PARTOF(FirstName), -PARTOF(LastName),
-PARTOF(AddName) -> MARK(FirstName), MATCHEDTEXT(Match), ADD(FirstNames, Match)};
// Unambiguous phrases that are followed by a name:
// the capitalized word after such a phrase is marked as FirstName and stored
// both as-is and with an appended "s".
IMPhrase CW{-PARTOF(FirstName) -> MARK(FirstName), MATCHEDTEXT(Match),
ADD(FirstNames, Match, Match + "s")};
// Mark all nouns whose text is contained in the list FirstNames.
pos.N{-PARTOF(FirstName), INLIST(FirstNames) -> MARK(FirstName)};
// Every FirstName that is not yet part of a PersName becomes a PersName of its own.
FirstName{-PARTOF(PersName) -> MARK(PersName)};