// blob: 48c38d9695f88db98fa446b9e00b09013ff8526a [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
PACKAGE uima.ruta.example;
// Rules in this script were learnt using TextRulerExample\data\single
SCRIPT uima.ruta.example.Features;
// Execute the imported Features script on the whole document before any
// boundary rule below runs (presumably it supplies helper annotations such
// as FirstToken / LastToken that the rules match on — TODO confirm).
Document{-> CALL(Features)};
// Boundary marker types: one START/END pair per slot.
DECLARE AuthorSTART, AuthorEND;
DECLARE DateSTART, DateEND;
DECLARE PagesSTART, PagesEND;
DECLARE PublisherSTART, PublisherEND;
DECLARE InstitutionSTART, InstitutionEND;
DECLARE VolumeSTART, VolumeEND;
DECLARE EditorSTART, EditorEND;
DECLARE TitleSTART, TitleEND;
DECLARE BooktitleSTART, BooktitleEND;
DECLARE NoteSTART, NoteEND;
DECLARE JournalSTART, JournalEND;
DECLARE LocationSTART, LocationEND;
DECLARE TechSTART, TechEND;
// Set to true by every contextual rule that fires; reset at the top of each
// contextualRules_* block.
BOOLEAN redoContextualRules;
// Slot: Author
// Places AuthorSTART / AuthorEND boundary markers; the Author annotation
// itself is built from these markers by the slot-building rules later on.
// The trailing "p=..; n=.." notes are presumably positive/negative coverage
// counts written by the rule learner — TODO confirm.
// LEFT BOUNDARY RULES:
// Every FirstToken is a candidate start of an Author span.
FirstToken{->MARKONCE(AuthorSTART)}; // p=20; n=0
// RIGHT BOUNDARY RULES:
// A PERIOD that is directly followed by LParen is a candidate end.
PERIOD{->MARKONCE(AuthorEND)} LParen; // p=20; n=0
// Contextual rules: add a boundary that is still missing when its
// counterpart boundary lies within the NEAR interval.
BLOCK(contextualRules_Author) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Token after a COLON that is not already an AuthorSTART (same offsets) and
// has an AuthorEND within [0, 21] looking forward on the filtered index.
COLON ANY{-IS(AuthorSTART), NEAR(AuthorEND, 0,21,true,true)->MARKONCE(AuthorSTART), ASSIGN(redoContextualRules, true)}; // p=1; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// Token before LParen that is not already an AuthorEND and has an
// AuthorSTART within [0, 21] looking backward (forward=false).
ANY{-IS(AuthorEND), NEAR(AuthorSTART, 0,21,false,true)->MARKONCE(AuthorEND), ASSIGN(redoContextualRules, true)} LParen; // p=20; n=1
// Re-running the block is disabled; in the visible script the flag set above
// is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Author)};
}
// Slot: Date
// Places DateSTART / DateEND boundary markers for the Date slot.
// LEFT BOUNDARY RULES:
// An LParen whose following token is part of a YearInd annotation starts a Date.
LParen{->MARKONCE(DateSTART)} ANY{PARTOF(YearInd)}; // p=20; n=0
// RIGHT BOUNDARY RULES:
// A PERIOD directly preceded by RParen ends a Date.
RParen PERIOD{->MARKONCE(DateEND)}; // p=20; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Date) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Any LParen not yet marked DateSTART with a DateEND within [0, 5] ahead.
LParen{-IS(DateSTART), NEAR(DateEND, 0,5,true,true)->MARKONCE(DateSTART), ASSIGN(redoContextualRules, true)}; // p=20; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// Any PERIOD not yet marked DateEND with a DateSTART within [0, 5] behind.
PERIOD{-IS(DateEND), NEAR(DateSTART, 0,5,false,true)->MARKONCE(DateEND), ASSIGN(redoContextualRules, true)}; // p=20; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Date)};
}
// Slot: Pages
// Places PagesSTART / PagesEND boundary markers for the Pages slot.
// LEFT BOUNDARY RULES:
// Token between a COMMA and a SPECIAL whose text matches "-" starts Pages.
COMMA ANY{->MARKONCE(PagesSTART)} SPECIAL{REGEXP("-")}; // p=15; n=0
// RIGHT BOUNDARY RULES:
// A LastToken directly preceded by NUM ends Pages.
NUM LastToken{->MARKONCE(PagesEND)}; // p=15; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Pages) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Token before any SPECIAL, not yet PagesSTART, with a PagesEND within [0, 4] ahead.
ANY{-IS(PagesSTART), NEAR(PagesEND, 0,4,true,true)->MARKONCE(PagesSTART), ASSIGN(redoContextualRules, true)} SPECIAL; // p=15; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// LastToken not yet PagesEND with a PagesSTART within [0, 4] behind.
LastToken{-IS(PagesEND), NEAR(PagesSTART, 0,4,false,true)->MARKONCE(PagesEND), ASSIGN(redoContextualRules, true)}; // p=15; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Pages)};
}
// Slot: Publisher
// Places PublisherSTART / PublisherEND boundary markers for the Publisher slot.
// LEFT BOUNDARY RULES:
// Capitalized words matching one of three literal patterns start a Publisher.
CW{REGEXP("Wiley")->MARKONCE(PublisherSTART)}; // p=1; n=0
CW{REGEXP("University")->MARKONCE(PublisherSTART)}; // p=1; n=0
CW{REGEXP("Springer")->MARKONCE(PublisherSTART)}; // p=1; n=0
// RIGHT BOUNDARY RULES:
// The token directly before a CW matching "New" or "Lawrence" ends a Publisher
// (the same two words open the Location slot further down).
ANY{->MARKONCE(PublisherEND)} CW{REGEXP("New")}; // p=2; n=0
ANY{->MARKONCE(PublisherEND)} CW{REGEXP("Lawrence")}; // p=1; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Publisher) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Token after "CW PERIOD", not yet PublisherSTART, with a PublisherEND within [0, 5] ahead.
CW PERIOD ANY{-IS(PublisherSTART), NEAR(PublisherEND, 0,5,true,true)->MARKONCE(PublisherSTART), ASSIGN(redoContextualRules, true)}; // p=3; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// COMMA not yet PublisherEND with a PublisherSTART within [0, 5] behind.
COMMA{-IS(PublisherEND), NEAR(PublisherSTART, 0,5,false,true)->MARKONCE(PublisherEND), ASSIGN(redoContextualRules, true)}; // p=3; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Publisher)};
}
// Slot: Institution
// Places InstitutionSTART / InstitutionEND boundary markers for the Institution slot.
// LEFT BOUNDARY RULES:
// A capitalized word matching "Department" starts an Institution.
CW{REGEXP("Department")->MARKONCE(InstitutionSTART)}; // p=1; n=0
// RIGHT BOUNDARY RULES:
// The token directly before a CW matching the literal "Golomb" ends an
// Institution (p=1: presumably learnt from a single training example — confirm).
ANY{->MARKONCE(InstitutionEND)} CW{REGEXP("Golomb")}; // p=1; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Institution) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// CW followed by EnglishStopWord, not yet InstitutionSTART, with an
// InstitutionEND within [0, 9] ahead.
CW{-IS(InstitutionSTART), NEAR(InstitutionEND, 0,9,true,true)->MARKONCE(InstitutionSTART), ASSIGN(redoContextualRules, true)} EnglishStopWord; // p=1; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// LastToken not yet InstitutionEND with an InstitutionSTART within [0, 9] behind.
LastToken{-IS(InstitutionEND), NEAR(InstitutionSTART, 0,9,false,true)->MARKONCE(InstitutionEND), ASSIGN(redoContextualRules, true)}; // p=1; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Institution)};
}
// Slot: Volume
// Places VolumeSTART / VolumeEND boundary markers for the Volume slot.
// LEFT BOUNDARY RULES:
// NUM preceded by "CW ANY" starts a Volume.
CW ANY NUM{->MARKONCE(VolumeSTART)}; // p=11; n=0
// NUM directly followed by LParen starts a Volume.
NUM{->MARKONCE(VolumeSTART)} LParen; // p=6; n=0
// Token preceded by "CAP COMMA" starts a Volume.
CAP COMMA ANY{->MARKONCE(VolumeSTART)}; // p=3; n=0
// RIGHT BOUNDARY RULES:
// COMMA followed by one token and then a SPECIAL matching "-" ends a Volume.
COMMA{->MARKONCE(VolumeEND)} ANY SPECIAL{REGEXP("-")}; // p=14; n=1
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Volume) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// All three rules require: not yet VolumeSTART, VolumeEND within [0, 9] ahead.
// Candidate: token preceded by "CW COMMA".
CW COMMA ANY{-IS(VolumeSTART), NEAR(VolumeEND, 0,9,true,true)->MARKONCE(VolumeSTART), ASSIGN(redoContextualRules, true)}; // p=11; n=1
// Candidate: token directly followed by LParen.
ANY{-IS(VolumeSTART), NEAR(VolumeEND, 0,9,true,true)->MARKONCE(VolumeSTART), ASSIGN(redoContextualRules, true)} LParen; // p=6; n=0
// Candidate: NUM preceded by "CAP ANY".
CAP ANY NUM{-IS(VolumeSTART), NEAR(VolumeEND, 0,9,true,true)->MARKONCE(VolumeSTART), ASSIGN(redoContextualRules, true)}; // p=3; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// Token followed by one token and a SPECIAL matching "-", not yet VolumeEND,
// with a VolumeSTART within [0, 9] behind.
ANY{-IS(VolumeEND), NEAR(VolumeSTART, 0,9,false,true)->MARKONCE(VolumeEND), ASSIGN(redoContextualRules, true)} ANY SPECIAL{REGEXP("-")}; // p=14; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Volume)};
}
// Slot: Editor
// No boundary rules are present for this slot, so nothing in the visible
// script creates EditorSTART or EditorEND markers.
// LEFT BOUNDARY RULES:
// RIGHT BOUNDARY RULES:
// The block only resets the shared flag; it contains no contextual rules.
BLOCK(contextualRules_Editor) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// RIGHT BOUNDARY CONTEXTUAL RULES:
// Re-running the block is disabled (commented-out CALL).
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Editor)};
}
// Slot: Title
// Places TitleSTART / TitleEND boundary markers for the Title slot.
// LEFT BOUNDARY RULES:
// CW preceded by RParen and one arbitrary token starts a Title.
RParen ANY CW{->MARKONCE(TitleSTART)}; // p=20; n=0
// RIGHT BOUNDARY RULES:
// PERIOD between a SW and a CW ends a Title.
SW PERIOD{->MARKONCE(TitleEND)} CW; // p=16; n=0
// Token directly before a CW matching "Wiley" ends a Title.
ANY{->MARKONCE(TitleEND)} CW{REGEXP("Wiley")}; // p=1; n=0
// PERIOD followed by one token and then a SW matching "of" ends a Title.
PERIOD{->MARKONCE(TitleEND)} ANY SW{REGEXP("of")}; // p=7; n=0
// Token directly before a CW matching "Springer" ends a Title.
ANY{->MARKONCE(TitleEND)} CW{REGEXP("Springer")}; // p=1; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Title) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Second token after RParen, not yet TitleSTART, with a TitleEND within [0, 16] ahead.
RParen ANY ANY{-IS(TitleSTART), NEAR(TitleEND, 0,16,true,true)->MARKONCE(TitleSTART), ASSIGN(redoContextualRules, true)}; // p=20; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// PERIOD followed by one token and an EnglishStopWord, not yet TitleEND,
// with a TitleSTART within [0, 16] behind.
PERIOD{-IS(TitleEND), NEAR(TitleSTART, 0,16,false,true)->MARKONCE(TitleEND), ASSIGN(redoContextualRules, true)} ANY EnglishStopWord; // p=8; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Title)};
}
// Slot: Booktitle
// Places BooktitleSTART / BooktitleEND boundary markers for the Booktitle slot.
// LEFT BOUNDARY RULES:
// A capitalized word matching "Proceedings" starts a Booktitle.
CW{REGEXP("Proceedings")->MARKONCE(BooktitleSTART)}; // p=1; n=0
// RIGHT BOUNDARY RULES:
// Token directly before a NUM matching the literal "1273" ends a Booktitle
// (p=1: presumably learnt from a single training example — confirm).
ANY{->MARKONCE(BooktitleEND)} NUM{REGEXP("1273")}; // p=1; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Booktitle) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Any CW not yet BooktitleSTART with a BooktitleEND within [0, 6] ahead.
CW{-IS(BooktitleSTART), NEAR(BooktitleEND, 0,6,true,true)->MARKONCE(BooktitleSTART), ASSIGN(redoContextualRules, true)}; // p=1; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// Any COMMA not yet BooktitleEND with a BooktitleSTART within [0, 6] behind.
COMMA{-IS(BooktitleEND), NEAR(BooktitleSTART, 0,6,false,true)->MARKONCE(BooktitleEND), ASSIGN(redoContextualRules, true)}; // p=1; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Booktitle)};
}
// Slot: Note
// No boundary rules are present for this slot, so nothing in the visible
// script creates NoteSTART or NoteEND markers.
// LEFT BOUNDARY RULES:
// RIGHT BOUNDARY RULES:
// The block only resets the shared flag; it contains no contextual rules.
BLOCK(contextualRules_Note) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// RIGHT BOUNDARY CONTEXTUAL RULES:
// Re-running the block is disabled (commented-out CALL).
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Note)};
}
// Slot: Journal
// Places JournalSTART / JournalEND boundary markers for the Journal slot.
// LEFT BOUNDARY RULES:
// CW after "SW PERIOD" and before another CW starts a Journal.
SW PERIOD CW{->MARKONCE(JournalSTART)} CW; // p=6; n=0
// Token after "SW PERIOD" and before an EnglishStopWord starts a Journal.
SW PERIOD ANY{->MARKONCE(JournalSTART)} EnglishStopWord; // p=6; n=0
// Token after PeriodSep that is part of a FirstNameInitial starts a Journal.
PeriodSep ANY{PARTOF(FirstNameInitial)->MARKONCE(JournalSTART)}; // p=1; n=0
// A capitalized word matching "Econometrica" starts a Journal.
CW{REGEXP("Econometrica")->MARKONCE(JournalSTART)}; // p=1; n=0
// RIGHT BOUNDARY RULES:
// Token between a CW and a NUM ends a Journal.
CW ANY{->MARKONCE(JournalEND)} NUM; // p=11; n=0
// Token followed by "NUM LParen" ends a Journal.
ANY{->MARKONCE(JournalEND)} NUM LParen; // p=6; n=0
// COMMA directly preceded by CAP ends a Journal.
CAP COMMA{->MARKONCE(JournalEND)}; // p=3; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Journal) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Token after "SW PERIOD", not yet JournalSTART, with a JournalEND within [0, 9] ahead.
SW PERIOD ANY{-IS(JournalSTART), NEAR(JournalEND, 0,9,true,true)->MARKONCE(JournalSTART), ASSIGN(redoContextualRules, true)}; // p=14; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// All three rules require: not yet JournalEND, JournalSTART within [0, 9] behind.
// Candidate: COMMA directly preceded by CW.
CW COMMA{-IS(JournalEND), NEAR(JournalSTART, 0,9,false,true)->MARKONCE(JournalEND), ASSIGN(redoContextualRules, true)}; // p=11; n=1
// Candidate: token followed by one token and then LParen.
ANY{-IS(JournalEND), NEAR(JournalSTART, 0,9,false,true)->MARKONCE(JournalEND), ASSIGN(redoContextualRules, true)} ANY LParen; // p=6; n=0
// Candidate: token directly preceded by CAP.
CAP ANY{-IS(JournalEND), NEAR(JournalSTART, 0,9,false,true)->MARKONCE(JournalEND), ASSIGN(redoContextualRules, true)}; // p=3; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Journal)};
}
// Slot: Location
// Places LocationSTART / LocationEND boundary markers for the Location slot.
// LEFT BOUNDARY RULES:
// A capitalized word matching "New" or "Lawrence" starts a Location.
CW{REGEXP("New")->MARKONCE(LocationSTART)}; // p=2; n=0
CW{REGEXP("Lawrence")->MARKONCE(LocationSTART)}; // p=1; n=0
// RIGHT BOUNDARY RULES:
// Token directly after a CW matching "York" ends a Location.
CW{REGEXP("York")} ANY{->MARKONCE(LocationEND)}; // p=2; n=0
// Token directly before a CW matching the literal "Dechter" ends a Location
// (p=1: presumably learnt from a single training example — confirm).
ANY{->MARKONCE(LocationEND)} CW{REGEXP("Dechter")}; // p=1; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Location) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Both rules require: not yet LocationSTART, LocationEND within [0, 4] ahead.
// Candidate: CW directly followed by another CW.
CW{-IS(LocationSTART), NEAR(LocationEND, 0,4,true,true)->MARKONCE(LocationSTART), ASSIGN(redoContextualRules, true)} CW; // p=2; n=0
// Candidate: FirstName annotation directly preceded by COMMA.
COMMA FirstName{-IS(LocationSTART), NEAR(LocationEND, 0,4,true,true)->MARKONCE(LocationSTART), ASSIGN(redoContextualRules, true)}; // p=1; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// LastToken not yet LocationEND with a LocationSTART within [0, 4] behind.
LastToken{-IS(LocationEND), NEAR(LocationSTART, 0,4,false,true)->MARKONCE(LocationEND), ASSIGN(redoContextualRules, true)}; // p=3; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Location)};
}
// Slot: Tech
// Places TechSTART / TechEND boundary markers for the Tech slot.
// LEFT BOUNDARY RULES:
// A capitalized word matching "Tech" starts a Tech span.
CW{REGEXP("Tech")->MARKONCE(TechSTART)}; // p=1; n=0
// RIGHT BOUNDARY RULES:
// Token directly before a CW matching "Department" ends a Tech span
// (the same word opens the Institution slot).
ANY{->MARKONCE(TechEND)} CW{REGEXP("Department")}; // p=1; n=0
// Contextual rules: add a missing boundary when the opposite boundary is near.
BLOCK(contextualRules_Tech) Document{} {
Document{->ASSIGN(redoContextualRules, false)}; // reset flag
// LEFT BOUNDARY CONTEXTUAL RULES:
// Any CW not yet TechSTART with a TechEND within [0, 5] ahead.
CW{-IS(TechSTART), NEAR(TechEND, 0,5,true,true)->MARKONCE(TechSTART), ASSIGN(redoContextualRules, true)}; // p=1; n=0
// RIGHT BOUNDARY CONTEXTUAL RULES:
// Any COMMA not yet TechEND with a TechSTART within [0, 5] behind.
COMMA{-IS(TechEND), NEAR(TechSTART, 0,5,false,true)->MARKONCE(TechEND), ASSIGN(redoContextualRules, true)}; // p=1; n=0
// Re-running the block is disabled; the flag is only read by this commented-out CALL.
//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Tech)};
}
//slot-building rules (Author):
// Case 1: START and END markers cover the same span — mark that span as
// Author and remove both markers.
AuthorSTART{IS(AuthorEND)->UNMARK(AuthorSTART), UNMARK(AuthorEND), MARKONCE(Author)};
// Case 2: START, then 0..61 tokens (reluctant quantifier), then END — mark
// Author across rule elements 1 to 3 and remove both markers.
AuthorSTART{->UNMARK(AuthorSTART)} ANY[0, 61]? AuthorEND{->UNMARK(AuthorEND), MARKONCE(Author, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
AuthorSTART{->UNMARK(AuthorSTART)};
AuthorEND{->UNMARK(AuthorEND)};
//slot-building rules (Date):
// Case 1: START and END markers cover the same span — mark it as Date and
// remove both markers.
DateSTART{IS(DateEND)->UNMARK(DateSTART), UNMARK(DateEND), MARKONCE(Date)};
// Case 2: START, then 0..13 tokens (reluctant), then END — mark Date across
// rule elements 1 to 3 and remove both markers.
DateSTART{->UNMARK(DateSTART)} ANY[0, 13]? DateEND{->UNMARK(DateEND), MARKONCE(Date, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
DateSTART{->UNMARK(DateSTART)};
DateEND{->UNMARK(DateEND)};
//slot-building rules (Pages):
// Case 1: START and END markers cover the same span — mark it as Pages and
// remove both markers.
PagesSTART{IS(PagesEND)->UNMARK(PagesSTART), UNMARK(PagesEND), MARKONCE(Pages)};
// Case 2: START, then 0..10 tokens (reluctant), then END — mark Pages across
// rule elements 1 to 3 and remove both markers.
PagesSTART{->UNMARK(PagesSTART)} ANY[0, 10]? PagesEND{->UNMARK(PagesEND), MARKONCE(Pages, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
PagesSTART{->UNMARK(PagesSTART)};
PagesEND{->UNMARK(PagesEND)};
//slot-building rules (Publisher):
// Case 1: START and END markers cover the same span — mark it as Publisher
// and remove both markers.
PublisherSTART{IS(PublisherEND)->UNMARK(PublisherSTART), UNMARK(PublisherEND), MARKONCE(Publisher)};
// Case 2: START, then 0..13 tokens (reluctant), then END — mark Publisher
// across rule elements 1 to 3 and remove both markers.
PublisherSTART{->UNMARK(PublisherSTART)} ANY[0, 13]? PublisherEND{->UNMARK(PublisherEND), MARKONCE(Publisher, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
PublisherSTART{->UNMARK(PublisherSTART)};
PublisherEND{->UNMARK(PublisherEND)};
//slot-building rules (Institution):
// Case 1: START and END markers cover the same span — mark it as Institution
// and remove both markers.
InstitutionSTART{IS(InstitutionEND)->UNMARK(InstitutionSTART), UNMARK(InstitutionEND), MARKONCE(Institution)};
// Case 2: START, then 0..25 tokens (reluctant), then END — mark Institution
// across rule elements 1 to 3 and remove both markers.
InstitutionSTART{->UNMARK(InstitutionSTART)} ANY[0, 25]? InstitutionEND{->UNMARK(InstitutionEND), MARKONCE(Institution, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
InstitutionSTART{->UNMARK(InstitutionSTART)};
InstitutionEND{->UNMARK(InstitutionEND)};
//slot-building rules (Volume):
// Case 1: START and END markers cover the same span — mark it as Volume and
// remove both markers.
VolumeSTART{IS(VolumeEND)->UNMARK(VolumeSTART), UNMARK(VolumeEND), MARKONCE(Volume)};
// Case 2: START, then 0..25 tokens (reluctant), then END — mark Volume across
// rule elements 1 to 3 and remove both markers.
VolumeSTART{->UNMARK(VolumeSTART)} ANY[0, 25]? VolumeEND{->UNMARK(VolumeEND), MARKONCE(Volume, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
VolumeSTART{->UNMARK(VolumeSTART)};
VolumeEND{->UNMARK(VolumeEND)};
//slot-building rules (Editor):
// NOTE(review): no rule in the visible script creates EditorSTART/EditorEND,
// so these rules only fire if the markers are produced elsewhere.
// Case 1: START and END markers cover the same span — mark it as Editor and
// remove both markers.
EditorSTART{IS(EditorEND)->UNMARK(EditorSTART), UNMARK(EditorEND), MARKONCE(Editor)};
// Case 2: START directly followed by END (no gap element, unlike the other
// slots) — mark Editor across rule elements 1 to 2 and remove both markers.
EditorSTART{->UNMARK(EditorSTART)} EditorEND{->UNMARK(EditorEND), MARKONCE(Editor, 1, 2)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
EditorSTART{->UNMARK(EditorSTART)};
EditorEND{->UNMARK(EditorEND)};
//slot-building rules (Title):
// Case 1: START and END markers cover the same span — mark it as Title and
// remove both markers.
TitleSTART{IS(TitleEND)->UNMARK(TitleSTART), UNMARK(TitleEND), MARKONCE(Title)};
// Case 2: START, then 0..46 tokens (reluctant), then END — mark Title across
// rule elements 1 to 3 and remove both markers.
TitleSTART{->UNMARK(TitleSTART)} ANY[0, 46]? TitleEND{->UNMARK(TitleEND), MARKONCE(Title, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
TitleSTART{->UNMARK(TitleSTART)};
TitleEND{->UNMARK(TitleEND)};
//slot-building rules (Booktitle):
// Case 1: START and END markers cover the same span — mark it as Booktitle
// and remove both markers.
BooktitleSTART{IS(BooktitleEND)->UNMARK(BooktitleSTART), UNMARK(BooktitleEND), MARKONCE(Booktitle)};
// Case 2: START, then 0..16 tokens (reluctant), then END — mark Booktitle
// across rule elements 1 to 3 and remove both markers.
BooktitleSTART{->UNMARK(BooktitleSTART)} ANY[0, 16]? BooktitleEND{->UNMARK(BooktitleEND), MARKONCE(Booktitle, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
BooktitleSTART{->UNMARK(BooktitleSTART)};
BooktitleEND{->UNMARK(BooktitleEND)};
//slot-building rules (Note):
// NOTE(review): no rule in the visible script creates NoteSTART/NoteEND, so
// these rules only fire if the markers are produced elsewhere.
// Case 1: START and END markers cover the same span — mark it as Note and
// remove both markers.
NoteSTART{IS(NoteEND)->UNMARK(NoteSTART), UNMARK(NoteEND), MARKONCE(Note)};
// Case 2: START directly followed by END (no gap element, unlike the other
// slots) — mark Note across rule elements 1 to 2 and remove both markers.
NoteSTART{->UNMARK(NoteSTART)} NoteEND{->UNMARK(NoteEND), MARKONCE(Note, 1, 2)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
NoteSTART{->UNMARK(NoteSTART)};
NoteEND{->UNMARK(NoteEND)};
//slot-building rules (Journal):
// Case 1: START and END markers cover the same span — mark it as Journal and
// remove both markers.
JournalSTART{IS(JournalEND)->UNMARK(JournalSTART), UNMARK(JournalEND), MARKONCE(Journal)};
// Case 2: START, then 0..25 tokens (reluctant), then END — mark Journal
// across rule elements 1 to 3 and remove both markers.
JournalSTART{->UNMARK(JournalSTART)} ANY[0, 25]? JournalEND{->UNMARK(JournalEND), MARKONCE(Journal, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
JournalSTART{->UNMARK(JournalSTART)};
JournalEND{->UNMARK(JournalEND)};
//slot-building rules (Location):
// Case 1: START and END markers cover the same span — mark it as Location
// and remove both markers.
LocationSTART{IS(LocationEND)->UNMARK(LocationSTART), UNMARK(LocationEND), MARKONCE(Location)};
// Case 2: START, then 0..10 tokens (reluctant), then END — mark Location
// across rule elements 1 to 3 and remove both markers.
LocationSTART{->UNMARK(LocationSTART)} ANY[0, 10]? LocationEND{->UNMARK(LocationEND), MARKONCE(Location, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
LocationSTART{->UNMARK(LocationSTART)};
LocationEND{->UNMARK(LocationEND)};
//slot-building rules (Tech):
// Case 1: START and END markers cover the same span — mark it as Tech and
// remove both markers.
TechSTART{IS(TechEND)->UNMARK(TechSTART), UNMARK(TechEND), MARKONCE(Tech)};
// Case 2: START, then 0..13 tokens (reluctant), then END — mark Tech across
// rule elements 1 to 3 and remove both markers.
TechSTART{->UNMARK(TechSTART)} ANY[0, 13]? TechEND{->UNMARK(TechEND), MARKONCE(Tech, 1, 3)};
//cleaning up:
// Remove any boundary markers that were not paired by the rules above.
TechSTART{->UNMARK(TechSTART)};
TechEND{->UNMARK(TechEND)};