| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| // Namespace of this script and of the types it declares. |
| PACKAGE uima.ruta.example; |
| |
| |
| // learnt using TextRulerExample\data\single |
| |
| // Import the Features script and run it on the whole document before any rule |
| // below. Types used later but not declared in this file (FirstToken, LastToken, |
| // LParen, RParen, YearInd, EnglishStopWord, ...) presumably come from that script |
| // or from the type system -- TODO confirm. |
| SCRIPT uima.ruta.example.Features; |
| Document{-> CALL(Features)}; |
| // Temporary boundary markers: one START/END pair per slot. They are placed by the |
| // boundary rules below and removed again by the slot-building and clean-up rules |
| // at the end of the file. |
| DECLARE AuthorSTART, AuthorEND, DateSTART, DateEND, PagesSTART, PagesEND, PublisherSTART, PublisherEND, InstitutionSTART, InstitutionEND, VolumeSTART, VolumeEND, EditorSTART, EditorEND, TitleSTART, TitleEND, BooktitleSTART, BooktitleEND, NoteSTART, NoteEND, JournalSTART, JournalEND, LocationSTART, LocationEND, TechSTART, TechEND; |
| // Reset at the top of every contextual block and set to true by each contextual |
| // rule that adds a marker. The statements that would read it are commented out, |
| // so within this file the flag is written but never evaluated. |
| BOOLEAN redoContextualRules; |
| |
| |
| // Slot: Author |
| // Places AuthorSTART/AuthorEND markers. The plain rules fire on local token |
| // patterns alone; the contextual block afterwards adds a marker only where the |
| // opposite marker is nearby. The trailing "p=..; n=.." notes were emitted with |
| // the rules (presumably positive/negative match counts from the rule learner |
| // -- TODO confirm). |
| // LEFT BOUNDARY RULES: |
| // Every FirstToken annotation opens an author span. |
| FirstToken{->MARKONCE(AuthorSTART)}; // p=20; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // A period directly followed by an opening parenthesis closes the author span. |
| PERIOD{->MARKONCE(AuthorEND)} LParen; // p=20; n=0 |
| |
| BLOCK(contextualRules_Author) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Token after a colon that is not yet an AuthorSTART (-IS) and has an AuthorEND |
| // within distance 0..21 (NEAR; first boolean true = search forward). |
| COLON ANY{-IS(AuthorSTART), NEAR(AuthorEND, 0,21,true,true)->MARKONCE(AuthorSTART), ASSIGN(redoContextualRules, true)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Token before an opening parenthesis that is not yet an AuthorEND and has an |
| // AuthorSTART within distance 0..21 (first boolean false = search backward). |
| ANY{-IS(AuthorEND), NEAR(AuthorSTART, 0,21,false,true)->MARKONCE(AuthorEND), ASSIGN(redoContextualRules, true)} LParen; // p=20; n=1 |
| |
| // Disabled: would re-run this block for as long as a contextual rule added a |
| // marker. While commented out, nothing in this file re-invokes the block. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Author)}; |
| } |
| |
| // Slot: Date |
| // Places DateSTART/DateEND markers: first from local token patterns, then, inside |
| // the contextual block, from the proximity of the opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Opening parenthesis whose next token is part of a YearInd annotation. |
| LParen{->MARKONCE(DateSTART)} ANY{PARTOF(YearInd)}; // p=20; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Period that directly follows a closing parenthesis. |
| RParen PERIOD{->MARKONCE(DateEND)}; // p=20; n=0 |
| |
| BLOCK(contextualRules_Date) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked opening parenthesis with a DateEND within distance 0..5 |
| // (NEAR; first boolean true = search forward). |
| LParen{-IS(DateSTART), NEAR(DateEND, 0,5,true,true)->MARKONCE(DateSTART), ASSIGN(redoContextualRules, true)}; // p=20; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked period with a DateSTART within distance 0..5 |
| // (first boolean false = search backward). |
| PERIOD{-IS(DateEND), NEAR(DateSTART, 0,5,false,true)->MARKONCE(DateEND), ASSIGN(redoContextualRules, true)}; // p=20; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Date)}; |
| } |
| |
| // Slot: Pages |
| // Places PagesSTART/PagesEND markers: first from local token patterns, then, |
| // inside the contextual block, from the proximity of the opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Token between a comma and a SPECIAL whose text matches "-". |
| COMMA ANY{->MARKONCE(PagesSTART)} SPECIAL{REGEXP("-")}; // p=15; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // LastToken annotation that directly follows a number. |
| NUM LastToken{->MARKONCE(PagesEND)}; // p=15; n=0 |
| |
| BLOCK(contextualRules_Pages) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked token followed by any SPECIAL, with a PagesEND within distance 0..4 |
| // (NEAR; first boolean true = search forward). |
| ANY{-IS(PagesSTART), NEAR(PagesEND, 0,4,true,true)->MARKONCE(PagesSTART), ASSIGN(redoContextualRules, true)} SPECIAL; // p=15; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked LastToken with a PagesSTART within distance 0..4 |
| // (first boolean false = search backward). |
| LastToken{-IS(PagesEND), NEAR(PagesSTART, 0,4,false,true)->MARKONCE(PagesEND), ASSIGN(redoContextualRules, true)}; // p=15; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Pages)}; |
| } |
| |
| // Slot: Publisher |
| // Places PublisherSTART/PublisherEND markers. The plain rules are keyed to |
| // literal words; the contextual block adds markers based on the proximity of |
| // the opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Capitalized words whose text matches one of three literal publisher cues. |
| CW{REGEXP("Wiley")->MARKONCE(PublisherSTART)}; // p=1; n=0 |
| CW{REGEXP("University")->MARKONCE(PublisherSTART)}; // p=1; n=0 |
| CW{REGEXP("Springer")->MARKONCE(PublisherSTART)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Token directly before the capitalized word "New" or "Lawrence" (the same |
| // words open a Location span in the Location rules of this file). |
| ANY{->MARKONCE(PublisherEND)} CW{REGEXP("New")}; // p=2; n=0 |
| ANY{->MARKONCE(PublisherEND)} CW{REGEXP("Lawrence")}; // p=1; n=0 |
| |
| BLOCK(contextualRules_Publisher) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked token after "CW PERIOD", with a PublisherEND within distance 0..5 |
| // (NEAR; first boolean true = search forward). |
| CW PERIOD ANY{-IS(PublisherSTART), NEAR(PublisherEND, 0,5,true,true)->MARKONCE(PublisherSTART), ASSIGN(redoContextualRules, true)}; // p=3; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked comma with a PublisherSTART within distance 0..5 |
| // (first boolean false = search backward). |
| COMMA{-IS(PublisherEND), NEAR(PublisherSTART, 0,5,false,true)->MARKONCE(PublisherEND), ASSIGN(redoContextualRules, true)}; // p=3; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Publisher)}; |
| } |
| |
| // Slot: Institution |
| // Places InstitutionSTART/InstitutionEND markers. The plain rules are keyed to |
| // literal words; the contextual block adds markers based on the proximity of |
| // the opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Capitalized word whose text matches "Department". |
| CW{REGEXP("Department")->MARKONCE(InstitutionSTART)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Token directly before the capitalized word "Golomb". |
| ANY{->MARKONCE(InstitutionEND)} CW{REGEXP("Golomb")}; // p=1; n=0 |
| |
| BLOCK(contextualRules_Institution) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked capitalized word followed by an EnglishStopWord, with an |
| // InstitutionEND within distance 0..9 (NEAR; first boolean true = forward). |
| CW{-IS(InstitutionSTART), NEAR(InstitutionEND, 0,9,true,true)->MARKONCE(InstitutionSTART), ASSIGN(redoContextualRules, true)} EnglishStopWord; // p=1; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked LastToken with an InstitutionSTART within distance 0..9 |
| // (first boolean false = search backward). |
| LastToken{-IS(InstitutionEND), NEAR(InstitutionSTART, 0,9,false,true)->MARKONCE(InstitutionEND), ASSIGN(redoContextualRules, true)}; // p=1; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Institution)}; |
| } |
| |
| // Slot: Volume |
| // Places VolumeSTART/VolumeEND markers: first from local token patterns, then, |
| // inside the contextual block, from the proximity of the opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Number after "CW ANY"; number before an opening parenthesis; token after |
| // "CAP COMMA". |
| CW ANY NUM{->MARKONCE(VolumeSTART)}; // p=11; n=0 |
| NUM{->MARKONCE(VolumeSTART)} LParen; // p=6; n=0 |
| CAP COMMA ANY{->MARKONCE(VolumeSTART)}; // p=3; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Comma followed by one token and then a SPECIAL whose text matches "-". |
| COMMA{->MARKONCE(VolumeEND)} ANY SPECIAL{REGEXP("-")}; // p=14; n=1 |
| |
| BLOCK(contextualRules_Volume) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Each rule marks an unmarked token that has a VolumeEND within distance 0..9 |
| // (NEAR; first boolean true = search forward); they differ only in the |
| // neighbouring tokens they require. |
| CW COMMA ANY{-IS(VolumeSTART), NEAR(VolumeEND, 0,9,true,true)->MARKONCE(VolumeSTART), ASSIGN(redoContextualRules, true)}; // p=11; n=1 |
| ANY{-IS(VolumeSTART), NEAR(VolumeEND, 0,9,true,true)->MARKONCE(VolumeSTART), ASSIGN(redoContextualRules, true)} LParen; // p=6; n=0 |
| CAP ANY NUM{-IS(VolumeSTART), NEAR(VolumeEND, 0,9,true,true)->MARKONCE(VolumeSTART), ASSIGN(redoContextualRules, true)}; // p=3; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked token followed by one token and a "-" SPECIAL, with a VolumeSTART |
| // within distance 0..9 (first boolean false = search backward). |
| ANY{-IS(VolumeEND), NEAR(VolumeSTART, 0,9,false,true)->MARKONCE(VolumeEND), ASSIGN(redoContextualRules, true)} ANY SPECIAL{REGEXP("-")}; // p=14; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Volume)}; |
| } |
| |
| // Slot: Editor |
| // This section contains no boundary rules, so nothing in this file creates |
| // EditorSTART or EditorEND markers. |
| // LEFT BOUNDARY RULES: |
| |
| // RIGHT BOUNDARY RULES: |
| |
| // Contains no contextual rules either; the block only resets the shared flag. |
| BLOCK(contextualRules_Editor) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| |
| // Disabled re-run of this block. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Editor)}; |
| } |
| |
| // Slot: Title |
| // Places TitleSTART/TitleEND markers: first from local token patterns and |
| // literal words, then, inside the contextual block, from the proximity of the |
| // opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Capitalized word two tokens after a closing parenthesis. |
| RParen ANY CW{->MARKONCE(TitleSTART)}; // p=20; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Period between a lower-case word and a capitalized word; token before "Wiley" |
| // or "Springer" (the same words open a Publisher span in the Publisher rules); |
| // period followed by one token and a lower-case word matching "of". |
| SW PERIOD{->MARKONCE(TitleEND)} CW; // p=16; n=0 |
| ANY{->MARKONCE(TitleEND)} CW{REGEXP("Wiley")}; // p=1; n=0 |
| PERIOD{->MARKONCE(TitleEND)} ANY SW{REGEXP("of")}; // p=7; n=0 |
| ANY{->MARKONCE(TitleEND)} CW{REGEXP("Springer")}; // p=1; n=0 |
| |
| BLOCK(contextualRules_Title) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked token two tokens after a closing parenthesis, with a TitleEND |
| // within distance 0..16 (NEAR; first boolean true = search forward). |
| RParen ANY ANY{-IS(TitleSTART), NEAR(TitleEND, 0,16,true,true)->MARKONCE(TitleSTART), ASSIGN(redoContextualRules, true)}; // p=20; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked period followed by one token and an EnglishStopWord, with a |
| // TitleSTART within distance 0..16 (first boolean false = search backward). |
| PERIOD{-IS(TitleEND), NEAR(TitleSTART, 0,16,false,true)->MARKONCE(TitleEND), ASSIGN(redoContextualRules, true)} ANY EnglishStopWord; // p=8; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Title)}; |
| } |
| |
| // Slot: Booktitle |
| // Places BooktitleSTART/BooktitleEND markers. The plain rules are keyed to |
| // literal text; the contextual block adds markers based on the proximity of the |
| // opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Capitalized word whose text matches "Proceedings". |
| CW{REGEXP("Proceedings")->MARKONCE(BooktitleSTART)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Token directly before the number "1273" (a literal taken over from the |
| // learned rule; presumably specific to one training document -- TODO confirm). |
| ANY{->MARKONCE(BooktitleEND)} NUM{REGEXP("1273")}; // p=1; n=0 |
| |
| BLOCK(contextualRules_Booktitle) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked capitalized word with a BooktitleEND within distance 0..6 |
| // (NEAR; first boolean true = search forward). |
| CW{-IS(BooktitleSTART), NEAR(BooktitleEND, 0,6,true,true)->MARKONCE(BooktitleSTART), ASSIGN(redoContextualRules, true)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked comma with a BooktitleSTART within distance 0..6 |
| // (first boolean false = search backward). |
| COMMA{-IS(BooktitleEND), NEAR(BooktitleSTART, 0,6,false,true)->MARKONCE(BooktitleEND), ASSIGN(redoContextualRules, true)}; // p=1; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Booktitle)}; |
| } |
| |
| // Slot: Note |
| // This section contains no boundary rules, so nothing in this file creates |
| // NoteSTART or NoteEND markers. |
| // LEFT BOUNDARY RULES: |
| |
| // RIGHT BOUNDARY RULES: |
| |
| // Contains no contextual rules either; the block only resets the shared flag. |
| BLOCK(contextualRules_Note) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| |
| // Disabled re-run of this block. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Note)}; |
| } |
| |
| // Slot: Journal |
| // Places JournalSTART/JournalEND markers: first from local token patterns and |
| // one literal word, then, inside the contextual block, from the proximity of |
| // the opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Token after "SW PERIOD" when it is a capitalized word followed by another |
| // capitalized word, or any token followed by an EnglishStopWord; token after a |
| // PeriodSep that is part of a FirstNameInitial; the word "Econometrica". |
| SW PERIOD CW{->MARKONCE(JournalSTART)} CW; // p=6; n=0 |
| SW PERIOD ANY{->MARKONCE(JournalSTART)} EnglishStopWord; // p=6; n=0 |
| PeriodSep ANY{PARTOF(FirstNameInitial)->MARKONCE(JournalSTART)}; // p=1; n=0 |
| CW{REGEXP("Econometrica")->MARKONCE(JournalSTART)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Token between a capitalized word and a number; token before "NUM LParen"; |
| // comma after an all-caps word. |
| CW ANY{->MARKONCE(JournalEND)} NUM; // p=11; n=0 |
| ANY{->MARKONCE(JournalEND)} NUM LParen; // p=6; n=0 |
| CAP COMMA{->MARKONCE(JournalEND)}; // p=3; n=0 |
| |
| BLOCK(contextualRules_Journal) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked token after "SW PERIOD", with a JournalEND within distance 0..9 |
| // (NEAR; first boolean true = search forward). |
| SW PERIOD ANY{-IS(JournalSTART), NEAR(JournalEND, 0,9,true,true)->MARKONCE(JournalSTART), ASSIGN(redoContextualRules, true)}; // p=14; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Each rule marks an unmarked token that has a JournalSTART within distance |
| // 0..9 (first boolean false = search backward); they differ only in the |
| // neighbouring tokens they require. |
| CW COMMA{-IS(JournalEND), NEAR(JournalSTART, 0,9,false,true)->MARKONCE(JournalEND), ASSIGN(redoContextualRules, true)}; // p=11; n=1 |
| ANY{-IS(JournalEND), NEAR(JournalSTART, 0,9,false,true)->MARKONCE(JournalEND), ASSIGN(redoContextualRules, true)} ANY LParen; // p=6; n=0 |
| CAP ANY{-IS(JournalEND), NEAR(JournalSTART, 0,9,false,true)->MARKONCE(JournalEND), ASSIGN(redoContextualRules, true)}; // p=3; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Journal)}; |
| } |
| |
| // Slot: Location |
| // Places LocationSTART/LocationEND markers. The plain rules are keyed to |
| // literal words; the contextual block adds markers based on the proximity of |
| // the opposite marker. |
| // LEFT BOUNDARY RULES: |
| // Capitalized word whose text matches "New" or "Lawrence". |
| CW{REGEXP("New")->MARKONCE(LocationSTART)}; // p=2; n=0 |
| CW{REGEXP("Lawrence")->MARKONCE(LocationSTART)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Token directly after "York"; token directly before "Dechter". |
| CW{REGEXP("York")} ANY{->MARKONCE(LocationEND)}; // p=2; n=0 |
| ANY{->MARKONCE(LocationEND)} CW{REGEXP("Dechter")}; // p=1; n=0 |
| |
| BLOCK(contextualRules_Location) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked capitalized word followed by another capitalized word, or unmarked |
| // FirstName after a comma; both need a LocationEND within distance 0..4 |
| // (NEAR; first boolean true = search forward). |
| CW{-IS(LocationSTART), NEAR(LocationEND, 0,4,true,true)->MARKONCE(LocationSTART), ASSIGN(redoContextualRules, true)} CW; // p=2; n=0 |
| COMMA FirstName{-IS(LocationSTART), NEAR(LocationEND, 0,4,true,true)->MARKONCE(LocationSTART), ASSIGN(redoContextualRules, true)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked LastToken with a LocationSTART within distance 0..4 |
| // (first boolean false = search backward). |
| LastToken{-IS(LocationEND), NEAR(LocationSTART, 0,4,false,true)->MARKONCE(LocationEND), ASSIGN(redoContextualRules, true)}; // p=3; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Location)}; |
| } |
| |
| // Slot: Tech |
| // Places TechSTART/TechEND markers. The plain rules are keyed to literal words; |
| // the contextual block adds markers based on the proximity of the opposite |
| // marker. |
| // LEFT BOUNDARY RULES: |
| // Capitalized word whose text matches "Tech". |
| CW{REGEXP("Tech")->MARKONCE(TechSTART)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY RULES: |
| // Token directly before "Department" (the same word opens an Institution span |
| // in the Institution rules of this file). |
| ANY{->MARKONCE(TechEND)} CW{REGEXP("Department")}; // p=1; n=0 |
| |
| BLOCK(contextualRules_Tech) Document{} { |
| Document{->ASSIGN(redoContextualRules, false)}; // reset flag |
| |
| // LEFT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked capitalized word with a TechEND within distance 0..5 |
| // (NEAR; first boolean true = search forward). |
| CW{-IS(TechSTART), NEAR(TechEND, 0,5,true,true)->MARKONCE(TechSTART), ASSIGN(redoContextualRules, true)}; // p=1; n=0 |
| |
| // RIGHT BOUNDARY CONTEXTUAL RULES: |
| // Unmarked comma with a TechSTART within distance 0..5 |
| // (first boolean false = search backward). |
| COMMA{-IS(TechEND), NEAR(TechSTART, 0,5,false,true)->MARKONCE(TechEND), ASSIGN(redoContextualRules, true)}; // p=1; n=0 |
| |
| // Disabled: would re-run this block while contextual rules keep adding markers. |
| //Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_Tech)}; |
| } |
| |
| //slot-building rules: |
| // Convert paired AuthorSTART/AuthorEND markers into Author annotations and |
| // consume the markers. Author is not declared in this file; presumably it comes |
| // from the type system or the imported script -- TODO confirm. |
| // Case 1: one token carries both markers, so that token alone becomes the Author. |
| AuthorSTART{IS(AuthorEND)->UNMARK(AuthorSTART), UNMARK(AuthorEND), MARKONCE(Author)}; |
| // Case 2: START, at most 61 tokens in between (reluctant match), then END; the |
| // Author annotation covers rule elements 1 through 3. |
| AuthorSTART{->UNMARK(AuthorSTART)} ANY[0, 61]? AuthorEND{->UNMARK(AuthorEND), MARKONCE(Author, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| AuthorSTART{->UNMARK(AuthorSTART)}; |
| AuthorEND{->UNMARK(AuthorEND)}; |
| |
| //slot-building rules: |
| // Convert paired DateSTART/DateEND markers into Date annotations and consume |
| // the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the Date. |
| DateSTART{IS(DateEND)->UNMARK(DateSTART), UNMARK(DateEND), MARKONCE(Date)}; |
| // Case 2: START, at most 13 tokens in between (reluctant match), then END; the |
| // Date annotation covers rule elements 1 through 3. |
| DateSTART{->UNMARK(DateSTART)} ANY[0, 13]? DateEND{->UNMARK(DateEND), MARKONCE(Date, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| DateSTART{->UNMARK(DateSTART)}; |
| DateEND{->UNMARK(DateEND)}; |
| |
| //slot-building rules: |
| // Convert paired PagesSTART/PagesEND markers into Pages annotations and consume |
| // the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the Pages. |
| PagesSTART{IS(PagesEND)->UNMARK(PagesSTART), UNMARK(PagesEND), MARKONCE(Pages)}; |
| // Case 2: START, at most 10 tokens in between (reluctant match), then END; the |
| // Pages annotation covers rule elements 1 through 3. |
| PagesSTART{->UNMARK(PagesSTART)} ANY[0, 10]? PagesEND{->UNMARK(PagesEND), MARKONCE(Pages, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| PagesSTART{->UNMARK(PagesSTART)}; |
| PagesEND{->UNMARK(PagesEND)}; |
| |
| //slot-building rules: |
| // Convert paired PublisherSTART/PublisherEND markers into Publisher annotations |
| // and consume the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the |
| // Publisher. |
| PublisherSTART{IS(PublisherEND)->UNMARK(PublisherSTART), UNMARK(PublisherEND), MARKONCE(Publisher)}; |
| // Case 2: START, at most 13 tokens in between (reluctant match), then END; the |
| // Publisher annotation covers rule elements 1 through 3. |
| PublisherSTART{->UNMARK(PublisherSTART)} ANY[0, 13]? PublisherEND{->UNMARK(PublisherEND), MARKONCE(Publisher, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| PublisherSTART{->UNMARK(PublisherSTART)}; |
| PublisherEND{->UNMARK(PublisherEND)}; |
| |
| //slot-building rules: |
| // Convert paired InstitutionSTART/InstitutionEND markers into Institution |
| // annotations and consume the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the |
| // Institution. |
| InstitutionSTART{IS(InstitutionEND)->UNMARK(InstitutionSTART), UNMARK(InstitutionEND), MARKONCE(Institution)}; |
| // Case 2: START, at most 25 tokens in between (reluctant match), then END; the |
| // Institution annotation covers rule elements 1 through 3. |
| InstitutionSTART{->UNMARK(InstitutionSTART)} ANY[0, 25]? InstitutionEND{->UNMARK(InstitutionEND), MARKONCE(Institution, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| InstitutionSTART{->UNMARK(InstitutionSTART)}; |
| InstitutionEND{->UNMARK(InstitutionEND)}; |
| |
| //slot-building rules: |
| // Convert paired VolumeSTART/VolumeEND markers into Volume annotations and |
| // consume the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the Volume. |
| VolumeSTART{IS(VolumeEND)->UNMARK(VolumeSTART), UNMARK(VolumeEND), MARKONCE(Volume)}; |
| // Case 2: START, at most 25 tokens in between (reluctant match), then END; the |
| // Volume annotation covers rule elements 1 through 3. |
| VolumeSTART{->UNMARK(VolumeSTART)} ANY[0, 25]? VolumeEND{->UNMARK(VolumeEND), MARKONCE(Volume, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| VolumeSTART{->UNMARK(VolumeSTART)}; |
| VolumeEND{->UNMARK(VolumeEND)}; |
| |
| //slot-building rules: |
| // Convert paired EditorSTART/EditorEND markers into Editor annotations and |
| // consume the markers. The Editor boundary sections of this file contain no |
| // rules, so these statements only match if the markers are created elsewhere. |
| // Case 1: one token carries both markers, so that token alone becomes the Editor. |
| EditorSTART{IS(EditorEND)->UNMARK(EditorSTART), UNMARK(EditorEND), MARKONCE(Editor)}; |
| // Case 2: START directly followed by END (no filler element, unlike the slots |
| // with learned rules); the Editor annotation covers rule elements 1 through 2. |
| EditorSTART{->UNMARK(EditorSTART)} EditorEND{->UNMARK(EditorEND), MARKONCE(Editor, 1, 2)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| EditorSTART{->UNMARK(EditorSTART)}; |
| EditorEND{->UNMARK(EditorEND)}; |
| |
| //slot-building rules: |
| // Convert paired TitleSTART/TitleEND markers into Title annotations and consume |
| // the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the Title. |
| TitleSTART{IS(TitleEND)->UNMARK(TitleSTART), UNMARK(TitleEND), MARKONCE(Title)}; |
| // Case 2: START, at most 46 tokens in between (reluctant match), then END; the |
| // Title annotation covers rule elements 1 through 3. |
| TitleSTART{->UNMARK(TitleSTART)} ANY[0, 46]? TitleEND{->UNMARK(TitleEND), MARKONCE(Title, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| TitleSTART{->UNMARK(TitleSTART)}; |
| TitleEND{->UNMARK(TitleEND)}; |
| |
| //slot-building rules: |
| // Convert paired BooktitleSTART/BooktitleEND markers into Booktitle annotations |
| // and consume the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the |
| // Booktitle. |
| BooktitleSTART{IS(BooktitleEND)->UNMARK(BooktitleSTART), UNMARK(BooktitleEND), MARKONCE(Booktitle)}; |
| // Case 2: START, at most 16 tokens in between (reluctant match), then END; the |
| // Booktitle annotation covers rule elements 1 through 3. |
| BooktitleSTART{->UNMARK(BooktitleSTART)} ANY[0, 16]? BooktitleEND{->UNMARK(BooktitleEND), MARKONCE(Booktitle, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| BooktitleSTART{->UNMARK(BooktitleSTART)}; |
| BooktitleEND{->UNMARK(BooktitleEND)}; |
| |
| //slot-building rules: |
| // Convert paired NoteSTART/NoteEND markers into Note annotations and consume |
| // the markers. The Note boundary sections of this file contain no rules, so |
| // these statements only match if the markers are created elsewhere. |
| // Case 1: one token carries both markers, so that token alone becomes the Note. |
| NoteSTART{IS(NoteEND)->UNMARK(NoteSTART), UNMARK(NoteEND), MARKONCE(Note)}; |
| // Case 2: START directly followed by END (no filler element, unlike the slots |
| // with learned rules); the Note annotation covers rule elements 1 through 2. |
| NoteSTART{->UNMARK(NoteSTART)} NoteEND{->UNMARK(NoteEND), MARKONCE(Note, 1, 2)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| NoteSTART{->UNMARK(NoteSTART)}; |
| NoteEND{->UNMARK(NoteEND)}; |
| |
| //slot-building rules: |
| // Convert paired JournalSTART/JournalEND markers into Journal annotations and |
| // consume the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the |
| // Journal. |
| JournalSTART{IS(JournalEND)->UNMARK(JournalSTART), UNMARK(JournalEND), MARKONCE(Journal)}; |
| // Case 2: START, at most 25 tokens in between (reluctant match), then END; the |
| // Journal annotation covers rule elements 1 through 3. |
| JournalSTART{->UNMARK(JournalSTART)} ANY[0, 25]? JournalEND{->UNMARK(JournalEND), MARKONCE(Journal, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| JournalSTART{->UNMARK(JournalSTART)}; |
| JournalEND{->UNMARK(JournalEND)}; |
| |
| //slot-building rules: |
| // Convert paired LocationSTART/LocationEND markers into Location annotations |
| // and consume the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the |
| // Location. |
| LocationSTART{IS(LocationEND)->UNMARK(LocationSTART), UNMARK(LocationEND), MARKONCE(Location)}; |
| // Case 2: START, at most 10 tokens in between (reluctant match), then END; the |
| // Location annotation covers rule elements 1 through 3. |
| LocationSTART{->UNMARK(LocationSTART)} ANY[0, 10]? LocationEND{->UNMARK(LocationEND), MARKONCE(Location, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| LocationSTART{->UNMARK(LocationSTART)}; |
| LocationEND{->UNMARK(LocationEND)}; |
| |
| //slot-building rules: |
| // Convert paired TechSTART/TechEND markers into Tech annotations and consume |
| // the markers. |
| // Case 1: one token carries both markers, so that token alone becomes the Tech. |
| TechSTART{IS(TechEND)->UNMARK(TechSTART), UNMARK(TechEND), MARKONCE(Tech)}; |
| // Case 2: START, at most 13 tokens in between (reluctant match), then END; the |
| // Tech annotation covers rule elements 1 through 3. |
| TechSTART{->UNMARK(TechSTART)} ANY[0, 13]? TechEND{->UNMARK(TechEND), MARKONCE(Tech, 1, 3)}; |
| |
| //cleaning up: |
| // Remove every marker the two rules above left unpaired. |
| TechSTART{->UNMARK(TechSTART)}; |
| TechEND{->UNMARK(TechEND)}; |