#!/usr/bin/perl
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
use perltator;
use strict;
# Annotator start-up hook.
#
# Takes the annotator context as its only argument, keeps it in $main::ac,
# and reads three configuration values from it:
#   "matchString" -> $main::matchString
#   "SourceFile"  -> $main::thisScript
#   "DebugLevel"  -> $main::debug (read as an integer)
# When the debug level is above 0 the match string in use is printed.
sub initialize {
    my ($context) = @_;

    $main::ac          = $context;
    $main::matchString = $context->extractValue("matchString");
    $main::thisScript  = $context->extractValue("SourceFile");
    $main::debug       = $context->extractIntegerValue("DebugLevel");

    print "$main::thisScript: Using match string = $main::matchString \n"
        if $main::debug > 0;
}
# Type-system hook: looks up the keyword annotation type.
#
# Takes the type system object as its only argument, asks it for the type
# "com.ibm.uima.examples.keyword" and stores the result in
# $main::keywordtype for later use by process().
#
# If the returned type reports itself as not valid, the error is logged
# through $main::ac->logError() and also placed in $@.
sub typeSystemInit {
    my $ts = shift;

    if ($main::debug > 10) {
        # Newline-terminated like every other debug line in this script, so
        # it does not run into whatever is printed next.
        print "$main::thisScript: Type system init called\n";
    }

    my $keytype = "com.ibm.uima.examples.keyword";
    $main::keywordtype = $ts->getType($keytype);

    if (!$main::keywordtype->isValid()) {
        my $error = "$main::thisScript: $keytype is NOT found in type system!";
        $main::ac->logError($error);
        # set eval error to cause annotator to exit prematurely
        $@ = $error;
    }
}
#
# the process method is passed two parameters, the CAS and
# the ResultsSpecification
# Annotates every occurrence of the configured match string in the document.
#
# Parameters:
#   $tcas - the CAS; supplies the document text, the index repository and
#           the annotation factory.
#   $rs   - the ResultsSpecification; accepted but not used here.
#
# The document text is scanned case-insensitively for $main::matchString
# (treated as a regular expression). For each match an annotation of type
# $main::keywordtype covering the matched span is created and added to the
# index repository.
#
# Debug output, controlled by $main::debug:
#   > 10  entry message and number of annotations created
#   > 20  number of annotations found by iterating the keyword index
#   > 30  additionally, the covered text of each annotation in that index
sub process {
    my ($tcas, $rs) = @_;

    if ($main::debug > 10) {
        print "$main::thisScript: This is a process function\n";
    }

    my $text       = $tcas->getDocumentText();
    my $index      = $tcas->getIndexRepository();
    my $annotCount = 0;

    # No /o modifier here: /o would freeze the pattern at its first use, so a
    # later initialize() with a different matchString would be silently
    # ignored. Perl only recompiles an interpolated pattern when the
    # interpolated string has changed, so nothing is lost by dropping it.
    # The outer capture group is kept so that group numbering inside the
    # configured expression stays as it was.
    while ($text =~ m/($main::matchString)/ig) {
        # Copy the match offsets right away; @- and @+ are overwritten by
        # the next successful match.
        my ($begin, $end) = ($-[1], $+[1]);
        my $fs = $tcas->createAnnotation($main::keywordtype, $begin, $end);
        $index->addFS($fs);
        $annotCount++;
    }

    if ($main::debug > 10) {
        print "$main::thisScript: created $annotCount annotations\n";
    }

    if ($main::debug > 20) {
        my $annots   = 0;
        my $iterator = $tcas->getAnnotationIndex($main::keywordtype)->iterator();
        while ($iterator->isValid()) {
            $annots += 1;
            if ($main::debug > 30) {
                my $anno    = $iterator->get();
                my $covered = $anno->getCoveredText();
                # Texts longer than 40 characters are shown as their first
                # 20 characters followed by "...".
                if (length($covered) > 40) {
                    $covered = substr($covered, 0, 20) . "...";
                }
                # Collapse runs of newlines so each annotation prints on one line.
                $covered =~ s/\n+/ /g;
                print "Annotation type=", $main::keywordtype->getName(), ": \"$covered\"\n";
            }
            $iterator->moveToNext();
        }
        print "$main::thisScript: found $annots annotations\n";
    }
}