blob: 8d13a9def6d635e0fd0884cbe56c034ee734cb43 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta.seed;
import java.util.*;
import java.util.regex.*;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.JCas;
import org.apache.uima.ruta.type.AMP;
import org.apache.uima.ruta.type.BREAK;
import org.apache.uima.ruta.type.CAP;
import org.apache.uima.ruta.type.COLON;
import org.apache.uima.ruta.type.COMMA;
import org.apache.uima.ruta.type.CW;
import org.apache.uima.ruta.type.EXCLAMATION;
import org.apache.uima.ruta.type.MARKUP;
import org.apache.uima.ruta.type.NBSP;
import org.apache.uima.ruta.type.NUM;
import org.apache.uima.ruta.type.PERIOD;
import org.apache.uima.ruta.type.QUESTION;
import org.apache.uima.ruta.type.SEMICOLON;
import org.apache.uima.ruta.type.SPACE;
import org.apache.uima.ruta.type.SPECIAL;
import org.apache.uima.ruta.type.SW;
%%
%{
private JCas cas;
public void setJCas(JCas cas) {
this.cas = cas;
}
%}
%unicode
%line
%char
%type AnnotationFS
%class SeedLexer
ALPHA=[A-Za-z]
DIGIT=[0-9]
WHITE_SPACE_CHAR=[\n\r\ \t\b\012]
BREAK=[\n\r\b\012]
SPACE=[ \t]
%%
<YYINITIAL> {
\<[/][!][^>]*> {
MARKUP t = new MARKUP(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
\<[!][^>]*> {
MARKUP t = new MARKUP(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
\xA0|&nbsp;|&NBSP; {
NBSP t = new NBSP(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
&{ALPHA}+; {
AMP t = new AMP(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
{BREAK} {
BREAK t = new BREAK(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
{SPACE} {
SPACE t = new SPACE(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
":" {
COLON t = new COLON(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
"," {
COMMA t = new COMMA(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
"." {
PERIOD t = new PERIOD(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
"!" {
EXCLAMATION t = new EXCLAMATION(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
";" {
SEMICOLON t = new SEMICOLON(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
"?" {
QUESTION t = new QUESTION(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
[:lowercase:]+ {
SW t = new SW(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
[:uppercase:][:lowercase:]* {
CW t = new CW(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
[:uppercase:]+ {
CAP t = new CAP(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
{DIGIT}+ {
NUM t = new NUM(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
. {
SPECIAL t = new SPECIAL(cas);
t.setBegin(yychar);
t.setEnd(yychar + yytext().length());
return t;
}
<<EOF>> {
return null;
}
}