Merge pull request #3 from thammegowda/glove-rnn-classifier
text sequence classification using GloVe and RNN/LSTMs
diff --git a/aws-ec2-testing-scripts/README.md b/aws-ec2-testing-scripts/README.md
new file mode 100644
index 0000000..16b7752
--- /dev/null
+++ b/aws-ec2-testing-scripts/README.md
@@ -0,0 +1,58 @@
+# OpenNLP Testing Scripts
+
+These scripts are useful for testing OpenNLP builds on EC2.
+
+## Directory Structure
+
+These scripts expect the following directory structure:
+
+* `/opt/` - Contains these scripts.
+* `/opt/opennlp` - Contains the OpenNLP code as cloned from https://github.com/apache/opennlp.
+* `/opt/opennlp-data` - Contains the data required for some of the OpenNLP tests. Contact dev@opennlp.apache.org for information on this data.
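+
+For example, the layout can be created by hand; this is a sketch of what the CloudFormation template's user data (see below) does, minus copying the test data:
+
+```
+git clone https://github.com/apache/opennlp-sandbox.git
+mv opennlp-sandbox/aws-ec2-testing-scripts/* /opt/
+git clone https://github.com/apache/opennlp.git
+mv opennlp /opt/
+mkdir /opt/opennlp-data
+```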
+
+## EC2 Instance Requirements
+
+* The instance must have the AWS CLI installed.
+* The scripts use SNS to send notifications, so the instance must have permission to publish SNS messages, either through an instance role or via access/secret keys configured in the AWS CLI.
+* You must have an existing SNS topic to publish messages to, and you must set its ARN in the `notify.sh` script.
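+
+As a quick check, you can publish a test message from the instance; the ARN below is the placeholder used in `notify.sh`:
+
+```
+aws sns publish \
+  --topic-arn "arn:aws:sns:us-east-1:XXXXXXXXXXXX:opennlp-notification" \
+  --message "test" --subject "OpenNLP Notification"
+```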
+
+## Notifications and Results
+
+You can configure the subject, message, and destination (topic ARN) in the `notify.sh` script. The build log exceeds the 256 KB SNS message size limit, so it cannot be sent in the notification itself. Instead, you can modify the `notify.sh` script to upload the build results to an S3 bucket.
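+
+For example, the commented-out upload step in `notify.sh` looks like this once `BUCKET` is set:
+
+```
+aws s3 cp nohup.out "s3://$BUCKET/$ACTION-output-$TIMESTAMP.txt"
+```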
+
+## CloudFormation Template
+
+The CloudFormation template `cf-template.json` can help with creating the instance.
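+
+For example, a stack can be created with the AWS CLI; the stack name and key pair below are placeholders:
+
+```
+aws cloudformation create-stack \
+  --stack-name opennlp-testing \
+  --template-body file://cf-template.json \
+  --parameters ParameterKey=KeyName,ParameterValue=my-keypair
+```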
diff --git a/aws-ec2-testing-scripts/cf-template.json b/aws-ec2-testing-scripts/cf-template.json
new file mode 100644
index 0000000..45fe14f
--- /dev/null
+++ b/aws-ec2-testing-scripts/cf-template.json
@@ -0,0 +1,199 @@
+{
+ "AWSTemplateFormatVersion": "2010-09-09",
+ "Description": "Stack for running OpenNLP testing.",
+ "Parameters": {
+ "InstanceType": {
+ "Description": "EC2 instance type.",
+ "Type": "String",
+ "Default": "r4.xlarge"
+ },
+ "Image": {
+ "Description": "The base AMI.",
+ "Type": "String",
+ "Default": "ami-80861296"
+ },
+ "KeyName": {
+ "Description": "An existing EC2 keypair.",
+ "Type": "AWS::EC2::KeyPair::KeyName",
+ "ConstraintDescription": "Must be the name of an existing EC2 keypair."
+ }
+ },
+ "Resources": {
+ "VPC": {
+ "Type": "AWS::EC2::VPC",
+ "Properties": {
+ "CidrBlock": "10.0.0.0/16",
+ "Tags": [
+ {
+ "Key": "Application",
+ "Value": {
+ "Ref": "AWS::StackId"
+ }
+ }
+ ]
+ }
+ },
+ "Subnet": {
+ "Type": "AWS::EC2::Subnet",
+ "Properties": {
+ "VpcId": {
+ "Ref": "VPC"
+ },
+ "CidrBlock": "10.0.0.0/24",
+ "Tags": [
+ {
+ "Key": "Application",
+ "Value": {
+ "Ref": "AWS::StackId"
+ }
+ }
+ ]
+ }
+ },
+ "InternetGateway": {
+ "Type": "AWS::EC2::InternetGateway",
+ "Properties": {
+ "Tags": [
+ {
+ "Key": "Application",
+ "Value": {
+ "Ref": "AWS::StackId"
+ }
+ }
+ ]
+ }
+ },
+ "AttachGateway": {
+ "Type": "AWS::EC2::VPCGatewayAttachment",
+ "Properties": {
+ "VpcId": {
+ "Ref": "VPC"
+ },
+ "InternetGatewayId": {
+ "Ref": "InternetGateway"
+ }
+ }
+ },
+ "RouteTable": {
+ "Type": "AWS::EC2::RouteTable",
+ "Properties": {
+ "VpcId": {
+ "Ref": "VPC"
+ },
+ "Tags": [
+ {
+ "Key": "Application",
+ "Value": {
+ "Ref": "AWS::StackId"
+ }
+ }
+ ]
+ }
+ },
+ "Route": {
+ "Type": "AWS::EC2::Route",
+ "DependsOn": "AttachGateway",
+ "Properties": {
+ "RouteTableId": {
+ "Ref": "RouteTable"
+ },
+ "DestinationCidrBlock": "0.0.0.0/0",
+ "GatewayId": {
+ "Ref": "InternetGateway"
+ }
+ }
+ },
+ "SubnetRouteTableAssociation": {
+ "Type": "AWS::EC2::SubnetRouteTableAssociation",
+ "Properties": {
+ "SubnetId": {
+ "Ref": "Subnet"
+ },
+ "RouteTableId": {
+ "Ref": "RouteTable"
+ }
+ }
+ },
+ "NetworkAcl": {
+ "Type": "AWS::EC2::NetworkAcl",
+ "Properties": {
+ "VpcId": {
+ "Ref": "VPC"
+ },
+ "Tags": [
+ {
+ "Key": "Application",
+ "Value": {
+ "Ref": "AWS::StackId"
+ }
+ }
+ ]
+ }
+ },
+ "InstanceSecurityGroup": {
+ "Type": "AWS::EC2::SecurityGroup",
+ "Properties": {
+ "VpcId": {
+ "Ref": "VPC"
+ },
+ "GroupDescription": "Enable SSH access via port 22",
+ "SecurityGroupIngress": [
+ {
+ "IpProtocol": "tcp",
+ "FromPort": "22",
+ "ToPort": "22",
+ "CidrIp": "0.0.0.0/0"
+ }
+ ]
+ }
+ },
+ "OpenNLPInstance": {
+ "Type": "AWS::EC2::Instance",
+ "DependsOn": "AttachGateway",
+ "Properties": {
+ "ImageId": {
+ "Ref": "Image"
+ },
+ "InstanceType": {
+ "Ref": "InstanceType"
+ },
+ "KeyName": {
+ "Ref": "KeyName"
+ },
+ "Tags": [
+ {
+ "Key": "Application",
+ "Value": {
+ "Ref": "AWS::StackId"
+ }
+ },
+ {
+ "Key": "Name",
+ "Value": "OpenNLP Testing"
+ }
+ ],
+ "UserData": {
+ "Fn::Base64": {
+ "Fn::Join": [
+ "",
+ [
+ "#!/bin/bash -xe\n",
+ "apt-get update && sudo apt-get -y dist-upgrade\n",
+ "apt-get install -y openjdk-8-jdk git maven awscli\n",
+ "# Get the scripts\n",
+ "git clone https://github.com/apache/opennlp-sandbox.git\n",
+ "mv opennlp-sandbox/aws-ec2-testing-scripts/* /opt/\n",
+ "# Get OpenNLP\n",
+ "git clone https://github.com/apache/opennlp.git\n",
+ "mv opennlp /opt/\n",
+ "mkdir /opt/opennlp-data\n",
+ "aws s3 cp s3://bucket/opennlp-data /opt/opennlp-data --recursive\n"
+ ]
+ ]
+ }
+ }
+ }
+ }
+ },
+ "Outputs": {}
+}
diff --git a/aws-ec2-testing-scripts/notify.sh b/aws-ec2-testing-scripts/notify.sh
new file mode 100755
index 0000000..3a853e8
--- /dev/null
+++ b/aws-ec2-testing-scripts/notify.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Configure these values as desired.
+
+# Notifications via SNS.
+TOPIC_ARN="arn:aws:sns:us-east-1:XXXXXXXXXXXX:opennlp-notification"
+SUBJECT="OpenNLP Notification"
+
+# Upload of build output to S3. (Uncomment below to enable.)
+BUCKET=""
+
+# Received from the testing scripts.
+MESSAGE=$1
+ACTION=$2
+
+# Publish the message to SNS.
+aws sns publish --topic-arn "$TOPIC_ARN" --message "$MESSAGE" --subject "$SUBJECT"
+
+# Upload the build output to S3.
+TIMESTAMP=$(date +"%T")
+#aws s3 cp nohup.out s3://$BUCKET/$ACTION-output-$TIMESTAMP.txt
diff --git a/aws-ec2-testing-scripts/run-eval-tests.sh b/aws-ec2-testing-scripts/run-eval-tests.sh
new file mode 100755
index 0000000..b98bd1e
--- /dev/null
+++ b/aws-ec2-testing-scripts/run-eval-tests.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
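+# Run the eval tests in the background; notify.sh publishes an SNS notification when the build completes (output goes to nohup.out).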
+rm -f ./nohup.out
+nohup sh -c 'cd opennlp && mvn clean install -Peval-tests -DOPENNLP_DATA_DIR=/opt/opennlp-data/ && /opt/notify.sh "eval-tests complete" "eval-tests"' &
diff --git a/aws-ec2-testing-scripts/run-high-memory-tests.sh b/aws-ec2-testing-scripts/run-high-memory-tests.sh
new file mode 100755
index 0000000..3891318
--- /dev/null
+++ b/aws-ec2-testing-scripts/run-high-memory-tests.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
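+# Run the high memory tests in the background; notify.sh publishes an SNS notification when the build completes (output goes to nohup.out).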
+rm -f nohup.out
+nohup sh -c 'cd opennlp && mvn clean install -Phigh-memory-tests -DOPENNLP_DATA_DIR=/opt/opennlp-data/ && /opt/notify.sh "High memory tests complete" "high-memory-tests"' &
diff --git a/opennlp-coref/pom.xml b/opennlp-coref/pom.xml
index 8c5d89e..033ffc2 100644
--- a/opennlp-coref/pom.xml
+++ b/opennlp-coref/pom.xml
@@ -30,7 +30,7 @@
</parent>
<artifactId>opennlp-coref</artifactId>
- <version>1.5.4-SNAPSHOT</version>
+ <version>1.6.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Apache OpenNLP Coreferencer</name>
@@ -38,7 +38,7 @@
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
- <version>1.5.3</version>
+ <version>1.6.0</version>
<scope>compile</scope>
</dependency>
@@ -69,23 +69,11 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
- <source>1.5</source>
- <target>1.5</target>
+ <source>1.8</source>
+ <target>1.8</target>
<compilerArgument>-Xlint</compilerArgument>
</configuration>
</plugin>
-
- <plugin>
- <artifactId>maven-javadoc-plugin</artifactId>
- <executions>
- <execution>
- <id>create-javadoc-jar</id>
- <configuration>
- <excludePackageNames>opennlp.tools.cmdline</excludePackageNames>
- </configuration>
- </execution>
- </executions>
- </plugin>
</plugins>
</build>
</project>
diff --git a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
index 94648de..885951c 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
@@ -53,19 +53,19 @@
public CorefParse(List<Parse> parses, DiscourseEntity[] entities) {
this.parses = parses;
parseMap = new HashMap<Parse, Integer>();
- for (int ei=0,en=entities.length;ei<en;ei++) {
+ for (int ei = 0, en = entities.length; ei < en;ei++) {
if (entities[ei].getNumMentions() > 1) {
for (Iterator<MentionContext> mi = entities[ei].getMentions(); mi.hasNext();) {
MentionContext mc = mi.next();
Parse mentionParse = ((DefaultParse) mc.getParse()).getParse();
- parseMap.put(mentionParse,ei+1);
+ parseMap.put(mentionParse,ei + 1);
}
}
}
}
public void show() {
- for (int pi=0,pn=parses.size();pi<pn;pi++) {
+ for (int pi = 0, pn = parses.size(); pi < pn;pi++) {
Parse p = parses.get(pi);
show(p);
System.out.println();
@@ -79,13 +79,13 @@
System.out.print("(");
System.out.print(p.getType());
if (parseMap.containsKey(p)) {
- System.out.print("#"+parseMap.get(p));
+ System.out.print("#" + parseMap.get(p));
}
//System.out.print(p.hashCode()+"-"+parseMap.containsKey(p));
System.out.print(" ");
}
Parse[] children = p.getChildren();
- for (int pi=0,pn=children.length;pi<pn;pi++) {
+ for (int pi = 0, pn = children.length; pi < pn;pi++) {
Parse c = children[pi];
Span s = c.getSpan();
if (start < s.getStart()) {
@@ -134,19 +134,21 @@
while ((line = lineStream.read()) != null) {
if (line.equals("")) {
- DiscourseEntity[] entities = treebankLinker.getEntities(document.toArray(new Mention[document.size()]));
+ DiscourseEntity[] entities =
+ treebankLinker.getEntities(document.toArray(new Mention[document.size()]));
//showEntities(entities);
new CorefParse(parses,entities).show();
- sentenceNumber=0;
+ sentenceNumber = 0;
document.clear();
parses.clear();
}
else {
Parse p = Parse.parseParse(line);
parses.add(p);
- Mention[] extents = treebankLinker.getMentionFinder().getMentions(new DefaultParse(p,sentenceNumber));
+ Mention[] extents =
+ treebankLinker.getMentionFinder().getMentions(new DefaultParse(p,sentenceNumber));
//construct new parses for mentions which don't have constituents.
- for (int ei=0,en=extents.length;ei<en;ei++) {
+ for (int ei = 0, en = extents.length; ei < en;ei++) {
//System.err.println("PennTreebankLiner.main: "+ei+" "+extents[ei]);
if (extents[ei].getParse() == null) {
diff --git a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTrainerTool.java b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTrainerTool.java
index 4a84073..2549c32 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTrainerTool.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTrainerTool.java
@@ -21,8 +21,8 @@
import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.TerminateToolException;
-import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.cmdline.coref.CoreferencerTrainerTool.TrainerToolParams;
+import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.coref.CorefSample;
import opennlp.tools.coref.CorefTrainer;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/AbstractLinker.java b/opennlp-coref/src/main/java/opennlp/tools/coref/AbstractLinker.java
index 184718c..38af1f7 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/AbstractLinker.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/AbstractLinker.java
@@ -97,7 +97,9 @@
}
/**
- * Resolves the specified mention to an entity in the specified discourse model or creates a new entity for the mention.
+ * Resolves the specified mention to an entity in the specified discourse model
+ * or creates a new entity for the mention.
+ *
* @param mention The mention to resolve.
* @param discourseModel The discourse model of existing entities.
*/
@@ -114,7 +116,7 @@
}
else if (mode == LinkerMode.TRAIN) {
entities[ri] = resolvers[ri].retain(mention, discourseModel);
- if (ri+1 != resolvers.length) {
+ if (ri + 1 != resolvers.length) {
canResolve = true;
}
}
@@ -135,7 +137,8 @@
}
}
if (!canResolve && removeUnresolvedMentions) {
- //System.err.println("No resolver for: "+econtext.toText()+ " head="+econtext.headTokenText+" "+econtext.headTokenTag);
+ //System.err.println("No resolver for: "+econtext.toText()
+ // + " head="+econtext.headTokenText+" "+econtext.headTokenTag);
validEntity = false;
}
DiscourseEntity de = checkForMerges(discourseModel, entities);
@@ -155,7 +158,8 @@
* @param entity The entity which is mentioned by the specified mention.
* @param useDiscourseModel Whether the mentions should be kept as an entity or simply co-indexed.
*/
- protected void updateExtent(DiscourseModel dm, MentionContext mention, DiscourseEntity entity, boolean useDiscourseModel) {
+ protected void updateExtent(DiscourseModel dm, MentionContext mention, DiscourseEntity entity,
+ boolean useDiscourseModel) {
if (useDiscourseModel) {
if (entity != null) {
//System.err.println("AbstractLinker.updateExtent: addingExtent:
@@ -174,18 +178,21 @@
else {
//System.err.println("AbstractLinker.updateExtent: creatingExtent:
// "+econtext.toText()+" "+econtext.gender+" "+econtext.number);
- entity = new DiscourseEntity(mention, mention.getGender(), mention.getGenderProb(), mention.getNumber(), mention.getNumberProb());
+ entity = new DiscourseEntity(mention, mention.getGender(), mention.getGenderProb(),
+ mention.getNumber(), mention.getNumberProb());
dm.addEntity(entity);
}
}
else {
if (entity != null) {
- DiscourseEntity newEntity = new DiscourseEntity(mention, mention.getGender(), mention.getGenderProb(), mention.getNumber(), mention.getNumberProb());
+ DiscourseEntity newEntity = new DiscourseEntity(mention, mention.getGender(),
+ mention.getGenderProb(), mention.getNumber(), mention.getNumberProb());
dm.addEntity(newEntity);
newEntity.setId(entity.getId());
}
else {
- DiscourseEntity newEntity = new DiscourseEntity(mention, mention.getGender(), mention.getGenderProb(), mention.getNumber(), mention.getNumberProb());
+ DiscourseEntity newEntity = new DiscourseEntity(mention, mention.getGender(),
+ mention.getGenderProb(), mention.getNumber(), mention.getNumberProb());
dm.addEntity(newEntity);
}
}
@@ -235,30 +242,33 @@
}
public MentionContext[] constructMentionContexts(Mention[] mentions) {
- int mentionInSentenceIndex=-1;
- int numMentionsInSentence=-1;
+ int mentionInSentenceIndex = -1;
+ int numMentionsInSentence = -1;
int prevSentenceIndex = -1;
MentionContext[] contexts = new MentionContext[mentions.length];
- for (int mi=0,mn=mentions.length;mi<mn;mi++) {
+ for (int mi = 0,mn = mentions.length;mi < mn; mi++) {
Parse mentionParse = mentions[mi].getParse();
//System.err.println("AbstractLinker.constructMentionContexts: mentionParse="+mentionParse);
if (mentionParse == null) {
- System.err.println("no parse for "+mentions[mi]);
+ System.err.println("no parse for " + mentions[mi]);
}
int sentenceIndex = mentionParse.getSentenceNumber();
if (sentenceIndex != prevSentenceIndex) {
- mentionInSentenceIndex=0;
+ mentionInSentenceIndex = 0;
prevSentenceIndex = sentenceIndex;
numMentionsInSentence = 0;
- for (int msi=mi;msi<mentions.length;msi++) {
+ for (int msi = mi; msi < mentions.length; msi++) {
if (sentenceIndex != mentions[msi].getParse().getSentenceNumber()) {
break;
}
numMentionsInSentence++;
}
}
- contexts[mi]=new MentionContext(mentions[mi], mentionInSentenceIndex, numMentionsInSentence, mi, sentenceIndex, getHeadFinder());
- //System.err.println("AbstractLinker.constructMentionContexts: mi="+mi+" sn="+mentionParse.getSentenceNumber()+" extent="+mentions[mi]+" parse="+mentionParse.getSpan()+" mc="+contexts[mi].toText());
+ contexts[mi] = new MentionContext(mentions[mi], mentionInSentenceIndex,
+ numMentionsInSentence, mi, sentenceIndex, getHeadFinder());
+ //System.err.println("AbstractLinker.constructMentionContexts: mi="+mi
+ // +" sn="+mentionParse.getSentenceNumber()+" extent="+mentions[mi]+" parse="
+ // +mentionParse.getSpan()+" mc="+contexts[mi].toText());
contexts[mi].setId(mentions[mi].getId());
mentionInSentenceIndex++;
if (mode != LinkerMode.SIM) {
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java
index d74e037..50adcbc 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java
@@ -15,7 +15,6 @@
* limitations under the License.
*/
-
package opennlp.tools.coref;
import java.io.BufferedReader;
@@ -26,11 +25,10 @@
import java.io.FileReader;
import java.io.IOException;
import java.util.zip.GZIPInputStream;
-import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
-//import opennlp.maxent.io.BinaryGISModelReader;
-//import opennlp.model.AbstractModel;
-import opennlp.tools.ml.model.AbstractModel;
+
import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
+import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.util.StringList;
import opennlp.tools.util.model.BaseModel;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/CorefTrainer.java b/opennlp-coref/src/main/java/opennlp/tools/coref/CorefTrainer.java
index 9d6ec8c..3eb9dc3 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/CorefTrainer.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/CorefTrainer.java
@@ -53,9 +53,9 @@
Parse p = ((DefaultParse) corefParse).getParse();
- Mention extents[] = mentionFinder.getMentions(corefParse);
+ Mention[] extents = mentionFinder.getMentions(corefParse);
- for (int ei = 0, en = extents.length; ei < en;ei++) {
+ for (int ei = 0, en = extents.length; ei < en; ei++) {
if (extents[ei].getParse() == null) {
@@ -117,7 +117,7 @@
genTrain.trainModel();
numTrain.trainModel();
- MaxentResolver.setSimilarityModel(SimilarityModel.testModel(modelDirectory + "/coref"+"/sim"));
+ MaxentResolver.setSimilarityModel(SimilarityModel.testModel(modelDirectory + "/coref" + "/sim"));
// Done with similarity training
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/DefaultLinker.java b/opennlp-coref/src/main/java/opennlp/tools/coref/DefaultLinker.java
index 74ebbfc..1869a12 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/DefaultLinker.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/DefaultLinker.java
@@ -62,27 +62,32 @@
}
/**
- * Creates a new linker with the specified model directory, running in the specified mode which uses a discourse model
+ * Creates a new linker with the specified model directory,
+ * running in the specified mode which uses a discourse model
* based on the specified parameter.
* @param modelDirectory The directory where the models for this linker are kept.
* @param mode The mode that this linker is running in.
* @param useDiscourseModel Whether the model should use a discourse model or not.
* @throws IOException when the models can not be read or written to based on the mode.
*/
- public DefaultLinker(String modelDirectory, LinkerMode mode, boolean useDiscourseModel) throws IOException {
+ public DefaultLinker(String modelDirectory, LinkerMode mode, boolean useDiscourseModel)
+ throws IOException {
this(modelDirectory,mode,useDiscourseModel,-1);
}
/**
- * Creates a new linker with the specified model directory, running in the specified mode which uses a discourse model
+ * Creates a new linker with the specified model directory,
+ * running in the specified mode which uses a discourse model
* based on the specified parameter and uses the specified fixed non-referential probability.
* @param modelDirectory The directory where the models for this linker are kept.
* @param mode The mode that this linker is running in.
* @param useDiscourseModel Whether the model should use a discourse model or not.
- * @param fixedNonReferentialProbability The probability which resolvers are required to exceed to positi a coreference relationship.
+ * @param fixedNonReferentialProbability The probability which resolvers are
+ * required to exceed to posit a coreference relationship.
* @throws IOException when the models can not be read or written to based on the mode.
*/
- public DefaultLinker(String modelDirectory, LinkerMode mode, boolean useDiscourseModel, double fixedNonReferentialProbability) throws IOException {
+ public DefaultLinker(String modelDirectory, LinkerMode mode, boolean useDiscourseModel,
+ double fixedNonReferentialProbability) throws IOException {
super(modelDirectory, mode, useDiscourseModel);
if (mode != LinkerMode.SIM) {
mcm = new MaxentCompatibilityModel(corefProject);
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseElement.java b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseElement.java
index 9336fad..9d4b533 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseElement.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseElement.java
@@ -33,7 +33,7 @@
public abstract class DiscourseElement {
private List<MentionContext> extents;
- private int id=-1;
+ private int id = -1;
private MentionContext lastExtent;
/**
@@ -48,11 +48,12 @@
}
/**
- * Returns an iterator over the mentions which iterates through them based on which were most recently mentioned.
+ * Returns an iterator over the mentions which iterates through them
+ * based on which were most recently mentioned.
* @return the {@link Iterator}.
*/
public Iterator<MentionContext> getRecentMentions() {
- return(new ReverseListIterator<MentionContext>(extents));
+ return new ReverseListIterator<MentionContext>(extents);
}
/**
@@ -62,7 +63,7 @@
* @return the {@link Iterator}
*/
public Iterator<MentionContext> getMentions() {
- return(extents.listIterator());
+ return extents.listIterator();
}
/**
@@ -71,7 +72,7 @@
* @return number of mentions
*/
public int getNumMentions() {
- return(extents.size());
+ return extents.size();
}
/**
@@ -80,7 +81,7 @@
*/
public void addMention(MentionContext mention) {
extents.add(mention);
- lastExtent=mention;
+ lastExtent = mention;
}
/**
@@ -89,7 +90,7 @@
* @return the last mention for this element.
*/
public MentionContext getLastExtent() {
- return(lastExtent);
+ return lastExtent;
}
/**
@@ -97,7 +98,7 @@
* @param id The id.
*/
public void setId(int id) {
- this.id=id;
+ this.id = id;
}
/**
@@ -106,7 +107,7 @@
* @return the id associated with this element.
*/
public int getId() {
- return(id);
+ return id;
}
@Override
@@ -120,6 +121,6 @@
de.append(", ").append(ex.toText());//.append("<").append(ex.getHeadText()).append(">");
}
de.append(" ]");
- return(de.toString());
+ return de.toString();
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseEntity.java b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseEntity.java
index f92a883..9b8f794 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseEntity.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseEntity.java
@@ -41,7 +41,8 @@
* @param number The number for this entity.
* @param numberProb The probability that the specified number is correct.
*/
- public DiscourseEntity(MentionContext mention, GenderEnum gender, double genderProb, NumberEnum number, double numberProb) {
+ public DiscourseEntity(MentionContext mention, GenderEnum gender, double genderProb,
+ NumberEnum number, double numberProb) {
super(mention);
this.gender = gender;
this.genderProb = genderProb;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseModel.java
index f0552a7..b7bd1e0 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseModel.java
@@ -49,7 +49,7 @@
entities.add(0,e);
}
else {
- System.err.println("DiscourseModel.mentionEntity: failed to remove "+e);
+ System.err.println("DiscourseModel.mentionEntity: failed to remove " + e);
}
}
@@ -91,7 +91,7 @@
* @param confidence The confidence.
*/
public void mergeEntities(DiscourseEntity e1,DiscourseEntity e2,float confidence) {
- for (Iterator<MentionContext> ei=e2.getMentions();ei.hasNext();) {
+ for (Iterator<MentionContext> ei = e2.getMentions(); ei.hasNext();) {
e1.addMention(ei.next());
}
//System.err.println("DiscourseModel.mergeEntities: removing "+e2);
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/Linker.java b/opennlp-coref/src/main/java/opennlp/tools/coref/Linker.java
index 8e0c249..263b1b1 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/Linker.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/Linker.java
@@ -83,7 +83,9 @@
public DiscourseEntity[] getEntities(Mention[] mentions);
/**
- * Creates mention contexts for the specified mention exents. These are used to compute coreference features over.
+ * Creates mention contexts for the specified mention extents.
+ * These are used to compute coreference features over.
+ *
* @param mentions The mention of a document.
*
* @return mention contexts for the specified mention extents.
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/TreebankLinker.java b/opennlp-coref/src/main/java/opennlp/tools/coref/TreebankLinker.java
index db265e7..2850faf 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/TreebankLinker.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/TreebankLinker.java
@@ -40,7 +40,8 @@
super(project,mode,useDiscourseModel);
}
- public TreebankLinker(String project, LinkerMode mode, boolean useDiscourseModel, double fixedNonReferentialProbability) throws IOException {
+ public TreebankLinker(String project, LinkerMode mode, boolean useDiscourseModel,
+ double fixedNonReferentialProbability) throws IOException {
super(project,mode,useDiscourseModel,fixedNonReferentialProbability);
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
index 4bf28a2..8d50266 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
@@ -92,7 +92,8 @@
Parse child0 = parts.get(0);
Parse child1 = parts.get(1);
Parse child2 = parts.get(2);
- if (child1.isToken() && child1.getSyntacticType().equals("POS") && child0.isNounPhrase() && child2.isNounPhrase()) {
+ if (child1.isToken() && child1.getSyntacticType().equals("POS")
+ && child0.isNounPhrase() && child2.isNounPhrase()) {
return true;
}
}
@@ -155,10 +156,12 @@
if ((tokStr.equals("and") || tokStr.equals("or")) && !isPartOfName(tok)) {
if (lastNpTokenIndex != ti) {
if (ti - 1 >= 0 && (npTokens.get(ti - 1)).getSyntacticType().startsWith("NN")) {
- Span npSpan = new Span((npTokens.get(ti + 1)).getSpan().getStart(), (npTokens.get(lastNpTokenIndex)).getSpan().getEnd());
+ Span npSpan = new Span((npTokens.get(ti + 1)).getSpan().getStart(),
+ npTokens.get(lastNpTokenIndex).getSpan().getEnd());
Mention snpExtent = new Mention(npSpan, npSpan, tok.getEntityId(), null,"CNP");
entities.add(snpExtent);
- //System.err.println("adding extent for conjunction in: "+np+" preeceeded by "+((Parse) npTokens.get(ti-1)).getSyntacticType());
+ //System.err.println("adding extent for conjunction in: "+np+" preeceeded by "
+ // +((Parse) npTokens.get(ti-1)).getSyntacticType());
inCoordinatedNounPhrase = true;
}
else {
@@ -169,7 +172,8 @@
}
else if (inCoordinatedNounPhrase && tokStr.equals(",")) {
if (lastNpTokenIndex != ti) {
- Span npSpan = new Span((npTokens.get(ti + 1)).getSpan().getStart(), (npTokens.get(lastNpTokenIndex)).getSpan().getEnd());
+ Span npSpan = new Span((npTokens.get(ti + 1)).getSpan().getStart(),
+ npTokens.get(lastNpTokenIndex).getSpan().getEnd());
Mention snpExtent = new Mention(npSpan, npSpan, tok.getEntityId(), null,"CNP");
entities.add(snpExtent);
//System.err.println("adding extent for comma in: "+np);
@@ -177,7 +181,8 @@
lastNpTokenIndex = ti - 1;
}
else if (inCoordinatedNounPhrase && ti == 0 && lastNpTokenIndex >= 0) {
- Span npSpan = new Span((npTokens.get(ti)).getSpan().getStart(), (npTokens.get(lastNpTokenIndex)).getSpan().getEnd());
+ Span npSpan = new Span((npTokens.get(ti)).getSpan().getStart(),
+ npTokens.get(lastNpTokenIndex).getSpan().getEnd());
Mention snpExtent = new Mention(npSpan, npSpan, tok.getEntityId(), null,"CNP");
entities.add(snpExtent);
//System.err.println("adding extent for start coord in: "+np);
@@ -192,14 +197,16 @@
}
private void collectPossesivePronouns(Parse np, List<Mention> entities) {
- //TODO: Look at how training is done and examine whether this is needed or can be accomidated in a different way.
+ //TODO: Look at how training is done and examine whether this is
+ // needed or can be accommodated in a different way.
/*
List snps = np.getSubNounPhrases();
if (snps.size() != 0) {
//System.err.println("AbstractMentionFinder: Found existing snps");
for (int si = 0, sl = snps.size(); si < sl; si++) {
Parse snp = (Parse) snps.get(si);
- Extent ppExtent = new Extent(snp.getSpan(), snp.getSpan(), snp.getEntityId(), null,Linker.PRONOUN_MODIFIER);
+ Extent ppExtent = new Extent(snp.getSpan(), snp.getSpan(), snp.getEntityId(),
+ null,Linker.PRONOUN_MODIFIER);
entities.add(ppExtent);
}
}
@@ -214,8 +221,10 @@
continue;
}
if (tok.getSyntacticType().startsWith("PRP") && handledPronoun(tok.toString())) {
- Mention ppExtent = new Mention(tok.getSpan(), tok.getSpan(), tok.getEntityId(), null,Linker.PRONOUN_MODIFIER);
- //System.err.println("AbstractEntityFinder.collectPossesivePronouns: adding possesive pronoun: "+tok+" "+tok.getEntityId());
+ Mention ppExtent = new Mention(tok.getSpan(), tok.getSpan(),
+ tok.getEntityId(), null,Linker.PRONOUN_MODIFIER);
+ //System.err.println("AbstractEntityFinder.collectPossesivePronouns: adding possesive pronoun: "
+ // +tok+" "+tok.getEntityId());
entities.add(ppExtent);
//System.err.println("AbstractMentionFinder: adding pos-pro: "+ppExtent);
break;
@@ -240,7 +249,7 @@
private boolean isHeadOfExistingMention(Parse np, Map<Parse, Parse> headMap,
Set<Parse> mentions) {
Parse head = headMap.get(np);
- while(head != null){
+ while (head != null) {
if (mentions.contains(head)) {
return true;
}
@@ -251,8 +260,8 @@
private void clearMentions(Set<Parse> mentions, Parse np) {
- Span npSpan =np.getSpan();
- for(Iterator<Parse> mi=mentions.iterator();mi.hasNext();) {
+ Span npSpan = np.getSpan();
+ for (Iterator<Parse> mi = mentions.iterator(); mi.hasNext();) {
Parse mention = mi.next();
if (!mention.getSpan().contains(npSpan)) {
//System.err.println("clearing "+mention+" for "+np);
@@ -267,7 +276,8 @@
//System.err.println("AbtractMentionFinder.collectMentions: "+headMap);
for (int npi = 0, npl = nps.size(); npi < npl; npi++) {
Parse np = nps.get(npi);
- //System.err.println("AbstractMentionFinder: collectMentions: np[" + npi + "]=" + np + " head=" + headMap.get(np));
+ //System.err.println("AbstractMentionFinder: collectMentions: np[" + npi + "]="
+ // + np + " head=" + headMap.get(np));
if (!isHeadOfExistingMention(np,headMap, recentMentions)) {
clearMentions(recentMentions, np);
if (!isPartOfName(np)) {
@@ -283,11 +293,13 @@
}
}
else {
- //System.err.println("AbstractMentionFinder.collectMentions excluding np as part of name. np=" + np);
+ //System.err.println(
+ // "AbstractMentionFinder.collectMentions excluding np as part of name. np=" + np);
}
}
- else {
- //System.err.println("AbstractMentionFinder.collectMentions excluding np as head of previous mention. np=" + np);
+ else {
+ //System.err.println(
+ // "AbstractMentionFinder.collectMentions excluding np as head of previous mention. np=" + np);
}
if (isBasalNounPhrase(np)) {
if (collectPrenominalNamedEntities) {
@@ -325,7 +337,8 @@
// mentions.add(extent);
// }
// else {
-// System.err.println("AbstractMentionFinder.addPossesiveMentions: odd parse structure: "+possesiveNounPhrase);
+// System.err.println("AbstractMentionFinder.addPossesiveMentions: odd parse structure: "
+// +possesiveNounPhrase);
// }
// }
// }
@@ -401,7 +414,8 @@
List<Parse> nps = p.getNounPhrases();
Collections.sort(nps);
Map<Parse, Parse> headMap = constructHeadMap(nps);
- //System.err.println("AbstractMentionFinder.getMentions: got " + nps.size()); // + " nps, and " + nes.size() + " named entities");
+ //System.err.println("AbstractMentionFinder.getMentions: got " + nps.size()); // + " nps, and "
+ // + nes.size() + " named entities");
Mention[] mentions = collectMentions(nps, headMap);
return mentions;
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractParse.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractParse.java
index b9fcfd3..71b9bb1 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractParse.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractParse.java
@@ -41,10 +41,10 @@
public List<Parse> getNounPhrases() {
List<Parse> parts = getSyntacticChildren();
- List<Parse> nps = new ArrayList<Parse>();
+ List<Parse> nps = new ArrayList<>();
while (parts.size() > 0) {
- List<Parse> newParts = new ArrayList<Parse>();
- for (int pi=0,pn=parts.size();pi<pn;pi++) {
+ List<Parse> newParts = new ArrayList<>();
+ for (int pi = 0, pn = parts.size(); pi < pn;pi++) {
//System.err.println("AbstractParse.getNounPhrases "+parts.get(pi).getClass());
Parse cp = parts.get(pi);
if (cp.isNounPhrase()) {
@@ -58,4 +58,4 @@
}
return nps;
}
- }
+}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
index e096b7b..4d3d1a1 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
@@ -30,12 +30,14 @@
import opennlp.tools.util.Span;
/**
- * This class is a wrapper for {@link opennlp.tools.parser.Parse} mapping it to the API specified in {@link opennlp.tools.coref.mention.Parse}.
- * This allows coreference to be done on the output of the parser.
+ * This class is a wrapper for {@link opennlp.tools.parser.Parse} mapping
+ * it to the API specified in {@link opennlp.tools.coref.mention.Parse}.
+ * This allows coreference to be done on the output of the parser.
*/
public class DefaultParse extends AbstractParse {
- public static String[] NAME_TYPES = {"person", "organization", "location", "date", "time", "percentage", "money"};
+ public static String[] NAME_TYPES = {"person", "organization", "location", "date",
+ "time", "percentage", "money"};
private Parse parse;
private int sentenceNumber;
@@ -118,10 +120,9 @@
}
private List<opennlp.tools.coref.mention.Parse> createParses(Parse[] parses) {
- List<opennlp.tools.coref.mention.Parse> newParses =
- new ArrayList<opennlp.tools.coref.mention.Parse>(parses.length);
+ List<opennlp.tools.coref.mention.Parse> newParses = new ArrayList<>(parses.length);
- for (int pi=0,pn=parses.length;pi<pn;pi++) {
+ for (int pi = 0, pn = parses.length; pi < pn;pi++) {
newParses.add(new DefaultParse(parses[pi],sentenceNumber));
}
@@ -139,7 +140,7 @@
public boolean isParentNAC() {
Parse parent = parse.getParent();
- while(parent != null) {
+ while (parent != null) {
if (parent.getType().equals("NAC")) {
return true;
}
@@ -228,7 +229,7 @@
// get parent and update distance
// if match return distance
// if not match do it again
- }
+ }
return parse.getSpan().compareTo(p.getSpan());
}
@@ -243,10 +244,10 @@
public opennlp.tools.coref.mention.Parse getPreviousToken() {
Parse parent = parse.getParent();
Parse node = parse;
- int index=-1;
+ int index = -1;
//find parent with previous children
- while(parent != null && index < 0) {
- index = parent.indexOf(node)-1;
+ while (parent != null && index < 0) {
+ index = parent.indexOf(node) - 1;
if (index < 0) {
node = parent;
parent = parent.getParent();
@@ -260,7 +261,7 @@
Parse p = parent.getChildren()[index];
while (!p.isPosTag()) {
Parse[] kids = p.getChildren();
- p = kids[kids.length-1];
+ p = kids[kids.length - 1];
}
return new DefaultParse(p,sentenceNumber);
}
@@ -269,10 +270,10 @@
public opennlp.tools.coref.mention.Parse getNextToken() {
Parse parent = parse.getParent();
Parse node = parse;
- int index=-1;
+ int index = -1;
//find parent with subsequent children
- while(parent != null) {
- index = parent.indexOf(node)+1;
+ while (parent != null) {
+ index = parent.indexOf(node) + 1;
if (index == parent.getChildCount()) {
node = parent;
parent = parent.getParent();
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Dictionary.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Dictionary.java
index ef18faa..88ac6e1 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Dictionary.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Dictionary.java
@@ -42,7 +42,7 @@
* @return a key indicating the specified sense number of the specified
* lemma with the specified part-of-speech.
*/
- public String getSenseKey(String lemma, String pos, int senseNumber);
+ public String getSenseKey(String lemma, String pos, int senseNumber);
/**
* Returns the number of senses in the dictionary for the specified lemma.
@@ -54,12 +54,14 @@
public int getNumSenses(String lemma, String pos);
/**
- * Returns an array of keys for each parent of the specified sense number of the specified lemma with the specified part-of-speech.
+ * Returns an array of keys for each parent of the specified sense number
+ * of the specified lemma with the specified part-of-speech.
*
* @param lemma A lemmatized form of the word to look up.
* @param pos The part-of-speech for the lemma.
* @param senseNumber The sense number for which the parent keys are desired.
- * @return an array of keys for each parent of the specified sense number of the specified lemma with the specified part-of-speech.
+ * @return an array of keys for each parent of the specified sense number of
+ * the specified lemma with the specified part-of-speech.
*/
public String[] getParentSenseKeys(String lemma, String pos, int senseNumber);
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DictionaryFactory.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DictionaryFactory.java
index eb0e402..5def919 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DictionaryFactory.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DictionaryFactory.java
@@ -37,10 +37,10 @@
try {
dictionary = new JWNLDictionary(System.getProperty("WNSEARCHDIR"));
}
- catch(IOException e) {
+ catch (IOException e) {
System.err.println(e);
}
- catch(JWNLException e) {
+ catch (JWNLException e) {
System.err.println(e);
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java
index 2c2d4ee..fcc3424 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -56,21 +57,27 @@
public JWNLDictionary(String searchDirectory) throws IOException, JWNLException {
PointerType.initialize();
- Adjective.initialize();
- VerbFrame.initialize();
+ Adjective.initialize();
+ VerbFrame.initialize();
Map<POS, String[][]> suffixMap = new HashMap<POS, String[][]>();
- suffixMap.put(POS.NOUN,new String[][] {{"s",""},{"ses","s"},{"xes","x"},{"zes","z"},{"ches","ch"},{"shes","sh"},{"men","man"},{"ies","y"}});
- suffixMap.put(POS.VERB,new String[][] {{"s",""},{"ies","y"},{"es","e"},{"es",""},{"ed","e"},{"ed",""},{"ing","e"},{"ing",""}});
+ suffixMap.put(POS.NOUN,new String[][] {{"s",""},{"ses","s"},{"xes","x"},{"zes","z"},
+ {"ches","ch"},{"shes","sh"},{"men","man"},{"ies","y"}});
+ suffixMap.put(POS.VERB,new String[][] {{"s",""},{"ies","y"},{"es","e"},{"es",""},{"ed","e"},
+ {"ed",""},{"ing","e"},{"ing",""}});
suffixMap.put(POS.ADJECTIVE,new String[][] {{"er",""},{"est",""},{"er","e"},{"est","e"}});
DetachSuffixesOperation tokDso = new DetachSuffixesOperation(suffixMap);
- tokDso.addDelegate(DetachSuffixesOperation.OPERATIONS,new Operation[] {new LookupIndexWordOperation(),new LookupExceptionsOperation()});
+ tokDso.addDelegate(DetachSuffixesOperation.OPERATIONS,new Operation[] {
+ new LookupIndexWordOperation(),new LookupExceptionsOperation()});
TokenizerOperation tokOp = new TokenizerOperation(new String[] {" ","-"});
- tokOp.addDelegate(TokenizerOperation.TOKEN_OPERATIONS,new Operation[] {new LookupIndexWordOperation(),new LookupExceptionsOperation(),tokDso});
+ tokOp.addDelegate(TokenizerOperation.TOKEN_OPERATIONS,new Operation[] {
+ new LookupIndexWordOperation(),new LookupExceptionsOperation(),tokDso});
DetachSuffixesOperation morphDso = new DetachSuffixesOperation(suffixMap);
- morphDso.addDelegate(DetachSuffixesOperation.OPERATIONS,new Operation[] {new LookupIndexWordOperation(),new LookupExceptionsOperation()});
+ morphDso.addDelegate(DetachSuffixesOperation.OPERATIONS,new Operation[] {
+ new LookupIndexWordOperation(),new LookupExceptionsOperation()});
Operation[] operations = {new LookupExceptionsOperation(), morphDso , tokOp};
morphy = new DefaultMorphologicalProcessor(operations);
- FileManager manager = new FileManagerImpl(searchDirectory,PrincetonRandomAccessDictionaryFile.class);
+ FileManager manager = new FileManagerImpl(searchDirectory,
+ PrincetonRandomAccessDictionaryFile.class);
FileDictionaryElementFactory factory = new PrincetonWN17FileDictionaryElementFactory();
FileBackedDictionary.install(manager, morphy,factory,true);
dict = net.didion.jwnl.dictionary.Dictionary.getInstance();
@@ -123,7 +130,7 @@
public int getNumSenses(String lemma, String pos) {
try {
IndexWord iw = dict.getIndexWord(POS.NOUN,lemma);
- if (iw == null){
+ if (iw == null) {
return 0;
}
return iw.getSenseCount();
@@ -135,7 +142,7 @@
private void getParents(Synset synset, List<String> parents) throws JWNLException {
Pointer[] pointers = synset.getPointers();
- for (int pi=0,pn=pointers.length;pi<pn;pi++) {
+ for (int pi = 0, pn = pointers.length; pi < pn;pi++) {
if (pointers[pi].getType() == PointerType.HYPERNYM) {
Synset parent = pointers[pi].getTargetSynset();
parents.add(String.valueOf(parent.getOffset()));
@@ -149,8 +156,8 @@
try {
IndexWord iw = dict.getIndexWord(POS.NOUN,lemma);
if (iw != null) {
- Synset synset = iw.getSense(sense+1);
- List<String> parents = new ArrayList<String>();
+ Synset synset = iw.getSense(sense + 1);
+ List<String> parents = new ArrayList<>();
getParents(synset,parents);
return parents.toArray(new String[parents.size()]);
}
@@ -166,14 +173,15 @@
public static void main(String[] args) throws IOException, JWNLException {
String searchDir = System.getProperty("WNSEARCHDIR");
- System.err.println("searchDir="+searchDir);
+ System.err.println("searchDir=" + searchDir);
if (searchDir != null) {
Dictionary dict = new JWNLDictionary(System.getProperty("WNSEARCHDIR"));
String word = args[0];
String[] lemmas = dict.getLemmas(word,"NN");
- for (int li=0,ln=lemmas.length;li<ln;li++) {
- for (int si=0,sn=dict.getNumSenses(lemmas[li],"NN");si<sn;si++) {
- System.out.println(lemmas[li]+" ("+si+")\t"+java.util.Arrays.asList(dict.getParentSenseKeys(lemmas[li],"NN",si)));
+ for (int li = 0,ln = lemmas.length; li < ln;li++) {
+ for (int si = 0, sn = dict.getNumSenses(lemmas[li],"NN"); si < sn;si++) {
+ System.out.println(lemmas[li] + " (" + si + ")\t" +
+ Arrays.asList(dict.getParentSenseKeys(lemmas[li],"NN",si)));
}
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Mention.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Mention.java
index 9593eaf..25259e7 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Mention.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Mention.java
@@ -57,18 +57,18 @@
protected String nameType;
public Mention(Span span, Span headSpan, int entityId, Parse parse, String extentType) {
- this.span=span;
- this.headSpan=headSpan;
- this.id=entityId;
- this.type=extentType;
+ this.span = span;
+ this.headSpan = headSpan;
+ this.id = entityId;
+ this.type = extentType;
this.parse = parse;
}
public Mention(Span span, Span headSpan, int entityId, Parse parse, String extentType, String nameType) {
- this.span=span;
- this.headSpan=headSpan;
- this.id=entityId;
- this.type=extentType;
+ this.span = span;
+ this.headSpan = headSpan;
+ this.id = entityId;
+ this.type = extentType;
this.parse = parse;
this.nameType = nameType;
}
@@ -140,7 +140,7 @@
* @param i The id for this mention.
*/
public void setId(int i) {
- id=i;
+ id = i;
}
/**
@@ -154,6 +154,7 @@
@Override
public String toString() {
- return "mention(span="+span+",hs="+headSpan+", type="+type+", id="+id+" "+parse+" )";
+ return "mention(span=" + span + ",hs=" + headSpan + ", type="
+ + type + ", id=" + id + " " + parse + " )";
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java
index be81b79..059910b 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java
@@ -120,7 +120,9 @@
*/
private double numberProb;
- public MentionContext(Span span, Span headSpan, int entityId, Parse parse, String extentType, String nameType, int mentionIndex, int mentionsInSentence, int mentionIndexInDocument, int sentenceIndex, HeadFinder headFinder) {
+ public MentionContext(Span span, Span headSpan, int entityId, Parse parse, String extentType,
+ String nameType, int mentionIndex, int mentionsInSentence,
+ int mentionIndexInDocument, int sentenceIndex, HeadFinder headFinder) {
super(span,headSpan,entityId,parse,extentType,nameType,headFinder);
nounLocation = mentionIndex;
maxNounLocation = mentionsInSentence;
@@ -151,8 +153,11 @@
* @param sentenceIndex The index of the sentence which contains this mention.
* @param headFinder An object which provides head information.
*/
- public MentionContext(Mention mention, int mentionIndexInSentence, int mentionsInSentence, int mentionIndexInDocument, int sentenceIndex, HeadFinder headFinder) {
- this(mention.getSpan(),mention.getHeadSpan(),mention.getId(),mention.getParse(),mention.type,mention.nameType, mentionIndexInSentence,mentionsInSentence,mentionIndexInDocument,sentenceIndex,headFinder);
+ public MentionContext(Mention mention, int mentionIndexInSentence, int mentionsInSentence,
+ int mentionIndexInDocument, int sentenceIndex, HeadFinder headFinder) {
+ this(mention.getSpan(), mention.getHeadSpan(), mention.getId(), mention.getParse(),
+ mention.type, mention.nameType, mentionIndexInSentence, mentionsInSentence,
+ mentionIndexInDocument, sentenceIndex, headFinder);
}
@@ -168,7 +173,8 @@
* @param headFinder Object which provides head information.
*/
/*
- public MentionContext(Parse mentionParse, int mentionIndex, int mentionsInSentence, int mentionsInDocument, int sentenceIndex, String nameType, HeadFinder headFinder) {
+ public MentionContext(Parse mentionParse, int mentionIndex, int mentionsInSentence,
+ int mentionsInDocument, int sentenceIndex, String nameType, HeadFinder headFinder) {
nounLocation = mentionIndex;
maxNounLocation = mentionsInDocument;
sentenceNumber = sentenceIndex;
@@ -200,13 +206,13 @@
*/
private void initHeads(int headIndex) {
- this.headTokenIndex=headIndex;
+ this.headTokenIndex = headIndex;
this.headToken = (Parse) tokens[getHeadTokenIndex()];
this.headTokenText = headToken.toString();
- this.headTokenTag=headToken.getSyntacticType();
+ this.headTokenTag = headToken.getSyntacticType();
this.firstToken = (Parse) tokens[0];
this.firstTokenTag = firstToken.getSyntacticType();
- this.firstTokenText=firstToken.toString();
+ this.firstTokenText = firstToken.toString();
}
/**
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionFinder.java
index 2337dea..73921e2 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionFinder.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionFinder.java
@@ -27,7 +27,8 @@
/**
* Specifies whether pre-nominal named-entities should be collected as mentions.
*
- * @param collectPrenominalNamedEntities true if pre-nominal named-entities should be collected; false otherwise.
+ * @param collectPrenominalNamedEntities true if pre-nominal named-entities
+ * should be collected; false otherwise.
*/
public void setPrenominalNamedEntityCollection(boolean collectPrenominalNamedEntities);
@@ -48,7 +49,8 @@
/**
* Specifies whether coordinated noun phrases should be collected as mentions.
*
- * @param collectCoordinatedNounPhrases true if coordinated noun phrases should be collected; false otherwise.
+ * @param collectCoordinatedNounPhrases true if coordinated noun phrases should be collected;
+ * false otherwise.
*/
public void setCoordinatedNounPhraseCollection(boolean collectCoordinatedNounPhrases);
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
index 723dca8..bb1ae5e 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
@@ -63,7 +63,8 @@
Parse child0 = parts.get(0);
Parse child1 = parts.get(1);
Parse child2 = parts.get(2);
- if (child1.isToken() && child1.getSyntacticType().equals("POS") && child0.isNounPhrase() && child2.isNounPhrase()) {
+ if (child1.isToken() && child1.getSyntacticType().equals("POS")
+ && child0.isNounPhrase() && child2.isNounPhrase()) {
return child2;
}
}
@@ -73,7 +74,7 @@
if (child0.isNounPhrase()) {
List<Parse> ctoks = child0.getTokens();
if (ctoks.size() == 0) {
- System.err.println("PTBHeadFinder: NP "+child0+" with no tokens");
+ System.err.println("PTBHeadFinder: NP " + child0 + " with no tokens");
}
Parse tok = ctoks.get(ctoks.size() - 1);
if (tok.getSyntacticType().equals("POS")) {
@@ -93,7 +94,8 @@
//all other NPs
for (int pi = 0; pi < parts.size(); pi++) {
Parse child = parts.get(pi);
- //System.err.println("PTBHeadFinder.getHead: "+p.getSyntacticType()+" "+p+" child "+pi+"="+child.getSyntacticType()+" "+child);
+ //System.err.println("PTBHeadFinder.getHead: "+p.getSyntacticType()+" "+p
+ // +" child "+pi+"="+child.getSyntacticType()+" "+child);
if (child.isNounPhrase()) {
return child;
}
@@ -110,9 +112,10 @@
boolean countTokens = false;
int tokenCount = 0;
//check for NP -> NN S type structures and return last token before S as head.
- for (int sci=0,scn = sChildren.size();sci<scn;sci++) {
+ for (int sci = 0, scn = sChildren.size(); sci < scn;sci++) {
Parse sc = sChildren.get(sci);
- //System.err.println("PTBHeadFinder.getHeadIndex "+p+" "+p.getSyntacticType()+" sChild "+sci+" type = "+sc.getSyntacticType());
+ //System.err.println("PTBHeadFinder.getHeadIndex "+p+" "+p.getSyntacticType()
+ // +" sChild "+sci+" type = "+sc.getSyntacticType());
if (sc.getSyntacticType().startsWith("S")) {
if (sci != 0) {
countTokens = true;
@@ -122,21 +125,22 @@
}
}
if (countTokens) {
- tokenCount+=sc.getTokens().size();
+ tokenCount += sc.getTokens().size();
}
}
List<Parse> toks = p.getTokens();
if (toks.size() == 0) {
- System.err.println("PTBHeadFinder.getHeadIndex(): empty tok list for parse "+p);
+ System.err.println("PTBHeadFinder.getHeadIndex(): empty tok list for parse " + p);
}
- for (int ti = toks.size() - tokenCount -1; ti >= 0; ti--) {
+ for (int ti = toks.size() - tokenCount - 1; ti >= 0; ti--) {
Parse tok = toks.get(ti);
if (!skipSet.contains(tok.getSyntacticType())) {
return ti;
}
}
- //System.err.println("PTBHeadFinder.getHeadIndex: "+p+" hi="+toks.size()+"-"+tokenCount+" -1 = "+(toks.size()-tokenCount -1));
- return toks.size() - tokenCount -1;
+ //System.err.println("PTBHeadFinder.getHeadIndex: "+p+" hi="+toks.size()+"-"+tokenCount
+ // +" -1 = "+(toks.size()-tokenCount -1));
+ return toks.size() - tokenCount - 1;
}
/** Returns the bottom-most head of a <code>Parse</code>. If no
@@ -148,7 +152,9 @@
while (null != (head = getHead(p))) {
//System.err.print(" -> "+head);
- //if (p.getEntityId() != -1 && head.getEntityId() != p.getEntityId()) { System.err.println(p+" ("+p.getEntityId()+") -> "+head+" ("+head.getEntityId()+")"); }
+ //if (p.getEntityId() != -1 && head.getEntityId() != p.getEntityId()) {
+ // System.err.println(p+" ("+p.getEntityId()+") -> "+head+" ("+head.getEntityId()+")");
+ // }
p = head;
}
//System.err.println(" -> null");
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java
index 553d2ba..659fdd8 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java
@@ -26,8 +26,8 @@
private ShallowParseMentionFinder(HeadFinder hf) {
headFinder = hf;
- collectPrenominalNamedEntities=true;
- collectCoordinatedNounPhrases=true;
+ collectPrenominalNamedEntities = true;
+ collectCoordinatedNounPhrases = true;
}
/**
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/package-info.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/package-info.java
index 075aae6..3f3c79f 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/package-info.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/package-info.java
@@ -18,4 +18,4 @@
/**
* Package related to modeling mentions for coreference resolution.
*/
-package opennlp.tools.coref.mention;
\ No newline at end of file
+package opennlp.tools.coref.mention;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/package-info.java b/opennlp-coref/src/main/java/opennlp/tools/coref/package-info.java
index 8ec4703..4b52eea 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/package-info.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/package-info.java
@@ -18,4 +18,4 @@
/**
* Package related to performing coreference resolution.
*/
-package opennlp.tools.coref;
\ No newline at end of file
+package opennlp.tools.coref;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
index 166b8dd..77b1384 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
@@ -54,7 +54,7 @@
protected int numSentencesBack;
public AbstractResolver(int neb) {
- numEntitiesBack=neb;
+ numEntitiesBack = neb;
showExclusions = true;
distances = new CountedSet<Integer>();
}
@@ -108,14 +108,14 @@
*/
protected int getHeadIndex(MentionContext mention) {
Parse[] mtokens = mention.getTokenParses();
- for (int ti=mtokens.length-1;ti>=0;ti--) {
+ for (int ti = mtokens.length - 1; ti >= 0; ti--) {
Parse tok = mtokens[ti];
if (!tok.getSyntacticType().equals("POS") && !tok.getSyntacticType().equals(",") &&
!tok.getSyntacticType().equals(".")) {
return ti;
}
}
- return mtokens.length-1;
+ return mtokens.length - 1;
}
/**
@@ -157,7 +157,7 @@
protected boolean excluded(MentionContext mention, DiscourseEntity entity) {
MentionContext cec = entity.getLastExtent();
return mention.getSentenceNumber() == cec.getSentenceNumber() &&
- mention.getIndexSpan().getEnd() <= cec.getIndexSpan().getEnd();
+ mention.getIndexSpan().getEnd() <= cec.getIndexSpan().getEnd();
}
public DiscourseEntity retain(MentionContext mention, DiscourseModel dm) {
@@ -184,11 +184,11 @@
*
* @return the string of "_" delimited tokens for the specified mention.
*/
- protected String featureString(MentionContext mention){
+ protected String featureString(MentionContext mention) {
StringBuilder fs = new StringBuilder();
- Object[] mtokens =mention.getTokens();
+ Object[] mtokens = mention.getTokens();
fs.append(mtokens[0].toString());
- for (int ti=1,tl=mtokens.length;ti<tl;ti++) {
+ for (int ti = 1, tl = mtokens.length; ti < tl; ti++) {
fs.append("_").append(mtokens[ti].toString());
}
return fs.toString();
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/CommonNounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/CommonNounResolver.java
index ab2497a..9ee64b6 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/CommonNounResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/CommonNounResolver.java
@@ -35,7 +35,8 @@
preferFirstReferent = true;
}
- public CommonNounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr) throws IOException {
+ public CommonNounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr)
+ throws IOException {
super(projectName,"cmodel", m, 80, true,nrr);
showExclusions = false;
preferFirstReferent = true;
@@ -55,7 +56,8 @@
public boolean canResolve(MentionContext mention) {
String firstTok = mention.getFirstTokenText().toLowerCase();
String firstTokTag = mention.getFirstToken().getSyntacticType();
- boolean rv = mention.getHeadTokenTag().equals("NN") && !ResolverUtils.definiteArticle(firstTok, firstTokTag);
+ boolean rv = mention.getHeadTokenTag().equals("NN")
+ && !ResolverUtils.definiteArticle(firstTok, firstTokTag);
return rv;
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
index 87f9df8..142bab1 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
@@ -25,27 +25,15 @@
import java.util.Iterator;
import java.util.List;
-//import opennlp.maxent.GIS;
-//import opennlp.maxent.io.BinaryGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
-//import opennlp.maxent.GIS;
-import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
-//import opennlp.model.Event;
-import opennlp.tools.ml.model.MaxentModel;
-//import opennlp.model.MaxentModel;
-
-import opennlp.tools.ml.model.EventStream;
-//import opennlp.model.MaxentModel;
import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.mention.Parse;
+import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
+import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
+import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.tools.ml.model.Event;
-import opennlp.tools.util.CollectionEventStream;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.ObjectStreamUtils;
/**
* Default implementation of the {@link NonReferentialResolver} interface.
@@ -61,30 +49,32 @@
private String modelExtension = ".bin.gz";
private int nonRefIndex;
- public DefaultNonReferentialResolver(String projectName, String name, ResolverMode mode) throws IOException {
+ public DefaultNonReferentialResolver(String projectName, String name, ResolverMode mode)
+ throws IOException {
this.mode = mode;
- this.modelName = projectName+"/"+name+".nr";
+ this.modelName = projectName + "/" + name + ".nr";
if (mode == ResolverMode.TRAIN) {
events = new ArrayList<Event>();
}
else if (mode == ResolverMode.TEST) {
if (loadAsResource) {
- model = (new BinaryGISModelReader(new DataInputStream(this.getClass().getResourceAsStream(modelName)))).getModel();
+ model = new BinaryGISModelReader(new DataInputStream(
+ this.getClass().getResourceAsStream(modelName))).getModel();
}
else {
- model = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel();
+ model = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
}
nonRefIndex = model.getIndex(MaxentResolver.SAME);
}
else {
- throw new RuntimeException("unexpected mode "+mode);
+ throw new RuntimeException("unexpected mode " + mode);
}
}
public double getNonReferentialProbability(MentionContext mention) {
List<String> features = getFeatures(mention);
double r = model.eval(features.toArray(new String[features.size()]))[nonRefIndex];
- if (debugOn) System.err.println(this +" " + mention.toText() + " -> null " + r + " " + features);
+ if (debugOn) System.err.println(this + " " + mention.toText() + " -> null " + r + " " + features);
return r;
}
@@ -127,16 +117,18 @@
public void train() throws IOException {
if (ResolverMode.TRAIN == mode) {
- System.err.println(this +" referential");
+ System.err.println(this + " referential");
if (debugOn) {
- FileWriter writer = new FileWriter(modelName+".events");
- for (Iterator<Event> ei=events.iterator();ei.hasNext();) {
+ FileWriter writer = new FileWriter(modelName + ".events");
+ for (Iterator<Event> ei = events.iterator(); ei.hasNext();) {
Event e = ei.next();
- writer.write(e.toString()+"\n");
+ writer.write(e.toString() + "\n");
}
writer.close();
}
- (new SuffixSensitiveGISModelWriter(GIS.trainModel((EventStream)new CollectionEventStream(events),100,10),new File(modelName+modelExtension))).persist();
+ new SuffixSensitiveGISModelWriter(GIS.trainModel(
+ ObjectStreamUtils.createObjectStream(events), 100, 10),
+ new File(modelName + modelExtension)).persist();
}
}
}
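
The train() rewrite above replaces the removed `(EventStream) new CollectionEventStream(events)` cast with `ObjectStreamUtils.createObjectStream(events)`. A minimal sketch of the migrated training call, assuming opennlp-tools on the classpath; the toy events and the `nonref-example.bin.gz` output name are illustrative only (the patch itself trains with 100 iterations and a cutoff of 10):

```java
// Minimal sketch of the migrated training call. The toy events and the
// output file name are illustrative, not taken from the patch.
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import opennlp.tools.ml.maxent.GIS;
import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.tools.ml.model.Event;
import opennlp.tools.util.ObjectStreamUtils;

public class TrainingSketch {
  public static void main(String[] args) throws IOException {
    List<Event> events = new ArrayList<>();
    events.add(new Event("same", new String[] {"default", "hw=company"}));
    events.add(new Event("diff", new String[] {"default", "hw=city"}));
    // Cutoff lowered to 1 so the two toy events survive feature pruning;
    // the patched resolvers use 100 iterations and a cutoff of 10.
    new SuffixSensitiveGISModelWriter(
        GIS.trainModel(ObjectStreamUtils.createObjectStream(events), 100, 1),
        new File("nonref-example.bin.gz")).persist();
  }
}
```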
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefiniteNounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefiniteNounResolver.java
index c64121d..f9921e6 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefiniteNounResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefiniteNounResolver.java
@@ -34,7 +34,8 @@
//preferFirstReferent = true;
}
- public DefiniteNounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr) throws IOException {
+ public DefiniteNounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr)
+ throws IOException {
super(projectName, "defmodel", m, 80,nrr);
//preferFirstReferent = true;
}
@@ -44,7 +45,8 @@
Object[] mtokens = mention.getTokens();
String firstTok = mention.getFirstTokenText().toLowerCase();
- boolean rv = mtokens.length > 1 && !mention.getHeadTokenTag().startsWith("NNP") && ResolverUtils.definiteArticle(firstTok, mention.getFirstTokenTag());
+ boolean rv = mtokens.length > 1 && !mention.getHeadTokenTag().startsWith("NNP")
+ && ResolverUtils.definiteArticle(firstTok, mention.getFirstTokenTag());
//if (rv) {
// System.err.println("defNp "+ec);
//}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java
index 37629d3..412e00f 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java
@@ -49,7 +49,8 @@
public boolean canResolve(MentionContext ec) {
if (ec.getHeadTokenTag().startsWith("NN")) {
- return (ec.getPreviousToken() != null && predicativePattern.matcher(ec.getPreviousToken().toString()).matches());
+ return ec.getPreviousToken() != null
+ && predicativePattern.matcher(ec.getPreviousToken().toString()).matches();
}
return false;
}
@@ -57,13 +58,15 @@
@Override
protected boolean excluded(MentionContext ec, DiscourseEntity de) {
MentionContext cec = de.getLastExtent();
- //System.err.println("IsAResolver.excluded?: ec.span="+ec.getSpan()+" cec.span="+cec.getSpan()+" cec="+cec.toText()+" lastToken="+ec.getNextToken());
+ //System.err.println("IsAResolver.excluded?: ec.span="+ec.getSpan()+" cec.span="+cec.getSpan()
+ // +" cec="+cec.toText()+" lastToken="+ec.getNextToken());
if (ec.getSentenceNumber() != cec.getSentenceNumber()) {
//System.err.println("IsAResolver.excluded: (true) not same sentence");
return (true);
}
//shallow parse appositives
- //System.err.println("IsAResolver.excluded: ec="+ec.toText()+" "+ec.span+" cec="+cec.toText()+" "+cec.span);
+ //System.err.println("IsAResolver.excluded: ec="+ec.toText()+" "
+ // +ec.span+" cec="+cec.toText()+" "+cec.span);
if (cec.getIndexSpan().getEnd() == ec.getIndexSpan().getStart() - 2) {
return (false);
}
@@ -73,7 +76,8 @@
return (false);
}
//full parse w/ trailing comma or period
- if (cec.getIndexSpan().getEnd() <= ec.getIndexSpan().getEnd() + 2 && (ec.getNextToken() != null && (ec.getNextToken().toString().equals(",") || ec.getNextToken().toString().equals(".")))) {
+ if (cec.getIndexSpan().getEnd() <= ec.getIndexSpan().getEnd() + 2 && (ec.getNextToken() != null
+ && (ec.getNextToken().toString().equals(",") || ec.getNextToken().toString().equals(".")))) {
//System.err.println("IsAResolver.excluded: (false) spans end + punct");
return (false);
}
@@ -106,11 +110,12 @@
for (int ci = 0, cn = rightContexts.size(); ci < cn; ci++) {
features.add("r" + rightContexts.get(ci));
}
- features.add("hts"+ant.getHeadTokenTag()+","+mention.getHeadTokenTag());
+ features.add("hts" + ant.getHeadTokenTag() + "," + mention.getHeadTokenTag());
}
/*
if (entity != null) {
- //System.err.println("MaxentIsResolver.getFeatures: ["+ec2.toText()+"] -> ["+de.getLastExtent().toText()+"]");
+ //System.err.println("MaxentIsResolver.getFeatures:
+ [ "+ec2.toText()+"] -> ["+de.getLastExtent().toText()+"]");
//previous word and tag
if (ant.prevToken != null) {
features.add("pw=" + ant.prevToken);
@@ -159,7 +164,8 @@
features.add("w=" + c1toks.get(i) + "|" + "w=" + c2toks.get(j));
features.add("w=" + c1toks.get(i) + "|" + "t=" + ((Parse) c2toks.get(j)).getSyntacticType());
features.add("t=" + ((Parse) c1toks.get(i)).getSyntacticType() + "|" + "w=" + c2toks.get(j));
- features.add("t=" + ((Parse) c1toks.get(i)).getSyntacticType() + "|" + "t=" + ((Parse) c2toks.get(j)).getSyntacticType());
+ features.add("t=" + ((Parse) c1toks.get(i)).getSyntacticType() + "|" + "t=" +
+ ((Parse) c2toks.get(j)).getSyntacticType());
}
}
features.add("ht=" + ant.headTokenTag + "|" + "ht=" + mention.headTokenTag);
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
index 0490dcb..12ff359 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
@@ -24,32 +24,20 @@
import java.util.Iterator;
import java.util.List;
-//import opennlp.maxent.GIS;
-//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
-//import opennlp.model.EventStream;
-//import opennlp.model.MaxentModel;
-
-
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.GIS;
-//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
-//import opennlp.model.Event;
-//import opennlp.model.MaxentModel;
-import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.ml.model.EventStream;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.DiscourseModel;
import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.sim.TestSimilarityModel;
+import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
+import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.tools.ml.model.Event;
-import opennlp.tools.util.CollectionEventStream;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.ObjectStreamUtils;
/**
- * Provides common functionality used by classes which implement the {@link Resolver} class and use maximum entropy models to make resolution decisions.
+ * Provides common functionality used by classes which implement the {@link Resolver} class
+ * and use maximum entropy models to make resolution decisions.
*/
public abstract class MaxentResolver extends AbstractResolver {
@@ -61,7 +49,7 @@
public static final String DEFAULT = "default";
- private static boolean debugOn=false;
+ private static boolean debugOn = false;
private String modelName;
private MaxentModel model;
@@ -75,8 +63,10 @@
* are considered.
*/
protected boolean preferFirstReferent;
- /** When true, this designates that training should consist of a single positive and a single negative example
- * (when possible) for each mention. */
+ /**
+ * When true, this designates that training should consist of a single
+ * positive and a single negative example (when possible) for each mention.
+ */
protected boolean pairedSampleSelection;
/** When true, this designates that the same maximum entropy model should be used non-reference
@@ -93,8 +83,9 @@
private static final String modelExtension = ".bin.gz";
/**
- * Creates a maximum-entropy-based resolver which will look the specified number of entities back for a referent.
- * This constructor is only used for unit testing.
+ * Creates a maximum-entropy-based resolver which will look the specified number of
+ * entities back for a referent. This constructor is only used for unit testing.
+ *
* @param numberOfEntitiesBack
* @param preferFirstReferent
*/
@@ -118,14 +109,16 @@
* @param nonReferentialResolver Determines how likely it is that this entity is non-referential.
* @throws IOException If the model file is not found or can not be written to.
*/
- public MaxentResolver(String modelDirectory, String name, ResolverMode mode, int numberOfEntitiesBack, boolean preferFirstReferent, NonReferentialResolver nonReferentialResolver) throws IOException {
+ public MaxentResolver(String modelDirectory, String name, ResolverMode mode, int numberOfEntitiesBack,
+ boolean preferFirstReferent, NonReferentialResolver nonReferentialResolver)
+ throws IOException {
super(numberOfEntitiesBack);
this.preferFirstReferent = preferFirstReferent;
this.nonReferentialResolver = nonReferentialResolver;
this.mode = mode;
- this.modelName = modelDirectory+"/"+name;
+ this.modelName = modelDirectory + "/" + name;
if (ResolverMode.TEST == this.mode) {
- model = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel();
+ model = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
sameIndex = model.getIndex(SAME);
}
else if (ResolverMode.TRAIN == this.mode) {
@@ -148,22 +141,32 @@
* for a referent.
* @throws IOException If the model file is not found or can not be written to.
*/
- public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack) throws IOException {
+ public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode,
+ int numberEntitiesBack) throws IOException {
this(modelDirectory, modelName, mode, numberEntitiesBack, false);
}
- public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, NonReferentialResolver nonReferentialResolver) throws IOException {
+ public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode,
+ int numberEntitiesBack, NonReferentialResolver nonReferentialResolver)
+ throws IOException {
this(modelDirectory, modelName, mode, numberEntitiesBack, false,nonReferentialResolver);
}
- public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, boolean preferFirstReferent) throws IOException {
- //this(projectName, modelName, mode, numberEntitiesBack, preferFirstReferent, SingletonNonReferentialResolver.getInstance(projectName,mode));
- this(modelDirectory, modelName, mode, numberEntitiesBack, preferFirstReferent, new DefaultNonReferentialResolver(modelDirectory, modelName, mode));
+ public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode,
+ int numberEntitiesBack, boolean preferFirstReferent) throws IOException {
+ //this(projectName, modelName, mode, numberEntitiesBack, preferFirstReferent,
+ // SingletonNonReferentialResolver.getInstance(projectName,mode));
+ this(modelDirectory, modelName, mode, numberEntitiesBack, preferFirstReferent,
+ new DefaultNonReferentialResolver(modelDirectory, modelName, mode));
}
- public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, boolean preferFirstReferent, double nonReferentialProbability) throws IOException {
- //this(projectName, modelName, mode, numberEntitiesBack, preferFirstReferent, SingletonNonReferentialResolver.getInstance(projectName,mode));
- this(modelDirectory, modelName, mode, numberEntitiesBack, preferFirstReferent, new FixedNonReferentialResolver(nonReferentialProbability));
+ public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode,
+ int numberEntitiesBack, boolean preferFirstReferent,
+ double nonReferentialProbability) throws IOException {
+ //this(projectName, modelName, mode, numberEntitiesBack, preferFirstReferent,
+ // SingletonNonReferentialResolver.getInstance(projectName,mode));
+ this(modelDirectory, modelName, mode, numberEntitiesBack, preferFirstReferent,
+ new FixedNonReferentialResolver(nonReferentialProbability));
}
public DiscourseEntity resolve(MentionContext ec, DiscourseModel dm) {
@@ -171,7 +174,7 @@
int ei = 0;
double nonReferentialProbability = nonReferentialResolver.getNonReferentialProbability(ec);
if (debugOn) {
- System.err.println(this +".resolve: " + ec.toText() + " -> " + "null "+nonReferentialProbability);
+ System.err.println(this + ".resolve: " + ec.toText() + " -> " + "null " + nonReferentialProbability);
}
for (; ei < getNumEntities(dm); ei++) {
de = dm.getEntity(ei);
@@ -181,7 +184,8 @@
if (excluded(ec, de)) {
candProbs[ei] = 0;
if (debugOn) {
- System.err.println("excluded "+this +".resolve: " + ec.toText() + " -> " + de + " " + candProbs[ei]);
+ System.err.println("excluded " + this + ".resolve: " + ec.toText() + " -> " + de + " "
+ + candProbs[ei]);
}
}
else {
@@ -195,7 +199,8 @@
candProbs[ei] = 0;
}
if (debugOn) {
- System.err.println(this +".resolve: " + ec.toText() + " -> " + de + " ("+ec.getGender()+","+de.getGender()+") " + candProbs[ei] + " " + lfeatures);
+ System.err.println(this + ".resolve: " + ec.toText() + " -> " + de + " ("
+ + ec.getGender() + "," + de.getGender() + ") " + candProbs[ei] + " " + lfeatures);
}
}
if (preferFirstReferent && candProbs[ei] > nonReferentialProbability) {
@@ -257,20 +262,25 @@
if (excluded(mention, cde)) {
if (showExclusions) {
if (mention.getId() != -1 && entityMention.getId() == mention.getId()) {
- System.err.println(this +".retain: Referent excluded: (" + mention.getId() + ") " + mention.toText() + " " + mention.getIndexSpan() + " -> (" + entityMention.getId() + ") " + entityMention.toText() + " " + entityMention.getSpan() + " " + this);
+ System.err.println(this + ".retain: Referent excluded: (" + mention.getId() + ") "
+ + mention.toText() + " " + mention.getIndexSpan() + " -> (" + entityMention.getId()
+ + ") " + entityMention.toText() + " " + entityMention.getSpan() + " " + this);
}
}
}
else {
hasReferentialCandidate = true;
boolean useAsDifferentExample = defaultReferent(cde);
- //if (!sampleSelection || (mention.getId() != -1 && entityMention.getId() == mention.getId()) || (!nonReferentFound && useAsDifferentExample)) {
+ //if (!sampleSelection || (mention.getId() != -1 && entityMention.getId() == mention.getId())
+ // || (!nonReferentFound && useAsDifferentExample)) {
List<String> features = getFeatures(mention, cde);
//add Event to Model
if (debugOn) {
- System.err.println(this +".retain: " + mention.getId() + " " + mention.toText() + " -> " + entityMention.getId() + " " + cde);
+ System.err.println(this + ".retain: " + mention.getId() + " " + mention.toText()
+ + " -> " + entityMention.getId() + " " + cde);
}
+
if (mention.getId() != -1 && entityMention.getId() == mention.getId()) {
referentFound = true;
events.add(new Event(SAME, features.toArray(new String[features.size()])));
@@ -295,21 +305,23 @@
if (hasReferentialCandidate) {
nonReferentialResolver.addEvent(mention);
}
- return (de);
+
+ return de;
}
else {
- return (super.retain(mention, dm));
+ return super.retain(mention, dm);
}
}
/**
- * Returns a list of features for deciding whether the specified mention refers to the specified discourse entity.
+ * Returns a list of features for deciding whether the specified mention refers to the
+ * specified discourse entity.
* @param mention the mention being considers as possibly referential.
* @param entity The discourse entity with which the mention is being considered referential.
* @return a list of features used to predict reference between the specified mention and entity.
*/
protected List<String> getFeatures(MentionContext mention, DiscourseEntity entity) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
features.add(DEFAULT);
features.addAll(ResolverUtils.getCompatibilityFeatures(mention, entity,simModel));
return features;
@@ -319,15 +331,16 @@
public void train() throws IOException {
if (ResolverMode.TRAIN == mode) {
if (debugOn) {
- System.err.println(this +" referential");
- FileWriter writer = new FileWriter(modelName+".events");
- for (Iterator<Event> ei=events.iterator();ei.hasNext();) {
+ System.err.println(this + " referential");
+ FileWriter writer = new FileWriter(modelName + ".events");
+ for (Iterator<Event> ei = events.iterator(); ei.hasNext();) {
Event e = ei.next();
- writer.write(e.toString()+"\n");
+ writer.write(e.toString() + "\n");
}
writer.close();
}
- (new SuffixSensitiveGISModelWriter(GIS.trainModel((EventStream)new CollectionEventStream(events),100,10),new File(modelName+modelExtension))).persist();
+ (new SuffixSensitiveGISModelWriter(GIS.trainModel(ObjectStreamUtils.createObjectStream(events),
+ 100, 10), new File(modelName + modelExtension))).persist();
nonReferentialResolver.train();
}
}
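
For orientation, the per-candidate scoring that the rewrapped debug output above surrounds reduces to evaluating the feature list against the maxent model and reading the probability at the SAME outcome index. A hedged sketch, assuming an already loaded MaxentModel and the visibility of `MaxentResolver.SAME` that the resolver classes in this package enjoy:

```java
// Sketch of the scoring step inside MaxentResolver.resolve(): evaluate the
// (mention, entity) features and take the SAME outcome's probability.
// `model` is assumed to be a loaded MaxentModel.
import java.util.List;

import opennlp.tools.coref.resolver.MaxentResolver;
import opennlp.tools.ml.model.MaxentModel;

final class ScoringSketch {
  static double sameProbability(MaxentModel model, List<String> features) {
    int sameIndex = model.getIndex(MaxentResolver.SAME);
    double[] outcomeProbs = model.eval(features.toArray(new String[0]));
    return outcomeProbs[sameIndex];
  }
}
```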
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralNounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralNounResolver.java
index 53d66d4..d7a9761 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralNounResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralNounResolver.java
@@ -35,7 +35,8 @@
showExclusions = false;
}
- public PluralNounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr) throws IOException {
+ public PluralNounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr)
+ throws IOException {
super(projectName,"plmodel", m, 80, true,nrr);
showExclusions = false;
}
@@ -56,7 +57,8 @@
public boolean canResolve(MentionContext mention) {
String firstTok = mention.getFirstTokenText().toLowerCase();
String firstTokTag = mention.getFirstToken().getSyntacticType();
- boolean rv = mention.getHeadTokenTag().equals("NNS") && !ResolverUtils.definiteArticle(firstTok, firstTokTag);
+ boolean rv = mention.getHeadTokenTag().equals("NNS")
+ && !ResolverUtils.definiteArticle(firstTok, firstTokTag);
return rv;
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
index 85c8c59..345f118 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
@@ -35,7 +35,8 @@
super(projectName, "tmodel", m, 30);
}
- public PluralPronounResolver(String projectName, ResolverMode m,NonReferentialResolver nrr) throws IOException {
+ public PluralPronounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr)
+ throws IOException {
super(projectName, "tmodel", m, 30,nrr);
}
@@ -81,12 +82,15 @@
@Override
protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
MentionContext cec = entity.getLastExtent();
- //System.err.println("MaxentPluralPronounResolver.outOfRange: ["+ec.toText()+" ("+ec.id+")] ["+cec.toText()+" ("+cec.id+")] ec.sentenceNumber=("+ec.sentenceNumber+")-cec.sentenceNumber=("+cec.sentenceNumber+") > "+NUM_SENTS_BACK_PRONOUNS);
+ //System.err.println("MaxentPluralPronounResolver.outOfRange: ["+ec.toText()+" ("+ec.id+")]
+ // ["+cec.toText()+" ("+cec.id+")] ec.sentenceNumber=("+ec.sentenceNumber+")-cec.sentenceNumber
+ // =("+cec.sentenceNumber+") > "+NUM_SENTS_BACK_PRONOUNS);
return (mention.getSentenceNumber() - cec.getSentenceNumber() > NUM_SENTS_BACK_PRONOUNS);
}
public boolean canResolve(MentionContext mention) {
String tag = mention.getHeadTokenTag();
- return (tag != null && tag.startsWith("PRP") && ResolverUtils.pluralThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches());
+ return (tag != null && tag.startsWith("PRP")
+ && ResolverUtils.pluralThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches());
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java
index e922af2..0e6070c 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java
@@ -50,7 +50,8 @@
showExclusions = false;
}
- public ProperNounResolver(String projectName, ResolverMode m,NonReferentialResolver nonRefResolver) throws IOException {
+ public ProperNounResolver(String projectName, ResolverMode m, NonReferentialResolver nonRefResolver)
+ throws IOException {
super(projectName,"pnmodel", m, 500,nonRefResolver);
if (!acroMapLoaded) {
initAcronyms(projectName + "/acronyms");
@@ -135,7 +136,8 @@
for (Iterator<MentionContext> ei = entity.getMentions(); ei.hasNext();) {
MentionContext xec = ei.next();
- if (xec.getHeadTokenTag().startsWith("NNP")) { // || initialCaps.matcher(xec.headToken.toString()).find()) {
+ if (xec.getHeadTokenTag().startsWith("NNP")) {
+ // || initialCaps.matcher(xec.headToken.toString()).find()) {
//System.err.println("MaxentProperNounResolver.exclude: kept "+xec.toText()+" with "+xec.headTag);
return false;
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
index 41ac100..676b6de 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
@@ -42,31 +42,47 @@
private static final Pattern initialCaps = Pattern.compile("^[A-Z]");
/** Regular expression for English singular third person pronouns. */
- public static final Pattern singularThirdPersonPronounPattern = Pattern.compile("^(he|she|it|him|her|his|hers|its|himself|herself|itself)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern singularThirdPersonPronounPattern =
+ Pattern.compile("^(he|she|it|him|her|his|hers|its|himself|herself|itself)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English plural third person pronouns. */
- public static final Pattern pluralThirdPersonPronounPattern = Pattern.compile("^(they|their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern pluralThirdPersonPronounPattern =
+ Pattern.compile("^(they|their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English speech pronouns. */
- public static final Pattern speechPronounPattern = Pattern.compile("^(I|me|my|you|your|you|we|us|our|ours)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern speechPronounPattern =
+ Pattern.compile("^(I|me|my|you|your|you|we|us|our|ours)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English female pronouns. */
- public static final Pattern femalePronounPattern = Pattern.compile("^(she|her|hers|herself)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern femalePronounPattern =
+ Pattern.compile("^(she|her|hers|herself)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English neuter pronouns. */
- public static final Pattern neuterPronounPattern = Pattern.compile("^(it|its|itself)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern neuterPronounPattern =
+ Pattern.compile("^(it|its|itself)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English first person pronouns. */
- public static final Pattern firstPersonPronounPattern = Pattern.compile("^(I|me|my|we|our|us|ours)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern firstPersonPronounPattern =
+ Pattern.compile("^(I|me|my|we|our|us|ours)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English singular second person pronouns. */
- public static final Pattern secondPersonPronounPattern = Pattern.compile("^(you|your|yours)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern secondPersonPronounPattern =
+ Pattern.compile("^(you|your|yours)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English third person pronouns. */
- public static final Pattern thirdPersonPronounPattern = Pattern.compile("^(he|she|it|him|her|his|hers|its|himself|herself|itself|they|their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern thirdPersonPronounPattern =
+ Pattern.compile("^(he|she|it|him|her|his|hers|its|himself|herself|itself|they|" +
+ "their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English singular pronouns. */
- public static final Pattern singularPronounPattern = Pattern.compile("^(I|me|my|he|she|it|him|her|his|hers|its|himself|herself|itself)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern singularPronounPattern =
+ Pattern.compile("^(I|me|my|he|she|it|him|her|his|hers|its|himself|herself|itself)$",
+ Pattern.CASE_INSENSITIVE);
/** Regular expression for English plural pronouns. */
- public static final Pattern pluralPronounPattern = Pattern.compile("^(we|us|our|ours|they|their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern pluralPronounPattern =
+ Pattern.compile("^(we|us|our|ours|they|their|theirs|them|themselves)$",
+ Pattern.CASE_INSENSITIVE);
/** Regular expression for English male pronouns. */
- public static final Pattern malePronounPattern = Pattern.compile("^(he|him|his|himself)$",Pattern.CASE_INSENSITIVE);
+ public static final Pattern malePronounPattern =
+ Pattern.compile("^(he|him|his|himself)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English honorifics. */
- public static final Pattern honorificsPattern = Pattern.compile("[A-Z][a-z]+\\.$|^[A-Z][b-df-hj-np-tv-xz]+$");
+ public static final Pattern honorificsPattern =
+ Pattern.compile("[A-Z][a-z]+\\.$|^[A-Z][b-df-hj-np-tv-xz]+$");
/** Regular expression for English corporate designators. */
- public static final Pattern designatorsPattern = Pattern.compile("[a-z]\\.$|^[A-Z][b-df-hj-np-tv-xz]+$|^Co(rp)?$");
+ public static final Pattern designatorsPattern =
+ Pattern.compile("[a-z]\\.$|^[A-Z][b-df-hj-np-tv-xz]+$|^Co(rp)?$");
private static final String NUM_COMPATIBLE = "num.compatible";
@@ -237,7 +253,8 @@
if (exactMatchFeature != null) {
featureSet.add(exactMatchFeature);
}
- else if (entityMention.getParse().isCoordinatedNounPhrase() && !mention.getParse().isCoordinatedNounPhrase()) {
+ else if (entityMention.getParse().isCoordinatedNounPhrase()
+ && !mention.getParse().isCoordinatedNounPhrase()) {
featureSet.add("cmix");
}
else {
@@ -252,7 +269,8 @@
Parse[] xtoks = entityMention.getTokenParses();
int headIndex = entityMention.getHeadTokenIndex();
//if (!mention.getHeadTokenTag().equals(entityMention.getHeadTokenTag())) {
- // //System.err.println("skipping "+mention.headTokenText+" with "+xec.headTokenText+" because "+mention.headTokenTag+" != "+xec.headTokenTag);
+ // //System.err.println("skipping "+mention.headTokenText+" with "+xec.headTokenText
+ // +" because "+mention.headTokenTag+" != "+xec.headTokenTag);
// continue;
//} want to match NN NNP
String entityMentionHeadString = entityMention.getHeadTokenText().toLowerCase();
@@ -340,10 +358,10 @@
* @return a normalized string representation of the specified mention.
*/
public static String stripNp(MentionContext mention) {
- int start=mention.getNonDescriptorStart(); //start after descriptors
+ int start = mention.getNonDescriptorStart(); //start after descriptors
Parse[] mtokens = mention.getTokenParses();
- int end=mention.getHeadTokenIndex()+1;
+ int end = mention.getHeadTokenIndex() + 1;
if (start == end) {
//System.err.println("stripNp: return null 1");
return null;
@@ -358,7 +376,7 @@
}
//get to first NNP
String type;
- for (int i=start;i<end;i++) {
+ for (int i = start; i < end; i++) {
type = mtokens[start].getSyntacticType();
if (type.startsWith("NNP")) {
break;
@@ -369,7 +387,7 @@
//System.err.println("stripNp: return null 3");
return null;
}
- if (start+1 != end) { // don't do this on head words, to keep "U.S."
+ if (start + 1 != end) { // don't do this on head words, to keep "U.S."
//strip off honorifics in beginning
if (honorificsPattern.matcher(mtokens[start].toString()).find()) {
start++;
@@ -395,7 +413,8 @@
}
public static MentionContext getProperNounExtent(DiscourseEntity de) {
- for (Iterator<MentionContext> ei = de.getMentions(); ei.hasNext();) { //use first extent which is propername
+ //use first extent which is propername
+ for (Iterator<MentionContext> ei = de.getMentions(); ei.hasNext();) {
MentionContext xec = ei.next();
String xecHeadTag = xec.getHeadTokenTag();
if (xecHeadTag.startsWith("NNP") || initialCaps.matcher(xec.getHeadTokenText()).find()) {
@@ -450,7 +469,7 @@
if (mention.getHeadTokenTag().startsWith("PRP")) {
Map<String, String> pronounMap = getPronounFeatureMap(mention.getHeadTokenText());
//System.err.println("getPronounMatchFeatures.pronounMap:"+pronounMap);
- for (Iterator<MentionContext> mi=entity.getMentions();mi.hasNext();) {
+ for (Iterator<MentionContext> mi = entity.getMentions(); mi.hasNext();) {
MentionContext candidateMention = mi.next();
if (candidateMention.getHeadTokenTag().startsWith("PRP")) {
if (mention.getHeadTokenText().equalsIgnoreCase(candidateMention.getHeadTokenText())) {
@@ -458,7 +477,8 @@
break;
}
else {
- Map<String, String> candidatePronounMap = getPronounFeatureMap(candidateMention.getHeadTokenText());
+ Map<String, String> candidatePronounMap =
+ getPronounFeatureMap(candidateMention.getHeadTokenText());
//System.err.println("getPronounMatchFeatures.candidatePronounMap:"+candidatePronounMap);
boolean allKeysMatch = true;
for (Iterator<String> ki = pronounMap.keySet().iterator(); ki.hasNext();) {
@@ -500,16 +520,19 @@
public static List<String> getDistanceFeatures(MentionContext mention, DiscourseEntity entity) {
List<String> features = new ArrayList<String>();
MentionContext cec = entity.getLastExtent();
- int entityDistance = mention.getNounPhraseDocumentIndex()- cec.getNounPhraseDocumentIndex();
+ int entityDistance = mention.getNounPhraseDocumentIndex() - cec.getNounPhraseDocumentIndex();
int sentenceDistance = mention.getSentenceNumber() - cec.getSentenceNumber();
int hobbsEntityDistance;
if (sentenceDistance == 0) {
hobbsEntityDistance = cec.getNounPhraseSentenceIndex();
}
else {
- //hobbsEntityDistance = entityDistance - (entities within sentence from mention to end) + (entities within sentence form start to mention)
- //hobbsEntityDistance = entityDistance - (cec.maxNounLocation - cec.getNounPhraseSentenceIndex) + cec.getNounPhraseSentenceIndex;
- hobbsEntityDistance = entityDistance + (2 * cec.getNounPhraseSentenceIndex()) - cec.getMaxNounPhraseSentenceIndex();
+ //hobbsEntityDistance = entityDistance - (entities within sentence from mention to end)
+ // + (entities within sentence from start to mention)
+ //hobbsEntityDistance = entityDistance - (cec.maxNounLocation - cec.getNounPhraseSentenceIndex)
+ // + cec.getNounPhraseSentenceIndex;
+ hobbsEntityDistance = entityDistance +
+ (2 * cec.getNounPhraseSentenceIndex()) - cec.getMaxNounPhraseSentenceIndex();
}
features.add("hd=" + hobbsEntityDistance);
features.add("de=" + entityDistance);
@@ -553,9 +576,11 @@
* Returns features indicating whether the specified mention and the specified entity are compatible.
* @param mention The mention.
* @param entity The entity.
- * @return list of features indicating whether the specified mention and the specified entity are compatible.
+ * @return list of features indicating whether the specified mention and the specified
+ * entity are compatible.
*/
- public static List<String> getCompatibilityFeatures(MentionContext mention, DiscourseEntity entity, TestSimilarityModel simModel) {
+ public static List<String> getCompatibilityFeatures(MentionContext mention,
+ DiscourseEntity entity, TestSimilarityModel simModel) {
List<String> compatFeatures = new ArrayList<String>();
String semCompatible = getSemanticCompatibilityFeature(mention, entity, simModel);
compatFeatures.add(semCompatible);
@@ -563,10 +588,12 @@
compatFeatures.add(genCompatible);
String numCompatible = ResolverUtils.getNumberCompatibilityFeature(mention, entity);
compatFeatures.add(numCompatible);
- if (semCompatible.equals(SIM_COMPATIBLE) && genCompatible.equals(GEN_COMPATIBLE) && numCompatible.equals(ResolverUtils.NUM_COMPATIBLE)) {
+ if (semCompatible.equals(SIM_COMPATIBLE) && genCompatible.equals(GEN_COMPATIBLE)
+ && numCompatible.equals(ResolverUtils.NUM_COMPATIBLE)) {
compatFeatures.add("all.compatible");
}
- else if (semCompatible.equals(SIM_INCOMPATIBLE) || genCompatible.equals(GEN_INCOMPATIBLE) || numCompatible.equals(ResolverUtils.NUM_INCOMPATIBLE)) {
+ else if (semCompatible.equals(SIM_INCOMPATIBLE) || genCompatible.equals(GEN_INCOMPATIBLE)
+ || numCompatible.equals(ResolverUtils.NUM_INCOMPATIBLE)) {
compatFeatures.add("some.incompatible");
}
return compatFeatures;
@@ -586,7 +613,8 @@
}
}
- public static String getSemanticCompatibilityFeature(MentionContext ec, DiscourseEntity de, TestSimilarityModel simModel) {
+ public static String getSemanticCompatibilityFeature(MentionContext ec, DiscourseEntity de,
+ TestSimilarityModel simModel) {
if (simModel != null) {
double best = 0;
for (Iterator<MentionContext> xi = de.getMentions(); xi.hasNext();) {
@@ -640,7 +668,4 @@
return "u";
}
}
-
-
-
}
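
The rewrapped pattern constants above are what the resolvers' canResolve() checks match head tokens against. A small usage check; the sample tokens are arbitrary:

```java
// Usage check for the rewrapped pronoun patterns. The patterns are
// compiled with Pattern.CASE_INSENSITIVE, so "Themselves" matches too.
import opennlp.tools.coref.resolver.ResolverUtils;

final class PronounPatternSketch {
  public static void main(String[] args) {
    System.out.println(ResolverUtils.pluralThirdPersonPronounPattern
        .matcher("Themselves").matches());   // true
    System.out.println(ResolverUtils.singularThirdPersonPronounPattern
        .matcher("they").matches());         // false: plural form
  }
}
```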
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingletonNonReferentialResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingletonNonReferentialResolver.java
index 746f97d..a1174a4 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingletonNonReferentialResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingletonNonReferentialResolver.java
@@ -33,7 +33,8 @@
super(projectName, "nonref", mode);
}
- public static SingletonNonReferentialResolver getInstance(String modelName, ResolverMode mode) throws IOException {
+ public static SingletonNonReferentialResolver getInstance(String modelName, ResolverMode mode)
+ throws IOException {
if (resolver == null) {
resolver = new SingletonNonReferentialResolver(modelName, mode);
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
index 6e84140..139f0d7 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
@@ -40,7 +40,8 @@
this.numSentencesBack = 2;
}
- public SingularPronounResolver(String projectName, ResolverMode m, NonReferentialResolver nonReferentialResolver) throws IOException {
+ public SingularPronounResolver(String projectName, ResolverMode m,
+ NonReferentialResolver nonReferentialResolver) throws IOException {
super(projectName, "pmodel", m, 30,nonReferentialResolver);
this.numSentencesBack = 2;
}
@@ -48,7 +49,8 @@
public boolean canResolve(MentionContext mention) {
//System.err.println("MaxentSingularPronounResolver.canResolve: ec= ("+mention.id+") "+ mention.toText());
String tag = mention.getHeadTokenTag();
- return (tag != null && tag.startsWith("PRP") && ResolverUtils.singularThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches());
+ return tag != null && tag.startsWith("PRP")
+ && ResolverUtils.singularThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches();
}
@Override
@@ -109,7 +111,8 @@
for (Iterator<MentionContext> ei = entity.getMentions(); ei.hasNext();) {
MentionContext entityMention = ei.next();
String tag = entityMention.getHeadTokenTag();
- if (tag != null && tag.startsWith("PRP") && ResolverUtils.singularThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches()) {
+ if (tag != null && tag.startsWith("PRP")
+ && ResolverUtils.singularThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches()) {
if (mentionGender == null) { //lazy initialization
mentionGender = ResolverUtils.getPronounGender(mention.getHeadTokenText());
}
@@ -125,7 +128,10 @@
@Override
protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
MentionContext cec = entity.getLastExtent();
- //System.err.println("MaxentSingularPronounresolve.outOfRange: ["+entity.getLastExtent().toText()+" ("+entity.getId()+")] ["+mention.toText()+" ("+mention.getId()+")] entity.sentenceNumber=("+entity.getLastExtent().getSentenceNumber()+")-mention.sentenceNumber=("+mention.getSentenceNumber()+") > "+numSentencesBack);
+ //System.err.println("MaxentSingularPronounresolve.outOfRange: ["+entity.getLastExtent().toText()
+ // +" ("+entity.getId()+")] ["+mention.toText()+" ("+mention.getId()+")] entity.sentenceNumber=("
+ // +entity.getLastExtent().getSentenceNumber()+")-mention.sentenceNumber=("
+ // +mention.getSentenceNumber()+") > "+numSentencesBack);
return (mention.getSentenceNumber() - cec.getSentenceNumber() > numSentencesBack);
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
index bc5d2d4..7d080c6 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
@@ -36,7 +36,8 @@
preferFirstReferent = true;
}
- public SpeechPronounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr) throws IOException {
+ public SpeechPronounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr)
+ throws IOException {
super(projectName,"fmodel", m, 30,nrr);
showExclusions = false;
preferFirstReferent = true;
@@ -79,7 +80,8 @@
public boolean canResolve(MentionContext mention) {
String tag = mention.getHeadTokenTag();
- boolean fpp = tag != null && tag.startsWith("PRP") && ResolverUtils.speechPronounPattern.matcher(mention.getHeadTokenText()).matches();
+ boolean fpp = tag != null && tag.startsWith("PRP")
+ && ResolverUtils.speechPronounPattern.matcher(mention.getHeadTokenText()).matches();
boolean pn = tag != null && tag.startsWith("NNP");
return (fpp || pn);
}
@@ -104,22 +106,22 @@
return !canResolve(cec);
}
}
- else if (mention.getHeadTokenTag().startsWith("PRP")){ // mention is a speech pronoun
+ else if (mention.getHeadTokenTag().startsWith("PRP")) { // mention is a speech pronoun
// cec can be either a speech pronoun or a propernoun
if (cec.getHeadTokenTag().startsWith("NNP")) {
//exclude antecedents not in the same sentence when they are not pronoun
return (mention.getSentenceNumber() - cec.getSentenceNumber() != 0);
}
- else if (cec.getHeadTokenTag().startsWith("PRP")){
+ else if (cec.getHeadTokenTag().startsWith("PRP")) {
return false;
}
else {
- System.err.println("Unexpected candidate exluded: "+cec.toText());
+ System.err.println("Unexpected candidate exluded: " + cec.toText());
return true;
}
}
else {
- System.err.println("Unexpected mention exluded: "+mention.toText());
+ System.err.println("Unexpected mention exluded: " + mention.toText());
return true;
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/package-info.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/package-info.java
index fb59395..aaf0e45 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/package-info.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/package-info.java
@@ -18,4 +18,4 @@
/**
* Package related to resolution techniques for coreference resolution.
*/
-package opennlp.tools.coref.resolver;
\ No newline at end of file
+package opennlp.tools.coref.resolver;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java
index 174437c..ea6d819 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java
@@ -42,15 +42,16 @@
/** The token index in of the head word of this mention. */
protected int headTokenIndex;
- public Context(Span span, Span headSpan, int entityId, Parse parse, String extentType, String nameType, HeadFinder headFinder) {
+ public Context(Span span, Span headSpan, int entityId, Parse parse,
+ String extentType, String nameType, HeadFinder headFinder) {
super(span,headSpan,entityId,parse,extentType,nameType);
init(headFinder);
}
public Context(Object[] tokens, String headToken, String headTag, String neType) {
super(null,null,1,null,null,neType);
- this.tokens =tokens;
- this.headTokenIndex = tokens.length-1;
+ this.tokens = tokens;
+ this.headTokenIndex = tokens.length - 1;
this.headTokenText = headToken;
this.headTokenTag = headTag;
this.synsets = getSynsetSet(this);
@@ -80,7 +81,7 @@
public static Context[] constructContexts(Mention[] mentions,HeadFinder headFinder) {
Context[] contexts = new Context[mentions.length];
- for (int mi=0;mi<mentions.length;mi++) {
+ for (int mi = 0; mi < mentions.length; mi++) {
contexts[mi] = new Context(mentions[mi],headFinder);
}
return contexts;
@@ -90,7 +91,7 @@
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
- for (int ti=0,tl=tokens.length;ti<tl;ti++){
+ for (int ti = 0, tl = tokens.length; ti < tl; ti++) {
sb.append(tokens[ti]).append(" ");
}
return sb.toString();
@@ -113,17 +114,16 @@
}
public static Context parseContext(String word) {
- String[] parts = word.split("/");
- if (parts.length == 2) {
- String[] tokens = parts[0].split(" ");
- return new Context(tokens,tokens[tokens.length-1], parts[1], null);
- }
- else if (parts.length == 3) {
- String[] tokens = parts[0].split(" ");
- return new Context(tokens,tokens[tokens.length-1], parts[1], parts[2]);
- }
- return null;
+ String[] parts = word.split("/");
+ if (parts.length == 2) {
+ String[] tokens = parts[0].split(" ");
+ return new Context(tokens, tokens[tokens.length - 1], parts[1], null);
+ } else if (parts.length == 3) {
+ String[] tokens = parts[0].split(" ");
+ return new Context(tokens, tokens[tokens.length - 1], parts[1], parts[2]);
}
+ return null;
+ }
private static Set<String> getSynsetSet(Context c) {
Set<String> synsetSet = new HashSet<String>();
@@ -135,7 +135,7 @@
if (senseKey != null) {
synsetSet.add(senseKey);
String[] synsets = dict.getParentSenseKeys(lemmas[li],"NN",0);
- for (int si=0,sn=synsets.length;si<sn;si++) {
+ for (int si = 0, sn = synsets.length; si < sn; si++) {
synsetSet.add(synsets[si]);
}
}
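
The reindented parseContext above accepts either a `tokens/headTag` or a `tokens/headTag/nameType` string, splits the space-separated tokens, takes the last token as head, and returns null for any other shape. A usage sketch; the sample strings are made up, and constructing a Context assumes the dictionary backing getSynsetSet() has been initialized:

```java
// Usage sketch for Context.parseContext(); sample strings are made up.
// Note: the Context constructor looks up synsets, so this assumes the
// dictionary consulted by getSynsetSet() is initialized.
import opennlp.tools.coref.sim.Context;

final class ParseContextSketch {
  public static void main(String[] args) {
    Context c2 = Context.parseContext("White House/NNP");          // tokens + head tag
    Context c3 = Context.parseContext("White House/NNP/location"); // with a name type
    Context c0 = Context.parseContext("no-separator");             // null: wrong shape
    System.out.println(c2 + "| " + c3 + "| " + c0);
  }
}
```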
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
index 9b93255..13e8300 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
@@ -31,22 +31,21 @@
import java.util.List;
import java.util.Set;
-//import opennlp.maxent.GIS;
+import opennlp.tools.coref.resolver.ResolverUtils;
import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
+import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
+import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.HashList;
+import opennlp.tools.util.ObjectStreamUtils;
+
+//import opennlp.maxent.GIS;
//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
//import opennlp.model.Event;
-import opennlp.tools.ml.model.Event;
-import opennlp.tools.ml.model.MaxentModel;
//import opennlp.model.MaxentModel;
-import opennlp.tools.coref.resolver.ResolverUtils;
-import opennlp.tools.ml.model.AbstractModel;
-
-import opennlp.tools.ml.model.EventStream;
-import opennlp.tools.util.CollectionEventStream;
-import opennlp.tools.util.HashList;
/**
* Class which models the gender of a particular mentions and entities made up of mentions.
@@ -87,16 +86,17 @@
private GenderModel(String modelName, boolean train) throws IOException {
this.modelName = modelName;
- maleNames = readNames(modelName+".mas");
- femaleNames = readNames(modelName+".fem");
+ maleNames = readNames(modelName + ".mas");
+ femaleNames = readNames(modelName + ".fem");
if (train) {
events = new ArrayList<Event>();
}
else {
//if (MaxentResolver.loadAsResource()) {
- // testModel = (new BinaryGISModelReader(new DataInputStream(this.getClass().getResourceAsStream(modelName)))).getModel();
+ // testModel = (new BinaryGISModelReader(new DataInputStream(
+ // this.getClass().getResourceAsStream(modelName)))).getModel();
//}
- testModel = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel();
+ testModel = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
maleIndex = testModel.getIndex(GenderEnum.MALE.toString());
femaleIndex = testModel.getIndex(GenderEnum.FEMALE.toString());
neuterIndex = testModel.getIndex(GenderEnum.NEUTER.toString());
@@ -104,17 +104,17 @@
}
private List<String> getFeatures(Context np1) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
features.add("default");
for (int ti = 0, tl = np1.getHeadTokenIndex(); ti < tl; ti++) {
features.add("mw=" + np1.getTokens()[ti].toString());
}
features.add("hw=" + np1.getHeadTokenText());
- features.add("n="+np1.getNameType());
+ features.add("n=" + np1.getNameType());
if (np1.getNameType() != null && np1.getNameType().equals("person")) {
Object[] tokens = np1.getTokens();
//System.err.println("GenderModel.getFeatures: person name="+np1);
- for (int ti=0;ti<np1.getHeadTokenIndex() || ti==0;ti++) {
+ for (int ti = 0; ti < np1.getHeadTokenIndex() || ti == 0; ti++) {
String name = tokens[ti].toString().toLowerCase();
if (femaleNames.contains(name)) {
features.add("fem");
@@ -254,7 +254,8 @@
for (String line = in.readLine(); line != null; line = in.readLine()) {
String[] words = line.split(" ");
double[] dist = model.genderDistribution(Context.parseContext(words[0]));
- System.out.println("m="+dist[model.getMaleIndex()] + " f=" +dist[model.getFemaleIndex()]+" n="+dist[model.getNeuterIndex()]+" "+model.getFeatures(Context.parseContext(words[0])));
+ System.out.println("m=" + dist[model.getMaleIndex()] + " f=" + dist[model.getFemaleIndex()]
+ + " n=" + dist[model.getNeuterIndex()] + " " + model.getFeatures(Context.parseContext(words[0])));
}
}
@@ -268,16 +269,18 @@
public void trainModel() throws IOException {
if (debugOn) {
- FileWriter writer = new FileWriter(modelName+".events");
- for (Iterator<Event> ei=events.iterator();ei.hasNext();) {
+ FileWriter writer = new FileWriter(modelName + ".events");
+ for (Iterator<Event> ei = events.iterator(); ei.hasNext();) {
Event e = ei.next();
- writer.write(e.toString()+"\n");
+ writer.write(e.toString() + "\n");
}
writer.close();
}
+
new SuffixSensitiveGISModelWriter(
// GIS.trainModel((EventStream)new CollectionEventStream(events), true)).persist();
- (AbstractModel) GIS.trainModel((EventStream)new CollectionEventStream(events), true), new File(modelName+modelExtension)).persist();
+ (AbstractModel) GIS.trainModel(ObjectStreamUtils.createObjectStream(events), true),
+ new File(modelName + modelExtension)).persist();
}
public int getFemaleIndex() {
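
The rewrapped debug loop earlier in this hunk reads the gender distribution as three indexed probabilities. Extracted as a helper it looks like the sketch below, assuming a loaded GenderModel and the accessor visibility the in-class loop enjoys:

```java
// Sketch of the distribution lookup from the debug loop above. `model` is
// assumed to be a loaded GenderModel; the context string follows the
// "tokens/headTag" shape accepted by Context.parseContext().
import opennlp.tools.coref.sim.Context;
import opennlp.tools.coref.sim.GenderModel;

final class GenderSketch {
  static String describe(GenderModel model, String word) {
    double[] dist = model.genderDistribution(Context.parseContext(word));
    return "m=" + dist[model.getMaleIndex()]
        + " f=" + dist[model.getFemaleIndex()]
        + " n=" + dist[model.getNeuterIndex()];
  }
}
```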
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/MaxentCompatibilityModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/MaxentCompatibilityModel.java
index b6e00a5..217beb9 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/MaxentCompatibilityModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/MaxentCompatibilityModel.java
@@ -41,7 +41,9 @@
Gender gender;
double[] gdist = genModel.genderDistribution(c);
if (debugOn) {
- System.err.println("MaxentCompatibilityModel.computeGender: "+c.toString()+" m="+gdist[genModel.getMaleIndex()]+" f="+gdist[genModel.getFemaleIndex()]+" n="+gdist[genModel.getNeuterIndex()]);
+ System.err.println("MaxentCompatibilityModel.computeGender: "
+ + c.toString() + " m=" + gdist[genModel.getMaleIndex()] + " f="
+ + gdist[genModel.getFemaleIndex()] + " n=" + gdist[genModel.getNeuterIndex()]);
}
if (genModel.getMaleIndex() >= 0 && gdist[genModel.getMaleIndex()] > minGenderProb) {
gender = new Gender(GenderEnum.MALE,gdist[genModel.getMaleIndex()]);
@@ -61,7 +63,8 @@
public Number computeNumber(Context c) {
double[] dist = numModel.numberDist(c);
Number number;
- //System.err.println("MaxentCompatibiltyResolver.computeNumber: "+c+" sing="+dist[numModel.getSingularIndex()]+" plural="+dist[numModel.getPluralIndex()]);
+ //System.err.println("MaxentCompatibiltyResolver.computeNumber: "+c+" sing="
+ // +dist[numModel.getSingularIndex()]+" plural="+dist[numModel.getPluralIndex()]);
if (dist[numModel.getSingularIndex()] > minNumberProb) {
number = new Number(NumberEnum.SINGULAR,dist[numModel.getSingularIndex()]);
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberEnum.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberEnum.java
index 693f894..8ec645e 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberEnum.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberEnum.java
@@ -44,8 +44,7 @@
}
@Override
- public String toString(){
+ public String toString() {
return name;
}
-
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
index 02776a5..6f3be6d 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
@@ -22,19 +22,21 @@
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+
+import opennlp.tools.coref.resolver.ResolverUtils;
import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
+import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.HashList;
+import opennlp.tools.util.ObjectStreamUtils;
+
//import opennlp.maxent.GIS;
//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
//import opennlp.model.Event;
-import opennlp.tools.ml.model.Event;
//import opennlp.model.MaxentModel;
-import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.coref.resolver.ResolverUtils;
-import opennlp.tools.util.CollectionEventStream;
-import opennlp.tools.util.HashList;
/**
* Class which models the number of particular mentions and the entities made up of mentions.
@@ -66,9 +68,10 @@
}
else {
//if (MaxentResolver.loadAsResource()) {
- // testModel = (new PlainTextGISModelReader(new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(modelName))))).getModel();
+ // testModel = (new PlainTextGISModelReader(new BufferedReader(new InputStreamReader(
+ // this.getClass().getResourceAsStream(modelName))))).getModel();
//}
- testModel = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel();
+ testModel = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
singularIndex = testModel.getIndex(NumberEnum.SINGULAR.toString());
pluralIndex = testModel.getIndex(NumberEnum.PLURAL.toString());
}
@@ -178,7 +181,8 @@
}
public void trainModel() throws IOException {
- (new SuffixSensitiveGISModelWriter(GIS.trainModel(new CollectionEventStream(events),100,10),new File(modelName+modelExtension))).persist();
+ new SuffixSensitiveGISModelWriter(GIS.trainModel(
+ ObjectStreamUtils.createObjectStream(events), 100, 10),
+ new File(modelName + modelExtension)).persist();
}
-
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
index 9a4b8d0..e54c427 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
@@ -29,20 +29,15 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.GIS;
-//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
-import opennlp.tools.ml.model.Event;
-//import opennlp.model.MaxentModel;
-import opennlp.tools.ml.model.MaxentModel;
-//import opennlp.model.Event;
-//import opennlp.model.MaxentModel;
+
import opennlp.tools.coref.resolver.ResolverUtils;
-import opennlp.tools.util.CollectionEventStream;
+import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
+import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.HashList;
+import opennlp.tools.util.ObjectStreamUtils;
/**
* Models semantic similarity between two mentions and returns a score based on
@@ -74,7 +69,7 @@
events = new ArrayList<Event>();
}
else {
- testModel = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel();
+ testModel = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
SAME_INDEX = testModel.getIndex(SAME);
}
}
@@ -82,12 +77,14 @@
private void addEvent(boolean same, Context np1, Context np2) {
if (same) {
List<String> feats = getFeatures(np1, np2);
- //System.err.println(SAME+" "+np1.headTokenText+" ("+np1.id+") -> "+np2.headTokenText+" ("+np2.id+") "+feats);
+ //System.err.println(SAME+" "+np1.headTokenText+" ("+np1.id+") -> "+np2.headTokenText+"
+ // ("+np2.id+") "+feats);
events.add(new Event(SAME, feats.toArray(new String[feats.size()])));
}
else {
List<String> feats = getFeatures(np1, np2);
- //System.err.println(DIFF+" "+np1.headTokenText+" ("+np1.id+") -> "+np2.headTokenText+" ("+np2.id+") "+feats);
+ //System.err.println(DIFF+" "+np1.headTokenText+" ("+np1.id+") -> "+np2.headTokenText+"
+ // ("+np2.id+") "+feats);
events.add(new Event(DIFF, feats.toArray(new String[feats.size()])));
}
}
@@ -152,7 +149,8 @@
* with entity indicated by the specified key.
*/
@SuppressWarnings("unchecked")
- private Set<Context> constructExclusionSet(Integer entityKey, HashList entities, Map<Integer, Set<String>> headSets, Map<Integer, Set<String>> nameSets, List<Context> singletons) {
+ private Set<Context> constructExclusionSet(Integer entityKey, HashList entities,
+ Map<Integer, Set<String>> headSets, Map<Integer, Set<String>> nameSets, List<Context> singletons) {
Set<Context> exclusionSet = new HashSet<Context>();
Set<String> entityHeadSet = headSets.get(entityKey);
Set<String> entityNameSet = nameSets.get(entityKey);
@@ -241,7 +239,8 @@
*
* @param entities A mapping between a key and a list of mentions.
*
- * @return a mapping between each key in the specified entity map and the name types associated with the each mention of that entity.
+ * @return a mapping between each key in the specified entity map and the name types associated
+ * with each mention of that entity.
*/
@SuppressWarnings("unchecked")
private Map<Integer, Set<String>> constructNameSets(HashList entities) {
@@ -333,7 +332,8 @@
Context sec1 = allExtents.get(axi);
axi = (axi + 1) % allExtents.size();
if (!exclusionSet.contains(sec1)) {
- if (debugOn) System.err.println(ec1.toString()+" "+entityNameSet+" "+sec1.toString()+" "+nameSets.get(sec1.getId()));
+ if (debugOn) System.err.println(ec1.toString() + " " + entityNameSet + " "
+ + sec1.toString() + " " + nameSets.get(sec1.getId()));
addEvent(false, ec1, sec1);
break;
}
@@ -345,15 +345,18 @@
}
/**
- * Returns a number between 0 and 1 which represents the models belief that the specified mentions are compatible.
- * Value closer to 1 are more compatible, while values closer to 0 are less compatible.
+ * Returns a number between 0 and 1 which represents the model's belief that the specified
+ * mentions are compatible. Values closer to 1 are more compatible, while values closer
+ * to 0 are less compatible.
+ *
* @param mention1 The first mention to be considered.
* @param mention2 The second mention to be considered.
- * @return a number between 0 and 1 which represents the models belief that the specified mentions are compatible.
+ * @return a number between 0 and 1 which represents the model's belief that the specified
+ * mentions are compatible.
*/
public double compatible(Context mention1, Context mention2) {
List<String> feats = getFeatures(mention1, mention2);
- if (debugOn) System.err.println("SimilarityModel.compatible: feats="+feats);
+ if (debugOn) System.err.println("SimilarityModel.compatible: feats=" + feats);
return (testModel.eval(feats.toArray(new String[feats.size()]))[SAME_INDEX]);
}
@@ -363,16 +366,16 @@
*/
public void trainModel() throws IOException {
if (debugOn) {
- FileWriter writer = new FileWriter(modelName+".events");
- for (Iterator<Event> ei=events.iterator();ei.hasNext();) {
+ FileWriter writer = new FileWriter(modelName + ".events");
+ for (Iterator<Event> ei = events.iterator(); ei.hasNext();) {
Event e = ei.next();
- writer.write(e.toString()+"\n");
+ writer.write(e.toString() + "\n");
}
writer.close();
}
- (new SuffixSensitiveGISModelWriter(GIS.trainModel(
- new CollectionEventStream(events),100,10),
- new File(modelName+modelExtension))).persist();
+ new SuffixSensitiveGISModelWriter(GIS.trainModel(
+ ObjectStreamUtils.createObjectStream(events), 100, 10),
+ new File(modelName + modelExtension)).persist();
}
private boolean isName(Context np) {
@@ -393,7 +396,7 @@
private List<String> getNameCommonFeatures(Context name, Context common) {
Set<String> synsets = common.getSynsets();
- List<String> features = new ArrayList<String>(2 + synsets.size());
+ List<String> features = new ArrayList<>(2 + synsets.size());
features.add("nn=" + name.getNameType() + "," + common.getNameType());
features.add("nw=" + name.getNameType() + "," + common.getHeadTokenText().toLowerCase());
for (Iterator<String> si = synsets.iterator(); si.hasNext();) {
@@ -610,7 +613,8 @@
else if (isPronoun(np2)) {
features.addAll(getNumberPronounFeatures(np1, np2));
}
- else if (isNumber(np2)) {}
+ else if (isNumber(np2)) {
+ }
else {
//System.err.println("unknown group for " + np1.headTokenText + " -> " + np2.headTokenText);
}
@@ -634,7 +638,8 @@
for (String line = in.readLine(); line != null; line = in.readLine()) {
String[] words = line.split(" ");
double p = model.compatible(Context.parseContext(words[0]), Context.parseContext(words[1]));
- System.out.println(p + " " + model.getFeatures(Context.parseContext(words[0]), Context.parseContext(words[1])));
+ System.out.println(p + " " + model.getFeatures(Context.parseContext(words[0]),
+ Context.parseContext(words[1])));
}
}
}
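As the reworked javadoc above says, compatible() evaluates the pairwise features and returns the probability mass the maxent model assigns to the SAME outcome, so callers can threshold it directly. A sketch of such a caller; the form of the mention strings accepted by Context.parseContext and the 0.5 threshold are assumptions for illustration:

    import opennlp.tools.coref.sim.Context;
    import opennlp.tools.coref.sim.SimilarityModel;

    class CompatibilitySketch {
      // Prints the model's belief, in [0, 1], that two serialized
      // mentions are compatible; higher means more likely coreferent.
      static void check(SimilarityModel model, String m1, String m2) {
        double p = model.compatible(Context.parseContext(m1), Context.parseContext(m2));
        System.out.println(m1 + " ~ " + m2 + " -> " + p + (p > 0.5 ? " (compatible)" : ""));
      }
    }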
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/package-info.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/package-info.java
index 535211a..21f59d1 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/package-info.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/package-info.java
@@ -18,4 +18,4 @@
/**
* Package related to the modeling mention similarity for coreference resolution.
*/
-package opennlp.tools.coref.sim;
\ No newline at end of file
+package opennlp.tools.coref.sim;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/FullParseCorefEnhancerStream.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/FullParseCorefEnhancerStream.java
index 0666843..a83aa9e 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/FullParseCorefEnhancerStream.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/FullParseCorefEnhancerStream.java
@@ -37,10 +37,10 @@
this.parser = parser;
}
- static Parse createIncompleteParse(String tokens[]) {
+ static Parse createIncompleteParse(String[] tokens) {
// produce text
- Span tokenSpans[] = new Span[tokens.length];
+ Span[] tokenSpans = new Span[tokens.length];
StringBuilder textBuilder = new StringBuilder();
for (int i = 0; i < tokens.length; i++) {
@@ -60,7 +60,8 @@
for (int i = 0; i < tokenSpans.length; i++) {
Span tokenSpan = tokenSpans[i];
- p.insert(new Parse(text, new Span(tokenSpan.getStart(), tokenSpan.getEnd()), AbstractBottomUpParser.TOK_NODE, 0, i));
+ p.insert(new Parse(text, new Span(tokenSpan.getStart(), tokenSpan.getEnd()),
+ AbstractBottomUpParser.TOK_NODE, 0, i));
}
return p;
@@ -78,7 +79,7 @@
for (int i = 0; i < sentences.size(); i++) {
- String sentence[] = sentences.get(i);
+ String[] sentence = sentences.get(i);
Parse incompleteParse = createIncompleteParse(sentence);
Parse p = parser.parse(incompleteParse);
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucCorefContentHandler.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucCorefContentHandler.java
index d095b48..13e8360 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucCorefContentHandler.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucCorefContentHandler.java
@@ -130,7 +130,7 @@
public void characters(CharSequence chars) {
if (isInsideContentElement) {
- String tokens [] = tokenizer.tokenize(chars.toString());
+ String[] tokens = tokenizer.tokenize(chars.toString());
text.addAll(Arrays.asList(tokens));
}
@@ -157,7 +157,7 @@
if (MucElementNames.DOC_ELEMENT.equals(name)) {
- for (CorefMention mentions[] : sample.getMentions()) {
+ for (CorefMention[] mentions : sample.getMentions()) {
for (int i = 0; i < mentions.length; i++) {
mentions[i].id = resolveId(mentions[i].id);
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
index 95b9905..84e8222 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
@@ -60,7 +60,7 @@
int startOffset = p.toString().indexOf(min);
int endOffset = startOffset + min.length();
- Parse tokens[] = p.getTagNodes();
+ Parse[] tokens = p.getTagNodes();
int beginToken = -1;
int endToken = -1;
@@ -86,7 +86,7 @@
public static boolean addMention(int id, Span mention, Parse[] tokens) {
- boolean failed = false;
+ boolean failed = false;
Parse startToken = tokens[mention.getStart()];
Parse endToken = tokens[mention.getEnd() - 1];
@@ -128,7 +128,7 @@
List<Parse> allParses = sample.getParses();
for (int si = 0; si < allMentions.size(); si++) {
- CorefMention mentions[] = allMentions.get(si);
+ CorefMention[] mentions = allMentions.get(si);
Parse p = allParses.get(si);
for (Mention extent : mentionFinder.getMentions(new DefaultParse(p, si))) {
@@ -139,7 +139,7 @@
}
}
- Parse tokens[] = p.getTagNodes();
+ Parse[] tokens = p.getTagNodes();
for (CorefMention mention : mentions) {
Span min = getMinSpan(p, mention);
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
index db350b9..93bc89e 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/NameFinderCorefEnhancerStream.java
@@ -32,11 +32,12 @@
*/
public class NameFinderCorefEnhancerStream extends FilterObjectStream<RawCorefSample, RawCorefSample> {
- private TokenNameFinder nameFinders[];
- private String tags[];
+ private TokenNameFinder[] nameFinders;
+ private String[] tags;
// TODO: Should be updated to use tag from span instead!
- protected NameFinderCorefEnhancerStream(TokenNameFinder nameFinders[], String tags[], ObjectStream<RawCorefSample> samples) {
+ protected NameFinderCorefEnhancerStream(TokenNameFinder[] nameFinders, String[] tags,
+ ObjectStream<RawCorefSample> samples) {
super(samples);
this.nameFinders = nameFinders;
this.tags = tags;
@@ -56,15 +57,15 @@
for (Parse p : sample.getParses()) {
- Parse parseTokens[] = p.getTagNodes();
- String tokens[] = new String[parseTokens.length];
+ Parse[] parseTokens = p.getTagNodes();
+ String[] tokens = new String[parseTokens.length];
for (int i = 0; i < tokens.length; i++) {
tokens[i] = parseTokens[i].toString();
}
for (int i = 0; i < nameFinders.length; i++) {
- Span names[] = nameFinders[i].find(tokens);
+ Span[] names = nameFinders[i].find(tokens);
Parse.addNames(tags[i], names, parseTokens);
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/RawCorefSample.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/RawCorefSample.java
index d2ae672..ffb6596 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/RawCorefSample.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/RawCorefSample.java
@@ -28,8 +28,8 @@
*/
public class RawCorefSample {
- private List<String[]> texts = new ArrayList<String[]>();
- private List<CorefMention[]> mentions = new ArrayList<CorefMention[]>();
+ private List<String[]> texts = new ArrayList<>();
+ private List<CorefMention[]> mentions = new ArrayList<>();
private List<Parse> parses;
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/ShallowParseCorefEnhancerStream.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/ShallowParseCorefEnhancerStream.java
index 05a06f5..fc5d696 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/ShallowParseCorefEnhancerStream.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/ShallowParseCorefEnhancerStream.java
@@ -34,7 +34,8 @@
private final POSTagger posTagger;
private final Chunker chunker;
- public ShallowParseCorefEnhancerStream(POSTagger posTagger, Chunker chunker, ObjectStream<RawCorefSample> samples) {
+ public ShallowParseCorefEnhancerStream(POSTagger posTagger, Chunker chunker,
+ ObjectStream<RawCorefSample> samples) {
super(samples);
this.posTagger = posTagger;
this.chunker = chunker;
@@ -50,22 +51,23 @@
List<String[]> sentences = sample.getTexts();
- for (String sentence[] : sentences) {
+ for (String[] sentence : sentences) {
Parse p = FullParseCorefEnhancerStream.createIncompleteParse(sentence);
p.setType(AbstractBottomUpParser.TOP_NODE);
- Parse parseTokens[] = p.getChildren();
+ Parse[] parseTokens = p.getChildren();
// construct incomplete parse here ..
- String tags[] = posTagger.tag(sentence);
+ String[] tags = posTagger.tag(sentence);
for (int i = 0; i < parseTokens.length; i++) {
- p.insert(new Parse(p.getText(), parseTokens[i].getSpan(), tags[i], 1d, parseTokens[i].getHeadIndex()));
+ p.insert(new Parse(p.getText(), parseTokens[i].getSpan(), tags[i],
+ 1d, parseTokens[i].getHeadIndex()));
}
// insert tags into incomplete parse
- Span chunks[] = chunker.chunkAsSpans(sentence, tags);
+ Span[] chunks = chunker.chunkAsSpans(sentence, tags);
for (Span chunk : chunks) {
if ("NP".equals(chunk.getType())) {
diff --git a/opennlp-coref/src/main/java/opennlp/tools/lang/english/CorefParse.java b/opennlp-coref/src/main/java/opennlp/tools/lang/english/CorefParse.java
new file mode 100644
index 0000000..6db343d
--- /dev/null
+++ b/opennlp-coref/src/main/java/opennlp/tools/lang/english/CorefParse.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.lang.english;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.coref.DiscourseEntity;
+import opennlp.tools.coref.mention.DefaultParse;
+import opennlp.tools.coref.mention.MentionContext;
+import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.chunking.Parser;
+import opennlp.tools.util.Span;
+
+class CorefParse {
+
+ private Map<Parse, Integer> parseMap;
+ private List<Parse> parses;
+
+ public CorefParse(List<Parse> parses, DiscourseEntity[] entities) {
+ this.parses = parses;
+ parseMap = new HashMap<>();
+ for (int ei = 0, en = entities.length; ei < en; ei++) {
+ if (entities[ei].getNumMentions() > 1) {
+ for (Iterator<MentionContext> mi = entities[ei].getMentions(); mi.hasNext();) {
+ MentionContext mc = mi.next();
+ Parse mentionParse = ((DefaultParse) mc.getParse()).getParse();
+ parseMap.put(mentionParse, ei + 1);
+ //System.err.println("CorefParse: "+mc.getParse().hashCode()+" -> "+ (ei+1));
+ }
+ }
+ }
+ }
+
+ public void show() {
+ for (int pi = 0, pn = parses.size(); pi < pn; pi++) {
+ Parse p = parses.get(pi);
+ show(p);
+ System.out.println();
+ }
+ }
+
+ private void show(Parse p) {
+ int start;
+ start = p.getSpan().getStart();
+ if (!p.getType().equals(Parser.TOK_NODE)) {
+ System.out.print("(");
+ System.out.print(p.getType());
+ if (parseMap.containsKey(p)) {
+ System.out.print("#" + parseMap.get(p));
+ }
+ //System.out.print(p.hashCode()+"-"+parseMap.containsKey(p));
+ System.out.print(" ");
+ }
+ Parse[] children = p.getChildren();
+ for (int pi = 0, pn = children.length; pi < pn; pi++) {
+ Parse c = children[pi];
+ Span s = c.getSpan();
+ if (start < s.getStart()) {
+ System.out.print(p.getText().substring(start, s.getStart()));
+ }
+ show(c);
+ start = s.getEnd();
+ }
+ System.out.print(p.getText().substring(start, p.getSpan().getEnd()));
+ if (!p.getType().equals(Parser.TOK_NODE)) {
+ System.out.print(")");
+ }
+ }
+}
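CorefParse is extracted here as a package-private class (its old copy inside TreebankLinker is deleted below) so the linkers in this package can reuse it. A usage fragment matching what TreebankLinker's main does once a document is linked; linker, document, and parses are assumed from that surrounding code, so this is not a complete program:

    // After collecting the mentions and parses for one document:
    DiscourseEntity[] entities =
        linker.getEntities(document.toArray(new Mention[document.size()]));
    new CorefParse(parses, entities).show();  // prints each parse, tagging mentions of
                                              // multi-mention entities as e.g. (NP#1 ...)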
diff --git a/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankLinker.java b/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankLinker.java
index 2911e64..c27d579 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankLinker.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankLinker.java
@@ -23,10 +23,8 @@
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
+
import java.util.List;
-import java.util.Map;
import opennlp.tools.coref.DefaultLinker;
import opennlp.tools.coref.DiscourseEntity;
@@ -34,11 +32,9 @@
import opennlp.tools.coref.LinkerMode;
import opennlp.tools.coref.mention.DefaultParse;
import opennlp.tools.coref.mention.Mention;
-import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.mention.PTBMentionFinder;
import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.chunking.Parser;
-import opennlp.tools.util.Span;
+
/**
* This class perform coreference for treebank style parses.
@@ -60,7 +56,8 @@
super(project,mode,useDiscourseModel);
}
- public TreebankLinker(String project, LinkerMode mode, boolean useDiscourseModel, double fixedNonReferentialProbability) throws IOException {
+ public TreebankLinker(String project, LinkerMode mode, boolean useDiscourseModel,
+ double fixedNonReferentialProbability) throws IOException {
super(project,mode,useDiscourseModel,fixedNonReferentialProbability);
}
@@ -70,8 +67,8 @@
}
private static void showEntities(DiscourseEntity[] entities) {
- for (int ei=0,en=entities.length;ei<en;ei++) {
- System.out.println(ei+" "+entities[ei]);
+ for (int ei = 0, en = entities.length; ei < en; ei++) {
+ System.out.println(ei + " " + entities[ei]);
}
}
@@ -86,7 +83,7 @@
System.exit(1);
}
BufferedReader in;
- int ai =0;
+ int ai = 0;
String dataDir = args[ai++];
if (ai == args.length) {
in = new BufferedReader(new InputStreamReader(System.in));
@@ -98,12 +95,13 @@
int sentenceNumber = 0;
List<Mention> document = new ArrayList<Mention>();
List<Parse> parses = new ArrayList<Parse>();
- for (String line=in.readLine();null != line;line = in.readLine()) {
+ for (String line = in.readLine(); null != line; line = in.readLine()) {
if (line.equals("")) {
- DiscourseEntity[] entities = treebankLinker.getEntities(document.toArray(new Mention[document.size()]));
+ DiscourseEntity[] entities =
+ treebankLinker.getEntities(document.toArray(new Mention[document.size()]));
//showEntities(entities);
new CorefParse(parses,entities).show();
- sentenceNumber=0;
+ sentenceNumber = 0;
document.clear();
parses.clear();
}
@@ -112,7 +110,7 @@
parses.add(p);
Mention[] extents = treebankLinker.getMentionFinder().getMentions(new DefaultParse(p,sentenceNumber));
//construct new parses for mentions which don't have constituents.
- for (int ei=0,en=extents.length;ei<en;ei++) {
+ for (int ei = 0, en = extents.length; ei < en; ei++) {
//System.err.println("PennTreebankLiner.main: "+ei+" "+extents[ei]);
if (extents[ei].getParse() == null) {
@@ -130,64 +128,9 @@
if (document.size() > 0) {
DiscourseEntity[] entities = treebankLinker.getEntities(document.toArray(new Mention[document.size()]));
//showEntities(entities);
- (new CorefParse(parses,entities)).show();
+ (new CorefParse(parses, entities)).show();
}
}
}
-class CorefParse {
- private Map<Parse, Integer> parseMap;
- private List<Parse> parses;
-
- public CorefParse(List<Parse> parses, DiscourseEntity[] entities) {
- this.parses = parses;
- parseMap = new HashMap<Parse, Integer>();
- for (int ei=0,en=entities.length;ei<en;ei++) {
- if (entities[ei].getNumMentions() > 1) {
- for (Iterator<MentionContext> mi = entities[ei].getMentions(); mi.hasNext();) {
- MentionContext mc = mi.next();
- Parse mentionParse = ((DefaultParse) mc.getParse()).getParse();
- parseMap.put(mentionParse,ei+1);
- //System.err.println("CorefParse: "+mc.getParse().hashCode()+" -> "+ (ei+1));
- }
- }
- }
- }
-
- public void show() {
- for (int pi=0,pn=parses.size();pi<pn;pi++) {
- Parse p = parses.get(pi);
- show(p);
- System.out.println();
- }
- }
-
- private void show(Parse p) {
- int start;
- start = p.getSpan().getStart();
- if (!p.getType().equals(Parser.TOK_NODE)) {
- System.out.print("(");
- System.out.print(p.getType());
- if (parseMap.containsKey(p)) {
- System.out.print("#"+parseMap.get(p));
- }
- //System.out.print(p.hashCode()+"-"+parseMap.containsKey(p));
- System.out.print(" ");
- }
- Parse[] children = p.getChildren();
- for (int pi=0,pn=children.length;pi<pn;pi++) {
- Parse c = children[pi];
- Span s = c.getSpan();
- if (start < s.getStart()) {
- System.out.print(p.getText().substring(start, s.getStart()));
- }
- show(c);
- start = s.getEnd();
- }
- System.out.print(p.getText().substring(start, p.getSpan().getEnd()));
- if (!p.getType().equals(Parser.TOK_NODE)) {
- System.out.print(")");
- }
- }
-}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java b/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java
index a60c838..9c89fef 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java
@@ -37,7 +37,8 @@
@Deprecated
public class TreebankNameFinder {
- public static String[] NAME_TYPES = {"person", "organization", "location", "date", "time", "percentage", "money"};
+ public static String[] NAME_TYPES =
+ {"person", "organization", "location", "date", "time", "percentage", "money"};
private NameFinderME nameFinder;
@@ -54,7 +55,8 @@
}
}
- private static void processParse(TreebankNameFinder[] finders, String[] tags, BufferedReader input) throws IOException {
+ private static void processParse(TreebankNameFinder[] finders, String[] tags, BufferedReader input)
+ throws IOException {
Span[][] nameSpans = new Span[finders.length][];
for (String line = input.readLine(); null != line; line = input.readLine()) {
@@ -66,13 +68,14 @@
Parse p = Parse.parseParse(line);
Parse[] tagNodes = p.getTagNodes();
String[] tokens = new String[tagNodes.length];
- for (int ti=0;ti<tagNodes.length;ti++){
+ for (int ti = 0; ti < tagNodes.length; ti++) {
tokens[ti] = tagNodes[ti].getCoveredText();
}
//System.err.println(java.util.Arrays.asList(tokens));
for (int fi = 0, fl = finders.length; fi < fl; fi++) {
nameSpans[fi] = finders[fi].nameFinder.find(tokens);
- //System.err.println("english.NameFinder.processParse: "+tags[fi] + " " + java.util.Arrays.asList(nameSpans[fi]));
+ //System.err.println("english.NameFinder.processParse: "+tags[fi] + " "
+ // + java.util.Arrays.asList(nameSpans[fi]));
}
for (int fi = 0, fl = finders.length; fi < fl; fi++) {
@@ -89,7 +92,8 @@
* @param input The input reader.
* @throws IOException
*/
- private static void processText(TreebankNameFinder[] finders, String[] tags, BufferedReader input) throws IOException {
+ private static void processText(TreebankNameFinder[] finders, String[] tags, BufferedReader input)
+ throws IOException {
Span[][] nameSpans = new Span[finders.length][];
String[][] nameOutcomes = new String[finders.length][];
opennlp.tools.tokenize.Tokenizer tokenizer = new SimpleTokenizer();
@@ -105,7 +109,8 @@
String[] tokens = Span.spansToStrings(spans,line);
for (int fi = 0, fl = finders.length; fi < fl; fi++) {
nameSpans[fi] = finders[fi].nameFinder.find(tokens);
- //System.err.println("EnglighNameFinder.processText: "+tags[fi] + " " + java.util.Arrays.asList(finderTags[fi]));
+ //System.err.println("EnglighNameFinder.processText: "+tags[fi] + " "
+ // + java.util.Arrays.asList(finderTags[fi]));
nameOutcomes[fi] = NameFinderEventStream.generateOutcomes(nameSpans[fi], null, tokens.length);
}
@@ -113,8 +118,10 @@
for (int fi = 0, fl = finders.length; fi < fl; fi++) {
//check for end tags
if (ti != 0) {
- if ((nameOutcomes[fi][ti].equals(NameFinderME.START) || nameOutcomes[fi][ti].equals(NameFinderME.OTHER)) &&
- (nameOutcomes[fi][ti - 1].equals(NameFinderME.START) || nameOutcomes[fi][ti - 1].equals(NameFinderME.CONTINUE))) {
+ if ((nameOutcomes[fi][ti].equals(NameFinderME.START)
+ || nameOutcomes[fi][ti].equals(NameFinderME.OTHER))
+ && (nameOutcomes[fi][ti - 1].equals(NameFinderME.START)
+ || nameOutcomes[fi][ti - 1].equals(NameFinderME.CONTINUE))) {
output.append("</").append(tags[fi]).append(">");
}
}
@@ -133,7 +140,8 @@
//final end tags
if (tokens.length != 0) {
for (int fi = 0, fl = finders.length; fi < fl; fi++) {
- if (nameOutcomes[fi][tokens.length - 1].equals(NameFinderME.START) || nameOutcomes[fi][tokens.length - 1].equals(NameFinderME.CONTINUE)) {
+ if (nameOutcomes[fi][tokens.length - 1].equals(NameFinderME.START)
+ || nameOutcomes[fi][tokens.length - 1].equals(NameFinderME.CONTINUE)) {
output.append("</").append(tags[fi]).append(">");
}
}
@@ -150,7 +158,8 @@
public static void main(String[] args) throws IOException {
if (args.length == 0) {
System.err.println("Usage NameFinder -[parse] model1 model2 ... modelN < sentences");
- System.err.println(" -parse: Use this option to find names on parsed input. Un-tokenized sentence text is the default.");
+ System.err.println(" -parse: Use this option to find names on parsed input. " +
+ "Un-tokenized sentence text is the default.");
System.exit(1);
}
int ai = 0;
@@ -160,13 +169,13 @@
parsedInput = true;
}
else {
- System.err.println("Ignoring unknown option "+args[ai]);
+ System.err.println("Ignoring unknown option " + args[ai]);
}
ai++;
}
- TreebankNameFinder[] finders = new TreebankNameFinder[args.length-ai];
- String[] names = new String[args.length-ai];
- for (int fi=0; ai < args.length; ai++,fi++) {
+ TreebankNameFinder[] finders = new TreebankNameFinder[args.length - ai];
+ String[] names = new String[args.length - ai];
+ for (int fi = 0; ai < args.length; ai++, fi++) {
String modelName = args[ai];
finders[fi] = new TreebankNameFinder(new TokenNameFinderModel(new FileInputStream(modelName)));
int nameStart = modelName.lastIndexOf(System.getProperty("file.separator")) + 1;
@@ -187,4 +196,4 @@
//long t2 = System.currentTimeMillis();
//System.err.println("Time "+(t2-t1));
}
-}
\ No newline at end of file
+}
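The multi-line boolean rewritten above (around the START/CONTINUE/OTHER outcomes) decides when to emit a closing tag while streaming tokens. Pulled out on its own, under the assumption that the NameFinderME constants behave as BIO-style outcomes, the rule reads:

    import opennlp.tools.namefind.NameFinderME;

    class NameTagBoundarySketch {
      // True when the previous token was inside a name and the current
      // outcome starts a new name or leaves names entirely, i.e. the
      // point where processText() appends "</tag>".
      static boolean nameEndsHere(String prev, String current) {
        boolean prevInside = prev.equals(NameFinderME.START)
            || prev.equals(NameFinderME.CONTINUE);
        boolean currentBreaks = current.equals(NameFinderME.START)
            || current.equals(NameFinderME.OTHER);
        return prevInside && currentBreaks;
      }
    }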
diff --git a/opennlp-coref/src/main/java/opennlp/tools/lang/english/package-info.java b/opennlp-coref/src/main/java/opennlp/tools/lang/english/package-info.java
index 9ab35ad..d06bd46 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/lang/english/package-info.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/lang/english/package-info.java
@@ -18,4 +18,4 @@
/**
* Package related to the processing of English data.
*/
-package opennlp.tools.lang.english;
\ No newline at end of file
+package opennlp.tools.lang.english;