Reduce compile and runtime dependencies in Similarity Component (#157)
Reduce compile and runtime dependencies in Similarity Component
diff --git a/opennlp-similarity/pom.xml b/opennlp-similarity/pom.xml
index bb8aa6e..5f3029d 100644
--- a/opennlp-similarity/pom.xml
+++ b/opennlp-similarity/pom.xml
@@ -12,41 +12,41 @@
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-sandbox</artifactId>
- <version>2.3.4-SNAPSHOT</version>
- </parent>
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-sandbox</artifactId>
+ <version>2.3.4-SNAPSHOT</version>
+ </parent>
- <artifactId>opennlp-similarity</artifactId>
- <version>2.3.4-SNAPSHOT</version>
- <packaging>jar</packaging>
+ <artifactId>opennlp-similarity</artifactId>
+ <version>2.3.4-SNAPSHOT</version>
+ <packaging>jar</packaging>
- <name>Apache OpenNLP Tool Similarity distribution</name>
-
- <properties>
- <dl4j.version>1.0.0-M2.1</dl4j.version>
- <hdf5.version>1.14.3-1.5.10</hdf5.version>
- <javacpp.version>1.5.10</javacpp.version>
- <openblas.version>0.3.26-1.5.10</openblas.version>
- </properties>
+ <name>Apache OpenNLP Similarity distribution</name>
- <repositories>
- <repository>
- <id>central</id>
- <name>Maven Central Repository</name>
- <url>https://repo1.maven.org/maven2</url>
- </repository>
- <repository>
- <id>billylieurance-net</id>
- <url>https://www.billylieurance.net/maven2</url>
- <snapshots>
- <enabled>false</enabled>
- </snapshots>
- </repository>
- </repositories>
+ <properties>
+ <dl4j.version>1.0.0-M2.1</dl4j.version>
+ <hdf5.version>1.14.3-1.5.10</hdf5.version>
+ <javacpp.version>1.5.10</javacpp.version>
+ <openblas.version>0.3.26-1.5.10</openblas.version>
+ </properties>
+
+ <repositories>
+ <repository>
+ <id>central</id>
+ <name>Maven Central Repository</name>
+ <url>https://repo1.maven.org/maven2</url>
+ </repository>
+ <repository>
+ <id>billylieurance-net</id>
+ <url>https://www.billylieurance.net/maven2</url>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ </repositories>
<dependencyManagement>
<dependencies>
@@ -84,501 +84,467 @@
</dependencies>
</dependencyManagement>
- <dependencies>
- <dependency>
- <groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
- </dependency>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-tools</artifactId>
+ </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
- <dependency>
- <groupId>org.apache.logging.log4j</groupId>
- <artifactId>log4j-api</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.logging.log4j</groupId>
- <artifactId>log4j-core</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.logging.log4j</groupId>
- <artifactId>log4j-slf4j2-impl</artifactId>
- <scope>test</scope>
- </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ <version>20240303</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-app</artifactId>
+ <version>2.9.2</version>
+ </dependency>
+ <dependency>
+ <groupId>net.sf.opencsv</groupId>
+ <artifactId>opencsv</artifactId>
+ <version>2.3</version>
+ </dependency>
- <dependency>
- <groupId>org.junit.jupiter</groupId>
- <artifactId>junit-jupiter-api</artifactId>
- </dependency>
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-core</artifactId>
+ <version>8.11.3</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.eclipse.jetty.http2</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
- <dependency>
- <groupId>org.junit.jupiter</groupId>
- <artifactId>junit-jupiter-engine</artifactId>
- </dependency>
+ <dependency>
+ <groupId>javax.mail</groupId>
+ <artifactId>mail</artifactId>
+ <version>1.4.7</version>
+ </dependency>
+ <dependency>
+ <groupId>com.restfb</groupId>
+ <artifactId>restfb</artifactId>
+ <version>1.49.0</version>
+ </dependency>
- <dependency>
- <groupId>org.junit.jupiter</groupId>
- <artifactId>junit-jupiter-params</artifactId>
- </dependency>
+ <dependency>
+ <groupId>net.billylieurance.azuresearch</groupId>
+ <artifactId>azure-bing-search-java</artifactId>
+ <version>0.13.0</version>
+ </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-codec</groupId>
- <artifactId>commons-codec</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-math3</artifactId>
- </dependency>
+ <dependency>
+ <groupId>edu.mit</groupId>
+ <artifactId>jverbnet</artifactId>
+ <version>1.2.0.1</version>
+ <exclusions>
+ <exclusion>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ </exclusion>
+ <!-- Avoids problems with conflicting slf4j bindings at runtime -->
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>log4j-over-slf4j</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- <version>20240303</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-app</artifactId>
- <version>2.9.2</version>
- </dependency>
- <dependency>
- <groupId>net.sf.opencsv</groupId>
- <artifactId>opencsv</artifactId>
- <version>2.3</version>
- </dependency>
+ <dependency>
+ <groupId>org.docx4j</groupId>
+ <artifactId>docx4j</artifactId>
+ <version>6.1.2</version>
+ <exclusions>
+ <!-- Exclusion here as log4j version 2 bindings are used during tests/runtime-->
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
- <dependency>
- <groupId>org.apache.solr</groupId>
- <artifactId>solr-core</artifactId>
- <version>8.11.3</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpclient</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpclient-cache</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpcore</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpmime</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>fluent-hc</artifactId>
- </dependency>
+ <dependency>
+ <groupId>org.deeplearning4j</groupId>
+ <artifactId>deeplearning4j-core</artifactId>
+ <version>${dl4j.version}</version>
+ <exclusions>
+ <!-- Excluded to avoid irrelevant platforms dependencies, see profiles -->
+ <exclusion>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>openblas-platform</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>hdf5-platform</artifactId>
+ </exclusion>
+ <!-- Not required for NLP applications -->
+ <exclusion>
+ <groupId>org.datavec</groupId>
+ <artifactId>datavec-data-image</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.deeplearning4j</groupId>
+ <artifactId>deeplearning4j-ui</artifactId>
+ <version>${dl4j.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.deeplearning4j</groupId>
+ <artifactId>deeplearning4j-nlp</artifactId>
+ <version>${dl4j.version}</version>
+ </dependency>
- <dependency>
- <groupId>org.jgrapht</groupId>
- <artifactId>jgrapht-jdk1.5</artifactId>
- <version>0.7.3</version>
- </dependency>
- <dependency>
- <groupId>de.jollyday</groupId>
- <artifactId>jollyday</artifactId>
- <version>0.5.10</version>
- </dependency>
- <dependency>
- <groupId>jgraph</groupId>
- <artifactId>jgraph</artifactId>
- <version>5.13.0.0</version>
- </dependency>
- <dependency>
- <groupId>javax.mail</groupId>
- <artifactId>mail</artifactId>
- <version>1.4.7</version>
- </dependency>
- <dependency>
- <groupId>com.restfb</groupId>
- <artifactId>restfb</artifactId>
- <version>1.49.0</version>
- </dependency>
- <dependency>
- <groupId>com.memetix</groupId>
- <artifactId>microsoft-translator-java-api</artifactId>
- <version>0.6.2</version>
- </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>openblas</artifactId>
+ <version>${openblas.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>javacpp</artifactId>
+ <version>${javacpp.version}</version>
+ </dependency>
- <dependency>
- <groupId>net.billylieurance.azuresearch</groupId>
- <artifactId>azure-bing-search-java</artifactId>
- <version>0.13.0</version>
- </dependency>
+ <!-- TEST -->
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-engine</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-params</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j2-impl</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
- <dependency>
- <groupId>edu.mit</groupId>
- <artifactId>jverbnet</artifactId>
- <version>1.2.0.1</version>
- <exclusions>
- <exclusion>
- <groupId>ch.qos.logback</groupId>
- <artifactId>logback-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>ch.qos.logback</groupId>
- <artifactId>logback-classic</artifactId>
- </exclusion>
- <!-- Avoids problems with conflicting slf4j bindings at runtime -->
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>log4j-over-slf4j</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.docx4j</groupId>
- <artifactId>docx4j</artifactId>
- <version>6.1.2</version>
- <exclusions>
- <!-- Exclusion here as log4j version 2 bindings are used during tests/runtime-->
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- </exclusion>
- <exclusion>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
+ <profiles>
+ <profile>
+ <id>platform-win-x64</id>
+ <activation>
+ <os>
+ <family>Windows</family>
+ <arch>x64</arch>
+ </os>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>javacpp</artifactId>
+ <version>${javacpp.version}</version>
+ <classifier>windows-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>openblas</artifactId>
+ <version>${openblas.version}</version>
+ <classifier>windows-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>hdf5</artifactId>
+ <version>${hdf5.version}</version>
+ <classifier>windows-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>platform-win-x86</id>
+ <activation>
+ <os>
+ <family>Windows</family>
+ <arch>x86</arch>
+ </os>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>javacpp</artifactId>
+ <version>${javacpp.version}</version>
+ <classifier>windows-x86</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>openblas</artifactId>
+ <version>${openblas.version}</version>
+ <classifier>windows-x86</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>hdf5</artifactId>
+ <version>${hdf5.version}</version>
+ <classifier>windows-x86</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>platform-linux-x64</id>
+ <activation>
+ <os>
+ <family>unix</family>
+ <name>Linux</name>
+ <arch>amd64</arch>
+ </os>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>javacpp</artifactId>
+ <version>${javacpp.version}</version>
+ <classifier>linux-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>openblas</artifactId>
+ <version>${openblas.version}</version>
+ <classifier>linux-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>hdf5</artifactId>
+ <version>${hdf5.version}</version>
+ <classifier>linux-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>platform-macosx-x64</id>
+ <activation>
+ <os>
+ <family>Mac</family>
+ <arch>x64</arch>
+ </os>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>javacpp</artifactId>
+ <version>${javacpp.version}</version>
+ <classifier>macosx-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>openblas</artifactId>
+ <version>${openblas.version}</version>
+ <classifier>macosx-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>hdf5</artifactId>
+ <version>${hdf5.version}</version>
+ <classifier>macosx-x86_64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>platform-macosx-aarch64</id>
+ <activation>
+ <os>
+ <family>mac</family>
+ <arch>aarch64</arch>
+ </os>
+ </activation>
+ <dependencies>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>javacpp</artifactId>
+ <version>${javacpp.version}</version>
+ <classifier>macosx-arm64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>openblas</artifactId>
+ <version>${openblas.version}</version>
+ <classifier>macosx-arm64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ <!-- Not available for this platform, yet...-->
+ <!--
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>hdf5</artifactId>
+ <version>${hdf5.version}</version>
+ <classifier>macosx-arm64</classifier>
+ <scope>runtime</scope>
+ </dependency>
+ -->
+ </dependencies>
+ </profile>
+ </profiles>
- <dependency>
- <groupId>org.deeplearning4j</groupId>
- <artifactId>deeplearning4j-core</artifactId>
- <version>${dl4j.version}</version>
- <exclusions>
- <!-- Excluded to avoid irrelevant platforms dependencies, see profiles -->
- <exclusion>
- <groupId>org.bytedeco</groupId>
- <artifactId>openblas-platform</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.bytedeco</groupId>
- <artifactId>hdf5-platform</artifactId>
- </exclusion>
- <!-- Not required for NLP applications -->
- <exclusion>
- <groupId>org.datavec</groupId>
- <artifactId>datavec-data-image</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.deeplearning4j</groupId>
- <artifactId>deeplearning4j-ui</artifactId>
- <version>${dl4j.version}</version>
- </dependency>
- <dependency>
- <groupId>org.deeplearning4j</groupId>
- <artifactId>deeplearning4j-nlp</artifactId>
- <version>${dl4j.version}</version>
- </dependency>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>${maven.compiler.source}</source>
+ <target>${maven.compiler.target}</target>
+ <compilerArgument>-Xlint</compilerArgument>
+ </configuration>
+ </plugin>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>openblas</artifactId>
- <version>${openblas.version}</version>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>javacpp</artifactId>
- <version>${javacpp.version}</version>
- </dependency>
- </dependencies>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <argLine>-Xmx2048m -Dfile.encoding=UTF-8</argLine>
+ <forkCount>${opennlp.forkCount}</forkCount>
+ <reuseForks>false</reuseForks>
+ <failIfNoSpecifiedTests>false</failIfNoSpecifiedTests>
+ <excludes>
+ <exclude>**/*IT.java</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
- <profiles>
- <profile>
- <id>platform-win-x64</id>
- <activation>
- <os>
- <family>Windows</family>
- <arch>x64</arch>
- </os>
- </activation>
- <dependencies>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>javacpp</artifactId>
- <version>${javacpp.version}</version>
- <classifier>windows-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>openblas</artifactId>
- <version>${openblas.version}</version>
- <classifier>windows-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>hdf5</artifactId>
- <version>${hdf5.version}</version>
- <classifier>windows-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- </dependencies>
- </profile>
- <profile>
- <id>platform-win-x86</id>
- <activation>
- <os>
- <family>Windows</family>
- <arch>x86</arch>
- </os>
- </activation>
- <dependencies>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>javacpp</artifactId>
- <version>${javacpp.version}</version>
- <classifier>windows-x86</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>openblas</artifactId>
- <version>${openblas.version}</version>
- <classifier>windows-x86</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>hdf5</artifactId>
- <version>${hdf5.version}</version>
- <classifier>windows-x86</classifier>
- <scope>runtime</scope>
- </dependency>
- </dependencies>
- </profile>
- <profile>
- <id>platform-linux-x64</id>
- <activation>
- <os>
- <family>unix</family>
- <name>Linux</name>
- <arch>amd64</arch>
- </os>
- </activation>
- <dependencies>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>javacpp</artifactId>
- <version>${javacpp.version}</version>
- <classifier>linux-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>openblas</artifactId>
- <version>${openblas.version}</version>
- <classifier>linux-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>hdf5</artifactId>
- <version>${hdf5.version}</version>
- <classifier>linux-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- </dependencies>
- </profile>
- <profile>
- <id>platform-macosx-x64</id>
- <activation>
- <os>
- <family>Mac</family>
- <arch>x64</arch>
- </os>
- </activation>
- <dependencies>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>javacpp</artifactId>
- <version>${javacpp.version}</version>
- <classifier>macosx-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>openblas</artifactId>
- <version>${openblas.version}</version>
- <classifier>macosx-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>hdf5</artifactId>
- <version>${hdf5.version}</version>
- <classifier>macosx-x86_64</classifier>
- <scope>runtime</scope>
- </dependency>
- </dependencies>
- </profile>
- <profile>
- <id>platform-macosx-aarch64</id>
- <activation>
- <os>
- <family>mac</family>
- <arch>aarch64</arch>
- </os>
- </activation>
- <dependencies>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>javacpp</artifactId>
- <version>${javacpp.version}</version>
- <classifier>macosx-arm64</classifier>
- <scope>runtime</scope>
- </dependency>
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>openblas</artifactId>
- <version>${openblas.version}</version>
- <classifier>macosx-arm64</classifier>
- <scope>runtime</scope>
- </dependency>
- <!-- Not available for this platform, yet...-->
- <!--
- <dependency>
- <groupId>org.bytedeco</groupId>
- <artifactId>hdf5</artifactId>
- <version>${hdf5.version}</version>
- <classifier>macosx-arm64</classifier>
- <scope>runtime</scope>
- </dependency>
- -->
- </dependencies>
- </profile>
- </profiles>
+ <plugin>
+ <artifactId>maven-source-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>create-source-jar</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <phase>package</phase>
+ </execution>
+ </executions>
+ </plugin>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <configuration>
- <source>${maven.compiler.source}</source>
- <target>${maven.compiler.target}</target>
- <compilerArgument>-Xlint</compilerArgument>
- </configuration>
- </plugin>
+ <plugin>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>generate checksums for binary artifacts</id>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ <phase>verify</phase>
+ <configuration>
+ <target>
+ <checksum algorithm="sha1" format="MD5SUM">
+ <fileset dir="${project.build.directory}">
+ <include name="*.zip" />
+ <include name="*.gz" />
+ </fileset>
+ </checksum>
+ <checksum algorithm="md5" format="MD5SUM">
+ <fileset dir="${project.build.directory}">
+ <include name="*.zip" />
+ <include name="*.gz" />
+ </fileset>
+ </checksum>
+ </target>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>src</id>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ <phase>package</phase>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ </execution>
+ <execution>
+ <id>source-release-assembly</id>
+ <configuration>
+ <skipAssembly>true</skipAssembly>
+ <mavenExecutorId>forked-path</mavenExecutorId>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <argLine>-Xmx2048m -Dfile.encoding=UTF-8</argLine>
- <forkCount>${opennlp.forkCount}</forkCount>
- <reuseForks>false</reuseForks>
- <failIfNoSpecifiedTests>false</failIfNoSpecifiedTests>
- <excludes>
- <exclude>**/*IT.java</exclude>
- </excludes>
- </configuration>
- </plugin>
-
- <plugin>
- <artifactId>maven-source-plugin</artifactId>
- <executions>
- <execution>
- <id>create-source-jar</id>
- <goals>
- <goal>jar</goal>
- </goals>
- <phase>package</phase>
- </execution>
- </executions>
- </plugin>
-
- <plugin>
- <artifactId>maven-antrun-plugin</artifactId>
- <executions>
- <execution>
- <id>generate checksums for binary artifacts</id>
- <goals>
- <goal>run</goal>
- </goals>
- <phase>verify</phase>
- <configuration>
- <target>
- <checksum algorithm="sha1" format="MD5SUM">
- <fileset dir="${project.build.directory}">
- <include name="*.zip" />
- <include name="*.gz" />
- </fileset>
- </checksum>
- <checksum algorithm="md5" format="MD5SUM">
- <fileset dir="${project.build.directory}">
- <include name="*.zip" />
- <include name="*.gz" />
- </fileset>
- </checksum>
- </target>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <artifactId>maven-assembly-plugin</artifactId>
- <executions>
- <execution>
- <id>src</id>
- <goals>
- <goal>single</goal>
- </goals>
- <phase>package</phase>
- <configuration>
- <descriptors>
- <descriptor>src/main/assembly/assembly.xml</descriptor>
- </descriptors>
- </configuration>
- </execution>
- <execution>
- <id>source-release-assembly</id>
- <configuration>
- <skipAssembly>true</skipAssembly>
- <mavenExecutorId>forked-path</mavenExecutorId>
- </configuration>
- </execution>
- </executions>
- </plugin>
-
- <plugin>
- <groupId>org.sonatype.plugins</groupId>
- <artifactId>nexus-staging-maven-plugin</artifactId>
- <version>1.7.0</version>
- <extensions>true</extensions>
- <configuration>
- <serverId>ossrh</serverId>
- <nexusUrl>https://oss.sonatype.org/</nexusUrl>
- <autoReleaseAfterClose>true</autoReleaseAfterClose>
- </configuration>
- </plugin>
- </plugins>
- </build>
+ <plugin>
+ <groupId>org.sonatype.plugins</groupId>
+ <artifactId>nexus-staging-maven-plugin</artifactId>
+ <version>1.7.0</version>
+ <extensions>true</extensions>
+ <configuration>
+ <serverId>ossrh</serverId>
+ <nexusUrl>https://oss.sonatype.org/</nexusUrl>
+ <autoReleaseAfterClose>true</autoReleaseAfterClose>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
</project>
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
index 2db4f12..8f08443 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
@@ -36,6 +36,7 @@
public String translate(String sentence, String lang2lang) {
if (sentence==null)
return null;
+
String request = TRANSLATOR_URL + sentence.replace(' ','+') + "&langpair="+lang2lang;//"en|es";
try {
URL urlC = new URI(request).toURL();
@@ -43,17 +44,18 @@
String line;
StringBuilder result = new StringBuilder();
- BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
- int count = 0;
- while ((line = reader.readLine()) != null)
- {
- result.append(line);
- count++;
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
+ int count = 0;
+ while ((line = reader.readLine()) != null)
+ {
+ result.append(line);
+ count++;
+ }
+ JSONObject rootObject = new JSONObject(result.toString());
+ JSONObject findObject = rootObject.getJSONObject("responseData");
+ String transl = findObject.getString("translatedText");
+ return URLDecoder.decode(transl, StandardCharsets.UTF_8);
}
- JSONObject rootObject = new JSONObject(result.toString());
- JSONObject findObject = rootObject.getJSONObject("responseData");
- String transl = findObject.getString("translatedText");
- return URLDecoder.decode(transl, StandardCharsets.UTF_8);
} catch (IOException | URISyntaxException | JSONException e) {
e.printStackTrace();
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
index ccd9f63..41bec16 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
@@ -29,8 +29,6 @@
import opennlp.tools.textsimilarity.TextProcessor;
import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
@@ -44,30 +42,25 @@
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
-import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class DocClassifier {
- private static final Log LOGGER = LogFactory.getLog(DocClassifier.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(DocClassifier.class);
public static final String DOC_CLASSIFIER_KEY = "doc_class";
public static final String RESOURCE_DIR = null;
private Map<String, Float> scoredClasses;
-
public static final Float MIN_TOTAL_SCORE_FOR_CATEGORY = 0.3f; //3.0f;
protected static IndexReader indexReader = null;
protected static IndexSearcher indexSearcher = null;
// resource directory plus the index folder
- private static final String INDEX_PATH = RESOURCE_DIR
- + ClassifierTrainingSetIndexer.INDEX_PATH;
+ private static final String INDEX_PATH = RESOURCE_DIR + ClassifierTrainingSetIndexer.INDEX_PATH;
// http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm
private static final int MAX_DOCS_TO_USE_FOR_CLASSIFY = 10, // 10 similar
- // docs for
- // nearest
- // neighbor
- // settings
-
+ // docs for nearest neighbor settings
MAX_CATEG_RESULTS = 2;
private static final float BEST_TO_NEX_BEST_RATIO = 2.0f;
// to accumulate classif results
@@ -112,7 +105,7 @@
}
}
- public DocClassifier(String inputFilename, JSONObject inputJSON) {
+ public DocClassifier(String inputFilename) {
scoredClasses = new HashMap<>();
}
@@ -131,18 +124,15 @@
Query query;
try {
query = parser.parse(queryStr);
-
} catch (ParseException e2) {
-
return results;
}
TopDocs hits = null; // TopDocs search(Query, int)
// Finds the top n hits for query.
try {
- hits = indexSearcher
- .search(query, MAX_DOCS_TO_USE_FOR_CLASSIFY + 2);
+ hits = indexSearcher.search(query, MAX_DOCS_TO_USE_FOR_CLASSIFY + 2);
} catch (IOException e1) {
- LOGGER.error("problem searching index \n" + e1);
+ LOGGER.error("problem searching index \n", e1);
}
LOGGER.debug("Found " + hits.totalHits + " hits for " + queryStr);
int count = 0;
@@ -175,8 +165,7 @@
}
try {
scoredClasses = ValueSortMap.sortMapByValue(scoredClasses, false);
- List<String> resultsAll = new ArrayList<>(
- scoredClasses.keySet()), resultsAboveThresh = new ArrayList<>();
+ List<String> resultsAll = new ArrayList<>(scoredClasses.keySet()), resultsAboveThresh = new ArrayList<>();
for (String key : resultsAll) {
if (scoredClasses.get(key) > MIN_TOTAL_SCORE_FOR_CATEGORY)
resultsAboveThresh.add(key);
@@ -211,15 +200,11 @@
}
-
-
-
public static String formClassifQuery(String pageContentReader, int maxRes) {
// We want to control which delimiters we substitute. For example '_' &
// \n we retain
- pageContentReader = pageContentReader.replaceAll("[^A-Za-z0-9 _\\n]",
- "");
+ pageContentReader = pageContentReader.replaceAll("[^A-Za-z0-9 _\\n]", "");
Scanner in = new Scanner(pageContentReader);
in.useDelimiter("\\s+");
@@ -258,11 +243,9 @@
}
}
-
/*
* Main entry point for classifying sentences
*/
-
public List<String> getEntityOrClassFromText(String content) {
List<String> sentences = TextProcessor.splitToSentences(content);
@@ -284,7 +267,6 @@
LOGGER.debug(sentence + " => " + classifResults);
}
}
-
} catch (Exception e) {
LOGGER.error("Problem classifying sentence\n " + e);
}
@@ -294,11 +276,10 @@
aggrResults = localCats.getFrequentTags();
- LOGGER.debug(localCats.getFrequentTags());
+ LOGGER.debug(localCats.getFrequentTags().toString());
} catch (Exception e) {
- LOGGER.error("Problem aggregating search results\n" + e);
+ LOGGER.error("Problem aggregating search results\n", e);
}
return aggrResults;
}
-
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
index 90501ad..29a5107 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
@@ -33,7 +33,6 @@
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
-import org.json.JSONObject;
/*
* This utility gets 'training_corpus' as input and creates a new version of training_corpus with verified files.
@@ -56,7 +55,7 @@
public DocClassifierTrainingSetMultilingualExtender(String resource) {
- classifier = new DocClassifier("", new JSONObject());
+ classifier = new DocClassifier("");
}
private final int FRAGMENT_LENGTH = 500;
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetVerifier.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetVerifier.java
index 4da160a..95c2b27 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetVerifier.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetVerifier.java
@@ -26,33 +26,28 @@
import org.apache.commons.io.FileUtils;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
-import org.json.JSONObject;
/*
* This utility gets 'training_corpus' as input and creates a new version of training_corpus with verified files.
* Verified => classified by existing training set as only belonging to its target category, no other categories, not empty.
*/
public class DocClassifierTrainingSetVerifier {
+
+ private static final int FRAGMENT_LENGTH = 500;
public static String projectHome = new File(".").getAbsolutePath();
- public static String resourceDir = new File(".").getAbsolutePath().replace("/.", "") + "/src/main/resources";
+ public static String resourceDir = projectHome.replace("/.", "") + "/src/main/resources";
DocClassifier classifier;
private String sourceDir = null, destinationDir = null;
-
protected final ArrayList<File> queue = new ArrayList<>();
-
protected final Tika tika = new Tika();
+
public DocClassifierTrainingSetVerifier(String resource) {
-
-
- classifier = new DocClassifier("", new JSONObject());
-
+ classifier = new DocClassifier("");
}
- private static final int FRAGMENT_LENGTH = 500;
protected void addFiles(File file) {
-
try {
if (!file.exists()) {
System.out.println(file + " does not exist.");
@@ -90,8 +85,7 @@
//if (f.getName().indexOf(".html")<0)
//continue;
- classifier = new DocClassifier("", new JSONObject());
-
+ classifier = new DocClassifier("");
content = tika.parseToString(f);
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
index 409172b..8224273 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
@@ -421,11 +421,10 @@
}
public boolean equals(ParseTreeChunk ch) {
- List<String> lems = ch.getLemmas();
- List<String> poss = ch.POSs;
return ListUtils.isEqualList(ch.getLemmas(), this.lemmas) && ListUtils.isEqualList(ch.getPOSs(), this.POSs);
}
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder(" [");
if (mainPOS != null)