[RELEASE] [skip-ci] merging 'release-0.61.0' into 'master'
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 70aa3f5..00e9cd7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -110,7 +110,7 @@
- git fetch
- git checkout master
- git checkout dev
- - mvn -B jgitflow:release-start $MAVEN_CLI_OPTS
+ - mvn -B jgitflow:release-start $MAVEN_CLI_OPTS -Drelease=true
- git push origin --all
only:
- dev
@@ -135,7 +135,7 @@
- git checkout master
- git checkout dev
- git checkout $CI_BUILD_REF_NAME
- - mvn -B jgitflow:release-finish $MAVEN_CLI_OPTS
+ - mvn -B jgitflow:release-finish $MAVEN_CLI_OPTS -Drelease=true
- git push origin --all
- git push origin --tags
- git checkout master
@@ -273,10 +273,10 @@
variables:
CONTAINER_NAME: "processors-pattern-detection-flink"
-docker-hub-processors-filters-siddhi:
- <<: *docker_hub_script
- variables:
- CONTAINER_NAME: "processors-filters-siddhi"
+#docker-hub-processors-filters-siddhi:
+# <<: *docker_hub_script
+# variables:
+# CONTAINER_NAME: "processors-filters-siddhi"
docker-hub-processors-statistics-flink:
<<: *docker_hub_script
diff --git a/pom.xml b/pom.xml
index 4c08cb2..1771ef0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,7 +5,7 @@
<groupId>org.streampipes</groupId>
<artifactId>streampipes-pipeline-elements</artifactId>
<packaging>pom</packaging>
- <version>0.60.1</version>
+ <version>0.61.0</version>
<modules>
<module>streampipes-sinks-databases-jvm</module>
<module>streampipes-sinks-internal-jvm</module>
@@ -31,7 +31,7 @@
</modules>
<properties>
- <streampipes.version>0.60.1</streampipes.version>
+ <streampipes.version>0.61.0</streampipes.version>
<lightcouch.version>0.1.8</lightcouch.version>
</properties>
@@ -126,6 +126,78 @@
</dependencies>
</dependencyManagement>
+ <profiles>
+ <profile>
+ <id>release</id>
+ <activation>
+ <property>
+ <name>release</name>
+ </property>
+ </activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>attach-javadocs</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <failOnError>false</failOnError>
+ <additionalparam>-Xdoclint:none</additionalparam>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>external.atlassian.jgitflow</groupId>
+ <artifactId>jgitflow-maven-plugin</artifactId>
+ <version>1.0-m5.1</version>
+ <configuration>
+ <flowInitContext>
+ <masterBranchName>master</masterBranchName>
+ <developBranchName>dev</developBranchName>
+ <featureBranchPrefix>feature-</featureBranchPrefix>
+ <releaseBranchPrefix>release-</releaseBranchPrefix>
+ <hotfixBranchPrefix>hotfix-</hotfixBranchPrefix>
+ </flowInitContext>
+ <noDeploy>true</noDeploy>
+ <autoVersionSubmodules>true</autoVersionSubmodules>
+ <pushReleases>false</pushReleases>
+ <localOnly>true</localOnly>
+ <squash>false</squash>
+ <scmCommentPrefix>[RELEASE] [skip-ci]</scmCommentPrefix>
+ <enableSshAgent>true</enableSshAgent>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-gpg-plugin</artifactId>
+ <version>1.6</version>
+ <executions>
+ <execution>
+ <id>sign-artifacts</id>
+ <phase>verify</phase>
+ <goals>
+ <goal>sign</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <gpgArguments>
+ <arg>--pinentry-mode</arg>
+ <arg>loopback</arg>
+ </gpgArguments>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+
<build>
<pluginManagement>
<plugins>
@@ -146,71 +218,13 @@
</plugin>
</plugins>
</pluginManagement>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-javadoc-plugin</artifactId>
- <executions>
- <execution>
- <id>attach-javadocs</id>
- <goals>
- <goal>jar</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <failOnError>false</failOnError>
- <additionalparam>-Xdoclint:none</additionalparam>
- </configuration>
- </plugin>
- <plugin>
- <groupId>external.atlassian.jgitflow</groupId>
- <artifactId>jgitflow-maven-plugin</artifactId>
- <version>1.0-m5.1</version>
- <configuration>
- <flowInitContext>
- <masterBranchName>master</masterBranchName>
- <developBranchName>dev</developBranchName>
- <featureBranchPrefix>feature-</featureBranchPrefix>
- <releaseBranchPrefix>release-</releaseBranchPrefix>
- <hotfixBranchPrefix>hotfix-</hotfixBranchPrefix>
- </flowInitContext>
- <noDeploy>true</noDeploy>
- <autoVersionSubmodules>true</autoVersionSubmodules>
- <pushReleases>false</pushReleases>
- <localOnly>true</localOnly>
- <squash>false</squash>
- <scmCommentPrefix>[RELEASE] [skip-ci]</scmCommentPrefix>
- <enableSshAgent>true</enableSshAgent>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-gpg-plugin</artifactId>
- <version>1.6</version>
- <executions>
- <execution>
- <id>sign-artifacts</id>
- <phase>verify</phase>
- <goals>
- <goal>sign</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <gpgArguments>
- <arg>--pinentry-mode</arg>
- <arg>loopback</arg>
- </gpgArguments>
- </configuration>
- </plugin>
- </plugins>
</build>
<scm>
<developerConnection>scm:git:ssh://git@ipe-wim-gitlab.fzi.de:2222/streampipes/streampipes-pipeline-elements.git
</developerConnection>
- <connection>scm:git:ssh://git@ipe-wim-gitlab.fzi.de:2222/streampipes/streampipes-pipeline-elements.git</connection>
+ <connection>scm:git:ssh://git@ipe-wim-gitlab.fzi.de:2222/streampipes/streampipes-pipeline-elements.git
+ </connection>
<url>https://github.com/streampipes/streampipes-pipeline-elements</url>
</scm>
@@ -251,9 +265,9 @@
<distributionManagement>
<repository>
- <id>sonatype</id>
- <name>Releases</name>
- <url>https://oss.sonatype.org/service/local/staging/deploy/maven2</url>
+ <id>deployment</id>
+ <name>Internal Releases</name>
+ <url>https://laus.fzi.de/nexus/content/repositories/releases/</url>
</repository>
<snapshotRepository>
<id>deployment</id>
diff --git a/streampipes-pipeline-elements-shared/pom.xml b/streampipes-pipeline-elements-shared/pom.xml
index 02f2d4d..e78263a 100644
--- a/streampipes-pipeline-elements-shared/pom.xml
+++ b/streampipes-pipeline-elements-shared/pom.xml
@@ -20,7 +20,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-aggregation-flink/development/.env b/streampipes-processors-aggregation-flink/development/.env
index dcc81bc..22f7b2c 100644
--- a/streampipes-processors-aggregation-flink/development/.env
+++ b/streampipes-processors-aggregation-flink/development/.env
@@ -3,6 +3,4 @@
SP_HOST=localhost
SP_ICON_HOST=localhost
SP_KAFKA_HOST=localhost
-SP_ZOOKEEPER_HOST=localhost
-SP_ELASTICSEARCH_HOST=localhost
SP_FLINK_DEBUG=true
diff --git a/streampipes-processors-aggregation-flink/pom.xml b/streampipes-processors-aggregation-flink/pom.xml
index 489442d..0fe6b06 100644
--- a/streampipes-processors-aggregation-flink/pom.xml
+++ b/streampipes-processors-aggregation-flink/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/config/AggregationFlinkConfig.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/config/AggregationFlinkConfig.java
index 130704e..a0f48ea 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/config/AggregationFlinkConfig.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/config/AggregationFlinkConfig.java
@@ -38,9 +38,6 @@
config.register(ConfigKeys.PORT, 8090, "Port for the pe mixed flink component");
config.register(ConfigKeys.FLINK_HOST, "jobmanager", "Host for the flink cluster");
config.register(ConfigKeys.FLINK_PORT, 6123, "Port for the flink cluster");
- config.register(ConfigKeys.ELASTIC_HOST, "elasticsearch", "Elastic search host address");
- config.register(ConfigKeys.ELASTIC_PORT, 9300, "Elasitc search port");
- config.register(ConfigKeys.ELASTIC_PORT_REST, 9200, "Elasitc search rest port");
config.register(ConfigKeys.ICON_HOST, "backend", "Hostname for the icon host");
config.register(ConfigKeys.ICON_PORT, 80, "Port for the icons in nginx");
@@ -69,15 +66,6 @@
return config.getInteger(ConfigKeys.FLINK_PORT);
}
- public String getElasticsearchHost() {
- return config.getString(ConfigKeys.ELASTIC_HOST);
- }
-
- public int getElasticsearchPort() {
- return config.getInteger(ConfigKeys.ELASTIC_PORT);
- }
-
-
public static final String iconBaseUrl = "http://" + AggregationFlinkConfig.INSTANCE.getIconHost() + ":" + AggregationFlinkConfig.INSTANCE.getIconPort() + "/assets/img/pe_icons";
public static final String getIconUrl(String pictureName) {
@@ -92,11 +80,6 @@
return config.getInteger(ConfigKeys.ICON_PORT);
}
- public int getElasticsearchPortRest() {
- return config.getInteger(ConfigKeys.ELASTIC_PORT_REST);
- }
-
-
public boolean getDebug() {
return config.getBoolean(ConfigKeys.DEBUG);
}
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/config/ConfigKeys.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/config/ConfigKeys.java
index 36f9766..e595791 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/config/ConfigKeys.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/config/ConfigKeys.java
@@ -22,9 +22,6 @@
final static String PORT = "SP_PORT";
final static String FLINK_HOST = "SP_FLINK_HOST";
final static String FLINK_PORT = "SP_FLINK_PORT";
- final static String ELASTIC_HOST = "SP_ELASTICSEARCH_HOST";
- final static String ELASTIC_PORT = "SP_ELASTICSEARCH_PORT";
- final static String ELASTIC_PORT_REST = "SP_ELASTICSEARCH_PORT_REST";
final static String ICON_HOST = "SP_ICON_HOST";
final static String ICON_PORT = "SP_ICON_PORT";
final static String SERVICE_NAME = "SP_SERVICE_NAME";
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/aggregation/Aggregation.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/aggregation/Aggregation.java
index 67ae5bd..ea17959 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/aggregation/Aggregation.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/aggregation/Aggregation.java
@@ -20,11 +20,12 @@
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
import java.util.*;
-public class Aggregation implements WindowFunction<Map<String, Object>, Map<String, Object>, String, TimeWindow>,
- AllWindowFunction<Map<String, Object>, Map<String, Object>, TimeWindow> {
+public class Aggregation implements WindowFunction<Event, Event, String, TimeWindow>,
+ AllWindowFunction<Event, Event, TimeWindow> {
private AggregationType aggregationType;
private String fieldToAggregate;
@@ -43,7 +44,7 @@
}
@Override
- public void apply(String key, TimeWindow window, Iterable<Map<String, Object>> input, Collector<Map<String, Object>>
+ public void apply(String key, TimeWindow window, Iterable<Event> input, Collector<Event>
out) throws Exception {
process(input, out, key);
}
@@ -62,23 +63,26 @@
}
@Override
- public void apply(TimeWindow window, Iterable<Map<String, Object>> input, Collector<Map<String, Object>> out) throws
+ public void apply(TimeWindow window, Iterable<Event> input, Collector<Event> out)
+ throws
Exception {
process(input, out, null);
}
- private void process(Iterable<Map<String, Object>> input, Collector<Map<String, Object>> out, String key) {
+ private void process(Iterable<Event> input, Collector<Event> out, String key) {
List<Double> values = new ArrayList<>();
- Map<String, Object> lastEvent = new HashMap<>();
+ Event lastEvent = new Event();
- for (Map<String, Object> anInput : input) {
+ for (Event anInput : input) {
lastEvent = anInput;
- if (!keyedStream || String.valueOf(lastEvent.get(keyIdentifier)).equals(key)) {
- values.add(Double.parseDouble(String.valueOf(lastEvent.get(fieldToAggregate))));
+ if (!keyedStream || (lastEvent.getFieldBySelector(keyIdentifier).getAsPrimitive().getAsString())
+ .equals(key)) {
+ values.add(lastEvent.getFieldBySelector
+ (fieldToAggregate).getAsPrimitive().getAsDouble());
}
}
- lastEvent.put("aggregatedValue", getAggregate(values));
+ lastEvent.addField("aggregatedValue", getAggregate(values));
out.collect(lastEvent);
}
}
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/aggregation/AggregationProgram.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/aggregation/AggregationProgram.java
index 8a6dae0..bd47412 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/aggregation/AggregationProgram.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/aggregation/AggregationProgram.java
@@ -21,6 +21,7 @@
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.aggregation.flink.AbstractAggregationProgram;
import java.util.Map;
@@ -33,11 +34,11 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... dataStreams) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
return getKeyedStream(dataStreams[0]);
}
- private DataStream<Map<String, Object>> getKeyedStream(DataStream<Map<String, Object>> dataStream) {
+ private DataStream<Event> getKeyedStream(DataStream<Event> dataStream) {
if (params.getGroupBy().size() > 0) {
return dataStream
.keyBy(getKeySelector())
@@ -49,13 +50,13 @@
}
}
- private KeySelector<Map<String, Object>, String> getKeySelector() {
+ private KeySelector<Event, String> getKeySelector() {
// TODO allow multiple keys
String groupBy = params.getGroupBy().get(0);
- return new KeySelector<Map<String, Object>, String>() {
+ return new KeySelector<Event, String>() {
@Override
- public String getKey(Map<String, Object> in) throws Exception {
- return String.valueOf(in.get(groupBy));
+ public String getKey(Event in) throws Exception {
+ return in.getFieldBySelector(groupBy).getAsPrimitive().getAsString(); // key on the field's primitive value, the same form Aggregation compares against
}
};
}
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/CountMapper.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/CountMapper.java
index 2daf0b5..d8b30e9 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/CountMapper.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/CountMapper.java
@@ -18,10 +18,9 @@
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
+import org.streampipes.model.runtime.Event;
-import java.util.Map;
-
-public class CountMapper implements MapFunction<Map<String, Object>, Tuple3<String, String, Integer>> {
+public class CountMapper implements MapFunction<Event, Tuple3<String, String, Integer>> {
private String fieldToCount;
@@ -30,7 +29,8 @@
}
@Override
- public Tuple3<String, String, Integer> map(Map<String, Object> stringObjectMap) throws Exception {
- return new Tuple3<>(fieldToCount, (String) stringObjectMap.get(fieldToCount), 1);
+ public Tuple3<String, String, Integer> map(Event event) throws Exception {
+ return new Tuple3<>(fieldToCount, event.getFieldBySelector(fieldToCount)
+ .getAsPrimitive().getAsString(), 1); // tuple: (field name, field value as string, 1)
}
}
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/CountProgram.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/CountProgram.java
index 3bec6dd..27a1cc0 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/CountProgram.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/CountProgram.java
@@ -22,6 +22,7 @@
import org.apache.flink.streaming.api.windowing.triggers.Trigger;
import org.apache.flink.streaming.api.windowing.triggers.TriggerResult;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.aggregation.flink.AbstractAggregationProgram;
import java.util.Map;
@@ -34,7 +35,7 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... dataStreams) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
return dataStreams[0]
.map(new CountMapper(params.getFieldToCount()))
.keyBy(1)
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/Tuple2MapMapper.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/Tuple2MapMapper.java
index fa7abba..5ff6e55 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/Tuple2MapMapper.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/count/Tuple2MapMapper.java
@@ -18,16 +18,14 @@
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
+import org.streampipes.model.runtime.Event;
-import java.util.HashMap;
-import java.util.Map;
-
-public class Tuple2MapMapper implements MapFunction<Tuple3<String, String, Integer>, Map<String, Object>> {
+public class Tuple2MapMapper implements MapFunction<Tuple3<String, String, Integer>, Event> {
@Override
- public Map<String, Object> map(Tuple3<String, String, Integer> in) throws Exception {
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("value", in.f1);
- outMap.put("count", in.f2);
- return outMap;
+ public Event map(Tuple3<String, String, Integer> in) throws Exception {
+ Event outEvent = new Event();
+ outEvent.addField("value", in.f1);
+ outEvent.addField("count", in.f2);
+ return outEvent;
}
}
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/rate/EventRate.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/rate/EventRate.java
index b9f3071..796f2ee 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/rate/EventRate.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/rate/EventRate.java
@@ -19,10 +19,9 @@
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
-import java.util.Map;
-
-public class EventRate implements AllWindowFunction<Map<String, Object>, Float, TimeWindow>{
+public class EventRate implements AllWindowFunction<Event, Float, TimeWindow>{
private Integer timeWindowSize;
@@ -31,7 +30,8 @@
}
@Override
- public void apply(TimeWindow timeWindow, Iterable<Map<String, Object>> iterable, Collector<Float> collector) throws Exception {
+ public void apply(TimeWindow timeWindow, Iterable<Event> iterable, Collector<Float> collector)
+ throws Exception {
collector.collect((float) Iterables.size(iterable) / timeWindowSize);
}
}
diff --git a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/rate/EventRateProgram.java b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/rate/EventRateProgram.java
index 879d521..ec36d61 100644
--- a/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/rate/EventRateProgram.java
+++ b/streampipes-processors-aggregation-flink/src/main/java/org/streampipes/processors/aggregation/flink/processor/rate/EventRateProgram.java
@@ -20,11 +20,9 @@
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.aggregation.flink.AbstractAggregationProgram;
-import java.util.HashMap;
-import java.util.Map;
-
public class EventRateProgram extends AbstractAggregationProgram<EventRateParameter> {
public EventRateProgram(EventRateParameter params, boolean debug) {
@@ -33,16 +31,16 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... dataStreams) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
return dataStreams[0]
.timeWindowAll(Time.seconds(params.getAvgRate()))
.apply(new EventRate(params.getAvgRate()))
- .flatMap(new FlatMapFunction<Float, Map<String, Object>>() {
+ .flatMap(new FlatMapFunction<Float, Event>() {
@Override
- public void flatMap(Float rate, Collector<Map<String, Object>> out) throws Exception {
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("rate", rate);
- out.collect(outMap);
+ public void flatMap(Float rate, Collector<Event> out) throws Exception {
+ Event event = new Event();
+ event.addField("rate", rate);
+ out.collect(event);
}
});
}
diff --git a/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/aggregation/AggregationTestData.java b/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/aggregation/AggregationTestData.java
index 41b4922..f9139f0 100644
--- a/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/aggregation/AggregationTestData.java
+++ b/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/aggregation/AggregationTestData.java
@@ -15,15 +15,15 @@
*/
package org.streampipes.processors.aggregation.flink.processor.aggregation;
+import org.streampipes.model.runtime.Event;
+
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
public class AggregationTestData {
- private List<Map<String, Object>> expectedOutput;
- private List<Map<String, Object>> input;
+ private List<Event> expectedOutput;
+ private List<Event> input;
public AggregationTestData() {
buildOutput();
@@ -32,34 +32,34 @@
private void buildOutput() {
this.expectedOutput = new ArrayList<>();
- this.expectedOutput.add(buildOutputMap(1, 1.0));
- this.expectedOutput.add(buildOutputMap(2, 1.5));
+ this.expectedOutput.add(buildOutputMap(1.0f, 1.0f));
+ this.expectedOutput.add(buildOutputMap(2.0f, 1.5f));
}
private void buildInput() {
this.input = new ArrayList<>();
- input.add(buildMap(1));
- input.add(buildMap(2));
+ input.add(buildEvent(1.0f));
+ input.add(buildEvent(2.0f));
}
- private Map<String, Object> buildOutputMap(Object value, Object aggregatedValue) {
- Map<String, Object> map = buildMap(value);
- map.put("aggregatedValue", aggregatedValue);
- return map;
+ private Event buildOutputMap(Float value, Float aggregatedValue) {
+ Event event = buildEvent(value);
+ event.addField("aggregatedValue", aggregatedValue);
+ return event;
}
- private Map<String, Object> buildMap(Object value) {
- Map<String, Object> map = new HashMap<>();
- map.put("sensorId", "a");
- map.put("value", value);
- return map;
+ private Event buildEvent(Float value) {
+ Event event = new Event();
+ event.addField("sensorId", "a");
+ event.addField("value", value);
+ return event;
}
- public List<Map<String, Object>> getExpectedOutput() {
+ public List<Event> getExpectedOutput() {
return expectedOutput;
}
- public List<Map<String, Object>> getInput() {
+ public List<Event> getInput() {
return input;
}
}
diff --git a/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/aggregation/TestAggregationProgram.java b/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/aggregation/TestAggregationProgram.java
index 8f6baab..398df26 100644
--- a/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/aggregation/TestAggregationProgram.java
+++ b/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/aggregation/TestAggregationProgram.java
@@ -21,9 +21,10 @@
import io.flinkspector.datastream.input.EventTimeInputBuilder;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.junit.Test;
+import org.streampipes.model.runtime.Event;
import org.streampipes.test.generator.InvocationGraphGenerator;
-import java.util.*;
+import java.util.Arrays;
//@RunWith(Parameterized.class)
public class TestAggregationProgram extends DataStreamTestBase {
@@ -44,10 +45,11 @@
AggregationProgram program = new AggregationProgram(params, true);
AggregationTestData testData = new AggregationTestData();
- DataStream<Map<String, Object>> stream = program.getApplicationLogic(createTestStream(makeInputData(testData)));
+ DataStream<Event> stream = program.getApplicationLogic(createTestStream(makeInputData
+ (testData)));
- ExpectedRecords<Map<String, Object>> expected =
- new ExpectedRecords<Map<String, Object>>().expectAll(testData.getExpectedOutput());
+ ExpectedRecords<Event> expected =
+ new ExpectedRecords<Event>().expectAll(testData.getExpectedOutput());
assertStream(stream, expected);
}
@@ -62,7 +64,7 @@
Arrays.asList("value"));
}
- private EventTimeInput<Map<String, Object>> makeInputData(AggregationTestData testData) {
+ private EventTimeInput<Event> makeInputData(AggregationTestData testData) {
return EventTimeInputBuilder.startWith(testData.getInput().get(0))
.emit(testData.getInput().get(1), after(1, seconds));
}
diff --git a/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/count/TestCountProgram.java b/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/count/TestCountProgram.java
index 91b7ac7..6f59367 100644
--- a/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/count/TestCountProgram.java
+++ b/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/count/TestCountProgram.java
@@ -23,9 +23,12 @@
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.junit.Test;
+import org.streampipes.model.runtime.Event;
import org.streampipes.test.generator.InvocationGraphGenerator;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
public class TestCountProgram extends DataStreamTestBase {
@@ -34,8 +37,8 @@
EventTimeInput input = makeInputData(makeTestData(), makeTestData().size());
- ExpectedRecords<Map<String, Object>> expected =
- new ExpectedRecords<Map<String, Object>>().expectAll(getOutput());
+ ExpectedRecords<Event> expected =
+ new ExpectedRecords<Event>().expectAll(getOutput());
runProgram(input, expected);
}
@@ -45,24 +48,25 @@
EventTimeInput input = makeInputData(makeTestData(), 2);
- ExpectedRecords<Map<String, Object>> expected =
- new ExpectedRecords<Map<String, Object>>().expectAll(getOutOfWindowOutput());
+ ExpectedRecords<Event> expected =
+ new ExpectedRecords<Event>().expectAll(getOutOfWindowOutput());
runProgram(input, expected);
}
- private void runProgram(EventTimeInput<Map<String, Object>> input, ExpectedRecords<Map<String, Object>> expected) {
+ private void runProgram(EventTimeInput<Event> input, ExpectedRecords<Event>
+ expected) {
CountParameters params = new CountParameters(InvocationGraphGenerator.makeEmptyInvocation(new CountController().declareModel()), Time.seconds(10), "field");
CountProgram program = new CountProgram(params, true);
- DataStream<Map<String, Object>> stream = program.getApplicationLogic(createTestStream(input));
+ DataStream<Event> stream = program.getApplicationLogic(createTestStream(input));
assertStream(stream, expected);
}
- private Collection<Map<String, Object>> getOutput() {
- List<Map<String, Object>> outRecords = new ArrayList<>();
+ private Collection<Event> getOutput() {
+ List<Event> outRecords = new ArrayList<>();
outRecords.add(makeOutMap("v1", 1));
outRecords.add(makeOutMap("v2", 1));
outRecords.add(makeOutMap("v1", 2));
@@ -72,8 +76,8 @@
return outRecords;
}
- private Collection<Map<String, Object>> getOutOfWindowOutput() {
- List<Map<String, Object>> outRecords = new ArrayList<>();
+ private Collection<Event> getOutOfWindowOutput() {
+ List<Event> outRecords = new ArrayList<>();
outRecords.add(makeOutMap("v1", 1));
outRecords.add(makeOutMap("v2", 1));
outRecords.add(makeOutMap("v1", 1));
@@ -83,15 +87,16 @@
return outRecords;
}
- private Map<String, Object> makeOutMap(String key, Integer count) {
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("value", key);
- outMap.put("count", count);
- return outMap;
+ private Event makeOutMap(String key, Integer count) {
+ Event outEvent = new Event();
+ outEvent.addField("value", key);
+ outEvent.addField("count", count);
+ return outEvent;
}
- private EventTimeInput<Map<String, Object>> makeInputData(List<Map<String, Object>> testData, Integer splitIndex) {
- EventTimeInputBuilder<Map<String, Object>> builder = EventTimeInputBuilder.startWith(testData.get(0));
+ private EventTimeInput<Event> makeInputData(List<Event> testData, Integer
+ splitIndex) {
+ EventTimeInputBuilder<Event> builder = EventTimeInputBuilder.startWith(testData.get(0));
for (int i = 1; i < splitIndex; i++) {
builder.emit(testData.get(i), after(1, seconds));
@@ -104,20 +109,20 @@
return builder;
}
- private List<Map<String, Object>> makeTestData() {
- List<Map<String, Object>> inMap = new ArrayList<>();
- inMap.add(makeMap("v1"));
- inMap.add(makeMap("v2"));
- inMap.add(makeMap("v1"));
- inMap.add(makeMap("v3"));
- inMap.add(makeMap("v2"));
+ private List<Event> makeTestData() {
+ List<Event> inEvent = new ArrayList<>();
+ inEvent.add(makeMap("v1"));
+ inEvent.add(makeMap("v2"));
+ inEvent.add(makeMap("v1"));
+ inEvent.add(makeMap("v3"));
+ inEvent.add(makeMap("v2"));
- return inMap;
+ return inEvent;
}
- private Map<String, Object> makeMap(String s) {
- Map<String, Object> testMap = new HashMap<>();
- testMap.put("field", s);
- return testMap;
+ private Event makeMap(String s) {
+ Event testEvent = new Event();
+ testEvent.addField("field", s);
+ return testEvent;
}
}
diff --git a/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/rate/TestRateProgram.java b/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/rate/TestRateProgram.java
index ce1aa14..9832e9a 100644
--- a/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/rate/TestRateProgram.java
+++ b/streampipes-processors-aggregation-flink/src/test/java/org/streampipes/processors/aggregation/flink/processor/rate/TestRateProgram.java
@@ -24,9 +24,13 @@
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import org.streampipes.model.runtime.Event;
import org.streampipes.test.generator.InvocationGraphGenerator;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
import java.util.concurrent.TimeUnit;
@RunWith(Parameterized.class)
@@ -66,18 +70,21 @@
EventRateProgram program = new EventRateProgram(params, true);
- DataStream<Map<String, Object>> stream = program.getApplicationLogic(createTestStream(makeInputData(numEvents, waitTime, timeUnit)));
+ DataStream<Event> stream = program.getApplicationLogic(createTestStream(makeInputData
+ (numEvents, waitTime, timeUnit)));
- ExpectedRecords<Map<String, Object>> expected =
- new ExpectedRecords<Map<String, Object>>().expectAll(getOutput(timeWindowSize, expectedFrequency, numEvents));
+ ExpectedRecords<Event> expected =
+ new ExpectedRecords<Event>().expectAll(getOutput(timeWindowSize, expectedFrequency,
+ numEvents));
assertStream(stream, expected);
}
- private Collection<Map<String, Object>> getOutput(Integer timeWindowSize, Float eventsPerSecond, Integer numEvents) {
- List<Map<String, Object>> allEvents = new ArrayList<>();
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("rate", eventsPerSecond);
+ private Collection<Event> getOutput(Integer timeWindowSize, Float eventsPerSecond, Integer
+ numEvents) {
+ List<Event> allEvents = new ArrayList<>();
+ Event outMap = new Event();
+ outMap.addField("rate", eventsPerSecond);
for (int i = 0; i < numEvents % timeWindowSize; i++) {
allEvents.add(outMap);
@@ -86,9 +93,9 @@
return allEvents;
}
- private EventTimeInput<Map<String, Object>> makeInputData(Integer count, Integer time, TimeUnit timeUnit) {
- List<Map<String, Object>> testData = makeTestData(count);
- EventTimeInputBuilder<Map<String, Object>> builder = EventTimeInputBuilder.startWith(testData.get(0));
+ private EventTimeInput<Event> makeInputData(Integer count, Integer time, TimeUnit timeUnit) {
+ List<Event> testData = makeTestData(count);
+ EventTimeInputBuilder<Event> builder = EventTimeInputBuilder.startWith(testData.get(0));
for (int i = 1; i < testData.size(); i++) {
builder.emit(testData.get(i), after(time, timeUnit));
@@ -97,10 +104,10 @@
return builder;
}
- private List<Map<String, Object>> makeTestData(Integer count) {
- List<Map<String, Object>> allEvents = new ArrayList<>();
- Map<String, Object> event = new HashMap<>();
- event.put("test", 1);
+ private List<Event> makeTestData(Integer count) {
+ List<Event> allEvents = new ArrayList<>();
+ Event event = new Event();
+ event.addField("test", 1);
for (int i = 0; i < count; i++) {
allEvents.add(event);
diff --git a/streampipes-processors-enricher-flink/development/.env b/streampipes-processors-enricher-flink/development/.env
index b5ab2b6..1a55b50 100644
--- a/streampipes-processors-enricher-flink/development/.env
+++ b/streampipes-processors-enricher-flink/development/.env
@@ -2,7 +2,4 @@
SP_PORT=6010
SP_HOST=localhost
SP_ICON_HOST=localhost
-SP_KAFKA_HOST=localhost
-SP_ZOOKEEPER_HOST=localhost
-SP_ELASTICSEARCH_HOST=localhost
SP_FLINK_DEBUG=true
\ No newline at end of file
diff --git a/streampipes-processors-enricher-flink/pom.xml b/streampipes-processors-enricher-flink/pom.xml
index 338be7c..f4d5586 100644
--- a/streampipes-processors-enricher-flink/pom.xml
+++ b/streampipes-processors-enricher-flink/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/EnricherFlinkInit.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/EnricherFlinkInit.java
index 0649e99..b2582c9 100644
--- a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/EnricherFlinkInit.java
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/EnricherFlinkInit.java
@@ -17,15 +17,23 @@
package org.streampipes.processors.enricher.flink;
import org.streampipes.processors.enricher.flink.config.EnricherFlinkConfig;
+import org.streampipes.processors.enricher.flink.processor.math.mathop.MathOpController;
+import org.streampipes.processors.enricher.flink.processor.math.staticmathop.StaticMathOpController;
import org.streampipes.processors.enricher.flink.processor.timestamp.TimestampController;
import org.streampipes.container.init.DeclarersSingleton;
import org.streampipes.container.standalone.init.StandaloneModelSubmitter;
+import org.streampipes.processors.enricher.flink.processor.trigonometry.TrigonometryController;
+import org.streampipes.processors.enricher.flink.processor.urldereferencing.UrlDereferencingController;
public class EnricherFlinkInit extends StandaloneModelSubmitter {
public static void main(String[] args) {
DeclarersSingleton.getInstance()
- .add(new TimestampController());
+ .add(new TimestampController())
+ .add(new MathOpController())
+ .add(new StaticMathOpController())
+ .add(new UrlDereferencingController())
+ .add(new TrigonometryController());
new EnricherFlinkInit().init(EnricherFlinkConfig.INSTANCE);
}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/config/ConfigKeys.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/config/ConfigKeys.java
index 428701c..74a5588 100644
--- a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/config/ConfigKeys.java
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/config/ConfigKeys.java
@@ -22,9 +22,6 @@
final static String PORT = "SP_PORT";
final static String FLINK_HOST = "SP_FLINK_HOST";
final static String FLINK_PORT = "SP_FLINK_PORT";
- final static String ELASTIC_HOST = "SP_ELASTICSEARCH_HOST";
- final static String ELASTIC_PORT = "SP_ELASTICSEARCH_PORT";
- final static String ELASTIC_PORT_REST = "SP_ELASTICSEARCH_PORT_REST";
final static String ICON_HOST = "SP_ICON_HOST";
final static String ICON_PORT = "SP_ICON_PORT";
final static String SERVICE_NAME = "SP_SERVICE_NAME";
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/config/EnricherFlinkConfig.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/config/EnricherFlinkConfig.java
index 492f7e2..b760ff1 100644
--- a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/config/EnricherFlinkConfig.java
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/config/EnricherFlinkConfig.java
@@ -35,9 +35,6 @@
config.register(ConfigKeys.PORT, 8090, "Port for the pe mixed flink component");
config.register(ConfigKeys.FLINK_HOST, "jobmanager", "Host for the flink cluster");
config.register(ConfigKeys.FLINK_PORT, 6123, "Port for the flink cluster");
- config.register(ConfigKeys.ELASTIC_HOST, "elasticsearch", "Elastic search host address");
- config.register(ConfigKeys.ELASTIC_PORT, 9300, "Elasitc search port");
- config.register(ConfigKeys.ELASTIC_PORT_REST, 9200, "Elasitc search rest port");
config.register(ConfigKeys.ICON_HOST, "backend", "Hostname for the icon host");
config.register(ConfigKeys.ICON_PORT, 80, "Port for the icons in nginx");
@@ -66,15 +63,6 @@
return config.getInteger(ConfigKeys.FLINK_PORT);
}
- public String getElasticsearchHost() {
- return config.getString(ConfigKeys.ELASTIC_HOST);
- }
-
- public int getElasticsearchPort() {
- return config.getInteger(ConfigKeys.ELASTIC_PORT);
- }
-
-
public static final String iconBaseUrl = "http://" + EnricherFlinkConfig.INSTANCE.getIconHost() + ":" + EnricherFlinkConfig.INSTANCE.getIconPort() + "/assets/img/pe_icons";
public static final String getIconUrl(String pictureName) {
@@ -89,10 +77,6 @@
return config.getInteger(ConfigKeys.ICON_PORT);
}
- public int getElasticsearchPortRest() {
- return config.getInteger(ConfigKeys.ELASTIC_PORT_REST);
- }
-
public boolean getDebug() {
return config.getBoolean(ConfigKeys.DEBUG);
}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOp.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOp.java
new file mode 100644
index 0000000..204b4f5
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOp.java
@@ -0,0 +1,48 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.mathop;
+
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.processors.enricher.flink.processor.math.operation.Operation;
+
+public class MathOp implements FlatMapFunction<Event, Event> {
+
+  private final Operation operation;
+  private final String leftOperand;
+  private final String rightOperand;
+  private final String resultField;
+
+  /** Stores the operation, the selectors of both operand fields and the name of the result field. */
+  public MathOp(Operation operation, String leftOperand, String rightOperand, String resultField) {
+    this.operation = operation;
+    this.leftOperand = leftOperand;
+    this.rightOperand = rightOperand;
+    this.resultField = resultField;
+  }
+
+  /** Applies the operation to both operand fields, adds the result as a new field and emits the event. */
+  @Override
+  public void flatMap(Event in, Collector<Event> out) throws Exception {
+    Double left = in.getFieldBySelector(leftOperand).getAsPrimitive().getAsDouble();
+    Double right = in.getFieldBySelector(rightOperand).getAsPrimitive().getAsDouble();
+
+    in.addField(resultField, operation.operate(left, right));
+    out.collect(in);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpController.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpController.java
new file mode 100644
index 0000000..fd04406
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpController.java
@@ -0,0 +1,87 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.mathop;
+
+import org.streampipes.model.DataProcessorType;
+import org.streampipes.model.graph.DataProcessorDescription;
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.schema.PropertyScope;
+import org.streampipes.processors.enricher.flink.config.EnricherFlinkConfig;
+import org.streampipes.processors.enricher.flink.processor.math.operation.*;
+import org.streampipes.sdk.builder.ProcessingElementBuilder;
+import org.streampipes.sdk.builder.StreamRequirementsBuilder;
+import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
+import org.streampipes.sdk.helpers.*;
+import org.streampipes.vocabulary.SO;
+import org.streampipes.wrapper.flink.FlinkDataProcessorDeclarer;
+import org.streampipes.wrapper.flink.FlinkDataProcessorRuntime;
+
+/** Declares the "Math" processor and creates its Flink runtime from the selected operands and operation. */
+public class MathOpController extends FlinkDataProcessorDeclarer<MathOpParameters> {
+
+  private static final String RESULT_FIELD = "calculationResult";
+  private static final String LEFT_OPERAND = "leftOperand";
+  private static final String RIGHT_OPERAND = "rightOperand";
+  private static final String OPERATION = "operation";
+
+  @Override
+  public DataProcessorDescription declareModel() {
+    return ProcessingElementBuilder.create("org.streampipes.processors.enricher.flink.processor.math.mathop",
+            "Math","Performs calculations on event properties (+, -, *, /, %)")
+            .iconUrl(EnricherFlinkConfig.getIconUrl("math-icon"))
+            .category(DataProcessorType.ALGORITHM)
+            .requiredStream(StreamRequirementsBuilder
+                    .create()
+                    .requiredPropertyWithUnaryMapping(EpRequirements.numberReq(),
+                            Labels.from(LEFT_OPERAND, "Left operand", "Select left operand"),
+                            PropertyScope.NONE)
+                    .requiredPropertyWithUnaryMapping(EpRequirements.numberReq(),
+                            Labels.from(RIGHT_OPERAND, "Right operand", "Select right operand"),
+                            PropertyScope.NONE)
+                    .build())
+            .outputStrategy(
+                    OutputStrategies.append(
+                            EpProperties.numberEp(Labels.empty(), RESULT_FIELD, SO.Number)))
+            .requiredSingleValueSelection(OPERATION, "Select Operation", "", Options.from("+", "-", "/", "*", "%"))
+            .supportedFormats(SupportedFormats.jsonFormat())
+            .supportedProtocols(SupportedProtocols.kafka())
+            .build();
+  }
+
+  /**
+   * Maps the selected operation symbol to its implementation and wires up the Flink program.
+   * @throws IllegalArgumentException if the selected operation is none of +, -, *, /, %
+   */
+  @Override
+  public FlinkDataProcessorRuntime<MathOpParameters> getRuntime(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
+    String leftOperand = extractor.mappingPropertyValue(LEFT_OPERAND);
+    String rightOperand = extractor.mappingPropertyValue(RIGHT_OPERAND);
+    String operation = extractor.selectedSingleValue(OPERATION, String.class);
+
+    Operation arithmeticOperation;
+    switch (operation) {
+      case "+": arithmeticOperation = new OperationAddition(); break;
+      case "-": arithmeticOperation = new OperationSubtracting(); break;
+      case "*": arithmeticOperation = new OperationMultiply(); break;
+      case "/": arithmeticOperation = new OperationDivide(); break;
+      case "%": arithmeticOperation = new OperationModulo(); break;
+      default: throw new IllegalArgumentException("Unsupported operation: " + operation);
+    }
+    MathOpParameters parameters = new MathOpParameters(graph, arithmeticOperation, leftOperand, rightOperand, RESULT_FIELD);
+    return new MathOpProgram(parameters, EnricherFlinkConfig.INSTANCE.getDebug());
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpParameters.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpParameters.java
new file mode 100644
index 0000000..8e44c9f
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpParameters.java
@@ -0,0 +1,53 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.mathop;
+
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.processors.enricher.flink.processor.math.operation.Operation;
+import org.streampipes.wrapper.params.binding.EventProcessorBindingParams;
+
+/** Binding parameters of the Math processor: the operation, both operand selectors and the result field. */
+public class MathOpParameters extends EventProcessorBindingParams {
+  private final Operation operation;
+  private final String leftOperand;
+  private final String rightOperand;
+  private final String resultField;
+
+  public MathOpParameters(DataProcessorInvocation graph, Operation operation, String leftOperand, String rightOperand, String resultField) {
+    super(graph);
+    this.operation = operation;
+    this.leftOperand = leftOperand;
+    this.rightOperand = rightOperand;
+    this.resultField = resultField;
+  }
+
+  public Operation getOperation() {
+    return this.operation;
+  }
+
+  public String getLeftOperand() {
+    return this.leftOperand;
+  }
+
+  public String getRightOperand() {
+    return this.rightOperand;
+  }
+
+  public String getResultField() {
+    return this.resultField;
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpProgram.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpProgram.java
new file mode 100644
index 0000000..0e4c8c6
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/mathop/MathOpProgram.java
@@ -0,0 +1,35 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.mathop;
+
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.processors.enricher.flink.AbstractEnricherProgram;
+
+/** Flink program that runs the first input stream through a {@link MathOp} built from the parameters. */
+public class MathOpProgram extends AbstractEnricherProgram<MathOpParameters> {
+  public MathOpProgram(MathOpParameters params, boolean debug) {
+    super(params, debug);
+  }
+
+  @Override
+  protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
+    MathOp mathOp = new MathOp(params.getOperation(), params.getLeftOperand(),
+        params.getRightOperand(), params.getResultField());
+    return dataStreams[0].flatMap(mathOp);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/Operation.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/Operation.java
new file mode 100644
index 0000000..e9f79c3
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/Operation.java
@@ -0,0 +1,24 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.operation;
+
+import java.io.Serializable;
+
+public interface Operation extends Serializable {
+ /** Applies this binary arithmetic operation to the left and right operand and returns the result. */
+ Double operate(Double valLeft, Double valRight);
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationAddition.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationAddition.java
new file mode 100644
index 0000000..6788378
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationAddition.java
@@ -0,0 +1,25 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.operation;
+
+public class OperationAddition implements Operation {
+  /** Returns the sum {@code valLeft + valRight}. */
+  @Override
+  public Double operate(Double valLeft, Double valRight) {
+    return Double.valueOf(valLeft + valRight);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationDivide.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationDivide.java
new file mode 100644
index 0000000..7bed723
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationDivide.java
@@ -0,0 +1,25 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.operation;
+
+public class OperationDivide implements Operation {
+  /** Returns {@code valLeft / valRight}; double arithmetic, so a zero divisor gives Infinity or NaN. */
+  @Override
+  public Double operate(Double valLeft, Double valRight) {
+    return Double.valueOf(valLeft / valRight);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationModulo.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationModulo.java
new file mode 100644
index 0000000..b1d0ad1
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationModulo.java
@@ -0,0 +1,25 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.operation;
+
+public class OperationModulo implements Operation {
+  /** Returns the remainder {@code valLeft % valRight}; double arithmetic, so a zero divisor gives NaN. */
+  @Override
+  public Double operate(Double valLeft, Double valRight) {
+    return Double.valueOf(valLeft % valRight);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationMultiply.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationMultiply.java
new file mode 100644
index 0000000..9dea18f
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationMultiply.java
@@ -0,0 +1,25 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.operation;
+
+public class OperationMultiply implements Operation {
+  /** Returns the product {@code valLeft * valRight}. */
+  @Override
+  public Double operate(Double valLeft, Double valRight) {
+    return Double.valueOf(valLeft * valRight);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationSubtracting.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationSubtracting.java
new file mode 100644
index 0000000..86bf361
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/operation/OperationSubtracting.java
@@ -0,0 +1,25 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.operation;
+
+public class OperationSubtracting implements Operation {
+  /** Returns the difference {@code valLeft - valRight}. */
+  @Override
+  public Double operate(Double valLeft, Double valRight) {
+    return Double.valueOf(valLeft - valRight);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOp.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOp.java
new file mode 100644
index 0000000..3c2547b
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOp.java
@@ -0,0 +1,48 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.staticmathop;
+
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.processors.enricher.flink.processor.math.operation.Operation;
+
+public class StaticMathOp implements FlatMapFunction<Event, Event> {
+
+  private final Operation operation;
+  private final String leftOperand;
+  private final double rightOperandValue;
+  // Not read by flatMap: the result overwrites the left operand field instead.
+  private final String resultField;
+
+  public StaticMathOp(Operation operation, String leftOperand, double rightOperandValue, String resultField) {
+    this.operation = operation;
+    this.leftOperand = leftOperand;
+    this.rightOperandValue = rightOperandValue;
+    this.resultField = resultField;
+  }
+
+  /** Replaces the left operand field with (left operand OP static right value) and emits the event. */
+  @Override
+  public void flatMap(Event in, Collector<Event> out) throws Exception {
+    // The accessor already yields a double; a String round trip per event adds nothing.
+    Double leftValue = in.getFieldBySelector(leftOperand).getAsPrimitive().getAsDouble();
+    Double result = operation.operate(leftValue, rightOperandValue);
+    in.updateFieldBySelector(leftOperand, result);
+    out.collect(in);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpController.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpController.java
new file mode 100644
index 0000000..903dd98
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpController.java
@@ -0,0 +1,86 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.staticmathop;
+
+import org.streampipes.model.DataProcessorType;
+import org.streampipes.model.graph.DataProcessorDescription;
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.schema.PropertyScope;
+import org.streampipes.processors.enricher.flink.config.EnricherFlinkConfig;
+import org.streampipes.processors.enricher.flink.processor.math.operation.*;
+import org.streampipes.sdk.builder.ProcessingElementBuilder;
+import org.streampipes.sdk.builder.StreamRequirementsBuilder;
+import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
+import org.streampipes.sdk.helpers.*;
+import org.streampipes.wrapper.flink.FlinkDataProcessorDeclarer;
+import org.streampipes.wrapper.flink.FlinkDataProcessorRuntime;
+
+/** Declares the "Static Math" processor and creates its Flink runtime from the selected operand, value and operation. */
+public class StaticMathOpController extends FlinkDataProcessorDeclarer<StaticMathOpParameters> {
+
+  private static final String RESULT_FIELD = "calculationResultStatic";
+  private static final String LEFT_OPERAND = "leftOperand";
+  private static final String RIGHT_OPERAND_VALUE = "rightOperandValue";
+  private static final String OPERATION = "operation";
+
+  @Override
+  public DataProcessorDescription declareModel() {
+    return ProcessingElementBuilder.create("org.streampipes.processors.enricher.flink.processor.math.staticmathop",
+            "Static Math", "Performs calculation on an event property with a static value (+, -, *, /, %)")
+            .iconUrl(EnricherFlinkConfig.getIconUrl("math-icon-static"))
+            .category(DataProcessorType.ALGORITHM)
+            .requiredStream(StreamRequirementsBuilder
+                    .create()
+                    .requiredPropertyWithUnaryMapping(EpRequirements.numberReq(),
+                            Labels.from(LEFT_OPERAND, "Left operand", "Select left operand"),
+                            PropertyScope.NONE)
+                    .build())
+            .requiredFloatParameter(Labels.from(RIGHT_OPERAND_VALUE, "Right operand value",
+                    "Specify the value of the right operand."))
+            .outputStrategy(
+                    OutputStrategies.keep())
+            // keep(): StaticMathOp writes the result back into the left operand field, no field is appended
+            .requiredSingleValueSelection(OPERATION, "Select Operation", "", Options.from("+", "-", "/", "*", "%"))
+            .supportedFormats(SupportedFormats.jsonFormat())
+            .supportedProtocols(SupportedProtocols.kafka())
+            .build();
+  }
+
+  /**
+   * Maps the selected operation symbol to its implementation and wires up the Flink program.
+   * @throws IllegalArgumentException if the selected operation is none of +, -, *, /, %
+   */
+  @Override
+  public FlinkDataProcessorRuntime<StaticMathOpParameters> getRuntime(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
+    String leftOperand = extractor.mappingPropertyValue(LEFT_OPERAND);
+    double rightOperand = extractor.singleValueParameter(RIGHT_OPERAND_VALUE, Double.class);
+    String operation = extractor.selectedSingleValue(OPERATION, String.class);
+
+    Operation arithmeticOperation;
+    switch (operation) {
+      case "+": arithmeticOperation = new OperationAddition(); break;
+      case "-": arithmeticOperation = new OperationSubtracting(); break;
+      case "*": arithmeticOperation = new OperationMultiply(); break;
+      case "/": arithmeticOperation = new OperationDivide(); break;
+      case "%": arithmeticOperation = new OperationModulo(); break;
+      default: throw new IllegalArgumentException("Unsupported operation: " + operation);
+    }
+
+    StaticMathOpParameters parameters = new StaticMathOpParameters(graph, arithmeticOperation, leftOperand, rightOperand, RESULT_FIELD);
+    return new StaticMathOpProgram(parameters, EnricherFlinkConfig.INSTANCE.getDebug());
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpParameters.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpParameters.java
new file mode 100644
index 0000000..383d24b
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpParameters.java
@@ -0,0 +1,53 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.staticmathop;
+
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.processors.enricher.flink.processor.math.operation.Operation;
+import org.streampipes.wrapper.params.binding.EventProcessorBindingParams;
+
+/**
+ * Immutable parameter holder for the static math operation processor: the operation to apply,
+ * the left operand, the constant right-operand value and the name of the result field.
+ */
+public class StaticMathOpParameters extends EventProcessorBindingParams {
+
+  // All fields are assigned exactly once in the constructor, hence final.
+  private final Operation operation;
+  private final String leftOperand;
+  private final double rightOperandValue;
+  private final String resultField;
+
+  /**
+   * @param graph             the processor invocation, forwarded to the superclass
+   * @param operation         the arithmetic operation to apply
+   * @param leftOperand       the left operand as a string (presumably the selector of the
+   *                          mapped event field; confirm against the caller)
+   * @param rightOperandValue the constant right operand
+   * @param resultField       the name of the field that receives the result
+   */
+  public StaticMathOpParameters(DataProcessorInvocation graph, Operation operation, String leftOperand, double rightOperandValue, String resultField) {
+    super(graph);
+    this.operation = operation;
+    this.leftOperand = leftOperand;
+    this.rightOperandValue = rightOperandValue;
+    this.resultField = resultField;
+  }
+
+  /** @return the arithmetic operation passed to the constructor */
+  public Operation getOperation() {
+    return operation;
+  }
+
+  /** @return the left operand passed to the constructor */
+  public String getLeftOperand() {
+    return leftOperand;
+  }
+
+  /** @return the constant right-operand value */
+  public double getRightOperandValue() {
+    return rightOperandValue;
+  }
+
+  /** @return the name of the result field */
+  public String getResultField() {
+    return resultField;
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpProgram.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpProgram.java
new file mode 100644
index 0000000..6827032
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/math/staticmathop/StaticMathOpProgram.java
@@ -0,0 +1,35 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.math.staticmathop;
+
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.processors.enricher.flink.AbstractEnricherProgram;
+
+/**
+ * Enricher program that runs a {@link StaticMathOp} flat-map over its first input stream.
+ */
+public class StaticMathOpProgram extends AbstractEnricherProgram<StaticMathOpParameters> {
+
+  /**
+   * @param params the parameters of the processor invocation
+   * @param debug  flag forwarded unchanged to the superclass
+   */
+  public StaticMathOpProgram(StaticMathOpParameters params, boolean debug) {
+    super(params, debug);
+  }
+
+  /**
+   * Attaches the math operation to the first input stream.
+   *
+   * @param dataStreams the input streams; only index 0 is used
+   * @return the stream produced by the flat-map
+   */
+  @Override
+  protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
+    DataStream<Event> input = dataStreams[0];
+    // params is not declared in this class; presumably inherited from AbstractEnricherProgram.
+    StaticMathOp mathOp = new StaticMathOp(
+        params.getOperation(),
+        params.getLeftOperand(),
+        params.getRightOperandValue(),
+        params.getResultField());
+    return input.flatMap(mathOp);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/timestamp/TimestampEnricher.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/timestamp/TimestampEnricher.java
index 423fa71..af3a618 100644
--- a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/timestamp/TimestampEnricher.java
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/timestamp/TimestampEnricher.java
@@ -18,24 +18,22 @@
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
-import java.util.Map;
+/**
+ * Flink flat-map function that adds the current system time to every event it receives.
+ */
+public class TimestampEnricher implements FlatMapFunction<Event, Event> {
-public class TimestampEnricher implements FlatMapFunction<Map<String, Object>, Map<String, Object>> {
+ // Name of the event field that receives the timestamp.
+ private String appendTimePropertyName;
- private String appendTimePropertyName;
-
- public TimestampEnricher(String appendTimePropertyName) {
- this.appendTimePropertyName = appendTimePropertyName;
- }
+ /**
+ * @param appendTimePropertyName name of the field the timestamp is written to
+ */
+ public TimestampEnricher(String appendTimePropertyName) {
+ this.appendTimePropertyName = appendTimePropertyName;
+ }
- @Override
- public void flatMap(Map<String, Object> in,
- Collector<Map<String, Object>> out) throws Exception {
- in.put(appendTimePropertyName, System.currentTimeMillis());
- out.collect(in);
- }
-
-
+ /**
+ * Adds the value of System.currentTimeMillis() to the event under the configured field
+ * name and emits the same event instance.
+ *
+ * @param in the incoming event; it is modified in place
+ * @param out collector that receives the event
+ */
+ @Override
+ public void flatMap(Event in,
+ Collector<Event> out) throws Exception {
+ in.addField(appendTimePropertyName, System.currentTimeMillis());
+ out.collect(in);
+ }
+
}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/timestamp/TimestampProgram.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/timestamp/TimestampProgram.java
index 2d73a2c..83ef153 100644
--- a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/timestamp/TimestampProgram.java
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/timestamp/TimestampProgram.java
@@ -17,6 +17,7 @@
package org.streampipes.processors.enricher.flink.processor.timestamp;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.enricher.flink.AbstractEnricherProgram;
import java.util.Map;
@@ -28,8 +29,7 @@
}
+ /**
+ * Applies a TimestampEnricher, configured with the field name from the parameters, to the
+ * first input stream. Only index 0 of messageStream is used.
+ */
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(
- DataStream<Map<String, Object>>... messageStream) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... messageStream) {
return messageStream[0]
.flatMap(new TimestampEnricher(params.getAppendTimePropertyName()));
}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/Operation.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/Operation.java
new file mode 100644
index 0000000..c8889e7
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/Operation.java
@@ -0,0 +1,23 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.trigonometry;
+
+/**
+ * Trigonometric functions supported by the trigonometry processor.
+ */
+public enum Operation {
+ // sine
+ SIN,
+ // cosine
+ COS,
+ // tangent
+ TAN
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/Trigonometry.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/Trigonometry.java
new file mode 100644
index 0000000..2407774
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/Trigonometry.java
@@ -0,0 +1,51 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.trigonometry;
+
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
+
+/**
+ * Flink flat-map function that applies a trigonometric function to one numeric event field
+ * and stores the outcome in an additional field of the same event.
+ */
+public class Trigonometry implements FlatMapFunction<Event, Event> {
+
+  private String operand;
+  private Operation operation;
+  private String resultField;
+
+  /**
+   * @param operand     selector of the field holding the input value
+   * @param operation   the function to apply
+   * @param resultField name of the field the result is written to
+   */
+  public Trigonometry(String operand, Operation operation, String resultField) {
+    this.operand = operand;
+    this.operation = operation;
+    this.resultField = resultField;
+  }
+
+  /**
+   * Reads the operand field as a double, applies the configured function, adds the result to
+   * the event and emits the same event instance.
+   *
+   * @param in  the incoming event; it is modified in place
+   * @param out collector that receives the event
+   */
+  @Override
+  public void flatMap(Event in, Collector<Event> out) throws Exception {
+    double angle = in.getFieldBySelector(operand).getAsPrimitive().getAsDouble();
+    in.addField(resultField, apply(angle));
+    out.collect(in);
+  }
+
+  /** Evaluates the configured function for the given argument. */
+  private double apply(double angle) {
+    if (operation == Operation.SIN) {
+      return Math.sin(angle);
+    }
+    if (operation == Operation.COS) {
+      return Math.cos(angle);
+    }
+    // Anything that is neither SIN nor COS (TAN, but also null) is evaluated as tangent.
+    return Math.tan(angle);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryController.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryController.java
new file mode 100644
index 0000000..e0434ed
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryController.java
@@ -0,0 +1,80 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.trigonometry;
+
+import org.streampipes.model.DataProcessorType;
+import org.streampipes.model.graph.DataProcessorDescription;
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.schema.PropertyScope;
+import org.streampipes.processors.enricher.flink.config.EnricherFlinkConfig;
+import org.streampipes.sdk.builder.ProcessingElementBuilder;
+import org.streampipes.sdk.builder.StreamRequirementsBuilder;
+import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
+import org.streampipes.sdk.helpers.*;
+import org.streampipes.vocabulary.SO;
+import org.streampipes.wrapper.flink.FlinkDataProcessorDeclarer;
+import org.streampipes.wrapper.flink.FlinkDataProcessorRuntime;
+
+/**
+ * Declares the "Trigonometry" data processor and creates its Flink runtime.
+ */
+public class TrigonometryController extends FlinkDataProcessorDeclarer<TrigonometryParameters> {
+
+  // Internal ids of the mapping property and the selection, and the appended field name.
+  private static final String OPERAND = "operand";
+  private static final String OPERATION = "operation";
+  private static final String RESULT_FIELD = "trigonometryResult";
+
+  /**
+   * Describes the processor: one numeric input property, a single-value selection of the
+   * function and an appended numeric result field.
+   *
+   * @return the processor description
+   */
+  @Override
+  public DataProcessorDescription declareModel() {
+    return ProcessingElementBuilder.create("org.streampipes.processors.enricher.flink.processor.trigonometry",
+            "Trigonometry","Performs Trigonometric function on event properties")
+            .iconUrl(EnricherFlinkConfig.getIconUrl("trigonometry_icon"))
+            .category(DataProcessorType.ALGORITHM)
+            .requiredStream(StreamRequirementsBuilder
+                    .create()
+                    .requiredPropertyWithUnaryMapping(EpRequirements.numberReq(),
+                            Labels.from(OPERAND, "Alpha", "Select the alpha parameter"),
+                            PropertyScope.NONE)
+                    .build())
+            .outputStrategy(
+                    OutputStrategies.append(
+                            EpProperties.numberEp(Labels.empty(), RESULT_FIELD, SO.Number)))
+            .requiredSingleValueSelection(Labels.from(OPERATION, "Select function", ""), Options.from("sin(a)", "cos(a)", "tan(a)" ))
+            .supportedFormats(SupportedFormats.jsonFormat())
+            .supportedProtocols(SupportedProtocols.kafka())
+            .build();
+  }
+
+  /**
+   * Creates the Flink runtime for one invocation of this processor.
+   *
+   * @param graph     the invocation the runtime is created for
+   * @param extractor accessor for the mapping and static properties of the invocation
+   * @return the program configured with the resolved parameters
+   * @throws IllegalArgumentException if no function is selected or the selected label is not
+   *                                  one of {@code sin(a)}, {@code cos(a)}, {@code tan(a)}
+   */
+  @Override
+  public FlinkDataProcessorRuntime<TrigonometryParameters> getRuntime(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
+    String operand = extractor.mappingPropertyValue(OPERAND);
+    String operation = extractor.selectedSingleValue(OPERATION, String.class);
+
+    // Switching on a null String throws a bare NullPointerException; report it explicitly.
+    if (operation == null) {
+      throw new IllegalArgumentException("No trigonometric function selected");
+    }
+
+    // Reject unknown labels here: a null Operation would be evaluated as tangent by
+    // Trigonometry without any indication that the selection was not understood.
+    Operation trigonometryFunction;
+    switch (operation) {
+      case "sin(a)":
+        trigonometryFunction = Operation.SIN;
+        break;
+      case "cos(a)":
+        trigonometryFunction = Operation.COS;
+        break;
+      case "tan(a)":
+        trigonometryFunction = Operation.TAN;
+        break;
+      default:
+        throw new IllegalArgumentException("Unsupported trigonometric function: " + operation);
+    }
+
+    TrigonometryParameters parameters = new TrigonometryParameters(graph, operand, trigonometryFunction, RESULT_FIELD);
+
+    return new TrigonometryProgram(parameters, EnricherFlinkConfig.INSTANCE.getDebug());
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryParameters.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryParameters.java
new file mode 100644
index 0000000..dc58b6b
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryParameters.java
@@ -0,0 +1,46 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.trigonometry;
+
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.wrapper.params.binding.EventProcessorBindingParams;
+
+/**
+ * Immutable parameter holder for the trigonometry processor: the operand, the function to
+ * apply and the name of the result field.
+ */
+public class TrigonometryParameters extends EventProcessorBindingParams {
+
+  // All fields are assigned exactly once in the constructor, hence final.
+  private final String operand;
+  private final Operation operation;
+  private final String resultField;
+
+  /**
+   * @param graph       the processor invocation, forwarded to the superclass
+   * @param operand     the operand as a string (presumably the selector of the mapped event
+   *                    field; confirm against the caller)
+   * @param operation   the trigonometric function to apply
+   * @param resultField the name of the field that receives the result
+   */
+  public TrigonometryParameters(DataProcessorInvocation graph, String operand, Operation operation, String resultField) {
+    super(graph);
+    this.operand = operand;
+    this.operation = operation;
+    this.resultField = resultField;
+  }
+
+  /** @return the operand passed to the constructor */
+  public String getOperand() {
+    return operand;
+  }
+
+  /** @return the trigonometric function passed to the constructor */
+  public Operation getOperation() {
+    return operation;
+  }
+
+  /** @return the name of the result field */
+  public String getResultField() {
+    return resultField;
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryProgram.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryProgram.java
new file mode 100644
index 0000000..c82152d
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/trigonometry/TrigonometryProgram.java
@@ -0,0 +1,33 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.trigonometry;
+
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.processors.enricher.flink.AbstractEnricherProgram;
+
+/**
+ * Enricher program that runs a {@link Trigonometry} flat-map over its first input stream.
+ */
+public class TrigonometryProgram extends AbstractEnricherProgram<TrigonometryParameters> {
+
+  /**
+   * @param params the parameters of the processor invocation
+   * @param debug  flag forwarded unchanged to the superclass
+   */
+  public TrigonometryProgram(TrigonometryParameters params, boolean debug) {
+    super(params, debug);
+  }
+
+  /**
+   * Attaches the trigonometry function to the first input stream.
+   *
+   * @param dataStreams the input streams; only index 0 is used
+   * @return the stream produced by the flat-map
+   */
+  @Override
+  protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
+    DataStream<Event> input = dataStreams[0];
+    // params is not declared in this class; presumably inherited from AbstractEnricherProgram.
+    Trigonometry function = new Trigonometry(
+        params.getOperand(),
+        params.getOperation(),
+        params.getResultField());
+    return input.flatMap(function);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencing.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencing.java
new file mode 100644
index 0000000..7c86960
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencing.java
@@ -0,0 +1,57 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.urldereferencing;
+
+import com.mashape.unirest.http.HttpResponse;
+import com.mashape.unirest.http.Unirest;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+import org.streampipes.logging.api.Logger;
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
+
+/**
+ * Flink flat-map function that reads a URL from an event field, fetches it with an HTTP GET
+ * and appends the response body to the event as a string.
+ */
+public class UrlDereferencing implements FlatMapFunction<Event, Event> {
+
+  // Despite its name this is the selector of the field that holds the URL, not the URL itself.
+  private String urlString;
+  // Name of the field that receives the response body (or the error text).
+  private String appendHtml;
+  private Logger logger;
+
+  /**
+   * @param urlString  selector of the event field that contains the URL
+   * @param appendHtml name of the field the response body is written to
+   * @param graph      the processor invocation; used to obtain the logger
+   */
+  public UrlDereferencing(String urlString, String appendHtml, DataProcessorInvocation graph) {
+    this.urlString = urlString;
+    this.appendHtml = appendHtml;
+    this.logger = graph.getLogger(UrlDereferencing.class);
+  }
+
+  /**
+   * Fetches the URL found in the event and adds the response body to the event. Any failure
+   * (field lookup or HTTP request) is logged and an error text is stored instead of the body,
+   * so the event is emitted in every case.
+   *
+   * @param in  the incoming event; it is modified in place
+   * @param out collector that receives the event
+   */
+  @Override
+  public void flatMap(Event in, Collector<Event> out) throws Exception {
+    // Declared outside the try block so the catch block can report the resolved URL;
+    // stays null when the field lookup itself fails.
+    String url = null;
+
+    try {
+      url = in.getFieldBySelector(urlString).getAsPrimitive().getAsString();
+      HttpResponse<String> response = Unirest.get(url).asString();
+
+      in.addField(appendHtml, response.getBody());
+    } catch (Exception e) {
+      // Log the URL that was actually requested and the cause; the selector alone does not
+      // identify the failing request.
+      logger.error("Error while fetching data from URL: " + url
+          + " (field selector: " + urlString + "): " + e);
+      // Text stored in the event is kept as before so downstream consumers see no change.
+      in.addField(appendHtml, "Error while fetching data from URL: " + urlString);
+    }
+
+    out.collect(in);
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingController.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingController.java
new file mode 100644
index 0000000..f274b16
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingController.java
@@ -0,0 +1,74 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.urldereferencing;
+
+import org.streampipes.model.DataProcessorType;
+import org.streampipes.model.graph.DataProcessorDescription;
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.schema.PropertyScope;
+import org.streampipes.processors.enricher.flink.config.EnricherFlinkConfig;
+import org.streampipes.sdk.builder.ProcessingElementBuilder;
+import org.streampipes.sdk.builder.StreamRequirementsBuilder;
+import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
+import org.streampipes.sdk.helpers.*;
+import org.streampipes.vocabulary.SO;
+import org.streampipes.wrapper.flink.FlinkDataProcessorDeclarer;
+import org.streampipes.wrapper.flink.FlinkDataProcessorRuntime;
+
+/**
+ * Declares the "URL Dereferencing" data processor and creates its Flink runtime.
+ */
+public class UrlDereferencingController extends FlinkDataProcessorDeclarer<UrlDereferencingParameter> {
+
+  // Name of the appended field and internal id of the URL mapping property.
+  private static final String APPEND_HTML = "appendHtml";
+  private static final String URL = "url";
+
+  /**
+   * Describes the processor: one string input property holding the URL and an appended
+   * string field for the fetched content.
+   *
+   * @return the processor description
+   */
+  @Override
+  public DataProcessorDescription declareModel() {
+    return ProcessingElementBuilder.create("org.streampipes.processors.enricher.flink.processor.urldereferencing",
+            "URL Dereferencing","Append the html page as a string to event")
+            .iconUrl(EnricherFlinkConfig.getIconUrl("html_icon"))
+            .category(DataProcessorType.ENRICH)
+            .requiredStream(StreamRequirementsBuilder
+                    .create()
+                    .requiredPropertyWithUnaryMapping(EpRequirements.stringReq(),
+                            Labels.from(URL, "URL", "The server URL"),
+                            PropertyScope.NONE)
+                    .build())
+            .outputStrategy(
+                    OutputStrategies.append(
+                            EpProperties.stringEp(Labels.empty(), APPEND_HTML, SO.Text)))
+            .supportedFormats(SupportedFormats.jsonFormat())
+            .supportedProtocols(SupportedProtocols.kafka())
+            .build();
+  }
+
+  /**
+   * Creates the Flink runtime for one invocation of this processor.
+   *
+   * @param graph     the invocation the runtime is created for
+   * @param extractor accessor for the mapping properties of the invocation
+   * @return the program configured with the resolved parameters
+   */
+  @Override
+  public FlinkDataProcessorRuntime<UrlDereferencingParameter> getRuntime(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
+    // The mapped value is handed on as a field selector and resolved against each event by
+    // UrlDereferencing, so the URL cannot be validated at this point.
+    String urlString = extractor.mappingPropertyValue(URL);
+
+    UrlDereferencingParameter staticParam = new UrlDereferencingParameter(graph, urlString, APPEND_HTML);
+
+    return new UrlDereferencingProgram(staticParam, EnricherFlinkConfig.INSTANCE.getDebug());
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingParameter.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingParameter.java
new file mode 100644
index 0000000..01eade5
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingParameter.java
@@ -0,0 +1,40 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.urldereferencing;
+
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.wrapper.params.binding.EventProcessorBindingParams;
+
+/**
+ * Immutable parameter holder for the URL dereferencing processor: where the URL is read from
+ * and the name of the field the fetched content is written to.
+ */
+public class UrlDereferencingParameter extends EventProcessorBindingParams {
+
+  // Both fields are assigned exactly once in the constructor, hence final.
+  private final String urlString;
+  private final String appendHtml;
+
+  /**
+   * @param graph      the processor invocation, forwarded to the superclass
+   * @param urlString  the URL parameter as a string (presumably the selector of the mapped
+   *                   event field rather than a literal URL; confirm against the caller)
+   * @param appendHtml the name of the field that receives the fetched content
+   */
+  public UrlDereferencingParameter(DataProcessorInvocation graph, String urlString, String appendHtml) {
+    super(graph);
+    this.urlString = urlString;
+    this.appendHtml = appendHtml;
+  }
+
+  /** @return the {@code urlString} value passed to the constructor */
+  public String getUrl() {
+    return urlString;
+  }
+
+  /** @return the name of the field that receives the fetched content */
+  public String getAppendHtml() {
+    return appendHtml;
+  }
+}
diff --git a/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingProgram.java b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingProgram.java
new file mode 100644
index 0000000..51ebfec
--- /dev/null
+++ b/streampipes-processors-enricher-flink/src/main/java/org/streampipes/processors/enricher/flink/processor/urldereferencing/UrlDereferencingProgram.java
@@ -0,0 +1,35 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.enricher.flink.processor.urldereferencing;
+
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.processors.enricher.flink.AbstractEnricherProgram;
+
+/**
+ * Enricher program that runs a {@link UrlDereferencing} flat-map over its first input stream.
+ */
+public class UrlDereferencingProgram extends AbstractEnricherProgram<UrlDereferencingParameter> {
+
+  /**
+   * @param params the parameters of the processor invocation
+   * @param debug  flag forwarded unchanged to the superclass
+   */
+  public UrlDereferencingProgram(UrlDereferencingParameter params, boolean debug) {
+    super(params, debug);
+  }
+
+  /**
+   * Attaches the URL dereferencing function to the first input stream.
+   *
+   * @param dataStreams the input streams; only index 0 is used
+   * @return the stream produced by the flat-map
+   */
+  @Override
+  protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
+    DataStream<Event> input = dataStreams[0];
+    // params is not declared in this class; presumably inherited from AbstractEnricherProgram.
+    UrlDereferencing function = new UrlDereferencing(
+        params.getUrl(),
+        params.getAppendHtml(),
+        params.getGraph());
+    return input.flatMap(function);
+  }
+
+}
diff --git a/streampipes-processors-filters-jvm/development/.env b/streampipes-processors-filters-jvm/development/.env
index b7f86f5..7b7d77a 100644
--- a/streampipes-processors-filters-jvm/development/.env
+++ b/streampipes-processors-filters-jvm/development/.env
@@ -2,9 +2,3 @@
SP_PORT=6015
SP_HOST=localhost
SP_ICON_HOST=localhost
-SP_KAFKA_HOST=localhost
-SP_ZOOKEEPER_HOST=localhost
-SP_COUCHDB_HOST=localhost
-SP_JMS_HOST=localhost
-SP_NGINX_HOST=localhost
-SP_NGINX_PORT=8082
\ No newline at end of file
diff --git a/streampipes-processors-filters-jvm/pom.xml b/streampipes-processors-filters-jvm/pom.xml
index 4e4c0a5..34385e6 100644
--- a/streampipes-processors-filters-jvm/pom.xml
+++ b/streampipes-processors-filters-jvm/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/FiltersJvmInit.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/FiltersJvmInit.java
index 84bab6a..e576910 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/FiltersJvmInit.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/FiltersJvmInit.java
@@ -20,13 +20,13 @@
import org.streampipes.container.standalone.init.StandaloneModelSubmitter;
import org.streampipes.dataformat.json.JsonDataFormatFactory;
import org.streampipes.messaging.jms.SpJmsProtocolFactory;
+import org.streampipes.messaging.kafka.SpKafkaProtocolFactory;
import org.streampipes.processors.filters.jvm.config.FiltersJvmConfig;
+import org.streampipes.processors.filters.jvm.processor.compose.ComposeController;
import org.streampipes.processors.filters.jvm.processor.numericalfilter.NumericalFilterController;
import org.streampipes.processors.filters.jvm.processor.projection.ProjectionController;
import org.streampipes.processors.filters.jvm.processor.textfilter.TextFilterController;
-import org.streampipes.messaging.kafka.SpKafkaProtocolFactory;
-
public class FiltersJvmInit extends StandaloneModelSubmitter {
public static void main(String[] args) {
@@ -34,7 +34,8 @@
.getInstance()
.add(new NumericalFilterController())
.add(new TextFilterController())
- .add(new ProjectionController());
+ .add(new ProjectionController())
+ .add(new ComposeController());
DeclarersSingleton.getInstance().registerDataFormat(new JsonDataFormatFactory());
DeclarersSingleton.getInstance().registerProtocol(new SpKafkaProtocolFactory());
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/config/ConfigKeys.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/config/ConfigKeys.java
index d6cbc54..2b6486c 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/config/ConfigKeys.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/config/ConfigKeys.java
@@ -22,15 +22,5 @@
final static String PORT = "SP_PORT";
final static String ICON_HOST = "SP_ICON_HOST";
final static String ICON_PORT = "SP_ICON_PORT";
- final static String KAFKA_HOST = "SP_KAFKA_HOST";
- final static String KAFKA_PORT = "SP_KAFKA_PORT";
- final static String ZOOKEEPER_HOST = "SP_ZOOKEEPER_HOST";
- final static String ZOOKEEPER_PORT = "SP_ZOOKEEPER_PORT";
- final static String COUCHDB_HOST = "SP_COUCHDB_HOST";
- final static String COUCHDB_PORT = "SP_COCHDB_PORT";
- final static String JMS_HOST = "SP_JMS_HOST";
- final static String JMS_PORT = "SP_JMS_PORT";
- final static String NGINX_HOST = "SP_NGINX_HOST";
- final static String NGINX_PORT = "SP_NGINX_PORT";
final static String SERVICE_NAME_KEY = "SP_SERVICE_NAME";
}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/config/FiltersJvmConfig.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/config/FiltersJvmConfig.java
index 194da01..5accc2b 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/config/FiltersJvmConfig.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/config/FiltersJvmConfig.java
@@ -39,16 +39,6 @@
config.register(ConfigKeys.ICON_HOST, "backend", "Hostname for the icon host");
config.register(ConfigKeys.ICON_PORT, 80, "Port for the icons in nginx");
- config.register(ConfigKeys.NGINX_HOST, "localhost", "External hostname of StreamPipes Nginx");
- config.register(ConfigKeys.NGINX_PORT, 80, "External port of StreamPipes Nginx");
- config.register(ConfigKeys.KAFKA_HOST, "kafka", "Host for kafka of the pe sinks project");
- config.register(ConfigKeys.KAFKA_PORT, 9092, "Port for kafka of the pe sinks project");
- config.register(ConfigKeys.ZOOKEEPER_HOST, "zookeeper", "Host for zookeeper of the pe sinks project");
- config.register(ConfigKeys.ZOOKEEPER_PORT, 2181, "Port for zookeeper of the pe sinks project");
- config.register(ConfigKeys.COUCHDB_HOST, "couchdb", "Host for couchdb of the pe sinks project");
- config.register(ConfigKeys.COUCHDB_PORT, 5984, "Port for couchdb of the pe sinks project");
- config.register(ConfigKeys.JMS_HOST, "tcp://activemq", "Hostname for pe actions service for active mq");
- config.register(ConfigKeys.JMS_PORT, 61616, "Port for pe actions service for active mq");
config.register(ConfigKeys.SERVICE_NAME_KEY, service_name, "The name of the service");
@@ -81,55 +71,6 @@
return config.getInteger(ConfigKeys.ICON_PORT);
}
- public String getKafkaHost() {
- return config.getString(ConfigKeys.KAFKA_HOST);
- }
-
- public int getKafkaPort() {
- return config.getInteger(ConfigKeys.KAFKA_PORT);
- }
-
- public String getKafkaUrl() {
- return getKafkaHost() + ":" + getKafkaPort();
- }
-
- public String getZookeeperHost() {
- return config.getString(ConfigKeys.ZOOKEEPER_HOST);
- }
-
- public int getZookeeperPort() {
- return config.getInteger(ConfigKeys.ZOOKEEPER_PORT);
- }
-
- public String getCouchDbHost() {
- return config.getString(ConfigKeys.COUCHDB_HOST);
- }
-
- public int getCouchDbPort() {
- return config.getInteger(ConfigKeys.COUCHDB_PORT);
- }
-
- public String getJmsHost() {
- return config.getString(ConfigKeys.JMS_HOST);
- }
-
- public int getJmsPort() {
- return config.getInteger(ConfigKeys.JMS_PORT);
- }
-
- public String getJmsUrl() {
- return getJmsHost() + ":" + getJmsPort();
- }
-
- public String getNginxHost() {
- return config.getString(ConfigKeys.NGINX_HOST);
- }
-
- public Integer getNginxPort() {
-
- return config.getInteger(ConfigKeys.NGINX_PORT);
- }
-
@Override
public String getId() {
return service_id;
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/Compose.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/Compose.java
new file mode 100644
index 0000000..a608cec
--- /dev/null
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/Compose.java
@@ -0,0 +1,107 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package org.streampipes.processors.filters.jvm.processor.compose;
+
+import org.streampipes.model.constants.PropertySelectorConstants;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.model.runtime.EventFactory;
+import org.streampipes.model.schema.EventSchema;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
+import org.streampipes.wrapper.routing.SpOutputCollector;
+import org.streampipes.wrapper.runtime.EventProcessor;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Combines the latest events of two input streams into one output event.
+ *
+ * <p>The most recent event of each stream is kept under the selector prefix of its source. As
+ * soon as events for two distinct selector prefixes are stored, every incoming event produces
+ * an output event built from the incoming event and the stored event of the other stream,
+ * reduced to the configured output key selectors.
+ */
+public class Compose implements EventProcessor<ComposeParameters> {
+
+  /** Number of distinct selector prefixes that must be stored before events are emitted. */
+  private static final int REQUIRED_STREAM_COUNT = 2;
+
+  /** Latest event per input stream, keyed by the selector prefix of the event's source. */
+  private final Map<String, Event> lastEvents = new HashMap<>();
+  private EventSchema outputSchema;
+  private List<String> outputKeySelectors;
+
+  /**
+   * Reads the output schema and the output key selectors from the parameters and discards any
+   * events remembered from an earlier invocation.
+   *
+   * @param composeParameters parameters carrying the invocation graph and the output key selectors
+   * @param spOutputCollector not used by this method
+   * @param runtimeContext not used by this method
+   */
+  @Override
+  public void onInvocation(ComposeParameters composeParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
+    this.outputSchema = composeParameters.getGraph().getOutputStream().getEventSchema();
+    this.outputKeySelectors = composeParameters.getOutputKeySelectors();
+    this.lastEvents.clear();
+  }
+
+  /** Drops all remembered events; safe to call even if {@code onInvocation} never ran. */
+  @Override
+  public void onDetach() {
+    this.lastEvents.clear();
+  }
+
+  /**
+   * Stores the event as the latest one of its stream and emits a composed event once events for
+   * two distinct selector prefixes are stored.
+   *
+   * @param event incoming event
+   * @param spOutputCollector collector that receives the composed event
+   */
+  @Override
+  public void onEvent(Event event, SpOutputCollector spOutputCollector) {
+    // Resolve the prefix once; it is both the storage key and the marker of the current stream.
+    String selectorPrefix = event.getSourceInfo().getSelectorPrefix();
+    this.lastEvents.put(selectorPrefix, event);
+    if (this.lastEvents.size() == REQUIRED_STREAM_COUNT) {
+      spOutputCollector.collect(buildOutEvent(selectorPrefix));
+    }
+  }
+
+  /**
+   * Builds the output event from the latest event of the current stream (passed first) and the
+   * latest event of the other stream (passed second), then keeps only the output key selectors.
+   */
+  private Event buildOutEvent(String currentSelectorPrefix) {
+    Event currentEvent = lastEvents.get(currentSelectorPrefix);
+    Event otherEvent = lastEvents.get(getOtherSelectorPrefix(currentSelectorPrefix));
+    return EventFactory.fromEvents(currentEvent, otherEvent, outputSchema)
+        .getSubset(outputKeySelectors);
+  }
+
+  /**
+   * Returns the second-stream prefix when given the first-stream prefix; any other input yields
+   * the first-stream prefix.
+   */
+  private String getOtherSelectorPrefix(String currentSelectorPrefix) {
+    return PropertySelectorConstants.FIRST_STREAM_ID_PREFIX.equals(currentSelectorPrefix)
+        ? PropertySelectorConstants.SECOND_STREAM_ID_PREFIX
+        : PropertySelectorConstants.FIRST_STREAM_ID_PREFIX;
+  }
+
+}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/ComposeController.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/ComposeController.java
new file mode 100644
index 0000000..9b0a273
--- /dev/null
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/ComposeController.java
@@ -0,0 +1,82 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package org.streampipes.processors.filters.jvm.processor.compose;
+
+import org.streampipes.model.DataProcessorType;
+import org.streampipes.model.graph.DataProcessorDescription;
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.processors.filters.jvm.config.FiltersJvmConfig;
+import org.streampipes.sdk.builder.ProcessingElementBuilder;
+import org.streampipes.sdk.builder.StreamRequirementsBuilder;
+import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
+import org.streampipes.sdk.helpers.EpRequirements;
+import org.streampipes.sdk.helpers.OutputStrategies;
+import org.streampipes.sdk.helpers.SupportedFormats;
+import org.streampipes.sdk.helpers.SupportedProtocols;
+import org.streampipes.wrapper.standalone.ConfiguredEventProcessor;
+import org.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
+
+import java.util.List;
+
+/**
+ * Declares the "Compose" processing element and creates its processor for an invocation.
+ *
+ * <p>NOTE(review): the element id ends in "merger" and the icon name is "projection" although the
+ * element is labelled "Compose" - confirm that both values are intended.
+ */
+public class ComposeController extends StandaloneEventProcessingDeclarer<ComposeParameters> {
+
+  private static final String ELEMENT_ID = "org.streampipes.processors.filters.jvm.merger";
+  private static final String ELEMENT_LABEL = "Compose";
+  private static final String ELEMENT_DESCRIPTION = "Merges two event streams ";
+  private static final String ICON_NAME = "projection";
+
+  /**
+   * Describes the element: a TRANSFORM processor with two input streams that each accept any
+   * property, a custom output strategy, JSON as format and JMS or Kafka as protocol.
+   */
+  @Override
+  public DataProcessorDescription declareModel() {
+    return ProcessingElementBuilder
+        .create(ELEMENT_ID, ELEMENT_LABEL, ELEMENT_DESCRIPTION)
+        .category(DataProcessorType.TRANSFORM)
+        .iconUrl(FiltersJvmConfig.getIconUrl(ICON_NAME))
+        // first input stream
+        .requiredStream(StreamRequirementsBuilder.create()
+            .requiredProperty(EpRequirements.anyProperty())
+            .build())
+        // second input stream
+        .requiredStream(StreamRequirementsBuilder.create()
+            .requiredProperty(EpRequirements.anyProperty())
+            .build())
+        .outputStrategy(OutputStrategies.custom(true))
+        .supportedFormats(SupportedFormats.jsonFormat())
+        .supportedProtocols(SupportedProtocols.jms(), SupportedProtocols.kafka())
+        .build();
+  }
+
+  /**
+   * Creates the configured processor for one invocation: the output key selectors are read
+   * from the extractor, wrapped in {@link ComposeParameters} and paired with a Compose supplier.
+   */
+  @Override
+  public ConfiguredEventProcessor<ComposeParameters> onInvocation(
+      DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
+    List<String> selectors = extractor.outputKeySelectors();
+    ComposeParameters params = new ComposeParameters(graph, selectors);
+    return new ConfiguredEventProcessor<>(params, Compose::new);
+  }
+}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/ComposeParameters.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/ComposeParameters.java
new file mode 100644
index 0000000..0ed2256
--- /dev/null
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/compose/ComposeParameters.java
@@ -0,0 +1,51 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package org.streampipes.processors.filters.jvm.processor.compose;
+
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.wrapper.params.binding.EventProcessorBindingParams;
+
+import java.util.List;
+
+/**
+ * Parameters of the compose processor: the invocation graph plus the selectors of the
+ * properties that are kept in the output event.
+ */
+public class ComposeParameters extends EventProcessorBindingParams {
+
+  /** Assigned once in the constructor; the list reference itself is stored, not a copy. */
+  private final List<String> outputKeySelectors;
+
+  /**
+   * Creates the parameters for one invocation.
+   *
+   * @param graph invocation graph handed to the superclass
+   * @param outputKeySelectors selectors of the properties to keep in the output event
+   */
+  public ComposeParameters(DataProcessorInvocation graph, List<String> outputKeySelectors) {
+    super(graph);
+    this.outputKeySelectors = outputKeySelectors;
+  }
+
+  /**
+   * Returns the output key selectors.
+   *
+   * @return the selector list passed to the constructor (same instance)
+   */
+  public List<String> getOutputKeySelectors() {
+    return outputKeySelectors;
+  }
+}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilter.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilter.java
new file mode 100644
index 0000000..e95c2ec
--- /dev/null
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilter.java
@@ -0,0 +1,19 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package org.streampipes.processors.filters.jvm.processor.listfilter;
+
+public class ListFilter { // Empty class: declares no members. NOTE(review): presumably a stub for a list filter processor - confirm or remove.
+}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilterController.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilterController.java
new file mode 100644
index 0000000..5974d04
--- /dev/null
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilterController.java
@@ -0,0 +1,19 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package org.streampipes.processors.filters.jvm.processor.listfilter;
+
+public class ListFilterController { // Empty class: declares no members and extends no declarer. NOTE(review): presumably a stub - confirm or remove.
+}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilterParameters.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilterParameters.java
new file mode 100644
index 0000000..c807023
--- /dev/null
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/listfilter/ListFilterParameters.java
@@ -0,0 +1,19 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package org.streampipes.processors.filters.jvm.processor.listfilter;
+
+public class ListFilterParameters { // Empty class: declares no members and extends no params type. NOTE(review): presumably a stub - confirm or remove.
+}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/numericalfilter/NumericalFilter.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/numericalfilter/NumericalFilter.java
index fbd4f1c..5b4370c 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/numericalfilter/NumericalFilter.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/numericalfilter/NumericalFilter.java
@@ -16,30 +16,29 @@
package org.streampipes.processors.filters.jvm.processor.numericalfilter;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
-import java.util.Map;
-
-public class NumericalFilter extends StandaloneEventProcessorEngine<NumericalFilterParameters> {
+public class NumericalFilter implements EventProcessor<NumericalFilterParameters> {
private NumericalFilterParameters params;
- public NumericalFilter(NumericalFilterParameters params) {
- super(params);
- }
-
@Override
- public void onInvocation(NumericalFilterParameters numericalFilterParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(NumericalFilterParameters numericalFilterParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext
+ runtimeContext) {
this.params = numericalFilterParameters;
}
@Override
- public void onEvent(Map<String, Object> in, String s, SpOutputCollector out) {
+ public void onEvent(Event event, SpOutputCollector out) {
Boolean satisfiesFilter = false;
- Double value = Double.parseDouble(String.valueOf(in.get(params.getFilterProperty())));
+ Double value = event.getFieldBySelector(params.getFilterProperty()).getAsPrimitive()
+ .getAsDouble();
+
+ //Double value = Double.parseDouble(String.valueOf(in.get(params.getFilterProperty())));
Double threshold = params.getThreshold();
if (params.getNumericalOperator() == NumericalOperator.EQ) {
@@ -55,7 +54,7 @@
}
if (satisfiesFilter) {
- out.onEvent(in);
+ out.collect(event);
}
}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/numericalfilter/NumericalFilterController.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/numericalfilter/NumericalFilterController.java
index a806262..f43ba9f 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/numericalfilter/NumericalFilterController.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/numericalfilter/NumericalFilterController.java
@@ -20,11 +20,15 @@
import org.streampipes.model.graph.DataProcessorDescription;
import org.streampipes.model.graph.DataProcessorInvocation;
import org.streampipes.model.schema.PropertyScope;
-import org.streampipes.processors.filters.jvm.config.FiltersJvmConfig;
import org.streampipes.sdk.builder.ProcessingElementBuilder;
import org.streampipes.sdk.builder.StreamRequirementsBuilder;
import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
-import org.streampipes.sdk.helpers.*;
+import org.streampipes.sdk.helpers.EpRequirements;
+import org.streampipes.sdk.helpers.Labels;
+import org.streampipes.sdk.helpers.Options;
+import org.streampipes.sdk.helpers.OutputStrategies;
+import org.streampipes.sdk.helpers.SupportedFormats;
+import org.streampipes.sdk.helpers.SupportedProtocols;
import org.streampipes.wrapper.standalone.ConfiguredEventProcessor;
import org.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
@@ -38,12 +42,11 @@
public DataProcessorDescription declareModel() {
return ProcessingElementBuilder.create("org.streampipes.processors.filters.jvm.numericalfilter", "Numerical Filter", "Numerical Filter Description")
.category(DataProcessorType.FILTER)
- .iconUrl(FiltersJvmConfig.getIconUrl("Numerical_Filter_Icon_HQ"))
+ .providesAssets()
.requiredStream(StreamRequirementsBuilder
.create()
.requiredPropertyWithUnaryMapping(EpRequirements.numberReq(), Labels.from(NUMBER_MAPPING, "Specifies the field name where the filter operation should" +
- " be applied " +
- "on.", ""), PropertyScope.NONE).build())
+ " be applied on.", ""), PropertyScope.NONE).build())
.outputStrategy(OutputStrategies.keep())
.requiredSingleValueSelection(Labels.from(OPERATION, "Filter Operation", "Specifies the filter " +
"operation that should be applied on the field"), Options.from("<", "<=", ">", ">=", "=="))
@@ -56,9 +59,7 @@
@Override
public ConfiguredEventProcessor<NumericalFilterParameters> onInvocation
- (DataProcessorInvocation sepa) {
- ProcessingElementParameterExtractor extractor = ProcessingElementParameterExtractor.from(sepa);
-
+ (DataProcessorInvocation sepa, ProcessingElementParameterExtractor extractor) {
Double threshold = extractor.singleValueParameter(VALUE, Double.class);
String stringOperation = extractor.selectedSingleValue(OPERATION, String.class);
@@ -76,10 +77,11 @@
String filterProperty = extractor.mappingPropertyValue(NUMBER_MAPPING);
- NumericalFilterParameters staticParam = new NumericalFilterParameters(sepa, threshold, NumericalOperator.valueOf
- (operation)
- , filterProperty);
+ NumericalFilterParameters staticParam = new NumericalFilterParameters(sepa,
+ threshold,
+ NumericalOperator.valueOf(operation),
+ filterProperty);
- return new ConfiguredEventProcessor<>(staticParam, () -> new NumericalFilter(staticParam));
+ return new ConfiguredEventProcessor<>(staticParam, NumericalFilter::new);
}
}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/projection/Projection.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/projection/Projection.java
index 3260551..a8d56c7 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/projection/Projection.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/projection/Projection.java
@@ -17,34 +17,26 @@
package org.streampipes.processors.filters.jvm.processor.projection;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
-public class Projection extends StandaloneEventProcessorEngine<ProjectionParameters> {
+public class Projection implements EventProcessor<ProjectionParameters> {
private List<String> outputKeys;
- public Projection(ProjectionParameters params) {
- super(params);
- }
@Override
- public void onInvocation(ProjectionParameters projectionParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(ProjectionParameters projectionParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
this.outputKeys = projectionParameters.getOutputKeys();
}
@Override
- public void onEvent(Map<String, Object> in, String sourceInfo, SpOutputCollector out) {
- Map<String, Object> outEvent = new HashMap<>();
- for(String outputKey : outputKeys) {
- outEvent.put(outputKey, in.get(outputKey));
- }
- out.onEvent(outEvent);
+ public void onEvent(Event event, SpOutputCollector out) {
+ out.collect(event.getSubset(outputKeys));
}
@Override
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/projection/ProjectionController.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/projection/ProjectionController.java
index bf1fa2e..e073785 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/projection/ProjectionController.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/projection/ProjectionController.java
@@ -20,10 +20,10 @@
import org.streampipes.model.DataProcessorType;
import org.streampipes.model.graph.DataProcessorDescription;
import org.streampipes.model.graph.DataProcessorInvocation;
-import org.streampipes.model.schema.EventProperty;
import org.streampipes.processors.filters.jvm.config.FiltersJvmConfig;
import org.streampipes.sdk.builder.ProcessingElementBuilder;
import org.streampipes.sdk.builder.StreamRequirementsBuilder;
+import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
import org.streampipes.sdk.helpers.EpRequirements;
import org.streampipes.sdk.helpers.OutputStrategies;
import org.streampipes.sdk.helpers.SupportedFormats;
@@ -32,7 +32,6 @@
import org.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
import java.util.List;
-import java.util.stream.Collectors;
public class ProjectionController extends StandaloneEventProcessingDeclarer<ProjectionParameters> {
@@ -53,19 +52,13 @@
@Override
public ConfiguredEventProcessor<ProjectionParameters>
- onInvocation(DataProcessorInvocation graph) {
+ onInvocation(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
- List<String> outputKeys = graph
- .getOutputStream()
- .getEventSchema()
- .getEventProperties()
- .stream()
- .map(EventProperty::getRuntimeName)
- .collect(Collectors.toList());
+ List<String> outputKeySelectors = extractor.outputKeySelectors();
ProjectionParameters staticParam = new ProjectionParameters(
- graph, outputKeys);
+ graph, outputKeySelectors);
- return new ConfiguredEventProcessor<>(staticParam, () -> new Projection(staticParam));
+ return new ConfiguredEventProcessor<>(staticParam, Projection::new);
}
}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/textfilter/TextFilter.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/textfilter/TextFilter.java
index 75bb5cb..04ab277 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/textfilter/TextFilter.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/textfilter/TextFilter.java
@@ -16,29 +16,26 @@
package org.streampipes.processors.filters.jvm.processor.textfilter;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
-import java.util.Map;
-
-public class TextFilter extends StandaloneEventProcessorEngine<TextFilterParameters> {
+public class TextFilter implements EventProcessor<TextFilterParameters> {
private TextFilterParameters params;
- public TextFilter(TextFilterParameters params) {
- super(params);
- }
-
@Override
- public void onInvocation(TextFilterParameters textFilterParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(TextFilterParameters textFilterParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
this.params = textFilterParameters;
}
@Override
- public void onEvent(Map<String, Object> in, String s, SpOutputCollector out) {
+ public void onEvent(Event event, SpOutputCollector out) {
Boolean satisfiesFilter = false;
- String value = String.valueOf(in.get(params.getFilterProperty()));
+ String value = event.getFieldBySelector(params.getFilterProperty())
+ .getAsPrimitive()
+ .getAsString();
if (params.getStringOperator() == StringOperator.MATCHES) {
satisfiesFilter = (value.equals(params.getKeyword()));
@@ -47,7 +44,7 @@
}
if (satisfiesFilter) {
- out.onEvent(in);
+ out.collect(event);
}
}
diff --git a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/textfilter/TextFilterController.java b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/textfilter/TextFilterController.java
index dcf8b83..21a210f 100644
--- a/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/textfilter/TextFilterController.java
+++ b/streampipes-processors-filters-jvm/src/main/java/org/streampipes/processors/filters/jvm/processor/textfilter/TextFilterController.java
@@ -59,8 +59,7 @@
@Override
public ConfiguredEventProcessor<TextFilterParameters> onInvocation
- (DataProcessorInvocation sepa) {
- ProcessingElementParameterExtractor extractor = getExtractor(sepa);
+ (DataProcessorInvocation sepa, ProcessingElementParameterExtractor extractor) {
String keyword = extractor.singleValueParameter(KEYWORD_ID, String.class);
String operation = extractor.selectedSingleValue(OPERATION_ID, String.class);
@@ -73,6 +72,6 @@
StringOperator.valueOf(operation),
filterProperty);
- return new ConfiguredEventProcessor<>(staticParam, () -> new TextFilter(staticParam));
+ return new ConfiguredEventProcessor<>(staticParam, TextFilter::new);
}
}
diff --git a/streampipes-processors-filters-jvm/src/main/resources/org.streampipes.processors.filters.jvm.numericalfilter/documentation.md b/streampipes-processors-filters-jvm/src/main/resources/org.streampipes.processors.filters.jvm.numericalfilter/documentation.md
new file mode 100644
index 0000000..2f3a185
--- /dev/null
+++ b/streampipes-processors-filters-jvm/src/main/resources/org.streampipes.processors.filters.jvm.numericalfilter/documentation.md
@@ -0,0 +1,22 @@
+## Numerical Filter
+
+<p align="center">
+ <img src="icon.png" width="30%"/>
+</p>
+
+## Description
+
+The Numerical Filter forwards only those events whose value in a selected numerical field satisfies a comparison with a configured threshold value. Events that do not satisfy the comparison are not forwarded.
+
+The processor requires an input stream that contains at least one numerical property. Forwarded events are passed on unchanged, so the output stream keeps the schema of the input stream.
+
+The filter is controlled by three settings:
+
+* the numerical field of the input event to which the filter operation is applied,
+* the filter operation, which is one of `<`, `<=`, `>`, `>=` or `==`,
+* the threshold value with which the field value is compared.
+
+For every incoming event the value of the selected field is read as a floating-point number and compared with the threshold using the selected filter operation.
+
+
+## Configuration
\ No newline at end of file
diff --git a/streampipes-processors-filters-jvm/src/main/resources/org.streampipes.processors.filters.jvm.numericalfilter/icon.png b/streampipes-processors-filters-jvm/src/main/resources/org.streampipes.processors.filters.jvm.numericalfilter/icon.png
new file mode 100644
index 0000000..643d474
--- /dev/null
+++ b/streampipes-processors-filters-jvm/src/main/resources/org.streampipes.processors.filters.jvm.numericalfilter/icon.png
Binary files differ
diff --git a/streampipes-processors-filters-siddhi/development/.env b/streampipes-processors-filters-siddhi/development/.env
index 7286d9c..6295580 100644
--- a/streampipes-processors-filters-siddhi/development/.env
+++ b/streampipes-processors-filters-siddhi/development/.env
@@ -2,9 +2,3 @@
SP_PORT=6020
SP_HOST=localhost
SP_ICON_HOST=localhost
-SP_KAFKA_HOST=localhost
-SP_ZOOKEEPER_HOST=localhost
-SP_COUCHDB_HOST=localhost
-SP_JMS_HOST=localhost
-SP_NGINX_HOST=localhost
-SP_NGINX_PORT=8082
\ No newline at end of file
diff --git a/streampipes-processors-filters-siddhi/pom.xml b/streampipes-processors-filters-siddhi/pom.xml
index dca70fc..acad5db 100644
--- a/streampipes-processors-filters-siddhi/pom.xml
+++ b/streampipes-processors-filters-siddhi/pom.xml
@@ -20,7 +20,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/config/ConfigKeys.java b/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/config/ConfigKeys.java
index 0ad5549..c4ac66d 100644
--- a/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/config/ConfigKeys.java
+++ b/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/config/ConfigKeys.java
@@ -22,15 +22,5 @@
final static String PORT = "SP_PORT";
final static String ICON_HOST = "SP_ICON_HOST";
final static String ICON_PORT = "SP_ICON_PORT";
- final static String KAFKA_HOST = "SP_KAFKA_HOST";
- final static String KAFKA_PORT = "SP_KAFKA_PORT";
- final static String ZOOKEEPER_HOST = "SP_ZOOKEEPER_HOST";
- final static String ZOOKEEPER_PORT = "SP_ZOOKEEPER_PORT";
- final static String COUCHDB_HOST = "SP_COUCHDB_HOST";
- final static String COUCHDB_PORT = "SP_COCHDB_PORT";
- final static String JMS_HOST = "SP_JMS_HOST";
- final static String JMS_PORT = "SP_JMS_PORT";
- final static String NGINX_HOST = "SP_NGINX_HOST";
- final static String NGINX_PORT = "SP_NGINX_PORT";
final static String SERVICE_NAME_KEY = "SP_SERVICE_NAME";
}
diff --git a/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/config/FilterSiddhiConfig.java b/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/config/FilterSiddhiConfig.java
index c168fed..a1e44ff 100644
--- a/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/config/FilterSiddhiConfig.java
+++ b/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/config/FilterSiddhiConfig.java
@@ -39,16 +39,6 @@
config.register(ConfigKeys.ICON_HOST, "backend", "Hostname for the icon host");
config.register(ConfigKeys.ICON_PORT, 80, "Port for the icons in nginx");
- config.register(ConfigKeys.NGINX_HOST, "localhost", "External hostname of StreamPipes Nginx");
- config.register(ConfigKeys.NGINX_PORT, 80, "External port of StreamPipes Nginx");
- config.register(ConfigKeys.KAFKA_HOST, "kafka", "Host for kafka of the pe sinks project");
- config.register(ConfigKeys.KAFKA_PORT, 9092, "Port for kafka of the pe sinks project");
- config.register(ConfigKeys.ZOOKEEPER_HOST, "zookeeper", "Host for zookeeper of the pe sinks project");
- config.register(ConfigKeys.ZOOKEEPER_PORT, 2181, "Port for zookeeper of the pe sinks project");
- config.register(ConfigKeys.COUCHDB_HOST, "couchdb", "Host for couchdb of the pe sinks project");
- config.register(ConfigKeys.COUCHDB_PORT, 5984, "Port for couchdb of the pe sinks project");
- config.register(ConfigKeys.JMS_HOST, "tcp://activemq", "Hostname for pe actions service for active mq");
- config.register(ConfigKeys.JMS_PORT, 61616, "Port for pe actions service for active mq");
config.register(ConfigKeys.SERVICE_NAME_KEY, service_name, "The name of the service");
@@ -81,55 +71,6 @@
return config.getInteger(ConfigKeys.ICON_PORT);
}
- public String getKafkaHost() {
- return config.getString(ConfigKeys.KAFKA_HOST);
- }
-
- public int getKafkaPort() {
- return config.getInteger(ConfigKeys.KAFKA_PORT);
- }
-
- public String getKafkaUrl() {
- return getKafkaHost() + ":" + getKafkaPort();
- }
-
- public String getZookeeperHost() {
- return config.getString(ConfigKeys.ZOOKEEPER_HOST);
- }
-
- public int getZookeeperPort() {
- return config.getInteger(ConfigKeys.ZOOKEEPER_PORT);
- }
-
- public String getCouchDbHost() {
- return config.getString(ConfigKeys.COUCHDB_HOST);
- }
-
- public int getCouchDbPort() {
- return config.getInteger(ConfigKeys.COUCHDB_PORT);
- }
-
- public String getJmsHost() {
- return config.getString(ConfigKeys.JMS_HOST);
- }
-
- public int getJmsPort() {
- return config.getInteger(ConfigKeys.JMS_PORT);
- }
-
- public String getJmsUrl() {
- return getJmsHost() + ":" + getJmsPort();
- }
-
- public String getNginxHost() {
- return config.getString(ConfigKeys.NGINX_HOST);
- }
-
- public Integer getNginxPort() {
-
- return config.getInteger(ConfigKeys.NGINX_PORT);
- }
-
@Override
public String getId() {
return service_id;
diff --git a/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/filter/NumericalFilter.java b/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/filter/NumericalFilter.java
index a75a549..5f6ffbb 100644
--- a/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/filter/NumericalFilter.java
+++ b/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/filter/NumericalFilter.java
@@ -21,10 +21,6 @@
public class NumericalFilter extends SiddhiEventEngine<NumericalFilterParameters> {
- public NumericalFilter(NumericalFilterParameters params) {
- super(params);
- }
-
@Override
protected String fromStatement(List<String> inputStreamNames, NumericalFilterParameters params) {
return "from " +inputStreamNames.get(0) +"[" +params.getFilterProperty() +"<" +params.getThreshold() +"]";
diff --git a/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/filter/NumericalFilterController.java b/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/filter/NumericalFilterController.java
index eed20cd..0938615 100644
--- a/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/filter/NumericalFilterController.java
+++ b/streampipes-processors-filters-siddhi/src/main/java/org/streampipes/processors/siddhi/filter/NumericalFilterController.java
@@ -33,8 +33,7 @@
private static final String OPERATION = "operation";
@Override
- public ConfiguredEventProcessor<NumericalFilterParameters> onInvocation(DataProcessorInvocation graph) {
- ProcessingElementParameterExtractor extractor = ProcessingElementParameterExtractor.from(graph);
+ public ConfiguredEventProcessor<NumericalFilterParameters> onInvocation(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
Double threshold = extractor.singleValueParameter(VALUE, Double.class);
String stringOperation = extractor.selectedSingleValue(OPERATION, String.class);
@@ -57,7 +56,7 @@
(operation)
, filterProperty);
- return new ConfiguredEventProcessor<>(staticParam, () -> new NumericalFilter(staticParam));
+ return new ConfiguredEventProcessor<>(staticParam, NumericalFilter::new);
}
@Override
diff --git a/streampipes-processors-geo-flink/pom.xml b/streampipes-processors-geo-flink/pom.xml
index 2ecfe54..3a5f899 100644
--- a/streampipes-processors-geo-flink/pom.xml
+++ b/streampipes-processors-geo-flink/pom.xml
@@ -20,7 +20,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-geo-flink/src/main/java/org/streampipes/processor/geo/flink/processor/gridenricher/SpatialGridEnricher.java b/streampipes-processors-geo-flink/src/main/java/org/streampipes/processor/geo/flink/processor/gridenricher/SpatialGridEnricher.java
index 20a9e5d..431774e 100644
--- a/streampipes-processors-geo-flink/src/main/java/org/streampipes/processor/geo/flink/processor/gridenricher/SpatialGridEnricher.java
+++ b/streampipes-processors-geo-flink/src/main/java/org/streampipes/processor/geo/flink/processor/gridenricher/SpatialGridEnricher.java
@@ -19,10 +19,9 @@
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
-import java.util.Map;
-
-public class SpatialGridEnricher implements FlatMapFunction<Map<String, Object>, Map<String, Object>> {
+public class SpatialGridEnricher implements FlatMapFunction<Event, Event> {
private EnrichmentSettings settings;
private SpatialGridCalculator calculator;
@@ -33,10 +32,11 @@
}
@Override
- public void flatMap(Map<String, Object> in, Collector<Map<String, Object>> out) throws
+ public void flatMap(Event in, Collector<Event> out) throws
Exception {
- Double latitude = toDouble(in.get(settings.getLatPropertyName()));
- Double longitude = toDouble(in.get(settings.getLngPropertyName()));
+ Double latitude = in.getFieldBySelector(settings.getLatPropertyName()).getAsPrimitive()
+ .getAsDouble();
+ Double longitude = in.getFieldBySelector(settings.getLngPropertyName()).getAsPrimitive().getAsDouble();
CellOption result = calculator.computeCells(latitude, longitude);
// System.out.println("x=" +result.getCellX() +", y=" +result.getCellY());
@@ -44,19 +44,16 @@
out.collect(toOutput(in, result));
}
- private Map<String,Object> toOutput(Map<String, Object> in, CellOption result) {
- in.put(SpatialGridConstants.GRID_X_KEY, result.getCellX());
- in.put(SpatialGridConstants.GRID_Y_KEY, result.getCellY());
- in.put(SpatialGridConstants.GRID_CELLSIZE_KEY, result.getCellSize());
- in.put(SpatialGridConstants.GRID_LAT_NW_KEY, result.getLatitudeNW());
- in.put(SpatialGridConstants.GRID_LON_NW_KEY, result.getLongitudeNW());
- in.put(SpatialGridConstants.GRID_LAT_SE_KEY, result.getLatitudeSE());
- in.put(SpatialGridConstants.GRID_LON_SE_KEY, result.getLongitudeSE());
+ private Event toOutput(Event in, CellOption result) {
+ in.addField(SpatialGridConstants.GRID_X_KEY, result.getCellX());
+ in.addField(SpatialGridConstants.GRID_Y_KEY, result.getCellY());
+ in.addField(SpatialGridConstants.GRID_CELLSIZE_KEY, result.getCellSize());
+ in.addField(SpatialGridConstants.GRID_LAT_NW_KEY, result.getLatitudeNW());
+ in.addField(SpatialGridConstants.GRID_LON_NW_KEY, result.getLongitudeNW());
+ in.addField(SpatialGridConstants.GRID_LAT_SE_KEY, result.getLatitudeSE());
+ in.addField(SpatialGridConstants.GRID_LON_SE_KEY, result.getLongitudeSE());
return in;
}
- private Double toDouble(Object value) {
- return Double.parseDouble(String.valueOf(value));
- }
}
diff --git a/streampipes-processors-geo-flink/src/main/java/org/streampipes/processor/geo/flink/processor/gridenricher/SpatialGridEnrichmentProgram.java b/streampipes-processors-geo-flink/src/main/java/org/streampipes/processor/geo/flink/processor/gridenricher/SpatialGridEnrichmentProgram.java
index 0090319..e2ea060 100644
--- a/streampipes-processors-geo-flink/src/main/java/org/streampipes/processor/geo/flink/processor/gridenricher/SpatialGridEnrichmentProgram.java
+++ b/streampipes-processors-geo-flink/src/main/java/org/streampipes/processor/geo/flink/processor/gridenricher/SpatialGridEnrichmentProgram.java
@@ -18,10 +18,9 @@
package org.streampipes.processor.geo.flink.processor.gridenricher;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processor.geo.flink.AbstractGeoProgram;
-import java.util.Map;
-
public class SpatialGridEnrichmentProgram extends AbstractGeoProgram<SpatialGridEnrichmentParameters> {
public SpatialGridEnrichmentProgram(SpatialGridEnrichmentParameters params, boolean debug) {
@@ -29,7 +28,7 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>[] messageStream) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>[] messageStream) {
return messageStream[0].flatMap(new SpatialGridEnricher(params.getEnrichmentSettings()));
}
}
diff --git a/streampipes-processors-geo-jvm/development/.env b/streampipes-processors-geo-jvm/development/.env
index d458efa..aa6cd58 100644
--- a/streampipes-processors-geo-jvm/development/.env
+++ b/streampipes-processors-geo-jvm/development/.env
@@ -2,7 +2,4 @@
SP_PORT=6030
SP_HOST=localhost
SP_ICON_HOST=localhost
-SP_KAFKA_HOST=localhost
-SP_ZOOKEEPER_HOST=localhost
-SP_NGINX_HOST=localhost
-SP_NGINX_PORT=8082
\ No newline at end of file
+GOOGLE_API_KEY=localhost
diff --git a/streampipes-processors-geo-jvm/pom.xml b/streampipes-processors-geo-jvm/pom.xml
index 90c0694..e9391e1 100644
--- a/streampipes-processors-geo-jvm/pom.xml
+++ b/streampipes-processors-geo-jvm/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/config/ConfigKeys.java b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/config/ConfigKeys.java
index 44814fd..b7b038c 100644
--- a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/config/ConfigKeys.java
+++ b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/config/ConfigKeys.java
@@ -22,12 +22,6 @@
final static String PORT = "SP_PORT";
final static String ICON_HOST = "SP_ICON_HOST";
final static String ICON_PORT = "SP_ICON_PORT";
- final static String KAFKA_HOST = "SP_KAFKA_HOST";
- final static String KAFKA_PORT = "SP_KAFKA_PORT";
- final static String ZOOKEEPER_HOST = "SP_ZOOKEEPER_HOST";
- final static String ZOOKEEPER_PORT = "SP_ZOOKEEPER_PORT";
- final static String NGINX_HOST = "SP_NGINX_HOST";
- final static String NGINX_PORT = "SP_NGINX_PORT";
final static String SERVICE_NAME_KEY = "SP_SERVICE_NAME";
final static String GOOGLE_API_KEY = "SP_GOOGLE_API_KEY";
}
diff --git a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/config/GeoJvmConfig.java b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/config/GeoJvmConfig.java
index 46c76f4..212bf47 100644
--- a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/config/GeoJvmConfig.java
+++ b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/config/GeoJvmConfig.java
@@ -40,12 +40,6 @@
config.register(ConfigKeys.ICON_HOST, "backend", "Hostname for the icon host");
config.register(ConfigKeys.ICON_PORT, 80, "Port for the icons in nginx");
- config.register(ConfigKeys.NGINX_HOST, "localhost", "External hostname of StreamPipes Nginx");
- config.register(ConfigKeys.NGINX_PORT, 80, "External port of StreamPipes Nginx");
- config.register(ConfigKeys.KAFKA_HOST, "kafka", "Host for kafka of the pe sinks project");
- config.register(ConfigKeys.KAFKA_PORT, 9092, "Port for kafka of the pe sinks project");
- config.register(ConfigKeys.ZOOKEEPER_HOST, "zookeeper", "Host for zookeeper of the pe sinks project");
- config.register(ConfigKeys.ZOOKEEPER_PORT, 2181, "Port for zookeeper of the pe sinks project");
config.registerPassword(ConfigKeys.GOOGLE_API_KEY, "", "Google API Key for the routing service");
@@ -80,35 +74,6 @@
return config.getInteger(ConfigKeys.ICON_PORT);
}
- public String getKafkaHost() {
- return config.getString(ConfigKeys.KAFKA_HOST);
- }
-
- public int getKafkaPort() {
- return config.getInteger(ConfigKeys.KAFKA_PORT);
- }
-
- public String getKafkaUrl() {
- return getKafkaHost() + ":" + getKafkaPort();
- }
-
- public String getZookeeperHost() {
- return config.getString(ConfigKeys.ZOOKEEPER_HOST);
- }
-
- public int getZookeeperPort() {
- return config.getInteger(ConfigKeys.ZOOKEEPER_PORT);
- }
-
- public String getNginxHost() {
- return config.getString(ConfigKeys.NGINX_HOST);
- }
-
- public Integer getNginxPort() {
-
- return config.getInteger(ConfigKeys.NGINX_PORT);
- }
-
public String getGoogleApiKey() {
return config.getString(ConfigKeys.GOOGLE_API_KEY);
}
diff --git a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/geocode/Geocoder.java b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/geocode/Geocoder.java
index 249c459..247a7cd 100644
--- a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/geocode/Geocoder.java
+++ b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/geocode/Geocoder.java
@@ -20,25 +20,21 @@
import com.google.maps.GeocodingApi;
import com.google.maps.errors.ApiException;
import com.google.maps.model.GeocodingResult;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.geo.jvm.config.GeoJvmConfig;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
import java.io.IOException;
-import java.util.Map;
-public class Geocoder extends StandaloneEventProcessorEngine<GeocodingParameters> {
+public class Geocoder implements EventProcessor<GeocodingParameters> {
private GeocodingParameters geocodingParameters;
private GeoApiContext context;
- public Geocoder(GeocodingParameters params) {
- super(params);
- }
-
@Override
- public void onInvocation(GeocodingParameters geocodingParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(GeocodingParameters geocodingParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
this.geocodingParameters = geocodingParameters;
context = new GeoApiContext.Builder()
.apiKey(GeoJvmConfig.INSTANCE.getGoogleApiKey())
@@ -46,18 +42,19 @@
}
@Override
- public void onEvent(Map<String, Object> in, String s, SpOutputCollector spOutputCollector) {
- String city = String.valueOf(in.get(geocodingParameters.getCity()));
- String street = String.valueOf(in.get(geocodingParameters.getStreet()));
- String number = String.valueOf(in.get(geocodingParameters.getNumber()));
+ public void onEvent(Event in, SpOutputCollector spOutputCollector) {
+ String city = in.getFieldBySelector(geocodingParameters.getCity()).getAsPrimitive().getAsString();
+ String street = in.getFieldBySelector(geocodingParameters.getStreet()).getAsPrimitive()
+ .getAsString();
+ String number = in.getFieldBySelector(geocodingParameters.getNumber()).getAsPrimitive().getAsString();
String searchQuery = street + " " +number + ", " +city;
try {
GeocodingResult[] result = GeocodingApi.geocode(context, searchQuery).await();
if(result.length > 0) {
- in.put("latitude", result[0].geometry.location.lat);
- in.put("longitude", result[0].geometry.location.lng);
+ in.addField("latitude", result[0].geometry.location.lat);
+ in.addField("longitude", result[0].geometry.location.lng);
}
} catch (ApiException e) {
e.printStackTrace();
@@ -67,7 +64,7 @@
e.printStackTrace();
}
- spOutputCollector.onEvent(in);
+ spOutputCollector.collect(in);
}
diff --git a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/geocode/GeocodingController.java b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/geocode/GeocodingController.java
index 57ceb82..b553561 100644
--- a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/geocode/GeocodingController.java
+++ b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/geocode/GeocodingController.java
@@ -36,15 +36,13 @@
@Override
public ConfiguredEventProcessor<GeocodingParameters> onInvocation(DataProcessorInvocation
- graph) {
- ProcessingElementParameterExtractor extractor = ProcessingElementParameterExtractor.from(graph);
-
+ graph, ProcessingElementParameterExtractor extractor) {
String city = extractor.mappingPropertyValue(CITY_MAPPING);
String street = extractor.mappingPropertyValue(STREET_MAPPING);
String number = extractor.mappingPropertyValue(STREET_NUMBER_MAPPING);
GeocodingParameters params = new GeocodingParameters(graph, city, street, number);
- return new ConfiguredEventProcessor<>(params, () -> new Geocoder(params));
+ return new ConfiguredEventProcessor<>(params, Geocoder::new);
}
@Override
diff --git a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRouting.java b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRouting.java
index 84b4610..db2d23e 100644
--- a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRouting.java
+++ b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRouting.java
@@ -23,28 +23,26 @@
import com.google.maps.model.DistanceMatrix;
import com.google.maps.model.LatLng;
import org.streampipes.logging.api.Logger;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.geo.jvm.config.GeoJvmConfig;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
-public class GoogleRouting extends StandaloneEventProcessorEngine<GoogleRoutingParameters> {
+public class GoogleRouting implements EventProcessor<GoogleRoutingParameters> {
private static Logger LOG;
private GoogleRoutingParameters googleRoutingParameters;
private GeoApiContext context;
- public GoogleRouting(GoogleRoutingParameters params) {
- super(params);
- }
-
@Override
- public void onInvocation(GoogleRoutingParameters googleRoutingParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(GoogleRoutingParameters googleRoutingParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext
+ runtimeContext) {
LOG = googleRoutingParameters.getGraph().getLogger(GoogleRouting.class);
this.googleRoutingParameters = googleRoutingParameters;
@@ -54,10 +52,12 @@
}
@Override
- public void onEvent(Map<String, Object> in, String s, SpOutputCollector out) {
- String city = (String) in.get(googleRoutingParameters.getCity());
- String street = (String) in.get(googleRoutingParameters.getStreet());
- String number = (String) in.get(googleRoutingParameters.getNumber());
+ public void onEvent(Event in, SpOutputCollector out) {
+ String city = in.getFieldBySelector(googleRoutingParameters.getCity()).getAsPrimitive().getAsString();
+ String street = in.getFieldBySelector(googleRoutingParameters.getStreet()).getAsPrimitive
+ ().getAsString();
+ String number = in.getFieldBySelector(googleRoutingParameters.getNumber()).getAsPrimitive
+ ().getAsString();
String home = googleRoutingParameters.getHome();
String destinationLocation = city + ", " + street + ", " + number;
@@ -74,9 +74,9 @@
long l = rest.rows[0].elements[0].distance.inMeters;
- in.put("kvi", l);
+ in.addField("kvi", l);
- out.onEvent(in);
+ out.collect(in);
}
} catch (ApiException e) {
diff --git a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRoutingController.java b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRoutingController.java
index 8427791..0ce2e7b 100644
--- a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRoutingController.java
+++ b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRoutingController.java
@@ -75,9 +75,7 @@
Here you get the Configuration Parameters which the User has entered
*/
@Override
- public ConfiguredEventProcessor<GoogleRoutingParameters> onInvocation(DataProcessorInvocation graph) {
- ProcessingElementParameterExtractor extractor = getExtractor(graph);
-
+ public ConfiguredEventProcessor<GoogleRoutingParameters> onInvocation(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
String city = extractor.mappingPropertyValue(CITY_MAPPING);
String street = extractor.mappingPropertyValue(STREET_MAPPING);
String number = extractor.mappingPropertyValue(STREET_NUMBER_MAPPING);
@@ -85,6 +83,6 @@
String home = extractor.singleValueParameter(START_ADDRESS, String.class);
GoogleRoutingParameters params = new GoogleRoutingParameters(graph, city, street, number, home);
- return new ConfiguredEventProcessor<>(params, () -> new GoogleRouting(params));
+ return new ConfiguredEventProcessor<>(params, GoogleRouting::new);
}
}
diff --git a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRoutingParameters.java b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRoutingParameters.java
index fb88bc4..e965b3c 100644
--- a/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRoutingParameters.java
+++ b/streampipes-processors-geo-jvm/src/main/java/org/streampipes/processors/geo/jvm/processor/route/GoogleRoutingParameters.java
@@ -26,8 +26,6 @@
private String number;
private String home;
- public GoogleRoutingParameters() {
- }
public GoogleRoutingParameters(DataProcessorInvocation graph, String city, String street, String number, String home) {
super(graph);
diff --git a/streampipes-processors-image-processing-jvm/development/.env b/streampipes-processors-image-processing-jvm/development/.env
index 3c56349..5942884 100644
--- a/streampipes-processors-image-processing-jvm/development/.env
+++ b/streampipes-processors-image-processing-jvm/development/.env
@@ -2,8 +2,3 @@
SP_PORT=6035
SP_HOST=localhost
SP_ICON_HOST=localhost
-SP_KAFKA_HOST=localhost
-SP_ZOOKEEPER_HOST=localhost
-SP_JMS_HOST=localhost
-SP_NGINX_HOST=localhost
-SP_NGINX_PORT=8082
diff --git a/streampipes-processors-image-processing-jvm/pom.xml b/streampipes-processors-image-processing-jvm/pom.xml
index c43f1ad..13aaed4 100644
--- a/streampipes-processors-image-processing-jvm/pom.xml
+++ b/streampipes-processors-image-processing-jvm/pom.xml
@@ -20,7 +20,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/config/ConfigKeys.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/config/ConfigKeys.java
index 61f581d..23d2af9 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/config/ConfigKeys.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/config/ConfigKeys.java
@@ -22,15 +22,5 @@
final static String PORT = "SP_PORT";
final static String ICON_HOST = "SP_ICON_HOST";
final static String ICON_PORT = "SP_ICON_PORT";
- final static String KAFKA_HOST = "SP_KAFKA_HOST";
- final static String KAFKA_PORT = "SP_KAFKA_PORT";
- final static String ZOOKEEPER_HOST = "SP_ZOOKEEPER_HOST";
- final static String ZOOKEEPER_PORT = "SP_ZOOKEEPER_PORT";
- final static String JMS_HOST = "SP_JMS_HOST";
- final static String JMS_PORT = "SP_JMS_PORT";
- final static String NGINX_HOST = "SP_NGINX_HOST";
- final static String NGINX_PORT = "SP_NGINX_PORT";
final static String SERVICE_NAME_KEY = "SP_SERVICE_NAME";
- final static String GOOGLE_API_KEY = "SP_GOOGLE_API_KEY";
- final static String MODEL_DIRECTORY = "model_directory";
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/config/ImageProcessingJvmConfig.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/config/ImageProcessingJvmConfig.java
index b7fd115..c5a9b51 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/config/ImageProcessingJvmConfig.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/config/ImageProcessingJvmConfig.java
@@ -42,17 +42,6 @@
config.register(ICON_HOST, "backend", "Hostname for the icon host");
config.register(ICON_PORT, 80, "Port for the icons in nginx");
- config.register(ConfigKeys.NGINX_HOST, System.getenv("STREAMPIPES_HOST"), "External hostname of " +
- "StreamPipes Nginx");
- config.register(ConfigKeys.NGINX_PORT, 80, "External port of StreamPipes Nginx");
- config.register(KAFKA_HOST, "kafka", "Host for kafka of the pe sinks project");
- config.register(ConfigKeys.KAFKA_PORT, 9092, "Port for kafka of the pe sinks project");
- config.register(ConfigKeys.ZOOKEEPER_HOST, "zookeeper", "Host for zookeeper of the pe sinks project");
- config.register(ConfigKeys.ZOOKEEPER_PORT, 2181, "Port for zookeeper of the pe sinks project");
- config.register(ConfigKeys.JMS_HOST, "tcp://activemq", "Hostname for pe actions service for active mq");
- config.register(ConfigKeys.JMS_PORT, 61616, "Port for pe actions service for active mq");
-
- config.register(MODEL_DIRECTORY, "/model-repository/", "The directory location for the folders of the image classification models");
config.register(ConfigKeys.SERVICE_NAME_KEY, service_name, "The name of the service");
@@ -86,50 +75,6 @@
return config.getInteger(ICON_PORT);
}
- public String getKafkaHost() {
- return config.getString(KAFKA_HOST);
- }
-
- public int getKafkaPort() {
- return config.getInteger(KAFKA_PORT);
- }
-
- public String getKafkaUrl() {
- return getKafkaHost() + ":" + getKafkaPort();
- }
-
- public String getZookeeperHost() {
- return config.getString(ZOOKEEPER_HOST);
- }
-
- public int getZookeeperPort() {
- return config.getInteger(ZOOKEEPER_PORT);
- }
-
- public String getJmsHost() {
- return config.getString(JMS_HOST);
- }
-
- public int getJmsPort() {
- return config.getInteger(JMS_PORT);
- }
-
- public String getJmsUrl() {
- return getJmsHost() + ":" + getJmsPort();
- }
-
- public String getNginxHost() {
- return config.getString(NGINX_HOST);
- }
-
- public Integer getNginxPort() {
- return config.getInteger(NGINX_PORT);
- }
-
- public String getModelDirectory() {
- return config.getString(MODEL_DIRECTORY);
- }
-
@Override
public String getId() {
return service_id;
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/genericclassification/GenericImageClassification.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/genericclassification/GenericImageClassification.java
index d97cbe6..67af96d 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/genericclassification/GenericImageClassification.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/genericclassification/GenericImageClassification.java
@@ -23,17 +23,20 @@
import boofcv.struct.image.GrayF32;
import boofcv.struct.image.Planar;
import deepboof.io.DeepBoofDataBaseOps;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.imageprocessing.jvm.processor.commons.PlainImageTransformer;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
-import java.util.*;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Optional;
-public class GenericImageClassification extends StandaloneEventProcessorEngine<GenericImageClassificationParameters> {
+public class GenericImageClassification implements EventProcessor<GenericImageClassificationParameters> {
private GenericImageClassificationParameters params;
private ClassifierAndSource cs;
@@ -41,12 +44,8 @@
private ImageClassifier<Planar<GrayF32>> classifier;
private List<String> categories;
- public GenericImageClassification(GenericImageClassificationParameters params) {
- super(params);
- }
-
@Override
- public void onInvocation(GenericImageClassificationParameters genericImageClassificationParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(GenericImageClassificationParameters genericImageClassificationParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
this.params = genericImageClassificationParameters;
//this.cs = FactoryImageClassifier.vgg_cifar10(); // Test set 89.9% for 10 categories
ClassifierAndSource cs = FactoryImageClassifier.nin_imagenet(); // Test set 62.6% for 1000 categories
@@ -63,8 +62,9 @@
}
@Override
- public void onEvent(Map<String, Object> in, String s, SpOutputCollector out) {
- PlainImageTransformer<GenericImageClassificationParameters> imageTransformer = new PlainImageTransformer<>(in,
+ public void onEvent(Event in, SpOutputCollector out) {
+ PlainImageTransformer<GenericImageClassificationParameters> imageTransformer = new
+ PlainImageTransformer<>(in.getRaw(),
params);
@@ -87,10 +87,10 @@
if (scores.size() > 0) {
System.out.println(scores.get(0).score +":" +categories.get(scores.get(0).category));
//scores.forEach(score -> System.out.println(score.category +":" +categories.get(score.category) +":" +score));
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("score", scores.get(0).score);
- outMap.put("category", categories.get(scores.get(0).category));
- out.onEvent(outMap);
+ Event outEvent = new Event();
+ outEvent.addField("score", scores.get(0).score);
+ outEvent.addField("category", categories.get(scores.get(0).category));
+ out.collect(outEvent);
}
}
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/genericclassification/GenericImageClassificationController.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/genericclassification/GenericImageClassificationController.java
index 01fb768..202c56a 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/genericclassification/GenericImageClassificationController.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/genericclassification/GenericImageClassificationController.java
@@ -53,13 +53,12 @@
}
@Override
- public ConfiguredEventProcessor<GenericImageClassificationParameters> onInvocation(DataProcessorInvocation graph) {
- ProcessingElementParameterExtractor extractor = ProcessingElementParameterExtractor.from(graph);
+ public ConfiguredEventProcessor<GenericImageClassificationParameters> onInvocation(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
String imageProperty = extractor.mappingPropertyValue(IMAGE);
GenericImageClassificationParameters staticParam = new GenericImageClassificationParameters(graph, imageProperty);
- return new ConfiguredEventProcessor<>(staticParam, () -> new GenericImageClassification(staticParam));
+ return new ConfiguredEventProcessor<>(staticParam, GenericImageClassification::new);
}
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagecropper/ImageCropper.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagecropper/ImageCropper.java
index 3f15079..bf5704b 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagecropper/ImageCropper.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagecropper/ImageCropper.java
@@ -16,34 +16,29 @@
*/
package org.streampipes.processors.imageprocessing.jvm.processor.imagecropper;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.imageprocessing.jvm.processor.commons.ImageTransformer;
import org.streampipes.processors.imageprocessing.jvm.processor.imageenrichment.BoxCoordinates;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
import java.awt.image.BufferedImage;
import java.util.Base64;
-import java.util.HashMap;
-import java.util.Map;
import java.util.Optional;
-public class ImageCropper extends StandaloneEventProcessorEngine<ImageCropperParameters> {
+public class ImageCropper implements EventProcessor<ImageCropperParameters> {
private ImageCropperParameters params;
- public ImageCropper(ImageCropperParameters params) {
- super(params);
- }
-
@Override
- public void onInvocation(ImageCropperParameters imageCropperParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(ImageCropperParameters imageCropperParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
this.params = imageCropperParameters;
}
@Override
- public void onEvent(Map<String, Object> in, String s, SpOutputCollector out) {
- ImageTransformer imageTransformer = new ImageTransformer(in, params);
+ public void onEvent(Event in, SpOutputCollector out) {
+ ImageTransformer imageTransformer = new ImageTransformer(in.getRaw(), params);
Optional<BufferedImage> imageOpt = imageTransformer.getImage();
if (imageOpt.isPresent()) {
@@ -56,9 +51,9 @@
Optional<byte[]> finalImage = imageTransformer.makeImage(dest);
if (finalImage.isPresent()) {
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("image", Base64.getEncoder().encodeToString(finalImage.get()));
- out.onEvent(outMap);
+ Event outEvent = new Event();
+ outEvent.addField("image", Base64.getEncoder().encodeToString(finalImage.get()));
+ out.collect(outEvent);
}
}
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagecropper/ImageCropperController.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagecropper/ImageCropperController.java
index dd89bed..f023d01 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagecropper/ImageCropperController.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagecropper/ImageCropperController.java
@@ -55,8 +55,7 @@
}
@Override
- public ConfiguredEventProcessor<ImageCropperParameters> onInvocation(DataProcessorInvocation dataProcessorInvocation) {
- ProcessingElementParameterExtractor extractor = ProcessingElementParameterExtractor.from(dataProcessorInvocation);
+ public ConfiguredEventProcessor<ImageCropperParameters> onInvocation(DataProcessorInvocation dataProcessorInvocation, ProcessingElementParameterExtractor extractor) {
String imageProperty = extractor.mappingPropertyValue(IMAGE_PROPERTY);
String boxWidthProperty = extractor.mappingPropertyValue(BOX_WIDTH_PROPERTY);
@@ -67,7 +66,7 @@
ImageCropperParameters params = new ImageCropperParameters(dataProcessorInvocation, imageProperty,
boxWidthProperty, boxHeightProperty, boxXProperty, boxYProperty);
- return new ConfiguredEventProcessor<>(params, () -> new ImageCropper(params));
+ return new ConfiguredEventProcessor<>(params, ImageCropper::new);
}
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imageenrichment/ImageEnricher.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imageenrichment/ImageEnricher.java
index baac026..e9ea04e 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imageenrichment/ImageEnricher.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imageenrichment/ImageEnricher.java
@@ -16,39 +16,41 @@
*/
package org.streampipes.processors.imageprocessing.jvm.processor.imageenrichment;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
-import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.*;
+import java.util.Base64;
import java.util.List;
+import java.util.Map;
+import java.util.Optional;
-public class ImageEnricher extends StandaloneEventProcessorEngine<ImageEnrichmentParameters> {
+import javax.imageio.ImageIO;
+
+public class ImageEnricher implements EventProcessor<ImageEnrichmentParameters> {
private ImageEnrichmentParameters params;
- public ImageEnricher(ImageEnrichmentParameters params) {
- super(params);
- }
@Override
- public void onInvocation(ImageEnrichmentParameters params, DataProcessorInvocation graph) {
+ public void onInvocation(ImageEnrichmentParameters params, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
this.params = params;
}
@Override
- public void onEvent(Map<String, Object> in, String s, SpOutputCollector out) {
+ public void onEvent(org.streampipes.model.runtime.Event in, SpOutputCollector out) {
+// TODO
+ List<Map<String, Object>> allBoxes = in.getFieldBySelector(params.getBoxArray()).getAsList()
+ .parseAsCustomType(value -> (Map<String, Object>) value);
- List<Map<String, Object>> allBoxes = (List<Map<String, Object>>) in.get(params.getBoxArray());
-
- Optional<BufferedImage> imageOpt = getImage(in.get(params.getImageProperty()));
+ Optional<BufferedImage> imageOpt = getImage(in.getFieldBySelector(params.getImageProperty
+ ()).getAsPrimitive().getRawValue());
if (imageOpt.isPresent()) {
BufferedImage image = imageOpt.get();
@@ -71,9 +73,9 @@
Optional<byte[]> finalImage = makeImage(image);
if (finalImage.isPresent()) {
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("image", Base64.getEncoder().encodeToString(finalImage.get()));
- out.onEvent(outMap);
+ org.streampipes.model.runtime.Event event = new org.streampipes.model.runtime.Event();
+ event.addField("image", Base64.getEncoder().encodeToString(finalImage.get()));
+ out.collect(event);
}
}
@@ -125,6 +127,4 @@
public void onDetach() {
}
-
-
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imageenrichment/ImageEnrichmentController.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imageenrichment/ImageEnrichmentController.java
index cb99934..14c6bed 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imageenrichment/ImageEnrichmentController.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imageenrichment/ImageEnrichmentController.java
@@ -72,9 +72,7 @@
}
@Override
- public ConfiguredEventProcessor<ImageEnrichmentParameters> onInvocation(DataProcessorInvocation dataProcessorInvocation) {
- ProcessingElementParameterExtractor extractor = ProcessingElementParameterExtractor.from(dataProcessorInvocation);
-
+ public ConfiguredEventProcessor<ImageEnrichmentParameters> onInvocation(DataProcessorInvocation dataProcessorInvocation, ProcessingElementParameterExtractor extractor) {
String imageProperty = extractor.mappingPropertyValue(IMAGE_PROPERTY);
String boxArray = extractor.mappingPropertyValue(BOX_ARRAY_PROPERTY);
// String boxArray = "boxes";
@@ -82,7 +80,7 @@
ImageEnrichmentParameters params = new ImageEnrichmentParameters(dataProcessorInvocation, imageProperty,
boxArray, "box_width", "box_height", "box_x", "box_y");
- return new ConfiguredEventProcessor<>(params, () -> new ImageEnricher(params));
+ return new ConfiguredEventProcessor<>(params, ImageEnricher::new);
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagerectification/ImageRectificationController.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagerectification/ImageRectificationController.java
index b3b6bc9..d1de6ed 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagerectification/ImageRectificationController.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagerectification/ImageRectificationController.java
@@ -51,14 +51,12 @@
}
@Override
- public ConfiguredEventProcessor<ImageRectificationParameters> onInvocation(DataProcessorInvocation dataProcessorInvocation) {
- ProcessingElementParameterExtractor extractor = ProcessingElementParameterExtractor.from(dataProcessorInvocation);
-
+ public ConfiguredEventProcessor<ImageRectificationParameters> onInvocation(DataProcessorInvocation dataProcessorInvocation, ProcessingElementParameterExtractor extractor) {
String imagePropertyName = extractor.mappingPropertyValue(IMAGE_PROPERTY);
ImageRectificationParameters params = new ImageRectificationParameters(dataProcessorInvocation, imagePropertyName);
- return new ConfiguredEventProcessor<>(params, () -> new ImageRectifier(params));
+ return new ConfiguredEventProcessor<>(params, ImageRectifier::new);
}
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagerectification/ImageRectifier.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagerectification/ImageRectifier.java
index ed664be..d3a0e9f 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagerectification/ImageRectifier.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/imagerectification/ImageRectifier.java
@@ -16,27 +16,23 @@
*/
package org.streampipes.processors.imageprocessing.jvm.processor.imagerectification;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
-import java.util.Map;
-
-public class ImageRectifier extends StandaloneEventProcessorEngine<ImageRectificationParameters> {
+public class ImageRectifier implements EventProcessor<ImageRectificationParameters> {
private ImageRectificationParameters params;
- public ImageRectifier(ImageRectificationParameters params) {
- super(params);
- }
@Override
- public void onInvocation(ImageRectificationParameters imageRectificationParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(ImageRectificationParameters imageRectificationParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
this.params = imageRectificationParameters;
}
@Override
- public void onEvent(Map<String, Object> map, String s, SpOutputCollector spOutputCollector) {
+ public void onEvent(Event event, SpOutputCollector spOutputCollector) {
// TODO add logic here
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/qrreader/QrCodeReader.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/qrreader/QrCodeReader.java
index 78db6d5..2484f8c 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/qrreader/QrCodeReader.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/qrreader/QrCodeReader.java
@@ -23,52 +23,48 @@
import boofcv.struct.image.GrayU8;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.imageprocessing.jvm.processor.commons.PlainImageTransformer;
+import org.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.streampipes.wrapper.routing.SpOutputCollector;
-import org.streampipes.wrapper.standalone.engine.StandaloneEventProcessorEngine;
+import org.streampipes.wrapper.runtime.EventProcessor;
import java.awt.image.BufferedImage;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.Optional;
-public class QrCodeReader extends StandaloneEventProcessorEngine<QrCodeReaderParameters> {
+public class QrCodeReader implements EventProcessor<QrCodeReaderParameters> {
private QrCodeReaderParameters params;
private static final Logger LOG = LoggerFactory.getLogger(QrCodeReader.class);
- public QrCodeReader(QrCodeReaderParameters params) {
- super(params);
- }
-
@Override
- public void onInvocation(QrCodeReaderParameters qrCodeReaderParameters, DataProcessorInvocation dataProcessorInvocation) {
+ public void onInvocation(QrCodeReaderParameters qrCodeReaderParameters, SpOutputCollector spOutputCollector, EventProcessorRuntimeContext runtimeContext) {
this.params = qrCodeReaderParameters;
}
@Override
- public void onEvent(Map<String, Object> in, String s, SpOutputCollector out) {
- PlainImageTransformer<QrCodeReaderParameters> imageTransformer = new PlainImageTransformer<>(in, params);
+ public void onEvent(Event in, SpOutputCollector out) {
+ PlainImageTransformer<QrCodeReaderParameters> imageTransformer = new PlainImageTransformer<>
+ (in.getRaw(), params);
Optional<BufferedImage> imageOpt = imageTransformer.getImage(params.getImagePropertyName());
if (imageOpt.isPresent()) {
BufferedImage input = imageOpt.get();
- GrayU8 gray = ConvertBufferedImage.convertFrom(input,(GrayU8)null);
+ GrayU8 gray = ConvertBufferedImage.convertFrom(input, (GrayU8) null);
- QrCodeDetector<GrayU8> detector = FactoryFiducial.qrcode(null,GrayU8.class);
+ QrCodeDetector<GrayU8> detector = FactoryFiducial.qrcode(null, GrayU8.class);
detector.process(gray);
List<QrCode> detections = detector.getDetections();
if (detections.size() > 0) {
LOG.info(detections.get(0).message);
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("qrvalue", detections.get(0).message);
- outMap.put("timestamp", System.currentTimeMillis());
- out.onEvent(outMap);
+ Event event = new Event();
+ event.addField("qrvalue", detections.get(0).message);
+ event.addField("timestamp", System.currentTimeMillis());
+ out.collect(event);
} else {
LOG.info("Could not find any QR code");
}
diff --git a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/qrreader/QrCodeReaderController.java b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/qrreader/QrCodeReaderController.java
index a0aa94d..f1bd0b6 100644
--- a/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/qrreader/QrCodeReaderController.java
+++ b/streampipes-processors-image-processing-jvm/src/main/java/org/streampipes/processors/imageprocessing/jvm/processor/qrreader/QrCodeReaderController.java
@@ -57,14 +57,12 @@
}
@Override
- public ConfiguredEventProcessor<QrCodeReaderParameters> onInvocation(DataProcessorInvocation dataProcessorInvocation) {
- ProcessingElementParameterExtractor extractor = ProcessingElementParameterExtractor.from(dataProcessorInvocation);
-
+ public ConfiguredEventProcessor<QrCodeReaderParameters> onInvocation(DataProcessorInvocation dataProcessorInvocation, ProcessingElementParameterExtractor extractor) {
String imagePropertyName = extractor.mappingPropertyValue(IMAGE_PROPERTY);
QrCodeReaderParameters params = new QrCodeReaderParameters(dataProcessorInvocation, imagePropertyName);
- return new ConfiguredEventProcessor<>(params, () -> new QrCodeReader(params));
+ return new ConfiguredEventProcessor<>(params, QrCodeReader::new);
}
}
diff --git a/streampipes-processors-pattern-detection-flink/development/.env b/streampipes-processors-pattern-detection-flink/development/.env
index a0c8a7b..f50f267 100644
--- a/streampipes-processors-pattern-detection-flink/development/.env
+++ b/streampipes-processors-pattern-detection-flink/development/.env
@@ -2,7 +2,4 @@
SP_PORT=6040
SP_HOST=localhost
SP_ICON_HOST=localhost
-SP_KAFKA_HOST=localhost
-SP_ZOOKEEPER_HOST=localhost
SP_FLINK_DEBUG=true
-SP_ELASTICSEARCH_HOST=localhost
diff --git a/streampipes-processors-pattern-detection-flink/pom.xml b/streampipes-processors-pattern-detection-flink/pom.xml
index e96946e..81fbe40 100644
--- a/streampipes-processors-pattern-detection-flink/pom.xml
+++ b/streampipes-processors-pattern-detection-flink/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
@@ -13,6 +13,7 @@
<elasticsearch.version>5.2.2</elasticsearch.version>
</properties>
+
<dependencies>
<dependency>
<groupId>org.streampipes</groupId>
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/PatternDetectionFlinkInit.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/PatternDetectionFlinkInit.java
index 92bd858..cd60550 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/PatternDetectionFlinkInit.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/PatternDetectionFlinkInit.java
@@ -28,7 +28,7 @@
DeclarersSingleton.getInstance()
.add(new IncreaseController())
.add(new PeakDetectionController());
- //.add(new SequenceController())
+ //.add(new SequenceController());
//.add(new AbsenceController())
//.add(new AndController());
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/config/ConfigKeys.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/config/ConfigKeys.java
index c2c2ac1..7e2814e 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/config/ConfigKeys.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/config/ConfigKeys.java
@@ -22,9 +22,6 @@
final static String PORT = "SP_PORT";
final static String FLINK_HOST = "SP_FLINK_HOST";
final static String FLINK_PORT = "SP_FLINK_PORT";
- final static String ELASTIC_HOST = "SP_ELASTICSEARCH_HOST";
- final static String ELASTIC_PORT = "SP_ELASTICSEARCH_PORT";
- final static String ELASTIC_PORT_REST = "SP_ELASTICSEARCH_PORT_REST";
final static String ICON_HOST = "SP_ICON_HOST";
final static String ICON_PORT = "SP_ICON_PORT";
final static String SERVICE_NAME = "SP_SERVICE_NAME";
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/config/PatternDetectionFlinkConfig.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/config/PatternDetectionFlinkConfig.java
index 82629f9..7953f64 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/config/PatternDetectionFlinkConfig.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/config/PatternDetectionFlinkConfig.java
@@ -36,9 +36,6 @@
config.register(ConfigKeys.PORT, 8090, "Port for the pe mixed flink component");
config.register(ConfigKeys.FLINK_HOST, "jobmanager", "Host for the flink cluster");
config.register(ConfigKeys.FLINK_PORT, 6123, "Port for the flink cluster");
- config.register(ConfigKeys.ELASTIC_HOST, "elasticsearch", "Elastic search host address");
- config.register(ConfigKeys.ELASTIC_PORT, 9300, "Elasitc search port");
- config.register(ConfigKeys.ELASTIC_PORT_REST, 9200, "Elasitc search rest port");
config.register(ConfigKeys.ICON_HOST, "backend", "Hostname for the icon host");
config.register(ConfigKeys.ICON_PORT, 80, "Port for the icons in nginx");
@@ -67,15 +64,6 @@
return config.getInteger(ConfigKeys.FLINK_PORT);
}
- public String getElasticsearchHost() {
- return config.getString(ConfigKeys.ELASTIC_HOST);
- }
-
- public int getElasticsearchPort() {
- return config.getInteger(ConfigKeys.ELASTIC_PORT);
- }
-
-
public static final String iconBaseUrl = "http://" + PatternDetectionFlinkConfig.INSTANCE.getIconHost() + ":" + PatternDetectionFlinkConfig.INSTANCE.getIconPort() + "/assets/img/pe_icons";
public static final String getIconUrl(String pictureName) {
@@ -90,10 +78,6 @@
return config.getInteger(ConfigKeys.ICON_PORT);
}
- public int getElasticsearchPortRest() {
- return config.getInteger(ConfigKeys.ELASTIC_PORT_REST);
- }
-
public boolean getDebug() {
return config.getBoolean(ConfigKeys.DEBUG);
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/absence/AbsenceProgram.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/absence/AbsenceProgram.java
index 0794647..c08df9a 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/absence/AbsenceProgram.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/absence/AbsenceProgram.java
@@ -28,6 +28,7 @@
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.pattern.detection.flink.AbstractPatternDetectionProgram;
import org.streampipes.processors.pattern.detection.flink.processor.and.TimeUnitConverter;
@@ -41,65 +42,74 @@
}
@Override
- public DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... messageStream) {
+ public DataStream<Event> getApplicationLogic(DataStream<Event>... messageStream) {
Time time = TimeUnitConverter.toTime(params.getTimeUnit(), params.getTimeWindowSize());
- DataStream<Tuple2<Boolean, Map<String, Object>>> stream1 = messageStream[0].flatMap(new FlatMapFunction<Map<String, Object>, Tuple2<Boolean, Map<String, Object>>>() {
+ DataStream<Tuple2<Boolean, Event>> stream1 = messageStream[0].flatMap(new FlatMapFunction<Event, Tuple2<Boolean, Event>>() {
@Override
- public void flatMap(Map<String, Object> in, Collector<Tuple2<Boolean, Map<String, Object>>> out) throws Exception {
+ public void flatMap(Event in, Collector<Tuple2<Boolean, Event>> out) throws
+ Exception {
out.collect(new Tuple2<>(true, in));
}
});
- DataStream<Tuple2<Boolean, Map<String, Object>>> stream2 = messageStream[1].flatMap(new FlatMapFunction<Map<String, Object>, Tuple2<Boolean, Map<String, Object>>>() {
+ DataStream<Tuple2<Boolean, Event>> stream2 = messageStream[1].flatMap(new FlatMapFunction<Event, Tuple2<Boolean, Event>>() {
@Override
- public void flatMap(Map<String, Object> in, Collector<Tuple2<Boolean, Map<String, Object>>> out) throws Exception {
+ public void flatMap(Event in, Collector<Tuple2<Boolean, Event>> out) throws
+ Exception {
out.collect(new Tuple2<>(false, in));
}
});
- DataStream<Tuple2<Boolean, Map<String, Object>>> joinedStreams = stream2.union(stream1);
+ DataStream<Tuple2<Boolean, Event>> joinedStreams = stream2.union(stream1);
- Pattern<Tuple2<Boolean, Map<String, Object>>, Tuple2<Boolean, Map<String, Object>>> matchedEvents =
- Pattern.<Tuple2<Boolean, Map<String, Object>>>begin("start")
- .where(new SimpleCondition<Tuple2<Boolean, Map<String, Object>>>() {
+ Pattern<Tuple2<Boolean, Event>, Tuple2<Boolean, Event>> matchedEvents =
+ Pattern.<Tuple2<Boolean, Event>>begin("start")
+ .where(new SimpleCondition<Tuple2<Boolean, Event>>() {
@Override
- public boolean filter(Tuple2<Boolean, Map<String, Object>> ride) throws Exception {
+ public boolean filter(Tuple2<Boolean, Event> ride) throws Exception {
return ride.f0;
}
})
.next("end")
- .where(new SimpleCondition<Tuple2<Boolean, Map<String, Object>>>() {
+ .where(new SimpleCondition<Tuple2<Boolean, Event>>() {
@Override
- public boolean filter(Tuple2<Boolean, Map<String, Object>> ride) throws Exception {
+ public boolean filter(Tuple2<Boolean, Event> ride) throws Exception {
return !ride.f0;
}
});
- PatternStream<Tuple2<Boolean, Map<String, Object>>> patternStream = CEP.pattern(joinedStreams, matchedEvents.within(time));
+ PatternStream<Tuple2<Boolean, Event>> patternStream = CEP.pattern(joinedStreams, matchedEvents
+ .within(time));
- OutputTag<Tuple2<Boolean, Map<String, Object>>> timedout = new OutputTag<Tuple2<Boolean, Map<String, Object>>>("timedout"){};
+ OutputTag<Tuple2<Boolean, Event>> timedout = new OutputTag<Tuple2<Boolean, Event>>
+ ("timedout") {
+ };
- SingleOutputStreamOperator<Tuple2<Boolean, Map<String, Object>>> matched = patternStream.flatSelect(
+ SingleOutputStreamOperator<Tuple2<Boolean, Event>> matched = patternStream.flatSelect(
timedout,
new TimedOut(),
new FlatSelectNothing<>()
);
- return matched.getSideOutput(timedout).flatMap(new FlatMapFunction<Tuple2<Boolean, Map<String, Object>>, Map<String, Object>>() {
+ return matched.getSideOutput(timedout).flatMap(new FlatMapFunction<Tuple2<Boolean, Event>,
+ Event>() {
@Override
- public void flatMap(Tuple2<Boolean, Map<String, Object>> in, Collector<Map<String, Object>> out) throws Exception {
- out.collect(in.f1);
+ public void flatMap(Tuple2<Boolean, Event> in, Collector<Event> out) throws
+ Exception {
+ out.collect(in.f1);
}
});
}
- public static class TimedOut implements PatternFlatTimeoutFunction<Tuple2<Boolean, Map<String, Object>>, Tuple2<Boolean, Map<String, Object>>> {
+ public static class TimedOut implements PatternFlatTimeoutFunction<Tuple2<Boolean, Event>,
+ Tuple2<Boolean, Event>> {
@Override
- public void timeout(Map<String, List<Tuple2<Boolean, Map<String, Object>>>> map, long l, Collector<Tuple2<Boolean, Map<String, Object>>> collector) throws Exception {
- Tuple2<Boolean, Map<String, Object>> rideStarted = map.get("start").get(0);
+ public void timeout(Map<String, List<Tuple2<Boolean, Event>>> map, long l,
+ Collector<Tuple2<Boolean, Event>> collector) throws Exception {
+ Tuple2<Boolean, Event> rideStarted = map.get("start").get(0);
collector.collect(rideStarted);
}
}
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/and/AndProgram.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/and/AndProgram.java
index c23a030..ce4a083 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/and/AndProgram.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/and/AndProgram.java
@@ -20,11 +20,10 @@
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.pattern.detection.flink.AbstractPatternDetectionProgram;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
public class AndProgram extends AbstractPatternDetectionProgram<AndParameters> {
@@ -34,25 +33,25 @@
}
@Override
- public DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... messageStream) {
+ public DataStream<Event> getApplicationLogic(DataStream<Event>... messageStream) {
// A AND B within x minutes
List<String> leftMappings = params.getLeftMappings();
List<String> rightMappings = params.getRightMappings();
Time time = TimeUnitConverter.toTime(params.getTimeUnit(), params.getTimeWindow());
return messageStream[0].join(messageStream[1])
- .where(new KeySelector<Map<String,Object>, String>() {
+ .where(new KeySelector<Event, String>() {
@Override
- public String getKey(Map<String, Object> stringObjectMap) throws Exception {
+ public String getKey(Event stringObjectMap) throws Exception {
StringBuilder builder = new StringBuilder();
for (String key : leftMappings) {
builder.append(key);
}
return builder.toString();
}
- }).equalTo(new KeySelector<Map<String,Object>, String>() {
+ }).equalTo(new KeySelector<Event, String>() {
@Override
- public String getKey(Map<String, Object> stringObjectMap) throws Exception {
+ public String getKey(Event stringObjectMap) throws Exception {
StringBuilder builder = new StringBuilder();
for (String key : rightMappings) {
builder.append(key);
@@ -60,12 +59,12 @@
return builder.toString();
}
}).window(TumblingEventTimeWindows.of(time))
- .apply(new JoinFunction<Map<String,Object>, Map<String,Object>, Map<String, Object>>() {
+ .apply(new JoinFunction<Event, Event, Event>() {
@Override
- public Map<String, Object> join(Map<String, Object> e1, Map<String, Object> e2) throws Exception {
- Map<String, Object> map = new HashMap<>();
- map.putAll(e1);
- map.putAll(e2);
+ public Event join(Event e1, Event e2) throws Exception {
+ Event map = new Event();
+ e1.getFields().forEach((key, value) -> map.addField(value));
+ e2.getFields().forEach((key, value) -> map.addField(value));
return map;
}
});
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/common/TimestampExtractor.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/common/TimestampExtractor.java
index 3cb5e8d..67fca4a 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/common/TimestampExtractor.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/common/TimestampExtractor.java
@@ -16,10 +16,9 @@
package org.streampipes.processors.pattern.detection.flink.processor.common;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
+import org.streampipes.model.runtime.Event;
-import java.util.Map;
-
-public class TimestampExtractor extends AscendingTimestampExtractor<Map<String, Object>> {
+public class TimestampExtractor extends AscendingTimestampExtractor<Event> {
private String timestampField;
@@ -28,7 +27,7 @@
}
@Override
- public long extractAscendingTimestamp(Map<String, Object> in) {
- return Long.parseLong(String.valueOf(in.get(timestampField)));
+ public long extractAscendingTimestamp(Event in) {
+ return in.getFieldBySelector(timestampField).getAsPrimitive().getAsLong();
}
}
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/increase/Increase.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/increase/Increase.java
index a8be6a7..ae7815a 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/increase/Increase.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/increase/Increase.java
@@ -19,13 +19,12 @@
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
-public class Increase implements WindowFunction<Map<String, Object>, Map<String, Object>, String, TimeWindow> {
+public class Increase implements WindowFunction<Event, Event, String, TimeWindow> {
private String propertyFieldName;
private Integer increaseValue;
@@ -43,16 +42,16 @@
}
@Override
- public void apply(String key, TimeWindow window, Iterable<Map<String, Object>> input, Collector<Map<String, Object>>
+ public void apply(String key, TimeWindow window, Iterable<Event> input, Collector<Event>
out) throws Exception {
List<Double> values = new ArrayList<>();
- Map<String, Object> lastEvent = new HashMap<>();
+ Event lastEvent = new Event();
- for (Map<String, Object> anInput : input) {
+ for (Event anInput : input) {
lastEvent = anInput;
- if (String.valueOf(lastEvent.get(groupByFieldName)).equals(key)) {
- values.add(Double.parseDouble(String.valueOf(lastEvent.get(propertyFieldName))));
+ if (lastEvent.getFieldBySelector(groupByFieldName).getAsPrimitive().getAsString().equals(key)) {
+ values.add(lastEvent.getFieldBySelector(propertyFieldName).getAsPrimitive().getAsDouble());
}
}
if (values.size() > 0) {
@@ -69,10 +68,10 @@
}
}
- private void buildOutput(Collector<Map<String, Object>> out, Map<String, Object> lastEvent) {
- Map<String, Object> outEvent = new HashMap<>();
+ private void buildOutput(Collector<Event> out, Event lastEvent) {
+ Event outEvent = new Event();
for(String outputProperty : outputProperties) {
- outEvent.put(outputProperty, lastEvent.get(outputProperty));
+ outEvent.addField(lastEvent.getFieldBySelector(outputProperty));
}
out.collect(outEvent);
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/increase/IncreaseProgram.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/increase/IncreaseProgram.java
index b23761c..3ff16a3 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/increase/IncreaseProgram.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/increase/IncreaseProgram.java
@@ -20,11 +20,10 @@
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.pattern.detection.flink.AbstractPatternDetectionProgram;
import org.streampipes.processors.pattern.detection.flink.processor.common.TimestampExtractor;
-import java.util.Map;
-
public class IncreaseProgram extends AbstractPatternDetectionProgram<IncreaseParameters> {
public IncreaseProgram(IncreaseParameters params, boolean debug) {
@@ -32,7 +31,7 @@
}
@Override
- public DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... dataStreams) {
+ public DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
String timestampField = params.getTimestampField();
return dataStreams[0]
.assignTimestampsAndWatermarks(new TimestampExtractor(timestampField))
@@ -42,12 +41,12 @@
.getOutputProperties(), params.getGroupBy())).setParallelism(1);
}
- private KeySelector<Map<String, Object>, String> getKeySelector() {
+ private KeySelector<Event, String> getKeySelector() {
String groupBy = params.getGroupBy();
- return new KeySelector<Map<String, Object>, String>() {
+ return new KeySelector<Event, String>() {
@Override
- public String getKey(Map<String, Object> in) throws Exception {
- return String.valueOf(in.get(groupBy));
+ public String getKey(Event in) throws Exception {
+ return in.getFieldBySelector(groupBy).getAsPrimitive().getAsString();
}
};
}
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/peak/PeakDetectionCalculator.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/peak/PeakDetectionCalculator.java
index 1fccc92..758141f 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/peak/PeakDetectionCalculator.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/peak/PeakDetectionCalculator.java
@@ -19,18 +19,16 @@
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.stream.Collectors;
/**
* Created by riemer on 20.04.2017.
*/
-public class PeakDetectionCalculator implements FlatMapFunction<List<Map<String,
- Object>>, Map<String, Object>> {
+public class PeakDetectionCalculator implements FlatMapFunction<List<Event>, Event> {
private String groupBy;
private String valueToObserve;
@@ -49,11 +47,11 @@
@Override
- public void flatMap(List<Map<String, Object>> in, Collector<Map<String, Object>> out)
+ public void flatMap(List<Event> in, Collector<Event> out)
throws Exception {
List<Double> y = in
.stream()
- .map(m -> Double.parseDouble(String.valueOf(m.get(valueToObserve))))
+ .map(m -> m.getFieldBySelector(valueToObserve).getAsPrimitive().getAsDouble())
.collect(Collectors.toList());
Integer[] signals = makeIntegerArray(y.size());
@@ -90,10 +88,10 @@
}
}
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("id", in.get(in.size() - 1).get(groupBy));
- outMap.put("timestamp", System.currentTimeMillis());
- outMap.put("signal", signals[signals.length - 1]);
+ Event outMap = new Event();
+ outMap.addField("id", in.get(in.size() - 1).getFieldBySelector(groupBy).getAsPrimitive().getAsString());
+ outMap.addField("timestamp", System.currentTimeMillis());
+ outMap.addField("signal", signals[signals.length - 1]);
out.collect(outMap);
}
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/peak/PeakDetectionProgram.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/peak/PeakDetectionProgram.java
index e6b89db..c583821 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/peak/PeakDetectionProgram.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/peak/PeakDetectionProgram.java
@@ -20,11 +20,11 @@
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.pattern.detection.flink.AbstractPatternDetectionProgram;
import org.streampipes.processors.pattern.detection.flink.processor.peak.utils.SlidingBatchWindow;
import java.util.List;
-import java.util.Map;
/**
* Created by riemer on 20.04.2017.
@@ -36,7 +36,7 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>[] messageStream) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>[] messageStream) {
Integer lag = params.getLag();
String groupBy = params.getGroupBy();
@@ -49,7 +49,7 @@
.keyBy(getKeySelector())
.transform
("sliding-batch-window-shift",
- TypeInformation.of(new TypeHint<List<Map<String, Object>>>() {
+ TypeInformation.of(new TypeHint<List<Event>>() {
}), new SlidingBatchWindow<>(countWindowSize))
.flatMap(new PeakDetectionCalculator(groupBy,
valueToObserve,
@@ -58,12 +58,12 @@
influence));
}
- private KeySelector<Map<String, Object>, String> getKeySelector() {
+ private KeySelector<Event, String> getKeySelector() {
String groupBy = params.getGroupBy();
- return new KeySelector<Map<String, Object>, String>() {
+ return new KeySelector<Event, String>() {
@Override
- public String getKey(Map<String, Object> in) throws Exception {
- return String.valueOf(in.get(groupBy));
+ public String getKey(Event in) throws Exception {
+ return in.getFieldBySelector(groupBy).getAsPrimitive().getAsString();
}
};
}
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/EventStorage.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/EventStorage.java
index 16ec8f2..271b5aa 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/EventStorage.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/EventStorage.java
@@ -16,14 +16,14 @@
package org.streampipes.processors.pattern.detection.flink.processor.sequence;
-import java.util.Map;
+import org.streampipes.model.runtime.Event;
public class EventStorage {
private Long timestamp;
- private Map<String, Object> event;
+ private Event event;
- public EventStorage(Long timestamp, Map<String, Object> event) {
+ public EventStorage(Long timestamp, Event event) {
this.timestamp = timestamp;
this.event = event;
}
@@ -32,7 +32,7 @@
return timestamp;
}
- public Map<String, Object> getEvent() {
+ public Event getEvent() {
return event;
}
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/Sequence.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/Sequence.java
index 99b102e..be516af 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/Sequence.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/Sequence.java
@@ -17,14 +17,11 @@
package org.streampipes.processors.pattern.detection.flink.processor.sequence;
import org.apache.flink.api.common.state.ValueState;
-import org.apache.flink.api.common.state.ValueStateDescriptor;
-import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.co.CoProcessFunction;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
-import java.util.Map;
-
-public class Sequence extends CoProcessFunction<Map<String, Object>, Map<String, Object>, Map<String, Object>> {
+public class Sequence extends CoProcessFunction<Event, Event, Event> {
private String timeUnit;
private Integer timeWindow;
@@ -36,27 +33,29 @@
this.timeWindow = timeWindow;
}
- @Override
- public void open(Configuration parameters) throws Exception {
- state = getRuntimeContext().getState(new ValueStateDescriptor<>("sequence-event-storage", EventStorage.class));
- }
+ //@Override
+ //public void open(Configuration parameters) throws Exception {
+ // TODO: add RuntimeContext
+ //state = getRuntimeContext().getState(new ValueStateDescriptor<>("sequence-event-storage",
+ // EventStorage.class));
+ //}
@Override
- public void processElement1(Map<String, Object> value, Context ctx, Collector<Map<String, Object>> out) throws Exception {
+ public void processElement1(Event value, Context ctx, Collector<Event> out) throws Exception {
state.update(new EventStorage(System.currentTimeMillis(), value));
}
@Override
- public void processElement2(Map<String, Object> value, Context ctx, Collector<Map<String, Object>> out) throws Exception {
+ public void processElement2(Event value, Context ctx, Collector<Event> out) throws Exception {
EventStorage previousElementStream1 = state.value();
if (previousElementStream1 != null && isSequence(previousElementStream1, value)) {
- value.putAll(previousElementStream1.getEvent());
+ previousElementStream1.getEvent().getFields().forEach((key, v) -> value.addField(v));
out.collect(value);
}
}
- private Boolean isSequence(EventStorage previousElementStream1, Map<String, Object> value) {
+ private Boolean isSequence(EventStorage previousElementStream1, Event value) {
Long currentTime = System.currentTimeMillis();
Long earliestAllowedStartTime = getEarliestStartTime(currentTime);
@@ -78,7 +77,7 @@
}
@Override
- public void onTimer(long timestamp, OnTimerContext ctx, Collector<Map<String, Object>> out) throws Exception {
+ public void onTimer(long timestamp, OnTimerContext ctx, Collector<Event> out) throws Exception {
}
}
diff --git a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/SequenceProgram.java b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/SequenceProgram.java
index effee04..2cf02bf 100644
--- a/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/SequenceProgram.java
+++ b/streampipes-processors-pattern-detection-flink/src/main/java/org/streampipes/processors/pattern/detection/flink/processor/sequence/SequenceProgram.java
@@ -18,10 +18,9 @@
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.pattern.detection.flink.AbstractPatternDetectionProgram;
-import java.util.Map;
-
public class SequenceProgram extends AbstractPatternDetectionProgram<SequenceParameters> {
public SequenceProgram(SequenceParameters params, boolean debug) {
@@ -29,7 +28,7 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... dataStreams) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
return dataStreams[0].keyBy(getKeySelector()).connect(dataStreams[1].keyBy(getKeySelector())).process(new Sequence(params
.getTimeUnit(),
params
@@ -37,10 +36,10 @@
()));
}
- private KeySelector<Map<String,Object>, String> getKeySelector() {
- return new KeySelector<Map<String,Object>, String>() {
+ private KeySelector<Event, String> getKeySelector() {
+ return new KeySelector<Event, String>() {
@Override
- public String getKey(Map<String, Object> value) throws Exception {
+ public String getKey(Event value) throws Exception {
return "dummy-key";
}
};
diff --git a/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/absence/TestAbsence.java b/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/absence/TestAbsence.java
index 1fecea7..98df3fe 100644
--- a/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/absence/TestAbsence.java
+++ b/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/absence/TestAbsence.java
@@ -16,6 +16,8 @@
*/
package org.streampipes.processors.pattern.detection.processor.absence;
+import static org.hamcrest.core.IsEqual.equalTo;
+
import io.flinkspector.datastream.DataStreamTestBase;
import io.flinkspector.datastream.input.EventTimeInput;
import io.flinkspector.datastream.input.EventTimeInputBuilder;
@@ -23,15 +25,17 @@
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.pattern.detection.flink.processor.absence.AbsenceController;
import org.streampipes.processors.pattern.detection.flink.processor.absence.AbsenceParameters;
import org.streampipes.processors.pattern.detection.flink.processor.absence.AbsenceProgram;
import org.streampipes.processors.pattern.detection.flink.processor.and.TimeUnit;
import org.streampipes.test.generator.InvocationGraphGenerator;
-import java.util.*;
-
-import static org.hamcrest.core.IsEqual.equalTo;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
@RunWith(Parameterized.class)
public class TestAbsence extends DataStreamTestBase {
@@ -70,13 +74,13 @@
AbsenceProgram program = new AbsenceProgram(params, true);
- DataStream<Map<String, Object>> stream = program.getApplicationLogic(createTestStream(makeInputData(1, makeMap(), 0)), createTestStream(makeInputData(waitForMs, makeMap(), 1)));
+ DataStream<Event> stream = program.getApplicationLogic(createTestStream(makeInputData(1, makeMap(), 0)), createTestStream(makeInputData(waitForMs, makeMap(), 1)));
assertStream(stream, equalTo(getOutput(shouldMatch)));
}
- private Collection<Map<String, Object>> getOutput(Boolean shouldMatch) {
- List<Map<String, Object>> allEvents = new ArrayList<>();
+ private Collection<Event> getOutput(Boolean shouldMatch) {
+ List<Event> allEvents = new ArrayList<>();
if (shouldMatch) {
allEvents.add(makeMap().get(0));
@@ -85,24 +89,24 @@
return allEvents;
}
- private EventTimeInput<Map<String, Object>> makeInputData(Integer delayEvent, List<Map<String, Object>> inputMap, Integer i) {
- List<Map<String, Object>> testData = inputMap;
- EventTimeInputBuilder<Map<String, Object>> builder = EventTimeInputBuilder.startWith(testData.get(i), after(delayEvent, seconds));
+ private EventTimeInput<Event> makeInputData(Integer delayEvent, List<Event> inputMap, Integer i) {
+ List<Event> testData = inputMap;
+ EventTimeInputBuilder<Event> builder = EventTimeInputBuilder.startWith(testData.get(i), after(delayEvent, seconds));
return builder;
}
- private List<Map<String, Object>> makeMap() {
- List<Map<String, Object>> allEvents = new ArrayList<>();
- Map<String, Object> event1 = new HashMap<>();
- event1.put("id", "a");
- event1.put("timestamp", 0);
+ private List<Event> makeMap() {
+ List<Event> allEvents = new ArrayList<>();
+ Event event1 = new Event();
+ event1.addField("id", "a");
+ event1.addField("timestamp", 0);
allEvents.add(event1);
- Map<String, Object> event2 = new HashMap<>();
- event2.put("id", "a");
- event2.put("timestamp", waitForMs);
+ Event event2 = new Event();
+ event2.addField("id", "a");
+ event2.addField("timestamp", waitForMs);
allEvents.add(event2);
diff --git a/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/and/TestAnd.java b/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/and/TestAnd.java
index b98a5bb..eb6a449 100644
--- a/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/and/TestAnd.java
+++ b/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/and/TestAnd.java
@@ -16,6 +16,8 @@
*/
package org.streampipes.processors.pattern.detection.processor.and;
+import static org.hamcrest.core.IsEqual.equalTo;
+
import io.flinkspector.datastream.DataStreamTestBase;
import io.flinkspector.datastream.input.EventTimeInput;
import io.flinkspector.datastream.input.EventTimeInputBuilder;
@@ -23,15 +25,17 @@
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.pattern.detection.flink.processor.and.AndController;
import org.streampipes.processors.pattern.detection.flink.processor.and.AndParameters;
import org.streampipes.processors.pattern.detection.flink.processor.and.AndProgram;
import org.streampipes.processors.pattern.detection.flink.processor.and.TimeUnit;
import org.streampipes.test.generator.InvocationGraphGenerator;
-import java.util.*;
-
-import static org.hamcrest.core.IsEqual.equalTo;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
@RunWith(Parameterized.class)
public class TestAnd extends DataStreamTestBase {
@@ -77,37 +81,37 @@
AndProgram program = new AndProgram(params, true);
- DataStream<Map<String, Object>> stream = program.getApplicationLogic(createTestStream(makeInputData(delayFirstEvent, makeMap("field1"))), createTestStream(makeInputData(delaySecondEvent, makeMap("field2"))));
+ DataStream<Event> stream = program.getApplicationLogic(createTestStream(makeInputData(delayFirstEvent, makeMap("field1"))), createTestStream(makeInputData(delaySecondEvent, makeMap("field2"))));
assertStream(stream, equalTo(getOutput(shouldMatch)));
}
- private Collection<Map<String, Object>> getOutput(Boolean shouldMatch) {
- List<Map<String, Object>> allEvents = new ArrayList<>();
+ private Collection<Event> getOutput(Boolean shouldMatch) {
+ List<Event> allEvents = new ArrayList<>();
if (shouldMatch) {
- Map<String, Object> outMap = new HashMap<>();
- outMap.put("id", "a");
- outMap.put("field1", 1);
- outMap.put("field2", 1);
+ Event outMap = new Event();
+ outMap.addField("id", "a");
+ outMap.addField("field1", 1);
+ outMap.addField("field2", 1);
allEvents.add(outMap);
}
return allEvents;
}
- private EventTimeInput<Map<String, Object>> makeInputData(Integer delayEvent, List<Map<String, Object>> inputMap) {
- List<Map<String, Object>> testData = inputMap;
- EventTimeInputBuilder<Map<String, Object>> builder = EventTimeInputBuilder.startWith(testData.get(0), after(delayEvent, seconds));
+ private EventTimeInput<Event> makeInputData(Integer delayEvent, List<Event> inputMap) {
+ List<Event> testData = inputMap;
+ EventTimeInputBuilder<Event> builder = EventTimeInputBuilder.startWith(testData.get(0), after(delayEvent, seconds));
return builder;
}
- private List<Map<String, Object>> makeMap(String fieldName) {
- List<Map<String, Object>> allEvents = new ArrayList<>();
- Map<String, Object> event = new HashMap<>();
- event.put("id", "a");
- event.put(fieldName, 1);
+ private List<Event> makeMap(String fieldName) {
+ List<Event> allEvents = new ArrayList<>();
+ Event event = new Event();
+ event.addField("id", "a");
+ event.addField(fieldName, 1);
allEvents.add(event);
diff --git a/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/increase/TestIncrease.java b/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/increase/TestIncrease.java
index 2cdec4d..702f2fe 100644
--- a/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/increase/TestIncrease.java
+++ b/streampipes-processors-pattern-detection-flink/src/test/java/org/streampipes/processors/pattern/detection/processor/increase/TestIncrease.java
@@ -16,6 +16,8 @@
*/
package org.streampipes.processors.pattern.detection.processor.increase;
+import static org.hamcrest.core.IsEqual.equalTo;
+
import io.flinkspector.core.input.Input;
import io.flinkspector.core.input.InputBuilder;
import io.flinkspector.datastream.DataStreamTestBase;
@@ -23,15 +25,17 @@
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.pattern.detection.flink.processor.increase.IncreaseController;
import org.streampipes.processors.pattern.detection.flink.processor.increase.IncreaseParameters;
import org.streampipes.processors.pattern.detection.flink.processor.increase.IncreaseProgram;
import org.streampipes.processors.pattern.detection.flink.processor.increase.Operation;
import org.streampipes.test.generator.InvocationGraphGenerator;
-import java.util.*;
-
-import static org.hamcrest.core.IsEqual.equalTo;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
@RunWith(Parameterized.class)
public class TestIncrease extends DataStreamTestBase {
@@ -83,13 +87,13 @@
IncreaseProgram program = new IncreaseProgram(params, true);
- DataStream<Map<String, Object>> stream = program.getApplicationLogic(createTestStream(makeInputData(makeMap())));
+ DataStream<Event> stream = program.getApplicationLogic(createTestStream(makeInputData(makeMap())));
assertStream(stream, equalTo(getOutput(shouldMatch)));
}
- private Collection<Map<String, Object>> getOutput(Boolean shouldMatch) {
- List<Map<String, Object>> allEvents = new ArrayList<>();
+ private Collection<Event> getOutput(Boolean shouldMatch) {
+ List<Event> allEvents = new ArrayList<>();
if (shouldMatch) {
allEvents.add(makeMap().get(1));
@@ -98,28 +102,28 @@
return allEvents;
}
- private Input<Map<String, Object>> makeInputData(List<Map<String, Object>> inputMap) {
- List<Map<String, Object>> testData = inputMap;
- InputBuilder<Map<String, Object>> builder = InputBuilder.startWith(testData.get(0));
+ private Input<Event> makeInputData(List<Event> inputMap) {
+ List<Event> testData = inputMap;
+ InputBuilder<Event> builder = InputBuilder.startWith(testData.get(0));
for(int i = 1; i < inputMap.size(); i++) {
builder.emit(inputMap.get(i));
}
return builder;
}
- private List<Map<String, Object>> makeMap() {
- List<Map<String, Object>> allEvents = new ArrayList<>();
- Map<String, Object> event1 = new HashMap<>();
- event1.put("id", "a");
- event1.put("timestamp", 0);
- event1.put("value", value1);
+ private List<Event> makeMap() {
+ List<Event> allEvents = new ArrayList<>();
+ Event event1 = new Event();
+ event1.addField("id", "a");
+ event1.addField("timestamp", 0);
+ event1.addField("value", value1);
allEvents.add(event1);
- Map<String, Object> event2 = new HashMap<>();
- event2.put("id", "a");
- event2.put("timestamp", waitForMs);
- event2.put("value", value2);
+ Event event2 = new Event();
+ event2.addField("id", "a");
+ event2.addField("timestamp", waitForMs);
+ event2.addField("value", value2);
allEvents.add(event2);
diff --git a/streampipes-processors-statistics-flink/development/.env b/streampipes-processors-statistics-flink/development/.env
index cdf33dc..5b89211 100644
--- a/streampipes-processors-statistics-flink/development/.env
+++ b/streampipes-processors-statistics-flink/development/.env
@@ -2,7 +2,4 @@
SP_PORT=6045
SP_HOST=localhost
SP_ICON_HOST=localhost
-SP_KAFKA_HOST=localhost
-SP_ZOOKEEPER_HOST=localhost
SP_FLINK_DEBUG=true
-SP_ELASTICSEARCH_HOST=localhost
diff --git a/streampipes-processors-statistics-flink/pom.xml b/streampipes-processors-statistics-flink/pom.xml
index 865f8c2..5873ef6 100644
--- a/streampipes-processors-statistics-flink/pom.xml
+++ b/streampipes-processors-statistics-flink/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/config/ConfigKeys.java b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/config/ConfigKeys.java
index b75ae02..f9d2f64 100644
--- a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/config/ConfigKeys.java
+++ b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/config/ConfigKeys.java
@@ -22,9 +22,6 @@
final static String PORT = "SP_PORT";
final static String FLINK_HOST = "SP_FLINK_HOST";
final static String FLINK_PORT = "SP_FLINK_PORT";
- final static String ELASTIC_HOST = "SP_ELASTICSEARCH_HOST";
- final static String ELASTIC_PORT = "SP_ELASTICSEARCH_PORT";
- final static String ELASTIC_PORT_REST = "SP_ELASTICSEARCH_PORT_REST";
final static String ICON_HOST = "SP_ICON_HOST";
final static String ICON_PORT = "SP_ICON_PORT";
final static String SERVICE_NAME = "SP_SERVICE_NAME";
diff --git a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/config/StatisticsFlinkConfig.java b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/config/StatisticsFlinkConfig.java
index 48c940d..223eb73 100644
--- a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/config/StatisticsFlinkConfig.java
+++ b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/config/StatisticsFlinkConfig.java
@@ -39,9 +39,6 @@
config.register(ConfigKeys.PORT, 8090, "Port for the pe mixed flink component");
config.register(ConfigKeys.FLINK_HOST, "jobmanager", "Host for the flink cluster");
config.register(ConfigKeys.FLINK_PORT, 6123, "Port for the flink cluster");
- config.register(ConfigKeys.ELASTIC_HOST, "elasticsearch", "Elastic search host address");
- config.register(ConfigKeys.ELASTIC_PORT, 9300, "Elasitc search port");
- config.register(ConfigKeys.ELASTIC_PORT_REST, 9200, "Elasitc search rest port");
config.register(ConfigKeys.ICON_HOST, "backend", "Hostname for the icon host");
config.register(ConfigKeys.ICON_PORT, 80, "Port for the icons in nginx");
@@ -70,15 +67,6 @@
return config.getInteger(ConfigKeys.FLINK_PORT);
}
- public String getElasticsearchHost() {
- return config.getString(ConfigKeys.ELASTIC_HOST);
- }
-
- public int getElasticsearchPort() {
- return config.getInteger(ConfigKeys.ELASTIC_PORT);
- }
-
-
public static final String iconBaseUrl = "http://" + StatisticsFlinkConfig.INSTANCE.getIconHost() + ":" + StatisticsFlinkConfig.INSTANCE.getIconPort() + "/assets/img/pe_icons";
public static final String getIconUrl(String pictureName) {
@@ -93,11 +81,6 @@
return config.getInteger(ConfigKeys.ICON_PORT);
}
- public int getElasticsearchPortRest() {
- return config.getInteger(ConfigKeys.ELASTIC_PORT_REST);
- }
-
-
public boolean getDebug() {
return config.getBoolean(ConfigKeys.DEBUG);
}
diff --git a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/extensions/MapKeySelector.java b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/extensions/MapKeySelector.java
index ce655e3..349a5ed 100644
--- a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/extensions/MapKeySelector.java
+++ b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/extensions/MapKeySelector.java
@@ -18,9 +18,9 @@
package org.streampipes.processors.statistics.flink.extensions;
import org.apache.flink.api.java.functions.KeySelector;
+import org.streampipes.model.runtime.Event;
import java.io.Serializable;
-import java.util.Map;
public class MapKeySelector implements Serializable {
@@ -30,11 +30,11 @@
this.groupBy = groupBy;
}
- public KeySelector<Map<String, Object>, String> getKeySelector() {
- return new KeySelector<Map<String, Object>, String>() {
+ public KeySelector<Event, String> getKeySelector() {
+ return new KeySelector<Event, String>() {
@Override
- public String getKey(Map<String, Object> in) throws Exception {
- return String.valueOf(in.get(groupBy));
+ public String getKey(Event in) throws Exception {
+ return in.getFieldBySelector(groupBy).getAsPrimitive().getAsString();
}
};
}
diff --git a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/StatisticsSummaryCalculator.java b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/StatisticsSummaryCalculator.java
index 1f44aa6..54e1d7e 100644
--- a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/StatisticsSummaryCalculator.java
+++ b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/StatisticsSummaryCalculator.java
@@ -20,12 +20,11 @@
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-public class StatisticsSummaryCalculator implements FlatMapFunction<Map<String, Object>, Map<String, Object>> {
+public class StatisticsSummaryCalculator implements FlatMapFunction<Event, Event> {
private String listPropertyName;
@@ -34,25 +33,22 @@
}
@Override
- public void flatMap(Map<String, Object> in, Collector<Map<String, Object>> out) throws
+ public void flatMap(Event in, Collector<Event> out) throws
Exception {
- List<Double> listValues = ((List<Object>) in
- .get(listPropertyName))
- .stream()
- .map(o -> Double.parseDouble(o.toString()))
- .collect(Collectors.toList());
+ List<Double> listValues = (in.getFieldBySelector(listPropertyName).getAsList().castItems
+ (Double.class));
SummaryStatistics stats = new SummaryStatistics();
- listValues.forEach(lv -> stats.addValue(lv));
+ listValues.forEach(stats::addValue);
- in.put(StatisticsSummaryController.MIN, stats.getMin());
- in.put(StatisticsSummaryController.MAX, stats.getMax());
- in.put(StatisticsSummaryController.MEAN, stats.getMean());
- in.put(StatisticsSummaryController.N, stats.getN());
- in.put(StatisticsSummaryController.SUM, stats.getSum());
- in.put(StatisticsSummaryController.STDDEV, stats.getStandardDeviation());
- in.put(StatisticsSummaryController.VARIANCE, stats.getVariance());
+ in.addField(StatisticsSummaryController.MIN, stats.getMin());
+ in.addField(StatisticsSummaryController.MAX, stats.getMax());
+ in.addField(StatisticsSummaryController.MEAN, stats.getMean());
+ in.addField(StatisticsSummaryController.N, stats.getN());
+ in.addField(StatisticsSummaryController.SUM, stats.getSum());
+ in.addField(StatisticsSummaryController.STDDEV, stats.getStandardDeviation());
+ in.addField(StatisticsSummaryController.VARIANCE, stats.getVariance());
out.collect(in);
diff --git a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/StatisticsSummaryProgram.java b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/StatisticsSummaryProgram.java
index df8fdbc..5797602 100644
--- a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/StatisticsSummaryProgram.java
+++ b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/StatisticsSummaryProgram.java
@@ -18,10 +18,9 @@
package org.streampipes.processors.statistics.flink.processor.stat;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.statistics.flink.AbstractStatisticsProgram;
-import java.util.Map;
-
public class StatisticsSummaryProgram extends AbstractStatisticsProgram<StatisticsSummaryParameters> {
public StatisticsSummaryProgram(StatisticsSummaryParameters params, boolean debug) {
@@ -33,7 +32,7 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... messageStream) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... messageStream) {
return messageStream[0].flatMap(new StatisticsSummaryCalculator(bindingParams.getListPropertyName()));
}
}
diff --git a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/window/StatisticsSummaryCalculatorWindow.java b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/window/StatisticsSummaryCalculatorWindow.java
index 20fd4c7..4e0fffa 100644
--- a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/window/StatisticsSummaryCalculatorWindow.java
+++ b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/window/StatisticsSummaryCalculatorWindow.java
@@ -20,16 +20,15 @@
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.statistics.flink.processor.stat.StatisticsSummaryController;
import java.io.Serializable;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.stream.Collectors;
-public class StatisticsSummaryCalculatorWindow implements FlatMapFunction<List<Map<String,
- Object>>, Map<String, Object>>, Serializable {
+public class StatisticsSummaryCalculatorWindow implements FlatMapFunction<List<Event>, Event>,
+ Serializable {
private String partitionMapping;
private String valueToObserveMapping;
@@ -40,27 +39,27 @@
}
@Override
- public void flatMap(List<Map<String, Object>> in, Collector<Map<String, Object>> out)
+ public void flatMap(List<Event> in, Collector<Event> out)
throws Exception {
- List<Double> listValues = (in.stream().map(m -> Double.parseDouble(String.valueOf(m.get
- (valueToObserveMapping))))
+ List<Double> listValues = (in.stream().map(m -> m.getFieldBySelector(valueToObserveMapping)
+ .getAsPrimitive().getAsDouble())
.collect(Collectors.toList()));
SummaryStatistics stats = new SummaryStatistics();
listValues.forEach(lv -> stats.addValue(lv));
- Map<String, Object> outMap = new HashMap<>();
+ Event outMap = new Event();
- outMap.put("timestamp", System.currentTimeMillis());
- outMap.put("id", in.get(in.size() - 1).get(partitionMapping));
- outMap.put(StatisticsSummaryController.MIN, stats.getMin());
- outMap.put(StatisticsSummaryController.MAX, stats.getMax());
- outMap.put(StatisticsSummaryController.MEAN, stats.getMean());
- outMap.put(StatisticsSummaryController.N, stats.getN());
- outMap.put(StatisticsSummaryController.SUM, stats.getSum());
- outMap.put(StatisticsSummaryController.STDDEV, stats.getStandardDeviation());
- outMap.put(StatisticsSummaryController.VARIANCE, stats.getVariance());
+ outMap.addField("timestamp", System.currentTimeMillis());
+ outMap.addField("id", in.get(in.size() - 1).getFieldBySelector(partitionMapping).getRawValue());
+ outMap.addField(StatisticsSummaryController.MIN, stats.getMin());
+ outMap.addField(StatisticsSummaryController.MAX, stats.getMax());
+ outMap.addField(StatisticsSummaryController.MEAN, stats.getMean());
+ outMap.addField(StatisticsSummaryController.N, stats.getN());
+ outMap.addField(StatisticsSummaryController.SUM, stats.getSum());
+ outMap.addField(StatisticsSummaryController.STDDEV, stats.getStandardDeviation());
+ outMap.addField(StatisticsSummaryController.VARIANCE, stats.getVariance());
out.collect(outMap);
}
diff --git a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/window/StatisticsSummaryProgramWindow.java b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/window/StatisticsSummaryProgramWindow.java
index 6559570..aee41a3 100644
--- a/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/window/StatisticsSummaryProgramWindow.java
+++ b/streampipes-processors-statistics-flink/src/main/java/org/streampipes/processors/statistics/flink/processor/stat/window/StatisticsSummaryProgramWindow.java
@@ -21,13 +21,13 @@
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.statistics.flink.AbstractStatisticsProgram;
import org.streampipes.processors.statistics.flink.extensions.MapKeySelector;
import org.streampipes.processors.statistics.flink.extensions.SlidingEventTimeWindow;
import org.streampipes.processors.statistics.flink.extensions.TimestampMappingFunction;
import java.util.List;
-import java.util.Map;
public class StatisticsSummaryProgramWindow extends
AbstractStatisticsProgram<StatisticsSummaryParametersWindow> {
@@ -47,20 +47,21 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... messageStream) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... messageStream) {
StatisticsSummaryParamsSerializable sp = new
StatisticsSummaryParamsSerializable(serializableParams.getValueToObserve(),
serializableParams.getTimestampMapping(), serializableParams.getGroupBy(),
serializableParams.getTimeWindowSize(), serializableParams.getTimeUnit());
- DataStream<Map<String, Object>> output = messageStream[0]
+ DataStream<Event> output = messageStream[0]
.keyBy(new MapKeySelector(sp.getGroupBy()).getKeySelector())
.transform
("sliding-window-event-shift",
- TypeInformation.of(new TypeHint<List<Map<String, Object>>>() {
+ TypeInformation.of(new TypeHint<List<Event>>() {
}), new SlidingEventTimeWindow<>(sp.getTimeWindowSize(), sp.getTimeUnit(),
- (TimestampMappingFunction<Map<String, Object>>) in ->
- Long.parseLong(String.valueOf(in.get(sp.getTimestampMapping())))))
+ (TimestampMappingFunction<Event>) in ->
+ in.getFieldBySelector(sp.getTimestampMapping())
+ .getAsPrimitive().getAsLong()))
.flatMap(new StatisticsSummaryCalculatorWindow(sp.getGroupBy(), sp.getValueToObserve()));
return output;
diff --git a/streampipes-processors-text-mining-flink/deployment/docker-compose.yml b/streampipes-processors-text-mining-flink/deployment/docker-compose.yml
new file mode 100644
index 0000000..e74ff0e
--- /dev/null
+++ b/streampipes-processors-text-mining-flink/deployment/docker-compose.yml
@@ -0,0 +1,13 @@
+version: "2"
+services:
+ processors-text-mining-flink: # container for the text-mining Flink pipeline elements
+ image: ${SP_DOCKER_REGISTRY}/streampipes/streampipes-pipeline-elements/processors-text-mining-flink:${SP_PE_VERSION} # registry and tag come from Compose variable substitution
+ depends_on:
+ - "consul" # NOTE(review): no consul service is declared in this file; presumably supplied by the compose file this one is combined with - verify
+# ports:
+# - "8098:8090"
+ environment:
+ - SP_ICON_HOST=${SP_ICON_HOST} # forwarded unchanged from the invoking environment
+ networks:
+ spnet: # NOTE(review): no top-level networks section here; assumes spnet is declared elsewhere - confirm
+
diff --git a/streampipes-processors-text-mining-flink/pom.xml b/streampipes-processors-text-mining-flink/pom.xml
index 86efb96..db27783 100644
--- a/streampipes-processors-text-mining-flink/pom.xml
+++ b/streampipes-processors-text-mining-flink/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
diff --git a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/language/LanguageDetection.java b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/language/LanguageDetection.java
index 9de488b..7ce6eea 100644
--- a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/language/LanguageDetection.java
+++ b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/language/LanguageDetection.java
@@ -26,12 +26,12 @@
import com.optimaize.langdetect.text.TextObjectFactory;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
import java.io.IOException;
import java.util.List;
-import java.util.Map;
-public class LanguageDetection implements FlatMapFunction<Map<String, Object>, Map<String, Object>> {
+public class LanguageDetection implements FlatMapFunction<Event, Event> {
private static final String LANGUAGE_KEY = "language";
@@ -56,15 +56,16 @@
}
@Override
- public void flatMap(Map<String, Object> in, Collector<Map<String, Object>> out) {
+ public void flatMap(Event in, Collector<Event> out) {
- TextObject textObject = textObjectFactory.forText(String.valueOf(in.get(fieldName)));
+ TextObject textObject = textObjectFactory.forText(in.getFieldBySelector(fieldName)
+ .getAsPrimitive().getAsString());
com.google.common.base.Optional<LdLocale> lang = languageDetector.detect(textObject);
if (lang.isPresent()) {
- in.put(LANGUAGE_KEY, lang.get().getLanguage());
+ in.addField(LANGUAGE_KEY, lang.get().getLanguage());
} else {
- in.put(LANGUAGE_KEY, "unknown");
+ in.addField(LANGUAGE_KEY, "unknown");
}
out.collect(in);
diff --git a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/language/LanguageDetectionProgram.java b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/language/LanguageDetectionProgram.java
index e701a6e..36661bf 100644
--- a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/language/LanguageDetectionProgram.java
+++ b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/language/LanguageDetectionProgram.java
@@ -16,10 +16,9 @@
package org.streampipes.processors.textmining.flink.processor.language;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.textmining.flink.AbstractTextMiningProgram;
-import java.util.Map;
-
public class LanguageDetectionProgram extends AbstractTextMiningProgram<LanguageDetectionParameters> {
public LanguageDetectionProgram(LanguageDetectionParameters params, boolean debug) {
@@ -31,7 +30,7 @@
}
@Override
- protected DataStream<Map<String, Object>> getApplicationLogic(DataStream<Map<String, Object>>... messageStream) {
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... messageStream) {
return messageStream[0]
.flatMap(new LanguageDetection(params.getFieldName()));
}
diff --git a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/Word.java b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/Word.java
index 5264853..fa45cd4 100644
--- a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/Word.java
+++ b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/Word.java
@@ -2,38 +2,35 @@
public class Word {
- private String word;
- private int count;
-
- public Word() {
-
- }
+ private String word;
+ private int count;
- public Word(String word, int count) {
- super();
- this.word = word;
- this.count = count;
- }
-
- public String getWord() {
- return word;
- }
+ public Word() {
- public void setWord(String word) {
- this.word = word;
- }
+ }
- public int getCount() {
- return count;
- }
+ public Word(String word, int count) {
+ super();
+ this.word = word;
+ this.count = count;
+ }
+
+ public String getWord() {
+ return word;
+ }
+
+ public void setWord(String word) {
+ this.word = word;
+ }
+
+ public int getCount() {
+ return count;
+ }
- public void setCount(int count) {
- this.count = count;
- }
+ public void setCount(int count) {
+ this.count = count;
+ }
-
-
-
}
diff --git a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordCountController.java b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordCountController.java
index b9f0ea4..c7ef342 100644
--- a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordCountController.java
+++ b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordCountController.java
@@ -8,50 +8,54 @@
import org.streampipes.sdk.builder.ProcessingElementBuilder;
import org.streampipes.sdk.builder.StreamRequirementsBuilder;
import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
-import org.streampipes.sdk.helpers.*;
+import org.streampipes.sdk.helpers.EpProperties;
+import org.streampipes.sdk.helpers.EpRequirements;
+import org.streampipes.sdk.helpers.Label;
+import org.streampipes.sdk.helpers.Labels;
+import org.streampipes.sdk.helpers.OutputStrategies;
import org.streampipes.wrapper.flink.FlinkDataProcessorDeclarer;
import org.streampipes.wrapper.flink.FlinkDataProcessorRuntime;
public class WordCountController extends FlinkDataProcessorDeclarer<WordCountParameters> {
- private static final String RESOURCE_ID = "strings.wordcount";
- private static final String PE_ID = "org.streampipes.processors.textmining.flink.wordcount";
+ private static final String RESOURCE_ID = "strings.wordcount";
+ private static final String PE_ID = "org.streampipes.processors.textmining.flink.wordcount";
- private static final String WORD_COUNT_FIELD_KEY = "wordcountField";
- private static final String TIME_WINDOW_KEY = "timeWindowKey";
- private static final String WORD_KEY = "word";
- private static final String COUNT_KEY = "count";
+ private static final String WORD_COUNT_FIELD_KEY = "wordcountField";
+ private static final String TIME_WINDOW_KEY = "timeWindowKey";
+ private static final String WORD_KEY = "word";
+ private static final String COUNT_KEY = "count";
- @Override
- public DataProcessorDescription declareModel() {
- return ProcessingElementBuilder.create(getLabel(PE_ID))
- .category(DataProcessorType.AGGREGATE)
- .requiredStream(StreamRequirementsBuilder
- .create()
- .requiredPropertyWithUnaryMapping(
- EpRequirements.stringReq(),
- getLabel(WORD_COUNT_FIELD_KEY),
- PropertyScope.NONE)
- .build())
- .outputStrategy(OutputStrategies.fixed(EpProperties.stringEp(
- getLabel(WORD_KEY),
- "word",
- "http://schema.org/text"), EpProperties.integerEp(getLabel(COUNT_KEY), "count", "http://schema.org/number")))
- .requiredIntegerParameter(getLabel(TIME_WINDOW_KEY))
- .build();
- }
+ @Override
+ public DataProcessorDescription declareModel() {
+ return ProcessingElementBuilder.create(getLabel(PE_ID))
+ .category(DataProcessorType.AGGREGATE)
+ .requiredStream(StreamRequirementsBuilder
+ .create()
+ .requiredPropertyWithUnaryMapping(
+ EpRequirements.stringReq(),
+ getLabel(WORD_COUNT_FIELD_KEY),
+ PropertyScope.NONE)
+ .build())
+ .outputStrategy(OutputStrategies.fixed(EpProperties.stringEp(
+ getLabel(WORD_KEY),
+ "word",
+ "http://schema.org/text"), EpProperties.integerEp(getLabel(COUNT_KEY), "count", "http://schema.org/number")))
+ .requiredIntegerParameter(getLabel(TIME_WINDOW_KEY))
+ .build();
+ }
- @Override
- public FlinkDataProcessorRuntime<WordCountParameters> getRuntime(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
+ @Override
+ public FlinkDataProcessorRuntime<WordCountParameters> getRuntime(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
- String fieldName = extractor.mappingPropertyValue(WORD_COUNT_FIELD_KEY);
- Integer timeWindowValue = extractor.singleValueParameter(TIME_WINDOW_KEY, Integer.class);
+ String fieldName = extractor.mappingPropertyValue(WORD_COUNT_FIELD_KEY);
+ Integer timeWindowValue = extractor.singleValueParameter(TIME_WINDOW_KEY, Integer.class);
- return new WordCountProgram(new WordCountParameters(graph, fieldName, timeWindowValue), TextMiningFlinkConfig.INSTANCE.getDebug());
+ return new WordCountProgram(new WordCountParameters(graph, fieldName, timeWindowValue), TextMiningFlinkConfig.INSTANCE.getDebug());
- }
+ }
- private Label getLabel(String id) {
- return Labels.fromResources(RESOURCE_ID, id);
- }
+ private Label getLabel(String id) {
+ return Labels.fromResources(RESOURCE_ID, id);
+ }
}
diff --git a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordCountProgram.java b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordCountProgram.java
index 854ae53..3e504d1 100644
--- a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordCountProgram.java
+++ b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordCountProgram.java
@@ -1,31 +1,30 @@
package org.streampipes.processors.textmining.flink.processor.wordcount;
import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
import org.streampipes.processors.textmining.flink.AbstractTextMiningProgram;
-import org.streampipes.wrapper.flink.converter.ObjectToMapConverter;
import java.io.Serializable;
-import java.util.Map;
public class WordCountProgram extends AbstractTextMiningProgram<WordCountParameters> implements Serializable {
- public WordCountProgram(WordCountParameters params, boolean debug) {
- super(params, debug);
- }
+ public WordCountProgram(WordCountParameters params, boolean debug) {
+ super(params, debug);
+ }
- public WordCountProgram(WordCountParameters params) {
- super(params);
- }
+ public WordCountProgram(WordCountParameters params) {
+ super(params);
+ }
- @Override
- protected DataStream<Map<String, Object>> getApplicationLogic(
- DataStream<Map<String, Object>>... messageStream) {
+ @Override
+ protected DataStream<Event> getApplicationLogic(
+ DataStream<Event>... messageStream) {
- return messageStream[0]
- .flatMap(new WordSplitter(params.getWordCountFieldName()))
- .keyBy("word")
- .sum("count")
- .flatMap(new ObjectToMapConverter<>());
- }
-
+ return messageStream[0]
+ .flatMap(new WordSplitter(bindingParams.getWordCountFieldName()))
+ .keyBy("word")
+ .sum("count")
+ .flatMap(new WordToEventConverter());
+ }
+
}
diff --git a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordSplitter.java b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordSplitter.java
index a09950c..bfdb6fc 100644
--- a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordSplitter.java
+++ b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordSplitter.java
@@ -2,28 +2,24 @@
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
-import java.util.Map;
+public class WordSplitter implements FlatMapFunction<Event, Word> {
-public class WordSplitter implements FlatMapFunction<Map<String, Object>, Word> {
+ private String mappingPropertyName;
- private String mappingPropertyName;
-
- public WordSplitter(String mappingPropertyName) {
- this.mappingPropertyName = mappingPropertyName;
- }
-
- @Override
- public void flatMap(Map<String, Object> in,
- Collector<Word> out) throws Exception {
-
- String propertyValue = (String) in.get(mappingPropertyName);
- for(String word : propertyValue.split(" "))
- {
- out.collect(new Word(word, 1));
- }
- }
+ public WordSplitter(String mappingPropertyName) {
+ this.mappingPropertyName = mappingPropertyName;
+ }
-
+  /** Emits a {@code Word(token, 1)} for every non-empty, space-separated token of the mapped field. */
+  @Override
+  public void flatMap(Event in, Collector<Word> out) throws Exception {
+    String propertyValue = in.getFieldBySelector(mappingPropertyName).getAsPrimitive().getAsString();
+    for (String word : propertyValue.split(" ")) {
+      if (word.isEmpty()) { continue; } // an empty value or leading/repeated spaces yield "" tokens, which are not words
+      out.collect(new Word(word, 1));
+    }
+  }
}
diff --git a/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordToEventConverter.java b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordToEventConverter.java
new file mode 100644
index 0000000..4108638
--- /dev/null
+++ b/streampipes-processors-text-mining-flink/src/main/java/org/streampipes/processors/textmining/flink/processor/wordcount/WordToEventConverter.java
@@ -0,0 +1,31 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package org.streampipes.processors.textmining.flink.processor.wordcount;
+
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
+
+public class WordToEventConverter implements FlatMapFunction<Word, Event> {
+
+ @Override
+ public void flatMap(Word word, Collector<Event> collector) throws Exception {
+ Event event = new Event();
+ event.addField("word", word.getWord());
+ event.addField("count", word.getCount());
+ collector.collect(event);
+ }
+}
diff --git a/streampipes-processors-transformation-flink/pom.xml b/streampipes-processors-transformation-flink/pom.xml
index 1f600fd..3734d7c 100644
--- a/streampipes-processors-transformation-flink/pom.xml
+++ b/streampipes-processors-transformation-flink/pom.xml
@@ -3,7 +3,7 @@
<parent>
<artifactId>streampipes-pipeline-elements</artifactId>
<groupId>org.streampipes</groupId>
- <version>0.60.1</version>
+ <version>0.61.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
@@ -103,7 +103,16 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
-
+ <dependency>
+ <groupId>xerces</groupId>
+ <artifactId>xercesImpl</artifactId>
+ <version>2.9.1</version>
+ </dependency>
+ <dependency>
+ <groupId>net.sourceforge.nekohtml</groupId>
+ <artifactId>nekohtml</artifactId>
+ <version>1.9.13</version>
+ </dependency>
</dependencies>
<build>
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeDocumentSource.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeDocumentSource.java
new file mode 100755
index 0000000..9b11417
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeDocumentSource.java
@@ -0,0 +1,27 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe;
+
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Something that can be represented as a {@link TextDocument}.
+ */
+public interface BoilerpipeDocumentSource {
+ TextDocument toTextDocument() throws BoilerpipeProcessingException;
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeExtractor.java
new file mode 100755
index 0000000..3aba4e8
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeExtractor.java
@@ -0,0 +1,65 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe;
+
+import java.io.Reader;
+
+import org.xml.sax.InputSource;
+
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Describes a complete filter pipeline.
+ */
+public interface BoilerpipeExtractor extends BoilerpipeFilter {
+ /**
+ * Extracts text from the HTML code given as a String.
+ *
+ * @param html The HTML code as a String.
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException
+ */
+ public String getText(final String html) throws BoilerpipeProcessingException;
+
+ /**
+ * Extracts text from the HTML code available from the given {@link InputSource}.
+ *
+ * @param is The InputSource containing the HTML
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException
+ */
+ public String getText(final InputSource is) throws BoilerpipeProcessingException;
+
+ /**
+ * Extracts text from the HTML code available from the given {@link Reader}.
+ *
+ * @param r The Reader containing the HTML
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException
+ */
+ public String getText(final Reader r) throws BoilerpipeProcessingException;
+
+ /**
+ * Extracts text from the given {@link TextDocument} object.
+ *
+ * @param doc The {@link TextDocument}.
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException
+ */
+ public String getText(TextDocument doc) throws BoilerpipeProcessingException;
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeFilter.java
new file mode 100755
index 0000000..2f9cee4
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeFilter.java
@@ -0,0 +1,49 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2019 FZI Forschungszentrum Informatik
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe;
+
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+import java.io.Serializable;
+
+/**
+ * A generic {@link BoilerpipeFilter}. Takes a {@link TextDocument} and processes it somehow.
+ */
+public interface BoilerpipeFilter extends Serializable {
+ /**
+ * Processes the given document <code>doc</code>.
+ *
+ * @param doc The {@link TextDocument} that is to be processed.
+ * @return <code>true</code> if changes have been made to the {@link TextDocument}.
+ * @throws BoilerpipeProcessingException
+ */
+ boolean process(final TextDocument doc) throws BoilerpipeProcessingException;
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeInput.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeInput.java
new file mode 100755
index 0000000..bc15d3d
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeInput.java
@@ -0,0 +1,33 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe;
+
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * A source that returns {@link TextDocument}s.
+ */
+public interface BoilerpipeInput {
+ /**
+ * Returns (somehow) a {@link TextDocument}.
+ *
+ * @return A {@link TextDocument}.
+ * @throws BoilerpipeProcessingException
+ */
+ TextDocument getTextDocument() throws BoilerpipeProcessingException;
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeProcessingException.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeProcessingException.java
new file mode 100755
index 0000000..7bcd023
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/BoilerpipeProcessingException.java
@@ -0,0 +1,41 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe;
+
+/**
+ * Exception for signaling failure in the processing pipeline.
+ */
+public class BoilerpipeProcessingException extends Exception {
+ private static final long serialVersionUID = 1L;
+
+ public BoilerpipeProcessingException() {
+ super();
+ }
+
+ public BoilerpipeProcessingException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public BoilerpipeProcessingException(String message) {
+ super(message);
+ }
+
+ public BoilerpipeProcessingException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/conditions/TextBlockCondition.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/conditions/TextBlockCondition.java
new file mode 100755
index 0000000..3bccb55
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/conditions/TextBlockCondition.java
@@ -0,0 +1,36 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.conditions;
+
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.labels.ConditionalLabelAction;
+
+/**
+ * Evaluates whether a given {@link TextBlock} meets a certain condition.
+ *
+ * Useful in combination with {@link ConditionalLabelAction}.
+ */
+public interface TextBlockCondition {
+ /**
+ * Returns <code>true</code> iff the given {@link TextBlock} tb meets the defined condition.
+ *
+ * @param tb The {@link TextBlock} to evaluate.
+ * @return <code>true</code> iff the condition is met.
+ */
+ boolean meetsCondition(final TextBlock tb);
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/Image.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/Image.java
new file mode 100755
index 0000000..9eeda4e
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/Image.java
@@ -0,0 +1,108 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.document;
+
+/**
+ * Represents an Image resource that is contained in the document.
+ *
+ * Any of the attributes may be null, except for "src".
+ */
+public class Image implements Comparable<Image> {
+ private final String src;
+ private final String width;
+ private final String height;
+ private final String alt;
+ private final int area;
+
+ public Image(final String src, final String width, final String height, final String alt) {
+ this.src = src;
+ if (src == null) {
+ throw new NullPointerException("src attribute must not be null");
+ }
+ this.width = nullTrim(width);
+ this.height = nullTrim(height);
+ this.alt = nullTrim(alt);
+
+ if (width != null && height != null) {
+ int a;
+ try {
+ a = Integer.parseInt(width) * Integer.parseInt(height);
+ } catch (NumberFormatException e) {
+ a = -1;
+ }
+ this.area = a;
+ } else {
+ this.area = -1;
+ }
+ }
+
+ public String getSrc() {
+ return src;
+ }
+
+ public String getWidth() {
+ return width;
+ }
+
+ public String getHeight() {
+ return height;
+ }
+
+ public String getAlt() {
+ return alt;
+ }
+
+ private static String nullTrim(String s) {
+ if (s == null) {
+ return null;
+ }
+ s = s.trim();
+ if (s.length() == 0) {
+ return null;
+ }
+ return s;
+ }
+
+ /**
+ * Returns the image's area (specified by width * height), or -1 if width/height weren't both
+ * specified or could not be parsed.
+ *
+ * @return The area, or -1 if it is unknown.
+ */
+ public int getArea() {
+ return area;
+ }
+
+ public String toString() {
+ return src + "\twidth=" + width + "\theight=" + height + "\talt=" + alt + "\tarea=" + area;
+ }
+
+ @Override
+ public int compareTo(Image o) {
+ if (o == this) {
+ return 0;
+ }
+ if (area > o.area) {
+ return -1;
+ } else if (area == o.area) {
+ return src.compareTo(o.src);
+ } else {
+ return 1;
+ }
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextBlock.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextBlock.java
new file mode 100755
index 0000000..1e94070
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextBlock.java
@@ -0,0 +1,283 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.document;
+
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.Set;
+
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Describes a block of text.
+ *
+ * A block can be an "atomic" text element (i.e., a sequence of text that is not interrupted by any
+ * HTML markup) or a compound of such atomic elements.
+ */
+public class TextBlock implements Cloneable {
+ boolean isContent = false;
+ private CharSequence text;
+ Set<String> labels = null;
+
+ int offsetBlocksStart;
+ int offsetBlocksEnd;
+
+ int numWords;
+ int numWordsInAnchorText;
+ int numWordsInWrappedLines;
+ int numWrappedLines;
+ float textDensity;
+ float linkDensity;
+
+ BitSet containedTextElements;
+
+ private int numFullTextWords = 0;
+ private int tagLevel;
+
+ private static final BitSet EMPTY_BITSET = new BitSet();
+ public static final TextBlock EMPTY_START = new TextBlock("", EMPTY_BITSET, 0, 0, 0, 0, -1);
+ public static final TextBlock EMPTY_END = new TextBlock("", EMPTY_BITSET, 0, 0, 0, 0,
+ Integer.MAX_VALUE);
+
+ public TextBlock(final String text) {
+ this(text, null, 0, 0, 0, 0, 0);
+ }
+
+ public TextBlock(final String text, final BitSet containedTextElements, final int numWords,
+ final int numWordsInAnchorText, final int numWordsInWrappedLines, final int numWrappedLines,
+ final int offsetBlocks) {
+ this.text = text;
+ this.containedTextElements = containedTextElements;
+ this.numWords = numWords;
+ this.numWordsInAnchorText = numWordsInAnchorText;
+ this.numWordsInWrappedLines = numWordsInWrappedLines;
+ this.numWrappedLines = numWrappedLines;
+ this.offsetBlocksStart = offsetBlocks;
+ this.offsetBlocksEnd = offsetBlocks;
+ initDensities();
+ }
+
+ public boolean isContent() {
+ return isContent;
+ }
+
+ public boolean setIsContent(boolean isContent) {
+ if (isContent != this.isContent) {
+ this.isContent = isContent;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public String getText() {
+ return text.toString();
+ }
+
+ public int getNumWords() {
+ return numWords;
+ }
+
+ public int getNumWordsInAnchorText() {
+ return numWordsInAnchorText;
+ }
+
+ public float getTextDensity() {
+ return textDensity;
+ }
+
+ public float getLinkDensity() {
+ return linkDensity;
+ }
+
+ public void mergeNext(final TextBlock other) {
+ if (!(text instanceof StringBuilder)) {
+ text = new StringBuilder(text);
+ }
+ StringBuilder sb = (StringBuilder) text;
+ sb.append('\n');
+ sb.append(other.text);
+
+ numWords += other.numWords;
+ numWordsInAnchorText += other.numWordsInAnchorText;
+
+ numWordsInWrappedLines += other.numWordsInWrappedLines;
+ numWrappedLines += other.numWrappedLines;
+
+ offsetBlocksStart = Math.min(offsetBlocksStart, other.offsetBlocksStart);
+ offsetBlocksEnd = Math.max(offsetBlocksEnd, other.offsetBlocksEnd);
+
+ initDensities();
+
+ this.isContent |= other.isContent;
+
+ if (containedTextElements == null) {
+ containedTextElements = (BitSet) other.containedTextElements.clone();
+ } else {
+ containedTextElements.or(other.containedTextElements);
+ }
+
+ numFullTextWords += other.numFullTextWords;
+
+ if (other.labels != null) {
+ if (labels == null) {
+ labels = new HashSet<String>(other.labels);
+ } else {
+ labels.addAll(other.labels);
+ }
+ }
+
+ tagLevel = Math.min(tagLevel, other.tagLevel);
+ }
+
+ private void initDensities() {
+ if (numWordsInWrappedLines == 0) {
+ numWordsInWrappedLines = numWords;
+ numWrappedLines = 1;
+ }
+ textDensity = numWordsInWrappedLines / (float) numWrappedLines;
+ linkDensity = numWords == 0 ? 0 : numWordsInAnchorText / (float) numWords;
+ }
+
+ public int getOffsetBlocksStart() {
+ return offsetBlocksStart;
+ }
+
+ public int getOffsetBlocksEnd() {
+ return offsetBlocksEnd;
+ }
+
+ public String toString() {
+ return "[" + offsetBlocksStart + "-" + offsetBlocksEnd + ";tl=" + tagLevel + "; nw=" + numWords
+ + ";nwl=" + numWrappedLines + ";ld=" + linkDensity + "]\t"
+ + (isContent ? "CONTENT" : "boilerplate") + "," + labels + "\n" + getText();
+ }
+
+ /**
+ * Adds an arbitrary String label to this {@link TextBlock}.
+ *
+ * @param label The label
+ * @see DefaultLabels
+ */
+ public void addLabel(final String label) {
+ if (labels == null) {
+ labels = new HashSet<String>(2);
+ }
+ labels.add(label);
+ }
+
+ /**
+ * Checks whether this TextBlock has the given label.
+ *
+ * @param label The label
+ * @return <code>true</code> if this block is marked by the given label.
+ */
+ public boolean hasLabel(final String label) {
+ return labels != null && labels.contains(label);
+ }
+
+ public boolean removeLabel(final String label) {
+ return labels != null && labels.remove(label);
+ }
+
+ /**
+ * Returns the labels associated to this TextBlock, or <code>null</code> if no such labels exist.
+ *
+ * NOTE: The returned instance is the one used directly in TextBlock. You have full access to the
+ * data structure. However it is recommended to use the label-specific methods in
+ * {@link TextBlock} whenever possible.
+ *
+ * @return Returns the set of labels, or <code>null</code> if no labels were added yet.
+ */
+ public Set<String> getLabels() {
+ return labels;
+ }
+
+ /**
+ * Adds a set of labels to this {@link TextBlock}. <code>null</code>-references are silently
+ * ignored.
+ *
+ * @param l The labels to be added.
+ */
+ public void addLabels(final Set<String> l) {
+ if (l == null) {
+ return;
+ }
+ if (this.labels == null) {
+ this.labels = new HashSet<String>(l);
+ } else {
+ this.labels.addAll(l);
+ }
+ }
+
+ /**
+ * Adds a set of labels to this {@link TextBlock}. <code>null</code>-references are silently
+ * ignored.
+ *
+ * @param l The labels to be added.
+ */
+ public void addLabels(final String... l) {
+ if (l == null) {
+ return;
+ }
+ if (this.labels == null) {
+ this.labels = new HashSet<String>();
+ }
+ for (final String label : l) {
+ this.labels.add(label);
+ }
+ }
+
+ /**
+ * Returns the containedTextElements BitSet, or <code>null</code>.
+ *
+ * @return The {@link BitSet} of contained text elements, or <code>null</code>.
+ */
+ public BitSet getContainedTextElements() {
+ return containedTextElements;
+ }
+
+ @Override
+ protected TextBlock clone() {
+ final TextBlock clone;
+ try {
+ clone = (TextBlock) super.clone();
+ } catch (CloneNotSupportedException e) {
+ throw new RuntimeException(e);
+ }
+ if (text != null && !(text instanceof String)) {
+ clone.text = new StringBuilder(text);
+ }
+ if (labels != null && !labels.isEmpty()) {
+ clone.labels = new HashSet<String>(labels);
+ }
+ if (containedTextElements != null) {
+ clone.containedTextElements = (BitSet) containedTextElements.clone();
+ }
+
+ return clone;
+ }
+
+ public int getTagLevel() {
+ return tagLevel;
+ }
+
+ public void setTagLevel(int tagLevel) {
+ this.tagLevel = tagLevel;
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextDocument.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextDocument.java
new file mode 100755
index 0000000..5bc60c0
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextDocument.java
@@ -0,0 +1,132 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.document;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A text document, consisting of one or more {@link TextBlock}s.
+ */
+public class TextDocument implements Cloneable {
+ final List<TextBlock> textBlocks;
+ String title;
+
+ /**
+ * Creates a new {@link TextDocument} with given {@link TextBlock}s, and no title.
+ *
+ * @param textBlocks The text blocks of this document.
+ */
+ public TextDocument(final List<TextBlock> textBlocks) {
+ this(null, textBlocks);
+ }
+
+ /**
+ * Creates a new {@link TextDocument} with given {@link TextBlock}s and given title.
+ *
+ * @param title The "main" title for this text document.
+ * @param textBlocks The text blocks of this document.
+ */
+ public TextDocument(final String title, final List<TextBlock> textBlocks) {
+ this.title = title;
+ this.textBlocks = textBlocks;
+ }
+
+ /**
+ * Returns the {@link TextBlock}s of this document.
+ *
+ * @return A list of {@link TextBlock}s, in sequential order of appearance.
+ */
+ public List<TextBlock> getTextBlocks() {
+ return textBlocks;
+ }
+
+ /**
+ * Returns the "main" title for this document, or <code>null</code> if no such title has been set.
+ *
+ * @return The "main" title.
+ */
+ public String getTitle() {
+ return title;
+ }
+
+ /**
+ * Updates the "main" title for this document.
+ *
+ * @param title The new "main" title.
+ */
+ public void setTitle(final String title) {
+ this.title = title;
+ }
+
+ /**
+ * Returns the {@link TextDocument}'s content.
+ *
+ * @return The content text.
+ */
+ public String getContent() {
+ return getText(true, false);
+ }
+
+ /**
+ * Returns the {@link TextDocument}'s content, non-content or both
+ *
+ * @param includeContent Whether to include TextBlocks marked as "content".
+ * @param includeNonContent Whether to include TextBlocks marked as "non-content".
+ * @return The text.
+ */
+ public String getText(boolean includeContent, boolean includeNonContent) {
+ StringBuilder sb = new StringBuilder();
+ LOOP : for (TextBlock block : getTextBlocks()) {
+ if (block.isContent()) {
+ if (!includeContent) {
+ continue LOOP;
+ }
+ } else {
+ if (!includeNonContent) {
+ continue LOOP;
+ }
+ }
+ sb.append(block.getText());
+ sb.append('\n');
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Returns detailed debugging information about the contained {@link TextBlock}s.
+ *
+ * @return Debug information.
+ */
+ public String debugString() {
+ StringBuilder sb = new StringBuilder();
+ for (TextBlock tb : getTextBlocks()) {
+ sb.append(tb.toString());
+ sb.append('\n');
+ }
+ return sb.toString();
+ }
+
+ public TextDocument clone() {
+ final List<TextBlock> list = new ArrayList<TextBlock>(textBlocks.size());
+ for (TextBlock tb : textBlocks) {
+ list.add(tb.clone());
+ }
+ return new TextDocument(title, list);
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextDocumentStatistics.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextDocumentStatistics.java
new file mode 100755
index 0000000..467cf60
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/TextDocumentStatistics.java
@@ -0,0 +1,62 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.document;
+
+/**
+ * Provides shallow statistics on a given {@link TextDocument}
+ */
+public final class TextDocumentStatistics {
+ private int numWords = 0;
+ private int numBlocks = 0;
+
+ /**
+ * Computes statistics on a given {@link TextDocument}.
+ *
+ * @param doc The {@link TextDocument}.
+ * @param contentOnly If true, only blocks marked as content are counted.
+ */
+ public TextDocumentStatistics(final TextDocument doc, final boolean contentOnly) {
+ for (TextBlock tb : doc.getTextBlocks()) {
+ if (contentOnly && !tb.isContent()) {
+ continue;
+ }
+
+ numWords += tb.getNumWords();
+ numBlocks++;
+ }
+ }
+
+ /**
+ * Returns the average number of words at block-level (= overall number of words divided by the
+ * number of blocks).
+ *
+ * @return Average
+ */
+ public float avgNumWords() {
+ return numWords / (float) numBlocks;
+ }
+
+ /**
+ * Returns the overall number of words in all blocks.
+ *
+ * @return Sum
+ */
+ public int getNumWords() {
+ return numWords;
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/package-info.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/package-info.java
new file mode 100755
index 0000000..7043bd0
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/document/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * The Boilerpipe document model.
+ */
+package com.kohlschutter.boilerpipe.document;
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/estimators/SimpleEstimator.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/estimators/SimpleEstimator.java
new file mode 100755
index 0000000..f090feb
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/estimators/SimpleEstimator.java
@@ -0,0 +1,61 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.estimators;
+
+import com.kohlschutter.boilerpipe.BoilerpipeExtractor;
+import com.kohlschutter.boilerpipe.document.TextDocumentStatistics;
+import com.kohlschutter.boilerpipe.extractors.ArticleExtractor;
+import com.kohlschutter.boilerpipe.extractors.DefaultExtractor;
+
+/**
+ * Estimates the "goodness" of a {@link BoilerpipeExtractor} on a given document.
+ */
+public final class SimpleEstimator {
+
+ /**
+ * The singleton instance of {@link SimpleEstimator}.
+ */
+ public static final SimpleEstimator INSTANCE = new SimpleEstimator();
+
+ private SimpleEstimator() {
+ }
+
+ /**
+ * Given the statistics of the document before and after applying the {@link BoilerpipeExtractor},
+ * can we regard the extraction quality (too) low?
+ *
+ * Works well with {@link DefaultExtractor}, {@link ArticleExtractor} and others.
+ *
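+ * @param dsBefore statistics of the document before applying the extractor
+ * @param dsAfter statistics of the document after applying the extractor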
+ * @return true if low quality is to be expected.
+ */
+ public boolean isLowQuality(final TextDocumentStatistics dsBefore,
+ final TextDocumentStatistics dsAfter) {
+ if (dsBefore.getNumWords() < 90 || dsAfter.getNumWords() < 70) {
+ return true;
+ }
+
+ if (dsAfter.avgNumWords() < 25) {
+ return true;
+ }
+
+ return false;
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ArticleExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ArticleExtractor.java
new file mode 100755
index 0000000..fb207f3
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ArticleExtractor.java
@@ -0,0 +1,64 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.filters.english.IgnoreBlocksAfterContentFilter;
+import com.kohlschutter.boilerpipe.filters.english.NumWordsRulesClassifier;
+import com.kohlschutter.boilerpipe.filters.english.TerminatingBlocksFinder;
+import com.kohlschutter.boilerpipe.filters.heuristics.BlockProximityFusion;
+import com.kohlschutter.boilerpipe.filters.heuristics.DocumentTitleMatchClassifier;
+import com.kohlschutter.boilerpipe.filters.heuristics.ExpandTitleToContentFilter;
+import com.kohlschutter.boilerpipe.filters.heuristics.KeepLargestBlockFilter;
+import com.kohlschutter.boilerpipe.filters.heuristics.LargeBlockSameTagLevelToContentFilter;
+import com.kohlschutter.boilerpipe.filters.heuristics.ListAtEndFilter;
+import com.kohlschutter.boilerpipe.filters.heuristics.TrailingHeadlineToBoilerplateFilter;
+import com.kohlschutter.boilerpipe.filters.simple.BoilerplateBlockFilter;
+
+/**
+ * A full-text extractor which is tuned towards news articles. In this scenario it achieves higher
+ * accuracy than {@link DefaultExtractor}.
+ */
+public final class ArticleExtractor extends ExtractorBase {
+ public static final ArticleExtractor INSTANCE = new ArticleExtractor();
+
+ /**
+ * Returns the singleton instance for {@link ArticleExtractor}.
+ */
+ public static ArticleExtractor getInstance() {
+ return INSTANCE;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ return
+
+ TerminatingBlocksFinder.INSTANCE.process(doc)
+ | new DocumentTitleMatchClassifier(doc.getTitle()).process(doc)
+ | NumWordsRulesClassifier.INSTANCE.process(doc)
+ | IgnoreBlocksAfterContentFilter.DEFAULT_INSTANCE.process(doc)
+ | TrailingHeadlineToBoilerplateFilter.INSTANCE.process(doc)
+ | BlockProximityFusion.MAX_DISTANCE_1.process(doc)
+ | BoilerplateBlockFilter.INSTANCE_KEEP_TITLE.process(doc)
+ | BlockProximityFusion.MAX_DISTANCE_1_CONTENT_ONLY_SAME_TAGLEVEL.process(doc)
+ | KeepLargestBlockFilter.INSTANCE_EXPAND_TO_SAME_TAGLEVEL_MIN_WORDS.process(doc)
+ | ExpandTitleToContentFilter.INSTANCE.process(doc)
+ | LargeBlockSameTagLevelToContentFilter.INSTANCE.process(doc)
+ | ListAtEndFilter.INSTANCE.process(doc);
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ArticleSentencesExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ArticleSentencesExtractor.java
new file mode 100755
index 0000000..78a74f0
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ArticleSentencesExtractor.java
@@ -0,0 +1,45 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.filters.simple.MinClauseWordsFilter;
+import com.kohlschutter.boilerpipe.filters.simple.SplitParagraphBlocksFilter;
+
+/**
+ * A full-text extractor which is tuned towards extracting sentences from news articles.
+ */
+public final class ArticleSentencesExtractor extends ExtractorBase {
+ public static final ArticleSentencesExtractor INSTANCE = new ArticleSentencesExtractor();
+
+ /**
+ * Returns the singleton instance for {@link ArticleSentencesExtractor}.
+ */
+ public static ArticleSentencesExtractor getInstance() {
+ return INSTANCE;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ return
+
+ ArticleExtractor.INSTANCE.process(doc) | SplitParagraphBlocksFilter.INSTANCE.process(doc)
+ | MinClauseWordsFilter.INSTANCE.process(doc);
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/CanolaExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/CanolaExtractor.java
new file mode 100755
index 0000000..a8a3149
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/CanolaExtractor.java
@@ -0,0 +1,94 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import java.util.List;
+import java.util.ListIterator;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.estimators.SimpleEstimator;
+
+/**
+ * A full-text extractor trained on <a href="http://krdwrd.org/">krdwrd</a> <a href
+ * ="https://krdwrd.org/trac/attachment/wiki/Corpora/Canola/CANOLA.pdf">Canola </a>. Works well with
+ * {@link SimpleEstimator}, too.
+ */
+public class CanolaExtractor extends ExtractorBase {
+ public static final CanolaExtractor INSTANCE = new CanolaExtractor();
+
+ /**
+ * Returns the singleton instance for {@link CanolaExtractor}.
+ */
+ public static CanolaExtractor getInstance() {
+ return INSTANCE;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+
+ return CLASSIFIER.process(doc);
+ }
+
+ /**
+ * The actual classifier, exposed.
+ */
+ public static final BoilerpipeFilter CLASSIFIER = new BoilerpipeFilter() {
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ List<TextBlock> textBlocks = doc.getTextBlocks();
+ boolean hasChanges = false;
+
+ ListIterator<TextBlock> it = textBlocks.listIterator();
+ if (!it.hasNext()) {
+ return false;
+ }
+ TextBlock prevBlock = TextBlock.EMPTY_START;
+ TextBlock currentBlock = it.next();
+ TextBlock nextBlock = it.hasNext() ? it.next() : TextBlock.EMPTY_START;
+
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+
+ if (nextBlock != TextBlock.EMPTY_START) {
+ while (it.hasNext()) {
+ prevBlock = currentBlock;
+ currentBlock = nextBlock;
+ nextBlock = it.next();
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+ }
+ prevBlock = currentBlock;
+ currentBlock = nextBlock;
+ nextBlock = TextBlock.EMPTY_START;
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+ }
+
+ return hasChanges;
+ }
+
+ protected boolean classify(final TextBlock prev, final TextBlock curr, final TextBlock next) {
+ final boolean isContent =
+ (curr.getLinkDensity() > 0 && next.getNumWords() > 11)
+ || (curr.getNumWords() > 19 || (next.getNumWords() > 6 && next.getLinkDensity() == 0
+ && prev.getLinkDensity() == 0 && (curr.getNumWords() > 6
+ || prev.getNumWords() > 7 || next.getNumWords() > 19)));
+
+ return curr.setIsContent(isContent);
+ }
+ };
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/CommonExtractors.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/CommonExtractors.java
new file mode 100755
index 0000000..e06afa9
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/CommonExtractors.java
@@ -0,0 +1,56 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import com.kohlschutter.boilerpipe.BoilerpipeExtractor;
+
+/**
+ * Provides quick access to common {@link BoilerpipeExtractor}s.
+ */
+public final class CommonExtractors {
+ private CommonExtractors() {
+ }
+
+ /**
+ * Works very well for most types of Article-like HTML.
+ */
+ public static final ArticleExtractor ARTICLE_EXTRACTOR = ArticleExtractor.INSTANCE;
+
+ /**
+ * Usually worse than {@link ArticleExtractor}, but simpler/no heuristics.
+ */
+ public static final DefaultExtractor DEFAULT_EXTRACTOR = DefaultExtractor.INSTANCE;
+
+ /**
+ * Like {@link DefaultExtractor}, but keeps the largest text block only.
+ */
+ public static final LargestContentExtractor LARGEST_CONTENT_EXTRACTOR =
+ LargestContentExtractor.INSTANCE;
+
+ /**
+ * Trained on krdwrd Canola (different definition of "boilerplate"). You may give it a try.
+ */
+ public static final CanolaExtractor CANOLA_EXTRACTOR = CanolaExtractor.INSTANCE;
+
+ /**
+ * Dummy Extractor; should return the input text. Use this to double-check that your problem is
+ * within a particular {@link BoilerpipeExtractor}, or somewhere else.
+ */
+ public static final KeepEverythingExtractor KEEP_EVERYTHING_EXTRACTOR =
+ KeepEverythingExtractor.INSTANCE;
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/DefaultExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/DefaultExtractor.java
new file mode 100755
index 0000000..bb1cd79
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/DefaultExtractor.java
@@ -0,0 +1,47 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.filters.english.DensityRulesClassifier;
+import com.kohlschutter.boilerpipe.filters.heuristics.BlockProximityFusion;
+import com.kohlschutter.boilerpipe.filters.heuristics.SimpleBlockFusionProcessor;
+
+/**
+ * A quite generic full-text extractor.
+ */
+public class DefaultExtractor extends ExtractorBase {
+ public static final DefaultExtractor INSTANCE = new DefaultExtractor();
+
+ /**
+ * Returns the singleton instance for {@link DefaultExtractor}.
+ */
+ public static DefaultExtractor getInstance() {
+ return INSTANCE;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+
+ return
+
+ SimpleBlockFusionProcessor.INSTANCE.process(doc)
+ | BlockProximityFusion.MAX_DISTANCE_1.process(doc)
+ | DensityRulesClassifier.INSTANCE.process(doc);
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ExtractorBase.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ExtractorBase.java
new file mode 100755
index 0000000..07a4f8d
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/ExtractorBase.java
@@ -0,0 +1,110 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.URL;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import com.kohlschutter.boilerpipe.BoilerpipeExtractor;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.sax.BoilerpipeSAXInput;
+import com.kohlschutter.boilerpipe.sax.HTMLFetcher;
+
+/**
+ * The base class of Extractors. Also provides some helper methods to quickly retrieve the text that
+ * remained after processing.
+ */
+public abstract class ExtractorBase implements BoilerpipeExtractor {
+
+ /**
+ * Extracts text from the HTML code given as a String.
+ *
+ * @param html The HTML code as a String.
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException if SAX parsing or the extraction itself fails
+ */
+ public String getText(final String html) throws BoilerpipeProcessingException {
+ try {
+ return getText(new BoilerpipeSAXInput(new InputSource(new StringReader(html)))
+ .getTextDocument());
+ } catch (SAXException e) {
+ throw new BoilerpipeProcessingException(e);
+ }
+ }
+
+ /**
+ * Extracts text from the HTML code available from the given {@link InputSource}.
+ *
+ * @param is The InputSource containing the HTML
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException if SAX parsing or the extraction itself fails
+ */
+ public String getText(final InputSource is) throws BoilerpipeProcessingException {
+ try {
+ return getText(new BoilerpipeSAXInput(is).getTextDocument());
+ } catch (SAXException e) {
+ throw new BoilerpipeProcessingException(e);
+ }
+ }
+
+ /**
+ * Extracts text from the HTML code available from the given {@link URL}. NOTE: This method is
+ * mainly to be used for show case purposes. If you are going to crawl the Web, consider using
+ * {@link #getText(InputSource)} instead.
+ *
+ * @param url The URL pointing to the HTML code.
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException if fetching the URL, parsing or extraction fails
+ */
+ public String getText(final URL url) throws BoilerpipeProcessingException {
+ try {
+ return getText(HTMLFetcher.fetch(url).toInputSource());
+ } catch (IOException e) {
+ throw new BoilerpipeProcessingException(e);
+ }
+ }
+
+ /**
+ * Extracts text from the HTML code available from the given {@link Reader}.
+ *
+ * @param r The Reader containing the HTML
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException if SAX parsing or the extraction itself fails
+ */
+ public String getText(final Reader r) throws BoilerpipeProcessingException {
+ return getText(new InputSource(r));
+ }
+
+ /**
+ * Extracts text from the given {@link TextDocument} object.
+ *
+ * @param doc The {@link TextDocument}.
+ * @return The extracted text.
+ * @throws BoilerpipeProcessingException if processing the document fails
+ */
+ public String getText(TextDocument doc) throws BoilerpipeProcessingException {
+ process(doc);
+ return doc.getContent();
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/KeepEverythingExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/KeepEverythingExtractor.java
new file mode 100755
index 0000000..c5629b8
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/KeepEverythingExtractor.java
@@ -0,0 +1,39 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.filters.simple.MarkEverythingContentFilter;
+
+/**
+ * Marks everything as content.
+ */
+public final class KeepEverythingExtractor extends ExtractorBase {
+
+ public static final KeepEverythingExtractor INSTANCE = new KeepEverythingExtractor();
+
+ private KeepEverythingExtractor() {
+
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ return MarkEverythingContentFilter.INSTANCE.process(doc);
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/KeepEverythingWithMinKWordsExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/KeepEverythingWithMinKWordsExtractor.java
new file mode 100755
index 0000000..f2860f2
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/KeepEverythingWithMinKWordsExtractor.java
@@ -0,0 +1,44 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.filters.heuristics.SimpleBlockFusionProcessor;
+import com.kohlschutter.boilerpipe.filters.simple.MarkEverythingContentFilter;
+import com.kohlschutter.boilerpipe.filters.simple.MinWordsFilter;
+
+/**
+ * A full-text extractor which runs {@link SimpleBlockFusionProcessor}, then
+ * {@link MarkEverythingContentFilter}, and finally a {@link MinWordsFilter} created with the
+ * given <code>kMin</code> (presumably the minimum number of words a block needs to be kept).
+ */
+public final class KeepEverythingWithMinKWordsExtractor extends ExtractorBase {
+
+ private final MinWordsFilter filter;
+
+ public KeepEverythingWithMinKWordsExtractor(final int kMin) {
+ this.filter = new MinWordsFilter(kMin);
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ return SimpleBlockFusionProcessor.INSTANCE.process(doc)
+ | MarkEverythingContentFilter.INSTANCE.process(doc) | filter.process(doc);
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/LargestContentExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/LargestContentExtractor.java
new file mode 100755
index 0000000..cbdec11
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/LargestContentExtractor.java
@@ -0,0 +1,50 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.filters.english.NumWordsRulesClassifier;
+import com.kohlschutter.boilerpipe.filters.heuristics.BlockProximityFusion;
+import com.kohlschutter.boilerpipe.filters.heuristics.KeepLargestBlockFilter;
+
+/**
+ * A full-text extractor which extracts the largest text component of a page. For news articles, it
+ * may perform better than the {@link DefaultExtractor}, but usually worse than
+ * {@link ArticleExtractor}.
+ */
+public final class LargestContentExtractor extends ExtractorBase {
+ public static final LargestContentExtractor INSTANCE = new LargestContentExtractor();
+
+ private LargestContentExtractor() {
+ }
+
+ /**
+ * Returns the singleton instance for {@link LargestContentExtractor}.
+ */
+ public static LargestContentExtractor getInstance() {
+ return INSTANCE;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ return NumWordsRulesClassifier.INSTANCE.process(doc)
+ | BlockProximityFusion.MAX_DISTANCE_1.process(doc)
+ | KeepLargestBlockFilter.INSTANCE.process(doc);
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/NumWordsRulesExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/NumWordsRulesExtractor.java
new file mode 100755
index 0000000..fb0f9ad
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/NumWordsRulesExtractor.java
@@ -0,0 +1,43 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.extractors;
+
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.filters.english.NumWordsRulesClassifier;
+
+/**
+ * A quite generic full-text extractor solely based upon the number of words per block (the current,
+ * the previous and the next block).
+ */
+public class NumWordsRulesExtractor extends ExtractorBase {
+ public static final NumWordsRulesExtractor INSTANCE = new NumWordsRulesExtractor();
+
+ /**
+ * Returns the singleton instance for {@link NumWordsRulesExtractor}.
+ */
+ public static NumWordsRulesExtractor getInstance() {
+ return INSTANCE;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+
+ return NumWordsRulesClassifier.INSTANCE.process(doc);
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/package-info.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/package-info.java
new file mode 100755
index 0000000..e6be430
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/extractors/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Some standard extractors (i.e., completely piped BoilerpipeFilters)
+ */
+package com.kohlschutter.boilerpipe.extractors;
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/debug/PrintDebugFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/debug/PrintDebugFilter.java
new file mode 100755
index 0000000..24d9ffe
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/debug/PrintDebugFilter.java
@@ -0,0 +1,66 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.debug;
+
+import java.io.PrintWriter;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Prints debug information about the current state of the TextDocument (= calls
+ * {@link TextDocument#debugString()}).
+ */
+public final class PrintDebugFilter implements BoilerpipeFilter {
+ /**
+ * The default instance of {@link PrintDebugFilter}, which dumps debug information to
+ * <code>System.out</code>.
+ */
+ public static final PrintDebugFilter INSTANCE = new PrintDebugFilter(new PrintWriter(System.out,
+ true));
+ private final PrintWriter out;
+
+ /**
+ * Returns the default instance for {@link PrintDebugFilter}, which dumps debug information to
+ * <code>System.out</code>
+ */
+ public static PrintDebugFilter getInstance() {
+ return INSTANCE;
+ }
+
+ /**
+ * Creates a new instance of {@link PrintDebugFilter}.
+ *
+ * Only use this method if you are not going to dump the debug information to
+ * <code>System.out</code> -- for this case, use {@link #getInstance()} instead.
+ *
+ * @param out The target {@link PrintWriter}. Will not be closed
+ */
+ public PrintDebugFilter(final PrintWriter out) {
+ this.out = out;
+
+ }
+
+ @Override
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ out.println(doc.debugString());
+
+ return false;
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/DensityRulesClassifier.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/DensityRulesClassifier.java
new file mode 100755
index 0000000..61f8ca8
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/DensityRulesClassifier.java
@@ -0,0 +1,110 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
+import java.util.List;
+import java.util.ListIterator;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Classifies {@link TextBlock}s as content/not-content through rules that have been determined
+ * using the C4.8 machine learning algorithm, as described in the paper
+ * "Boilerplate Detection using Shallow Text Features", particularly using text densities and link
+ * densities.
+ */
+public class DensityRulesClassifier implements BoilerpipeFilter {
+ public static final DensityRulesClassifier INSTANCE = new DensityRulesClassifier();
+
+ /**
+ * Returns {@link #INSTANCE}, the shared instance of DensityRulesClassifier.
+ */
+ public static DensityRulesClassifier getInstance() {
+ return INSTANCE;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ List<TextBlock> textBlocks = doc.getTextBlocks();
+ boolean hasChanges = false;
+ // Slide a (previous, current, next) window over the blocks; EMPTY_START pads both ends.
+ ListIterator<TextBlock> it = textBlocks.listIterator();
+ if (!it.hasNext()) {
+ return false;
+ }
+ TextBlock prevBlock = TextBlock.EMPTY_START;
+ TextBlock currentBlock = it.next();
+ TextBlock nextBlock = it.hasNext() ? it.next() : TextBlock.EMPTY_START;
+
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+ // A second block exists: classify the rest; the last block gets EMPTY_START as next.
+ if (nextBlock != TextBlock.EMPTY_START) {
+ while (it.hasNext()) {
+ prevBlock = currentBlock;
+ currentBlock = nextBlock;
+ nextBlock = it.next();
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+ }
+ prevBlock = currentBlock;
+ currentBlock = nextBlock;
+ nextBlock = TextBlock.EMPTY_START;
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+ }
+
+ return hasChanges;
+ }
+
+ protected boolean classify(final TextBlock prev, final TextBlock curr, final TextBlock next) {
+ final boolean isContent;
+ // Nested threshold tests on the link and text densities of curr, prev and next.
+ if (curr.getLinkDensity() <= 0.333333) {
+ if (prev.getLinkDensity() <= 0.555556) {
+ if (curr.getTextDensity() <= 9) {
+ if (next.getTextDensity() <= 10) {
+ if (prev.getTextDensity() <= 4) {
+ isContent = false;
+ } else {
+ isContent = true;
+ }
+ } else {
+ isContent = true;
+ }
+ } else {
+ if (next.getTextDensity() == 0) {
+ isContent = false;
+ } else {
+ isContent = true;
+ }
+ }
+ } else {
+ if (next.getTextDensity() <= 11) {
+ isContent = false;
+ } else {
+ isContent = true;
+ }
+ }
+ } else {
+ isContent = false;
+ }
+ // Presumably setIsContent reports whether the flag changed -- TODO confirm in TextBlock.
+ return curr.setIsContent(isContent);
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/HeuristicFilterBase.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/HeuristicFilterBase.java
new file mode 100755
index 0000000..2f93fea
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/HeuristicFilterBase.java
@@ -0,0 +1,38 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
+import com.kohlschutter.boilerpipe.document.TextBlock;
+
+/**
+ * Shared word-counting heuristic for the boilerpipe filters in this package.
+ */
+abstract class HeuristicFilterBase {
+  /** Counts the words of {@code tb} as full text, using a minimum text density of 9. */
+  protected static int getNumFullTextWords(final TextBlock tb) {
+    return getNumFullTextWords(tb, 9);
+  }
+
+  /**
+   * Returns the number of words of {@code tb} if its text density is at least
+   * {@code minTextDensity}, and 0 otherwise.
+   */
+  protected static int getNumFullTextWords(final TextBlock tb, float minTextDensity) {
+    return tb.getTextDensity() >= minTextDensity ? tb.getNumWords() : 0;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/IgnoreBlocksAfterContentFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/IgnoreBlocksAfterContentFilter.java
new file mode 100755
index 0000000..dcc251e
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/IgnoreBlocksAfterContentFilter.java
@@ -0,0 +1,78 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
+import java.util.Iterator;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Marks a block labelled {@link DefaultLabels#INDICATES_END_OF_TEXT}, and all blocks after it, as
+ * "non-content". The label is ignored until the content blocks seen so far contain a minimum
+ * number of full-text words (default: 60). This can be used in conjunction with an upstream
+ * {@link TerminatingBlocksFinder}.
+ *
+ * @see TerminatingBlocksFinder
+ */
+public final class IgnoreBlocksAfterContentFilter extends HeuristicFilterBase implements
+    BoilerpipeFilter {
+  public static final IgnoreBlocksAfterContentFilter DEFAULT_INSTANCE =
+      new IgnoreBlocksAfterContentFilter(60);
+  public static final IgnoreBlocksAfterContentFilter INSTANCE_200 =
+      new IgnoreBlocksAfterContentFilter(200);
+  private final int minNumWords;
+
+  /**
+   * Returns {@link #DEFAULT_INSTANCE}, which requires 60 full-text words before an end mark.
+   */
+  public static IgnoreBlocksAfterContentFilter getDefaultInstance() {
+    return DEFAULT_INSTANCE;
+  }
+
+  /** Creates a filter that honours end-of-text marks only after {@code minNumWords} words. */
+  public IgnoreBlocksAfterContentFilter(final int minNumWords) {
+    this.minNumWords = minNumWords;
+  }
+
+  /**
+   * Marks the first qualifying end-of-text block and every block after it as non-content.
+   * Returns true if such a block was found.
+   */
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    int contentWords = 0;
+    boolean pastEnd = false;
+
+    final Iterator<TextBlock> blocks = doc.getTextBlocks().iterator();
+    while (blocks.hasNext()) {
+      final TextBlock current = blocks.next();
+      final boolean marksEnd = current.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT);
+      if (current.isContent()) {
+        contentWords += getNumFullTextWords(current);
+      }
+      pastEnd = pastEnd || (marksEnd && contentWords >= minNumWords);
+      if (pastEnd) {
+        current.setIsContent(false);
+      }
+    }
+    return pastEnd;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/IgnoreBlocksAfterContentFromEndFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/IgnoreBlocksAfterContentFromEndFilter.java
new file mode 100755
index 0000000..9050047
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/IgnoreBlocksAfterContentFromEndFilter.java
@@ -0,0 +1,74 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
+import java.util.List;
+import java.util.ListIterator;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Scans the document from its end and marks blocks labelled
+ * {@link DefaultLabels#INDICATES_END_OF_TEXT} as "non-content", until more than 200 words of
+ * content blocks have been passed. Can be used with an upstream {@link TerminatingBlocksFinder}.
+ *
+ * @see TerminatingBlocksFinder
+ */
+public final class IgnoreBlocksAfterContentFromEndFilter extends HeuristicFilterBase implements
+    BoilerpipeFilter {
+  public static final IgnoreBlocksAfterContentFromEndFilter INSTANCE =
+      new IgnoreBlocksAfterContentFromEndFilter();
+
+  private IgnoreBlocksAfterContentFromEndFilter() {
+  }
+
+  /**
+   * Walks the blocks backwards, demoting every block labelled INDICATES_END_OF_TEXT, and stops
+   * once the content blocks passed hold more than 200 words. Returns true if any was demoted.
+   */
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> blocks = doc.getTextBlocks();
+    if (blocks.isEmpty()) {
+      return false;
+    }
+    boolean modified = false;
+    int trailingContentWords = 0;
+    final ListIterator<TextBlock> cursor = blocks.listIterator(blocks.size());
+    while (cursor.hasPrevious()) {
+      final TextBlock block = cursor.previous();
+      if (block.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT)) {
+        block.addLabel(DefaultLabels.STRICTLY_NOT_CONTENT);
+        block.removeLabel(DefaultLabels.MIGHT_BE_CONTENT);
+        block.setIsContent(false);
+        modified = true;
+        continue;
+      }
+      if (block.isContent()) {
+        trailingContentWords += block.getNumWords();
+        if (trailingContentWords > 200) {
+          break;
+        }
+      }
+    }
+    return modified;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/KeepLargestFulltextBlockFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/KeepLargestFulltextBlockFilter.java
new file mode 100755
index 0000000..0df3b40
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/KeepLargestFulltextBlockFilter.java
@@ -0,0 +1,81 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
+import java.util.List;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.filters.heuristics.KeepLargestBlockFilter;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Keeps the largest {@link TextBlock} only (by the number of words). In case of more than one block
+ * with the same number of words, the first block is chosen. All discarded blocks are marked
+ * "not content" and flagged as {@link DefaultLabels#MIGHT_BE_CONTENT}.
+ *
+ * As opposed to {@link KeepLargestBlockFilter}, the number of words are computed using
+ * {@link HeuristicFilterBase#getNumFullTextWords(TextBlock)}, which only counts words of blocks
+ * whose text density is at least 9 and are thus believed to be full text.
+ *
+ * NOTE: Without language-specific fine-tuning (i.e., running the default instance), this filter may
+ * lead to suboptimal results. You better use {@link KeepLargestBlockFilter} instead, which works at
+ * the level of number-of-words instead of text densities.
+ */
+public final class KeepLargestFulltextBlockFilter extends HeuristicFilterBase implements
+    BoilerpipeFilter {
+  public static final KeepLargestFulltextBlockFilter INSTANCE =
+      new KeepLargestFulltextBlockFilter();
+
+  /**
+   * Keeps only the content block with the most full-text words (first one wins ties) and
+   * marks every other block as non-content with {@link DefaultLabels#MIGHT_BE_CONTENT}.
+   */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> blocks = doc.getTextBlocks();
+    if (blocks.size() < 2) {
+      return false;
+    }
+
+    TextBlock winner = null;
+    int winnerWords = -1;
+    for (TextBlock candidate : blocks) {
+      if (candidate.isContent()) {
+        final int words = getNumFullTextWords(candidate);
+        if (words > winnerWords) {
+          winner = candidate;
+          winnerWords = words;
+        }
+      }
+    }
+    if (winner == null) {
+      return false;
+    }
+
+    for (TextBlock block : blocks) {
+      final boolean keep = block == winner;
+      block.setIsContent(keep);
+      if (!keep) {
+        block.addLabel(DefaultLabels.MIGHT_BE_CONTENT);
+      }
+    }
+    return true;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/MinFulltextWordsFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/MinFulltextWordsFilter.java
new file mode 100755
index 0000000..5a3f6c3
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/MinFulltextWordsFilter.java
@@ -0,0 +1,59 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Keeps only those content blocks which contain at least k full-text words (measured by
+ * {@link HeuristicFilterBase#getNumFullTextWords(TextBlock)}). k is 30 by default.
+ */
+public final class MinFulltextWordsFilter extends HeuristicFilterBase implements BoilerpipeFilter {
+  public static final MinFulltextWordsFilter DEFAULT_INSTANCE = new MinFulltextWordsFilter(30);
+  private final int minWords;
+
+  /** Returns {@link #DEFAULT_INSTANCE}, which requires 30 full-text words per block. */
+  public static MinFulltextWordsFilter getDefaultInstance() {
+    return DEFAULT_INSTANCE;
+  }
+
+  /** Creates a filter that requires at least {@code minWords} full-text words per block. */
+  public MinFulltextWordsFilter(final int minWords) {
+    this.minWords = minWords;
+  }
+
+  /**
+   * Marks every content block with fewer than {@code minWords} full-text words as non-content.
+   * Blocks that are already non-content are skipped. Returns true if any block was demoted.
+   */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    boolean demotedAny = false;
+
+    for (TextBlock block : doc.getTextBlocks()) {
+      if (block.isContent() && getNumFullTextWords(block) < minWords) {
+        block.setIsContent(false);
+        demotedAny = true;
+      }
+    }
+
+    return demotedAny;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/NumWordsRulesClassifier.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/NumWordsRulesClassifier.java
new file mode 100755
index 0000000..f8ef2be
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/NumWordsRulesClassifier.java
@@ -0,0 +1,110 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
+import java.util.List;
+import java.util.ListIterator;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Classifies {@link TextBlock}s as content/not-content through rules that have been determined
+ * using the C4.8 machine learning algorithm, as described in the paper
+ * "Boilerplate Detection using Shallow Text Features" (WSDM 2010), particularly using number of
+ * words per block and link density per block.
+ */
+public class NumWordsRulesClassifier implements BoilerpipeFilter {
+ public static final NumWordsRulesClassifier INSTANCE = new NumWordsRulesClassifier();
+
+ /**
+ * Returns {@link #INSTANCE}, the shared instance of NumWordsRulesClassifier.
+ */
+ public static NumWordsRulesClassifier getInstance() {
+ return INSTANCE;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ List<TextBlock> textBlocks = doc.getTextBlocks();
+ boolean hasChanges = false;
+ // Slide a (previous, current, next) window over the blocks; EMPTY_START pads both ends.
+ ListIterator<TextBlock> it = textBlocks.listIterator();
+ if (!it.hasNext()) {
+ return false;
+ }
+ TextBlock prevBlock = TextBlock.EMPTY_START;
+ TextBlock currentBlock = it.next();
+ TextBlock nextBlock = it.hasNext() ? it.next() : TextBlock.EMPTY_START;
+
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+ // A second block exists: classify the rest; the last block gets EMPTY_START as next.
+ if (nextBlock != TextBlock.EMPTY_START) {
+ while (it.hasNext()) {
+ prevBlock = currentBlock;
+ currentBlock = nextBlock;
+ nextBlock = it.next();
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+ }
+ prevBlock = currentBlock;
+ currentBlock = nextBlock;
+ nextBlock = TextBlock.EMPTY_START;
+ hasChanges = classify(prevBlock, currentBlock, nextBlock) | hasChanges;
+ }
+
+ return hasChanges;
+ }
+
+ protected boolean classify(final TextBlock prev, final TextBlock curr, final TextBlock next) {
+ final boolean isContent;
+ // Nested threshold tests on the link densities and word counts of curr, prev and next.
+ if (curr.getLinkDensity() <= 0.333333) {
+ if (prev.getLinkDensity() <= 0.555556) {
+ if (curr.getNumWords() <= 16) {
+ if (next.getNumWords() <= 15) {
+ if (prev.getNumWords() <= 4) {
+ isContent = false;
+ } else {
+ isContent = true;
+ }
+ } else {
+ isContent = true;
+ }
+ } else {
+ isContent = true;
+ }
+ } else {
+ if (curr.getNumWords() <= 40) {
+ if (next.getNumWords() <= 17) {
+ isContent = false;
+ } else {
+ isContent = true;
+ }
+ } else {
+ isContent = true;
+ }
+ }
+ } else {
+ isContent = false;
+ }
+ // Presumably setIsContent reports whether the flag changed -- TODO confirm in TextBlock.
+ return curr.setIsContent(isContent);
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/TerminatingBlocksFinder.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/TerminatingBlocksFinder.java
new file mode 100755
index 0000000..fba9d14
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/TerminatingBlocksFinder.java
@@ -0,0 +1,109 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Finds blocks which are potentially indicating the end of an article text and marks them with
+ * {@link DefaultLabels#INDICATES_END_OF_TEXT}. This can be used in conjunction with a downstream
+ * {@link IgnoreBlocksAfterContentFilter}.
+ *
+ * @see IgnoreBlocksAfterContentFilter
+ */
+public class TerminatingBlocksFinder implements BoilerpipeFilter {
+  public static final TerminatingBlocksFinder INSTANCE = new TerminatingBlocksFinder();
+
+  /**
+   * Returns the singleton instance for TerminatingBlocksFinder.
+   */
+  public static TerminatingBlocksFinder getInstance() {
+    return INSTANCE;
+  }
+
+  /**
+   * Labels short blocks (fewer than 15 words) whose text looks like the start of a comment or
+   * feedback section with {@link DefaultLabels#INDICATES_END_OF_TEXT}.
+   *
+   * @return true if at least one block was labelled
+   */
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    boolean changes = false;
+
+    for (TextBlock tb : doc.getTextBlocks()) {
+      if (tb.getNumWords() < 15) {
+        final String text = tb.getText().trim();
+        final int len = text.length();
+        if (len >= 8) {
+          // Locale.ROOT: matching must not depend on the default locale (e.g. Turkish dotless i).
+          final String textLC = text.toLowerCase(java.util.Locale.ROOT);
+          if (textLC.startsWith("comments")
+              || startsWithNumber(textLC, len, " comments", " users responded in")
+              || textLC.startsWith("© reuters") || textLC.startsWith("please rate this")
+              || textLC.startsWith("post a comment") || textLC.contains("what you think...")
+              || textLC.contains("add your comment") || textLC.contains("add comment")
+              || textLC.contains("reader views") || textLC.contains("have your say")
+              || textLC.contains("reader comments") || textLC.contains("rätta artikeln")
+              || textLC.equals("thanks for your comments - this feedback is now closed")) {
+            tb.addLabel(DefaultLabels.INDICATES_END_OF_TEXT);
+            changes = true;
+          }
+        } else if (tb.getLinkDensity() == 1.0 && text.equals("Comment")) {
+          // A short, fully linked "Comment" block; labelling it counts as a change as well.
+          tb.addLabel(DefaultLabels.INDICATES_END_OF_TEXT);
+          changes = true;
+        }
+      }
+    }
+
+    return changes;
+  }
+
+  /**
+   * Checks whether the given text t starts with a sequence of digits, followed by one of the given
+   * strings.
+   *
+   * @param t The text to examine
+   * @param len The length of the text to examine
+   * @param str Any strings that may follow the digits.
+   * @return true if at least one combination matches
+   */
+  private static boolean startsWithNumber(final String t, final int len, final String... str) {
+    int j = 0;
+    while (j < len && isDigit(t.charAt(j))) {
+      j++;
+    }
+    if (j != 0) {
+      for (String s : str) {
+        if (t.startsWith(s, j)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /** Returns true for the ASCII digits '0' to '9' only. */
+  private static boolean isDigit(final char c) {
+    return c >= '0' && c <= '9';
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/package-info.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/package-info.java
new file mode 100755
index 0000000..8003cf8
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/english/package-info.java
@@ -0,0 +1,7 @@
+/**
+ * These BoilerpipeFilters have only been tested on English text.
+ *
+ * That is, they will probably work with other Western languages, but may need some parameter tuning to perform well.
+ */
+package com.kohlschutter.boilerpipe.filters.english;
+
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/AddPrecedingLabelsFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/AddPrecedingLabelsFilter.java
new file mode 100755
index 0000000..bb94437
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/AddPrecedingLabelsFilter.java
@@ -0,0 +1,82 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Set;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Adds the labels of the preceding block to the current block, optionally adding a prefix.
+ */
+public final class AddPrecedingLabelsFilter implements BoilerpipeFilter {
+
+ public static final AddPrecedingLabelsFilter INSTANCE = new AddPrecedingLabelsFilter("");
+ public static final AddPrecedingLabelsFilter INSTANCE_PRE = new AddPrecedingLabelsFilter("^");
+
+ private final String labelPrefix;
+
+ /**
+ * Creates a new {@link AddPrecedingLabelsFilter} instance.
+ *
+ * @param labelPrefix The prefix prepended to each label copied from the preceding block
+ * (empty for {@link #INSTANCE}, "^" for {@link #INSTANCE_PRE}).
+ */
+ public AddPrecedingLabelsFilter(final String labelPrefix) {
+ this.labelPrefix = labelPrefix;
+ }
+
+ public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+ List<TextBlock> textBlocks = doc.getTextBlocks();
+ if (textBlocks.size() < 2) {
+ return false;
+ }
+ // Iterate from the last block towards the first; blockBelow is the block after block.
+ boolean changes = false;
+ int remaining = textBlocks.size();
+ // NOTE(review): the --remaining guard stops before block 0 is read; confirm intended.
+ TextBlock blockBelow = null;
+ TextBlock block;
+ for (ListIterator<TextBlock> it = textBlocks.listIterator(textBlocks.size()); it.hasPrevious();) {
+ if (--remaining <= 0) {
+ break;
+ }
+ if (blockBelow == null) {
+ blockBelow = it.previous();
+ continue;
+ }
+ block = it.previous();
+ // Copy each label of block, with the prefix prepended, onto the block that follows it.
+ Set<String> labels = block.getLabels();
+ if (labels != null && !labels.isEmpty()) {
+ for (String l : labels) {
+ blockBelow.addLabel(labelPrefix + l);
+ }
+ changes = true;
+ }
+ blockBelow = block;
+ }
+
+ return changes;
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ArticleMetadataFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ArticleMetadataFilter.java
new file mode 100755
index 0000000..5656687
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ArticleMetadataFilter.java
@@ -0,0 +1,62 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.regex.Pattern;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Tries to find TextBlocks that consist of "article metadata".
+ */
+public class ArticleMetadataFilter implements BoilerpipeFilter {
+  /** Lines of digits, month names and time/zone letters, or lines starting with "By ". */
+  private static final Pattern[] PATTERNS_SHORT = {
+      Pattern.compile(
+          "^[0-9 \\,\\./]*\\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)?\\b[0-9 \\,\\:apm\\./]*([CPSDMGET]{2,3})?$"),
+      Pattern.compile("^[Bb]y ")
+  };
+
+  public static final ArticleMetadataFilter INSTANCE = new ArticleMetadataFilter();
+
+  private ArticleMetadataFilter() {
+  }
+
+  /** Marks blocks of at most 10 words that match a metadata pattern as labelled content. */
+  @Override
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    boolean labelled = false;
+    for (TextBlock block : doc.getTextBlocks()) {
+      if (block.getNumWords() <= 10) {
+        final String content = block.getText();
+        for (Pattern pattern : PATTERNS_SHORT) {
+          if (pattern.matcher(content).find()) {
+            block.setIsContent(true);
+            block.addLabel(DefaultLabels.ARTICLE_METADATA);
+            labelled = true;
+          }
+        }
+      }
+    }
+    return labelled;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/BlockProximityFusion.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/BlockProximityFusion.java
new file mode 100755
index 0000000..3a62999
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/BlockProximityFusion.java
@@ -0,0 +1,122 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.Iterator;
+import java.util.List;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Fuses adjacent blocks if their distance (in blocks) does not exceed a certain limit. This
+ * probably makes sense only in cases where an upstream filter already has removed some blocks.
+ */
+public final class BlockProximityFusion implements BoilerpipeFilter {
+
+  private final int maxBlocksDistance;
+
+  public static final BlockProximityFusion MAX_DISTANCE_1 = new BlockProximityFusion(1, false,
+      false);
+  public static final BlockProximityFusion MAX_DISTANCE_1_SAME_TAGLEVEL = new BlockProximityFusion(
+      1, false, true);
+  public static final BlockProximityFusion MAX_DISTANCE_1_CONTENT_ONLY = new BlockProximityFusion(
+      1, true, false);
+  public static final BlockProximityFusion MAX_DISTANCE_1_CONTENT_ONLY_SAME_TAGLEVEL =
+      new BlockProximityFusion(1, true, true);
+
+  private final boolean contentOnly;
+
+  private final boolean sameTagLevelOnly;
+
+  /**
+   * Creates a new {@link BlockProximityFusion} instance.
+   * @param maxBlocksDistance The maximum distance in blocks.
+   * @param contentOnly If true, a block is only merged into a previous block that is content.
+   * @param sameTagLevelOnly If true, a block is only merged into a block of the same tag level.
+   */
+  public BlockProximityFusion(final int maxBlocksDistance, final boolean contentOnly,
+      final boolean sameTagLevelOnly) {
+    this.maxBlocksDistance = maxBlocksDistance;
+    this.contentOnly = contentOnly;
+    this.sameTagLevelOnly = sameTagLevelOnly;
+  }
+
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    List<TextBlock> textBlocks = doc.getTextBlocks();
+    if (textBlocks.size() < 2) {
+      return false;
+    }
+
+    boolean changes = false;
+    TextBlock prevBlock;
+
+    int offset;
+    if (contentOnly) {
+      prevBlock = null;
+      offset = 0;
+      for (TextBlock tb : textBlocks) {
+        offset++; // ends up one past the index of the first content block
+        if (tb.isContent()) {
+          prevBlock = tb;
+          break;
+        }
+      }
+      if (prevBlock == null) {
+        return false;
+      }
+    } else {
+      prevBlock = textBlocks.get(0);
+      offset = 1;
+    }
+
+    for (Iterator<TextBlock> it = textBlocks.listIterator(offset); it.hasNext();) {
+      TextBlock block = it.next();
+      if (!block.isContent()) { // a non-content block is never merged into its predecessor
+        prevBlock = block;
+        continue;
+      }
+      int diffBlocks = block.getOffsetBlocksStart() - prevBlock.getOffsetBlocksEnd() - 1;
+      if (diffBlocks <= maxBlocksDistance) {
+        boolean ok = true;
+        if (contentOnly) {
+          if (!prevBlock.isContent() || !block.isContent()) { // block is always content here
+            ok = false;
+          }
+        }
+        if (ok && sameTagLevelOnly && prevBlock.getTagLevel() != block.getTagLevel()) {
+          ok = false;
+        }
+        if (ok) {
+          prevBlock.mergeNext(block);
+          it.remove(); // drop the merged block from the list
+          changes = true;
+        } else {
+          prevBlock = block;
+        }
+      } else {
+        prevBlock = block;
+      }
+    }
+
+    return changes;
+  }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ContentFusion.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ContentFusion.java
new file mode 100755
index 0000000..d7db7dd
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ContentFusion.java
@@ -0,0 +1,72 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.List;
+import java.util.ListIterator;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Merges a block into its predecessor if the predecessor is content and the block has a link
+ * density below 0.56 and is not labelled {@link DefaultLabels#STRICTLY_NOT_CONTENT}. The document
+ * is scanned repeatedly until a scan merges nothing.
+ */
+public final class ContentFusion implements BoilerpipeFilter {
+
+  public static final ContentFusion INSTANCE = new ContentFusion();
+
+  /** Creates a new {@link ContentFusion} instance. */
+  public ContentFusion() {
+  }
+
+  /** @return {@code true} if at least one block was merged, {@code false} otherwise */
+  @Override
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    List<TextBlock> textBlocks = doc.getTextBlocks();
+    if (textBlocks.size() < 2) {
+      return false;
+    }
+    boolean anyMerged = false;
+    boolean mergedInPass;
+    do {
+      mergedInPass = false;
+      // Start every pass at the head. A predecessor carried over from the previous pass could
+      // be the block the iterator returns next, i.e. a block would be merged into itself.
+      TextBlock prevBlock = textBlocks.get(0);
+      for (ListIterator<TextBlock> it = textBlocks.listIterator(1); it.hasNext();) {
+        TextBlock block = it.next();
+        if (prevBlock.isContent() && block.getLinkDensity() < 0.56
+            && !block.hasLabel(DefaultLabels.STRICTLY_NOT_CONTENT)) {
+          prevBlock.mergeNext(block);
+          it.remove();
+          mergedInPass = true;
+        } else {
+          prevBlock = block;
+        }
+      }
+      anyMerged |= mergedInPass;
+    } while (mergedInPass);
+    return anyMerged;
+  }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/DocumentTitleMatchClassifier.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/DocumentTitleMatchClassifier.java
new file mode 100755
index 0000000..0b4d1d2
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/DocumentTitleMatchClassifier.java
@@ -0,0 +1,170 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Marks {@link TextBlock}s which contain parts of the HTML {@code <TITLE>} tag, using
+ * some heuristics which are quite specific to the news domain.
+ */
+public final class DocumentTitleMatchClassifier implements BoilerpipeFilter {
+
+  /** Separators tried in turn; the longest part of each split becomes a title candidate. */
+  private static final String[] LONGEST_PART_SEPARATORS = {
+      "[ ]*[\\|»|-][ ]*",
+      "[ ]*[\\|»|:][ ]*",
+      "[ ]*[\\|»|:\\(\\)][ ]*",
+      "[ ]*[\\|»|:\\(\\)\\-][ ]*",
+      "[ ]*[\\|»|,|:\\(\\)\\-][ ]*",
+      "[ ]*[\\|»|,|:\\(\\)\\-\u00a0][ ]*"};
+
+  /** Punctuation stripped from a block's text for the second, more lenient comparison. */
+  private static final Pattern PAT_REMOVE_CHARACTERS = Pattern.compile("[\\?\\!\\.\\-\\:]+");
+
+  /** Normalized title candidates, or {@code null} if no usable title was supplied. */
+  private final Set<String> potentialTitles;
+
+  /**
+   * Derives the set of title candidates from the document title.
+   *
+   * @param title the document title; {@code null} or a blank title disables the filter
+   */
+  public DocumentTitleMatchClassifier(String title) {
+    final String normalized = title == null ? "" : normalize(title);
+    if (normalized.length() == 0) {
+      this.potentialTitles = null;
+      return;
+    }
+    final Set<String> candidates = new HashSet<String>();
+    candidates.add(normalized);
+    for (final String separator : LONGEST_PART_SEPARATORS) {
+      final String longest = getLongestPart(normalized, separator);
+      if (longest != null) {
+        candidates.add(longest);
+      }
+    }
+    addPotentialTitles(candidates, normalized, "[ ]+[\\|][ ]+", 4);
+    addPotentialTitles(candidates, normalized, "[ ]+[\\-][ ]+", 4);
+    // Also try the title without its last, respectively first, " - "-separated segment.
+    candidates.add(normalized.replaceFirst(" - [^\\-]+$", ""));
+    candidates.add(normalized.replaceFirst("^[^\\-]+ - ", ""));
+    this.potentialTitles = candidates;
+  }
+
+  /**
+   * Returns the title candidates derived from the title passed to the constructor.
+   *
+   * @return the candidate set, or {@code null} if the title was {@code null} or blank
+   */
+  public Set<String> getPotentialTitles() {
+    return potentialTitles;
+  }
+
+  /**
+   * Normalizes a title or block text: NBSP to space, apostrophes dropped, trimmed, lower-cased
+   * with {@code Locale.ROOT} so that matching does not depend on the JVM's default locale.
+   */
+  private static String normalize(final String s) {
+    final String cleaned = s.replace('\u00a0', ' ').replace("'", "");
+    return cleaned.trim().toLowerCase(java.util.Locale.ROOT);
+  }
+
+  private static int countWords(final String s) {
+    // NOTE(review): in a Java string literal "\b" is the backspace character, not the regex
+    // word boundary, so this effectively splits on runs of spaces. Kept unchanged on purpose.
+    return s.split("[\b ]+").length;
+  }
+
+  /**
+   * Adds every part of the split title that has at least {@code minWords} words and does not
+   * contain ".com". Nothing is added when the pattern does not split the title.
+   */
+  private static void addPotentialTitles(final Set<String> target, final String title,
+      final String pattern, final int minWords) {
+    final String[] parts = title.split(pattern);
+    if (parts.length == 1) {
+      return;
+    }
+    for (final String part : parts) {
+      if (part.contains(".com")) {
+        continue;
+      }
+      if (countWords(part) >= minWords) {
+        target.add(part);
+      }
+    }
+  }
+
+  /**
+   * Splits the title and returns its longest part, ignoring parts that contain ".com".
+   *
+   * @return the trimmed longest part, or {@code null} if the pattern does not split the title
+   *         or no non-empty part remains
+   */
+  private static String getLongestPart(final String title, final String pattern) {
+    final String[] parts = title.split(pattern);
+    if (parts.length == 1) {
+      return null;
+    }
+    int longestNumWords = 0;
+    String longestPart = "";
+    for (final String part : parts) {
+      if (part.contains(".com")) {
+        continue;
+      }
+      final int numWords = countWords(part);
+      if (numWords > longestNumWords || part.length() > longestPart.length()) {
+        longestNumWords = numWords;
+        longestPart = part;
+      }
+    }
+    return longestPart.length() == 0 ? null : longestPart.trim();
+  }
+
+  /**
+   * Labels the first block whose normalized text (with or without punctuation) equals one of
+   * the title candidates as {@link DefaultLabels#TITLE}.
+   *
+   * @return {@code true} if a block was labelled
+   */
+  @Override
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    if (potentialTitles == null) {
+      return false;
+    }
+    for (final TextBlock tb : doc.getTextBlocks()) {
+      final String text = normalize(tb.getText());
+      final String stripped = PAT_REMOVE_CHARACTERS.matcher(text).replaceAll("").trim();
+      if (potentialTitles.contains(text) || potentialTitles.contains(stripped)) {
+        tb.addLabel(DefaultLabels.TITLE);
+        return true;
+      }
+    }
+    return false;
+  }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ExpandTitleToContentFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ExpandTitleToContentFilter.java
new file mode 100755
index 0000000..2820d00
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ExpandTitleToContentFilter.java
@@ -0,0 +1,70 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Promotes to content every {@link TextBlock} labelled {@link DefaultLabels#MIGHT_BE_CONTENT}
+ * that lies between the headline (the last block labelled {@link DefaultLabels#TITLE} ahead of
+ * the content) and the first block already marked content.
+ *
+ * This filter is quite specific to the news domain.
+ */
+public final class ExpandTitleToContentFilter implements BoilerpipeFilter {
+  public static final ExpandTitleToContentFilter INSTANCE = new ExpandTitleToContentFilter();
+
+  /**
+   * Returns the shared {@link #INSTANCE} of this filter.
+   */
+  public static ExpandTitleToContentFilter getInstance() {
+    return INSTANCE;
+  }
+
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    int titleIndex = -1;
+    int firstContentIndex = -1;
+    int index = 0;
+    for (TextBlock block : doc.getTextBlocks()) {
+      if (block.hasLabel(DefaultLabels.TITLE)) {
+        titleIndex = index;
+      }
+      if (block.isContent()) {
+        firstContentIndex = index;
+        break;
+      }
+      index++;
+    }
+    // No headline ahead of the content (or no content at all): nothing to expand.
+    if (titleIndex == -1 || firstContentIndex <= titleIndex) {
+      return false;
+    }
+    boolean changed = false;
+    for (TextBlock block : doc.getTextBlocks().subList(titleIndex, firstContentIndex)) {
+      if (block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT) && block.setIsContent(true)) {
+        changed = true;
+      }
+    }
+    return changed;
+  }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/KeepLargestBlockFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/KeepLargestBlockFilter.java
new file mode 100755
index 0000000..8368dc6
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/KeepLargestBlockFilter.java
@@ -0,0 +1,117 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.List;
+import java.util.ListIterator;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Keeps only the content {@link TextBlock} with the most words; on a tie the first one wins. The
+ * winner is labelled {@link DefaultLabels#VERY_LIKELY_CONTENT}; every other block is marked
+ * "not content" and labelled {@link DefaultLabels#MIGHT_BE_CONTENT}.
+ *
+ * The "expand" variants then re-mark blocks around the winner that share its tag level and have
+ * at least {@code minWords} words.
+ */
+public final class KeepLargestBlockFilter implements BoilerpipeFilter {
+  public static final KeepLargestBlockFilter INSTANCE = new KeepLargestBlockFilter(false, 0);
+  public static final KeepLargestBlockFilter INSTANCE_EXPAND_TO_SAME_TAGLEVEL =
+      new KeepLargestBlockFilter(true, 0);
+  public static final KeepLargestBlockFilter INSTANCE_EXPAND_TO_SAME_TAGLEVEL_MIN_WORDS =
+      new KeepLargestBlockFilter(true, 150);
+  private final boolean expandToSameLevelText;
+  private final int minWords;
+
+  public KeepLargestBlockFilter(boolean expandToSameLevelText, final int minWords) {
+    this.expandToSameLevelText = expandToSameLevelText;
+    this.minWords = minWords;
+  }
+
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> blocks = doc.getTextBlocks();
+    if (blocks.size() < 2) {
+      return false;
+    }
+
+    // Find the first content block with the highest word count and remember its position.
+    TextBlock winner = null;
+    int winnerWords = -1;
+    int winnerIndex = -1;
+    int winnerLevel = -1;
+    int position = 0;
+    for (TextBlock candidate : blocks) {
+      if (candidate.isContent()) {
+        final int words = candidate.getNumWords();
+        if (words > winnerWords) {
+          winner = candidate;
+          winnerWords = words;
+          winnerIndex = position;
+          if (expandToSameLevelText) {
+            winnerLevel = candidate.getTagLevel();
+          }
+        }
+      }
+      position++;
+    }
+
+    // Demote everything except the winner.
+    for (TextBlock block : blocks) {
+      final boolean isWinner = (block == winner);
+      block.setIsContent(isWinner);
+      block.addLabel(
+          isWinner ? DefaultLabels.VERY_LIKELY_CONTENT : DefaultLabels.MIGHT_BE_CONTENT);
+    }
+
+    if (expandToSameLevelText && winnerIndex != -1) {
+      // Walk away from the winner in both directions until a lower tag level is reached.
+      for (ListIterator<TextBlock> it = blocks.listIterator(winnerIndex); it.hasPrevious();) {
+        if (!restoreOnLevel(it.previous(), winnerLevel)) {
+          break;
+        }
+      }
+      for (ListIterator<TextBlock> it = blocks.listIterator(winnerIndex); it.hasNext();) {
+        if (!restoreOnLevel(it.next(), winnerLevel)) {
+          break;
+        }
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Re-marks {@code tb} as content if it is on {@code level} and has at least minWords words.
+   *
+   * @return {@code false} if {@code tb} is on a lower tag level, which ends the scan
+   */
+  private boolean restoreOnLevel(final TextBlock tb, final int level) {
+    final int tagLevel = tb.getTagLevel();
+    if (tagLevel < level) {
+      return false;
+    }
+    if (tagLevel == level && tb.getNumWords() >= minWords) {
+      tb.setIsContent(true);
+    }
+    return true;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/LabelFusion.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/LabelFusion.java
new file mode 100755
index 0000000..c1bcf7a
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/LabelFusion.java
@@ -0,0 +1,87 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Fuses adjacent blocks whose markup labels (labels starting with
+ * {@link DefaultLabels#MARKUP_PREFIX}) are equal.
+ */
+public final class LabelFusion implements BoilerpipeFilter {
+
+  public static final LabelFusion INSTANCE = new LabelFusion();
+
+  /**
+   * Not instantiable from outside; use {@link #INSTANCE}.
+   */
+  private LabelFusion() {
+  }
+
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> blocks = doc.getTextBlocks();
+    if (blocks.size() < 2) {
+      return false;
+    }
+    boolean merged = false;
+    TextBlock anchor = blocks.get(0);
+    final Iterator<TextBlock> rest = blocks.listIterator(1);
+    while (rest.hasNext()) {
+      final TextBlock current = rest.next();
+      if (!equalLabels(anchor.getLabels(), current.getLabels())) {
+        anchor = current;
+        continue;
+      }
+      anchor.mergeNext(current);
+      rest.remove();
+      merged = true;
+    }
+    return merged;
+  }
+
+  /**
+   * Compares the markup labels of two blocks; a {@code null} label set never equals anything.
+   */
+  private boolean equalLabels(Set<String> labels, Set<String> labels2) {
+    final boolean bothPresent = labels != null && labels2 != null;
+    return bothPresent && markupLabelsOnly(labels).equals(markupLabelsOnly(labels2));
+  }
+
+  /**
+   * Returns a new set holding only the labels that start with the markup prefix.
+   */
+  private Set<String> markupLabelsOnly(final Set<String> set1) {
+    final Set<String> markup = new HashSet<String>();
+    for (final String label : set1) {
+      if (label.startsWith(DefaultLabels.MARKUP_PREFIX)) {
+        markup.add(label);
+      }
+    }
+    return markup;
+  }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/LargeBlockSameTagLevelToContentFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/LargeBlockSameTagLevelToContentFilter.java
new file mode 100755
index 0000000..40d0aee
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/LargeBlockSameTagLevelToContentFilter.java
@@ -0,0 +1,70 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Marks as content every block that:
+ * <ol>
+ * <li>is on the same tag level as the first content block labelled
+ * {@link DefaultLabels#VERY_LIKELY_CONTENT}, and</li>
+ * <li>has at least 100 words.</li>
+ * </ol>
+ */
+public final class LargeBlockSameTagLevelToContentFilter implements BoilerpipeFilter {
+  public static final LargeBlockSameTagLevelToContentFilter INSTANCE =
+      new LargeBlockSameTagLevelToContentFilter();
+
+  private LargeBlockSameTagLevelToContentFilter() {
+  }
+
+  /** @return {@code true} if at least one block was newly marked as content */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    final int mainLevel = mainContentTagLevel(doc);
+    if (mainLevel == -1) {
+      return false;
+    }
+
+    boolean promoted = false;
+    for (TextBlock tb : doc.getTextBlocks()) {
+      if (tb.isContent() || tb.getNumWords() < 100 || tb.getTagLevel() != mainLevel) {
+        continue;
+      }
+      tb.setIsContent(true);
+      promoted = true;
+    }
+    return promoted;
+  }
+
+  /**
+   * Returns the tag level of the first content block labelled VERY_LIKELY_CONTENT, or -1.
+   */
+  private static int mainContentTagLevel(final TextDocument doc) {
+    for (TextBlock tb : doc.getTextBlocks()) {
+      if (tb.isContent() && tb.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
+        return tb.getTagLevel();
+      }
+    }
+    return -1;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ListAtEndFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ListAtEndFilter.java
new file mode 100755
index 0000000..d100dfc
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/ListAtEndFilter.java
@@ -0,0 +1,57 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Marks as content the list-item blocks that follow a main-content block without interruption,
+ * sit on a higher tag level than it, might be content and have a link density of 0.
+ */
+public final class ListAtEndFilter implements BoilerpipeFilter {
+  public static final ListAtEndFilter INSTANCE = new ListAtEndFilter();
+
+  private ListAtEndFilter() {
+  }
+
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    boolean changed = false;
+    // Tag level of the main-content block being followed; MAX_VALUE while there is none.
+    int mainLevel = Integer.MAX_VALUE;
+    for (TextBlock tb : doc.getTextBlocks()) {
+      if (tb.isContent() && tb.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
+        mainLevel = tb.getTagLevel();
+        continue;
+      }
+      final boolean nestedListItem = tb.getTagLevel() > mainLevel
+          && tb.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
+          && tb.hasLabel(DefaultLabels.LI) && tb.getLinkDensity() == 0;
+      if (nestedListItem) {
+        tb.setIsContent(true);
+        changed = true;
+      } else {
+        mainLevel = Integer.MAX_VALUE;
+      }
+    }
+    return changed;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/SimpleBlockFusionProcessor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/SimpleBlockFusionProcessor.java
new file mode 100755
index 0000000..d755a11
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/SimpleBlockFusionProcessor.java
@@ -0,0 +1,67 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.Iterator;
+import java.util.List;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Merges each block into its predecessor while both have exactly the same text density.
+ */
+public class SimpleBlockFusionProcessor implements BoilerpipeFilter {
+  public static final SimpleBlockFusionProcessor INSTANCE = new SimpleBlockFusionProcessor();
+
+  /**
+   * Returns the shared {@link #INSTANCE} of this processor.
+   */
+  public static SimpleBlockFusionProcessor getInstance() {
+    return INSTANCE;
+  }
+
+  /**
+   * @return {@code true} if at least one pair of blocks was merged
+   */
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> blocks = doc.getTextBlocks();
+    if (blocks.size() < 2) {
+      return false;
+    }
+
+    boolean merged = false;
+    TextBlock anchor = blocks.get(0);
+    final Iterator<TextBlock> rest = blocks.listIterator(1);
+    while (rest.hasNext()) {
+      final TextBlock next = rest.next();
+      if (anchor.getTextDensity() != next.getTextDensity()) {
+        anchor = next;
+        continue;
+      }
+      // Same density: fold the block into the anchor and take it out of the list.
+      anchor.mergeNext(next);
+      rest.remove();
+      merged = true;
+    }
+    return merged;
+  }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/TrailingHeadlineToBoilerplateFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/TrailingHeadlineToBoilerplateFilter.java
new file mode 100755
index 0000000..74a8fd2
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/TrailingHeadlineToBoilerplateFilter.java
@@ -0,0 +1,64 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
+
+import java.util.List;
+import java.util.ListIterator;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Marks trailing headlines ({@link TextBlock}s that have the label {@link DefaultLabels#HEADING})
+ * as boilerplate. Trailing means they are marked content and are below any other content block.
+ */
+public final class TrailingHeadlineToBoilerplateFilter implements BoilerpipeFilter {
+  public static final TrailingHeadlineToBoilerplateFilter INSTANCE =
+      new TrailingHeadlineToBoilerplateFilter();
+
+  /**
+   * Returns the shared {@link #INSTANCE} of this filter.
+   */
+  public static TrailingHeadlineToBoilerplateFilter getInstance() {
+    return INSTANCE;
+  }
+
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> blocks = doc.getTextBlocks();
+    boolean changed = false;
+
+    // Walk from the end of the document towards the start.
+    final ListIterator<TextBlock> it = blocks.listIterator(blocks.size());
+    while (it.hasPrevious()) {
+      final TextBlock tb = it.previous();
+      if (!tb.isContent()) {
+        continue;
+      }
+      if (!tb.hasLabel(DefaultLabels.HEADING)) {
+        break; // first content block from the end that is not a heading: stop
+      }
+      tb.setIsContent(false);
+      changed = true;
+    }
+    return changed;
+  }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/package-info.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/package-info.java
new file mode 100755
index 0000000..3b42b3b
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/heuristics/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * These BoilerpipeFilters are pure heuristics.
+ */
+package com.kohlschutter.boilerpipe.filters.heuristics;
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/BoilerplateBlockFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/BoilerplateBlockFilter.java
new file mode 100755
index 0000000..e250a32
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/BoilerplateBlockFilter.java
@@ -0,0 +1,64 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import java.util.Iterator;
+import java.util.List;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Removes {@link TextBlock}s which have explicitly been marked as "not content".
+ */
+public final class BoilerplateBlockFilter implements BoilerpipeFilter {
+  public static final BoilerplateBlockFilter INSTANCE = new BoilerplateBlockFilter(null);
+  public static final BoilerplateBlockFilter INSTANCE_KEEP_TITLE = new BoilerplateBlockFilter(
+      DefaultLabels.TITLE);
+  /** Label that protects a non-content block from removal; {@code null} protects none. */
+  private final String labelToKeep;
+
+  /** Returns the singleton instance for BoilerplateBlockFilter. */
+  public static BoilerplateBlockFilter getInstance() {
+    return INSTANCE;
+  }
+
+  /** Creates a filter that spares non-content blocks carrying {@code labelToKeep}. */
+  public BoilerplateBlockFilter(final String labelToKeep) {
+    this.labelToKeep = labelToKeep;
+  }
+
+  /** Removes unprotected non-content blocks; returns {@code true} if any was removed. */
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    List<TextBlock> textBlocks = doc.getTextBlocks();
+    boolean hasChanges = false;
+    for (Iterator<TextBlock> it = textBlocks.iterator(); it.hasNext();) {
+      TextBlock tb = it.next();
+      // Test the configured label itself rather than a fixed TITLE label.
+      if (!tb.isContent() && (labelToKeep == null || !tb.hasLabel(labelToKeep))) {
+        it.remove();
+        hasChanges = true;
+      }
+    }
+
+    return hasChanges;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/InvertedFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/InvertedFilter.java
new file mode 100755
index 0000000..ae45c79
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/InvertedFilter.java
@@ -0,0 +1,49 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import java.util.List;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Reverts the "isContent" flag for all {@link TextBlock}s
+ */
+public final class InvertedFilter implements BoilerpipeFilter {
+  public static final InvertedFilter INSTANCE = new InvertedFilter();
+
+  private InvertedFilter() {
+    // singleton: use INSTANCE
+  }
+
+  /**
+   * Flips the content flag of every block in {@code doc}.
+   * @return {@code false} when the document has no blocks, {@code true} otherwise
+   */
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> blocks = doc.getTextBlocks();
+    for (final TextBlock block : blocks) {
+      final boolean wasContent = block.isContent();
+      block.setIsContent(!wasContent);
+    }
+    return !blocks.isEmpty();
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/LabelToBoilerplateFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/LabelToBoilerplateFilter.java
new file mode 100755
index 0000000..a7bc523
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/LabelToBoilerplateFilter.java
@@ -0,0 +1,57 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+
+/**
+ * Marks all blocks that contain a given label as "boilerplate".
+ */
+public final class LabelToBoilerplateFilter implements BoilerpipeFilter {
+  public static final LabelToBoilerplateFilter INSTANCE_STRICTLY_NOT_CONTENT =
+      new LabelToBoilerplateFilter(DefaultLabels.STRICTLY_NOT_CONTENT);
+
+  private String[] labels;
+
+  /** Creates a filter that demotes content blocks carrying any of the given labels. */
+  public LabelToBoilerplateFilter(final String... label) {
+    this.labels = label;
+  }
+
+  /** Demotes every matching content block; returns {@code true} if any block changed. */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    boolean demotedAny = false;
+    for (final TextBlock block : doc.getTextBlocks()) {
+      if (!block.isContent()) {
+        continue;
+      }
+      for (final String candidate : labels) {
+        if (block.hasLabel(candidate)) {
+          block.setIsContent(false);
+          demotedAny = true;
+          break; // one matching label suffices
+        }
+      }
+    }
+    return demotedAny;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/LabelToContentFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/LabelToContentFilter.java
new file mode 100755
index 0000000..3daa874
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/LabelToContentFilter.java
@@ -0,0 +1,53 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Marks all blocks that contain a given label as "content".
+ */
+public final class LabelToContentFilter implements BoilerpipeFilter {
+  private String[] labels;
+
+  /** Creates a filter that promotes non-content blocks carrying any of the given labels. */
+  public LabelToContentFilter(final String... label) {
+    this.labels = label;
+  }
+
+  /** Promotes every matching non-content block; returns {@code true} if any block changed. */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    boolean promotedAny = false;
+    for (final TextBlock block : doc.getTextBlocks()) {
+      if (block.isContent()) {
+        continue;
+      }
+      for (final String candidate : labels) {
+        if (block.hasLabel(candidate)) {
+          block.setIsContent(true);
+          promotedAny = true;
+          break; // one matching label suffices
+        }
+      }
+    }
+    return promotedAny;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MarkEverythingBoilerplateFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MarkEverythingBoilerplateFilter.java
new file mode 100755
index 0000000..1856ff0
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MarkEverythingBoilerplateFilter.java
@@ -0,0 +1,49 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Marks all blocks as boilerplate.
+ */
+public final class MarkEverythingBoilerplateFilter implements BoilerpipeFilter {
+  public static final MarkEverythingBoilerplateFilter INSTANCE =
+      new MarkEverythingBoilerplateFilter();
+
+  private MarkEverythingBoilerplateFilter() {
+    // singleton: use INSTANCE
+  }
+
+  /** Clears the content flag wherever it is set; returns {@code true} if any block changed. */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    boolean demotedAny = false;
+    for (final TextBlock block : doc.getTextBlocks()) {
+      if (!block.isContent()) {
+        continue;
+      }
+      block.setIsContent(false);
+      demotedAny = true;
+    }
+
+    return demotedAny;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MarkEverythingContentFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MarkEverythingContentFilter.java
new file mode 100755
index 0000000..a2d9779
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MarkEverythingContentFilter.java
@@ -0,0 +1,48 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Marks all blocks as content.
+ */
+public final class MarkEverythingContentFilter implements BoilerpipeFilter {
+  public static final MarkEverythingContentFilter INSTANCE = new MarkEverythingContentFilter();
+
+  private MarkEverythingContentFilter() {
+    // singleton: use INSTANCE
+  }
+
+  /** Sets the content flag wherever it is clear; returns {@code true} if any block changed. */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    boolean promotedAny = false;
+    for (final TextBlock block : doc.getTextBlocks()) {
+      if (block.isContent()) {
+        continue;
+      }
+      block.setIsContent(true);
+      promotedAny = true;
+    }
+
+    return promotedAny;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MinClauseWordsFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MinClauseWordsFilter.java
new file mode 100755
index 0000000..0c2899b
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MinClauseWordsFilter.java
@@ -0,0 +1,108 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Keeps only blocks that have at least one segment fragment ("clause") with at least <em>k</em>
+ * words (default: 5).
+ *
+ * NOTE: You might consider using the {@link SplitParagraphBlocksFilter} upstream.
+ *
+ * @see SplitParagraphBlocksFilter
+ */
+public final class MinClauseWordsFilter implements BoilerpipeFilter {
+  // Compiled once and shared: Pattern instances are immutable and thread-safe.
+  private static final Pattern PAT_CLAUSE_DELIMITER = Pattern
+      .compile("[\\p{L}\\d][\\,\\.\\:\\;\\!\\?]+([ \\n\\r]+|$)");
+  private static final Pattern PAT_WHITESPACE = Pattern.compile("[ \\n\\r]+");
+
+  public static final MinClauseWordsFilter INSTANCE = new MinClauseWordsFilter(5, false);
+  private final int minWords;
+  private final boolean acceptClausesWithoutDelimiter;
+
+  /** Creates a filter that only counts clauses terminated by a delimiter. */
+  public MinClauseWordsFilter(final int minWords) {
+    this(minWords, false);
+  }
+
+  /** Creates a filter; the flag also lets trailing text without a delimiter count. */
+  public MinClauseWordsFilter(final int minWords, final boolean acceptClausesWithoutDelimiter) {
+    this.minWords = minWords;
+    this.acceptClausesWithoutDelimiter = acceptClausesWithoutDelimiter;
+  }
+
+  /** Demotes content blocks lacking a long-enough clause; true if any block changed. */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    boolean changes = false;
+    for (TextBlock tb : doc.getTextBlocks()) {
+      if (!tb.isContent()) {
+        continue;
+      }
+      final String text = tb.getText();
+      Matcher m = PAT_CLAUSE_DELIMITER.matcher(text);
+      boolean found = m.find();
+      int start = 0;
+      int end;
+      boolean hasClause = false;
+      while (found) {
+        // the match begins on the letter/digit before the punctuation; keep that character
+        end = m.start() + 1;
+        hasClause = isClause(text.subSequence(start, end));
+        start = m.end();
+        if (hasClause) {
+          break;
+        }
+        found = m.find();
+      }
+      end = text.length();
+
+      // since clauses should *always end* with a delimiter, we normally
+      // don't consider text without one
+      if (acceptClausesWithoutDelimiter) {
+        hasClause |= isClause(text.subSequence(start, end));
+      }
+
+      if (!hasClause) {
+        tb.setIsContent(false);
+        changes = true;
+      }
+    }
+    return changes;
+  }
+
+  /** Counts words as whitespace runs plus one and compares against {@code minWords}. */
+  private boolean isClause(final CharSequence text) {
+    Matcher m = PAT_WHITESPACE.matcher(text);
+    int n = 1;
+    while (m.find()) {
+      n++;
+      if (n >= minWords) {
+        return true;
+      }
+    }
+    return n >= minWords;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MinWordsFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MinWordsFilter.java
new file mode 100755
index 0000000..a3da35a
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/MinWordsFilter.java
@@ -0,0 +1,53 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Keeps only those content blocks which contain at least <em>k</em> words.
+ */
+public final class MinWordsFilter implements BoilerpipeFilter {
+  private final int minWords;
+
+  /** Creates a filter that keeps content blocks with at least {@code minWords} words. */
+  public MinWordsFilter(final int minWords) {
+    this.minWords = minWords;
+  }
+
+  /**
+   * Demotes every content block whose word count is below the configured minimum.
+   *
+   * @return {@code true} if at least one block was demoted
+   */
+  public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
+    boolean demotedAny = false;
+    for (final TextBlock block : doc.getTextBlocks()) {
+      final boolean tooShort = block.isContent() && block.getNumWords() < minWords;
+      if (tooShort) {
+        block.setIsContent(false);
+        demotedAny = true;
+      }
+    }
+
+    return demotedAny;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/SplitParagraphBlocksFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/SplitParagraphBlocksFilter.java
new file mode 100755
index 0000000..4d7261e
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/SplitParagraphBlocksFilter.java
@@ -0,0 +1,79 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Splits TextBlocks at paragraph boundaries.
+ *
+ * NOTE: This is not fully supported (i.e., it will break highlighting support via
+ * #getContainedTextElements()), but this one probably is necessary for some other filters.
+ *
+ * @see MinClauseWordsFilter
+ */
+public final class SplitParagraphBlocksFilter implements BoilerpipeFilter {
+  public static final SplitParagraphBlocksFilter INSTANCE = new SplitParagraphBlocksFilter();
+
+  /**
+   * Returns the singleton instance for SplitParagraphBlocksFilter.
+   */
+  public static SplitParagraphBlocksFilter getInstance() {
+    return INSTANCE;
+  }
+
+  /**
+   * Replaces each block whose text splits into two or more pieces at line breaks by one
+   * new block per piece; the pieces inherit the content flag and labels of their source.
+   * @return {@code true} if at least one block was split
+   */
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> original = doc.getTextBlocks();
+    final List<TextBlock> rebuilt = new ArrayList<TextBlock>();
+    boolean splitAny = false;
+    for (final TextBlock block : original) {
+      final String[] pieces = block.getText().split("[\n\r]+");
+      if (pieces.length >= 2) {
+        final boolean contentFlag = block.isContent();
+        final Set<String> inherited = block.getLabels();
+        for (final String piece : pieces) {
+          final TextBlock part = new TextBlock(piece);
+          part.setIsContent(contentFlag);
+          part.addLabels(inherited);
+          rebuilt.add(part);
+        }
+        splitAny = true;
+      } else {
+        rebuilt.add(block);
+      }
+    }
+
+    if (splitAny) {
+      original.clear();
+      original.addAll(rebuilt);
+    }
+    return splitAny;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/SurroundingToContentFilter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/SurroundingToContentFilter.java
new file mode 100755
index 0000000..95e6595
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/SurroundingToContentFilter.java
@@ -0,0 +1,77 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
+import java.util.Iterator;
+import java.util.List;
+
+import com.kohlschutter.boilerpipe.BoilerpipeFilter;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.conditions.TextBlockCondition;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Marks blocks as "content" if their preceding and following blocks are both already marked
+ * "content", and the given {@link TextBlockCondition} is met.
+ */
+public class SurroundingToContentFilter implements BoilerpipeFilter {
+  public static final SurroundingToContentFilter INSTANCE_TEXT = new SurroundingToContentFilter(
+      new TextBlockCondition() {
+        @Override
+        public boolean meetsCondition(TextBlock tb) {
+          return tb.getLinkDensity() == 0 && tb.getNumWords() > 6;
+        }
+      });
+
+  private final TextBlockCondition cond;
+
+  public SurroundingToContentFilter(final TextBlockCondition cond) {
+    this.cond = cond;
+  }
+
+  /**
+   * Promotes a non-content block whose two neighbours are content and which meets the
+   * condition. The window advances two blocks at a time (candidates: index 1, 3, 5, ...).
+   * @return {@code true} if at least one block was promoted
+   */
+  public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
+    final List<TextBlock> blocks = doc.getTextBlocks();
+    if (blocks.size() < 3) {
+      return false;
+    }
+    TextBlock before = blocks.get(0);
+    TextBlock middle = blocks.get(1);
+    boolean promotedAny = false;
+    final Iterator<TextBlock> it = blocks.listIterator(2);
+    while (it.hasNext()) {
+      final TextBlock after = it.next();
+      final boolean enclosed = !middle.isContent() && before.isContent() && after.isContent();
+      if (enclosed && cond.meetsCondition(middle)) {
+        middle.setIsContent(true);
+        promotedAny = true;
+      }
+      // The block just read becomes the left neighbour; the next one read is the candidate.
+      before = after;
+      if (it.hasNext()) {
+        middle = it.next();
+      }
+    }
+    return promotedAny;
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/package-info.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/package-info.java
new file mode 100755
index 0000000..5c40ee1
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/filters/simple/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * These BoilerpipeFilters are straight-forward and probably not really specific to English.
+ */
+package com.kohlschutter.boilerpipe.filters.simple;
+
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/ConditionalLabelAction.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/ConditionalLabelAction.java
new file mode 100755
index 0000000..042958c
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/ConditionalLabelAction.java
@@ -0,0 +1,40 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.labels;
+
+import com.kohlschutter.boilerpipe.conditions.TextBlockCondition;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+
+/**
+ * Adds labels to a {@link TextBlock} if the given criteria are met.
+ */
+public final class ConditionalLabelAction extends LabelAction {
+  private final TextBlockCondition condition; // decides whether addTo labels a block
+
+  public ConditionalLabelAction(TextBlockCondition condition, String... labels) {
+    super(labels);
+    this.condition = condition;
+  }
+
+  public void addTo(final TextBlock tb) {
+    if (!condition.meetsCondition(tb)) {
+      return; // condition rejected the block: add nothing
+    }
+    addLabelsTo(tb);
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/DefaultLabels.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/DefaultLabels.java
new file mode 100755
index 0000000..e3c244e
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/DefaultLabels.java
@@ -0,0 +1,46 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.labels;
+
+import com.kohlschutter.boilerpipe.document.TextBlock;
+
+/**
+ * Some pre-defined labels which can be used in conjunction with {@link TextBlock#addLabel(String)}
+ * and {@link TextBlock#hasLabel(String)}.
+ */
+public final class DefaultLabels {
+ public static final String TITLE = "de.l3s.boilerpipe/TITLE"; // see INSTANCE_KEEP_TITLE
+ public static final String ARTICLE_METADATA = "de.l3s.boilerpipe/ARTICLE_METADATA";
+ public static final String INDICATES_END_OF_TEXT = "de.l3s.boilerpipe/INDICATES_END_OF_TEXT";
+ public static final String MIGHT_BE_CONTENT = "de.l3s.boilerpipe/MIGHT_BE_CONTENT";
+ public static final String VERY_LIKELY_CONTENT = "de.l3s.boilerpipe/VERY_LIKELY_CONTENT";
+ public static final String STRICTLY_NOT_CONTENT = "de.l3s.boilerpipe/STRICTLY_NOT_CONTENT";
+ public static final String HR = "de.l3s.boilerpipe/HR";
+ public static final String LI = "de.l3s.boilerpipe/LI";
+ // Heading labels; H1-H3 presumably mirror the HTML heading levels - TODO confirm.
+ public static final String HEADING = "de.l3s.boilerpipe/HEADING";
+ public static final String H1 = "de.l3s.boilerpipe/H1";
+ public static final String H2 = "de.l3s.boilerpipe/H2";
+ public static final String H3 = "de.l3s.boilerpipe/H3";
+ // NOTE(review): presumably the prefix of labels derived from markup tags - confirm.
+ public static final String MARKUP_PREFIX = "<";
+
+ private DefaultLabels() {
+ // not to be instantiated
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/LabelAction.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/LabelAction.java
new file mode 100755
index 0000000..a73a447
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/labels/LabelAction.java
@@ -0,0 +1,47 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.labels;
+
+import java.util.Arrays;
+
+import com.kohlschutter.boilerpipe.document.TextBlock;
+
+/**
+ * Helps adding labels to {@link TextBlock}s.
+ *
+ * @see ConditionalLabelAction
+ */
+public class LabelAction {
+ protected final String[] labels; // the varargs array is stored as passed, without copying
+
+ public LabelAction(String... labels) {
+ this.labels = labels;
+ }
+ // Applies the stored labels to tb unconditionally.
+ public void addTo(final TextBlock tb) {
+ addLabelsTo(tb);
+ }
+ // Final helper available to subclasses: hands the stored labels to tb.addLabels.
+ protected final void addLabelsTo(final TextBlock tb) {
+ tb.addLabels(labels);
+ }
+ // Renders the inherited toString() followed by the label list in braces.
+ public String toString() {
+ return super.toString() + "{" + Arrays.asList(labels) + "}";
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/package-info.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/package-info.java
new file mode 100755
index 0000000..99b5704
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * The Boilerpipe top-level package.
+ */
+package com.kohlschutter.boilerpipe;
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLContentHandler.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLContentHandler.java
new file mode 100755
index 0000000..8e597ba
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLContentHandler.java
@@ -0,0 +1,441 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.labels.LabelAction;
+import com.kohlschutter.boilerpipe.util.UnicodeTokenizer;
+
+/**
+ * A simple SAX {@link ContentHandler}, used by {@link BoilerpipeSAXInput}. Can be used by different
+ * parser implementations, e.g. NekoHTML and TagSoup.
+ */
+public class BoilerpipeHTMLContentHandler implements ContentHandler {
+
+ private final Map<String, TagAction> tagActions;
+ private String title = null;
+
+ static final String ANCHOR_TEXT_START = "$\ue00a<";
+ static final String ANCHOR_TEXT_END = ">\ue00a$";
+
+ StringBuilder tokenBuffer = new StringBuilder();
+ StringBuilder textBuffer = new StringBuilder();
+
+ int inBody = 0;
+ int inAnchor = 0;
+ int inIgnorableElement = 0;
+
+ int tagLevel = 0;
+ int blockTagLevel = -1;
+
+ boolean sbLastWasWhitespace = false;
+ private int textElementIdx = 0;
+
+ private final List<TextBlock> textBlocks = new ArrayList<TextBlock>();
+
+ private String lastStartTag = null;
+ @SuppressWarnings("unused")
+ private String lastEndTag = null;
+ @SuppressWarnings("unused")
+ private Event lastEvent = null;
+
+ private int offsetBlocks = 0;
+ private BitSet currentContainedTextElements = new BitSet();
+
+ private boolean flush = false;
+ boolean inAnchorText = false;
+
+ LinkedList<LinkedList<LabelAction>> labelStacks = new LinkedList<LinkedList<LabelAction>>();
+ LinkedList<Integer> fontSizeStack = new LinkedList<Integer>();
+
+  /**
+   * Recycles this instance: resets buffers, counters, nesting stacks and the title for reuse.
+   */
+  public void recycle() {
+    tokenBuffer.setLength(0);
+    textBuffer.setLength(0);
+    title = null; // do not leak the previous document's title into the next one
+    inBody = 0;
+    inAnchor = 0;
+    inIgnorableElement = 0;
+    sbLastWasWhitespace = false;
+    textElementIdx = 0;
+    tagLevel = 0; // nesting state may be stale if the previous parse was aborted
+    textBlocks.clear();
+    blockTagLevel = -1;
+    lastStartTag = null;
+    lastEndTag = null;
+    lastEvent = null;
+    labelStacks.clear();
+    offsetBlocks = 0;
+    currentContainedTextElements.clear();
+    fontSizeStack.clear();
+    flush = false;
+    inAnchorText = false;
+  }
+
+ /**
+ * Constructs a {@link BoilerpipeHTMLContentHandler} using the {@link DefaultTagActionMap}.
+ */
+ public BoilerpipeHTMLContentHandler() {
+ this(DefaultTagActionMap.INSTANCE);
+ }
+
+ /**
+ * Constructs a {@link BoilerpipeHTMLContentHandler} using the given {@link TagActionMap}.
+ *
+ * @param tagActions The {@link TagActionMap} to use, e.g. {@link DefaultTagActionMap}.
+ */
+ public BoilerpipeHTMLContentHandler(final TagActionMap tagActions) {
+ this.tagActions = tagActions;
+ }
+
+  @Override // SAX callback: turns whatever is still buffered into a final block
+  public void endDocument() throws SAXException {
+    flushBlock();
+  }
+
+  @Override // intentionally empty: prefix mappings are not used by this handler
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  @Override // SAX callback: collapses ignorable whitespace into at most one buffered space
+  public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+    if (!sbLastWasWhitespace) {
+      textBuffer.append(' ');
+      tokenBuffer.append(' ');
+    }
+    sbLastWasWhitespace = true;
+  }
+
+  @Override // intentionally empty: processing instructions are ignored
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  @Override // intentionally empty: the locator is not needed by this handler
+  public void setDocumentLocator(Locator locator) {
+  }
+
+  @Override // intentionally empty: skipped entities are ignored
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  @Override // intentionally empty: no per-document setup happens here
+  public void startDocument() throws SAXException {
+  }
+
+  @Override // intentionally empty: prefix mappings are not used by this handler
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+  }
+
+  @Override // SAX callback: opens a label scope and dispatches to the tag's TagAction
+  public void startElement(String uri, String localName, String qName, Attributes atts)
+      throws SAXException {
+    labelStacks.add(null); // placeholder, replaced lazily by addLabelAction
+
+    TagAction ta = tagActions.get(localName);
+    if (ta != null) {
+      if (ta.changesTagLevel()) {
+        tagLevel++;
+      }
+      flush = ta.start(this, localName, qName, atts) | flush;
+    } else {
+      tagLevel++; // tags without a registered action count as a level and force a flush
+      flush = true;
+    }
+
+    lastEvent = Event.START_TAG;
+    lastStartTag = localName;
+  }
+
+  @Override // SAX callback: runs the tag's end action, flushes if requested, closes label scope
+  public void endElement(String uri, String localName, String qName) throws SAXException {
+    TagAction ta = tagActions.get(localName);
+    if (ta != null) {
+      flush = ta.end(this, localName, qName) | flush;
+    } else {
+      flush = true;
+    }
+
+    if (ta == null || ta.changesTagLevel()) { // mirrors the increment done in startElement
+      tagLevel--;
+    }
+
+    if (flush) { // the flag itself is only cleared by the next characters() call
+      flushBlock();
+    }
+
+    lastEvent = Event.END_TAG;
+    lastEndTag = localName;
+
+    labelStacks.removeLast(); // labels of this element must not apply to later blocks
+  }
+
+  @Override // SAX callback: normalises whitespace and appends the text to both buffers
+  public void characters(char[] ch, int start, int length) throws SAXException {
+    textElementIdx++;
+
+    if (flush) { // a preceding tag requested a new block
+      flushBlock();
+      flush = false;
+    }
+
+    if (inIgnorableElement != 0) {
+      return;
+    }
+
+    char c;
+    boolean startWhitespace = false;
+    boolean endWhitespace = false;
+    if (length == 0) {
+      return;
+    }
+
+    final int end = start + length;
+    for (int i = start; i < end; i++) { // note: rewrites the passed array in place
+      if (Character.isWhitespace(ch[i])) {
+        ch[i] = ' ';
+      }
+    }
+    while (start < end) { // strip leading spaces, remembering that there were some
+      c = ch[start];
+      if (c == ' ') {
+        startWhitespace = true;
+        start++;
+        length--;
+      } else {
+        break;
+      }
+    }
+    while (length > 0) { // strip trailing spaces likewise
+      c = ch[start + length - 1];
+      if (c == ' ') {
+        endWhitespace = true;
+        length--;
+      } else {
+        break;
+      }
+    }
+    if (length == 0) { // nothing but whitespace: buffer at most one separating space
+      if (startWhitespace || endWhitespace) {
+        if (!sbLastWasWhitespace) {
+          textBuffer.append(' ');
+          tokenBuffer.append(' ');
+        }
+        sbLastWasWhitespace = true;
+      } else {
+        sbLastWasWhitespace = false;
+      }
+      lastEvent = Event.WHITESPACE;
+      return;
+    }
+    if (startWhitespace) {
+      if (!sbLastWasWhitespace) {
+        textBuffer.append(' ');
+        tokenBuffer.append(' ');
+      }
+    }
+
+    if (blockTagLevel == -1) { // first text of a new block fixes the block's tag level
+      blockTagLevel = tagLevel;
+    }
+
+    textBuffer.append(ch, start, length);
+    tokenBuffer.append(ch, start, length);
+    if (endWhitespace) {
+      textBuffer.append(' ');
+      tokenBuffer.append(' ');
+    }
+
+    sbLastWasWhitespace = endWhitespace;
+    lastEvent = Event.CHARACTERS;
+
+    currentContainedTextElements.set(textElementIdx);
+  }
+
+ List<TextBlock> getTextBlocks() { // returns the live internal list, not a copy
+ return textBlocks;
+ }
+
+ public void flushBlock() { // turns the buffered text into a TextBlock (or, outside BODY, a title)
+ if (inBody == 0) { // outside BODY no block is created; the buffers are simply discarded
+ if ("TITLE".equalsIgnoreCase(lastStartTag) && inBody == 0) { // second inBody test is redundant
+ setTitle(tokenBuffer.toString().trim());
+ }
+ textBuffer.setLength(0);
+ tokenBuffer.setLength(0);
+ return;
+ }
+
+ final int length = tokenBuffer.length();
+ switch (length) {
+ case 0: // nothing buffered
+ return;
+ case 1: // a single buffered character; presumably a lone space when the flag is set
+ if (sbLastWasWhitespace) {
+ textBuffer.setLength(0);
+ tokenBuffer.setLength(0);
+ return;
+ }
+ }
+ final String[] tokens = UnicodeTokenizer.tokenize(tokenBuffer);
+
+ int numWords = 0;
+ int numLinkedWords = 0;
+ int numWrappedLines = 0;
+ int currentLineLength = -1; // don't count the first space
+ final int maxLineLength = 80; // width used to simulate line wrapping of the block text
+ int numTokens = 0;
+ int numWordsCurrentLine = 0;
+
+ for (String token : tokens) {
+ if (ANCHOR_TEXT_START.equals(token)) { // marker tokens only toggle the anchor state
+ inAnchorText = true;
+ } else if (ANCHOR_TEXT_END.equals(token)) {
+ inAnchorText = false;
+ } else if (isWord(token)) {
+ numTokens++;
+ numWords++;
+ numWordsCurrentLine++;
+ if (inAnchorText) { // a field, so an anchor left open carries over to the next flush
+ numLinkedWords++;
+ }
+ final int tokenLength = token.length();
+ currentLineLength += tokenLength + 1;
+ if (currentLineLength > maxLineLength) { // this word starts a new simulated line
+ numWrappedLines++;
+ currentLineLength = tokenLength;
+ numWordsCurrentLine = 1;
+ }
+ } else {
+ numTokens++;
+ }
+ }
+ if (numTokens == 0) { // NOTE(review): the buffers are not cleared on this path
+ return;
+ }
+ int numWordsInWrappedLines;
+ if (numWrappedLines == 0) { // everything fit on one simulated line
+ numWordsInWrappedLines = numWords;
+ numWrappedLines = 1;
+ } else {
+ numWordsInWrappedLines = numWords - numWordsCurrentLine; // exclude the unfinished last line
+ }
+
+ TextBlock tb =
+ new TextBlock(textBuffer.toString().trim(), currentContainedTextElements, numWords,
+ numLinkedWords, numWordsInWrappedLines, numWrappedLines, offsetBlocks);
+ currentContainedTextElements = new BitSet(); // fresh set; the old one was handed to tb
+
+ offsetBlocks++;
+
+ textBuffer.setLength(0);
+ tokenBuffer.setLength(0);
+
+ tb.setTagLevel(blockTagLevel);
+ addTextBlock(tb);
+ blockTagLevel = -1; // re-armed by the next characters() call that carries text
+ }
+
+  protected void addTextBlock(final TextBlock tb) {
+    for (final Integer fontSize : fontSizeStack) { // label with the first non-null size only
+      if (fontSize == null) {
+        continue;
+      }
+      tb.addLabel("font-" + fontSize);
+      break;
+    }
+    for (final LinkedList<LabelAction> actionsOfElement : labelStacks) {
+      if (actionsOfElement == null) { // element registered no label actions
+        continue;
+      }
+      for (final LabelAction action : actionsOfElement) {
+        if (action != null) {
+          action.addTo(tb);
+        }
+      }
+    }
+    textBlocks.add(tb);
+  }
+
+ private static final Pattern PAT_VALID_WORD_CHARACTER = Pattern
+ .compile("[\\p{L}\\p{Nd}\\p{Nl}\\p{No}]"); // one Unicode letter or number character
+
+ private static boolean isWord(final String token) { // true if token contains such a character
+ return PAT_VALID_WORD_CHARACTER.matcher(token).find();
+ }
+
+  private enum Event { // kinds of SAX events recorded in lastEvent
+    START_TAG, END_TAG, CHARACTERS, WHITESPACE
+  }
+
+ public String getTitle() { // null until setTitle has stored a non-empty title
+ return title;
+ }
+
+  public void setTitle(String s) {
+    // Null or empty input is ignored, so an existing title is never cleared here.
+    if (s != null && !s.isEmpty()) {
+      title = s;
+    }
+  }
+
+ /**
+ * Returns a {@link TextDocument} containing the extracted {@link TextBlock}s. NOTE: Only call
+ * this after parsing; the document is built from the current title and the live block list.
+ *
+ * @return The {@link TextDocument}
+ */
+ public TextDocument toTextDocument() {
+ // flush once more in case text is still buffered
+ flushBlock();
+
+ return new TextDocument(getTitle(), getTextBlocks());
+ }
+
+ public void addWhitespaceIfNecessary() { // called by tag actions to separate adjacent text
+ if (!sbLastWasWhitespace) { // avoid buffering two spaces in a row
+ tokenBuffer.append(' ');
+ textBuffer.append(' ');
+ sbLastWasWhitespace = true;
+ }
+ }
+
+  public void addLabelAction(final LabelAction la) throws IllegalStateException {
+    if (labelStacks.isEmpty()) { // getLast() would throw NoSuchElementException instead
+      throw new IllegalStateException("addLabelAction called outside of any element");
+    }
+    if (labelStacks.getLast() == null) { // lazily replace the placeholder of the open element
+      labelStacks.set(labelStacks.size() - 1, new LinkedList<LabelAction>());
+    }
+    labelStacks.getLast().add(la);
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLParser.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLParser.java
new file mode 100755
index 0000000..4db1898
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeHTMLParser.java
@@ -0,0 +1,90 @@
+/**
+ *
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2019 FZI Forschungszentrum Informatik
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import org.apache.xerces.parsers.AbstractSAXParser;
+import org.cyberneko.html.HTMLConfiguration;
+
+import com.kohlschutter.boilerpipe.BoilerpipeDocumentSource;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * A simple SAX Parser, used by {@link BoilerpipeSAXInput}. The parser uses <a
+ * href="http://nekohtml.sourceforge.net/">CyberNeko</a> to parse HTML content.
+ */
+public class BoilerpipeHTMLParser extends AbstractSAXParser implements BoilerpipeDocumentSource {
+
+  /** Handler that builds the text blocks; {@code null} while a non-boilerpipe handler is set. */
+  private BoilerpipeHTMLContentHandler contentHandler;
+  /** Constructs a {@link BoilerpipeHTMLParser} using a default HTML content handler. */
+  public BoilerpipeHTMLParser() {
+    this(new BoilerpipeHTMLContentHandler());
+  }
+
+  /**
+   * Constructs a {@link BoilerpipeHTMLParser} using the given {@link BoilerpipeHTMLContentHandler}.
+   *
+   * @param contentHandler the handler that receives the SAX events and builds the document
+   */
+  public BoilerpipeHTMLParser(BoilerpipeHTMLContentHandler contentHandler) {
+    super(new HTMLConfiguration());
+    setContentHandler(contentHandler);
+  }
+
+  /** Creates a parser without any content handler; the {@code ignore} argument is unused. */
+  protected BoilerpipeHTMLParser(boolean ignore) {
+    super(new HTMLConfiguration());
+  }
+  /** Installs {@code contentHandler} and remembers it for {@link #toTextDocument()}. */
+  public void setContentHandler(final BoilerpipeHTMLContentHandler contentHandler) {
+    this.contentHandler = contentHandler;
+    super.setContentHandler(contentHandler);
+  }
+  @Override // keep the typed reference when a boilerpipe handler arrives via the SAX overload
+  public void setContentHandler(final org.xml.sax.ContentHandler contentHandler) {
+    this.contentHandler = contentHandler instanceof BoilerpipeHTMLContentHandler
+        ? (BoilerpipeHTMLContentHandler) contentHandler : null;
+    super.setContentHandler(contentHandler);
+  }
+  /**
+   * Returns the extracted {@link TextDocument}; call after {@link #parse(org.xml.sax.InputSource)}.
+   * @throws IllegalStateException if no {@link BoilerpipeHTMLContentHandler} is installed
+   */
+  public TextDocument toTextDocument() {
+    if (contentHandler == null) {
+      throw new IllegalStateException("No BoilerpipeHTMLContentHandler set");
+    }
+    return contentHandler.toTextDocument();
+  }
+}
\ No newline at end of file
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeSAXInput.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeSAXInput.java
new file mode 100755
index 0000000..18606b2
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/BoilerpipeSAXInput.java
@@ -0,0 +1,72 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.io.IOException;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import com.kohlschutter.boilerpipe.BoilerpipeInput;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Parses an {@link InputSource} using SAX and returns a {@link TextDocument}.
+ */
+public final class BoilerpipeSAXInput implements BoilerpipeInput {
+ private final InputSource is;
+
+ /**
+ * Creates a new instance of {@link BoilerpipeSAXInput} for the given {@link InputSource}.
+ *
+ * @param is the source that is later handed to the parser; it is only stored here
+ * @throws SAXException declared, but never thrown by the current implementation
+ */
+ public BoilerpipeSAXInput(final InputSource is) throws SAXException {
+ this.is = is;
+ }
+
+ /**
+ * Retrieves the {@link TextDocument} using a new default {@link BoilerpipeHTMLParser}.
+ */
+ public TextDocument getTextDocument() throws BoilerpipeProcessingException {
+ return getTextDocument(new BoilerpipeHTMLParser());
+ }
+
+ /**
+ * Retrieves the {@link TextDocument} using the given HTML parser.
+ *
+ * @param parser The parser used to transform the input into boilerpipe's internal representation.
+ * @return The retrieved {@link TextDocument}
+ * @throws BoilerpipeProcessingException wrapping any IOException or SAXException from parsing
+ */
+ public TextDocument getTextDocument(final BoilerpipeHTMLParser parser)
+ throws BoilerpipeProcessingException {
+ try {
+ parser.parse(is);
+ } catch (IOException e) {
+ throw new BoilerpipeProcessingException(e);
+ } catch (SAXException e) {
+ throw new BoilerpipeProcessingException(e);
+ }
+
+ return parser.toTextDocument();
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/CommonTagActions.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/CommonTagActions.java
new file mode 100755
index 0000000..0e2eabd
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/CommonTagActions.java
@@ -0,0 +1,343 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.labels.LabelAction;
+
+/**
+ * Defines an action that is to be performed whenever a particular tag occurs during HTML parsing.
+ */
+public abstract class CommonTagActions {
+
+ private CommonTagActions() {
+ }
+
+ public static final class Chained implements TagAction {
+ // Combines two TagActions: t1 runs before t2 and their boolean results are OR-ed.
+ private final TagAction t1;
+ private final TagAction t2;
+
+ public Chained(final TagAction t1, final TagAction t2) {
+ this.t1 = t1;
+ this.t2 = t2;
+ }
+
+ public boolean start(BoilerpipeHTMLContentHandler instance, String localName, String qName,
+ Attributes atts) throws SAXException {
+ return t1.start(instance, localName, qName, atts)
+ | t2.start(instance, localName, qName, atts); // '|' rather than '||': t2 must always run
+ }
+
+ public boolean end(BoilerpipeHTMLContentHandler instance, String localName, String qName)
+ throws SAXException {
+ return t1.end(instance, localName, qName) | t2.end(instance, localName, qName);
+ }
+
+ public boolean changesTagLevel() { // true if either chained action changes the level
+ return t1.changesTagLevel() || t2.changesTagLevel();
+ }
+ }
+
+ /**
+ * Marks this tag as "ignorable", i.e. all its inner content is silently skipped.
+ */
+ public static final TagAction TA_IGNORABLE_ELEMENT = new TagAction() {
+
+ public boolean start(final BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName, final Attributes atts) {
+ instance.inIgnorableElement++;
+ return true;
+ }
+
+ public boolean end(final BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName) {
+ instance.inIgnorableElement--;
+ return true;
+ }
+
+ public boolean changesTagLevel() {
+ return true;
+ }
+ };
+
+  /**
+   * Marks this tag as "anchor" (this should usually only be set for the {@code <A>} tag).
+   * Anchor tags may not be nested.
+   *
+   * There is a bug in certain versions of NekoHTML which still allows nested tags. If boilerpipe
+   * encounters such nestings, a warning is printed to stderr and parsing continues.
+   */
+  public static final TagAction TA_ANCHOR_TEXT = new TagAction() {
+
+    public boolean start(BoilerpipeHTMLContentHandler instance, final String localName,
+        final String qName, final Attributes atts) throws SAXException {
+      if (instance.inAnchor++ > 0) {
+        // as nested A elements are not allowed per specification, we
+        // are probably reaching this branch due to a bug in the XML
+        // parser
+        System.err
+            .println("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow...");
+
+        end(instance, localName, qName);
+      }
+      if (instance.inIgnorableElement == 0) {
+        instance.addWhitespaceIfNecessary();
+        instance.tokenBuffer.append(BoilerpipeHTMLContentHandler.ANCHOR_TEXT_START);
+        instance.tokenBuffer.append(' ');
+        instance.sbLastWasWhitespace = true;
+      }
+      return false;
+    }
+
+    public boolean end(BoilerpipeHTMLContentHandler instance, final String localName,
+        final String qName) {
+      // Never let the depth go negative: the nested-A recovery in start() already consumed one
+      // level, so the surplus end tag is ignored; otherwise later anchors would never close.
+      if (instance.inAnchor > 0 && --instance.inAnchor == 0 && instance.inIgnorableElement == 0) {
+        instance.addWhitespaceIfNecessary();
+        instance.tokenBuffer.append(BoilerpipeHTMLContentHandler.ANCHOR_TEXT_END);
+        instance.tokenBuffer.append(' ');
+        instance.sbLastWasWhitespace = true;
+      }
+      return false;
+    }
+
+    public boolean changesTagLevel() {
+      return true;
+    }
+  };
+
+ /**
+ * Marks this tag as the body element (this should usually only be set for the
+ * {@code <BODY>} tag). Flushes pending text and adjusts the handler's body depth counter.
+ */
+ public static final TagAction TA_BODY = new TagAction() {
+ public boolean start(final BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName, final Attributes atts) {
+ instance.flushBlock(); // flushed while still outside BODY
+ instance.inBody++;
+ return false;
+ }
+
+ public boolean end(final BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName) {
+ instance.flushBlock(); // flushed while still inside BODY
+ instance.inBody--;
+ return false;
+ }
+
+ public boolean changesTagLevel() {
+ return true;
+ }
+ };
+
+ /**
+ * Marks this tag a simple "inline" element, which generates whitespace, but no new block.
+ */
+ public static final TagAction TA_INLINE_WHITESPACE = new TagAction() {
+
+ public boolean start(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName, final Attributes atts) {
+ instance.addWhitespaceIfNecessary();
+ return false;
+ }
+
+ public boolean end(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName) {
+ instance.addWhitespaceIfNecessary();
+ return false;
+ }
+
+ public boolean changesTagLevel() {
+ return false;
+ }
+ };
+
+ /**
+ * @deprecated Use {@link #TA_INLINE_WHITESPACE} instead
+ */
+ @Deprecated
+ public static final TagAction TA_INLINE = TA_INLINE_WHITESPACE;
+
+ /**
+ * Marks this tag a simple "inline" element, which neither generates whitespace, nor a new block.
+ */
+ public static final TagAction TA_INLINE_NO_WHITESPACE = new TagAction() {
+
+ public boolean start(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName, final Attributes atts) {
+ return false;
+ }
+
+ public boolean end(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName) {
+ return false;
+ }
+
+ public boolean changesTagLevel() {
+ return false;
+ }
+ };
+ private static final Pattern PAT_FONT_SIZE = Pattern.compile("([\\+\\-]?)([0-9])");
+
+ /**
+ * Explicitly marks this tag a simple "block-level" element: start and end both return true.
+ */
+ public static final TagAction TA_BLOCK_LEVEL = new TagAction() {
+
+ public boolean start(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName, final Attributes atts) {
+ return true;
+ }
+
+ public boolean end(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName) {
+ return true;
+ }
+
+ public boolean changesTagLevel() {
+ return true;
+ }
+ };
+
+  /**
+   * Special TagAction for the {@code <FONT>} tag, which keeps track of the absolute and
+   * relative font size.
+   */
+  public static final TagAction TA_FONT = new TagAction() {
+
+    public boolean start(final BoilerpipeHTMLContentHandler instance, final String localName,
+        final String qName, final Attributes atts) {
+      // Exactly one entry is pushed per FONT tag (null when the size attribute is missing or
+      // not understood), so that end() can always pop one entry.
+      final Integer size = resolveSize(instance, atts.getValue("size"));
+      instance.fontSizeStack.add(0, size);
+      return false;
+    }
+
+    /**
+     * Translates a size attribute value into a font size.
+     *
+     * @return the absolute size, the size relative to the closest enclosing explicit size
+     *         (3 if there is none), or {@code null} if the value is absent or malformed
+     */
+    private Integer resolveSize(final BoilerpipeHTMLContentHandler instance,
+        final String sizeAttr) {
+      if (sizeAttr == null) {
+        return null;
+      }
+      final Matcher m = PAT_FONT_SIZE.matcher(sizeAttr);
+      if (!m.matches()) {
+        return null;
+      }
+      final String sign = m.group(1);
+      final int val = Integer.parseInt(m.group(2));
+      if (sign.length() == 0) {
+        return val; // absolute
+      }
+      int base = 3; // used when no enclosing FONT tag carries an explicit size
+      for (final Integer s : instance.fontSizeStack) {
+        if (s != null) {
+          base = s;
+          break;
+        }
+      }
+      if (sign.charAt(0) == '+') {
+        return base + val;
+      }
+      return base - val;
+    }
+
+    public boolean end(final BoilerpipeHTMLContentHandler instance, final String localName,
+        final String qName) {
+      // drop the entry pushed by the matching start()
+      instance.fontSizeStack.removeFirst();
+      return false;
+    }
+
+    public boolean changesTagLevel() {
+      // FONT does not count as a nesting level
+      return false;
+    }
+  };
+
+ /**
+ * {@link CommonTagActions} for inline elements, which triggers some {@link LabelAction} on the
+ * generated {@link TextBlock}.
+ */
+ public static final class InlineTagLabelAction implements TagAction {
+
+ private final LabelAction action;
+
+ public InlineTagLabelAction(final LabelAction action) {
+ this.action = action;
+ }
+
+ public boolean start(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName, final Attributes atts) {
+ instance.addWhitespaceIfNecessary();
+ instance.addLabelAction(action);
+ return false;
+ }
+
+ public boolean end(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName) {
+ instance.addWhitespaceIfNecessary();
+ return false;
+ }
+
+ public boolean changesTagLevel() {
+ return false;
+ }
+ }
+
+ /**
+ * {@link CommonTagActions} for block-level elements, which triggers some {@link LabelAction} on
+ * the generated {@link TextBlock}.
+ */
+ public static final class BlockTagLabelAction implements TagAction {
+
+ private final LabelAction action;
+
+ public BlockTagLabelAction(final LabelAction action) {
+ this.action = action;
+ }
+
+ public boolean start(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName, final Attributes atts) {
+ instance.addLabelAction(action);
+ return true;
+ }
+
+ public boolean end(BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName) {
+ return true;
+ }
+
+ public boolean changesTagLevel() {
+ return true;
+ }
+ }
+}
\ No newline at end of file
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/DefaultTagActionMap.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/DefaultTagActionMap.java
new file mode 100755
index 0000000..58c4ed0
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/DefaultTagActionMap.java
@@ -0,0 +1,81 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+import com.kohlschutter.boilerpipe.labels.LabelAction;
+
+/**
+ * Default {@link TagAction}s. Seem to work well.
+ */
+public class DefaultTagActionMap extends TagActionMap {
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
+
+ public static final TagActionMap INSTANCE = new DefaultTagActionMap();
+
+ protected DefaultTagActionMap() {
+ setTagAction("STYLE", CommonTagActions.TA_IGNORABLE_ELEMENT);
+ setTagAction("SCRIPT", CommonTagActions.TA_IGNORABLE_ELEMENT);
+ setTagAction("OPTION", CommonTagActions.TA_IGNORABLE_ELEMENT);
+ setTagAction("OBJECT", CommonTagActions.TA_IGNORABLE_ELEMENT);
+ setTagAction("EMBED", CommonTagActions.TA_IGNORABLE_ELEMENT);
+ setTagAction("APPLET", CommonTagActions.TA_IGNORABLE_ELEMENT);
+ setTagAction("LINK", CommonTagActions.TA_IGNORABLE_ELEMENT);
+
+ setTagAction("A", CommonTagActions.TA_ANCHOR_TEXT);
+ setTagAction("BODY", CommonTagActions.TA_BODY);
+
+ setTagAction("STRIKE", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("U", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("B", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("I", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("EM", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("STRONG", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("SPAN", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+
+ // New in 1.1 (especially to improve extraction quality from Wikipedia etc.)
+ setTagAction("SUP", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+
+ // New in 1.2
+ setTagAction("CODE", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("TT", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("SUB", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+ setTagAction("VAR", CommonTagActions.TA_INLINE_NO_WHITESPACE);
+
+ setTagAction("ABBR", CommonTagActions.TA_INLINE_WHITESPACE);
+ setTagAction("ACRONYM", CommonTagActions.TA_INLINE_WHITESPACE);
+
+ setTagAction("FONT", CommonTagActions.TA_INLINE_NO_WHITESPACE); // could also use TA_FONT
+
+ // added in 1.1.1
+ setTagAction("NOSCRIPT", CommonTagActions.TA_IGNORABLE_ELEMENT);
+
+ // New in 1.3
+ setTagAction("LI", new CommonTagActions.BlockTagLabelAction(new LabelAction(DefaultLabels.LI)));
+ setTagAction("H1", new CommonTagActions.BlockTagLabelAction(new LabelAction(DefaultLabels.H1,
+ DefaultLabels.HEADING)));
+ setTagAction("H2", new CommonTagActions.BlockTagLabelAction(new LabelAction(DefaultLabels.H2,
+ DefaultLabels.HEADING)));
+ setTagAction("H3", new CommonTagActions.BlockTagLabelAction(new LabelAction(DefaultLabels.H3,
+ DefaultLabels.HEADING)));
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLDocument.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLDocument.java
new file mode 100755
index 0000000..2eecc11
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLDocument.java
@@ -0,0 +1,56 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.Charset;
+
+import org.xml.sax.InputSource;
+
+/**
+ * An {@link InputSourceable} for {@link HTMLFetcher}.
+ */
+public class HTMLDocument implements InputSourceable {
+ private final Charset charset;
+ private final byte[] data;
+
+ public HTMLDocument(final byte[] data, final Charset charset) { // stores the array, no copy
+ this.data = data;
+ this.charset = charset;
+ }
+
+ public HTMLDocument(final String data) { // encodes the text as UTF-8
+ Charset cs = Charset.forName("utf-8");
+ this.data = data.getBytes(cs);
+ this.charset = cs;
+ }
+
+ public Charset getCharset() {
+ return charset;
+ }
+
+ public byte[] getData() { // returns the internal array, not a copy
+ return data;
+ }
+
+ public InputSource toInputSource() { // new byte stream over the data, tagged with the charset
+ final InputSource is = new InputSource(new ByteArrayInputStream(data));
+ is.setEncoding(charset.name());
+ return is;
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLFetcher.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLFetcher.java
new file mode 100755
index 0000000..ba588e9
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLFetcher.java
@@ -0,0 +1,91 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLConnection;
+import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * A very simple HTTP/HTML fetcher, really just for demo purposes.
+ */
+public class HTMLFetcher {
+  private HTMLFetcher() {
+  }
+
+  private static final Pattern PAT_CHARSET = Pattern.compile("charset=([^; ]+)$");
+
+  /**
+   * Fetches the document at the given URL, using {@link URLConnection}.
+   *
+   * <p>The charset is read from the end of the Content-Type header; Cp1252 is kept when none is
+   * given or the name is unsupported or malformed. The stream is closed even if reading fails.
+   *
+   * @param url the location to fetch; the response must have content type text/html
+   * @return the response body (un-gzipped if it was gzip-encoded) and the detected charset
+   * @throws IOException if the content type is not text/html or the transfer fails
+   */
+  public static HTMLDocument fetch(final URL url) throws IOException {
+    final URLConnection conn = url.openConnection();
+    final String ct = conn.getContentType();
+
+    if (ct == null || !(ct.equals("text/html") || ct.startsWith("text/html;"))) {
+      throw new IOException("Unsupported content type: " + ct);
+    }
+
+    Charset cs = Charset.forName("Cp1252");
+    final Matcher m = PAT_CHARSET.matcher(ct);
+    if (m.find()) {
+      try {
+        cs = Charset.forName(m.group(1));
+      } catch (IllegalArgumentException e) {
+        // Covers both unsupported and syntactically illegal charset names: keep default.
+      }
+    }
+
+    InputStream in = conn.getInputStream();
+    final ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    try {
+      final String encoding = conn.getContentEncoding();
+      if (encoding != null) {
+        if ("gzip".equalsIgnoreCase(encoding)) {
+          in = new GZIPInputStream(in);
+        } else {
+          System.err.println("WARN: unsupported Content-Encoding: " + encoding);
+        }
+      }
+
+      final byte[] buf = new byte[4096];
+      int r;
+      while ((r = in.read(buf)) != -1) {
+        bos.write(buf, 0, r);
+      }
+    } finally {
+      in.close();
+    }
+
+    return new HTMLDocument(bos.toByteArray(), cs);
+  }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLHighlighter.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLHighlighter.java
new file mode 100755
index 0000000..e080bed
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/HTMLHighlighter.java
@@ -0,0 +1,518 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2019 FZI Forschungszentrum Informatik
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.StringReader;
+import java.net.URL;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.xerces.parsers.AbstractSAXParser;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.cyberneko.html.HTMLConfiguration;
+
+import com.kohlschutter.boilerpipe.BoilerpipeExtractor;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Highlights text blocks in an HTML document that have been marked as "content" in the
+ * corresponding {@link TextDocument}.
+ */
+public final class HTMLHighlighter implements Serializable {
+
+ private Map<String, Set<String>> tagWhitelist = null;
+
+ /**
+ * Creates a new {@link HTMLHighlighter}, which is set-up to return the full HTML text, with the
+ * extracted text portion <b>highlighted</b>.
+ */
+ public static HTMLHighlighter newHighlightingInstance() {
+ return new HTMLHighlighter(false);
+ }
+
+ /**
+ * Creates a new {@link HTMLHighlighter}, which is set-up to return only the extracted HTML text,
+ * including enclosed markup.
+ */
+ public static HTMLHighlighter newExtractingInstance() {
+ return new HTMLHighlighter(true);
+ }
+
+ private HTMLHighlighter(final boolean extractHTML) {
+ if (extractHTML) {
+ setOutputHighlightOnly(true);
+ setExtraStyleSheet("\n<style type=\"text/css\">\n" + "A:before { content:' '; } \n" //
+ + "A:after { content:' '; } \n" //
+ + "SPAN:before { content:' '; } \n" //
+ + "SPAN:after { content:' '; } \n" //
+ + "</style>\n");
+ setPreHighlight("");
+ setPostHighlight("");
+ }
+ }
+
+ /**
+ * Processes the given {@link TextDocument} and the original HTML text (as a String).
+ *
+ * @param doc The processed {@link TextDocument}.
+ * @param origHTML The original HTML document.
+ * @return The highlighted HTML.
+ * @throws BoilerpipeProcessingException
+ */
+ public String process(final TextDocument doc, final String origHTML)
+ throws BoilerpipeProcessingException {
+ return process(doc, new InputSource(new StringReader(origHTML)));
+ }
+
+  /**
+   * Parses the original HTML (given as an {@link InputSource}) and marks the text that the given
+   * {@link TextDocument} flags as content; in highlight-only mode empty elements are removed.
+   *
+   * @param doc The processed {@link TextDocument}.
+   * @param is The original HTML document.
+   * @return The highlighted HTML.
+   * @throws BoilerpipeProcessingException if parsing the HTML fails
+   */
+  public String process(final TextDocument doc, final InputSource is)
+      throws BoilerpipeProcessingException {
+    final Implementation implementation = new Implementation();
+    implementation.process(doc, is);
+
+    String html = implementation.html.toString();
+    if (outputHighlightOnly) {
+      Matcher m;
+      // Repeat until stable: drop empty elements, then unwrap a lone enclosing element.
+      boolean repeat = true;
+      while (repeat) {
+        repeat = false;
+        m = PAT_TAG_NO_TEXT.matcher(html);
+        if (m.find()) {
+          repeat = true;
+          html = m.replaceAll("");
+        }
+        // group(1) is page markup: quote it so '$' and '\' are not read as group references.
+        m = PAT_SUPER_TAG.matcher(html);
+        if (m.find()) {
+          repeat = true;
+          html = m.replaceAll(Matcher.quoteReplacement(m.group(1)));
+        }
+      }
+    }
+
+    return html;
+  }
+
+ private static final Pattern PAT_TAG_NO_TEXT = Pattern.compile("<[^/][^>]*></[^>]*>");
+ private static final Pattern PAT_SUPER_TAG = Pattern.compile("^<[^>]*>(<.*?>)</[^>]*>$");
+
+ /**
+ * Fetches the given {@link URL} using {@link HTMLFetcher} and processes the retrieved HTML using
+ * the specified {@link BoilerpipeExtractor}.
+ *
+ * @param url The location of the HTML document to fetch.
+ * @param extractor The extractor that is run on the parsed {@link TextDocument}.
+ * @return The highlighted HTML.
+ * @throws BoilerpipeProcessingException
+ */
+ public String process(final URL url, final BoilerpipeExtractor extractor) throws IOException,
+ BoilerpipeProcessingException, SAXException {
+ final HTMLDocument htmlDoc = HTMLFetcher.fetch(url);
+
+ final TextDocument doc = new BoilerpipeSAXInput(htmlDoc.toInputSource()).getTextDocument();
+ extractor.process(doc);
+ // A second, fresh InputSource is requested for the highlighting pass.
+ final InputSource is = htmlDoc.toInputSource();
+
+ return process(doc, is);
+ }
+
+ private boolean outputHighlightOnly = false;
+ private String extraStyleSheet = "\n<style type=\"text/css\">\n" + ".x-boilerpipe-mark1 {"
+ + " text-decoration:none; " + "background-color: #ffff42 !important; "
+ + "color: black !important; " + "display:inline !important; "
+ + "visibility:visible !important; }\n" + //
+ "</style>\n";
+ private String preHighlight = "<span class=\"x-boilerpipe-mark1\">";
+ private String postHighlight = "</span>";
+
+ /**
+ * If true, only HTML enclosed within highlighted content will be returned
+ */
+ public boolean isOutputHighlightOnly() {
+ return outputHighlightOnly;
+ }
+
+ /**
+ * Sets whether only HTML enclosed within highlighted content will be returned, or the whole HTML
+ * document.
+ */
+ public void setOutputHighlightOnly(boolean outputHighlightOnly) {
+ this.outputHighlightOnly = outputHighlightOnly;
+ }
+
+ /**
+ * Returns the extra stylesheet definition that will be inserted in the HEAD element.
+ *
+ * By default, this corresponds to a simple definition that marks text in class
+ * "x-boilerpipe-mark1" as inline text with yellow background.
+ */
+ public String getExtraStyleSheet() {
+ return extraStyleSheet;
+ }
+
+ /**
+ * Sets the extra stylesheet definition that will be inserted in the HEAD element.
+ *
+ * To disable, set it to the empty string: ""
+ *
+ * @param extraStyleSheet Plain HTML
+ */
+ public void setExtraStyleSheet(String extraStyleSheet) {
+ this.extraStyleSheet = extraStyleSheet;
+ }
+
+ /**
+ * Returns the string that will be inserted before any highlighted HTML block.
+ *
+ * By default, this corresponds to {@code <span class="x-boilerpipe-mark1">}
+ */
+ public String getPreHighlight() {
+ return preHighlight;
+ }
+
+ /**
+ * Sets the string that will be inserted prior to any highlighted HTML block.
+ *
+ * To disable, set it to the empty string: ""
+ */
+ public void setPreHighlight(String preHighlight) {
+ this.preHighlight = preHighlight;
+ }
+
+ /**
+ * Returns the string that will be inserted after any highlighted HTML block.
+ *
+ * By default, this corresponds to {@code </span>}
+ */
+ public String getPostHighlight() {
+ return postHighlight;
+ }
+
+ /**
+ * Sets the string that will be inserted after any highlighted HTML block.
+ *
+ * To disable, set it to the empty string: ""
+ */
+ public void setPostHighlight(String postHighlight) {
+ this.postHighlight = postHighlight;
+ }
+
+ private abstract static class TagAction {
+ void beforeStart(final Implementation instance, final String localName) {
+ }
+
+ void afterStart(final Implementation instance, final String localName) {
+ }
+
+ void beforeEnd(final Implementation instance, final String localName) {
+ }
+
+ void afterEnd(final Implementation instance, final String localName) {
+ }
+ }
+
+ private static final TagAction TA_IGNORABLE_ELEMENT = new TagAction() {
+ void beforeStart(final Implementation instance, final String localName) {
+ instance.inIgnorableElement++;
+ }
+
+ void afterEnd(final Implementation instance, final String localName) {
+ instance.inIgnorableElement--;
+ }
+ };
+
+ private static final TagAction TA_HEAD = new TagAction() {
+ void beforeStart(final Implementation instance, final String localName) {
+ instance.inIgnorableElement++;
+ }
+
+ void beforeEnd(final Implementation instance, String localName) {
+ instance.html.append(instance.hl.extraStyleSheet);
+ }
+
+ void afterEnd(final Implementation instance, final String localName) {
+ instance.inIgnorableElement--;
+ }
+ };
+ private static Map<String, TagAction> TAG_ACTIONS = new HashMap<String, TagAction>();
+ static {
+ TAG_ACTIONS.put("STYLE", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("SCRIPT", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("OPTION", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("NOSCRIPT", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("OBJECT", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("EMBED", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("APPLET", TA_IGNORABLE_ELEMENT);
+ // NOTE: you might want to comment this out:
+ TAG_ACTIONS.put("LINK", TA_IGNORABLE_ELEMENT);
+
+ TAG_ACTIONS.put("HEAD", TA_HEAD);
+ }
+
+ private final class Implementation extends AbstractSAXParser implements ContentHandler {
+ StringBuilder html = new StringBuilder();
+
+ private int inIgnorableElement = 0;
+ private int characterElementIdx = 0;
+ private final BitSet contentBitSet = new BitSet();
+ private final HTMLHighlighter hl = HTMLHighlighter.this;
+
+ Implementation() {
+ super(new HTMLConfiguration());
+ setContentHandler(this);
+ }
+
+ void process(final TextDocument doc, final InputSource is) throws BoilerpipeProcessingException {
+ for (TextBlock block : doc.getTextBlocks()) {
+ if (block.isContent()) {
+ final BitSet bs = block.getContainedTextElements();
+ if (bs != null) {
+ contentBitSet.or(bs);
+ }
+ }
+ }
+
+ try {
+ parse(is);
+ } catch (SAXException e) {
+ throw new BoilerpipeProcessingException(e);
+ } catch (IOException e) {
+ throw new BoilerpipeProcessingException(e);
+ }
+ }
+
+ public void endDocument() throws SAXException {
+ }
+
+ public void endPrefixMapping(String prefix) throws SAXException {
+ }
+
+ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+ }
+
+ public void processingInstruction(String target, String data) throws SAXException {
+ }
+
+ public void setDocumentLocator(Locator locator) {
+ }
+
+ public void skippedEntity(String name) throws SAXException {
+ }
+
+ public void startDocument() throws SAXException {
+ }
+
+ public void startElement(String uri, String localName, String qName, Attributes atts)
+ throws SAXException {
+ TagAction ta = TAG_ACTIONS.get(localName);
+ if (ta != null) {
+ ta.beforeStart(this, localName);
+ }
+
+ // HACK: remove existing highlight
+ boolean ignoreAttrs = false;
+ if ("SPAN".equalsIgnoreCase(localName)) {
+ String classVal = atts.getValue("class");
+ if ("x-boilerpipe-mark1".equals(classVal)) {
+ ignoreAttrs = true;
+ }
+ }
+
+ try {
+ if (inIgnorableElement == 0) {
+ if (outputHighlightOnly) {
+ // boolean highlight = contentBitSet
+ // .get(characterElementIdx);
+
+ // if (!highlight) {
+ // return;
+ // }
+ }
+
+ final Set<String> whitelistAttributes;
+ if (tagWhitelist == null) {
+ whitelistAttributes = null;
+ } else {
+ whitelistAttributes = tagWhitelist.get(qName);
+ if (whitelistAttributes == null) {
+ // skip
+ return;
+ }
+ }
+
+ html.append('<');
+ html.append(qName);
+ if (!ignoreAttrs) {
+ final int numAtts = atts.getLength();
+ for (int i = 0; i < numAtts; i++) {
+ final String attr = atts.getQName(i);
+
+ if (whitelistAttributes != null && !whitelistAttributes.contains(attr)) {
+ // skip
+ continue;
+ }
+
+ final String value = atts.getValue(i);
+ html.append(' ');
+ html.append(attr);
+ html.append("=\"");
+ html.append(xmlEncode(value));
+ html.append("\"");
+ }
+ }
+ html.append('>');
+ }
+ } finally {
+ if (ta != null) {
+ ta.afterStart(this, localName);
+ }
+ }
+ }
+
+ public void endElement(String uri, String localName, String qName) throws SAXException {
+ TagAction ta = TAG_ACTIONS.get(localName);
+ if (ta != null) {
+ ta.beforeEnd(this, localName);
+ }
+
+ try {
+ if (inIgnorableElement == 0) {
+ if (outputHighlightOnly) {
+ // boolean highlight = contentBitSet
+ // .get(characterElementIdx);
+
+ // if (!highlight) {
+ // return;
+ // }
+ }
+
+ if (tagWhitelist != null && !tagWhitelist.containsKey(qName)) {
+ // skip
+ return;
+ }
+
+ html.append("</");
+ html.append(qName);
+ html.append('>');
+ }
+ } finally {
+ if (ta != null) {
+ ta.afterEnd(this, localName);
+ }
+ }
+ }
+
+ public void characters(char[] ch, int start, int length) throws SAXException {
+ characterElementIdx++;
+ if (inIgnorableElement == 0) {
+
+ boolean highlight = contentBitSet.get(characterElementIdx);
+
+ if (!highlight && outputHighlightOnly) {
+ return;
+ }
+
+ if (highlight) {
+ html.append(preHighlight);
+ }
+ html.append(xmlEncode(String.valueOf(ch, start, length)));
+ if (highlight) {
+ html.append(postHighlight);
+ }
+ }
+ }
+
+ public void startPrefixMapping(String prefix, String uri) throws SAXException {
+ }
+
+ }
+
+  /**
+   * Escapes {@code <}, {@code >}, {@code &} and {@code "} as entities; {@code null} yields "".
+   */
+  private static String xmlEncode(final String in) {
+    if (in == null) {
+      return "";
+    }
+    final StringBuilder out = new StringBuilder(in.length());
+    for (int i = 0; i < in.length(); i++) {
+      final char c = in.charAt(i);
+      switch (c) {
+        case '<':
+          out.append("&lt;");
+          break;
+        case '>':
+          out.append("&gt;");
+          break;
+        case '&':
+          out.append("&amp;");
+          break;
+        case '"':
+          out.append("&quot;");
+          break;
+        default:
+          out.append(c);
+      }
+    }
+    return out.toString();
+  }
+
+ public Map<String, Set<String>> getTagWhitelist() {
+ return tagWhitelist;
+ }
+
+ public void setTagWhitelist(Map<String, Set<String>> tagWhitelist) {
+ this.tagWhitelist = tagWhitelist;
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/ImageExtractor.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/ImageExtractor.java
new file mode 100755
index 0000000..23aafad
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/ImageExtractor.java
@@ -0,0 +1,293 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ * Modifications copyright (C) 2019 FZI Forschungszentrum Informatik
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.xerces.parsers.AbstractSAXParser;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.cyberneko.html.HTMLConfiguration;
+
+import com.kohlschutter.boilerpipe.BoilerpipeExtractor;
+import com.kohlschutter.boilerpipe.BoilerpipeProcessingException;
+import com.kohlschutter.boilerpipe.document.Image;
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.document.TextDocument;
+
+/**
+ * Extracts the images that are enclosed by extracted content.
+ */
+public final class ImageExtractor {
+ public static final ImageExtractor INSTANCE = new ImageExtractor();
+
+ /**
+ * Returns the singleton instance of {@link ImageExtractor}.
+ *
+ * @return
+ */
+ public static ImageExtractor getInstance() {
+ return INSTANCE;
+ }
+
+ private ImageExtractor() {
+ }
+
+ /**
+ * Processes the given {@link TextDocument} and the original HTML text (as a String).
+ *
+ * @param doc The processed {@link TextDocument}.
+ * @param origHTML The original HTML document.
+ * @return A List of enclosed {@link Image}s
+ * @throws BoilerpipeProcessingException
+ */
+ public List<Image> process(final TextDocument doc, final String origHTML)
+ throws BoilerpipeProcessingException {
+ return process(doc, new InputSource(new StringReader(origHTML)));
+ }
+
+ /**
+ * Processes the given {@link TextDocument} and the original HTML text (as an {@link InputSource}
+ * ).
+ *
+ * @param doc The processed {@link TextDocument}.
+ * @param is The original HTML document.
+ * @return A List of enclosed {@link Image}s
+ * @throws BoilerpipeProcessingException
+ */
+ public List<Image> process(final TextDocument doc, final InputSource is)
+ throws BoilerpipeProcessingException {
+ final Implementation implementation = new Implementation();
+ implementation.process(doc, is);
+
+ return implementation.linksHighlight;
+ }
+
+ /**
+ * Fetches the given {@link URL} using {@link HTMLFetcher} and processes the retrieved HTML using
+ * the specified {@link BoilerpipeExtractor}.
+ *
+ * @param url The location of the HTML document to fetch.
+ * @param extractor The extractor that is run on the parsed {@link TextDocument}.
+ * @return A List of enclosed {@link Image}s
+ * @throws BoilerpipeProcessingException
+ */
+ public List<Image> process(final URL url, final BoilerpipeExtractor extractor)
+ throws IOException, BoilerpipeProcessingException, SAXException {
+ final HTMLDocument htmlDoc = HTMLFetcher.fetch(url);
+
+ final TextDocument doc = new BoilerpipeSAXInput(htmlDoc.toInputSource()).getTextDocument();
+ extractor.process(doc);
+ // A second, fresh InputSource is requested for the image-extraction pass.
+ final InputSource is = htmlDoc.toInputSource();
+
+ return process(doc, is);
+ }
+
+ private final class Implementation extends AbstractSAXParser implements ContentHandler {
+ List<Image> linksHighlight = new ArrayList<Image>();
+ private List<Image> linksBuffer = new ArrayList<Image>();
+
+ private int inIgnorableElement = 0;
+ private int characterElementIdx = 0;
+ private final BitSet contentBitSet = new BitSet();
+
+ private boolean inHighlight = false;
+
+ Implementation() {
+ super(new HTMLConfiguration());
+ setContentHandler(this);
+ }
+
+ void process(final TextDocument doc, final InputSource is) throws BoilerpipeProcessingException {
+ for (TextBlock block : doc.getTextBlocks()) {
+ if (block.isContent()) {
+ final BitSet bs = block.getContainedTextElements();
+ if (bs != null) {
+ contentBitSet.or(bs);
+ }
+ }
+ }
+
+ try {
+ parse(is);
+ } catch (SAXException e) {
+ throw new BoilerpipeProcessingException(e);
+ } catch (IOException e) {
+ throw new BoilerpipeProcessingException(e);
+ }
+ }
+
+ public void endDocument() throws SAXException {
+ }
+
+ public void endPrefixMapping(String prefix) throws SAXException {
+ }
+
+ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+ }
+
+ public void processingInstruction(String target, String data) throws SAXException {
+ }
+
+ public void setDocumentLocator(Locator locator) {
+ }
+
+ public void skippedEntity(String name) throws SAXException {
+ }
+
+ public void startDocument() throws SAXException {
+ }
+
+ public void startElement(String uri, String localName, String qName, Attributes atts)
+ throws SAXException {
+ TagAction ta = TAG_ACTIONS.get(localName);
+ if (ta != null) {
+ ta.beforeStart(this, localName);
+ }
+
+ try {
+ if (inIgnorableElement == 0) {
+ if (inHighlight && "IMG".equalsIgnoreCase(localName)) {
+ String src = atts.getValue("src");
+ if (src != null && src.length() > 0) {
+ linksBuffer.add(new Image(src, atts.getValue("width"), atts.getValue("height"), atts
+ .getValue("alt")));
+ }
+ }
+ }
+ } finally {
+ if (ta != null) {
+ ta.afterStart(this, localName);
+ }
+ }
+ }
+
+ public void endElement(String uri, String localName, String qName) throws SAXException {
+ TagAction ta = TAG_ACTIONS.get(localName);
+ if (ta != null) {
+ ta.beforeEnd(this, localName);
+ }
+
+ try {
+ if (inIgnorableElement == 0) {
+ //
+ }
+ } finally {
+ if (ta != null) {
+ ta.afterEnd(this, localName);
+ }
+ }
+ }
+
+    /** Updates the in-highlight state for this text run and flushes buffered images. */
+    public void characters(char[] ch, int start, int length) throws SAXException {
+      // Every call advances the text-element index, even inside ignorable elements.
+      characterElementIdx++;
+      if (inIgnorableElement != 0) {
+        return;
+      }
+
+      final boolean marked = contentBitSet.get(characterElementIdx);
+      if (!marked) {
+        // Empty or whitespace-only runs outside content leave the highlight state untouched.
+        final int end = start + length;
+        int pos = start;
+        while (pos < end && Character.isWhitespace(ch[pos])) {
+          pos++;
+        }
+        if (pos >= end) {
+          return;
+        }
+      }
+
+      inHighlight = marked;
+      if (marked) {
+        // Content text reached: the images buffered so far are moved to the result list.
+        linksHighlight.addAll(linksBuffer);
+        linksBuffer.clear();
+      }
+    }
+
+ public void startPrefixMapping(String prefix, String uri) throws SAXException {
+ }
+
+ }
+
+ private static final TagAction TA_IGNORABLE_ELEMENT = new TagAction() {
+ void beforeStart(final Implementation instance, final String localName) {
+ instance.inIgnorableElement++;
+ }
+
+ void afterEnd(final Implementation instance, final String localName) {
+ instance.inIgnorableElement--;
+ }
+ };
+
+ private static Map<String, TagAction> TAG_ACTIONS = new HashMap<String, TagAction>();
+ static {
+ TAG_ACTIONS.put("STYLE", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("SCRIPT", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("OPTION", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("NOSCRIPT", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("EMBED", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("APPLET", TA_IGNORABLE_ELEMENT);
+ TAG_ACTIONS.put("LINK", TA_IGNORABLE_ELEMENT);
+
+ TAG_ACTIONS.put("HEAD", TA_IGNORABLE_ELEMENT);
+ }
+
+ private abstract static class TagAction {
+ void beforeStart(final Implementation instance, final String localName) {
+ }
+
+ void afterStart(final Implementation instance, final String localName) {
+ }
+
+ void beforeEnd(final Implementation instance, final String localName) {
+ }
+
+ void afterEnd(final Implementation instance, final String localName) {
+ }
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/InputSourceable.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/InputSourceable.java
new file mode 100755
index 0000000..241a60e
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/InputSourceable.java
@@ -0,0 +1,28 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import org.xml.sax.InputSource;
+
+/**
+ * A document that can hand out an arbitrary number of new {@link InputSource}s for its content.
+ */
+public interface InputSourceable {
+ /** Returns an {@link InputSource} for the document. */
+ InputSource toInputSource();
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/MarkupTagAction.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/MarkupTagAction.java
new file mode 100755
index 0000000..42b3338
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/MarkupTagAction.java
@@ -0,0 +1,118 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+import com.kohlschutter.boilerpipe.document.TextBlock;
+import com.kohlschutter.boilerpipe.labels.DefaultLabels;
+import com.kohlschutter.boilerpipe.labels.LabelAction;
+
+/**
+ * Assigns labels for element CSS classes and ids to the corresponding {@link TextBlock}. CSS
+ * classes are prefixed by <code>{@link DefaultLabels#MARKUP_PREFIX}.</code>, and IDs are prefixed
+ * by <code>{@link DefaultLabels#MARKUP_PREFIX}#</code>
+ */
+public final class MarkupTagAction implements TagAction {
+
+ private final boolean isBlockLevel;
+ private LinkedList<List<String>> labelStack = new LinkedList<List<String>>();
+
+ public MarkupTagAction(final boolean isBlockLevel) {
+ this.isBlockLevel = isBlockLevel;
+ }
+
+ private static final Pattern PAT_NUM = Pattern.compile("[0-9]+");
+
+ @Override
+ public boolean start(BoilerpipeHTMLContentHandler instance, String localName, String qName,
+ Attributes atts) throws SAXException {
+ List<String> labels = new ArrayList<String>(5);
+ labels.add(DefaultLabels.MARKUP_PREFIX + localName);
+
+ String classVal = atts.getValue("class");
+ // Digit runs become '#'; adds the class list with spaces as dots and, if several, each name.
+ if (classVal != null && classVal.length() > 0) {
+ classVal = PAT_NUM.matcher(classVal).replaceAll("#");
+ classVal = classVal.trim();
+ String[] vals = classVal.split("[ ]+");
+ labels.add(DefaultLabels.MARKUP_PREFIX + "." + classVal.replace(' ', '.'));
+ if (vals.length > 1) {
+ for (String s : vals) {
+ labels.add(DefaultLabels.MARKUP_PREFIX + "." + s);
+ }
+ }
+ }
+ // The id gets the same digit replacement and is prefixed with '#'.
+ String id = atts.getValue("id");
+ if (id != null && id.length() > 0) {
+ id = PAT_NUM.matcher(id).replaceAll("#");
+ labels.add(DefaultLabels.MARKUP_PREFIX + "#" + id);
+ }
+
+ Set<String> ancestors = getAncestorLabels();
+ List<String> labelsWithAncestors =
+ new ArrayList<String>((ancestors.size() + 1) * labels.size());
+ // Per own label: every ancestor label, the "ancestor own" pair, then the own label itself.
+ for (String l : labels) {
+ for (String an : ancestors) {
+ labelsWithAncestors.add(an);
+ labelsWithAncestors.add(an + " " + l);
+ }
+ labelsWithAncestors.add(l);
+ }
+
+ instance.addLabelAction(new LabelAction(labelsWithAncestors
+ .toArray(new String[labelsWithAncestors.size()])));
+ // Keep this element's own labels on the stack until end() removes them.
+ labelStack.add(labels);
+
+ return isBlockLevel;
+ }
+
+ @Override
+ public boolean end(BoilerpipeHTMLContentHandler instance, String localName, String qName)
+ throws SAXException {
+
+ labelStack.removeLast();
+ return isBlockLevel;
+ }
+
+ public boolean changesTagLevel() {
+ return isBlockLevel;
+ }
+
+ private Set<String> getAncestorLabels() {
+ Set<String> set = new HashSet<String>();
+ for (List<String> labels : labelStack) {
+ if (labels == null) {
+ continue;
+ }
+ set.addAll(labels);
+ }
+ return set;
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/TagAction.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/TagAction.java
new file mode 100755
index 0000000..9f1d3cd
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/TagAction.java
@@ -0,0 +1,35 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+/**
+ * Defines an action that is to be performed whenever a particular tag occurs during HTML parsing.
+ */
+public interface TagAction {
+
+ boolean start(final BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName, final Attributes atts) throws SAXException;
+
+ boolean end(final BoilerpipeHTMLContentHandler instance, final String localName,
+ final String qName) throws SAXException;
+
+ boolean changesTagLevel();
+}
\ No newline at end of file
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/TagActionMap.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/TagActionMap.java
new file mode 100755
index 0000000..04fcafb
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/TagActionMap.java
@@ -0,0 +1,59 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
+import java.util.HashMap;
+
+/**
+ * Base class for defining a set of {@link TagAction}s that are to be used for the HTML parsing
+ * process.
+ *
+ * @see DefaultTagActionMap
+ */
+public abstract class TagActionMap extends HashMap<String, TagAction> {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Sets a particular {@link TagAction} for a given tag. Any existing TagAction for that tag will
+ * be removed and overwritten.
+ *
+ * @param tag The tag (will be stored internally 1. as it is, 2. lower-case, 3. upper-case)
+ * @param action The {@link TagAction}
+ */
+ protected void setTagAction(final String tag, final TagAction action) {
+ put(tag.toUpperCase(), action);
+ put(tag.toLowerCase(), action);
+ put(tag, action);
+ }
+
+ /**
+ * Adds a particular {@link TagAction} for a given tag. If a TagAction already exists for that
+ * tag, a chained action consisting of the previous and the new {@link TagAction} is created.
+ *
+ * @param tag The tag (will be stored internally 1. as it is, 2. lower-case, 3. upper-case)
+ * @param action The {@link TagAction}
+ */
+ protected void addTagAction(final String tag, final TagAction action) {
+ TagAction previousAction = get(tag);
+ if (previousAction == null) {
+ setTagAction(tag, action);
+ } else {
+ setTagAction(tag, new CommonTagActions.Chained(previousAction, action));
+ }
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/package-info.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/package-info.java
new file mode 100755
index 0000000..35a8e5c
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/sax/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * Classes related to parsing and producing HTML from/to Boilerpipe TextDocuments.
+ */
+package com.kohlschutter.boilerpipe.sax;
+
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/util/UnicodeTokenizer.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/util/UnicodeTokenizer.java
new file mode 100755
index 0000000..e1cc02b
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/util/UnicodeTokenizer.java
@@ -0,0 +1,40 @@
+/**
+ * boilerpipe
+ *
+ * Copyright (c) 2009, 2014 Christian Kohlschütter
+ *
+ * The author licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kohlschutter.boilerpipe.util;
+
+import java.util.regex.Pattern;
+
+/**
+ * Tokenizes text according to Unicode word boundaries and strips off non-word characters.
+ */
+public class UnicodeTokenizer {
+ private static final Pattern PAT_WORD_BOUNDARY = Pattern.compile("\\b");
+ private static final Pattern PAT_NOT_WORD_BOUNDARY = Pattern
+ .compile("[\u2063]*([\\\"'\\.,\\!\\@\\-\\:\\;\\$\\?\\(\\)/])[\u2063]*");
+
+ /**
+ * Tokenizes the text and returns an array of tokens.
+ *
+ * @param text The text
+ * @return The tokens
+ */
+ public static String[] tokenize(final CharSequence text) {
+ return PAT_NOT_WORD_BOUNDARY.matcher(PAT_WORD_BOUNDARY.matcher(text).replaceAll("\u2063"))
+ .replaceAll("$1").replaceAll("[ \u2063]+", " ").trim().split("[ ]+");
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/util/package-info.java b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/util/package-info.java
new file mode 100755
index 0000000..f804445
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/com/kohlschutter/boilerpipe/util/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * Some helper classes.
+ */
+package com.kohlschutter.boilerpipe.util;
+
diff --git a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/TransformationFlinkInit.java b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/TransformationFlinkInit.java
index fd1ca8a..6d03aa6 100644
--- a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/TransformationFlinkInit.java
+++ b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/TransformationFlinkInit.java
@@ -20,6 +20,7 @@
import org.streampipes.container.init.DeclarersSingleton;
import org.streampipes.container.standalone.init.StandaloneModelSubmitter;
import org.streampipes.processors.transformation.flink.config.TransformationFlinkConfig;
+import org.streampipes.processors.transformation.flink.processor.boilerplate.BoilerplateController;
import org.streampipes.processors.transformation.flink.processor.converter.FieldConverterController;
import org.streampipes.processors.transformation.flink.processor.hasher.FieldHasherController;
import org.streampipes.processors.transformation.flink.processor.mapper.FieldMapperController;
@@ -33,7 +34,8 @@
.add(new FieldHasherController())
.add(new FieldMapperController())
//.add(new MeasurementUnitConverterController())
- .add(new FieldRenamerController());
+ .add(new FieldRenamerController())
+ .add(new BoilerplateController());
new TransformationFlinkInit().init(TransformationFlinkConfig.INSTANCE);
}
diff --git a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateController.java b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateController.java
new file mode 100644
index 0000000..5b2e4bc
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateController.java
@@ -0,0 +1,89 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.transformation.flink.processor.boilerplate;
+
+import org.streampipes.model.graph.DataProcessorDescription;
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.model.schema.PropertyScope;
+import org.streampipes.processors.transformation.flink.config.TransformationFlinkConfig;
+import org.streampipes.sdk.builder.ProcessingElementBuilder;
+import org.streampipes.sdk.builder.StreamRequirementsBuilder;
+import org.streampipes.sdk.extractor.ProcessingElementParameterExtractor;
+import org.streampipes.sdk.helpers.*;
+import org.streampipes.wrapper.flink.FlinkDataProcessorDeclarer;
+import org.streampipes.wrapper.flink.FlinkDataProcessorRuntime;
+
+public class BoilerplateController extends FlinkDataProcessorDeclarer<BoilerplateParameters> {
+
+ public static final String HTML_PROPERTY = "stringProperty";
+ public static final String EXTRACTOR = "extractor";
+ public static final String OUTPUT_MODE = "outputMode";
+
+
+ @Override
+ public DataProcessorDescription declareModel() {
+ return ProcessingElementBuilder.create("org.streampipes.processors.transformation.flink.processor.boilerplate",
+ "Boilerplate Removal", "Removes boilerplate and extract fulltext from HTML")
+ .iconUrl(TransformationFlinkConfig.getIconUrl("Boilerplate_icon"))
+ .requiredStream(StreamRequirementsBuilder
+ .create()
+ .requiredPropertyWithUnaryMapping(EpRequirements.stringReq(),
+ Labels.from(HTML_PROPERTY,"Html", "The property with the html"), PropertyScope.NONE)
+ .build())
+ .requiredSingleValueSelection(Labels.from(EXTRACTOR, "Extractor", "Common use: Article Extractor"),
+ Options.from("Article Extractor", "Default Extractor", "Largest Content Extractor", "Canola Extractor", "Keep Everything Extractor"))
+ .requiredSingleValueSelection(Labels.from(OUTPUT_MODE, "Output Mode", ""),
+ Options.from("Plain Text", "Highlighted Html", "Html"))
+ .supportedProtocols(SupportedProtocols.kafka())
+ .supportedFormats(SupportedFormats.jsonFormat())
+ .outputStrategy(OutputStrategies.keep())
+ .build();
+ }
+
+ @Override
+ public FlinkDataProcessorRuntime<BoilerplateParameters> getRuntime(DataProcessorInvocation graph, ProcessingElementParameterExtractor extractor) {
+ String htmlProperty = extractor.mappingPropertyValue(HTML_PROPERTY);
+ String htmlExtractor = extractor.selectedSingleValue(EXTRACTOR, String.class);
+ String htmlOutputMode = extractor.selectedSingleValue(OUTPUT_MODE, String.class);
+
+ ExtractorMode extractorMode = null;
+ switch (htmlExtractor) {
+ case "Article Extractor": extractorMode = ExtractorMode.ARTICLE;
+ break;
+ case "Default Extractor": extractorMode = ExtractorMode.DEFAULT;
+ break;
+ case "Largest Content Extractor": extractorMode = ExtractorMode.LARGEST_CONTENT;
+ break;
+ case "Canola Extractor": extractorMode = ExtractorMode.CANOLA;
+ break;
+ case "Keep Everything Extractor": extractorMode = ExtractorMode.KEEP_EVERYTHING;
+ }
+
+ OutputMode outputMode = null;
+ switch (htmlOutputMode) {
+ case "Plain Text": outputMode = OutputMode.PLAIN_TEXT;
+ break;
+ case "Highlighted Html": outputMode = OutputMode.HIGHLIGHTED_HTML;
+ break;
+ case "Html": outputMode = OutputMode.HTML;
+ }
+
+ BoilerplateParameters staticParams = new BoilerplateParameters(graph, htmlProperty, extractorMode, outputMode);
+
+ return new BoilerplateProgram(staticParams, TransformationFlinkConfig.INSTANCE.getDebug());
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateParameters.java b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateParameters.java
new file mode 100644
index 0000000..64a63bd
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateParameters.java
@@ -0,0 +1,46 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.transformation.flink.processor.boilerplate;
+
+import org.streampipes.model.graph.DataProcessorInvocation;
+import org.streampipes.wrapper.params.binding.EventProcessorBindingParams;
+
+public class BoilerplateParameters extends EventProcessorBindingParams {
+
+ private String htmlProperty;
+ private ExtractorMode extractorMode;
+ private OutputMode outputMode;
+
+ public BoilerplateParameters(DataProcessorInvocation graph, String htmlProperty, ExtractorMode extractorMode, OutputMode outputMode) {
+ super(graph);
+ this.htmlProperty = htmlProperty;
+ this.extractorMode = extractorMode;
+ this.outputMode = outputMode;
+ }
+
+ public String getHtmlProperty() {
+ return htmlProperty;
+ }
+
+ public ExtractorMode getExtractorMode() {
+ return extractorMode;
+ }
+
+ public OutputMode getOutputMode() {
+ return outputMode;
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateProgram.java b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateProgram.java
new file mode 100644
index 0000000..1cb5095
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateProgram.java
@@ -0,0 +1,39 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.transformation.flink.processor.boilerplate;
+
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.streampipes.model.runtime.Event;
+import org.streampipes.processors.transformation.flink.AbstractFlinkTransformationProgram;
+
+public class BoilerplateProgram extends AbstractFlinkTransformationProgram<BoilerplateParameters> {
+
+ public BoilerplateProgram(BoilerplateParameters params, boolean debug) {
+ super(params, debug);
+ }
+
+ public BoilerplateProgram(BoilerplateParameters params) {
+ super(params);
+ }
+
+ @Override
+ protected DataStream<Event> getApplicationLogic(DataStream<Event>... dataStreams) {
+ return dataStreams[0].flatMap(
+ new BoilerplateRemover(params.getHtmlProperty(), params.getExtractorMode(), params.getOutputMode())
+ );
+ }
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateRemover.java b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateRemover.java
new file mode 100644
index 0000000..d8e9199
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/BoilerplateRemover.java
@@ -0,0 +1,90 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.transformation.flink.processor.boilerplate;
+
+import com.kohlschutter.boilerpipe.document.TextDocument;
+import com.kohlschutter.boilerpipe.extractors.CommonExtractors;
+import com.kohlschutter.boilerpipe.extractors.ExtractorBase;
+import com.kohlschutter.boilerpipe.sax.BoilerpipeSAXInput;
+import com.kohlschutter.boilerpipe.sax.HTMLDocument;
+import com.kohlschutter.boilerpipe.sax.HTMLHighlighter;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+import org.streampipes.model.runtime.Event;
+
+import java.nio.charset.Charset;
+
+public class BoilerplateRemover implements FlatMapFunction<Event, Event> {
+
+ private String htmlProperty;
+ private OutputMode outputMode;
+
+ private ExtractorBase extractor;
+ private HTMLHighlighter htmlHighlighter;
+
+ public BoilerplateRemover(String htmlProperty, ExtractorMode extractorMode, OutputMode outputMode) {
+ this.htmlProperty = htmlProperty;
+ this.outputMode = outputMode;
+ this.htmlHighlighter = null;
+ setExtractor(extractorMode);
+ }
+
+ @Override
+ public void flatMap(Event event, Collector<Event> collector) throws Exception {
+ String value = event.getFieldBySelector(htmlProperty).getAsPrimitive().getAsString();
+
+ HTMLDocument htmlDoc = new HTMLDocument(value.getBytes(), Charset.defaultCharset());
+ TextDocument textDoc = new BoilerpipeSAXInput(htmlDoc.toInputSource()).getTextDocument();
+ extractor.process(textDoc);
+
+ String result = "";
+ switch (outputMode) {
+ case PLAIN_TEXT: result = textDoc.getContent();
+ break;
+ case HIGHLIGHTED_HTML: result = getHTMLHighligther(false).process(textDoc, htmlDoc.toInputSource());
+ break;
+ case HTML: result = getHTMLHighligther(true).process(textDoc, htmlDoc.toInputSource());
+ }
+
+ event.updateFieldBySelector(htmlProperty, result);
+
+ collector.collect(event);
+ }
+
+ private void setExtractor(ExtractorMode extractorMode) {
+ switch (extractorMode) {
+ case ARTICLE: extractor = CommonExtractors.ARTICLE_EXTRACTOR;
+ break;
+ case DEFAULT: extractor = CommonExtractors.DEFAULT_EXTRACTOR;
+ break;
+ case LARGEST_CONTENT: extractor = CommonExtractors.LARGEST_CONTENT_EXTRACTOR;
+ break;
+ case CANOLA: extractor = CommonExtractors.CANOLA_EXTRACTOR;
+ break;
+ case KEEP_EVERYTHING: extractor = CommonExtractors.KEEP_EVERYTHING_EXTRACTOR;
+ }
+ }
+
+ private HTMLHighlighter getHTMLHighligther(boolean extractHTML) {
+ if (htmlHighlighter == null) {
+ if (extractHTML) htmlHighlighter = HTMLHighlighter.newExtractingInstance();
+ else htmlHighlighter = HTMLHighlighter.newHighlightingInstance();
+ }
+ return htmlHighlighter;
+ }
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/ExtractorMode.java b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/ExtractorMode.java
new file mode 100644
index 0000000..e0e183a
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/ExtractorMode.java
@@ -0,0 +1,26 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.transformation.flink.processor.boilerplate;
+
+public enum ExtractorMode {
+ ARTICLE,
+ DEFAULT,
+ LARGEST_CONTENT,
+ CANOLA,
+ KEEP_EVERYTHING
+
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/OutputMode.java b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/OutputMode.java
new file mode 100644
index 0000000..2f45b85
--- /dev/null
+++ b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/boilerplate/OutputMode.java
@@ -0,0 +1,23 @@
+/*
+Copyright 2019 FZI Forschungszentrum Informatik
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.streampipes.processors.transformation.flink.processor.boilerplate;
+
+public enum OutputMode {
+ PLAIN_TEXT,
+ HIGHLIGHTED_HTML,
+ HTML
+}
diff --git a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/converter/FieldConverter.java b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/converter/FieldConverter.java
index 44f113b..01a680a 100644
--- a/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/converter/FieldConverter.java
+++ b/streampipes-processors-transformation-flink/src/main/java/org/streampipes/processors/transformation/flink/processor/converter/FieldConverter.java
@@ -20,11 +20,10 @@
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.streampipes.model.runtime.Event;
import org.streampipes.vocabulary.XSD;
-import java.util.Map;
-
-public class FieldConverter implements FlatMapFunction<Map<String, Object>, Map<String, Object>> {
+public class FieldConverter implements FlatMapFunction<Event, Event> {
private static Logger LOG = LoggerFactory.getLogger(FieldConverter.class);
@@ -38,13 +37,13 @@
@Override
- public void flatMap(Map<String, Object> in, Collector<Map<String, Object>> out) {
- String value = String.valueOf(in.get(convertProperty));
+ public void flatMap(Event in, Collector<Event> out) {
+ String value = in.getFieldBySelector(convertProperty).getAsPrimitive().getAsString();
try {
if (targetDatatype.equals(X