Added Apache Tika Server example with Grobid Service
diff --git a/docker-compose-tika-grobid.yml b/docker-compose-tika-grobid.yml
new file mode 100644
index 0000000..cb3a66a
--- /dev/null
+++ b/docker-compose-tika-grobid.yml
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+version: "3.8"
+services:
+
+  ## Apache Tika Server, started with the mounted /grobid directory on its classpath
+  tika:
+    image: apache/tika:1.25-full
+    # Override the image's default entrypoint so the configuration under /grobid is on the Java classpath
+    entrypoint: ["/bin/sh", "-c", "exec java -cp /grobid:/tika-server-1.25.jar org.apache.tika.server.TikaServerCli -h 0.0.0.0 $$0 $$@"]
+    # Compose unescapes $$ to $, so sh appends these words via $0 and $@; kept separate as an example, could be inlined above
+    command: -c /grobid/tika-config.xml
+    restart: on-failure
+    ports:
+      - "9998:9998"
+    volumes:
+      - ./sample-configs/grobid:/grobid
+    depends_on:
+      - grobid
+
+  ## Grobid Service (GrobidExtractor.properties points Tika at http://grobid:8070)
+  grobid:
+    image: lfoppiano/grobid:0.6.1
+    ports:
+      - "8070:8070"
+      - "8071:8071"
+
diff --git a/sample-configs/grobid/org/apache/tika/parser/journal/GrobidExtractor.properties b/sample-configs/grobid/org/apache/tika/parser/journal/GrobidExtractor.properties
new file mode 100644
index 0000000..44689a2
--- /dev/null
+++ b/sample-configs/grobid/org/apache/tika/parser/journal/GrobidExtractor.properties
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+grobid.server.url=http://grobid:8070
diff --git a/sample-configs/grobid/tika-config.xml b/sample-configs/grobid/tika-config.xml
new file mode 100644
index 0000000..5b4aad9
--- /dev/null
+++ b/sample-configs/grobid/tika-config.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<properties>
+  <parsers> <!-- Registers JournalParser for the MIME type listed inside it -->
+    <parser class="org.apache.tika.parser.journal.JournalParser">
+      <mime>application/pdf</mime>
+    </parser>
+  </parsers>
+</properties>