<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<writers xmlns="http://lucene.apache.org/nutch"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://lucene.apache.org/nutch index-writers.xsd">
<!--
  Index writer "indexer_solr_1": SolrIndexWriter pointed at the Solr endpoint
  http://localhost:8983/solr/nutch over plain HTTP, with authentication
  switched off.
-->
<writer id="indexer_solr_1"
        class="org.apache.nutch.indexwriter.solr.SolrIndexWriter">
  <parameters>
    <!-- Endpoint settings; "collection" and "weight.field" are left empty. -->
    <param name="type" value="http"/>
    <param name="url" value="http://localhost:8983/solr/nutch"/>
    <param name="collection" value=""/>
    <param name="weight.field" value=""/>
    <param name="commitSize" value="1000"/>

    <!--
      Set "auth" to true to enable authentication. Then either supply
      "username" and "password" for basic authentication, or pass a Bearer
      token through the Authorization HTTP header using the two
      "auth.header.*" parameters further down.
    -->
    <param name="auth" value="false"/>

    <!-- Credentials used for basic authentication. -->
    <param name="username" value=""/>
    <param name="password" value=""/>

    <!--
      Name part of the Authorization HTTP header. Example:
        <param name="auth.header.name" value="Bearer"/>
      HTTP header -> Authorization: Bearer 1234567890
    -->
    <param name="auth.header.name" value=""/>

    <!--
      Value part of the Authorization HTTP header. Example:
        <param name="auth.header.value" value="1234567890"/>
      HTTP header -> Authorization: Bearer 1234567890
    -->
    <param name="auth.header.value" value=""/>
  </parameters>
  <mapping>
    <!-- No copy rule is active; the two entries are commented-out examples. -->
    <copy>
      <!-- <field source="content" dest="search"/> -->
      <!-- <field source="title" dest="title,search"/> -->
    </copy>
    <!-- The metatag.* sources are renamed to the shorter dest names. -->
    <rename>
      <field source="metatag.description" dest="description"/>
      <field source="metatag.keywords" dest="keywords"/>
    </rename>
    <!-- The "segment" field is listed for removal. -->
    <remove>
      <field source="segment"/>
    </remove>
  </mapping>
</writer>
<!--
  Index writer "indexer_rabbit_1": RabbitIndexWriter connecting to the AMQP
  URI given in "server.uri" and using the queue "nutch.queue".
-->
<writer id="indexer_rabbit_1"
        class="org.apache.nutch.indexwriter.rabbit.RabbitIndexWriter">
  <parameters>
    <!--
      NOTE(review): the URI embeds guest:guest credentials for localhost,
      presumably placeholder values; confirm before pointing this at a
      real broker.
    -->
    <param name="server.uri" value="amqp://guest:guest@localhost:5672/"/>

    <!-- Binding is disabled and no binding arguments are given. -->
    <param name="binding" value="false"/>
    <param name="binding.arguments" value=""/>

    <!-- Exchange and queue; the *.options values are comma-separated key=value lists. -->
    <param name="exchange.name" value=""/>
    <param name="exchange.options" value="type=direct,durable=true"/>
    <param name="queue.name" value="nutch.queue"/>
    <param name="queue.options" value="durable=true,exclusive=false,auto-delete=false"/>
    <param name="routingkey" value=""/>

    <!-- Commit settings. -->
    <param name="commit.mode" value="multiple"/>
    <param name="commit.size" value="250"/>

    <!-- No static or dynamic headers are configured. -->
    <param name="headers.static" value=""/>
    <param name="headers.dynamic" value=""/>
  </parameters>
  <mapping>
    <!-- "title" is copied with dest "title,search". -->
    <copy>
      <field source="title" dest="title,search"/>
    </copy>
    <!-- The metatag.* sources are renamed to the shorter dest names. -->
    <rename>
      <field source="metatag.description" dest="description"/>
      <field source="metatag.keywords" dest="keywords"/>
    </rename>
    <!-- Fields listed for removal. -->
    <remove>
      <field source="content"/>
      <field source="segment"/>
      <field source="boost"/>
    </remove>
  </mapping>
</writer>
<!--
  Index writer "indexer_dummy_1": DummyIndexWriter with "path" set to
  ./dummy-index.txt and "delete" set to false. No field mapping rules.
-->
<writer id="indexer_dummy_1"
        class="org.apache.nutch.indexwriter.dummy.DummyIndexWriter">
  <parameters>
    <param name="delete" value="false"/>
    <param name="path" value="./dummy-index.txt"/>
  </parameters>
  <!-- All three mapping sections are present but empty. -->
  <mapping>
    <copy/>
    <rename/>
    <remove/>
  </mapping>
</writer>
<!--
  Index writer "indexer_csv_1": CSVIndexWriter listing the fields id, title
  and content, with "outpath" set to csvindexwriter. No field mapping rules.
-->
<writer id="indexer_csv_1"
        class="org.apache.nutch.indexwriter.csv.CSVIndexWriter">
  <parameters>
    <!-- Comma-separated field list and character set. -->
    <param name="fields" value="id,title,content"/>
    <param name="charset" value="UTF-8"/>

    <!--
      Delimiters: "," as separator and "|" as valuesep. Both quotechar and
      escapechar are the double-quote character, written as an entity
      because it sits inside a double-quoted attribute value.
    -->
    <param name="separator" value=","/>
    <param name="valuesep" value="|"/>
    <param name="quotechar" value="&quot;"/>
    <param name="escapechar" value="&quot;"/>

    <!-- Limits, header switch, and output path. -->
    <param name="maxfieldlength" value="4096"/>
    <param name="maxfieldvalues" value="12"/>
    <param name="header" value="true"/>
    <param name="outpath" value="csvindexwriter"/>
  </parameters>
  <!-- All three mapping sections are present but empty. -->
  <mapping>
    <copy/>
    <rename/>
    <remove/>
  </mapping>
</writer>
<!--
  Index writer "indexer_elastic_1": ElasticIndexWriter targeting
  http://localhost:9200 and the index "nutch".
-->
<writer id="indexer_elastic_1"
        class="org.apache.nutch.indexwriter.elastic.ElasticIndexWriter">
  <parameters>
    <!-- Target node; "scheme" is either http or https. -->
    <param name="host" value="localhost"/>
    <param name="port" value="9200"/>
    <param name="scheme" value="http"/>
    <param name="index" value="nutch"/>

    <!--
      Credentials. The password is empty and the "auth" parameter below is
      commented out.
    -->
    <param name="username" value="elastic"/>
    <param name="password" value=""/>
    <!-- <param name="auth" value="false"/> -->

    <!-- Bulk request limits, backoff settings, and close timeout. -->
    <param name="max.bulk.docs" value="250"/>
    <param name="max.bulk.size" value="2500500"/>
    <param name="exponential.backoff.millis" value="100"/>
    <param name="exponential.backoff.retries" value="10"/>
    <param name="bulk.close.timeout" value="600"/>

    <!-- Commented-out example of the "options" key=value list. -->
    <!-- <param name="options" value="key1=value1,key2=value2"/> -->
  </parameters>
  <mapping>
    <!-- "title" is copied with dest "title,search"; rename and remove are empty. -->
    <copy>
      <field source="title" dest="title,search"/>
    </copy>
    <rename/>
    <remove/>
  </mapping>
</writer>
<!--
  Index writer "indexer_opensearch_1x_1": OpenSearch1xIndexWriter targeting
  https://localhost:9200 and the index "nutch".
-->
<writer id="indexer_opensearch_1x_1"
        class="org.apache.nutch.indexwriter.opensearch1x.OpenSearch1xIndexWriter">
  <parameters>
    <!-- Target node; "scheme" is either http or https. -->
    <param name="host" value="localhost"/>
    <param name="port" value="9200"/>
    <param name="scheme" value="https"/>
    <param name="index" value="nutch"/>

    <!--
      NOTE(review): admin/admin looks like a stock default login; confirm
      and replace before using this against a real cluster.
    -->
    <param name="username" value="admin"/>
    <param name="password" value="admin"/>

    <!-- Trust store and key store: paths and passwords are empty, type is JKS. -->
    <param name="trust.store.path" value=""/>
    <param name="trust.store.password" value=""/>
    <param name="trust.store.type" value="JKS"/>
    <param name="key.store.path" value=""/>
    <param name="key.store.password" value=""/>
    <param name="key.store.type" value="JKS"/>

    <!-- Bulk request limits, backoff settings, and close timeout. -->
    <param name="max.bulk.docs" value="250"/>
    <param name="max.bulk.size" value="2500500"/>
    <param name="exponential.backoff.millis" value="100"/>
    <param name="exponential.backoff.retries" value="10"/>
    <param name="bulk.close.timeout" value="600"/>

    <!-- Commented-out example of the "options" key=value list. -->
    <!-- <param name="options" value="key1=value1,key2=value2"/> -->
  </parameters>
  <mapping>
    <!-- "title" is copied with dest "title,search"; rename and remove are empty. -->
    <copy>
      <field source="title" dest="title,search"/>
    </copy>
    <rename/>
    <remove/>
  </mapping>
</writer>
<!--
  Index writer "indexer_cloud_search_1": CloudSearchIndexWriter. "endpoint"
  and "region" are empty here and need values. No field mapping rules.
-->
<writer id="indexer_cloud_search_1"
        class="org.apache.nutch.indexwriter.cloudsearch.CloudSearchIndexWriter">
  <parameters>
    <param name="endpoint" value=""/>
    <param name="region" value=""/>
    <param name="batch.dump" value="false"/>
    <!-- NOTE(review): -1 presumably means "no limit"; confirm against the writer's documentation. -->
    <param name="batch.maxSize" value="-1"/>
  </parameters>
  <!-- All three mapping sections are present but empty. -->
  <mapping>
    <copy/>
    <rename/>
    <remove/>
  </mapping>
</writer>
<!--
  Index writer "indexer_kafka_1": KafkaIndexWriter on port 9092. "host" and
  "topic" are empty here and need values.
-->
<writer id="indexer_kafka_1"
        class="org.apache.nutch.indexwriter.kafka.KafkaIndexWriter">
  <parameters>
    <!-- Broker address and topic. -->
    <param name="host" value=""/>
    <param name="port" value="9092"/>
    <param name="topic" value=""/>

    <!-- Fully qualified serializer class names for keys and values. -->
    <param name="key.serializer" value="org.apache.kafka.common.serialization.ByteArraySerializer"/>
    <param name="value.serializer" value="org.apache.kafka.connect.json.JsonSerializer"/>

    <param name="max.doc.count" value="100"/>
  </parameters>
  <mapping>
    <!-- "title" is copied with dest "search"; rename and remove are empty. -->
    <copy>
      <field source="title" dest="search"/>
    </copy>
    <rename/>
    <remove/>
  </mapping>
</writer>
</writers>