| <?xml version="1.0" encoding="UTF-8" ?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| <writers xmlns="http://lucene.apache.org/nutch" |
| xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| xsi:schemaLocation="http://lucene.apache.org/nutch index-writers.xsd"> |
| |
<!-- Writer "indexer_solr_1", backed by org.apache.nutch.indexwriter.solr.SolrIndexWriter. -->
| <writer id="indexer_solr_1" class="org.apache.nutch.indexwriter.solr.SolrIndexWriter"> |
<!-- Writer settings: type "http" pointed at a Solr URL on localhost;
     "collection" and "weight.field" are left empty. -->
| <parameters> |
| <param name="type" value="http"/> |
| <param name="url" value="http://localhost:8983/solr/nutch"/> |
| <param name="collection" value=""/> |
| <param name="weight.field" value=""/> |
| <param name="commitSize" value="1000"/> |
<!-- "auth" is false and the credentials below are the literal strings
     "username" and "password". NOTE(review): these look like placeholders;
     confirm they are only read when auth is true. -->
| <param name="auth" value="false"/> |
| <param name="username" value="username"/> |
| <param name="password" value="password"/> |
| </parameters> |
<!-- Field mapping rules for this writer, grouped as copy / rename / remove. -->
| <mapping> |
<!-- No active copy rules: both examples below are commented out. -->
| <copy> |
| <!-- <field source="content" dest="search"/> --> |
| <!-- <field source="title" dest="title,search"/> --> |
| </copy> |
<!-- Renames the two metatag.* fields to the shorter names in "dest". -->
| <rename> |
| <field source="metatag.description" dest="description"/> |
| <field source="metatag.keywords" dest="keywords"/> |
| </rename> |
<!-- Lists the "segment" field for removal. -->
| <remove> |
| <field source="segment"/> |
| </remove> |
| </mapping> |
| </writer> |
<!-- Writer "indexer_rabbit_1", backed by org.apache.nutch.indexwriter.rabbit.RabbitIndexWriter. -->
| <writer id="indexer_rabbit_1" class="org.apache.nutch.indexwriter.rabbit.RabbitIndexWriter"> |
| <parameters> |
<!-- The AMQP URI embeds the credentials guest:guest for localhost:5672.
     NOTE(review): presumably intended for local testing only; replace before
     pointing this writer at a shared broker. -->
| <param name="server.uri" value="amqp://guest:guest@localhost:5672/"/> |
<!-- Binding is switched off and no binding arguments are given. -->
| <param name="binding" value="false"/> |
| <param name="binding.arguments" value=""/> |
<!-- Exchange and queue settings; the *.options values are comma-separated
     key=value lists. The exchange name and routing key are empty. -->
| <param name="exchange.name" value=""/> |
| <param name="exchange.options" value="type=direct,durable=true"/> |
| <param name="queue.name" value="nutch.queue"/> |
| <param name="queue.options" value="durable=true,exclusive=false,auto-delete=false"/> |
| <param name="routingkey" value=""/> |
<!-- Commit settings: mode "multiple" with a size of 250.
     NOTE(review): presumably the number of documents per commit; confirm
     against the writer implementation. -->
| <param name="commit.mode" value="multiple"/> |
| <param name="commit.size" value="250"/> |
<!-- No static or dynamic headers are configured. -->
| <param name="headers.static" value=""/> |
| <param name="headers.dynamic" value=""/> |
| </parameters> |
<!-- Field mapping rules for this writer, grouped as copy / rename / remove. -->
| <mapping> |
<!-- "dest" holds two comma-separated names ("title,search").
     NOTE(review): presumably a list of destination fields; confirm. -->
| <copy> |
| <field source="title" dest="title,search"/> |
| </copy> |
| <rename> |
| <field source="metatag.description" dest="description"/> |
| <field source="metatag.keywords" dest="keywords"/> |
| </rename> |
<!-- Lists "content", "segment" and "boost" for removal. -->
| <remove> |
| <field source="content"/> |
| <field source="segment"/> |
| <field source="boost"/> |
| </remove> |
| </mapping> |
| </writer> |
<!-- Writer "indexer_dummy_1", backed by org.apache.nutch.indexwriter.dummy.DummyIndexWriter. -->
| <writer id="indexer_dummy_1" class="org.apache.nutch.indexwriter.dummy.DummyIndexWriter"> |
<!-- "delete" is false and "path" is a relative file path.
     NOTE(review): presumably the output file, resolved against the working
     directory of the job; confirm. -->
| <parameters> |
| <param name="delete" value="false"/> |
| <param name="path" value="./dummy-index.txt"/> |
| </parameters> |
<!-- All three mapping groups are empty: no copy, rename or remove rules. -->
| <mapping> |
| <copy /> |
| <rename /> |
| <remove /> |
| </mapping> |
| </writer> |
<!-- Writer "indexer_csv_1", backed by org.apache.nutch.indexwriter.csv.CSVIndexWriter. -->
| <writer id="indexer_csv_1" class="org.apache.nutch.indexwriter.csv.CSVIndexWriter"> |
| <parameters> |
<!-- "fields" is a comma-separated list of field names (id, title, content).
     NOTE(review): presumably the columns written, in this order; confirm. -->
| <param name="fields" value="id,title,content"/> |
| <param name="charset" value="UTF-8"/> |
<!-- Delimiters: "," as separator and "|" as valuesep.
     NOTE(review): valuesep presumably joins multiple values of one field. -->
| <param name="separator" value=","/> |
| <param name="valuesep" value="|"/> |
<!-- Both the quote and the escape character are a double quote. A literal
     double quote cannot appear inside a double-quoted attribute value, so it
     must be written as the predefined entity &quot; to keep the file
     well-formed. -->
| <param name="quotechar" value="&quot;"/> |
| <param name="escapechar" value="&quot;"/> |
<!-- Limits of 4096 and 12. NOTE(review): presumably characters per field
     and values per field respectively; confirm. -->
| <param name="maxfieldlength" value="4096"/> |
| <param name="maxfieldvalues" value="12"/> |
| <param name="header" value="true"/> |
| <param name="outpath" value="csvindexwriter"/> |
| </parameters> |
<!-- All three mapping groups are empty: no copy, rename or remove rules. -->
| <mapping> |
| <copy /> |
| <rename /> |
| <remove /> |
| </mapping> |
| </writer> |
<!-- Writer "indexer_elastic_1", backed by org.apache.nutch.indexwriter.elastic.ElasticIndexWriter. -->
| <writer id="indexer_elastic_1" class="org.apache.nutch.indexwriter.elastic.ElasticIndexWriter"> |
| <parameters> |
<!-- "host" and "cluster" are empty and must be supplied by the deployer;
     the target index is "nutch" on port 9300. -->
| <param name="host" value=""/> |
| <param name="port" value="9300"/> |
| <param name="cluster" value=""/> |
| <param name="index" value="nutch"/> |
<!-- Bulk limits. NOTE(review): presumably documents per bulk request and
     bytes per bulk request; confirm the unit of max.bulk.size. -->
| <param name="max.bulk.docs" value="250"/> |
| <param name="max.bulk.size" value="2500500"/> |
<!-- Backoff settings: 100 (named as milliseconds) and 10 retries. -->
| <param name="exponential.backoff.millis" value="100"/> |
| <param name="exponential.backoff.retries" value="10"/> |
<!-- NOTE(review): the unit of bulk.close.timeout is not stated here;
     presumably seconds. Confirm. -->
| <param name="bulk.close.timeout" value="600"/> |
| <!--<param name="options" value="key1=value1,key2=value2"/>--> |
| </parameters> |
<!-- One copy rule; the rename and remove groups are empty. -->
| <mapping> |
| <copy> |
| <field source="title" dest="title,search"/> |
| </copy> |
| <rename /> |
| <remove /> |
| </mapping> |
| </writer> |
<!-- Writer "indexer_elastic_rest_1", backed by org.apache.nutch.indexwriter.elasticrest.ElasticRestIndexWriter. -->
| <writer id="indexer_elastic_rest_1" class="org.apache.nutch.indexwriter.elasticrest.ElasticRestIndexWriter"> |
| <parameters> |
<!-- "host" is empty and must be supplied by the deployer; the target index
     is "nutch" on port 9200. -->
| <param name="host" value=""/> |
| <param name="port" value="9200"/> |
| <param name="index" value="nutch"/> |
| <param name="max.bulk.docs" value="250"/> |
| <param name="max.bulk.size" value="2500500"/> |
<!-- The credentials are the literal strings "user" and "password".
     NOTE(review): these look like placeholders; replace before use. -->
| <param name="user" value="user"/> |
| <param name="password" value="password"/> |
| <param name="type" value="doc"/> |
<!-- HTTPS is off and "trustallhostnames" is false. -->
| <param name="https" value="false"/> |
| <param name="trustallhostnames" value="false"/> |
<!-- "languages" is empty, with separator "_" and sink "others".
     NOTE(review): presumably these control per-language index naming;
     confirm against the writer implementation. -->
| <param name="languages" value=""/> |
| <param name="separator" value="_"/> |
| <param name="sink" value="others"/> |
| </parameters> |
<!-- One copy rule (title to search); the rename and remove groups are empty. -->
| <mapping> |
| <copy> |
| <field source="title" dest="search"/> |
| </copy> |
| <rename /> |
| <remove /> |
| </mapping> |
| </writer> |
<!-- Writer "indexer_cloud_search_1", backed by org.apache.nutch.indexwriter.cloudsearch.CloudSearchIndexWriter. -->
| <writer id="indexer_cloud_search_1" class="org.apache.nutch.indexwriter.cloudsearch.CloudSearchIndexWriter"> |
<!-- "endpoint" and "region" are empty and must be supplied by the deployer.
     "batch.dump" is false and "batch.maxSize" is -1.
     NOTE(review): -1 presumably means no explicit limit; confirm. -->
| <parameters> |
| <param name="endpoint" value=""/> |
| <param name="region" value=""/> |
| <param name="batch.dump" value="false"/> |
| <param name="batch.maxSize" value="-1"/> |
| </parameters> |
<!-- All three mapping groups are empty: no copy, rename or remove rules. -->
| <mapping> |
| <copy /> |
| <rename /> |
| <remove /> |
| </mapping> |
| </writer> |
<!-- Writer "indexer_kafka_1", backed by org.apache.nutch.indexwriter.kafka.KafkaIndexWriter. -->
| <writer id="indexer_kafka_1" class="org.apache.nutch.indexwriter.kafka.KafkaIndexWriter"> |
| <parameters> |
<!-- "host" and "topic" are empty and must be supplied by the deployer;
     the port is 9092. -->
| <param name="host" value=""/> |
| <param name="port" value="9092"/> |
| <param name="topic" value=""/> |
<!-- Fully qualified serializer class names: a byte-array serializer for
     keys and a JSON serializer for values. -->
| <param name="key.serializer" value="org.apache.kafka.common.serialization.ByteArraySerializer"/> |
| <param name="value.serializer" value="org.apache.kafka.connect.json.JsonSerializer"/> |
<!-- NOTE(review): presumably the number of documents buffered before they
     are sent; confirm against the writer implementation. -->
| <param name="max.doc.count" value="100"/> |
| </parameters> |
<!-- One copy rule (title to search); the rename and remove groups are empty. -->
| <mapping> |
| <copy> |
| <field source="title" dest="search"/> |
| </copy> |
| <rename /> |
| <remove /> |
| </mapping> |
| </writer> |
| </writers> |