| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <!-- A comprehensive account of all gora-accumulo mapping |
| options can be found on the Gora website at: |
| http://gora.apache.org/current/gora-accumulo.html |
| Users are strongly advised to consult this documentation |
| prior to working with the default mapping configuration |
| provided below. |
| --> |
| |
| <gora-orm> |
| |
| <!-- Table "webpage": declares the column families referenced by the |
|      org.apache.nutch.storage.WebPage class mapping in this file. --> |
| <table name="webpage"> |
|   <family name="p"/>    <!-- title, text, parseStatus, signature, prevSignature --> |
|   <family name="f"/>    <!-- fetch fields (baseUrl, status, fetchTime, content, ...) --> |
|   <family name="s"/>    <!-- score --> |
|   <family name="il"/>   <!-- inlinks --> |
|   <family name="ol"/>   <!-- outlinks --> |
|   <family name="h"/>    <!-- headers --> |
|   <family name="mtdt"/> <!-- metadata --> |
|   <family name="mk"/>   <!-- markers --> |
|   <!-- Table-level setting. NOTE(review): presumably the Accumulo compressed block |
|        size for this table's files; confirm against the Accumulo property reference. --> |
|   <config key="table.file.compress.blocksize" value="32K"/> |
| </table> |
| <!-- Maps org.apache.nutch.storage.WebPage (String keys) onto the "webpage" table. |
|      Each field names a column family and, where given, a column qualifier. --> |
| <class name="org.apache.nutch.storage.WebPage" keyClass="java.lang.String" table="webpage"> |
| |
|   <!-- fetch fields: all stored in family "f" --> |
|   <field name="baseUrl"           family="f" qualifier="bas"/> |
|   <field name="status"            family="f" qualifier="st"/> |
|   <field name="prevFetchTime"     family="f" qualifier="pts"/> |
|   <field name="fetchTime"         family="f" qualifier="ts"/> |
|   <field name="fetchInterval"     family="f" qualifier="fi"/> |
|   <field name="retriesSinceFetch" family="f" qualifier="rsf"/> |
|   <field name="reprUrl"           family="f" qualifier="rpr"/> |
|   <field name="content"           family="f" qualifier="cnt"/> |
|   <field name="contentType"       family="f" qualifier="typ"/> |
|   <field name="protocolStatus"    family="f" qualifier="prot"/> |
|   <field name="modifiedTime"      family="f" qualifier="mod"/> |
|   <field name="prevModifiedTime"  family="f" qualifier="pmod"/> |
|   <field name="batchId"           family="f" qualifier="bid"/> |
| |
|   <!-- parse fields: all stored in family "p" |
|        (qualifier "st" is also used by "status" above, but under family "f") --> |
|   <field name="title"         family="p" qualifier="t"/> |
|   <field name="text"          family="p" qualifier="c"/> |
|   <field name="parseStatus"   family="p" qualifier="st"/> |
|   <field name="signature"     family="p" qualifier="sig"/> |
|   <field name="prevSignature" family="p" qualifier="psig"/> |
| |
|   <!-- score field: family "s" --> |
|   <field name="score"    family="s" qualifier="s"/> |
| |
|   <!-- fields mapped to a column family only; no qualifier is specified. |
|        NOTE(review): presumably map-valued fields; confirm against the gora-accumulo docs. --> |
|   <field name="headers"  family="h"/> |
|   <field name="inlinks"  family="il"/> |
|   <field name="outlinks" family="ol"/> |
|   <field name="metadata" family="mtdt"/> |
|   <field name="markers"  family="mk"/> |
| </class> |
| |
| <!-- Table "host": declares the column families referenced by the |
|      org.apache.nutch.storage.Host class mapping in this file. --> |
| <table name="host"> |
|   <family name="mtdt"/> <!-- metadata --> |
|   <family name="il"/>   <!-- inlinks --> |
|   <family name="ol"/>   <!-- outlinks --> |
| </table> |
| |
| <!-- Maps org.apache.nutch.storage.Host (String keys) onto the "host" table. |
|      Every field is mapped to a column family only; no qualifier is specified. --> |
| <class name="org.apache.nutch.storage.Host" keyClass="java.lang.String" table="host"> |
|   <field name="metadata" family="mtdt"/> |
|   <field name="inlinks"  family="il"/> |
|   <field name="outlinks" family="ol"/> |
| </class> |
| |
| </gora-orm> |