/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.fetcher;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.nutch.parse.Outlink;

| /** |
| * This class is used to capture the various events occurring |
| * at fetch time. These events are sent to a queue implementing the publisher |
| * |
| */ |
| public class FetcherThreadEvent implements Serializable{ |
| |
| /** Type of event to specify start, end or reporting of a fetch item. **/ |
| public static enum PublishEventType {START, END, REPORT} |
| |
| private PublishEventType eventType; |
| private Map<String, Object> eventData; |
| private String url; |
| private Long timestamp; |
| |
| /** |
| * Constructor to create an event to be published |
| * @param eventType Type of {@link #eventType event} being created |
| * @param url URL of the fetched page to which this event belongs to |
| */ |
| public FetcherThreadEvent(PublishEventType eventType, String url) { |
| this.eventType = eventType; |
| this.url = url; |
| this.timestamp = System.currentTimeMillis(); |
| } |
| |
| /** |
| * Get type of this event object |
| * @return {@link PublishEventType Event} type |
| */ |
| public PublishEventType getEventType() { |
| return eventType; |
| } |
| |
| /** |
| * Set event type of this object |
| * @param eventType Set {@link #eventType event} type |
| */ |
| public void setEventType(PublishEventType eventType) { |
| this.eventType = eventType; |
| } |
| |
| /** |
| * Get event data |
| * @return |
| */ |
| public Map<String, Object> getEventData() { |
| return eventData; |
| } |
| /** |
| * Set metadata to this even |
| * @param eventData A map containing important information relevant |
| * to this event (fetched page). |
| * Ex - score, title, outlinks, content-type, etc |
| */ |
| public void setEventData(Map<String, Object> eventData) { |
| this.eventData = eventData; |
| } |
| |
| /** |
| * Get URL of this event |
| * @return {@link #url URL} of this event |
| */ |
| public String getUrl() { |
| return url; |
| } |
| |
| /** |
| * Set URL of this event (fetched page) |
| * @param url URL of the fetched page |
| */ |
| public void setUrl(String url) { |
| this.url = url; |
| } |
| /** |
| * Add new data to the eventData object. |
| * @param key A key to refer to the data being added to this event |
| * @param value Data to be stored in the event referenced by the above key |
| */ |
| public void addEventData(String key, Object value) { |
| if(eventData == null) { |
| eventData = new HashMap<>(); |
| } |
| eventData.put(key, value); |
| } |
| |
| /** |
| * Given a collection of lists this method will add it |
| * the oultink metadata |
| * @param links A collection of outlinks generating from the fetched page |
| * this event refers to |
| */ |
| public void addOutlinksToEventData(Collection<Outlink> links) { |
| ArrayList<Map<String, String>> outlinkList = new ArrayList<>(); |
| for(Outlink link: links) { |
| Map<String, String> outlink = new HashMap<>(); |
| outlink.put("url", link.getToUrl()); |
| outlink.put("anchor", link.getAnchor()); |
| outlinkList.add(outlink); |
| } |
| this.addEventData("outlinks", outlinkList); |
| } |
| |
| /** |
| * Get timestamp of current event. |
| * @return {@link #timestamp Timestamp} |
| */ |
| public Long getTimestamp() { |
| return timestamp; |
| } |
| |
| /** |
| * Set timestamp for this event |
| * @param timestamp Timestamp of the occurrence of this event |
| */ |
| public void setTimestamp(Long timestamp) { |
| this.timestamp = timestamp; |
| } |
| |
| } |