blob: 6c175c83a54c6ee33ac44647de0f19771c987086 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.fetcher;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.nutch.parse.Outlink;
/**
* This class is used to capture the various events occurring
* at fetch time. These events are sent to a queue implementing the publisher
*
*/
public class FetcherThreadEvent implements Serializable{
/** Type of event to specify start, end or reporting of a fetch item. **/
public static enum PublishEventType {START, END, REPORT}
private PublishEventType eventType;
private Map<String, Object> eventData;
private String url;
private Long timestamp;
/**
* Constructor to create an event to be published
* @param eventType Type of {@link #eventType event} being created
* @param url URL of the fetched page to which this event belongs to
*/
public FetcherThreadEvent(PublishEventType eventType, String url) {
this.eventType = eventType;
this.url = url;
this.timestamp = System.currentTimeMillis();
}
/**
* Get type of this event object
* @return {@link PublishEventType Event} type
*/
public PublishEventType getEventType() {
return eventType;
}
/**
* Set event type of this object
* @param eventType Set {@link #eventType event} type
*/
public void setEventType(PublishEventType eventType) {
this.eventType = eventType;
}
/**
* Get event data
* @return
*/
public Map<String, Object> getEventData() {
return eventData;
}
/**
* Set metadata to this even
* @param eventData A map containing important information relevant
* to this event (fetched page).
* Ex - score, title, outlinks, content-type, etc
*/
public void setEventData(Map<String, Object> eventData) {
this.eventData = eventData;
}
/**
* Get URL of this event
* @return {@link #url URL} of this event
*/
public String getUrl() {
return url;
}
/**
* Set URL of this event (fetched page)
* @param url URL of the fetched page
*/
public void setUrl(String url) {
this.url = url;
}
/**
* Add new data to the eventData object.
* @param key A key to refer to the data being added to this event
* @param value Data to be stored in the event referenced by the above key
*/
public void addEventData(String key, Object value) {
if(eventData == null) {
eventData = new HashMap<>();
}
eventData.put(key, value);
}
/**
* Given a collection of lists this method will add it
* the oultink metadata
* @param links A collection of outlinks generating from the fetched page
* this event refers to
*/
public void addOutlinksToEventData(Collection<Outlink> links) {
ArrayList<Map<String, String>> outlinkList = new ArrayList<>();
for(Outlink link: links) {
Map<String, String> outlink = new HashMap<>();
outlink.put("url", link.getToUrl());
outlink.put("anchor", link.getAnchor());
outlinkList.add(outlink);
}
this.addEventData("outlinks", outlinkList);
}
/**
* Get timestamp of current event.
* @return {@link #timestamp Timestamp}
*/
public Long getTimestamp() {
return timestamp;
}
/**
* Set timestamp for this event
* @param timestamp Timestamp of the occurrence of this event
*/
public void setTimestamp(Long timestamp) {
this.timestamp = timestamp;
}
}