blob: b1f9775096e7f8b2a7b99084bf0ca00beef6822b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.service.resources;
import java.util.HashMap;
import javax.ws.rs.Consumes;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;
import org.apache.nutch.service.NutchReader;
import org.apache.nutch.service.impl.LinkReader;
import org.apache.nutch.service.impl.NodeReader;
import org.apache.nutch.service.impl.SequenceReader;
import org.apache.nutch.service.model.request.ReaderConfig;
/**
 * The Reader endpoint enables a user to read sequence files,
 * nodes and links from the Nutch webgraph.
 * <p>
 * All <code>.../read</code> endpoints accept a {@link ReaderConfig} JSON body
 * and share the same query parameters. The read mode is chosen in this order:
 * <code>count</code>, then slice (<code>start</code>/<code>end</code>), then
 * head (<code>nrows</code>), otherwise the whole file is read.
 * @author Sujen Shah
 *
 */
@Path("/reader")
public class ReaderResouce {

  /**
   * Read a sequence file
   * @param readerConf Request body; its path is handed to the reader
   * @param nrows Number of rows to read. If not specified all rows will be read
   * @param start Specify a starting line number to read the file from
   * @param end The line number to read the file till
   * @param count Boolean value. If true, this endpoint will return the number of lines in the file
   * @return Appropriate HTTP response based on the query
   */
  @Path("/sequence/read")
  @POST
  @Consumes(MediaType.APPLICATION_JSON)
  @Produces(MediaType.APPLICATION_JSON)
  public Response seqRead(ReaderConfig readerConf,
      @DefaultValue("-1") @QueryParam("nrows") int nrows,
      @DefaultValue("-1") @QueryParam("start") int start,
      @QueryParam("end") int end, @QueryParam("count") boolean count) {
    return readWithConfig(new SequenceReader(), readerConf, nrows, start, end, count);
  }

  /**
   * Get Link Reader response schema
   * @return JSON object specifying the schema of the responses returned by the Link Reader
   */
  @Path("/link")
  @GET
  @Produces(MediaType.APPLICATION_JSON)
  public Response linkRead() {
    HashMap<String, String> schema = new HashMap<>();
    schema.put("key_url", "string");
    schema.put("timestamp", "int");
    schema.put("score", "float");
    schema.put("anchor", "string");
    schema.put("linktype", "string");
    schema.put("url", "string");
    return Response.ok(schema).type(MediaType.APPLICATION_JSON).build();
  }

  /**
   * Read link object
   * @param readerConf Request body; its path is handed to the {@link LinkReader}
   * @param nrows Number of rows to read; used only when greater than -1 and no slice is requested
   * @param start First line of the slice; a slice is read only when start &gt; -1 and end &gt; 0
   * @param end Last line of the slice; a slice is read only when start &gt; -1 and end &gt; 0
   * @param count If true, only the count reported by the reader is returned (as plain text)
   * @return Appropriate HTTP response based on the query
   */
  @Path("/link/read")
  @POST
  @Consumes(MediaType.APPLICATION_JSON)
  @Produces(MediaType.APPLICATION_JSON)
  public Response linkRead(ReaderConfig readerConf,
      @DefaultValue("-1") @QueryParam("nrows") int nrows,
      @DefaultValue("-1") @QueryParam("start") int start,
      @QueryParam("end") int end, @QueryParam("count") boolean count) {
    return readWithConfig(new LinkReader(), readerConf, nrows, start, end, count);
  }

  /**
   * Get schema of the Node object
   * @return JSON object mapping each field name of a Node response to its type
   */
  @Path("/node")
  @GET
  @Produces(MediaType.APPLICATION_JSON)
  public Response nodeRead() {
    HashMap<String, String> schema = new HashMap<>();
    schema.put("key_url", "string");
    schema.put("num_inlinks", "int");
    schema.put("num_outlinks", "int");
    schema.put("inlink_score", "float");
    schema.put("outlink_score", "float");
    schema.put("metadata", "string");
    return Response.ok(schema).type(MediaType.APPLICATION_JSON).build();
  }

  /**
   * Read Node object as stored in the Nutch Webgraph
   * @param readerConf Request body; its path is handed to the {@link NodeReader}
   * @param nrows Number of rows to read; used only when greater than -1 and no slice is requested
   * @param start First line of the slice; a slice is read only when start &gt; -1 and end &gt; 0
   * @param end Last line of the slice; a slice is read only when start &gt; -1 and end &gt; 0
   * @param count If true, only the count reported by the reader is returned (as plain text)
   * @return Appropriate HTTP response based on the query
   */
  @Path("/node/read")
  @POST
  @Consumes(MediaType.APPLICATION_JSON)
  @Produces(MediaType.APPLICATION_JSON)
  public Response nodeRead(ReaderConfig readerConf,
      @DefaultValue("-1") @QueryParam("nrows") int nrows,
      @DefaultValue("-1") @QueryParam("start") int start,
      @QueryParam("end") int end, @QueryParam("count") boolean count) {
    return readWithConfig(new NodeReader(), readerConf, nrows, start, end, count);
  }

  /**
   * Validates the request body and delegates to
   * {@link #performRead(NutchReader, String, int, int, int, boolean)}.
   * A missing body would otherwise raise a NullPointerException outside of any
   * error handling, so it is answered with 400 Bad Request here.
   */
  private Response readWithConfig(NutchReader reader, ReaderConfig readerConf,
      int nrows, int start, int end, boolean count) {
    if (readerConf == null) {
      return Response.status(Status.BAD_REQUEST)
          .entity("Reader configuration is missing from the request body").build();
    }
    return performRead(reader, readerConf.getPath(), nrows, start, end, count);
  }

  /**
   * Runs the requested read against <code>path</code> using <code>reader</code>.
   * Mode precedence: count, slice (start &gt; -1 and end &gt; 0), head (nrows &gt; -1),
   * otherwise a full read.
   * @return 200 with the result (plain text for count, JSON otherwise), or
   *         400 Bad Request if the reader throws
   */
  private Response performRead(NutchReader reader, String path,
      int nrows, int start, int end, boolean count) {
    try {
      if (count) {
        // The count is returned as plain text rather than JSON.
        return Response.ok(reader.count(path)).type(MediaType.TEXT_PLAIN).build();
      }

      Object result;
      if (start > -1 && end > 0) {
        result = reader.slice(path, start, end);
      } else if (nrows > -1) {
        result = reader.head(path, nrows);
      } else {
        result = reader.read(path);
      }
      return Response.ok(result).type(MediaType.APPLICATION_JSON).build();
    } catch (Exception e) {
      // NOTE(review): every failure is reported as "File not found", whatever
      // its actual cause; kept as-is because clients may rely on this response.
      return Response.status(Status.BAD_REQUEST).entity("File not found").build();
    }
  }
}