| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nutch.service.resources; |
| |
| import java.util.HashMap; |
| |
| import javax.ws.rs.Consumes; |
| import javax.ws.rs.DefaultValue; |
| import javax.ws.rs.GET; |
| import javax.ws.rs.POST; |
| import javax.ws.rs.Path; |
| import javax.ws.rs.Produces; |
| import javax.ws.rs.QueryParam; |
| import javax.ws.rs.core.MediaType; |
| import javax.ws.rs.core.Response; |
| import javax.ws.rs.core.Response.Status; |
| |
| import org.apache.nutch.service.NutchReader; |
| import org.apache.nutch.service.impl.LinkReader; |
| import org.apache.nutch.service.impl.NodeReader; |
| import org.apache.nutch.service.impl.SequenceReader; |
| import org.apache.nutch.service.model.request.ReaderConfig; |
| |
| /** |
| * The Reader endpoint enables a user to read sequence files, |
| * nodes and links from the Nutch webgraph. |
| * @author Sujen Shah |
| * |
| */ |
| @Path("/reader") |
| public class ReaderResouce { |
| |
| /** |
| * Read a sequence file |
| * @param readerConf |
| * @param nrows Number of rows to read. If not specified all rows will be read |
| * @param start Specify a starting line number to read the file from |
| * @param end The line number to read the file till |
| * @param count Boolean value. If true, this endpoint will return the number of lines in the line |
| * @return Appropriate HTTP response based on the query |
| */ |
| @Path("/sequence/read") |
| @POST |
| @Consumes(MediaType.APPLICATION_JSON) |
| @Produces(MediaType.APPLICATION_JSON) |
| public Response seqRead(ReaderConfig readerConf, |
| @DefaultValue("-1")@QueryParam("nrows") int nrows, |
| @DefaultValue("-1")@QueryParam("start") int start, |
| @QueryParam("end")int end, @QueryParam("count") boolean count) { |
| |
| NutchReader reader = new SequenceReader(); |
| String path = readerConf.getPath(); |
| return performRead(reader, path, nrows, start, end, count); |
| } |
| |
| /** |
| * Get Link Reader response schema |
| * @return JSON object specifying the schema of the responses returned by the Link Reader |
| */ |
| @Path("/link") |
| @GET |
| @Produces(MediaType.APPLICATION_JSON) |
| public Response linkRead() { |
| HashMap<String, String> schema = new HashMap<>(); |
| schema.put("key_url","string"); |
| schema.put("timestamp", "int"); |
| schema.put("score","float"); |
| schema.put("anchor","string"); |
| schema.put("linktype","string"); |
| schema.put("url","string"); |
| return Response.ok(schema).type(MediaType.APPLICATION_JSON).build(); |
| } |
| |
| /** |
| * Read link object |
| * @param readerConf |
| * @param nrows |
| * @param start |
| * @param end |
| * @param count |
| * @return |
| */ |
| @Path("/link/read") |
| @POST |
| @Consumes(MediaType.APPLICATION_JSON) |
| @Produces(MediaType.APPLICATION_JSON) |
| public Response linkRead(ReaderConfig readerConf, |
| @DefaultValue("-1")@QueryParam("nrows") int nrows, |
| @DefaultValue("-1")@QueryParam("start") int start, |
| @QueryParam("end") int end, @QueryParam("count") boolean count) { |
| |
| NutchReader reader = new LinkReader(); |
| String path = readerConf.getPath(); |
| return performRead(reader, path, nrows, start, end, count); |
| } |
| |
| /** |
| * Get schema of the Node object |
| * @return |
| */ |
| @Path("/node") |
| @GET |
| @Produces(MediaType.APPLICATION_JSON) |
| public Response nodeRead() { |
| HashMap<String, String> schema = new HashMap<>(); |
| schema.put("key_url","string"); |
| schema.put("num_inlinks", "int"); |
| schema.put("num_outlinks","int"); |
| schema.put("inlink_score","float"); |
| schema.put("outlink_score","float"); |
| schema.put("metadata","string"); |
| return Response.ok(schema).type(MediaType.APPLICATION_JSON).build(); |
| } |
| |
| |
| /** |
| * Read Node object as stored in the Nutch Webgraph |
| * @param readerConf |
| * @param nrows |
| * @param start |
| * @param end |
| * @param count |
| * @return |
| */ |
| @Path("/node/read") |
| @POST |
| @Consumes(MediaType.APPLICATION_JSON) |
| @Produces(MediaType.APPLICATION_JSON) |
| public Response nodeRead(ReaderConfig readerConf, |
| @DefaultValue("-1")@QueryParam("nrows") int nrows, |
| @DefaultValue("-1")@QueryParam("start") int start, |
| @QueryParam("end") int end, @QueryParam("count") boolean count) { |
| |
| NutchReader reader = new NodeReader(); |
| String path = readerConf.getPath(); |
| return performRead(reader, path, nrows, start, end, count); |
| } |
| |
| |
| private Response performRead(NutchReader reader, String path, |
| int nrows, int start, int end, boolean count) { |
| Object result; |
| try{ |
| if(count){ |
| result = reader.count(path); |
| return Response.ok(result).type(MediaType.TEXT_PLAIN).build(); |
| } |
| else if(start>-1 && end>0) { |
| result = reader.slice(path, start, end); |
| } |
| else if(nrows>-1) { |
| result = reader.head(path, nrows); |
| } |
| else { |
| result = reader.read(path); |
| } |
| return Response.ok(result).type(MediaType.APPLICATION_JSON).build(); |
| }catch(Exception e){ |
| return Response.status(Status.BAD_REQUEST).entity("File not found").build(); |
| } |
| } |
| |
| } |