| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nutch.service.resources; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Map; |
| |
| import javax.ws.rs.Consumes; |
| import javax.ws.rs.DefaultValue; |
| import javax.ws.rs.GET; |
| import javax.ws.rs.POST; |
| import javax.ws.rs.Path; |
| import javax.ws.rs.Produces; |
| import javax.ws.rs.QueryParam; |
| import javax.ws.rs.core.MediaType; |
| import javax.ws.rs.core.Response; |
| import javax.ws.rs.core.Response.Status; |
| |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.nutch.crawl.CrawlDbReader; |
| import org.apache.nutch.fetcher.FetchNode; |
| import org.apache.nutch.fetcher.FetchNodeDb; |
| import org.apache.nutch.service.model.request.DbQuery; |
| import org.apache.nutch.service.model.response.FetchNodeDbInfo; |
| |
| @Path(value = "/db") |
| public class DbResource extends AbstractResource { |
| |
| @POST |
| @Path(value = "/crawldb") |
| @Consumes(MediaType.APPLICATION_JSON) |
| public Response readdb(DbQuery dbQuery){ |
| if(dbQuery == null) |
| return Response.status(Status.BAD_REQUEST).build(); |
| |
| Configuration conf = configManager.get(dbQuery.getConfId()); |
| if(conf == null){ |
| conf = configManager.get(ConfigResource.DEFAULT); |
| } |
| if(dbQuery.getCrawlId() == null || dbQuery.getType() == null){ |
| return Response.status(Status.BAD_REQUEST).build(); |
| } |
| String type = dbQuery.getType(); |
| |
| if(type.equalsIgnoreCase("stats")){ |
| return crawlDbStats(conf, dbQuery.getArgs(), dbQuery.getCrawlId()); |
| } |
| if(type.equalsIgnoreCase("dump")){ |
| return crawlDbDump(conf, dbQuery.getArgs(), dbQuery.getCrawlId()); |
| } |
| if(type.equalsIgnoreCase("topN")){ |
| return crawlDbTopN(conf, dbQuery.getArgs(), dbQuery.getCrawlId()); |
| } |
| if(type.equalsIgnoreCase("url")){ |
| return crawlDbUrl(conf, dbQuery.getArgs(), dbQuery.getCrawlId()); |
| } |
| return null; |
| |
| } |
| |
| @GET |
| @Path(value="/fetchdb") |
| public List<FetchNodeDbInfo> fetchDb(@DefaultValue("0")@QueryParam("to")int to, @DefaultValue("0")@QueryParam("from")int from){ |
| List<FetchNodeDbInfo> listOfFetchedNodes = new ArrayList<>(); |
| Map<Integer, FetchNode> fetchNodedbMap = FetchNodeDb.getInstance().getFetchNodeDb(); |
| |
| if(to ==0 || to>fetchNodedbMap.size()){ |
| to = fetchNodedbMap.size(); |
| } |
| for(int i=from;i<=to;i++){ |
| if(!fetchNodedbMap.containsKey(i)){ |
| continue; |
| } |
| FetchNode node = fetchNodedbMap.get(i); |
| FetchNodeDbInfo fdbInfo = new FetchNodeDbInfo(); |
| fdbInfo.setUrl(node.getUrl().toString()); |
| fdbInfo.setStatus(node.getStatus()); |
| fdbInfo.setNumOfOutlinks(node.getOutlinks().length); |
| fdbInfo.setChildNodes(node.getOutlinks()); |
| listOfFetchedNodes.add(fdbInfo); |
| } |
| |
| return listOfFetchedNodes; |
| } |
| @SuppressWarnings("resource") |
| private Response crawlDbStats(Configuration conf, Map<String, String> args, String crawlId){ |
| CrawlDbReader dbr = new CrawlDbReader(); |
| try{ |
| return Response.ok(dbr.query(args, conf, "stats", crawlId)).build(); |
| }catch(Exception e){ |
| e.printStackTrace(); |
| return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build(); |
| } |
| } |
| |
| @Produces(MediaType.APPLICATION_OCTET_STREAM) |
| private Response crawlDbDump(Configuration conf, Map<String, String> args, String crawlId){ |
| @SuppressWarnings("resource") |
| CrawlDbReader dbr = new CrawlDbReader(); |
| try{ |
| return Response.ok(dbr.query(args, conf, "dump", crawlId), MediaType.APPLICATION_OCTET_STREAM).build(); |
| }catch(Exception e){ |
| e.printStackTrace(); |
| return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build(); |
| } |
| } |
| |
| @Produces(MediaType.APPLICATION_OCTET_STREAM) |
| private Response crawlDbTopN(Configuration conf, Map<String, String> args, String crawlId) { |
| @SuppressWarnings("resource") |
| CrawlDbReader dbr = new CrawlDbReader(); |
| try{ |
| return Response.ok(dbr.query(args, conf, "topN", crawlId), MediaType.APPLICATION_OCTET_STREAM).build(); |
| }catch(Exception e){ |
| e.printStackTrace(); |
| return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build(); |
| } |
| } |
| |
| private Response crawlDbUrl(Configuration conf, Map<String, String> args, String crawlId){ |
| @SuppressWarnings("resource") |
| CrawlDbReader dbr = new CrawlDbReader(); |
| try{ |
| return Response.ok(dbr.query(args, conf, "url", crawlId)).build(); |
| }catch(Exception e){ |
| e.printStackTrace(); |
| return Response.serverError().entity(e.getMessage()).type(MediaType.TEXT_PLAIN).build(); |
| } |
| } |
| } |