blob: 1d9120ed860cacd4db7517f3b2c71c67ff274449 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.webui.service.impl;
import java.lang.invoke.MethodHandles;
import java.sql.SQLException;
import java.util.List;
import javax.annotation.Resource;
import org.apache.nutch.webui.client.NutchClient;
import org.apache.nutch.webui.client.NutchClientFactory;
import org.apache.nutch.webui.client.impl.CrawlingCycle;
import org.apache.nutch.webui.client.impl.RemoteCommandsBatchFactory;
import org.apache.nutch.webui.client.impl.CrawlingCycleListener;
import org.apache.nutch.webui.client.impl.RemoteCommand;
import org.apache.nutch.webui.client.impl.RemoteCommandExecutor;
import org.apache.nutch.webui.client.model.Crawl;
import org.apache.nutch.webui.client.model.Crawl.CrawlStatus;
import org.apache.nutch.webui.model.NutchInstance;
import org.apache.nutch.webui.service.CrawlService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import com.j256.ormlite.dao.Dao;
/**
 * {@link CrawlService} implementation that persists {@link Crawl} records
 * through an ORMLite {@link Dao} and drives crawl execution against a Nutch
 * instance.
 * <p>
 * The class also implements {@link CrawlingCycleListener}: it passes itself to
 * the {@link CrawlingCycle} it creates and stores every status/progress change
 * reported through the listener callbacks.
 */
@Service
public class CrawlServiceImpl implements CrawlService, CrawlingCycleListener {

  private static final Logger LOG = LoggerFactory
      .getLogger(MethodHandles.lookup().lookupClass());

  @Resource
  private Dao<Crawl, Long> crawlDao;

  @Resource
  private NutchClientFactory nutchClientFactory;

  @Resource
  private RemoteCommandsBatchFactory commandFactory;

  /**
   * Loads the crawl with the given id, creates its seed list on the given
   * instance, builds the remote commands and executes one crawling cycle.
   * <p>
   * The method is annotated with {@link Async}, so failures are not reported
   * to the caller; they are logged and, when the crawl record could be loaded,
   * the crawl is stored with status {@link CrawlStatus#ERROR}.
   *
   * @param crawlId
   *          database id of the crawl to start
   * @param instance
   *          Nutch instance used to obtain the {@link NutchClient}
   */
  @Override
  @Async
  public void startCrawl(Long crawlId, NutchInstance instance) {
    Crawl crawl = null;
    try {
      crawl = crawlDao.queryForId(crawlId);
      if (crawl == null) {
        // Nothing to run and nothing to mark as failed.
        LOG.error("Unable to start crawl: no crawl found for id {}", crawlId);
        return;
      }
      if (crawl.getCrawlId() == null) {
        crawl.setCrawlId("crawl-" + crawlId);
      }

      NutchClient client = nutchClientFactory.getClient(instance);
      String seedDirectory = client.createSeed(crawl.getSeedList());
      crawl.setSeedDirectory(seedDirectory);

      List<RemoteCommand> commands = commandFactory.createCommands(crawl);
      RemoteCommandExecutor executor = new RemoteCommandExecutor(client);

      CrawlingCycle cycle = new CrawlingCycle(this, executor, crawl, commands);
      cycle.executeCrawlCycle();
    } catch (Exception e) {
      // Log before touching the database so the root cause is never lost if
      // persisting the error status fails as well.
      LOG.error("Exception occurred while running crawl with id {}", crawlId,
          e);
      // crawl is still null when the lookup itself threw.
      if (crawl != null) {
        crawl.setStatus(CrawlStatus.ERROR);
        saveCrawl(crawl);
      }
    }
  }

  /**
   * Returns all stored crawls.
   *
   * @return every {@link Crawl} returned by the DAO
   * @throws RuntimeException
   *           wrapping the {@link SQLException} if the query fails
   */
  @Override
  public List<Crawl> getCrawls() {
    try {
      return crawlDao.queryForAll();
    } catch (SQLException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Creates or updates the given crawl in the database.
   *
   * @param crawl
   *          crawl to persist
   * @throws RuntimeException
   *           wrapping the {@link SQLException} if the write fails
   */
  @Override
  public void saveCrawl(Crawl crawl) {
    try {
      crawlDao.createOrUpdate(crawl);
    } catch (SQLException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Deletes the crawl with the given database id.
   *
   * @param crawlId
   *          database id of the crawl to delete
   * @throws RuntimeException
   *           wrapping the {@link SQLException} if the delete fails
   */
  @Override
  public void deleteCrawl(Long crawlId) {
    try {
      crawlDao.deleteById(crawlId);
    } catch (SQLException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Listener callback: stores the crawl with status
   * {@link CrawlStatus#CRAWLING} and progress reset to zero.
   *
   * @param crawl
   *          crawl that has started
   */
  @Override
  public void crawlingStarted(Crawl crawl) {
    crawl.setStatus(CrawlStatus.CRAWLING);
    crawl.setProgress(0);
    saveCrawl(crawl);
  }

  /**
   * Listener callback: logs the reported message and stores the crawl with
   * status {@link CrawlStatus#ERROR}.
   *
   * @param crawl
   *          crawl that failed
   * @param msg
   *          error description supplied by the caller of the listener
   */
  @Override
  public void onCrawlError(Crawl crawl, String msg) {
    LOG.error("Crawl {} failed: {}", crawl.getCrawlId(), msg);
    crawl.setStatus(CrawlStatus.ERROR);
    saveCrawl(crawl);
  }

  /**
   * Listener callback: stores the progress value reported after a command.
   *
   * @param crawl
   *          crawl the command belongs to
   * @param command
   *          command that was executed (not used by this implementation)
   * @param progress
   *          new progress value to store on the crawl
   */
  @Override
  public void commandExecuted(Crawl crawl, RemoteCommand command, int progress) {
    crawl.setProgress(progress);
    saveCrawl(crawl);
  }

  /**
   * Listener callback: stores the crawl with status
   * {@link CrawlStatus#FINISHED}.
   *
   * @param crawl
   *          crawl that has finished
   */
  @Override
  public void crawlingFinished(Crawl crawl) {
    crawl.setStatus(CrawlStatus.FINISHED);
    saveCrawl(crawl);
  }
}