blob: d5ca530e262c7468e6c7def1bfdd2de1042bc9db [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.oodt.cas.pushpull.retrievalmethod;
//OODT imports
import org.apache.oodt.cas.filemgr.structs.exceptions.CatalogException;
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.cas.pushpull.config.DataFilesInfo;
import org.apache.oodt.cas.pushpull.config.DownloadInfo;
import org.apache.oodt.cas.pushpull.exceptions.AlreadyInDatabaseException;
import org.apache.oodt.cas.pushpull.exceptions.ProtocolFileException;
import org.apache.oodt.cas.pushpull.exceptions.RetrievalMethodException;
import org.apache.oodt.cas.pushpull.exceptions.ToManyFailedDownloadsException;
import org.apache.oodt.cas.pushpull.exceptions.UndefinedTypeException;
import org.apache.oodt.cas.pushpull.filerestrictions.FileRestrictions;
import org.apache.oodt.cas.pushpull.filerestrictions.Parser;
import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFile;
import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFileStructure;
import org.apache.oodt.cas.protocol.ProtocolFile;
import org.apache.oodt.cas.protocol.util.ProtocolFileFilter;
import org.apache.oodt.cas.pushpull.protocol.ProtocolPath;
import org.apache.oodt.cas.pushpull.protocol.RemoteSite;
import org.apache.oodt.cas.pushpull.protocol.RemoteSiteFile;
import org.apache.oodt.cas.pushpull.retrievalsystem.DataFileToPropFileLinker;
import org.apache.oodt.cas.pushpull.retrievalsystem.FileRetrievalSystem;
//JDK imports
import java.io.File;
import java.io.FileInputStream;
import java.net.MalformedURLException;
import java.util.List;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
*
* @author bfoster
* @version $Revision$
*
* <p>
* Describe your class here
* </p>.
*/
public class RemoteCrawler implements RetrievalMethod {
private static final Logger LOG = Logger.getLogger(RemoteCrawler.class
.getName());
/**
* Starts the crawler and creates a default DirStruct if null was supplied
* in constructor
*
* @throws MalformedURLException
* @throws ProtocolException
* @throws ProtocolFileException
*/
@Override
public void processPropFile(FileRetrievalSystem frs, Parser propFileParser,
File propFile, DataFilesInfo dfi, DataFileToPropFileLinker linker)
throws Exception {
RemoteSite remoteSite;
// parse property file
Metadata fileMetadata = new Metadata();
VirtualFileStructure vfs = propFileParser.parse(new FileInputStream(
propFile), fileMetadata);
// determine RemoteSite
DownloadInfo di = dfi.getDownloadInfo();
if (!di.isAllowAliasOverride()
|| (remoteSite = vfs.getRemoteSite()) == null)
remoteSite = di.getRemoteSite();
// modify vfs to be root based if HOME directory based
if (!vfs.isRootBased()) {
String homeDirPath = frs.getHomeDir(remoteSite).getPath();
VirtualFile root = new VirtualFile(homeDirPath, true);
root.addChild(vfs.getRootVirtualFile());
vfs = new VirtualFileStructure(homeDirPath + "/"
+ vfs.getPathToRoot(), root.getRootDir());
frs.changeToHOME(remoteSite);
}
// initialize variables
final String initialCdPath = vfs.getPathToRoot();
final VirtualFile vf = vfs.getRootVirtualFile();
// change to initial directory (takes care of Linux auto-mounting)
frs.changeToDir(initialCdPath, remoteSite);
// add starting directory to stack
Stack<RemoteSiteFile> files = new Stack<RemoteSiteFile>();
files.add(new RemoteSiteFile(frs.getCurrentFile(remoteSite), remoteSite));
// start crawling
while (!files.isEmpty()) {
RemoteSiteFile file = files.peek();
try {
// if directory, then add its children to the crawl list
if (file.isDir()) {
// get next page worth of children
List<RemoteSiteFile> children = frs.getNextPage(file,
new ProtocolFileFilter() {
@Override
public boolean accept(ProtocolFile pFile) {
return FileRestrictions.isAllowed(new
ProtocolPath(pFile
.getPath(), pFile.isDir()), vf);
}
});
// if directory had more children then add them
if (children.size() > 0)
files.addAll(children);
// otherwise remove the directory from the crawl list
else
files.pop();
// if file, then download it
} else {
linker.addPropFileToDataFileLink(propFile, file);
if (!frs.addToDownloadQueue(files.pop(), di
.getRenamingConv(), di.getStagingArea(), dfi
.getQueryMetadataElementName(), di
.deleteFromServer(), fileMetadata))
linker.eraseLinks(propFile);
}
} catch (ToManyFailedDownloadsException e) {
throw new RetrievalMethodException(
"Connection appears to be down. . .unusual number of download failures. . .stopping : "
+ e.getMessage());
} catch (CatalogException e) {
throw new RetrievalMethodException(
"Failed to communicate with database : "
+ e.getMessage());
} catch (AlreadyInDatabaseException e) {
LOG.log(Level.WARNING, "Skipping file : " + e.getMessage());
} catch (UndefinedTypeException e) {
LOG.log(Level.WARNING, "Skipping file : " + e.getMessage());
} catch (Exception e) {
linker.markAsFailed(propFile, e.getMessage());
throw new Exception("Uknown error accured while downloading "
+ file + " from " + remoteSite + " -- bailing out : "
+ e.getMessage(), e);
}
}
}
}