blob: 1c48ab3e2faa1a24e79dfbc6a2dd10064dd4d78f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.protocol.ftp;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.Socket;
import java.util.List;
import org.apache.commons.net.MalformedServerReplyException;
import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPCommand;
import org.apache.commons.net.ftp.FTPFile;
import org.apache.commons.net.ftp.FTPFileEntryParser;
import org.apache.commons.net.ftp.FTPReply;
import org.apache.commons.net.ftp.FTPConnectionClosedException;
/***********************************************
* Client.java encapsulates functionalities necessary for nutch to get dir list
* and retrieve file from an FTP server. This class takes care of all low level
* details of interacting with an FTP server and provides a convenient higher
* level interface.
*
* Modified from FtpClient.java in apache commons-net.
*
* Notes by John Xing: ftp server implementations are hardly uniform and none
* seems to follow RFCs whole-heartedly. We have no choice, but assume common
* denominator as following: (1) Use stream mode for data transfer. Block mode
* will be better for multiple file downloading and partial file downloading.
* However not every ftpd has block mode support. (2) Use passive mode for data
* connection. So Nutch will work if we run behind firewall. (3) Data connection
* is opened/closed per ftp command for the reasons listed in (1). There are ftp
* servers out there, when partial downloading is enforced by closing data
* channel socket on our client side, the server side immediately closes control
* channel (socket). Our codes deal with such a bad behavior. (4) LIST is used
* to obtain remote file attributes if possible. MDTM and SIZE would be nice, but
* not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single
* thread? Do not use it at all.
*
* About exceptions: some specific exceptions are re-thrown as one of the
* FtpException* classes. In fact, each method either throws one of the
* FtpException* classes or passes the IOException through.
*
* @author John Xing
***********************************************/
public class Client extends FTP {
/** Socket read timeout (ms) applied to each new data connection; skipped when negative. */
private int __dataTimeout;
/** Whether the peer of a data connection is verified before the socket is handed out. */
private boolean __remoteVerificationEnabled;
/** Host taken from the most recently parsed passive-mode reply; null until one is parsed. */
private String __passiveHost;
/** Port taken from the most recently parsed passive-mode reply; -1 until one is parsed. */
private int __passivePort;
/** System name cached by getSystemName(); null until it has been fetched. */
private String __systemName;
/**
 * Creates a client with remote verification switched on, no data timeout
 * configured (-1), and the per-connection state reset to its defaults.
 */
public Client() {
  this.__remoteVerificationEnabled = true;
  this.__dataTimeout = -1;
  __initDefaults();
}
/**
 * Resets the per-connection state: the cached system name and the passive
 * host/port remembered from the last passive-mode reply.
 */
private void __initDefaults() {
  this.__systemName = null;
  this.__passivePort = -1;
  this.__passiveHost = null;
}
/**
 * Parses the reply to a PASV command and stores the advertised data
 * endpoint in {@code __passiveHost} / {@code __passivePort}.
 * <p>
 * The text between the first '(' and the following ')' must consist of
 * exactly six comma-separated decimal numbers in the range 0..255: the
 * first four form the host address, the last two the high and low byte of
 * the port. The fields are only updated when the whole reply parses.
 *
 * @param reply
 *          the reply line to parse
 * @throws MalformedServerReplyException
 *           if the parentheses are missing, the number of values is not
 *           six, or a value is not a number in 0..255
 */
private void __parsePassiveModeReply(String reply)
    throws MalformedServerReplyException {
  final String failure = "Could not parse passive host information.\nServer Reply: "
      + reply;

  int open = reply.indexOf('(');
  int close = reply.indexOf(')', open + 1);
  if (open < 0 || close < 0) {
    // Without this check substring() would fail with an unchecked exception.
    throw new MalformedServerReplyException(failure);
  }

  String[] fields = reply.substring(open + 1, close).split(",", -1);
  if (fields.length != 6) {
    throw new MalformedServerReplyException(failure);
  }

  int[] octets = new int[fields.length];
  for (int i = 0; i < fields.length; i++) {
    try {
      octets[i] = Integer.parseInt(fields[i].trim());
    } catch (NumberFormatException e) {
      throw new MalformedServerReplyException(failure);
    }
    if (octets[i] < 0 || octets[i] > 255) {
      throw new MalformedServerReplyException(failure);
    }
  }

  StringBuilder host = new StringBuilder(24);
  host.append(octets[0]);
  for (int i = 1; i < 4; i++) {
    host.append('.').append(octets[i]);
  }

  __passiveHost = host.toString();
  // port = high byte * 256 + low byte
  __passivePort = (octets[4] << 8) | octets[5];
}
/**
 * Opens a passive-mode data connection for the given command.
 * <p>
 * Sends PASV, parses the reply, connects a socket to the advertised
 * host/port and only then sends {@code command} on the control channel.
 * If anything fails after the socket has been created, the socket is
 * closed before this method returns or throws.
 *
 * @param command
 *          the FTP command code to send once the data socket is connected
 * @param arg
 *          the argument passed along with the command
 * @return the connected data socket, or null if the server did not answer
 *         the command with a positive preliminary reply
 * @throws IOException
 *           if an I/O error occurs on the control or data connection
 * @throws FtpExceptionCanNotHaveDataConnection
 *           if PASV is refused, its reply cannot be parsed, or remote
 *           verification is enabled and rejects the data connection
 */
protected Socket __openPassiveDataConnection(int command, String arg)
    throws IOException, FtpExceptionCanNotHaveDataConnection {
  if (pasv() != FTPReply.ENTERING_PASSIVE_MODE) {
    throw new FtpExceptionCanNotHaveDataConnection("pasv() failed. "
        + getReplyString());
  }
  try {
    __parsePassiveModeReply(getReplyStrings()[0]);
  } catch (MalformedServerReplyException e) {
    throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
  }

  Socket socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
  boolean handedOver = false;
  try {
    if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
      return null;
    }
    if (__remoteVerificationEnabled && !verifyRemote(socket)) {
      InetAddress host1 = socket.getInetAddress();
      InetAddress host2 = getRemoteAddress();
      // our precaution
      throw new FtpExceptionCanNotHaveDataConnection(
          "Host attempting data connection " + host1.getHostAddress()
              + " is not same as server " + host2.getHostAddress()
              + " So we intentionally close it for security precaution.");
    }
    if (__dataTimeout >= 0) {
      socket.setSoTimeout(__dataTimeout);
    }
    handedOver = true;
    return socket;
  } finally {
    if (!handedOver) {
      // The caller never sees this socket, so release it here. A failure to
      // close is ignored so it cannot mask the outcome decided above.
      try {
        socket.close();
      } catch (IOException ignored) {
        // best-effort cleanup
      }
    }
  }
}
/**
 * Sets the timeout, in milliseconds, used for data connections. The value
 * is applied to a data socket right after it has been opened; a negative
 * value means no timeout is set on the socket.
 *
 * @param timeout
 *          the data connection timeout in milliseconds
 */
public void setDataTimeout(int timeout) {
  this.__dataTimeout = timeout;
}
/**
 * Restores the per-connection parameters to their default values and then
 * closes the connection to the FTP server.
 * <p>
 * Data connections need no handling here: every FTP command that involves
 * a data connection closes it itself.
 *
 * @exception IOException
 *              If an error occurs while disconnecting.
 */
@Override
public void disconnect() throws IOException {
  this.__initDefaults();
  super.disconnect();
}
/**
 * Turns on or off the check that the remote host taking part in a data
 * connection is the same host the control connection is attached to.
 * Verification is enabled by default. The value may be changed at any time,
 * whether or not the client is currently connected.
 *
 * @param enable
 *          true to enable verification, false to disable it
 */
public void setRemoteVerificationEnabled(boolean enable) {
  this.__remoteVerificationEnabled = enable;
}
/**
 * Reports whether the remote host participating in data connections is
 * verified. Verification is enabled by default.
 *
 * @return true if verification is enabled, false if not
 */
public boolean isRemoteVerificationEnabled() {
  return this.__remoteVerificationEnabled;
}
/**
 * Logs in to the FTP server with the given username and password.
 * <p>
 * The password is only sent when the reply to the username is a positive
 * intermediate reply; a positive completion reply to the username alone
 * already counts as a successful login.
 *
 * @param username
 *          The username to login under.
 * @param password
 *          The password to use.
 * @return True if successfully completed, false if not.
 * @exception FTPConnectionClosedException
 *              If the FTP server prematurely closes the connection as a
 *              result of the client being idle or some other reason causing
 *              the server to send FTP reply code 421. This exception may be
 *              caught either as an IOException or independently as itself.
 * @exception IOException
 *              If an I/O error occurs while either sending a command to the
 *              server or receiving a reply from the server.
 */
public boolean login(String username, String password) throws IOException {
  user(username);
  final int userReply = getReplyCode();

  if (FTPReply.isPositiveCompletion(userReply)) {
    // no password required
    return true;
  }
  // Anything other than a request for the password is a failure; otherwise
  // the outcome of the login is the outcome of sending the password.
  return FTPReply.isPositiveIntermediate(userReply)
      && FTPReply.isPositiveCompletion(pass(password));
}
/**
 * Logs out of the FTP server by sending the QUIT command.
 *
 * @return True if successfully completed, false if not.
 * @exception FTPConnectionClosedException
 *              If the FTP server prematurely closes the connection as a
 *              result of the client being idle or some other reason causing
 *              the server to send FTP reply code 421. This exception may be
 *              caught either as an IOException or independently as itself.
 * @exception IOException
 *              If an I/O error occurs while either sending a command to the
 *              server or receiving a reply from the server.
 */
public boolean logout() throws IOException {
  final int quitReply = quit();
  return FTPReply.isPositiveCompletion(quitReply);
}
/**
 * Retrieves the LIST output for a path and appends the parsed entries to
 * {@code entries}.
 * <p>
 * The data socket is always closed before this method returns or throws,
 * including when reading or parsing fails. After a normal read the reply on
 * the control channel is consumed and checked.
 *
 * @param path
 *          the path to list; sent as the argument of LIST
 * @param entries
 *          the list that receives every successfully parsed entry
 * @param limit
 *          download limit; if &gt;= 0, reading stops after the first entry
 *          that pushes the accumulated length of accepted lines over the
 *          limit, otherwise there is no limit
 * @param parser
 *          the parser used to read and parse the listing lines
 * @throws IOException
 *           if an I/O error occurs on the control or data connection
 * @throws FtpExceptionCanNotHaveDataConnection
 *           if no data connection could be opened for LIST
 * @throws FtpExceptionUnknownForcedDataClose
 *           if the reply received after closing the data socket is not one
 *           of the accepted codes
 * @throws FtpExceptionControlClosedByForcedDataClose
 *           if the server closed the control connection after the data
 *           socket was closed
 */
public void retrieveList(String path, List<FTPFile> entries, int limit,
    FTPFileEntryParser parser) throws IOException,
    FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose,
    FtpExceptionControlClosedByForcedDataClose {
  Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
  if (socket == null)
    throw new FtpExceptionCanNotHaveDataConnection("LIST "
        + ((path == null) ? "" : path));

  boolean readCompleted = false;
  try {
    // NOTE(review): the listing is decoded with the platform default charset.
    BufferedReader reader = new BufferedReader(new InputStreamReader(
        socket.getInputStream()));
    int count = 0;
    String line = parser.readNextEntry(reader);
    while (line != null) {
      FTPFile ftpFile = parser.parseFTPEntry(line);
      // non-formatted lines (null result) are skipped
      if (ftpFile != null) {
        entries.add(ftpFile);
        count += line.length();
        // impose download limit if limit >= 0, otherwise no limit;
        // cut off is the line where the total just exceeds the limit
        if (limit >= 0 && count > limit) {
          break;
        }
      }
      line = parser.readNextEntry(reader);
    }
    readCompleted = true;
  } finally {
    // The data socket is always closed here, whether or not the limit was
    // reached; different ftp servers respond differently to that, see below.
    if (readCompleted) {
      socket.close();
    } else {
      // Already failing: do not let a close error mask the real cause.
      try {
        socket.close();
      } catch (IOException ignored) {
        // best-effort cleanup
      }
    }
  }

  try {
    int reply = getReply();
    if (!_notBadReply(reply))
      throw new FtpExceptionUnknownForcedDataClose(getReplyString());
  } catch (FTPConnectionClosedException e) {
    // some ftp servers will close control channel if data channel socket
    // is closed by our end before all data has been read out. Check:
    // tux414.q-tam.hp.com FTP server (hp.com version whp02)
    // so must catch FTPConnectionClosedException thrown by getReply() above
    throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
  }
}
/**
 * Retrieves the file at a path and writes its content to {@code os}.
 * <p>
 * Everything is transferred as raw bytes. The data socket is always closed
 * before this method returns or throws, including when reading or writing
 * fails. After a normal transfer the reply on the control channel is
 * consumed and checked.
 *
 * @param path
 *          the path of the file; sent as the argument of RETR
 * @param os
 *          the stream that receives the downloaded bytes; it is flushed but
 *          not closed
 * @param limit
 *          download limit in bytes; if &gt;= 0, exactly the first
 *          {@code limit} bytes are written when the file is longer,
 *          otherwise there is no limit
 * @throws IOException
 *           if an I/O error occurs on the control or data connection or
 *           while writing to {@code os}
 * @throws FtpExceptionCanNotHaveDataConnection
 *           if no data connection could be opened for RETR
 * @throws FtpExceptionUnknownForcedDataClose
 *           if the reply received after closing the data socket is not one
 *           of the accepted codes
 * @throws FtpExceptionControlClosedByForcedDataClose
 *           if the server closed the control connection after the data
 *           socket was closed
 */
public void retrieveFile(String path, OutputStream os, int limit)
    throws IOException, FtpExceptionCanNotHaveDataConnection,
    FtpExceptionUnknownForcedDataClose,
    FtpExceptionControlClosedByForcedDataClose {
  Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);
  if (socket == null)
    throw new FtpExceptionCanNotHaveDataConnection("RETR "
        + ((path == null) ? "" : path));

  boolean transferCompleted = false;
  try {
    InputStream input = socket.getInputStream();
    // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
    // fixme, should we instruct server here for binary file type?
    byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
    // long, so the running total cannot wrap around for large transfers
    long count = 0;
    int len;
    while ((len = input.read(buf, 0, buf.length)) != -1) {
      count += len;
      // impose download limit if limit >= 0, otherwise no limit;
      // cut off is exactly limit bytes
      if (limit >= 0 && count > limit) {
        // count - limit is the overshoot within this chunk, so it is <= len
        os.write(buf, 0, len - (int) (count - limit));
        break;
      }
      os.write(buf, 0, len);
      os.flush();
    }
    // make sure a truncated last chunk is flushed as well
    os.flush();
    transferCompleted = true;
  } finally {
    // The data socket is always closed here, whether or not the limit was
    // reached; different ftp servers respond differently to that, see below.
    if (transferCompleted) {
      socket.close();
    } else {
      // Already failing: do not let a close error mask the real cause.
      try {
        socket.close();
      } catch (IOException ignored) {
        // best-effort cleanup
      }
    }
  }

  // ABOR is deliberately not sent here.
  try {
    int reply = getReply();
    if (!_notBadReply(reply))
      throw new FtpExceptionUnknownForcedDataClose(getReplyString());
  } catch (FTPConnectionClosedException e) {
    // some ftp servers will close control channel if data channel socket
    // is closed by our end before all data has been read out. Check:
    // tux414.q-tam.hp.com FTP server (hp.com version whp02)
    // so must catch FTPConnectionClosedException thrown by getReply() above
    throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
  }
}
/**
 * Checks the reply received after the data connection has been closed.
 * <p>
 * Besides any positive completion reply, three codes are tolerated because
 * some servers send them when the data socket is closed on our side:
 * <ul>
 * <li>426 (FTPReply.TRANSFER_ABORTED), e.g. foggy FTP server (Version
 * wu-2.6.2(2)</li>
 * <li>450 (FTPReply.FILE_ACTION_NOT_TAKEN), e.g. ProFTPD [ftp.kernel.org]</li>
 * <li>451 (FTPReply.ACTION_ABORTED), e.g. ProFTPD [ftp.kernel.org]</li>
 * </ul>
 * No second reply is read for any of them.
 *
 * @param reply
 *          the reply code to check
 * @return true if the reply is acceptable, false for any other code
 */
private boolean _notBadReply(int reply) {
  if (FTPReply.isPositiveCompletion(reply)) {
    return true;
  }
  switch (reply) {
  case 426:
  case 450:
  case 451:
    return true;
  default:
    // what other kind of ftp server out there?
    return false;
  }
}
/**
 * Sets the file type to be transferred. This should be one of
 * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>,
 * etc. The file type only needs to be set when you want to change the type.
 * After changing it, the new type stays in effect until you change it again.
 * The default file type is <code> FTP.ASCII_FILE_TYPE </code> if this method
 * is never called.
 *
 * @param fileType
 *          The <code> _FILE_TYPE </code> constant indicating the type of
 *          file.
 * @return True if successfully completed, false if not.
 * @exception FTPConnectionClosedException
 *              If the FTP server prematurely closes the connection as a
 *              result of the client being idle or some other reason causing
 *              the server to send FTP reply code 421. This exception may be
 *              caught either as an IOException or independently as itself.
 * @exception IOException
 *              If an I/O error occurs while either sending a command to the
 *              server or receiving a reply from the server.
 */
public boolean setFileType(int fileType) throws IOException {
  // The outcome is simply whether the server accepted the TYPE command;
  // the chosen type is not remembered on the client side.
  final int typeReply = type(fileType);
  return FTPReply.isPositiveCompletion(typeReply);
}
/**
 * Fetches the system type name from the server and returns the string. The
 * value is cached after the first successful call: only that call issues a
 * SYST command, and later calls return the cached value until the cache is
 * reset by a call to disconnect.
 *
 * @return The system type name obtained from the server (the first reply
 *         line with its first four characters removed).
 * @exception FtpExceptionBadSystResponse
 *              If the reply to SYST is not a positive completion, or its
 *              first line is too short to carry a name.
 * @exception FTPConnectionClosedException
 *              If the FTP server prematurely closes the connection as a
 *              result of the client being idle or some other reason causing
 *              the server to send FTP reply code 421. This exception may be
 *              caught either as an IOException or independently as itself.
 * @exception IOException
 *              If an I/O error occurs while either sending a command to the
 *              server or receiving a reply from the server.
 */
public String getSystemName() throws IOException, FtpExceptionBadSystResponse {
  if (__systemName == null) {
    // Technically, we should expect a NAME_SYSTEM_TYPE response, but
    // in practice FTP servers deviate, so we soften the condition to
    // a positive completion.
    if (!FTPReply.isPositiveCompletion(syst())) {
      throw new FtpExceptionBadSystResponse("Bad response of SYST: "
          + getReplyString());
    }
    String firstLine = getReplyStrings()[0];
    if (firstLine.length() < 4) {
      // nothing follows the reply code; substring(4) would fail
      throw new FtpExceptionBadSystResponse("Bad response of SYST: "
          + getReplyString());
    }
    // drop the first four characters (reply code and separator)
    __systemName = firstLine.substring(4);
  }
  // A cached name is returned as is, without contacting the server again.
  return __systemName;
}
/**
 * Sends a NOOP command to the FTP server. This is useful for preventing
 * server timeouts.
 *
 * @return True if successfully completed, false if not.
 * @exception FTPConnectionClosedException
 *              If the FTP server prematurely closes the connection as a
 *              result of the client being idle or some other reason causing
 *              the server to send FTP reply code 421. This exception may be
 *              caught either as an IOException or independently as itself.
 * @exception IOException
 *              If an I/O error occurs while either sending a command to the
 *              server or receiving a reply from the server.
 */
public boolean sendNoOp() throws IOException {
  final int noopReply = noop();
  return FTPReply.isPositiveCompletion(noopReply);
}
// client.stat(path);
// client.sendCommand("STAT");
// client.sendCommand("STAT",path);
// client.sendCommand("MDTM",path);
// client.sendCommand("SIZE",path);
// client.sendCommand("HELP","SITE");
// client.sendCommand("SYST");
// client.setRestartOffset(120);
}