| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nutch.protocol.ftp; |
| |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.OutputStream; |
| |
| import java.net.InetAddress; |
| import java.net.Socket; |
| |
| import java.util.List; |
| |
| import org.apache.commons.net.MalformedServerReplyException; |
| |
| import org.apache.commons.net.ftp.FTP; |
| import org.apache.commons.net.ftp.FTPCommand; |
| import org.apache.commons.net.ftp.FTPFile; |
| import org.apache.commons.net.ftp.FTPFileEntryParser; |
| import org.apache.commons.net.ftp.FTPReply; |
| |
| import org.apache.commons.net.ftp.FTPConnectionClosedException; |
| |
| /*********************************************** |
| * Client.java encapsulates functionalities necessary for nutch to get dir list |
| * and retrieve file from an FTP server. This class takes care of all low level |
| * details of interacting with an FTP server and provides a convenient higher |
| * level interface. |
| * |
| * Modified from FtpClient.java in apache commons-net. |
| * |
| * Notes by John Xing: ftp server implementations are hardly uniform and none |
| * seems to follow RFCs whole-heartedly. We have no choice, but assume common |
| * denominator as following: (1) Use stream mode for data transfer. Block mode |
| * will be better for multiple file downloading and partial file downloading. |
| * However not every ftpd has block mode support. (2) Use passive mode for data |
| * connection. So Nutch will work if we run behind firewall. (3) Data connection |
| * is opened/closed per ftp command for the reasons listed in (1). There are ftp |
| * servers out there that, when partial downloading is enforced by closing the |
| * data channel socket on our client side, immediately close the control channel |
| * (socket). Our code deals with such bad behavior. (4) LIST is used |
| * to obtain remote file attributes if possible. MDTM and SIZE would be nice, but |
| * not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single |
| * thread? Do not use it at all. |
| * |
| * About exceptions: Some specific exceptions are re-thrown as one of the |
| * FtpException*.java classes. In fact, each function either throws one of the |
| * FtpException*.java exceptions or passes an IOException through. |
| * |
| * @author John Xing |
| ***********************************************/ |
| |
| public class Client extends FTP { |
| private int __dataTimeout; |
| private int __passivePort; |
| private String __passiveHost; |
| // private int __fileType, __fileFormat; |
| private boolean __remoteVerificationEnabled; |
| // private FTPFileEntryParser __entryParser; |
| private String __systemName; |
| |
| /** Public default constructor */ |
| public Client() { |
| __initDefaults(); |
| __dataTimeout = -1; |
| __remoteVerificationEnabled = true; |
| } |
| |
| // defaults when initialize |
| private void __initDefaults() { |
| __passiveHost = null; |
| __passivePort = -1; |
| __systemName = null; |
| // __fileType = FTP.ASCII_FILE_TYPE; |
| // __fileFormat = FTP.NON_PRINT_TEXT_FORMAT; |
| // __entryParser = null; |
| } |
| |
| // parse reply for pass() |
| private void __parsePassiveModeReply(String reply) |
| throws MalformedServerReplyException { |
| int i, index, lastIndex; |
| String octet1, octet2; |
| StringBuffer host; |
| |
| reply = reply.substring(reply.indexOf('(') + 1, reply.indexOf(')')).trim(); |
| |
| host = new StringBuffer(24); |
| lastIndex = 0; |
| index = reply.indexOf(','); |
| host.append(reply.substring(lastIndex, index)); |
| |
| for (i = 0; i < 3; i++) { |
| host.append('.'); |
| lastIndex = index + 1; |
| index = reply.indexOf(',', lastIndex); |
| host.append(reply.substring(lastIndex, index)); |
| } |
| |
| lastIndex = index + 1; |
| index = reply.indexOf(',', lastIndex); |
| |
| octet1 = reply.substring(lastIndex, index); |
| octet2 = reply.substring(index + 1); |
| |
| // index and lastIndex now used as temporaries |
| try { |
| index = Integer.parseInt(octet1); |
| lastIndex = Integer.parseInt(octet2); |
| } catch (NumberFormatException e) { |
| throw new MalformedServerReplyException( |
| "Could not parse passive host information.\nServer Reply: " + reply); |
| } |
| |
| index <<= 8; |
| index |= lastIndex; |
| |
| __passiveHost = host.toString(); |
| __passivePort = index; |
| } |
| |
| /** |
| * open a passive data connection socket |
| * |
| * @param command |
| * @param arg |
| * @return |
| * @throws IOException |
| * @throws FtpExceptionCanNotHaveDataConnection |
| */ |
| protected Socket __openPassiveDataConnection(int command, String arg) |
| throws IOException, FtpExceptionCanNotHaveDataConnection { |
| Socket socket; |
| |
| // // 20040317, xing, accommodate ill-behaved servers, see below |
| // int port_previous = __passivePort; |
| |
| if (pasv() != FTPReply.ENTERING_PASSIVE_MODE) |
| throw new FtpExceptionCanNotHaveDataConnection("pasv() failed. " |
| + getReplyString()); |
| |
| try { |
| __parsePassiveModeReply(getReplyStrings()[0]); |
| } catch (MalformedServerReplyException e) { |
| throw new FtpExceptionCanNotHaveDataConnection(e.getMessage()); |
| } |
| |
| // // 20040317, xing, accommodate ill-behaved servers, see above |
| // int count = 0; |
| // System.err.println("__passivePort "+__passivePort); |
| // System.err.println("port_previous "+port_previous); |
| // while (__passivePort == port_previous) { |
| // // just quit if too many tries. make it an exception here? |
| // if (count++ > 10) |
| // return null; |
| // // slow down further for each new try |
| // Thread.sleep(500*count); |
| // if (pasv() != FTPReply.ENTERING_PASSIVE_MODE) |
| // throw new FtpExceptionCanNotHaveDataConnection( |
| // "pasv() failed. " + getReplyString()); |
| // //return null; |
| // try { |
| // __parsePassiveModeReply(getReplyStrings()[0]); |
| // } catch (MalformedServerReplyException e) { |
| // throw new FtpExceptionCanNotHaveDataConnection(e.getMessage()); |
| // } |
| // } |
| |
| socket = _socketFactory_.createSocket(__passiveHost, __passivePort); |
| |
| if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) { |
| socket.close(); |
| return null; |
| } |
| |
| if (__remoteVerificationEnabled && !verifyRemote(socket)) { |
| InetAddress host1, host2; |
| |
| host1 = socket.getInetAddress(); |
| host2 = getRemoteAddress(); |
| |
| socket.close(); |
| |
| // our precaution |
| throw new FtpExceptionCanNotHaveDataConnection( |
| "Host attempting data connection " + host1.getHostAddress() |
| + " is not same as server " + host2.getHostAddress() |
| + " So we intentionally close it for security precaution."); |
| } |
| |
| if (__dataTimeout >= 0) |
| socket.setSoTimeout(__dataTimeout); |
| |
| return socket; |
| } |
| |
| /*** |
| * Sets the timeout in milliseconds to use for data connection. set |
| * immediately after opening the data connection. |
| ***/ |
| public void setDataTimeout(int timeout) { |
| __dataTimeout = timeout; |
| } |
| |
| /*** |
| * Closes the connection to the FTP server and restores connection parameters |
| * to the default values. |
| * <p> |
| * |
| * @exception IOException |
| * If an error occurs while disconnecting. |
| ***/ |
| public void disconnect() throws IOException { |
| __initDefaults(); |
| super.disconnect(); |
| // no worry for data connection, since we always close it |
| // in every ftp command that invloves data connection |
| } |
| |
| /*** |
| * Enable or disable verification that the remote host taking part of a data |
| * connection is the same as the host to which the control connection is |
| * attached. The default is for verification to be enabled. You may set this |
| * value at any time, whether the FTPClient is currently connected or not. |
| * <p> |
| * |
| * @param enable |
| * True to enable verification, false to disable verification. |
| ***/ |
| public void setRemoteVerificationEnabled(boolean enable) { |
| __remoteVerificationEnabled = enable; |
| } |
| |
| /*** |
| * Return whether or not verification of the remote host participating in data |
| * connections is enabled. The default behavior is for verification to be |
| * enabled. |
| * <p> |
| * |
| * @return True if verification is enabled, false if not. |
| ***/ |
| public boolean isRemoteVerificationEnabled() { |
| return __remoteVerificationEnabled; |
| } |
| |
| /*** |
| * Login to the FTP server using the provided username and password. |
| * <p> |
| * |
| * @param username |
| * The username to login under. |
| * @param password |
| * The password to use. |
| * @return True if successfully completed, false if not. |
| * @exception FTPConnectionClosedException |
| * If the FTP server prematurely closes the connection as a |
| * result of the client being idle or some other reason causing |
| * the server to send FTP reply code 421. This exception may be |
| * caught either as an IOException or independently as itself. |
| * @exception IOException |
| * If an I/O error occurs while either sending a command to the |
| * server or receiving a reply from the server. |
| ***/ |
| public boolean login(String username, String password) throws IOException { |
| user(username); |
| |
| if (FTPReply.isPositiveCompletion(getReplyCode())) |
| return true; |
| |
| // If we get here, we either have an error code, or an intermmediate |
| // reply requesting password. |
| if (!FTPReply.isPositiveIntermediate(getReplyCode())) |
| return false; |
| |
| return FTPReply.isPositiveCompletion(pass(password)); |
| } |
| |
| /*** |
| * Logout of the FTP server by sending the QUIT command. |
| * <p> |
| * |
| * @return True if successfully completed, false if not. |
| * @exception FTPConnectionClosedException |
| * If the FTP server prematurely closes the connection as a |
| * result of the client being idle or some other reason causing |
| * the server to send FTP reply code 421. This exception may be |
| * caught either as an IOException or independently as itself. |
| * @exception IOException |
| * If an I/O error occurs while either sending a command to the |
| * server or receiving a reply from the server. |
| ***/ |
| public boolean logout() throws IOException { |
| return FTPReply.isPositiveCompletion(quit()); |
| } |
| |
| /** |
| * retrieve list reply for path |
| * |
| * @param path |
| * @param entries |
| * @param limit |
| * @param parser |
| * @throws IOException |
| * @throws FtpExceptionCanNotHaveDataConnection |
| * @throws FtpExceptionUnknownForcedDataClose |
| * @throws FtpExceptionControlClosedByForcedDataClose |
| */ |
| public void retrieveList(String path, List<FTPFile> entries, int limit, |
| FTPFileEntryParser parser) throws IOException, |
| FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose, |
| FtpExceptionControlClosedByForcedDataClose { |
| Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path); |
| |
| if (socket == null) |
| throw new FtpExceptionCanNotHaveDataConnection("LIST " |
| + ((path == null) ? "" : path)); |
| |
| BufferedReader reader = new BufferedReader(new InputStreamReader( |
| socket.getInputStream())); |
| |
| // force-close data channel socket, when download limit is reached |
| // boolean mandatory_close = false; |
| |
| // List entries = new LinkedList(); |
| int count = 0; |
| String line = parser.readNextEntry(reader); |
| while (line != null) { |
| FTPFile ftpFile = parser.parseFTPEntry(line); |
| // skip non-formatted lines |
| if (ftpFile == null) { |
| line = parser.readNextEntry(reader); |
| continue; |
| } |
| entries.add(ftpFile); |
| count += line.length(); |
| // impose download limit if limit >= 0, otherwise no limit |
| // here, cut off is up to the line when total bytes is just over limit |
| if (limit >= 0 && count > limit) { |
| // mandatory_close = true; |
| break; |
| } |
| line = parser.readNextEntry(reader); |
| } |
| |
| // if (mandatory_close) |
| // you always close here, no matter mandatory_close or not. |
| // however different ftp servers respond differently, see below. |
| socket.close(); |
| |
| // scenarios: |
| // (1) mandatory_close is false, download limit not reached |
| // no special care here |
| // (2) mandatory_close is true, download limit is reached |
| // different servers have different reply codes: |
| |
| try { |
| int reply = getReply(); |
| if (!_notBadReply(reply)) |
| throw new FtpExceptionUnknownForcedDataClose(getReplyString()); |
| } catch (FTPConnectionClosedException e) { |
| // some ftp servers will close control channel if data channel socket |
| // is closed by our end before all data has been read out. Check: |
| // tux414.q-tam.hp.com FTP server (hp.com version whp02) |
| // so must catch FTPConnectionClosedException thrown by getReply() above |
| // disconnect(); |
| throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage()); |
| } |
| |
| } |
| |
| /** |
| * retrieve file for path |
| * |
| * @param path |
| * @param os |
| * @param limit |
| * @throws IOException |
| * @throws FtpExceptionCanNotHaveDataConnection |
| * @throws FtpExceptionUnknownForcedDataClose |
| * @throws FtpExceptionControlClosedByForcedDataClose |
| */ |
| public void retrieveFile(String path, OutputStream os, int limit) |
| throws IOException, FtpExceptionCanNotHaveDataConnection, |
| FtpExceptionUnknownForcedDataClose, |
| FtpExceptionControlClosedByForcedDataClose { |
| |
| Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path); |
| |
| if (socket == null) |
| throw new FtpExceptionCanNotHaveDataConnection("RETR " |
| + ((path == null) ? "" : path)); |
| |
| InputStream input = socket.getInputStream(); |
| |
| // 20040318, xing, treat everything as BINARY_FILE_TYPE for now |
| // do we ever need ASCII_FILE_TYPE? |
| // if (__fileType == ASCII_FILE_TYPE) |
| // input = new FromNetASCIIInputStream(input); |
| |
| // fixme, should we instruct server here for binary file type? |
| |
| // force-close data channel socket |
| // boolean mandatory_close = false; |
| |
| int len; |
| int count = 0; |
| byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE]; |
| while ((len = input.read(buf, 0, buf.length)) != -1) { |
| count += len; |
| // impose download limit if limit >= 0, otherwise no limit |
| // here, cut off is exactly of limit bytes |
| if (limit >= 0 && count > limit) { |
| os.write(buf, 0, len - (count - limit)); |
| // mandatory_close = true; |
| break; |
| } |
| os.write(buf, 0, len); |
| os.flush(); |
| } |
| |
| // if (mandatory_close) |
| // you always close here, no matter mandatory_close or not. |
| // however different ftp servers respond differently, see below. |
| socket.close(); |
| |
| // scenarios: |
| // (1) mandatory_close is false, download limit not reached |
| // no special care here |
| // (2) mandatory_close is true, download limit is reached |
| // different servers have different reply codes: |
| |
| // do not need this |
| // sendCommand("ABOR"); |
| |
| try { |
| int reply = getReply(); |
| if (!_notBadReply(reply)) |
| throw new FtpExceptionUnknownForcedDataClose(getReplyString()); |
| } catch (FTPConnectionClosedException e) { |
| // some ftp servers will close control channel if data channel socket |
| // is closed by our end before all data has been read out. Check: |
| // tux414.q-tam.hp.com FTP server (hp.com version whp02) |
| // so must catch FTPConnectionClosedException thrown by getReply() above |
| // disconnect(); |
| throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage()); |
| } |
| |
| } |
| |
| /** |
| * reply check after closing data connection |
| * |
| * @param reply |
| * @return |
| */ |
| private boolean _notBadReply(int reply) { |
| |
| if (FTPReply.isPositiveCompletion(reply)) { |
| // do nothing |
| } else if (reply == 426) { // FTPReply.TRANSFER_ABORTED |
| // some ftp servers reply 426, e.g., |
| // foggy FTP server (Version wu-2.6.2(2) |
| // there is second reply witing? no! |
| // getReply(); |
| } else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN |
| // some ftp servers reply 450, e.g., |
| // ProFTPD [ftp.kernel.org] |
| // there is second reply witing? no! |
| // getReply(); |
| } else if (reply == 451) { // FTPReply.ACTION_ABORTED |
| // some ftp servers reply 451, e.g., |
| // ProFTPD [ftp.kernel.org] |
| // there is second reply witing? no! |
| // getReply(); |
| } else { |
| // what other kind of ftp server out there? |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /*** |
| * Sets the file type to be transferred. This should be one of |
| * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>, |
| * etc. The file type only needs to be set when you want to change the type. |
| * After changing it, the new type stays in effect until you change it again. |
| * The default file type is <code> FTP.ASCII_FILE_TYPE </code> if this method |
| * is never called. |
| * <p> |
| * |
| * @param fileType |
| * The <code> _FILE_TYPE </code> constant indcating the type of file. |
| * @return True if successfully completed, false if not. |
| * @exception FTPConnectionClosedException |
| * If the FTP server prematurely closes the connection as a |
| * result of the client being idle or some other reason causing |
| * the server to send FTP reply code 421. This exception may be |
| * caught either as an IOException or independently as itself. |
| * @exception IOException |
| * If an I/O error occurs while either sending a command to the |
| * server or receiving a reply from the server. |
| ***/ |
| public boolean setFileType(int fileType) throws IOException { |
| if (FTPReply.isPositiveCompletion(type(fileType))) { |
| /* |
| * __fileType = fileType; __fileFormat = FTP.NON_PRINT_TEXT_FORMAT; |
| */ |
| return true; |
| } |
| return false; |
| } |
| |
| /*** |
| * Fetches the system type name from the server and returns the string. This |
| * value is cached for the duration of the connection after the first call to |
| * this method. In other words, only the first time that you invoke this |
| * method will it issue a SYST command to the FTP server. FTPClient will |
| * remember the value and return the cached value until a call to disconnect. |
| * <p> |
| * |
| * @return The system type name obtained from the server. null if the |
| * information could not be obtained. |
| * @exception FTPConnectionClosedException |
| * If the FTP server prematurely closes the connection as a |
| * result of the client being idle or some other reason causing |
| * the server to send FTP reply code 421. This exception may be |
| * caught either as an IOException or independently as itself. |
| * @exception IOException |
| * If an I/O error occurs while either sending a command to the |
| * server or receiving a reply from the server. |
| ***/ |
| public String getSystemName() throws IOException, FtpExceptionBadSystResponse { |
| // if (syst() == FTPReply.NAME_SYSTEM_TYPE) |
| // Technically, we should expect a NAME_SYSTEM_TYPE response, but |
| // in practice FTP servers deviate, so we soften the condition to |
| // a positive completion. |
| if (__systemName == null && FTPReply.isPositiveCompletion(syst())) { |
| __systemName = (getReplyStrings()[0]).substring(4); |
| } else { |
| throw new FtpExceptionBadSystResponse("Bad response of SYST: " |
| + getReplyString()); |
| } |
| |
| return __systemName; |
| } |
| |
| /*** |
| * Sends a NOOP command to the FTP server. This is useful for preventing |
| * server timeouts. |
| * <p> |
| * |
| * @return True if successfully completed, false if not. |
| * @exception FTPConnectionClosedException |
| * If the FTP server prematurely closes the connection as a |
| * result of the client being idle or some other reason causing |
| * the server to send FTP reply code 421. This exception may be |
| * caught either as an IOException or independently as itself. |
| * @exception IOException |
| * If an I/O error occurs while either sending a command to the |
| * server or receiving a reply from the server. |
| ***/ |
| public boolean sendNoOp() throws IOException { |
| return FTPReply.isPositiveCompletion(noop()); |
| } |
| |
| // client.stat(path); |
| // client.sendCommand("STAT"); |
| // client.sendCommand("STAT",path); |
| // client.sendCommand("MDTM",path); |
| // client.sendCommand("SIZE",path); |
| // client.sendCommand("HELP","SITE"); |
| // client.sendCommand("SYST"); |
| // client.setRestartOffset(120); |
| |
| } |