#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------------
# Name: filelist.py
# Purpose: File listing class/functions.
#
# Author: Brian Wilson
#
# Created: Mon Apr 10 11:01:06 2006
#-----------------------------------------------------------------------------
#
USAGE = """
filelist.py [--help] [--bottomUp] [--credentials] [--delete] [--directory]
            [--fetchDir <outputDir>] [--fetchIfNewer] [--fetchWithSubDirs]
            [--info] [--list] [--matchUrl] [--quiet] [--regex '.*\.[cC]']
            [--size] [--topOnly] [--url] [--verbose]
            [--wildcard '*.txt.*'] [--xml] <topPaths ...>
Recursively traverse and print (with full paths or URL's) all files
under the topPath(s) that match ANY of one or more regular expressions
and/or wildcard (glob) strings. By default, it simply prints the matches,
but one can also get their sizes, fetch them, or delete them.
The topPaths can be a mixture of local and remote (ftp or http)
paths, in which case a list of URL's is returned. If xml mode is
turned on, then the output is an XML list.
If no regex or wildcard patterns are specified, then ALL files
are returned. If files are fetched, then the URL's are
REWRITTEN to point to the local copies.
"""
# See main() at the bottom of the file for the exact option switches.
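# Example invocations (a sketch; paths and hosts are illustrative):
#   filelist.py --wildcard '*.txt' /data                # print matching files
#   filelist.py --url --xml --regex '.*\.[cC]$' /src    # XML list of file URLs
#   filelist.py --fetchDir ./mirror --fetchWithSubDirs ftp://somehost/pub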
import sys, os, re, getopt, types, getpass
import urllib, urllib2, urlparse, time, shutil, socket
from fnmatch import fnmatchcase
from ftplib import FTP
try:
    import dataenc    # optional: encodes/decodes passwords stored in UserCredential
except ImportError:
    dataenc = None    # without it, UserCredential refuses to store passwords
def matchAnyThenConstrain(root, name, haveRegs, regs, haveWilds, wildCards,
constraintFunction):
"""Return True if the file name matches any of the compiled regular
expressions or any of the wildcard (glob) specs, and (if present) the
constraintFunction returns True. The regex can be a pair of match &
substitution patterns. The 'name' of the file might be altered by a
regex substitution and/or the constraintFunction.
"""
if not haveRegs and not haveWilds:
if constraintFunction is not None:
return constraintFunction(root, name)
else:
return (True, name)
else:
match = False
if haveRegs:
for reg in regs:
pattern, subst = reg
if pattern.search(name):
match = True
if subst:
name = pattern.sub(subst, name)
break
if haveWilds and not match:
for wild in wildCards:
if fnmatchcase(name, wild):
match = True
break
if match and constraintFunction is not None:
match, name = constraintFunction(root, name)
return (match, name)
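# A minimal usage sketch (values are illustrative, not from this script):
#   regs = [(re.compile(r'\.nc$'), '.hdf')]    # match *.nc, rewrite to *.hdf
#   matchAnyThenConstrain('/data', 'granule.nc', True, regs, False, [], None)
#   => (True, 'granule.hdf')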
# Users call this function
def filelist(urlPaths, regSpecs=[], wildCards=[], needCredentials=False, userCredentials=None,
matchFunction=matchAnyThenConstrain, constraintFunction=None,
matchUrl=False, walkDirectories=True,
urlMode=True, xmlMode=True, quietMode=False, verboseMode=False, getFileInfo=False,
fetchDir=None, fetchIfNewer=False, fetchWithSubDirs=False,
directoryMode=False, listMode=False, deleteMode=False, topDown=True,
stream=sys.stdout):
"""Recursively traverse and print (with full paths or URL's) all files
under the topPath(s) that match one or more regular expressions and/or
wildcard (glob) strings, and an optional constraint (T/F) function to
further winnow the candidate matches. (The matchFunction can also be
entirely replaced with custom logic.)
By default, it simply generates the matches, but one can also fetch them,
get their sizes, or delete them (if they are local files).
Handles local directory paths and ftp/http URL's.
Returns three file lists: matched, actually fetched, & destination names.
"""
try:
matchedFiles = [] # source files that match criteria
fetchedFiles = [] # files that were actually fetched this run
destinationFiles = [] # destination (local) file names (rewritten URL)
        topPaths = []
        if isinstance(urlPaths, types.StringType): urlPaths = [urlPaths]
        for url in urlPaths:
            if url is None or url == '': continue
remote, protocol, netloc, path = remoteUrl(url)
if not remote: url = os.path.abspath(url)
if url[-1] == '/': url = url[:-1]
topPaths.append(url)
if needCredentials and userCredentials is None:
userCredentials = promptForCredentials(topPaths)
if fetchDir:
workDir = os.path.join(fetchDir, '.tmp')
# fetch into tmp directory & then rename so fetching is atomic
            try: os.mkdir(workDir)
            except OSError: pass    # workDir may already exist
if not os.path.exists(workDir):
die("filelist: Cannot write to fetch directory %s" % fetchDir)
        regSpecs = [s for s in regSpecs if s]
        wildCards = [s for s in wildCards if s]
        haveRegs = False; regs = []; haveWilds = False
if len(regSpecs) > 0:
haveRegs = True
regs = []
for reg in regSpecs:
(pattern, subst) = parse_re_with_subst(reg)
regs.append( (re.compile(pattern), subst) )
if len(wildCards) > 0:
haveWilds = True
prefix = ''
extra = ''
suffix = ''
if deleteMode:
suffix += ' deleted.'
            if os.getcwd() in topPaths:
                die("filelist: Recursively deleting from the dot (.) path is not safe. Shame.")
if directoryMode: listMode = False
if listMode: getFileInfo = True
if quietMode: stream = None
sumSizes = 0
if xmlMode:
matchedFiles.append('<files>')
fetchedFiles.append('<files>')
_output('<files>', destinationFiles, stream)
prefix += ' <file>'
suffix += '</file>'
for top in topPaths:
if verboseMode: warn('filelist: searching', top)
topMatchCount = 0; topFetchCount = 0
for root, dirs, files, infos in walk(top, userCredentials, walkDirectories, topDown):
if verboseMode: warn('filelist: found files in', root)
remote, protocol, netloc, path = remoteUrl(root)
if directoryMode:
contents = dirs
else:
contents = files
for i in range(len(contents)):
line = ''
file = contents[i]
try:
info = infos[i]
                    except IndexError:
info = None
if matchUrl:
name = os.path.join(root, file)
else:
name = file
match, newname = matchFunction(root, name, haveRegs, regs,
haveWilds, wildCards, constraintFunction)
                    if match:
                        line = ''; extra = ''
                        topMatchCount += 1
fn = os.path.join(root, file)
if getFileInfo or (fetchIfNewer and not remote):
if remote:
if info and getFileInfo:
if listMode: line = info.line
extra = ' ' + str(info.size) + ' ' + str(info.modTime)
sumSizes += info.size
else:
st = os.stat(fn)
line = ' '.join( map(str, \
(st.st_mode, st.st_uid, st.st_gid, st.st_size, st.st_mtime, fn)))
info = FileInfo(line, st.st_size, st.st_mtime, st.st_uid, st.st_gid, st.st_mode)
if getFileInfo:
extra = ' ' + str(info.size) + ' ' + str(info.modTime)
sumSizes += info.size
if not remote and urlMode: fn = makeFileUrl(fn)
matchedFiles.append(prefix + fn + extra + suffix)
if matchUrl:
newfn = newname
else:
newfn = os.path.join(root, newname)
newr, newp, newloc, newpath = remoteUrl(newfn)
newfile = os.path.split(newpath)[1]
if fetchDir:
if fetchDir == '.': fetchDir = os.getcwd()
if fetchWithSubDirs:
destDir = os.path.join(fetchDir, newpath[1:])
else:
destDir = fetchDir
destFile = os.path.join(destDir, newfile)
tmpFile = os.path.join(workDir, newfile)
if shouldFetch(remote, destFile, fetchIfNewer, info):
if not quietMode:
warn('filelist: Fetching ', fn)
warn('filelist: Writing ', destFile)
                                try:
                                    os.makedirs(destDir)
                                except OSError:
                                    # makedirs raises OSError if destDir already exists
                                    pass
if remote:
urllib.urlretrieve(fn, tmpFile)
else:
shutil.copyfile(fn, tmpFile)
os.rename(tmpFile, destFile) # atomic rename of file into destDir
topFetchCount += 1
fetchedFiles.append(prefix + fn + suffix)
if getFileInfo: line = line + ' ' + destFile
# now rewrite URL to point to local copy of file
fn = destFile
if not remote and urlMode: fn = makeFileUrl(fn)
if not listMode:
line = prefix + fn + extra + suffix
_output(line, destinationFiles, stream)
                        if deleteMode:
                            if remote:
                                die('filelist: Cannot delete remote files (yet)')
                            else:
                                # unlink the original source path, not the
                                # rewritten URL/destination name
                                os.unlink(os.path.join(root, file))
if verboseMode and fetchDir:
warn('filelist: Matched %d files from %s' % (topMatchCount, top))
warn('filelist: Fetched %d files from %s' % (topFetchCount, top))
if fetchDir:
for f in os.listdir(workDir): os.remove(os.path.join(workDir, f))
os.rmdir(workDir)
if xmlMode:
matchedFiles.append('</files>')
fetchedFiles.append('</files>')
            _output('</files>', destinationFiles, stream)
if getFileInfo:
if xmlMode:
line = '<totalSize>%s</totalSize>' % sumSizes
else:
line = '#filelist: total size %s' % sumSizes
matchedFiles.append(line)
_output(line, destinationFiles, stream)
except KeyboardInterrupt:
if fetchDir:
for f in os.listdir(workDir): os.remove(os.path.join(workDir, f))
os.rmdir(workDir)
die('filelist: Keyboard Interrupt')
return (matchedFiles, fetchedFiles, destinationFiles)
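# A minimal usage sketch for filelist() (paths are hypothetical):
#   matched, fetched, dests = filelist(['/data/archive'],
#                                      regSpecs=[r'.*\.txt$'],
#                                      fetchDir='/tmp/mirror', quietMode=True)
#   matched lists the sources, fetched what was copied this run, and dests
#   the rewritten (local) destination names.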
def shouldFetch(remote, destFile, fetchIfNewer, srcFileInfo):
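    """Decide whether a matched file should be fetched: remote files are
    fetched only if no local copy exists; local files are copied if absent,
    or re-copied when fetchIfNewer is set and the source file is newer.
    """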
if remote:
if os.path.exists(destFile):
doFetch = False
else:
doFetch = True
else:
if os.path.exists(destFile):
if fetchIfNewer:
destModTime = os.path.getmtime(destFile)
if destModTime < srcFileInfo.modTime:
doFetch = True
else:
doFetch = False
else:
doFetch = False
else:
doFetch = True
return doFetch
def _output(line, lines, stream=None):
"""Internal function: Add line to output lines and optionally print to stream."""
lines.append(line)
if stream: print >>stream, line
class FileInfo:
"""Holder class for those file info. elements that are consistent among local
files (output of stat), ftp directories, http, etc. Minimum useful fields are
modification time and size. Line contains usual string output of ls -l.
"""
def __init__(self, line, size, modTime, userId=None, groupId=None, protectMode=None):
self.line=line; self.size=size; self.modTime=modTime
self.userId=userId; self.groupId=groupId; self.protectMode=protectMode
class UserCredential(object):
"""Container for user credential info. like username, password, certificate, etc.
"""
def __init__(self, username=None, password=None, validInterval=None, certificate=None):
self.username = username
self.password = password
self.validInterval = validInterval # tuple of Ints (days, hours, minutes)
if password is not None and validInterval is None:
die('UserCredential: If password is present, validInterval is also required.')
self.certificate = certificate
def getPassword(self):
pw = self._password
if pw:
pw, daynumber, timestamp = dataenc.pass_dec(pw)
if dataenc.unexpired(daynumber, timestamp, self.validInterval):
return pw
else:
return None
else:
return None
    def setPassword(self, pw):
        if pw and pw != '':
            if dataenc is None:
                die('UserCredential: dataenc module is required to store passwords.')
            self._password = dataenc.pass_enc(pw, daynumber=True, timestamp=True)
        else:
            self._password = pw
password = property(getPassword, setPassword)
class UserCredentials(object):
    """Contains a dictionary of (url, credential) pairs and optionally an httpProxy.
    """
    def __init__(self, httpProxy=None, credentials=None):
        self.httpProxy = httpProxy
        # use a fresh dict per instance; a mutable default argument would be
        # shared by every UserCredentials object
        if credentials is None: credentials = {}
        self.credentials = credentials
def add(self, url, credential):
self.credentials[url] = credential; return self
def forUrl(self, url):
for key in self.credentials:
if url.startswith(key):
return self.credentials[key]
return None
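# A minimal usage sketch for UserCredentials (host name is illustrative):
#   creds = UserCredentials()
#   creds.add('ftp://somehost/', UserCredential(username='anonymous'))
#   creds.forUrl('ftp://somehost/pub/data')   # => the anonymous credential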
def promptForCredentials(urls, httpProxy=None):
    if httpProxy is None:
httpProxy = raw_input('Enter HTTP proxy [none]: ')
if httpProxy == '': httpProxy = None
credentials = UserCredentials(httpProxy)
localUserName = getpass.getuser()
for url in urls:
remote, protocol, netloc, path = remoteUrl(url)
if remote:
username, password, validInterval = promptForCredential(url, localUserName)
credential = UserCredential(username, password, validInterval)
credentials.add(url, credential)
return credentials
def promptForCredential(url, localUserName):
remote, protocol, netloc, path = remoteUrl(url)
if protocol == 'ftp':
defaultUserName = 'anonymous'
else:
defaultUserName = localUserName
username = raw_input('Need credentials for URL %s\nUsername [%s]: ' \
% (url, defaultUserName))
if username == '': username = defaultUserName
password = ''
while password == '':
password = getpass.getpass()
validInterval = [0, 1, 0]
if password != '':
response = raw_input('Enter valid time period for credential [(days, hours, minutes) = 0 1 0]: ')
        if response != '':
            validInterval = [int(v) for v in response.split()]
return (username, password, validInterval)
class DirectoryWalker:
"""Recursively walk directories using the protocol specified in a URL.
    Subclasses handle ftp, http, sftp, local file system, etc.
"""
def __init__(self, userCredentials=None, retries=3, sleepTime=5):
self.userCredentials = userCredentials
self.retries = retries
self.sleepTime = sleepTime
def walk(self, top, walkDirectories=True):
"""Recursively walk directories on a remote site to retrieve file lists.
"""
remote, protocol, netloc, path = remoteUrl(top)
status, dir_listing = self.retrieveDirList(top)
if status:
if len(dir_listing) == 0:
yield (top, [], [], [])
else:
(dirs, files, infos) = self.parseDirList(dir_listing, path)
yield (top, dirs, files, infos)
if walkDirectories:
                    for dir in dirs:
                        # depth-first recursion; fresh loop names avoid rebinding
                        # the dirs list being iterated over
                        for subRoot, subDirs, subFiles, subInfos in self.walk(top + '/' + dir, walkDirectories):
                            yield (subRoot, subDirs, subFiles, subInfos)
else:
warn('DirectoryWalker: error, unable to retrieve directory listing at', top)
yield (top, [], [], [])
    def retrieveDirList(self, url):
        """Retrieve directory listing as a list of text lines. Returns (status, dirList)."""
        raise NotImplementedError("Override retrieveDirList in a subclass.")
    def parseDirList(self, dirList, path=None):
        """Parse directory listing (text) and return three lists (dirs, files, fileInfos)."""
        raise NotImplementedError("Override parseDirList in a subclass.")
class FtpDirectoryWalker(DirectoryWalker):
"""Recursively walk directories on an ftp site."""
def __init__(self, userCredentials=None, retries=3, sleepTime=5):
DirectoryWalker.__init__(self, userCredentials, retries, sleepTime)
def retrieveDirList(self, url):
"""Retrieve a directory listing via ftp with retries.
"""
remote, protocol, netloc, path = remoteUrl(url)
credential = None
if self.userCredentials:
credential = self.userCredentials.forUrl(url)
dir = ''; dir_list = []
ftp = FTP()
for i in range(self.retries):
try:
ftp.connect(netloc)
if credential is None or \
credential.username == 'anonymous' or \
credential.username == '':
ftp.login()
else:
ftp.login(credential.username, credential.password)
ftp.cwd(path)
ftp.retrlines('LIST', dir_list.append)
ftp.quit()
dir = '\n'.join(dir_list)
return (True, dir)
            except Exception:
                pass
time.sleep(self.sleepTime)
warn('FtpDirectoryWalker: connect retry to ', netloc, path)
return (False, dir)
def parseDirList(self, dir, path=None):
"""Parse long directory listing returned by ftp or (ls -l).
Separate entries into directories and files.
"""
dirs = []; files = []; infos = []
for entry in dir.split('\n'):
fields = entry.split()
if len(fields) < 7: continue
fn = fields[-1]
if fn == '.' or fn == '..': continue
            if re.match('^d', fields[0]) and fields[0][7] == 'r':
dirs.append(fn)
else:
files.append(fn)
info = FileInfo(entry, int(fields[4]), '-'.join(fields[5:8]), \
fields[2], fields[3], fields[0])
infos.append(info)
return (dirs, files, infos)
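# Example (illustrative): an ls -l style entry such as
#   'drwxr-xr-x  2 user grp 4096 Apr 10 11:01 data'
# parses into dirs=['data'] with a FileInfo of size 4096 and
# modTime 'Apr-10-11:01'.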
class DirListingParser(object):
"""Base class for directory listing parsers."""
def __init__(self, regex):
self.regex = regex
self.compiledRegex = re.compile(self.regex)
    def parse(self, dir, listingHtml):
        """Return (dirs, files, infos)."""
        raise NotImplementedError("Override this method in a subclass.")
class ApacheDirListingParser(DirListingParser):
"""Parser class for apache."""
def parse(self, dir, listingHtml):
dirs = []; files = []; infos = []
items = self.compiledRegex.findall(listingHtml)
for item, itemName in items:
if itemName.strip() == 'Parent Directory': continue
if isinstance(item, str):
name = item
else:
name, dateTime, size = item[:]
if name.endswith('/'):
type = 'd'
dirs.append(name[:-1])
else:
type = '-'
files.append(name)
#not doing file info
'''
size = size.lower()
if size.endswith('k'):
size = int(size[:-1]) * 1024
elif size.endswith('m'):
size = int(size[:-1]) * 1024 * 1024
else:
size = -1
line = '%s--------- 1 ? ? %15d %s %s' % (type, size, dateTime, name)
info = FileInfo(line, size, dateTime)
'''
infos.append(None)
return (dirs, files, infos)
class CDAACDirListingParser(DirListingParser):
"""Parser class for CDAAC data server."""
def parse(self, dir, listingHtml):
dirs = []; files = []; infos = []
items = self.compiledRegex.findall(listingHtml)
for item, itemName in items:
if itemName.strip() == 'Parent Directory': continue
if isinstance(item, str):
name = item
else:
name, dateTime, size = item[:]
if name.endswith('/'):
type = 'd'
dirs.append(name)
else:
type = '-'
files.append(name)
#not doing file info
'''
size = size.lower()
if size.endswith('k'):
size = int(size[:-1]) * 1024
elif size.endswith('m'):
size = int(size[:-1]) * 1024 * 1024
else:
size = -1
line = '%s--------- 1 ? ? %15d %s %s' % (type, size, dateTime, name)
info = FileInfo(line, size, dateTime)
'''
infos.append(None)
return (dirs, files, infos)
class HttpDirectoryWalker(DirectoryWalker):
"""Recursively walk directories on an http (web) site to retrieve file lists.
Handles many styles of HTML directory listings, but still very FRAGILE.
"""
#list of directory listing parser plugins
DIR_LIST_REGEX_PLUGINS = [
#apache 2.0.55 directory listing
ApacheDirListingParser(r'(?i)alt="\[.*?\]">\s*<A HREF="(?P<name>.*?)">(.*?)</A>'),
#CDAAC (COSMIC Data)
CDAACDirListingParser(r'(?i)<LI><A HREF="(?P<name>.*?)">(.*?)</A>'),
]
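    # To support another listing style, append a parser here; a sketch (the
    # class name and regex below are hypothetical, not tested patterns):
    #   DIR_LIST_REGEX_PLUGINS.append(
    #       MyServerDirListingParser(r'(?i)<TD><A HREF="(?P<name>.*?)">(.*?)</A>'))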
def __init__(self, userCredentials=None, retries=3, sleepTime=5):
DirectoryWalker.__init__(self, userCredentials, retries, sleepTime)
if self.userCredentials:
if self.userCredentials.httpProxy:
os.environ['http_proxy'] = self.userCredentials.httpProxy
# global kludge, default proxyHandler looks up proxy there
passwordMgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
for url, cred in self.userCredentials.credentials.iteritems():
passwordMgr.add_password(None, url, cred.username, cred.password)
authHandler = urllib2.HTTPBasicAuthHandler(passwordMgr)
opener = urllib2.build_opener(authHandler)
else:
# opener = urllib2.build_opener()
opener = None
# opener.add_headers = [('User-agent', 'Mozilla/5.0')]
self.opener = opener
def retrieveDirList(self, url):
"""Retrieve an HTML directory listing via http with retries.
"""
### url = os.path.join(url, 'contents.html') ### hack for DAP servers at GES-DISC
dir_listing = ''
proxies = {}
for i in range(self.retries):
try:
if self.opener:
response = self.opener.open(url)
else:
response = urllib.urlopen(url)
except IOError, e:
if hasattr(e, 'reason'):
warn('HttpDirectoryWalker: Error, failed to reach server because: %s' % e.reason)
elif hasattr(e, 'code'):
warn('HttpDirectoryWalker: Server could not fulfill request, error code %s' % e.code)
else:
dir_listing = response.read()
return (True, dir_listing)
time.sleep(self.sleepTime)
warn('HttpDirectoryWalker: retrying ', url)
return (False, dir_listing)
reDirPath = re.compile(r'(?i)<H1>.*?Index of\s*?(\S+?)\s*?</H1>')
def parseDirList(self, dir, path):
"""Parse fragile HTML directory listings returned by various HTTP servers,
including Apache and OpenDAP. Separate entries into directories and files.
"""
dirs = []; files = []; infos = []
if path:
match = HttpDirectoryWalker.reDirPath.search(dir)
if not match:
die('HttpDirectoryWalker: Cannot find directory name %s in HTML listing:\n%s' % (path, dir))
dirName = match.group(1)
if dirName not in path:
warn('HttpDirectoryWalker: Directory name %s in HTML listing does not agree with path %s:\n%s' % (dirName, path, dir))
# Try to find directory lines that contain file info
reDirListWithStat = re.compile( \
r'(?i)<A HREF=[\'"]*?(?P<name>[^\?].*?' + dirName + r'.*?)[\'"]*?>.*?</A>\s*(?P<dateTime>\S+ \S+)\s+?(?P<size>\S+)\s*?$')
items = reDirListWithStat.findall(dir)
# If not, then try to find simple directory lines
if len(items) == 0:
reDirList = re.compile( \
r'(?i)<A HREF=[\'"]*?(?P<name>[^\?].*?' + dirName + r'.*?)[\'"]*?>.*?</A>')
items = reDirList.findall(dir)
if len(items) != 0:
dateTime = '? ?'; size = ''
for item in items:
if isinstance(item, str):
name = item
else:
name, dateTime, size = item[:]
if dirName not in name: continue
if name.endswith('/'):
type = 'd'
dirs.append(name)
else:
type = '-'
files.append(name)
size = size.lower()
if size.endswith('k'):
size = int(size[:-1]) * 1024
elif size.endswith('m'):
size = int(size[:-1]) * 1024 * 1024
                    elif size.isdigit():
                        size = int(size)
                    else:
                        size = -1
line = '%s--------- 1 ? ? %15d %s %s' % (type, size, dateTime, name)
info = FileInfo(line, size, dateTime)
infos.append(info)
#try plugins
else:
for plugin in self.DIR_LIST_REGEX_PLUGINS:
pluginResults = plugin.parse(dirName, dir)
if len(pluginResults[0]) != 0 or len(pluginResults[1]) != 0 or \
len(pluginResults[2]) != 0: return pluginResults
return (dirs, files, infos)
def walk(top, userCredentials=None, walkDirectories=True, topDown=True):
"""Recursively walk directories to retrieve file lists.
Returns the topPath, contained subdirectories and files, and
optionally FileInfo objects (if info is included in protocol results).
Handles local directory paths and ftp/http protocols (URL's).
"""
remote, protocol, netloc, path = remoteUrl(top)
if remote:
if protocol == 'ftp':
ftpWalker = FtpDirectoryWalker(userCredentials)
for root, dirs, files, infos in ftpWalker.walk(top, walkDirectories):
yield (root, dirs, files, infos)
elif protocol == 'http':
# import pdb; pdb.set_trace()
httpWalker = HttpDirectoryWalker(userCredentials)
            for root, dirs, files, infos in httpWalker.walk(top, walkDirectories):
                yield (root, dirs, files, infos)
        elif protocol == 'sftp':
            # no SftpDirectoryWalker is defined in this module yet
            die('filelist: sftp protocol is not supported yet')
        else:
            die('filelist: Cannot handle protocol %s' % protocol)
else:
if walkDirectories:
for root, dirs, files in os.walk(top, topDown):
yield (root, dirs, files, [])
else:
files = os.listdir(top)
yield (top, [], files, [])
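# A minimal usage sketch for walk() (path is hypothetical):
#   for root, dirs, files, infos in walk('/data/archive'):
#       print root, len(files), 'files'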
def remoteUrl(url):
"""Returns True if the URL is remote; also returns protocol,
net location (host:port), and path."""
protocol, netloc, path, params, query, fragment = urlparse.urlparse(url)
if protocol == '':
return (False, protocol, netloc, path)
else:
return (True, protocol, netloc, path)
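# Examples:
#   remoteUrl('ftp://somehost/pub/data') => (True, 'ftp', 'somehost', '/pub/data')
#   remoteUrl('/local/path')             => (False, '', '', '/local/path')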
# utils
RE_WITH_SUBST_PATTERN = re.compile(r'^s/(.+)/(.+)/$')
def parse_re_with_subst(spec):
    """Split a substitution spec of the form s/<pattern>/<subst>/ into its
    parts; a plain regex string is returned with subst=None."""
    match = RE_WITH_SUBST_PATTERN.match(spec)
    if match:
        return (match.group(1), match.group(2))
    else:
        return (spec, None)
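# Examples:
#   parse_re_with_subst('s/granule/GRANULE/') => ('granule', 'GRANULE')
#   parse_re_with_subst(r'.*\.nc$')           => ('.*\\.nc$', None)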
def hostName():
return socket.gethostbyaddr(socket.gethostname())[0]
FILE_URL_PREFIX = 'file://' + hostName()
def makeFileUrl(file):
return FILE_URL_PREFIX + file
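# Example: makeFileUrl('/data/x.txt') => 'file://<thishost>/data/x.txt'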
def warn(*args): sys.stderr.write(' '.join(map(str, args)) + '\n')
def die(msg, status=1): warn(msg); sys.exit(status)
def main():
"""Main function for outside scripts to call."""
from sys import argv
if len(argv) < 2: die(USAGE)
try:
        opts, argv = getopt.getopt(argv[1:], 'hbcdf:ilmqr:stuvw:x',
            ['help', 'bottomUp', 'credentials', 'delete', 'directory',
             'fetchDir=', 'fetchIfNewer', 'fetchWithSubDirs', 'info',
             'list', 'matchUrl', 'quiet', 'regex=', 'size', 'topOnly',
             'url', 'verbose', 'wildcard=', 'xml'])
except getopt.GetoptError, (msg, bad_opt):
die("%s error: Bad option: %s, %s" % (argv[0], bad_opt, msg))
regSpecs = []; wildCards = []; matchUrl=False; walkDirectories = True
needCredentials = False; userCredentials = None
urlMode=False; xmlMode=False; quietMode=False; verboseMode=False; getFileInfo=False
fetchDir = None; fetchIfNewer=False; fetchWithSubDirs=False
directoryMode = False; deleteMode = False; topDown = True; listMode = False
for opt, val in opts:
if opt in ('-h', '--help'): die(USAGE)
elif opt in ('-b', '--bottomUp'): topDown = False
elif opt in ('-c', '--credentials'): needCredentials = True
elif opt in ('-d', '--directory'): directoryMode=True
        elif opt in ('--delete',): deleteMode=True
elif opt in ('-f', '--fetchDir'): fetchDir = val
# retrieve remote files to this dir
        elif opt in ('--fetchIfNewer',): fetchIfNewer=True
# only fetch if src file is newer than existing dest file
        elif opt in ('--fetchWithSubDirs',): fetchWithSubDirs=True
# mirror subdirectories when fetching
elif opt in ('-i', '--info'): getFileInfo=True
elif opt in ('-l', '--list'): listMode=True
elif opt in ('-m', '--matchUrl'): matchUrl=True
# regexs match entire URL/path, not just file name
elif opt in ('-q', '--quiet'): quietMode=True
# don't print files during walk
elif opt in ('-r', '--regex'): regSpecs.append(val)
        elif opt in ('-s', '--size'): getFileInfo=True  # sizes come via file info
elif opt in ('-t', '--topOnly'): walkDirectories=False
elif opt in ('-u', '--url'): urlMode=True
# return URL's (file:, ftp:, http:, etc.)
elif opt in ('-v', '--verbose'): verboseMode=True
elif opt in ('-w', '--wildcard'): wildCards.append(val)
elif opt in ('-x', '--xml'): xmlMode=True # return list in XML format
else: die(USAGE)
# import pdb; pdb.set_trace()
matchedFiles, fetchedFiles, destinationFiles = \
filelist(argv, regSpecs, wildCards, needCredentials, userCredentials,
matchAnyThenConstrain, None, matchUrl, walkDirectories,
urlMode, xmlMode, quietMode, verboseMode, getFileInfo,
fetchDir, fetchIfNewer, fetchWithSubDirs,
directoryMode, listMode, deleteMode, topDown)
    if quietMode:
        # stream output was suppressed during the walk (-q), so print the
        # matched list once at the end
        for line in matchedFiles:
            print line
if __name__ == '__main__': main()