blob: be080f480d7d8692bbb3eeb8231bfc1d78dc25f4 [file] [log] [blame]
import sys
import os
import libprofile
import madpy
from madpy.madpack import configyml
from optparse import OptionParser
# Aggregate lists are stored as strings holding Python list literals; profile()
# evaluates them before passing them to libprofile.gen_profile_query.
# NOTE(review): "%%" looks like a placeholder for the column name, and
# MADLIB_SCHEMA like a token substituted at install time -- confirm both
# against libprofile and the build scripts.

# Default aggregates applied to numeric columns.
basic_numerics = (
    '["MIN", "MAX", "AVG", '
    '"MADLIB_SCHEMA.cmsketch_median"]'
)

# Every available aggregate for numeric columns (used by -k / profile_run).
all_numerics = (
    '["MIN", "MAX", "AVG", '
    '"MADLIB_SCHEMA.cmsketch_median", '
    '"MADLIB_SCHEMA.cmsketch_depth_histogram(%%,4)"]'
)

# Default aggregates applied to non-numeric columns.
basic_non_numerics = (
    '["MADLIB_SCHEMA.fmsketch_dcount"]'
)

# Every available aggregate for non-numeric columns (used by -k / profile_run).
all_non_numerics = (
    '["MADLIB_SCHEMA.fmsketch_dcount", '
    '"MADLIB_SCHEMA.mfvsketch_quick_histogram(%%,10)"]'
)
def profile(table, numericaggs, non_numericaggs, configdir, skipcols):
# if config file left unspecified, first check scriptdir.
# if nothing there, take the installed version: madpy/Config.yml
retval = ""
if configdir == None:
scriptdir = madpy.__path__[0] + "/config/scripts"
if os.path.exists(scriptdir+"/Config.yml"):
configdir = [scriptdir]
else:
configdir = madpy.__path__
if numericaggs == None:
numericaggs = basic_numerics
if non_numericaggs == None:
non_numericaggs = basic_non_numerics
conf = configyml.get_config(configdir[0], False)
api = conf['dbapi2']
connect_args = conf['connect_args']
dbapi2 = __import__(api, globals(), locals(), [''])
## @var dbconn live database connection
con_args={};
for arg in connect_args:
if arg.find("=") == -1:
retval += sys.exc_info()[0]
retval += "Missing '=' character in the connect_args parameter: " + arg
raise
# cleanup the string
arg = ((arg.replace( "'", "")).replace( '"', '')).replace( ' ', '')
equal_sign = arg.find('=')
# create a proper dictionary of connection parameters
con_args[ arg[:equal_sign]] = arg[equal_sign+1:]
conn = dbapi2.connect( **con_args)
# Open a cursor to perform database operations
cur = conn.cursor()
(numcols, non_numcols) = libprofile.catalog_columns(conn, table, skipcols)
query = libprofile.gen_profile_query(table,
`eval'(numericaggs),
`eval'(non_numericaggs),
numcols, non_numcols)
retval += query
retval += "\n"
retval += "---"
retval += "\n"
# Fetch numeric columnnames from table
cur.execute(query)
out = cur.fetchone()
for i in range(`len'(out)):
retval += cur.description[i][0]+": "+str(out[i])+"\n"
cur.close()
conn.close()
return retval
def profile_run(table):
    """Profile a table with every available aggregate and default settings.

    table -- name of the table to profile.

    Returns the text report produced by profile. When the table name is
    empty or None, writes "missing tablename" to standard error and exits
    with status 2.
    """
    if not table:
        # The option parser only exists inside main, so report the error
        # directly instead of going through it.
        sys.stderr.write("missing tablename\n")
        sys.exit(2)
    return profile(table, all_numerics, all_non_numerics, None, [])
def main(argv):
    """Command-line entry point: parse options, profile a table, print it.

    argv -- command-line arguments without the program name.

    Exits with status 2 (through the option parser) when no table name
    is given.
    """
    usage = ("usage: %prog [options] tablename\n"
             "(note: database connection info taken from Config.yml, "
             "see -c flag)")
    parser = OptionParser(usage=usage)
    parser.add_option("-n", "--numeric", dest="numericaggs", nargs=1,
                      help="array of aggs for integer columns",
                      default=basic_numerics)
    parser.add_option("-t", "--nonnumeric", dest="non_numericaggs", nargs=1,
                      help="array of aggs for non-numeric columns",
                      default=basic_non_numerics)
    parser.add_option("-k", "--kitchensink", action="store_true",
                      dest="kitchen",
                      help="compute all available statistics", default=False)
    parser.add_option("-s", "--skip", dest="skipcols", nargs=1,
                      help="array of columns to skip",
                      default='[]')
    parser.add_option('-c', '--configdir', nargs=1, dest='configdir',
                      default=None,
                      help="directory holding Config.yml\n"
                           "(default SCRIPTDIR if already initialized, else\n"
                           + madpy.__path__[0] + ")")

    # Parse the arguments this function was given, not the process-wide ones.
    (options, args) = parser.parse_args(argv)
    if not args:
        # parser.error prints the usage text and exits with status 2.
        parser.error("missing tablename")
    table = args[0]

    if options.kitchen:
        numericaggs = all_numerics
        non_numericaggs = all_non_numerics
    else:
        # Honor -n / -t; their defaults are the basic aggregate lists.
        numericaggs = options.numericaggs
        non_numericaggs = options.non_numericaggs

    # profile reads the directory from the first item of a sequence, so the
    # single string delivered by -c is wrapped in a list.
    configdir = None
    if options.configdir:
        configdir = [options.configdir]

    # NOTE(review): the -s value reaches profile as the raw option string
    # (default '[]'), whereas profile_run passes a real list; confirm which
    # form libprofile.catalog_columns expects.
    skipcols = options.skipcols
    if not skipcols:
        skipcols = []

    print(profile(table, numericaggs, non_numericaggs, configdir, skipcols))
# Run the command-line interface only when executed as a script; the
# program name is dropped so main receives just the user arguments.
if __name__ == "__main__":
    main(sys.argv[1:])