#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import sys
import getopt
import re
from datetime import datetime
def main(argv):
    """Parse the command-line arguments and run the log parser.

    Recognised options:
      -h, --help          print the usage text and exit with status 0
      -i, --input         path of the instrumentation log to read
      -o, --output        path of the file to write
      -p, --parameters    comma-separated list of '/'-separated JSON paths

    Exits with status 2 when the options cannot be parsed and with status 1
    when the input file, output file or parameter list is missing.

    :param argv: the argument list without the program name
    """
    inputfile = ''
    outputfile = ''
    parameters = []
    try:
        # "help" has to be declared as a long option; otherwise getopt
        # rejects --help before the loop below ever gets to handle it.
        opts, _ = getopt.getopt(
            argv, "hi:o:p:", ["help", "input=", "output=", "parameters="])
    except getopt.GetoptError as err:
        # Tell the user which option was wrong instead of only showing usage.
        error(str(err))
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-i", "--input"):
            inputfile = arg
        elif opt in ("-o", "--output"):
            outputfile = arg
        elif opt in ("-p", "--parameters"):
            parameters = arg.split(',')
    if not inputfile or not outputfile or not parameters:
        error('Arguments not set correctly.')
        usage()
        sys.exit(1)
    # A single pre-formatted argument makes print() emit the same text on
    # Python 2 and Python 3 (the double space matches the previous output).
    print('Input file is:  %s' % (inputfile,))
    print('Output file is:  %s' % (outputfile,))
    print('Parameters are:  %s' % (parameters,))
    parse_instrumentation_log(inputfile, outputfile, parameters)
def usage():
    """Print a one-line description of the command-line syntax to stdout."""
    # Single-argument print() behaves identically on Python 2 and Python 3.
    print('USAGE: python instrumentation-log-parser.py -i <inputfile> -o <outputfile> -p <path/to/json/value1,path/to/json/value2>')
def info(message):
    """Write *message* to the log with the level tag 'INFO'."""
    log(level='INFO', message=message)
def debug(message):
    """Write *message* to the log with the level tag 'DEBUG'."""
    log(level='DEBUG', message=message)
def error(message):
    """Write *message* to the log with the level tag 'ERROR'."""
    log(level='ERROR', message=message)
def warn(message):
    """Write *message* to the log with the level tag 'WARN'."""
    log(level='WARN', message=message)
def log(level, message):
    """Print a log line of the form '[<current time>] [<level>] <message>'.

    :param level: text placed in the second bracket pair, e.g. 'INFO'
    :param message: text printed after the bracketed prefix
    """
    # Single-argument print() behaves identically on Python 2 and Python 3.
    print('[%s] [%s] %s' % (datetime.now(), level, message))
def _resolve_json_path(json_object, path):
    """Return the value reached by following the '/'-separated keys in *path*.

    Raises KeyError when a key is missing and TypeError when an intermediate
    value cannot be indexed with a string key.
    """
    fragment = json_object
    for key in path.split('/'):
        fragment = fragment[key]
    return fragment


def parse_instrumentation_log(inputfile, outputfile, parameters):
    """Extract selected JSON values from a log into a ';'-separated file.

    The input is processed line by line:

    * a line starting with 'YYYY-MM-DD HH:MM' sets the timestamp used for the
      records that follow it;
    * a line consisting solely of '}' closes a record: all lines collected
      since the previous record are parsed as one JSON document, every entry
      of *parameters* is looked up in it and one output row
      '<timestamp>;<value1>;<value2>...' is written;
    * every other line (including the opening '{') is collected as part of
      the pending JSON document.

    Records that are not valid JSON, or in which one of the requested paths
    cannot be resolved, are reported with a warning and skipped.

    :param inputfile: path of the log file to read
    :param outputfile: path of the file to (over)write; the first row is the
        header 'Timestamp;<parameters joined by ;>'
    :param parameters: list of '/'-separated key paths into each JSON record
    """
    info('Parsing instrumentation log. [inputfile=%s;outputfile=%s;parameters=%s]' % (inputfile, outputfile, parameters))
    with open(inputfile) as source:
        content = source.readlines()
    debug('Input file has %d lines' % (len(content)))

    # Raw strings keep the regex backslashes from being read as (invalid)
    # string escape sequences.
    header = re.compile(r'^([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}).*$')
    json_end = re.compile(r'^\}$')

    pending_json_lines = []
    timestamp = ''
    input_line_count = 0
    output_line_count = 0
    with open(outputfile, 'w') as target:
        target.write('Timestamp;%s\n' % (';'.join(parameters)))
        # Counting from 1 makes input_line_count both the number of lines
        # processed so far and the human-readable number of the current line.
        for input_line_count, line in enumerate(content, 1):
            header_match = header.match(line)
            if header_match is not None:
                timestamp = header_match.group(1)
            elif json_end.match(line):
                pending_json_lines.append(line)
                json_content = ''.join(pending_json_lines)
                # Start the next record from scratch whether or not this one
                # turns out to be usable.
                pending_json_lines = []
                try:
                    json_object = json.loads(json_content)
                    values = []
                    for parameter in parameters:
                        values.append(str(_resolve_json_path(json_object, parameter)))
                except ValueError:
                    warn('Unparseable JSON input at line %d' % input_line_count)
                except (KeyError, TypeError) as err:
                    # A record lacking a requested path must not abort the
                    # whole run; report it and continue with the next one.
                    warn('Could not resolve parameters in JSON input at line %d: %r'
                         % (input_line_count, err))
                else:
                    target.write(';'.join([timestamp] + values) + '\n')
                    output_line_count += 1
            else:
                # Opening brace and body lines are simply accumulated.
                pending_json_lines.append(line)
            if input_line_count % 1000000 == 0:
                debug('%d input lines processed, %d output lines written' % (input_line_count, output_line_count))
    info('Parsing instrumentation log finished. In total %d input lines processed, %d output lines written'
         % (input_line_count, output_line_count))
# Script entry point: hand the command-line arguments (without the program
# name) to main() when the file is executed directly.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)