docker/distill/distill/models/stout.py - incubator-flagon-tap - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 from distill import app, es
 from elasticsearch_dsl import DocType, String, Boolean, Date, Nested, Search
 from elasticsearch_dsl.query import MultiMatch, Match, Q
 from elasticsearch import Elasticsearch, TransportError
 from flask import jsonify
 import pandas as pd

 class StoutDoc(DocType):
     """
     Elasticsearch document type used to store parsed Stout data.

     Fields:
         sessionID -- string field declared with index="not_analyzed"
         task1     -- nested field
         task2     -- nested field
     """

     # The mapping for these fields is created by StoutDoc.init().
     sessionID = String(index="not_analyzed")
     task1 = Nested()
     task2 = Nested()

     class Meta:
         # Index name and document type the documents are stored under.
         index = '.stout'
         doc_type = 'testing'

     def save(self, *args, **kwargs):
         """
         Save this Stout document in Distill.

         Every positional and keyword argument is forwarded unchanged to
         the parent class ``save`` and its result is returned.
         """
         parent = super(StoutDoc, self)
         return parent.save(*args, **kwargs)

 class Stout(object):
     """
     Main Stout class; exposes the ingest operation as a static method.
     """

     @staticmethod
     def ingest():
         """
         Ingest data coming from Stout to Distill.

         Creates the StoutDoc mappings, parses the master answer table via
         ``_parse`` and saves one StoutDoc per parsed session.

         :return: the ``jsonify`` response carrying a boolean ``status``;
                  ``status`` is False when a save raised TransportError
         """
         import logging

         # Create the mappings in elasticsearch
         StoutDoc.init()
         status = True
         data = _parse()
         try:
             for session in data.values():
                 doc = StoutDoc()
                 # Copy only the sections that were parsed for this session.
                 for field in ('sessionID', 'task1', 'task2'):
                     if field in session:
                         setattr(doc, field, session[field])
                 doc.save()
         except TransportError:
             # The handler used to name an undefined ``Error`` class, so any
             # failure surfaced as a NameError instead of status=False.
             logging.getLogger(__name__).exception(
                 'Failed to save Stout document to Elasticsearch')
             status = False
         return jsonify(status=status)

 def _parse():
     """
     Parse master answer table with mapping into an associative array

     Reads two CSV files whose locations come from ``app.config``:
     ``MASTER`` (the answer table) and ``MAPPINGS`` (plain-text names for
     the answer-table columns; row ``i``, tenth column, is used for header
     ``i``).  The first column of every master row is split on ``"::"``
     into a session ID and a task ID.  The remaining non-null cells are
     grouped by header: headers containing ``"SYS"`` go to the system data,
     ``"OT1"`` to task 1, ``"OT2"`` to task 2 and everything else to the
     meta data.  Rows whose ``TSK_TIME_DIFF_`` is not positive contribute
     only the session ID.

     :return: [dict] dictionary of session information keyed by session
              ID; each value holds ``sessionID`` and, when present,
              ``task1`` / ``task2``
     :raises KeyError: when ``TSK_TIME_DIFF_`` is null for a row, or when
              ``TSK_TIME_DIFF_OT1_`` / ``TSK_TIME_DIFF_OT2_`` is null for a
              row with positive ``TSK_TIME_DIFF_`` (null cells are never
              collected)
     :raises IndexError: when the first column of a row has no ``"::"``
     :raises ValueError: when the two per-task time diffs do not identify
              exactly one task
     """
     master = app.config['MASTER']
     mappings = app.config['MAPPINGS']

     fileContents = pd.read_csv(master, encoding='utf-8')
     plainTextMappings = pd.read_csv(mappings, encoding='raw_unicode_escape')
     headers = list(fileContents.columns.values)

     # generate the mapping between header and plain text
     # (.iloc replaces the removed DataFrame.ix indexer; both the row and
     # the column are addressed by position)
     translationRow = {}
     for fieldIndex in range(1, len(headers)):
         translationRow[headers[fieldIndex]] = \
             plainTextMappings.iloc[fieldIndex, 9]
     translationRow['items.text'] = 'foo'

     dictBySessionID = {}
     for _, data in fileContents.iterrows():
         identifier = data.iloc[0].split("::")
         sessionID = identifier[0]
         taskID = identifier[1]
         # continue with the existing entry when this session was seen before
         workingData = dictBySessionID.get(sessionID, {})

         sysData = {}
         task1Data = {}
         task2Data = {}
         metaData = {}
         d = {}  # raw header -> value for every non-null cell of this row

         for fieldIndex in range(1, len(headers)):
             header = headers[fieldIndex]
             value = data.iloc[fieldIndex]
             if pd.isnull(value):
                 # only interested in non-null fields
                 continue
             tempDict = {}
             if header in translationRow:
                 tempDict['field'] = translationRow[header]
             tempDict['value'] = value
             d[header] = value
             if "SYS" in header:
                 sysData[header] = tempDict
             elif "OT1" in header:
                 task1Data[header] = tempDict
             elif "OT2" in header:
                 task2Data[header] = tempDict
             else:
                 metaData[header] = tempDict

         if d['TSK_TIME_DIFF_'] > 0:  # block tasks with zero time elapsed
             a = int(d['TSK_TIME_DIFF_OT1_'])
             b = int(d['TSK_TIME_DIFF_OT2_'])
             # figure out which task the values belong to
             if a > 0 and b <= 0:
                 task1Data['taskID'] = taskID
                 task1Data['meta'] = metaData
                 task1Data['system'] = sysData
                 workingData['task1'] = task1Data
             elif a <= 0 and b > 0:
                 task2Data['taskID'] = taskID
                 task2Data['meta'] = metaData
                 task2Data['system'] = sysData
                 workingData['task2'] = task2Data
             else:
                 raise ValueError('Encountered an unexpected task time diff state')

         workingData['sessionID'] = sessionID
         dictBySessionID[sessionID] = workingData
     return dictBySessionID
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from distill import app, es
	from elasticsearch_dsl import DocType, String, Boolean, Date, Nested, Search
	from elasticsearch_dsl.query import MultiMatch, Match, Q
	from elasticsearch import Elasticsearch, TransportError
	from flask import jsonify
	import pandas as pd

	class StoutDoc(DocType):
	    """
	    Elasticsearch document type used to store parsed Stout data.

	    Fields:
	        sessionID -- string field declared with index="not_analyzed"
	        task1     -- nested field
	        task2     -- nested field
	    """

	    # The mapping for these fields is created by StoutDoc.init().
	    sessionID = String(index="not_analyzed")
	    task1 = Nested()
	    task2 = Nested()

	    class Meta:
	        # Index name and document type the documents are stored under.
	        index = '.stout'
	        doc_type = 'testing'

	    def save(self, *args, **kwargs):
	        """
	        Save this Stout document in Distill.

	        Every positional and keyword argument is forwarded unchanged to
	        the parent class ``save`` and its result is returned.
	        """
	        # The flattened text read ``args, *kwargs``, which made a plain
	        # ``doc.save()`` call fail; the variadic form is restored here.
	        return super(StoutDoc, self).save(*args, **kwargs)

	class Stout(object):
	    """
	    Main Stout class; exposes the ingest operation as a static method.
	    """

	    @staticmethod
	    def ingest():
	        """
	        Ingest data coming from Stout to Distill.

	        Creates the StoutDoc mappings, parses the master answer table via
	        ``_parse`` and saves one StoutDoc per parsed session.

	        :return: the ``jsonify`` response carrying a boolean ``status``;
	                 ``status`` is False when a save raised TransportError
	        """
	        import logging

	        # Create the mappings in elasticsearch
	        StoutDoc.init()
	        status = True
	        data = _parse()
	        try:
	            for session in data.values():
	                doc = StoutDoc()
	                # Copy only the sections that were parsed for this session.
	                for field in ('sessionID', 'task1', 'task2'):
	                    if field in session:
	                        setattr(doc, field, session[field])
	                doc.save()
	        except TransportError:
	            # The handler used to name an undefined ``Error`` class, so any
	            # failure surfaced as a NameError instead of status=False.
	            logging.getLogger(__name__).exception(
	                'Failed to save Stout document to Elasticsearch')
	            status = False
	        return jsonify(status=status)

	def _parse():
	    """
	    Parse master answer table with mapping into an associative array

	    Reads two CSV files whose locations come from ``app.config``:
	    ``MASTER`` (the answer table) and ``MAPPINGS`` (plain-text names for
	    the answer-table columns; row ``i``, tenth column, is used for header
	    ``i``).  The first column of every master row is split on ``"::"``
	    into a session ID and a task ID.  The remaining non-null cells are
	    grouped by header: headers containing ``"SYS"`` go to the system data,
	    ``"OT1"`` to task 1, ``"OT2"`` to task 2 and everything else to the
	    meta data.  Rows whose ``TSK_TIME_DIFF_`` is not positive contribute
	    only the session ID.

	    :return: [dict] dictionary of session information keyed by session
	             ID; each value holds ``sessionID`` and, when present,
	             ``task1`` / ``task2``
	    :raises KeyError: when ``TSK_TIME_DIFF_`` is null for a row, or when
	             ``TSK_TIME_DIFF_OT1_`` / ``TSK_TIME_DIFF_OT2_`` is null for a
	             row with positive ``TSK_TIME_DIFF_`` (null cells are never
	             collected)
	    :raises IndexError: when the first column of a row has no ``"::"``
	    :raises ValueError: when the two per-task time diffs do not identify
	             exactly one task
	    """
	    master = app.config['MASTER']
	    mappings = app.config['MAPPINGS']

	    fileContents = pd.read_csv(master, encoding='utf-8')
	    plainTextMappings = pd.read_csv(mappings, encoding='raw_unicode_escape')
	    headers = list(fileContents.columns.values)

	    # generate the mapping between header and plain text
	    # (.iloc replaces the removed DataFrame.ix indexer; both the row and
	    # the column are addressed by position)
	    translationRow = {}
	    for fieldIndex in range(1, len(headers)):
	        translationRow[headers[fieldIndex]] = \
	            plainTextMappings.iloc[fieldIndex, 9]
	    translationRow['items.text'] = 'foo'

	    dictBySessionID = {}
	    for _, data in fileContents.iterrows():
	        identifier = data.iloc[0].split("::")
	        sessionID = identifier[0]
	        taskID = identifier[1]
	        # continue with the existing entry when this session was seen before
	        workingData = dictBySessionID.get(sessionID, {})

	        sysData = {}
	        task1Data = {}
	        task2Data = {}
	        metaData = {}
	        d = {}  # raw header -> value for every non-null cell of this row

	        for fieldIndex in range(1, len(headers)):
	            header = headers[fieldIndex]
	            value = data.iloc[fieldIndex]
	            if pd.isnull(value):
	                # only interested in non-null fields
	                continue
	            tempDict = {}
	            if header in translationRow:
	                tempDict['field'] = translationRow[header]
	            tempDict['value'] = value
	            d[header] = value
	            if "SYS" in header:
	                sysData[header] = tempDict
	            elif "OT1" in header:
	                task1Data[header] = tempDict
	            elif "OT2" in header:
	                task2Data[header] = tempDict
	            else:
	                metaData[header] = tempDict

	        if d['TSK_TIME_DIFF_'] > 0:  # block tasks with zero time elapsed
	            a = int(d['TSK_TIME_DIFF_OT1_'])
	            b = int(d['TSK_TIME_DIFF_OT2_'])
	            # figure out which task the values belong to
	            if a > 0 and b <= 0:
	                task1Data['taskID'] = taskID
	                task1Data['meta'] = metaData
	                task1Data['system'] = sysData
	                workingData['task1'] = task1Data
	            elif a <= 0 and b > 0:
	                task2Data['taskID'] = taskID
	                task2Data['meta'] = metaData
	                task2Data['system'] = sysData
	                workingData['task2'] = task2Data
	            else:
	                raise ValueError('Encountered an unexpected task time diff state')

	        workingData['sessionID'] = sessionID
	        dictBySessionID[sessionID] = workingData
	    return dictBySessionID