src/plugins/utils/tone.py - kibble-scanners - Git at Google

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
  #the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """
 This is an experimental tone analyzer plugin for using Watson/BlueMix for
 analyzing the mood of email on a list. This requires a Watson account
 and a watson section in config.yaml, as such:

 watson:
     username: $user
     password: $pass
     api:      https://$something.watsonplatform.net/tone-analyzer/api

 Currently only pony mail is supported. more to come.
 """

 import time
 import datetime
 import re
 import json
 import hashlib
 import requests
 import json
 import uuid

 def watsonTone(KibbleBit, bodies):
     """
     Sentiment analysis using IBM Watson.

     This is a generator. For every email body in ``bodies`` it drops the
     quoted lines (those starting with ">"), posts the remaining text to the
     tone analyzer endpoint configured in the ``watson`` section of
     ``KibbleBit.config`` and yields one dict that maps each returned
     ``tone_id`` to its ``score``.

     An empty dict is yielded for a body that could not be analyzed (request
     failure, incomplete watson config, non-JSON or unexpected reply). If
     there is no ``watson`` section at all, nothing is yielded.
     """
     if 'watson' not in KibbleBit.config:
         return

     headers = {
         'Content-Type': 'application/json'
     }

     for body in bodies:
         # Crop out quotes
         text = "\n".join(x for x in body.split("\n") if not x.startswith(">"))

         try:
             # The config lookups deliberately stay inside the try block so an
             # incomplete watson section counts as a failed analysis rather
             # than crashing the scan.
             watson = KibbleBit.config['watson']
             url = "%s/v3/tone?version=2017-09-21&sentences=false" % watson['api']
             auth = (watson['username'], watson['password'])
             rv = requests.post(
                 url,
                 headers=headers,
                 data=json.dumps({'text': text}),
                 auth=auth
             )
             jsout = rv.json()
         except Exception:
             # Not a bare except: KeyboardInterrupt and SystemExit must still
             # be able to stop a long-running scan.
             jsout = {} # borked Watson?

         mood = {}
         document_tone = jsout.get('document_tone') if isinstance(jsout, dict) else None
         if isinstance(document_tone, dict):
             # A reply without a 'tones' list simply produces an empty mood.
             for tone in document_tone.get('tones') or []:
                 mood[tone['tone_id']] = tone['score']
         else:
             KibbleBit.pprint("Failed to analyze email body.")
         yield mood


 def azureTone(KibbleBit, bodies):
     """
     Sentiment analysis using Azure Text Analysis API.

     Quoted lines (starting with ">") are removed from every body, and all
     bodies are then sent as one batch to the sentiment endpoint for the
     ``location`` configured in the ``azure`` section of ``KibbleBit.config``.

     Returns:
         None  - if there is no ``azure`` section in the config.
         False - if the reply carries a ``statusCode`` field, which is taken
                 as a sign of rate limiting; the caller should stop for now.
         list  - otherwise, one dict per body in input order. Analyzed bodies
                 get ``negative``, ``positive`` and ``neutral`` values; bodies
                 that were not analyzed keep an empty dict.
     """
     if 'azure' not in KibbleBit.config:
         return None

     headers = {
         'Content-Type': 'application/json',
         'Ocp-Apim-Subscription-Key': KibbleBit.config['azure']['apikey']
     }

     js = {
         "documents": []
     }

     # For each body...
     moods = []
     for a, body in enumerate(bodies):
         # Crop out quotes
         text = "\n".join(x for x in body.split("\n") if not x.startswith(">"))
         js['documents'].append({
             "language": "en",
             "id": str(a),
             "text": text
         })
         moods.append({}) # placeholder for each doc, to be replaced

     if not moods:
         # Nothing to analyze, so don't spend an API call on an empty batch.
         return moods

     try:
         # The location lookup stays inside the try block so a missing
         # setting is reported as a failed analysis, like a failed request.
         url = "https://%s.api.cognitive.microsoft.com/text/analytics/v2.0/sentiment" % KibbleBit.config['azure']['location']
         rv = requests.post(
             url,
             headers=headers,
             data=json.dumps(js)
         )
         jsout = rv.json()
     except Exception:
         # Not a bare except: KeyboardInterrupt and SystemExit must still be
         # able to stop a long-running scan.
         jsout = {} # borked sentiment analysis?

     results = jsout.get('documents') if isinstance(jsout, dict) else None
     if results:
         for doc in results:
             # This is more parred than Watson: there is one score per
             # document, which the formulas below treat as a value between
             # 0 and 1. We split it into three overlapping groups:
             #   negative: 1 at score 0, falling linearly to 0 at 0.4
             #   positive: 0 up to 0.6, rising linearly to 1 at score 1
             #   neutral:  1 at 0.5, falling linearly to 0 at scores 0 and 1
             # As we don't want to over-represent negative/positive where the
             # results are muddy, the neutral zone is the widest of the three.
             val = doc['score']
             mood = {
                 'negative': max(0, (0.4 - val) * 2.5),
                 'positive': max(0, (val - 0.6) * 2.5),
                 'neutral': max(0, 1 - abs(val - 0.5) * 2),
             }
             moods[int(doc['id'])] = mood # Replace moods[X] with the actual mood
     else:
         KibbleBit.pprint("Failed to analyze email body.")
         print(jsout)
         # Depending on price tier, Azure will return a 429 if you go too fast.
         # If we see a statusCode return, let's just stop for now.
         # Later scans can pick up the slack.
         if isinstance(jsout, dict) and 'statusCode' in jsout:
             KibbleBit.pprint("Possible rate limiting in place, stopping for now.")
             return False
     return moods

 def picoTone(KibbleBit, bodies):
     """
     Sentiment analysis using picoAPI Text Analysis.

     Quoted lines (starting with ">") are removed from every body, and all
     bodies are then sent as one batch to the picoAPI sentiment endpoint,
     authenticated with the ``key`` from the ``picoapi`` section of
     ``KibbleBit.config``.

     Returns:
         None  - if there is no ``picoapi`` section in the config.
         False - if the reply carries a ``code`` field (invalid key or rate
                 limit exceeded); the caller should stop for now.
         list  - otherwise, one dict per body in input order. Analyzed bodies
                 get ``negative``, ``positive`` and ``neutral`` values plus
                 one entry per returned emotion; bodies that were not
                 analyzed keep an empty dict.
     """
     if 'picoapi' not in KibbleBit.config:
         return None

     headers = {
         'Content-Type': 'application/json',
         'PicoAPI-Key': KibbleBit.config['picoapi']['key']
     }

     js = {
         "texts": []
     }

     # For each body...
     moods = []
     for a, body in enumerate(bodies):
         # Crop out quotes
         text = "\n".join(x for x in body.split("\n") if not x.startswith(">"))
         js['texts'].append({
             "id": str(a),
             "body": text
         })
         moods.append({}) # placeholder for each doc, to be replaced

     if not moods:
         # Nothing to analyze, so don't spend an API call on an empty batch.
         return moods

     try:
         rv = requests.post(
             "https://v1.picoapi.com/api/text/sentiment",
             headers=headers,
             data=json.dumps(js)
         )
         jsout = rv.json()
     except Exception:
         # Not a bare except: KeyboardInterrupt and SystemExit must still be
         # able to stop a long-running scan.
         jsout = {} # borked sentiment analysis?

     results = jsout.get('results') if isinstance(jsout, dict) else None
     if results:
         for doc in results:
             # Use the scores picoAPI reports directly, without rescaling.
             mood = {
                 'negative': doc['negativity'],
                 'positive': doc['positivity'],
                 'neutral': doc['neutrality'],
             }

             # Additional (optional) emotion weighting
             if 'emotions' in doc:
                 for k, v in doc['emotions'].items():
                     mood[k] = v / 100 # Value is between 0 and 100.

             moods[int(doc['id'])] = mood # Replace moods[X] with the actual mood
     else:
         KibbleBit.pprint("Failed to analyze email body.")
         print(jsout)
         # 403 returned on invalid key, 429 on rate exceeded.
         # If we see a code return, let's just stop for now.
         # Later scans can pick up the slack.
         if isinstance(jsout, dict) and 'code' in jsout:
             KibbleBit.pprint("Possible rate limiting in place, stopping for now.")
             return False
     return moods
	#!/usr/bin/env python3
	# -- coding: utf-8 --
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	#the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""
	This is an experimental tone analyzer plugin for using Watson/BlueMix for
	analyzing the mood of email on a list. This requires a Watson account
	and a watson section in config.yaml, as such:

	watson:
	username: $user
	password: $pass
	api: https://$something.watsonplatform.net/tone-analyzer/api

	Currently only pony mail is supported. more to come.
	"""

	import time
	import datetime
	import re
	import json
	import hashlib
	import requests
	import json
	import uuid

	def watsonTone(KibbleBit, bodies):
	    """
	    Sentiment analysis using IBM Watson.

	    This is a generator. For every email body in ``bodies`` it drops the
	    quoted lines (those starting with ">"), posts the remaining text to the
	    tone analyzer endpoint configured in the ``watson`` section of
	    ``KibbleBit.config`` and yields one dict that maps each returned
	    ``tone_id`` to its ``score``.

	    An empty dict is yielded for a body that could not be analyzed (request
	    failure, incomplete watson config, non-JSON or unexpected reply). If
	    there is no ``watson`` section at all, nothing is yielded.
	    """
	    if 'watson' not in KibbleBit.config:
	        return

	    headers = {
	        'Content-Type': 'application/json'
	    }

	    for body in bodies:
	        # Crop out quotes
	        text = "\n".join(x for x in body.split("\n") if not x.startswith(">"))

	        try:
	            # The config lookups deliberately stay inside the try block so an
	            # incomplete watson section counts as a failed analysis rather
	            # than crashing the scan.
	            watson = KibbleBit.config['watson']
	            url = "%s/v3/tone?version=2017-09-21&sentences=false" % watson['api']
	            auth = (watson['username'], watson['password'])
	            rv = requests.post(
	                url,
	                headers=headers,
	                data=json.dumps({'text': text}),
	                auth=auth
	            )
	            jsout = rv.json()
	        except Exception:
	            # Not a bare except: KeyboardInterrupt and SystemExit must still
	            # be able to stop a long-running scan.
	            jsout = {} # borked Watson?

	        mood = {}
	        document_tone = jsout.get('document_tone') if isinstance(jsout, dict) else None
	        if isinstance(document_tone, dict):
	            # A reply without a 'tones' list simply produces an empty mood.
	            for tone in document_tone.get('tones') or []:
	                mood[tone['tone_id']] = tone['score']
	        else:
	            KibbleBit.pprint("Failed to analyze email body.")
	        yield mood


	def azureTone(KibbleBit, bodies):
	    """
	    Sentiment analysis using Azure Text Analysis API.

	    Quoted lines (starting with ">") are removed from every body, and all
	    bodies are then sent as one batch to the sentiment endpoint for the
	    ``location`` configured in the ``azure`` section of ``KibbleBit.config``.

	    Returns:
	        None  - if there is no ``azure`` section in the config.
	        False - if the reply carries a ``statusCode`` field, which is taken
	                as a sign of rate limiting; the caller should stop for now.
	        list  - otherwise, one dict per body in input order. Analyzed bodies
	                get ``negative``, ``positive`` and ``neutral`` values; bodies
	                that were not analyzed keep an empty dict.
	    """
	    if 'azure' not in KibbleBit.config:
	        return None

	    headers = {
	        'Content-Type': 'application/json',
	        'Ocp-Apim-Subscription-Key': KibbleBit.config['azure']['apikey']
	    }

	    js = {
	        "documents": []
	    }

	    # For each body...
	    moods = []
	    for a, body in enumerate(bodies):
	        # Crop out quotes
	        text = "\n".join(x for x in body.split("\n") if not x.startswith(">"))
	        js['documents'].append({
	            "language": "en",
	            "id": str(a),
	            "text": text
	        })
	        moods.append({}) # placeholder for each doc, to be replaced

	    if not moods:
	        # Nothing to analyze, so don't spend an API call on an empty batch.
	        return moods

	    try:
	        # The location lookup stays inside the try block so a missing
	        # setting is reported as a failed analysis, like a failed request.
	        url = "https://%s.api.cognitive.microsoft.com/text/analytics/v2.0/sentiment" % KibbleBit.config['azure']['location']
	        rv = requests.post(
	            url,
	            headers=headers,
	            data=json.dumps(js)
	        )
	        jsout = rv.json()
	    except Exception:
	        # Not a bare except: KeyboardInterrupt and SystemExit must still be
	        # able to stop a long-running scan.
	        jsout = {} # borked sentiment analysis?

	    results = jsout.get('documents') if isinstance(jsout, dict) else None
	    if results:
	        for doc in results:
	            # This is more parred than Watson: there is one score per
	            # document, which the formulas below treat as a value between
	            # 0 and 1. We split it into three overlapping groups:
	            #   negative: 1 at score 0, falling linearly to 0 at 0.4
	            #   positive: 0 up to 0.6, rising linearly to 1 at score 1
	            #   neutral:  1 at 0.5, falling linearly to 0 at scores 0 and 1
	            # As we don't want to over-represent negative/positive where the
	            # results are muddy, the neutral zone is the widest of the three.
	            val = doc['score']
	            mood = {
	                'negative': max(0, (0.4 - val) * 2.5),
	                'positive': max(0, (val - 0.6) * 2.5),
	                'neutral': max(0, 1 - abs(val - 0.5) * 2),
	            }
	            moods[int(doc['id'])] = mood # Replace moods[X] with the actual mood
	    else:
	        KibbleBit.pprint("Failed to analyze email body.")
	        print(jsout)
	        # Depending on price tier, Azure will return a 429 if you go too fast.
	        # If we see a statusCode return, let's just stop for now.
	        # Later scans can pick up the slack.
	        if isinstance(jsout, dict) and 'statusCode' in jsout:
	            KibbleBit.pprint("Possible rate limiting in place, stopping for now.")
	            return False
	    return moods

	def picoTone(KibbleBit, bodies):
	    """
	    Sentiment analysis using picoAPI Text Analysis.

	    Quoted lines (starting with ">") are removed from every body, and all
	    bodies are then sent as one batch to the picoAPI sentiment endpoint,
	    authenticated with the ``key`` from the ``picoapi`` section of
	    ``KibbleBit.config``.

	    Returns:
	        None  - if there is no ``picoapi`` section in the config.
	        False - if the reply carries a ``code`` field (invalid key or rate
	                limit exceeded); the caller should stop for now.
	        list  - otherwise, one dict per body in input order. Analyzed bodies
	                get ``negative``, ``positive`` and ``neutral`` values plus
	                one entry per returned emotion; bodies that were not
	                analyzed keep an empty dict.
	    """
	    if 'picoapi' not in KibbleBit.config:
	        return None

	    headers = {
	        'Content-Type': 'application/json',
	        'PicoAPI-Key': KibbleBit.config['picoapi']['key']
	    }

	    js = {
	        "texts": []
	    }

	    # For each body...
	    moods = []
	    for a, body in enumerate(bodies):
	        # Crop out quotes
	        text = "\n".join(x for x in body.split("\n") if not x.startswith(">"))
	        js['texts'].append({
	            "id": str(a),
	            "body": text
	        })
	        moods.append({}) # placeholder for each doc, to be replaced

	    if not moods:
	        # Nothing to analyze, so don't spend an API call on an empty batch.
	        return moods

	    try:
	        rv = requests.post(
	            "https://v1.picoapi.com/api/text/sentiment",
	            headers=headers,
	            data=json.dumps(js)
	        )
	        jsout = rv.json()
	    except Exception:
	        # Not a bare except: KeyboardInterrupt and SystemExit must still be
	        # able to stop a long-running scan.
	        jsout = {} # borked sentiment analysis?

	    results = jsout.get('results') if isinstance(jsout, dict) else None
	    if results:
	        for doc in results:
	            # Use the scores picoAPI reports directly, without rescaling.
	            mood = {
	                'negative': doc['negativity'],
	                'positive': doc['positivity'],
	                'neutral': doc['neutrality'],
	            }

	            # Additional (optional) emotion weighting
	            if 'emotions' in doc:
	                for k, v in doc['emotions'].items():
	                    mood[k] = v / 100 # Value is between 0 and 100.

	            moods[int(doc['id'])] = mood # Replace moods[X] with the actual mood
	    else:
	        KibbleBit.pprint("Failed to analyze email body.")
	        print(jsout)
	        # 403 returned on invalid key, 429 on rate exceeded.
	        # If we see a code return, let's just stop for now.
	        # Later scans can pick up the slack.
	        if isinstance(jsout, dict) and 'code' in jsout:
	            KibbleBit.pprint("Possible rate limiting in place, stopping for now.")
	            return False
	    return moods