| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.stanbol.enhancer.engines.sentiment.util; |
| |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.concurrent.locks.ReadWriteLock; |
| import java.util.concurrent.locks.ReentrantReadWriteLock; |
| |
| import org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier; |
| import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory; |
| |
| /** |
| * <code>{Word,Category} -> {Sentiment}</code> Dictionary intended to be |
| * used by {@link SentimentClassifier} implementation to hold the dictionary.<p> |
| * This implementation is thread save. |
| * |
| * @author Rupert Westenthaler |
| * |
| */ |
| public class WordSentimentDictionary { |
| |
| |
| private final ReadWriteLock lock; |
| private final Map<String,Map<LexicalCategory,Double>> wordMap; |
| private final Locale locale; |
| private int sentCount; //the number of wordSentiments |
| |
| /** |
| * Create a word sentiment directory for the given locale. |
| * @param locale the locale used to convert words to lower case. If |
| * <code>null</code> {@link Locale#ROOT} will be used. |
| */ |
| public WordSentimentDictionary(Locale locale){ |
| this.wordMap = new HashMap<String,Map<LexicalCategory,Double>>(); |
| this.lock = new ReentrantReadWriteLock(); |
| this.locale = locale == null ? Locale.ROOT : locale; |
| } |
| |
| /** |
| * Puts (adds/updates) a word (with unknown {@link LexicalCategory}) |
| * to the dictionary |
| * @param word the word. |
| * @param sentiment the sentiment value |
| * @return the old sentiment value or <code>null</code> if none. |
| */ |
| public Double updateSentiment(String word, Double sentiment){ |
| return updateSentiment(null, word, sentiment); |
| } |
| /** |
| * Puts (adds/updates) a word with {@link LexicalCategory} to the dictionary. |
| * @param cat the {@link LexicalCategory} of the word or <code>null</code> if not known |
| * @param word the word |
| * @param sentiment the sentiment value or <code>null</code> to remove this |
| * mapping. |
| * @return the old sentiment value or <code>null</code> if none. |
| */ |
| public Double updateSentiment(LexicalCategory cat, String word, Double sentiment){ |
| word = word.toLowerCase(locale); |
| Double old = null; |
| lock.writeLock().lock(); |
| try { |
| Map<LexicalCategory,Double> entry = wordMap.get(word); |
| //most elements (99%) will only have a single value. |
| //so we use a singleton map as default and create a HashMap for those |
| //that do have more elements (to save memory) |
| boolean replace = false; |
| if(entry == null && sentiment != null){ |
| entry = Collections.singletonMap(cat, sentiment); |
| replace = true; |
| } else if(entry != null){ |
| if(entry.size() == 1){ //special case |
| if(sentiment == null) { |
| old = entry.get(cat); |
| if(old != null){ //remove |
| entry = null; |
| replace = true; |
| } //not found -> do nothing |
| } else { //about to add 2nd element |
| //create a normal HashMap and add the existing value; |
| entry = new HashMap<LexicalCategory,Double>(entry); |
| replace = true; |
| } |
| } |
| if(sentiment == null){ |
| if(entry != null && entry.size() > 1){ |
| old = entry.remove(cat); |
| if(old != null && entry.size() == 1){ //only one entry left |
| //switch back to a singletonMap |
| Entry<LexicalCategory,Double> lastEntry = entry.entrySet().iterator().next(); |
| entry = Collections.singletonMap(lastEntry.getKey(), lastEntry.getValue()); |
| replace = true; |
| } |
| } //else already processed by special case size == 1 |
| } else { |
| old = entry.put(cat, Double.valueOf(sentiment)); |
| } |
| } //else entry == null and sentiment == null ... nothing to do |
| if(replace){ //we have changed the entry instance and need to put the word |
| if(entry == null){ |
| wordMap.remove(word); |
| } else { |
| wordMap.put(word, entry); |
| } |
| } |
| } finally { |
| lock.writeLock().unlock(); |
| } |
| if(old == null && sentiment != null){ |
| sentCount++; //we added a new sentiment |
| } else if(old != null && sentiment == null){ |
| sentCount--; |
| } //else no change |
| return old; |
| } |
| |
| /** |
| * Getter for the sentiment value for the word. If multiple sentiments |
| * for different {@link LexicalCategory lexical categories} are registered |
| * for the word this will return the average of those. |
| * @param word the word |
| * @return the sentiment or <code>null</code> if not in the dictionary. |
| */ |
| public Double getSentiment(String word){ |
| return getSentiment(null, word); |
| } |
| /** |
| * Getter for the sentiment for the parsed word and {@link LexicalCategory}. |
| * In case the category is <code>null</code> this method might parse an |
| * average over different sentiments registered for different lexical |
| * categories. |
| * @param cat the category |
| * @param word the word |
| * @return the sentiment or <code>null</code> if the not in the dictionary. |
| */ |
| public Double getSentiment(LexicalCategory cat, String word){ |
| lock.readLock().lock(); |
| try { |
| Map<LexicalCategory,Double> sentiments = wordMap.get(word.toLowerCase(locale)); |
| if(sentiments != null){ |
| Double sentiment = sentiments.get(cat); |
| if(sentiment == null && cat == null && !sentiments.isEmpty()){ |
| if(sentiments.size() == 1) { |
| sentiment = sentiments.values().iterator().next(); |
| } else { |
| double avgSent = 0; |
| for(Double sent : sentiments.values()){ |
| avgSent = avgSent + sent; |
| } |
| sentiment = Double.valueOf(avgSent/(double)sentiments.size()); |
| } |
| } |
| return sentiment; |
| } else { |
| return null; |
| } |
| } finally { |
| lock.readLock().unlock(); |
| } |
| } |
| |
| /** |
| * The number of words in the dictionary. NOTE that a single word |
| * might have multiple sentiments for different {@link LexicalCategory}. |
| * So this value might be lower to {@link #size()} |
| **/ |
| public int getWordCount() { |
| lock.readLock().lock(); |
| try { |
| return wordMap.size(); |
| } finally { |
| lock.readLock().unlock(); |
| } |
| } |
| /** |
| * The number of word sentiments in the dictionary |
| * @return |
| */ |
| public int size(){ |
| return sentCount; |
| } |
| |
| /** |
| * removes all entries of this dictionary. |
| */ |
| public void clear() { |
| lock.writeLock().lock(); |
| try { |
| wordMap.clear(); |
| } finally { |
| lock.writeLock().unlock(); |
| } |
| } |
| |
| |
| } |