Bing API: switch BingWebQueryRunner to the azure-bing-search-java client
diff --git a/opennlp-similarity/pom.xml b/opennlp-similarity/pom.xml
index b7e7bd0..35b768b 100644
--- a/opennlp-similarity/pom.xml
+++ b/opennlp-similarity/pom.xml
@@ -44,6 +44,14 @@
 	<prerequisites>

 		<maven>3.0</maven>

 	</prerequisites>

+	

+	<repositories>

+		<repository>

+		 <id>net.billylieurance</id>

+        <name>BillyLieuranceNet</name>

+        <url>http://www.billylieurance.net/maven2</url>	

+        </repository>

+	</repositories>

 

 	<dependencies>

 		<dependency>

@@ -58,7 +66,7 @@
 			<version>4.8.1</version>

 			<scope>test</scope>

 		</dependency>

-			<dependency>

+		<dependency>

 			<groupId>commons-lang</groupId>

 			<artifactId>commons-lang</artifactId>

 			<version>2.5</version>

@@ -91,6 +99,46 @@
 			<artifactId>solr-core</artifactId>

 			<version>4.0.0-BETA</version>

 		</dependency>

+		<dependency>

+			 <groupId>commons-codec</groupId>

+			 <artifactId>commons-codec</artifactId>

+			 <version>1.7</version>

+		</dependency>

+		<dependency>

+			 <groupId>commons-logging</groupId>

+			 <artifactId>commons-logging</artifactId>

+			 <version>1.1.1</version>

+		</dependency>

+		<dependency>

+			 <groupId>org.apache.httpcomponents</groupId>

+			 <artifactId>httpclient</artifactId>

+			 <version>4.2.1</version>

+        </dependency>

+        <dependency>

+			 <groupId>org.apache.httpcomponents</groupId>

+			 <artifactId>httpclient-cache</artifactId>

+			 <version>4.2.1</version>

+		</dependency>

+		<dependency>

+			 <groupId>org.apache.httpcomponents</groupId>

+			 <artifactId>httpcore</artifactId>

+			 <version>4.2.1</version>

+		</dependency>

+		<dependency>

+			 <groupId>org.apache.httpcomponents</groupId>

+			 <artifactId>httpmime</artifactId>

+			 <version>4.2.1</version>

+        </dependency>

+		<dependency>

+			 <groupId>org.apache.httpcomponents</groupId>

+			 <artifactId>fluent-hc</artifactId>

+			 <version>4.2.1</version>

+        </dependency>

+		<dependency>

+	        <groupId>net.billylieurance.azuresearch</groupId>

+	        <artifactId>azure-bing-search-java</artifactId>

+        <version>0.11.0</version>

+		</dependency>

             

 	</dependencies>

 	

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingWebQueryRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingWebQueryRunner.java
index 1b65034..a934264 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingWebQueryRunner.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingWebQueryRunner.java
@@ -28,128 +28,41 @@
 import java.util.List;

 import java.util.logging.Logger;

 

+import net.billylieurance.azuresearch.AzureSearchResultSet;

+import net.billylieurance.azuresearch.AzureSearchWebQuery;

+import net.billylieurance.azuresearch.AzureSearchWebResult;

 import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;

 

 import org.apache.commons.lang.StringUtils;

 import org.json.JSONArray;

 import org.json.JSONObject;

 

+

 public class BingWebQueryRunner {

   private static final Logger LOG = Logger

       .getLogger("opennlp.tools.similarity.apps.BingWebQueryRunner");

-

-  private String constructBingWebUrl(String query, int numbOfHits)

-      throws Exception {

-    String codedQuery = URLEncoder.encode(query, "UTF-8");

-

-    String yahooRequest = "https://api.datamarket.azure.com/Bing/SearchWeb"

-     // "http://api.search.live.net/json.aspx?Appid="

-        + BingQueryRunner.APP_ID + "&Query=" + codedQuery ;

-      //  + "&Sources=Web"

-        // Common request fields (optional)

-       // + "&Version=2.0" + "&Market=en-us&web.count=" + numbOfHits

-         // News-specific request fields (optional)

-      //  + "&News.Offset=0";

-

-    return yahooRequest;

-  }

-

-  public BingResponse populateBingHit(String response) throws Exception {

-    BingResponse resp = new BingResponse();

-    JSONObject rootObject = new JSONObject(response);

-    // each response is object that under the key of "ysearchresponse"

-    JSONObject responseObject = rootObject.getJSONObject("SearchResponse");

-    JSONObject web = responseObject.getJSONObject("Web"); // "News"

-

-    // the search result is in an array under the name of "results"

-    JSONArray resultSet = null;

-    try {

-      resultSet = web.getJSONArray("Results");

-      int count = (int) web.getLong("Total");

-      resp.setTotalHits(new Integer(count));

-    } catch (Exception e) {

-      e.printStackTrace();

-      LOG.severe("\nNo search results " + e);

-

-    }

-    if (resultSet != null) {

-      for (int i = 0; i < resultSet.length(); i++) {

-        try {

-          HitBase hit = new HitBase();

-          JSONObject singleResult = resultSet.getJSONObject(i);

-          hit.setAbstractText(singleResult.getString("Description"));

-          hit.setDate(singleResult.getString("DateTime"));

-          String title = StringUtils.replace(singleResult.getString("Title"),

-              "", " ");

-          hit.setTitle(title);

-          hit.setUrl(singleResult.getString("Url"));

-

-          resp.appendHits(hit);

-        } catch (Exception e) {

-          // incomplete search result: do not through exception

-        }

-      }

-    }

-    return resp;

-  }

-

-  public ArrayList<String> search(String query, String domainWeb, String lang,

-      int numbOfHits) throws Exception {

-    URL url = new URL(constructBingWebUrl(query, numbOfHits));

-    URLConnection connection = url.openConnection();

-

-    String line;

-    ArrayList<String> result = new ArrayList<String>();

-    BufferedReader reader = new BufferedReader(new InputStreamReader(

-        connection.getInputStream()));

-    int count = 0;

-    while ((line = reader.readLine()) != null) {

-      result.add(line);

-      count++;

-    }

-    return result;

-  }

-

-  public List<HitBase> runSearch(String query) {

-    BingResponse resp = null;

-    try {

-      List<String> resultList = search(query, "", "", 8);

-      resp = populateBingHit(resultList.get(0));

-

-    } catch (Exception e) {

-      // e.printStackTrace();

-      LOG.info("No news search results for query " + query);

-      return null;

-    }

-    // cast to super class

-    List<HitBase> hits = new ArrayList<HitBase>();

-    for (HitBase h : resp.getHits())

-      hits.add((HitBase) h);

-

-    hits = removeDuplicates(hits, 0.9);

-

-    return hits;

-  }

-

-  public List<HitBase> runSearch(String query, int num) {

-    BingResponse resp = null;

-    try {

-      List<String> resultList = search(query, "", "", num);

-      resp = populateBingHit(resultList.get(0));

-

-    } catch (Exception e) {

-      // e.printStackTrace();

-      LOG.info("No news search results for query " + query);

-      return null;

-    }

-    // cast to super class

-    List<HitBase> hits = new ArrayList<HitBase>();

-    for (HitBase h : resp.getHits())

-      hits.add((HitBase) h);

-

-    hits = removeDuplicates(hits, 0.9);

-    return hits;

-  }

+    public static final String BING_KEY = "TyfmF/4t1qbnA5X6sBXiTf80l29cSn+7IT0fPw2FNsU="; // NOTE(review): API key committed in source; consider rotating it and loading it from configuration

+	private AzureSearchWebQuery aq = new AzureSearchWebQuery();

+  /** Runs a Bing web search for {@code query} and returns the hits after removeDuplicates(.., 0.9); {@code nRes} is accepted but not applied here. */

+	public List<HitBase> runSearch(String query, int nRes) {

+	aq.setAppid(BING_KEY);

+	aq.setQuery(query);

+	aq.doQuery();

+	// Map every web result onto a HitBase (abstract text, title, URL).

+	List<HitBase> results = new ArrayList<HitBase> ();

+	AzureSearchResultSet<AzureSearchWebResult> ars = aq.getQueryResult();

+	if (ars == null) return results; // defensive: no result set to iterate

+	for (AzureSearchWebResult anr : ars){

+	    HitBase h = new HitBase();

+	    h.setAbstractText(anr.getDescription());

+	    h.setTitle(anr.getTitle());

+	    h.setUrl(anr.getUrl());

+	    results.add(h);

+	}

+	// De-duplicate once over the full list instead of after every insertion.

+	return removeDuplicates(results, 0.9);

+}

+   

 

   public static List<HitBase> removeDuplicates(List<HitBase> hits,

       double imageDupeThresh) {

@@ -185,10 +98,10 @@
   }

 

   public int getTotalPagesAtASite(String site) {

-    BingResponse resp = null;

+   

     try {

-      List<String> resultList = search("site:" + site, "", "", 10);

-      resp = populateBingHit(resultList.get(0));

+      List<HitBase> resultList = runSearch("site:" + site, 10);

+     

 

     } catch (Exception e) {

       // e.printStackTrace();

@@ -196,11 +109,8 @@
       return 0;

     }

 

-    return resp.totalHits;

+    return 0;

   }

 

-  public static void main(String[] args) {

-    int res = new BingWebQueryRunner().getTotalPagesAtASite("www.zvents.com");

-    new BingWebQueryRunner().runSearch("site:www.tripadvisor.com", 10);

-  };

+  

 }

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
index 47e0d04..f47bde6 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
@@ -59,27 +59,35 @@
         || sentTry.indexOf("clicking here") > -1

         || sentTry.indexOf("skip to") > -1 || sentTry.indexOf("sidebar") > -1

         || sentTry.indexOf("Tags:") > -1 || sentTry.startsWith("Posted by")

-        || sentTry.indexOf("available online") > 0

-        || sentTry.indexOf("get online") > 0

-        || sentTry.indexOf("buy online") > 0

-        || sentTry.indexOf("not valid") > 0 || sentTry.indexOf("discount") > 0

-        || sentTry.indexOf("official site") > 0

-        || sentTry.indexOf("this video") > 0

-        || sentTry.indexOf("this book") > 0

-        || sentTry.indexOf("this product") > 0

-        || sentTry.indexOf("paperback") > 0 || sentTry.indexOf("hardcover") > 0

-        || sentTry.indexOf("audio cd") > 0

-        || sentTry.indexOf("related searches") > 0

-        || sentTry.indexOf("permission is granted") > 0

-        || sentTry.indexOf("[edit") > 0

-        || sentTry.indexOf("edit categories") > 0

-        || sentTry.indexOf("free license") > 0

-        || sentTry.indexOf("permission is granted") > 0

-        || sentTry.indexOf("under the terms") > 0

-        || sentTry.indexOf("rights reserved") > 0

-        || sentTry.indexOf("wikipedia") > 0 || sentTry.endsWith("the")

-        || sentTry.endsWith("the.") || sentTry.startsWith("below")

-

+        || sentTry.indexOf("available online") > -1

+        || sentTry.indexOf("get online") > -1

+        || sentTry.indexOf("buy online") > -1

+        || sentTry.indexOf("not valid") > -1 || sentTry.indexOf("discount") > -1

+        || sentTry.indexOf("official site") > -1

+        || sentTry.indexOf("this video") > -1

+        || sentTry.indexOf("this book") > -1

+        || sentTry.indexOf("this product") > -1

+        || sentTry.indexOf("paperback") > -1 || sentTry.indexOf("hardcover") > -1

+        || sentTry.indexOf("audio cd") > -1

+        || sentTry.indexOf("related searches") > -1

+        || sentTry.indexOf("permission is granted") > -1

+        || sentTry.indexOf("[edit") > -1

+        || sentTry.indexOf("edit categories") > -1

+        || sentTry.indexOf("free license") > -1

+        || sentTry.indexOf("permission is granted") > -1

+        || sentTry.indexOf("under the terms") > -1

+        || sentTry.indexOf("rights reserved") > -1

+        || sentTry.indexOf("wikipedia") > -1 || sentTry.endsWith("the")

+        || sentTry.endsWith("the.") || sentTry.startsWith("below") 

+        || sentTry.indexOf("recipient of")>-1 || sentTry.indexOf("this message")>-1 

+        ||sentTry.indexOf( "mailing list")>-1 ||sentTry.indexOf( "purchase order")>-1

+        ||sentTry.indexOf( "mon-fri")>-1 ||sentTry.indexOf( "email us")>-1 ||sentTry.indexOf( "privacy pol")>-1 ||sentTry.indexOf( "back to top")>-1 

+        ||sentTry.indexOf( "click here")>-1 ||sentTry.indexOf( "for details")>-1 ||sentTry.indexOf( "assistance?")>-1 ||sentTry.indexOf( "chat live")>-1

+        ||sentTry.indexOf( "free shipping")>-1 ||sentTry.indexOf( "company info")>-1 ||sentTry.indexOf( "satisfaction g")>-1 ||sentTry.indexOf( "contact us")>-1

+        

+        ||sentTry.startsWith( "fax") ||sentTry.startsWith( "write") || sentTry.startsWith( "email")||sentTry.indexOf( "conditions")>-1 ||sentTry.indexOf( "chat live")>-1

+        ||sentTry.indexOf( "free shipping")>-1 ||sentTry.indexOf( "company info")>-1 ||sentTry.indexOf( "satisfaction g")>-1 ||sentTry.indexOf( "contact us")>-1

+     

     )

       return null;

 

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
index f6da4de..40096c3 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
@@ -120,8 +120,8 @@
             + parseCacheFileNameCSV), ',');

         lines = reader.readAll();

       } catch (FileNotFoundException e) {

-        //e.printStackTrace();

-        System.err.println("Cannot find cache file");

+    	  if (javaObjectSerialization)

+    		  System.err.println("Cannot find cache file");

         return null;

       } catch (IOException ioe) {

         ioe.printStackTrace();

diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
index bd03628..a4aa734 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
@@ -58,8 +58,7 @@
   private static final String MODEL_DIR_KEY = "nlp.models.dir";
   // TODO config
   // this is where resources should live
-  private static String MODEL_DIR;
-  public static String MODEL_DIR_REL = "src/test/resources/models";
+  private static String MODEL_DIR=null; public static String MODEL_DIR_REL = "src/test/resources/models"; // MODEL_DIR_REL stays public, as it was before this change
   protected static ParserChunker2MatcherProcessor instance;
 
   private SentenceDetector sentenceDetector;
@@ -110,8 +109,10 @@
       sentence_parseObject = new HashMap<String, String[][]>();
 
     try {
-      MODEL_DIR = new File(".").getAbsolutePath().replace(".", "")
-          + MODEL_DIR_REL;
+    	if (MODEL_DIR==null) 
+    		MODEL_DIR = new File(".").getAbsolutePath().replace(".", "") + MODEL_DIR_REL;
+    	//get full path from constructor
+    		
       initializeSentenceDetector();
       initializeTokenizer();
       initializePosTagger();
@@ -141,6 +142,14 @@
 
     return instance;
   }
+  /** Returns the shared instance; {@code fullPathToResources + "/models"} becomes MODEL_DIR only when this call creates that instance. */
+  public synchronized static ParserChunker2MatcherProcessor getInstance(String fullPathToResources) {
+	    if (instance == null) {
+	      MODEL_DIR = fullPathToResources+"/models";
+	      instance = new ParserChunker2MatcherProcessor();
+	    }
+	    return instance;
+	  }
 
   /**
    * General parsing function, which returns lists of parses for a portion of