blob: 50db87ca45c746ccefed807e20282aad7a5b5a01 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.similarity.apps;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import opennlp.tools.jsmlearning.ProfileReaderWriter;
import opennlp.tools.parse_thicket.Triple;
import net.billylieurance.azuresearch.AzureSearchImageQuery;
import net.billylieurance.azuresearch.AzureSearchImageResult;
import net.billylieurance.azuresearch.AzureSearchResultSet;
import net.billylieurance.azuresearch.AzureSearchWebQuery;
import net.billylieurance.azuresearch.AzureSearchWebResult;
/**
 * Mines Yahoo! Answers pages through the web search query object inherited
 * from {@link BingQueryRunner}, restricting every query to
 * {@code site:answers.yahoo.com}.
 *
 * <p>The instance keeps a page cursor: each call to
 * {@link #runSearch(String)} fetches the current page and advances the
 * cursor, while {@link #runSearch(String, int)} rewinds the cursor and
 * collects several consecutive pages.
 *
 * <p>NOTE(review): {@code aq} and {@code BING_KEY} are not declared in this
 * class; they are presumably inherited from {@code BingQueryRunner} - confirm.
 * The class holds mutable state ({@code page} and the shared query object)
 * without any synchronization.
 */
public class YahooAnswersMiner extends BingQueryRunner {
  private static final Logger LOG =
      Logger.getLogger(YahooAnswersMiner.class.getName());

  /** Number of hits requested per page. */
  private static final int HITS_PER_PAGE = 50;

  /**
   * Upper bound on the number of requests issued by one multi-page search.
   * Matches the historical limit of the loop guard (counter values 0..11).
   */
  private static final int MAX_REQUESTS = 12;

  /** Index of the next page to request; advanced after every single-page search. */
  private int page = 0;

  /**
   * Fetches the current page of results for {@code query}, restricted to
   * answers.yahoo.com, and advances the page cursor by one.
   *
   * @param query search terms appended after the site restriction
   * @return the hits of the fetched page (abstract text, title and URL are
   *         populated); empty if the query produced no result set
   */
  public List<HitBase> runSearch(String query) {
    aq.setAppid(BING_KEY);
    aq.setQuery("site:answers.yahoo.com " + query);
    aq.setPerPage(HITS_PER_PAGE);
    aq.setPage(page);
    aq.doQuery();

    List<HitBase> results = new ArrayList<HitBase>();
    AzureSearchResultSet<AzureSearchWebResult> ars = aq.getQueryResult();
    // NOTE(review): presumably the result set can be null when the request
    // failed - confirm against the search client; guarding avoids an NPE.
    if (ars != null) {
      for (AzureSearchWebResult anr : ars) {
        HitBase h = new HitBase();
        h.setAbstractText(anr.getDescription());
        h.setTitle(anr.getTitle());
        h.setUrl(anr.getUrl());
        results.add(h);
      }
    }
    page++;
    return results;
  }

  /**
   * Collects consecutive result pages for {@code query}, starting from the
   * first page.
   *
   * <p>Despite its name, {@code totalPages} is compared against
   * {@code page * HITS_PER_PAGE}, i.e. it acts as a budget of <em>hits</em>:
   * pages are requested while fewer than {@code totalPages} hits have been
   * asked for. The search also stops after {@value #MAX_REQUESTS} requests or
   * as soon as a page comes back empty.
   *
   * @param query search terms appended after the site restriction
   * @param totalPages approximate number of hits to request (name kept for
   *        compatibility with existing callers)
   * @return all hits gathered from the fetched pages, in fetch order
   */
  public List<HitBase> runSearch(String query, int totalPages) {
    // Rewind the cursor: without this, a second multi-page search on the same
    // instance would resume from the previous query's page (and typically
    // return nothing because the budget check already fails).
    page = 0;

    List<HitBase> results = new ArrayList<HitBase>();
    int requests = 0;
    while (totalPages > page * HITS_PER_PAGE && requests < MAX_REQUESTS) {
      List<HitBase> res = runSearch(query);
      requests++;
      if (res.isEmpty()) {
        // Nothing came back for this page; requesting further pages would
        // only spend more calls.
        LOG.info("No hits on page " + (page - 1) + " for query: " + query);
        break;
      }
      results.addAll(res);
    }
    return results;
  }

  /**
   * Demo entry point: searches Yahoo! Answers for a fixed topic, extracts
   * candidate sentences from every hit and writes them to
   * {@code multi_sentence_queries.csv}.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    YahooAnswersMiner self = new YahooAnswersMiner();
    RelatedSentenceFinder extractor = new RelatedSentenceFinder();
    String topic = "obamacare";

    List<HitBase> resp = self.runSearch(topic, 150);
    // Guard against an empty response; get(0) would throw otherwise.
    if (!resp.isEmpty()) {
      System.out.print(resp.get(0));
    }

    List<String[]> data = new ArrayList<String[]>();
    for (HitBase item : resp) {
      Triple<List<String>, String, String[]> fragmentExtractionResults =
          extractor.formCandidateFragmentsForPage(item, topic, null);
      List<String> allFragms = (List<String>) fragmentExtractionResults.getFirst();
      for (String fragment : allFragms) {
        String[] candidateSentences =
            extractor.formCandidateSentences(fragment, fragmentExtractionResults);
        // Print the sentences themselves rather than the array's identity.
        System.out.println(java.util.Arrays.toString(candidateSentences));
        data.add(candidateSentences);
      }
    }
    ProfileReaderWriter.writeReport(data, "multi_sentence_queries.csv");
  }
}