/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright 2010 Oracle and/or its affiliates. All rights reserved.
*
* Oracle and Java are registered trademarks of Oracle and/or its affiliates.
* Other names may be trademarks of their respective owners.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common
* Development and Distribution License("CDDL") (collectively, the
* "License"). You may not use this file except in compliance with the
* License. You can obtain a copy of the License at
* http://www.netbeans.org/cddl-gplv2.html
* or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
* specific language governing permissions and limitations under the
* License. When distributing the software, include this License Header
* Notice in each file and include the License file at
* nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the GPL Version 2 section of the License file that
* accompanied this code. If applicable, add the following below the
* License Header, with the fields enclosed by brackets [] replaced by
* your own identifying information:
* "Portions Copyrighted [year] [name of copyright owner]"
*
* If you wish your version of this file to be governed by only the CDDL
* or only the GPL Version 2, indicate your decision by adding
* "[Contributor] elects to include this software in this distribution
* under the [CDDL or GPL Version 2] license." If you do not indicate a
* single choice of license, a recipient has the option to distribute
* your version of this file under either the CDDL, the GPL Version 2 or
* to extend the choice of license to its licensees as provided above.
* However, if you add GPL Version 2 code and therefore, elected the GPL
* Version 2 license, then the option applies only if the new code is
* made subject to such option by the copyright holder.
*
* Contributor(s):
*
* Portions Copyrighted 2010 Sun Microsystems, Inc.
*/

package org.netbeans.modules.jackpot30.impl.duplicates.indexing;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.codeviation.pojson.Pojson;
import org.netbeans.modules.jackpot30.common.api.LuceneHelpers.BitSetCollector;
import org.netbeans.modules.jackpot30.impl.duplicates.ComputeDuplicates.DuplicateDescription;
import org.netbeans.modules.jackpot30.impl.duplicates.ComputeDuplicates.Span;
import org.netbeans.modules.jackpot30.remoting.api.LocalCache;
import org.netbeans.modules.jackpot30.remoting.api.LocalCache.Task;
import org.netbeans.modules.jackpot30.remoting.api.RemoteIndex;
import org.netbeans.modules.jackpot30.remoting.api.WebUtilities;
import org.openide.filesystems.FileObject;
import org.openide.filesystems.URLMapper;
import org.openide.util.Exceptions;

/**
 * Queries remote jackpot30 duplicates indices for code snippets whose hashes
 * match the hashes computed for the current file. Remote answers are cached
 * in a per-index local Lucene index to avoid repeated network round-trips.
 *
 * @author lahvac
 */
@SuppressWarnings("ClassWithMultipleLoggers")
public class RemoteDuplicatesIndex {

    private static final Logger LOG = Logger.getLogger(RemoteDuplicatesIndex.class.getName());
    private static final Logger TIMER = Logger.getLogger("TIMER");
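
    /**
     * Finds duplicates of the snippets described by the given hashes in all
     * configured remote indices. Keys of {@code hashes} are the encoded
     * snippet hashes, values the [start, end] span of the corresponding
     * snippet in {@code currentFile} (-1 marking an unknown position).
     */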
public static List<DuplicateDescription> findDuplicates(Map<String, long[]> hashes, FileObject currentFile, AtomicBoolean cancel) throws IOException, URISyntaxException {
return translate(hashes, findHashOccurrences(hashes.keySet(), currentFile, cancel), currentFile);
}
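
    //computes, for each hash, in which remote indices and files it occurs;
    //answers are taken from the per-index local cache where available, and
    //only the remaining hashes are sent to the remote servers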
private static Map<String, Map<RemoteIndex, Collection<String>>> findHashOccurrences(Collection<? extends String> hashes, FileObject currentFile, AtomicBoolean cancel) throws IOException, URISyntaxException {
Map<URI, Collection<RemoteIndex>> indices = new LinkedHashMap<URI, Collection<RemoteIndex>>();
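        //group the known remote indices by the URI of their remote server: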
for (RemoteIndex ri : RemoteIndex.loadIndices()) {
try {
URI uri = ri.remote.toURI();
Collection<RemoteIndex> list = indices.get(uri);
if (list == null) {
indices.put(uri, list = new ArrayList<RemoteIndex>());
}
list.add(ri);
} catch (URISyntaxException ex) {
Exceptions.printStackTrace(ex);
}
}
Map<String, Map<RemoteIndex, Collection<String>>> result = new LinkedHashMap<String, Map<RemoteIndex, Collection<String>>>();
long localTime = 0;
long remoteTime = 0;
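
        //for each remote index, look the hashes up in its local cache first;
        //hashes missing from the cache are sent to the remote server and the
        //answers, including negative ones, are cached for future queries: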
for (RemoteIndex ri : RemoteIndex.loadIndices()) {
if (cancel.get()) return Collections.emptyMap();
Set<String> toProcess = new LinkedHashSet<String>(hashes);
Map<String, Map<String, Collection<? extends String>>> indexResult = new LinkedHashMap<String, Map<String, Collection<? extends String>>>();
long locS = System.currentTimeMillis();
indexResult.putAll(findHashOccurrencesInLocalCache(ri, toProcess, cancel));
localTime += System.currentTimeMillis() - locS;
toProcess.removeAll(indexResult.keySet());
if (!toProcess.isEmpty()) {
long remS = System.currentTimeMillis();
Map<String, Map<String, Collection<? extends String>>> remoteResults = findHashOccurrencesRemote(ri.remote.toURI(), toProcess, cancel);
remoteTime += System.currentTimeMillis() - remS;
Map<String, Map<String, Collection<? extends String>>> toSave = new LinkedHashMap<String, Map<String, Collection<? extends String>>>(remoteResults);
for (String hash : toProcess) {
if (!toSave.containsKey(hash)) {
toSave.put(hash, Collections.<String, Collection<? extends String>>emptyMap());
}
}
if (cancel.get()) return Collections.emptyMap();
saveToLocalCache(ri, toSave);
indexResult.putAll(remoteResults);
}
for (Entry<String, Map<String, Collection<? extends String>>> e : indexResult.entrySet()) {
Map<RemoteIndex, Collection<String>> hashResult = result.get(e.getKey());
if (hashResult == null) {
result.put(e.getKey(), hashResult = new LinkedHashMap<RemoteIndex, Collection<String>>());
}
for (Entry<String, Collection<? extends String>> insideHash : e.getValue().entrySet()) {
if (cancel.get()) return Collections.emptyMap();
Collection<String> dupes = hashResult.get(ri);
if (dupes == null) {
hashResult.put(ri, dupes = new LinkedHashSet<String>());
}
dupes.addAll(insideHash.getValue());
}
}
}
TIMER.log(Level.FINE, "local hash duplicates", new Object[] {currentFile, localTime});
TIMER.log(Level.FINE, "remote hash duplicates", new Object[] {currentFile, remoteTime});
return result;
}
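
    //queries the remote server's /duplicates/findDuplicates service for the
    //occurrences of the given hashes; the JSON response, a map of
    //hash -> (segment -> relative paths), is decoded by Pojson; an empty map
    //is returned when the server cannot be contacted or the query is cancelled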
private static Map<String, Map<String, Collection<? extends String>>> findHashOccurrencesRemote(URI remoteIndex, Iterable<? extends String> hashes, AtomicBoolean cancel) {
try {
String indexURL = remoteIndex.toASCIIString();
URI u = new URI(indexURL + "/duplicates/findDuplicates?hashes=" + WebUtilities.escapeForQuery(Pojson.save(hashes)));
String hashesMap = WebUtilities.requestStringResponse(u, cancel);
if (hashesMap == null || cancel.get()) {
                //error while getting the duplicates (e.g. the remote server is
                //not accessible), or the query was cancelled; ignore and return
                //no results:
return Collections.emptyMap();
}
return Pojson.load(LinkedHashMap.class, hashesMap);
} catch (URISyntaxException ex) {
//XXX: better handling?
Exceptions.printStackTrace(ex);
return Collections.emptyMap();
}
}
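
    //looks the given hashes up in the local cache of the given remote index;
    //the cached "path" values have the form "<segment>/<relative path>" and
    //are split back into a segment -> paths map here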
private static Map<String, Map<String, Collection<? extends String>>> findHashOccurrencesInLocalCache(RemoteIndex ri, final Iterable<? extends String> hashes, AtomicBoolean cancel) throws IOException, URISyntaxException {
return LocalCache.runOverLocalCache(ri, new Task<IndexReader, Map<String, Map<String, Collection<? extends String>>>>() {
@Override public Map<String, Map<String, Collection<? extends String>>> run(IndexReader reader, AtomicBoolean cancel) throws IOException {
Map<String, Map<String, Collection<String>>> result = new LinkedHashMap<String, Map<String, Collection<String>>>();
for (Entry<String, Collection<? extends String>> e : containsHash(reader, hashes, cancel).entrySet()) {
if (cancel.get()) return Collections.emptyMap();
Map<String, Collection<String>> forHash = result.get(e.getKey());
if (forHash == null) {
result.put(e.getKey(), forHash = new LinkedHashMap<String, Collection<String>>());
}
for (String path : e.getValue()) {
String segment = path.substring(0, path.indexOf('/'));
path = path.substring(path.indexOf('/') + 1);
Collection<String> list = forHash.get(segment);
if (list == null) {
forHash.put(segment, list = new LinkedList<String>());
}
list.add(path);
}
}
                return (Map) result; //XXX: unchecked conversion of the Collection<String> values to Collection<? extends String>
}
}, Collections.<String, Map<String, Collection<? extends String>>>emptyMap(), cancel);
}
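
    //stores the given occurrences into the local cache of the given remote
    //index, one Lucene document per hash; a document with no "path" fields
    //records a negative answer, i.e. a hash known to have no occurrences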
private static synchronized void saveToLocalCache(RemoteIndex ri, final Map<String, Map<String, Collection<? extends String>>> what) throws IOException, URISyntaxException {
LocalCache.saveToLocalCache(ri, new Task<IndexWriter, Void>() {
@Override public Void run(IndexWriter w, AtomicBoolean cancel) throws IOException {
for (Entry<String, Map<String, Collection<? extends String>>> e : what.entrySet()) {
Document doc = new Document();
doc.add(new Field("hash", e.getKey(), Store.YES, Index.NOT_ANALYZED));
for (Entry<String, Collection<? extends String>> pe : e.getValue().entrySet()) {
for (String path : pe.getValue()) {
doc.add(new Field("path", pe.getKey() + "/" + path, Store.YES, Index.NO));
}
}
w.addDocument(doc);
}
return null;
}
});
}
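
    //converts the hash occurrences into DuplicateDescriptions: hashes with a
    //bigger value (see getValue) are processed first, and only duplicates
    //whose spans in the current file are not covered by an already accepted
    //duplicate are kept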
private static List<DuplicateDescription> translate(Map<String, long[]> hashes, Map<String, Map<RemoteIndex, Collection<String>>> occ, FileObject currentFile) {
Map<String, Map<RemoteIndex, Collection<String>>> sorted = hashMap();
Map<long[], DuplicateDescription> result = new LinkedHashMap<long[], DuplicateDescription>();
List<long[]> seen = new LinkedList<long[]>();
sorted.putAll(occ);
        OUTER: for (Entry<String, Map<RemoteIndex, Collection<String>>> e : sorted.entrySet()) {
            long[] currentSpan = hashes.get(e.getKey());
            if (currentSpan[0] == (-1) || currentSpan[1] == (-1)) continue; //span of the snippet in the current file is unknown
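            //skip this hash if an already accepted duplicate fully covers its
            //span in the current file; conversely, drop accepted duplicates
            //fully covered by this hash's span: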
for (Iterator<Entry<long[], DuplicateDescription>> it = result.entrySet().iterator(); it.hasNext();) {
Entry<long[], DuplicateDescription> span = it.next();
if (span.getKey()[0] <= currentSpan[0] && span.getKey()[1] >= currentSpan[1]) {
continue OUTER;
}
if (currentSpan[0] <= span.getKey()[0] && currentSpan[1] >= span.getKey()[1]) {
it.remove();
}
}
seen.add(currentSpan);
String longest = e.getKey();
List<Span> foundDuplicates = new LinkedList<Span>();
for (Entry<RemoteIndex, Collection<String>> root2Occurrences : e.getValue().entrySet()) {
                FileObject localRoot = URLMapper.findFileObject(root2Occurrences.getKey().getLocalFolder());
                if (localRoot == null) continue; //the local folder of the index does not exist (anymore)
                for (String cand : root2Occurrences.getValue()) {
                    FileObject o = localRoot.getFileObject(cand);
                    if (o == null) {
                        LOG.log(Level.FINE, "Cannot resolve duplicate candidate: {0}", cand);
                        continue;
                    }
if (areEquivalent(currentFile, o)) continue;
foundDuplicates.add(new Span(o, -1, -1));
}
}
if (foundDuplicates.isEmpty()) continue;
DuplicateDescription current = DuplicateDescription.of(foundDuplicates, getValue(longest), longest);
result.put(currentSpan, current);
}
return new LinkedList<DuplicateDescription>(result.values());
}
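
    //an occurrence in the current file itself is not reported as a duplicate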
private static boolean areEquivalent(FileObject f1, FileObject f2) {
return f1.equals(f2);
}
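
    //hash keys have the form "<hash>:<value>"; returns the numeric value after
    //the last ':' (presumably the size of the hashed snippet)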
private static long getValue(String encoded) {
return Long.parseLong(encoded.substring(encoded.lastIndexOf(":") + 1));
}
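
    //creates a map sorted by descending value of its keys (see getValue), so
    //that bigger duplicates are processed first; despite the name this is a
    //TreeMap, and keys with equal values are kept apart by the tie-breaker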
    private static <T> TreeMap<String, T> hashMap() {
        return new TreeMap<String, T>(new Comparator<String>() {
            @Override public int compare(String arg0, String arg1) {
                int valueOrder = (int) Math.signum(getValue(arg1) - getValue(arg0));
                return valueOrder != 0 ? valueOrder : arg0.compareTo(arg1);
            }
        });
    }
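
    //searches the given Lucene index for documents whose "hash" field equals
    //one of the given hashes and collects their stored "path" values, using a
    //BitSet of matching document ids filled by BitSetCollector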
private static Map<String, Collection<? extends String>> containsHash(IndexReader reader, Iterable<? extends String> hashes, AtomicBoolean cancel) throws IOException {
Map<String, Collection<? extends String>> result = new LinkedHashMap<String, Collection<? extends String>>();
for (String hash : hashes) {
if (cancel.get()) return Collections.emptyMap();
Collection<String> found = new LinkedList<String>();
Query query = new TermQuery(new Term("hash", hash));
Searcher s = new IndexSearcher(reader);
BitSet matchingDocuments = new BitSet(reader.maxDoc());
Collector c = new BitSetCollector(matchingDocuments);
s.search(query, c);
boolean wasFound = false;
for (int docNum = matchingDocuments.nextSetBit(0); docNum >= 0; docNum = matchingDocuments.nextSetBit(docNum + 1)) {
if (cancel.get()) return Collections.emptyMap();
final Document doc = reader.document(docNum);
found.addAll(Arrays.asList(doc.getValues("path")));
wasFound = true;
}
if (wasFound) {
result.put(hash, found);
}
}
return result;
}
}