| /* |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
| * |
| * Copyright 2010 Oracle and/or its affiliates. All rights reserved. |
| * |
| * Oracle and Java are registered trademarks of Oracle and/or its affiliates. |
| * Other names may be trademarks of their respective owners. |
| * |
| * The contents of this file are subject to the terms of either the GNU |
| * General Public License Version 2 only ("GPL") or the Common |
| * Development and Distribution License("CDDL") (collectively, the |
| * "License"). You may not use this file except in compliance with the |
| * License. You can obtain a copy of the License at |
| * http://www.netbeans.org/cddl-gplv2.html |
| * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the |
| * specific language governing permissions and limitations under the |
| * License. When distributing the software, include this License Header |
| * Notice in each file and include the License file at |
| * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the GPL Version 2 section of the License file that |
| * accompanied this code. If applicable, add the following below the |
| * License Header, with the fields enclosed by brackets [] replaced by |
| * your own identifying information: |
| * "Portions Copyrighted [year] [name of copyright owner]" |
| * |
| * If you wish your version of this file to be governed by only the CDDL |
| * or only the GPL Version 2, indicate your decision by adding |
| * "[Contributor] elects to include this software in this distribution |
| * under the [CDDL or GPL Version 2] license." If you do not indicate a |
| * single choice of license, a recipient has the option to distribute |
| * your version of this file under either the CDDL, the GPL Version 2 or |
| * to extend the choice of license to its licensees as provided above. |
| * However, if you add GPL Version 2 code and therefore, elected the GPL |
| * Version 2 license, then the option applies only if the new code is |
| * made subject to such option by the copyright holder. |
| * |
| * Contributor(s): |
| * |
| * Portions Copyrighted 2010 Sun Microsystems, Inc. |
| */ |
| |
| package org.netbeans.modules.jackpot30.impl.duplicates.indexing; |
| |
| import java.io.IOException; |
| import java.net.URI; |
| import java.net.URISyntaxException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.BitSet; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.LinkedHashMap; |
| import java.util.LinkedHashSet; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.concurrent.atomic.AtomicBoolean; |
| import java.util.logging.Level; |
| import java.util.logging.Logger; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.Field.Index; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.Collector; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.Searcher; |
| import org.apache.lucene.search.TermQuery; |
| import org.codeviation.pojson.Pojson; |
| import org.netbeans.modules.jackpot30.common.api.LuceneHelpers.BitSetCollector; |
| import org.netbeans.modules.jackpot30.impl.duplicates.ComputeDuplicates.DuplicateDescription; |
| import org.netbeans.modules.jackpot30.impl.duplicates.ComputeDuplicates.Span; |
| import org.netbeans.modules.jackpot30.remoting.api.LocalCache; |
| import org.netbeans.modules.jackpot30.remoting.api.LocalCache.Task; |
| import org.netbeans.modules.jackpot30.remoting.api.RemoteIndex; |
| import org.netbeans.modules.jackpot30.remoting.api.WebUtilities; |
| import org.openide.filesystems.FileObject; |
| import org.openide.filesystems.URLMapper; |
| import org.openide.util.Exceptions; |
| |
| /** |
| * |
| * @author lahvac |
| */ |
| @SuppressWarnings("ClassWithMultipleLoggers") |
| public class RemoteDuplicatesIndex { |
| |
| private static final Logger TIMER = Logger.getLogger("TIMER"); |
| |
| public static List<DuplicateDescription> findDuplicates(Map<String, long[]> hashes, FileObject currentFile, AtomicBoolean cancel) throws IOException, URISyntaxException { |
| return translate(hashes, findHashOccurrences(hashes.keySet(), currentFile, cancel), currentFile); |
| } |
| |
| private static Map<String, Map<RemoteIndex, Collection<String>>> findHashOccurrences(Collection<? extends String> hashes, FileObject currentFile, AtomicBoolean cancel) throws IOException, URISyntaxException { |
| Map<URI, Collection<RemoteIndex>> indices = new LinkedHashMap<URI, Collection<RemoteIndex>>(); |
| |
| for (RemoteIndex ri : RemoteIndex.loadIndices()) { |
| try { |
| URI uri = ri.remote.toURI(); |
| Collection<RemoteIndex> list = indices.get(uri); |
| |
| if (list == null) { |
| indices.put(uri, list = new ArrayList<RemoteIndex>()); |
| } |
| |
| list.add(ri); |
| } catch (URISyntaxException ex) { |
| Exceptions.printStackTrace(ex); |
| } |
| } |
| |
| Map<String, Map<RemoteIndex, Collection<String>>> result = new LinkedHashMap<String, Map<RemoteIndex, Collection<String>>>(); |
| long localTime = 0; |
| long remoteTime = 0; |
| |
| for (RemoteIndex ri : RemoteIndex.loadIndices()) { |
| if (cancel.get()) return Collections.emptyMap(); |
| |
| Set<String> toProcess = new LinkedHashSet<String>(hashes); |
| Map<String, Map<String, Collection<? extends String>>> indexResult = new LinkedHashMap<String, Map<String, Collection<? extends String>>>(); |
| |
| long locS = System.currentTimeMillis(); |
| indexResult.putAll(findHashOccurrencesInLocalCache(ri, toProcess, cancel)); |
| localTime += System.currentTimeMillis() - locS; |
| |
| toProcess.removeAll(indexResult.keySet()); |
| |
| if (!toProcess.isEmpty()) { |
| long remS = System.currentTimeMillis(); |
| Map<String, Map<String, Collection<? extends String>>> remoteResults = findHashOccurrencesRemote(ri.remote.toURI(), toProcess, cancel); |
| remoteTime += System.currentTimeMillis() - remS; |
| |
| Map<String, Map<String, Collection<? extends String>>> toSave = new LinkedHashMap<String, Map<String, Collection<? extends String>>>(remoteResults); |
| |
| for (String hash : toProcess) { |
| if (!toSave.containsKey(hash)) { |
| toSave.put(hash, Collections.<String, Collection<? extends String>>emptyMap()); |
| } |
| } |
| |
| if (cancel.get()) return Collections.emptyMap(); |
| |
| saveToLocalCache(ri, toSave); |
| |
| indexResult.putAll(remoteResults); |
| } |
| |
| for (Entry<String, Map<String, Collection<? extends String>>> e : indexResult.entrySet()) { |
| Map<RemoteIndex, Collection<String>> hashResult = result.get(e.getKey()); |
| |
| if (hashResult == null) { |
| result.put(e.getKey(), hashResult = new LinkedHashMap<RemoteIndex, Collection<String>>()); |
| } |
| |
| for (Entry<String, Collection<? extends String>> insideHash : e.getValue().entrySet()) { |
| if (cancel.get()) return Collections.emptyMap(); |
| |
| Collection<String> dupes = hashResult.get(ri); |
| |
| if (dupes == null) { |
| hashResult.put(ri, dupes = new LinkedHashSet<String>()); |
| } |
| |
| dupes.addAll(insideHash.getValue()); |
| } |
| } |
| } |
| |
| TIMER.log(Level.FINE, "local hash duplicates", new Object[] {currentFile, localTime}); |
| TIMER.log(Level.FINE, "remote hash duplicates", new Object[] {currentFile, remoteTime}); |
| return result; |
| } |
| |
| private static Map<String, Map<String, Collection<? extends String>>> findHashOccurrencesRemote(URI remoteIndex, Iterable<? extends String> hashes, AtomicBoolean cancel) { |
| try { |
| String indexURL = remoteIndex.toASCIIString(); |
| URI u = new URI(indexURL + "/duplicates/findDuplicates?hashes=" + WebUtilities.escapeForQuery(Pojson.save(hashes))); |
| String hashesMap = WebUtilities.requestStringResponse(u, cancel); |
| |
| if (hashesMap == null || cancel.get()) { |
| //some kind of error while getting the duplicates (cannot access remote server)? |
| //ignore: |
| return Collections.emptyMap(); |
| } |
| return Pojson.load(LinkedHashMap.class, hashesMap); |
| } catch (URISyntaxException ex) { |
| //XXX: better handling? |
| Exceptions.printStackTrace(ex); |
| return Collections.emptyMap(); |
| } |
| } |
| |
| private static Map<String, Map<String, Collection<? extends String>>> findHashOccurrencesInLocalCache(RemoteIndex ri, final Iterable<? extends String> hashes, AtomicBoolean cancel) throws IOException, URISyntaxException { |
| return LocalCache.runOverLocalCache(ri, new Task<IndexReader, Map<String, Map<String, Collection<? extends String>>>>() { |
| @Override public Map<String, Map<String, Collection<? extends String>>> run(IndexReader reader, AtomicBoolean cancel) throws IOException { |
| Map<String, Map<String, Collection<String>>> result = new LinkedHashMap<String, Map<String, Collection<String>>>(); |
| |
| for (Entry<String, Collection<? extends String>> e : containsHash(reader, hashes, cancel).entrySet()) { |
| if (cancel.get()) return Collections.emptyMap(); |
| |
| Map<String, Collection<String>> forHash = result.get(e.getKey()); |
| |
| if (forHash == null) { |
| result.put(e.getKey(), forHash = new LinkedHashMap<String, Collection<String>>()); |
| } |
| |
| for (String path : e.getValue()) { |
| String segment = path.substring(0, path.indexOf('/')); |
| |
| path = path.substring(path.indexOf('/') + 1); |
| |
| Collection<String> list = forHash.get(segment); |
| |
| if (list == null) { |
| forHash.put(segment, list = new LinkedList<String>()); |
| } |
| |
| list.add(path); |
| } |
| } |
| |
| return (Map)result; //XXX |
| } |
| }, Collections.<String, Map<String, Collection<? extends String>>>emptyMap(), cancel); |
| } |
| |
| private static synchronized void saveToLocalCache(RemoteIndex ri, final Map<String, Map<String, Collection<? extends String>>> what) throws IOException, URISyntaxException { |
| LocalCache.saveToLocalCache(ri, new Task<IndexWriter, Void>() { |
| @Override public Void run(IndexWriter w, AtomicBoolean cancel) throws IOException { |
| for (Entry<String, Map<String, Collection<? extends String>>> e : what.entrySet()) { |
| Document doc = new Document(); |
| |
| doc.add(new Field("hash", e.getKey(), Store.YES, Index.NOT_ANALYZED)); |
| |
| for (Entry<String, Collection<? extends String>> pe : e.getValue().entrySet()) { |
| for (String path : pe.getValue()) { |
| doc.add(new Field("path", pe.getKey() + "/" + path, Store.YES, Index.NO)); |
| } |
| } |
| |
| w.addDocument(doc); |
| } |
| |
| return null; |
| } |
| }); |
| } |
| |
| private static List<DuplicateDescription> translate(Map<String, long[]> hashes, Map<String, Map<RemoteIndex, Collection<String>>> occ, FileObject currentFile) { |
| Map<String, Map<RemoteIndex, Collection<String>>> sorted = hashMap(); |
| Map<long[], DuplicateDescription> result = new LinkedHashMap<long[], DuplicateDescription>(); |
| List<long[]> seen = new LinkedList<long[]>(); |
| |
| sorted.putAll(occ); |
| |
| OUTER: for (Entry<String, Map<RemoteIndex, Collection<String>>> e : occ.entrySet()) { |
| long[] currentSpan = hashes.get(e.getKey()); |
| |
| for (Iterator<Entry<long[], DuplicateDescription>> it = result.entrySet().iterator(); it.hasNext();) { |
| Entry<long[], DuplicateDescription> span = it.next(); |
| |
| if (span.getKey()[0] <= currentSpan[0] && span.getKey()[1] >= currentSpan[1]) { |
| continue OUTER; |
| } |
| |
| if (currentSpan[0] <= span.getKey()[0] && currentSpan[1] >= span.getKey()[1]) { |
| it.remove(); |
| } |
| } |
| |
| if (currentSpan[0] == (-1) || currentSpan[1] == (-1)) continue; |
| |
| seen.add(currentSpan); |
| |
| String longest = e.getKey(); |
| List<Span> foundDuplicates = new LinkedList<Span>(); |
| |
| for (Entry<RemoteIndex, Collection<String>> root2Occurrences : e.getValue().entrySet()) { |
| FileObject localRoot = URLMapper.findFileObject(root2Occurrences.getKey().getLocalFolder()); |
| |
| for (String cand : root2Occurrences.getValue()) { |
| FileObject o = localRoot.getFileObject(cand); |
| |
| if (o == null) continue; //XXX log! |
| if (areEquivalent(currentFile, o)) continue; |
| |
| foundDuplicates.add(new Span(o, -1, -1)); |
| } |
| } |
| |
| if (foundDuplicates.isEmpty()) continue; |
| |
| DuplicateDescription current = DuplicateDescription.of(foundDuplicates, getValue(longest), longest); |
| |
| result.put(currentSpan, current); |
| } |
| |
| return new LinkedList<DuplicateDescription>(result.values()); |
| } |
| |
| private static boolean areEquivalent(FileObject f1, FileObject f2) { |
| return f1.equals(f2); |
| } |
| |
| private static long getValue(String encoded) { |
| return Long.parseLong(encoded.substring(encoded.lastIndexOf(":") + 1)); |
| } |
| |
| private static <T> TreeMap<String, T> hashMap() { |
| return new TreeMap<String, T>(new Comparator<String>() { |
| public int compare(String arg0, String arg1) { |
| return (int) Math.signum(getValue(arg1) - getValue(arg0)); |
| } |
| }); |
| } |
| |
| private static Map<String, Collection<? extends String>> containsHash(IndexReader reader, Iterable<? extends String> hashes, AtomicBoolean cancel) throws IOException { |
| Map<String, Collection<? extends String>> result = new LinkedHashMap<String, Collection<? extends String>>(); |
| |
| for (String hash : hashes) { |
| if (cancel.get()) return Collections.emptyMap(); |
| |
| Collection<String> found = new LinkedList<String>(); |
| Query query = new TermQuery(new Term("hash", hash)); |
| Searcher s = new IndexSearcher(reader); |
| BitSet matchingDocuments = new BitSet(reader.maxDoc()); |
| Collector c = new BitSetCollector(matchingDocuments); |
| |
| s.search(query, c); |
| |
| boolean wasFound = false; |
| |
| for (int docNum = matchingDocuments.nextSetBit(0); docNum >= 0; docNum = matchingDocuments.nextSetBit(docNum + 1)) { |
| if (cancel.get()) return Collections.emptyMap(); |
| |
| final Document doc = reader.document(docNum); |
| |
| found.addAll(Arrays.asList(doc.getValues("path"))); |
| wasFound = true; |
| } |
| |
| if (wasFound) { |
| result.put(hash, found); |
| } |
| } |
| |
| return result; |
| } |
| |
| } |