| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.netbeans.modules.maven.indexer; |
| |
| import java.io.ByteArrayInputStream; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.nio.charset.Charset; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Enumeration; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.TreeSet; |
| import java.util.function.Predicate; |
| import java.util.jar.JarEntry; |
| import java.util.jar.JarFile; |
| import java.util.logging.Level; |
| import java.util.logging.Logger; |
| import java.util.zip.CRC32; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field.Index; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.ScoreDoc; |
| import org.apache.lucene.search.TopScoreDocCollector; |
| import org.apache.maven.index.ArtifactContext; |
| import org.apache.maven.index.ArtifactInfo; |
| import org.apache.maven.index.Field; |
| import org.apache.maven.index.Indexer; |
| import org.apache.maven.index.IndexerField; |
| import org.apache.maven.index.IndexerFieldVersion; |
| import org.apache.maven.index.context.IndexUtils; |
| import org.apache.maven.index.context.IndexingContext; |
| import org.apache.maven.index.creator.AbstractIndexCreator; |
| import org.apache.maven.index.creator.MinimalArtifactInfoIndexCreator; |
| import org.apache.maven.index.expr.StringSearchExpression; |
| import org.codehaus.plexus.util.Base64; |
| import org.netbeans.modules.classfile.ClassFile; |
| import org.netbeans.modules.classfile.ClassName; |
| import org.netbeans.modules.maven.indexer.api.NBVersionInfo; |
| import org.netbeans.modules.maven.indexer.api.RepositoryQueries.ClassUsage; |
| |
| /** |
| * Scans classes in (local) JARs for their Java dependencies. |
| */ |
| class ClassDependencyIndexCreator extends AbstractIndexCreator { |
| |
/** Logger used for indexing and search diagnostics of this creator. */
private static final Logger LOG = Logger.getLogger(ClassDependencyIndexCreator.class.getName());

/** Key of the index field that stores the hashed class dependencies. */
private static final String NB_DEPENDENCY_CLASSES = "nbdc";

/** Stored and analyzed index field, keyed by {@link #NB_DEPENDENCY_CLASSES}. */
private static final IndexerField FLD_NB_DEPENDENCY_CLASS = new IndexerField(
        new Field(null, "urn:NbClassDependenciesIndexCreator", NB_DEPENDENCY_CLASSES, "Java dependencies"),
        IndexerFieldVersion.V3,
        NB_DEPENDENCY_CLASSES,
        "Java dependencies",
        Store.YES,
        Index.ANALYZED);

/**
 * Creates the index creator, identified by this class's name.
 * {@link MinimalArtifactInfoIndexCreator#ID} is handed to the superclass as the
 * second argument (presumably the list of creators this one depends on).
 */
ClassDependencyIndexCreator() {
    super(
            ClassDependencyIndexCreator.class.getName(),
            Arrays.asList(MinimalArtifactInfoIndexCreator.ID));
}
| |
| // XXX should rather be Map<ArtifactInfo,...> so we do not rely on interleaving of populateArtifactInfo vs. updateDocument |
| /** class/in/this/Jar -> [foreign/Class, other/foreign/Nested$Class] */ |
| private Map<String,Set<String>> classDeps; |
| |
| @Override |
| public void populateArtifactInfo(ArtifactContext context) throws IOException { |
| classDeps = null; |
| ArtifactInfo ai = context.getArtifactInfo(); |
| if (ai.getClassifier() != null) { |
| return; |
| } |
| if ("pom".equals(ai.getPackaging()) || ai.getFileExtension().endsWith(".lastUpdated")) { |
| return; |
| } |
| File jar = context.getArtifact(); |
| if (jar == null || !jar.isFile()) { |
| LOG.log(Level.FINER, "no artifact for {0}", ai); // not a big deal, maybe just *.pom (or *.pom + *.nbm) here |
| return; |
| } |
| if (jar.length() == 0) { |
| LOG.log(Level.FINER, "zero length jar for {0}", ai); // Don't try to index zero length files |
| return; |
| } |
| String packaging = ai.getPackaging(); |
| if (packaging == null || (!packaging.equals("jar") && !isArchiveFile(jar))) { |
| LOG.log(Level.FINE, "skipping artifact {0} with unrecognized packaging based on {1}", new Object[] {ai, jar}); |
| return; |
| } |
| LOG.log(Level.FINER, "reading {0}", jar); |
| classDeps = new HashMap<>(); |
| read(jar, (String name, InputStream stream, Set<String> classes) -> { |
| try { |
| addDependenciesToMap(name, stream, classDeps, classes, jar); |
| } catch (IOException ex) { |
| LOG.log(Level.INFO, "Exception indexing " + jar, ex); |
| } |
| }); |
| } |
| |
| // adapted from FileUtil, since we do not want to have to use FileObject's here |
| private static boolean isArchiveFile(File jar) throws IOException { |
| InputStream in = new FileInputStream(jar); |
| try { |
| byte[] buffer = new byte[4]; |
| return in.read(buffer, 0, 4) == 4 && (Arrays.equals(ZIP_HEADER_1, buffer) || Arrays.equals(ZIP_HEADER_2, buffer)); |
| } finally { |
| in.close(); |
| } |
| } |
| private static final byte[] ZIP_HEADER_1 = {80, 75, 3, 4}; |
| private static final byte[] ZIP_HEADER_2 = {80, 75, 5, 6}; |
| |
| @Override public boolean updateArtifactInfo(Document document, ArtifactInfo artifactInfo) { |
| return false; |
| } |
| |
| @Override public void updateDocument(ArtifactInfo ai, Document doc) { |
| if (classDeps == null || classDeps.isEmpty()) { |
| return; |
| } |
| if (ai.getClassNames() == null) { |
| // Might be *.hpi, *.war, etc. - so JarFileContentsIndexCreator ignores it (and our results would anyway be wrong due to WEB-INF/classes/ prefix) |
| LOG.log(Level.FINE, "no class names in index for {0}; therefore cannot store class usages", ai); |
| return; |
| } |
| StringBuilder b = new StringBuilder(); |
| String[] classNamesSplit = ai.getClassNames().split("\n"); |
| for (String referrerTopLevel : classNamesSplit) { |
| Set<String> referees = classDeps.remove(referrerTopLevel.substring(1)); |
| if (referees != null) { |
| for (String referee : referees) { |
| b.append(crc32base64(referee)); |
| b.append(' '); |
| } |
| } |
| b.append(' '); |
| } |
| if (!classDeps.isEmpty()) { |
| // E.g. findbugs-1.2.0.jar has TigerSubstitutes.class, TigerSubstitutesTest$Foo.class, etc., but no TigerSubstitutesTest.class (?) |
| // Or guice-3.0-rc2.jar has e.g. $Transformer.class with no source equivalent. |
| LOG.log(Level.FINE, "found dependencies for {0} from classes {1} not among {2}", new Object[] {ai, classDeps.keySet(), Arrays.asList(classNamesSplit)}); |
| } |
| LOG.log(Level.FINER, "Class dependencies index field: {0}", b); |
| // XXX is it possible to _store_ something more compact (binary) using a custom tokenizer? |
| // seems like DefaultIndexingContext hardcodes NexusAnalyzer |
| doc.add(FLD_NB_DEPENDENCY_CLASS.toField(b.toString())); |
| } |
| |
| static void search(String className, Indexer indexer, Collection<IndexingContext> contexts, List<? super ClassUsage> results) throws IOException { |
| String searchString = crc32base64(className.replace('.', '/')); |
| Query refClassQuery = indexer.constructQuery(ClassDependencyIndexCreator.FLD_NB_DEPENDENCY_CLASS.getOntology(), new StringSearchExpression(searchString)); |
| TopScoreDocCollector collector = TopScoreDocCollector.create(NexusRepositoryIndexerImpl.MAX_RESULT_COUNT, null); |
| for (IndexingContext context : contexts) { |
| IndexSearcher searcher = context.acquireIndexSearcher(); |
| try { |
| searcher.search(refClassQuery, collector); |
| ScoreDoc[] hits = collector.topDocs().scoreDocs; |
| LOG.log(Level.FINER, "for {0} ~ {1} found {2} hits", new Object[] {className, searchString, hits.length}); |
| for (ScoreDoc hit : hits) { |
| int docId = hit.doc; |
| Document d = searcher.doc(docId); |
| String fldValue = d.get(ClassDependencyIndexCreator.NB_DEPENDENCY_CLASSES); |
| LOG.log(Level.FINER, "{0} uses: {1}", new Object[] {className, fldValue}); |
| Set<String> refClasses = parseField(searchString, fldValue, d.get(ArtifactInfo.NAMES)); |
| if (!refClasses.isEmpty()) { |
| ArtifactInfo ai = IndexUtils.constructArtifactInfo(d, context); |
| if (ai != null) { |
| ai.setRepository(context.getRepositoryId()); |
| List<NBVersionInfo> version = NexusRepositoryIndexerImpl.convertToNBVersionInfo(Collections.singleton(ai)); |
| if (!version.isEmpty()) { |
| results.add(new ClassUsage(version.get(0), refClasses)); |
| } |
| } |
| } |
| } |
| } finally { |
| context.releaseIndexSearcher(searcher); |
| } |
| } |
| } |
| private static Set<String> parseField(String refereeCRC, String field, String referrersNL) { |
| Set<String> referrers = new TreeSet<String>(); |
| int p = 0; |
| for (String referrer : referrersNL.split("\n")) { |
| while (true) { |
| if (field.charAt(p) == ' ') { |
| p++; |
| break; |
| } |
| if (field.substring(p, p + 6).equals(refereeCRC)) { |
| referrers.add(referrer.substring(1).replace('/', '.')); |
| } |
| p += 7; |
| } |
| } |
| return referrers; |
| } |
| |
| static final Predicate<String> JDK_CLASS_TEST = new MatchWords(new String[]{ |
| "apple/applescript", "apple/laf", "apple/launcher", "apple/security", |
| "com/apple/concurrent", "com/apple/eawt", "com/apple/eio", "com/apple/laf", "com/oracle/net", |
| "com/oracle/nio", "com/oracle/util", "com/oracle/webservices", "com/oracle/xmlns", |
| "com/sun/accessibility", "com/sun/activation", "com/sun/awt", "com/sun/beans", "com/sun/corba", |
| "com/sun/demo", "com/sun/image", "com/sun/imageio", "com/sun/istack", "com/sun/java", |
| "com/sun/java_cup", "com/sun/jmx", "com/sun/jndi", "com/sun/management", "com/sun/media", |
| "com/sun/naming", "com/sun/net", "com/sun/nio", "com/sun/org", "com/sun/rmi", "com/sun/rowset", |
| "com/sun/security", "com/sun/swing", "com/sun/tracing", "com/sun/xml", "java/applet", "java/awt", |
| "java/awt/color", "java/awt/datatransfer", "java/awt/dnd", "java/awt/event", "java/awt/font", |
| "java/awt/geom", "java/awt/im", "java/awt/image", "java/awt/peer", "java/awt/print", |
| "java/beans", "java/beans/beancontext", "java/io", "java/lang", "java/lang/annotation", |
| "java/lang/instrument", "java/lang/invoke", "java/lang/management", "java/lang/ref", |
| "java/lang/reflect", "java/math", "java/net", "java/nio", "java/nio/channels", "java/nio/charset", |
| "java/nio/file", "java/rmi", "java/rmi/activation", "java/rmi/dgc", "java/rmi/registry", |
| "java/rmi/server", "java/security", "java/security/acl", "java/security/cert", |
| "java/security/interfaces", "java/security/spec", "java/sql", "java/text", "java/text/spi", "java/time", |
| "java/time/chrono", "java/time/format", "java/time/temporal", "java/time/zone", "java/util", |
| "java/util/concurrent", "java/util/function", "java/util/jar", "java/util/logging", |
| "java/util/prefs", "java/util/regex", "java/util/spi", "java/util/stream", "java/util/zip", |
| "javax/accessibility", "javax/activation", "javax/activity", "javax/annotation", |
| "javax/annotation/processing", "javax/imageio", "javax/imageio/event", "javax/imageio/metadata", |
| "javax/imageio/plugins", "javax/imageio/spi", "javax/imageio/stream", "javax/jws", "javax/jws/soap", |
| "javax/lang/model", "javax/management", "javax/management/loading", |
| "javax/management/modelmbean", "javax/management/monitor", "javax/management/openmbean", |
| "javax/management/relation", "javax/management/remote", "javax/management/timer", "javax/naming", |
| "javax/naming/directory", "javax/naming/event", "javax/naming/ldap", "javax/naming/spi", "javax/net", |
| "javax/net/ssl", "javax/print", "javax/print/attribute", "javax/print/event", "javax/rmi", |
| "javax/rmi/CORBA", "javax/rmi/ssl", "javax/script", "javax/security/auth", |
| "javax/security/cert", "javax/security/sasl", "javax/smartcardio", "javax/sound/midi", |
| "javax/sound/sampled", "javax/sql", "javax/sql/rowset", "javax/swing", "javax/swing/border", |
| "javax/swing/colorchooser", "javax/swing/event", "javax/swing/filechooser", "javax/swing/plaf", |
| "javax/swing/table", "javax/swing/text", "javax/swing/tree", "javax/swing/undo", "javax/tools", |
| "javax/transaction", "javax/transaction/xa", "javax/xml", "javax/xml/bind", "javax/xml/crypto", |
| "javax/xml/datatype", "javax/xml/namespace", "javax/xml/parsers", "javax/xml/soap", |
| "javax/xml/stream", "javax/xml/transform", "javax/xml/validation", "javax/xml/ws", |
| "javax/xml/xpath", "jdk/internal/cmm", "jdk/internal/instrumentation", "jdk/internal/org", |
| "jdk/internal/util", "jdk/management/cmm", "jdk/management/resource", "jdk/net", |
| "jdk/xml/internal", "org/ietf/jgss", "org/jcp/xml", "org/omg/CORBA", "org/omg/CORBA_2_3", |
| "org/omg/CosNaming", "org/omg/Dynamic", "org/omg/DynamicAny", "org/omg/IOP", "org/omg/Messaging", |
| "org/omg/PortableInterceptor", "org/omg/PortableServer", "org/omg/SendingContext", "org/omg/stub", |
| "org/w3c/dom", "org/xml/sax" |
| }); |
| |
| /** |
| * @param referrer a referring class, as {@code pkg/Outer$Inner} |
| * @param data its bytecode |
| * @param depsMap map from referring outer classes (as {@code pkg/Outer}) to referred-to classes (as {@code pkg/Outer$Inner}) |
| * @param siblings other referring classes in the same artifact (including this one), as {@code pkg/Outer$Inner} |
| * @param jar the jar file, for diagnostics |
| */ |
| private static void addDependenciesToMap(String referrer, InputStream data, Map<String, Set<String>> depsMap, Set<String> siblings, File jar) throws IOException { |
| int shell = referrer.indexOf('$', referrer.lastIndexOf('/') + 1); |
| String referrerTopLevel = shell == -1 ? referrer : referrer.substring(0, shell); |
| for (String referee : dependencies(data, jar)) { |
| if (referrer.equals(referee)) { |
| continue; |
| } |
| if (siblings.contains(referee)) { |
| continue; // in same JAR, not interesting |
| } |
| if (JDK_CLASS_TEST.test(referee)) { |
| continue; |
| } |
| Set<String> referees = depsMap.get(referrerTopLevel); |
| if (referees == null) { |
| referees = new HashSet<>(); |
| depsMap.put(referrerTopLevel, referees); |
| } |
| referees.add(referee); |
| } |
| } |
| |
/**
 * Callback invoked by {@code read} for each class entry of a JAR.
 */
@FunctionalInterface
interface JarClassEntryConsumer {
    /**
     * @param name the entry name without its {@code .class} suffix
     * @param classData the content of the entry
     * @param siblings the names, in the same form, of all class entries in the JAR
     * @throws IOException if the class data cannot be processed
     */
    void accept(String name, InputStream classData, Set<String> siblings) throws IOException;
}
| |
| // XXX in unit tests, indexing is always single-threaded, |
| // in which case the byte array can be a field instead of |
| // a thread local. Not clear if that is the case in the IDE. |
| final ThreadLocal<byte[]> BYTES = new ThreadLocal<>(); |
| // A reasonable base array size that will accommodate typical |
| // class files, to avoid reallocating more than necessary |
| private static final int MIN_ARRAY_SIZE = 16384; |
| |
| byte[] bytes(int size) { |
| // There is a pretty significant performance benefit |
| // to not allocating vast numbers of byte arrays |
| byte[] result = BYTES.get(); |
| if (result == null || result.length < size) { |
| result = new byte[Math.max(MIN_ARRAY_SIZE, size)]; |
| BYTES.set(result); |
| } |
| return result; |
| } |
| |
| void read(File jar, JarClassEntryConsumer consumer) throws IOException { |
| Set<String> classNames = new HashSet<>(); |
| try (JarFile jf = new JarFile(jar, false)) { |
| // XXX the original code ignores siblings by first having a list |
| // of the class names. Getting this before processing JAR entries |
| // means iterating the zip index twice. Not horrible, but would |
| // be nice to avoid it |
| Enumeration<JarEntry> e = jf.entries(); |
| while (e.hasMoreElements()) { |
| JarEntry entry = e.nextElement(); |
| String name = entry.getName(); |
| if (!name.endsWith(".class")) { |
| continue; |
| } |
| String clazz = name.substring(0, name.length() - 6); |
| classNames.add(clazz); |
| } |
| e = jf.entries(); |
| while (e.hasMoreElements()) { |
| JarEntry entry = e.nextElement(); |
| String name = entry.getName(); |
| if (!name.endsWith(".class")) { |
| continue; |
| } |
| int size = Math.max((int) entry.getSize(), 0); |
| if (size > 0) { |
| // Parsing is considerably faster if the data is preloaded |
| // into a byte array, likely due to random access |
| byte[] target = bytes(size); |
| try (InputStream in = jf.getInputStream(entry)) { |
| int pos = 0; |
| int count = 0; |
| while (count != -1 && pos < size) { |
| count = in.read(target, pos, size - pos); |
| pos += count == -1 ? 0 : count; |
| } |
| } |
| try (InputStream in = new ByteArrayInputStream(target, 0, size)) { |
| String clazz = name.substring(0, name.length() - 6); |
| consumer.accept(clazz, in, classNames); |
| } |
| } |
| } |
| } catch (SecurityException x) { |
| throw new IOException(x); |
| } |
| } |
| |
| // adapted from org.netbeans.nbbuild.VerifyClassLinkage |
| private static Collection<String> dependencies(InputStream data, File jar) throws IOException { |
| Set<String> result = new HashSet<String>(); |
| ClassFile cf = new ClassFile(data); |
| |
| Set<ClassName> cl = cf.getAllClassNames(); |
| for (ClassName className : cl) { |
| result.add(className.getInternalName()); |
| } |
| return result; |
| } |
| |
| static final List<IndexerField> INDEXER_FIELDS = Collections.singletonList(FLD_NB_DEPENDENCY_CLASS); |
| @Override |
| public Collection<IndexerField> getIndexerFields() { |
| return INDEXER_FIELDS; |
| } |
| |
| /** |
| * @param s a string, such as a class name |
| * @return the CRC-32 of its UTF-8 representation, as big-endian Base-64 without padding (so six chars), with _ for + (safer for Lucene) |
| */ |
| static String crc32base64(String s) { |
| crc.reset(); |
| crc.update(s.getBytes(UTF8)); |
| long v = crc.getValue(); |
| byte[] b64 = Base64.encodeBase64(new byte[] {(byte) (v >> 24 & 0xFF), (byte) (v >> 16 & 0xFF), (byte) (v >> 8 & 0xFF), (byte) (v & 0xFF)}); |
| assert b64.length == 8; |
| assert b64[6] == '='; |
| assert b64[7] == '='; |
| return new String(b64, 0, 6, LATIN1).replace('+', '_'); |
| } |
| private static final CRC32 crc = new CRC32(); |
| private static final Charset UTF8 = Charset.forName("UTF-8"); |
| private static final Charset LATIN1 = Charset.forName("ISO-8859-1"); |
| |
| } |