Merge pull request #216 from apache/feature/215-Improve-descriptor-scanning-performance-when-there-are-many-classloaders

Issue #215: Improve descriptor scanning performance when there are many classloaders
diff --git a/uimafit-core/src/main/java/org/apache/uima/fit/factory/FsIndexFactory.java b/uimafit-core/src/main/java/org/apache/uima/fit/factory/FsIndexFactory.java
index 7fa0ced..b45ed7f 100644
--- a/uimafit-core/src/main/java/org/apache/uima/fit/factory/FsIndexFactory.java
+++ b/uimafit-core/src/main/java/org/apache/uima/fit/factory/FsIndexFactory.java
@@ -26,7 +26,9 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.ServiceLoader;
 import java.util.WeakHashMap;
 
@@ -34,7 +36,9 @@
 import org.apache.uima.fit.descriptor.FsIndexKey;
 import org.apache.uima.fit.internal.ClassLoaderUtils;
 import org.apache.uima.fit.internal.MetaDataType;
+import org.apache.uima.fit.internal.ResourceManagerFactory;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.resource.metadata.FsIndexCollection;
 import org.apache.uima.resource.metadata.FsIndexDescription;
 import org.apache.uima.resource.metadata.FsIndexKeyDescription;
@@ -69,17 +73,22 @@
 
   private static final Object CREATE_LOCK = new Object();
 
+  private static final FsIndexCollection PLACEHOLDER = new FsIndexCollection_impl();
+
+  private static WeakHashMap<String, FsIndexCollection> fsIndexCollections;
+
   private static WeakHashMap<ClassLoader, String[]> fsIndexLocationsByClassloader;
 
   private static WeakHashMap<ClassLoader, FsIndexCollection> fsIndexCollectionsByClassloader;
 
   static {
+    fsIndexCollections = new WeakHashMap<>();
     fsIndexLocationsByClassloader = new WeakHashMap<>();
     fsIndexCollectionsByClassloader = new WeakHashMap<>();
   }
 
   private FsIndexFactory() {
-    // Factory class
+    // This class is not meant to be instantiated
   }
 
   /**
@@ -282,9 +291,10 @@
     FsIndexCollection aggFsIdxCol = fsIndexCollectionsByClassloader.get(cl);
     if (aggFsIdxCol == null) {
       synchronized (CREATE_LOCK) {
+        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         List<FsIndexDescription> fsIndexList = new ArrayList<>();
 
-        loadFsIndexCollectionsFromScannedLocations(fsIndexList);
+        loadFsIndexCollectionsFromScannedLocations(fsIndexList, resMgr);
         loadFsIndexCollectionsfromSPIs(fsIndexList);
 
         aggFsIdxCol = createFsIndexCollection(
@@ -296,13 +306,19 @@
     return (FsIndexCollection) aggFsIdxCol.clone();
   }
 
-  static void loadFsIndexCollectionsFromScannedLocations(List<FsIndexDescription> fsIndexList)
-          throws ResourceInitializationException {
+  static void loadFsIndexCollectionsFromScannedLocations(List<FsIndexDescription> fsIndexList,
+          ResourceManager aResMgr) throws ResourceInitializationException {
     for (String location : scanIndexDescriptors()) {
       try {
-        XMLInputSource xmlInput = new XMLInputSource(location);
-        FsIndexCollection fsIdxCol = getXMLParser().parseFsIndexCollection(xmlInput);
-        fsIdxCol.resolveImports();
+        FsIndexCollection fsIdxCol = fsIndexCollections.get(location);
+
+        if (fsIdxCol == PLACEHOLDER) {
+          // If the description has not yet been loaded, load it
+          fsIdxCol = getXMLParser().parseFsIndexCollection(new XMLInputSource(location));
+          fsIdxCol.resolveImports(aResMgr);
+          fsIndexCollections.put(location, fsIdxCol);
+        }
+
         fsIndexList.addAll(asList(fsIdxCol.getFsIndexes()));
         LOG.debug("Detected index at [{}]", location);
       } catch (IOException e) {
@@ -339,12 +355,30 @@
       String[] indexLocations = fsIndexLocationsByClassloader.get(cl);
       if (indexLocations == null) {
         indexLocations = scanDescriptors(MetaDataType.FS_INDEX);
+        internFsIndexCollectionLocations(indexLocations);
         fsIndexLocationsByClassloader.put(cl, indexLocations);
       }
       return indexLocations;
     }
   }
 
+  private static void internFsIndexCollectionLocations(String[] indexDescriptorLocations) {
+    // We "intern" the location strings because we will use them as keys in the WeakHashMap
+    // caching the parsed index definitions. As part of this process, we put a PLACEHOLDER into the
+    // map which is replaced when the index collection is actually loaded
+    Map<String, String> locationStrings = new HashMap<>();
+    fsIndexCollections.keySet().stream().forEach(loc -> locationStrings.put(loc, loc));
+    for (int i = 0; i < indexDescriptorLocations.length; i++) {
+      String existingLocString = locationStrings.get(indexDescriptorLocations[i]);
+      if (existingLocString == null) {
+        fsIndexCollections.put(indexDescriptorLocations[i], PLACEHOLDER);
+        locationStrings.put(indexDescriptorLocations[i], indexDescriptorLocations[i]);
+      } else {
+        indexDescriptorLocations[i] = existingLocString;
+      }
+    }
+  }
+
   /**
    * Force rescan of index descriptors. The next call to {@link #scanIndexDescriptors()} will rescan
    * all auto-import locations.
@@ -353,6 +387,7 @@
     synchronized (SCAN_LOCK) {
       fsIndexLocationsByClassloader.clear();
       fsIndexCollectionsByClassloader.clear();
+      fsIndexCollections.clear();
     }
   }
 }
diff --git a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypePrioritiesFactory.java b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypePrioritiesFactory.java
index e410943..17f12bb 100644
--- a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypePrioritiesFactory.java
+++ b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypePrioritiesFactory.java
@@ -24,7 +24,9 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.ServiceLoader;
 import java.util.WeakHashMap;
 
@@ -51,11 +53,16 @@
 
   private static final Object CREATE_LOCK = new Object();
 
+  private static final TypePriorities PLACEHOLDER = new TypePriorities_impl();
+
+  private static WeakHashMap<String, TypePriorities> typePriorities;
+
   private static WeakHashMap<ClassLoader, String[]> typePrioritesLocationsByClassloader;
 
   private static WeakHashMap<ClassLoader, TypePriorities> typePrioritiesByClassloader;
 
   static {
+    typePriorities = new WeakHashMap<>();
     typePrioritesLocationsByClassloader = new WeakHashMap<>();
     typePrioritiesByClassloader = new WeakHashMap<>();
   }
@@ -97,12 +104,12 @@
    * @return type priorities created from the ordered type names
    */
   public static TypePriorities createTypePriorities(String... prioritizedTypeNames) {
-    TypePriorities typePriorities = new TypePriorities_impl();
-    TypePriorityList typePriorityList = typePriorities.addPriorityList();
+    TypePriorities priorities = new TypePriorities_impl();
+    TypePriorityList typePriorityList = priorities.addPriorityList();
     for (String typeName : prioritizedTypeNames) {
       typePriorityList.addType(typeName);
     }
-    return typePriorities;
+    return priorities;
   }
 
   /**
@@ -120,12 +127,12 @@
     TypePriorities aggTypePriorities = typePrioritiesByClassloader.get(cl);
     if (aggTypePriorities == null) {
       synchronized (CREATE_LOCK) {
+        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         List<TypePriorities> typePrioritiesList = new ArrayList<>();
 
-        loadTypePrioritiesFromScannedLocations(typePrioritiesList);
+        loadTypePrioritiesFromScannedLocations(typePrioritiesList, resMgr);
         loadTypePrioritiesFromSPIs(typePrioritiesList);
 
-        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         aggTypePriorities = CasCreationUtils.mergeTypePriorities(typePrioritiesList, resMgr);
         typePrioritiesByClassloader.put(cl, aggTypePriorities);
       }
@@ -134,14 +141,20 @@
     return (TypePriorities) aggTypePriorities.clone();
   }
 
-  static void loadTypePrioritiesFromScannedLocations(List<TypePriorities> typePrioritiesList)
-          throws ResourceInitializationException {
+  static void loadTypePrioritiesFromScannedLocations(List<TypePriorities> typePrioritiesList,
+          ResourceManager aResMgr) throws ResourceInitializationException {
     for (String location : scanTypePrioritiesDescriptors()) {
       try {
-        XMLInputSource xmlInput = new XMLInputSource(location);
-        TypePriorities typePriorities = getXMLParser().parseTypePriorities(xmlInput);
-        typePriorities.resolveImports();
-        typePrioritiesList.add(typePriorities);
+        TypePriorities priorities = typePriorities.get(location);
+
+        if (priorities == PLACEHOLDER) {
+          // If the description has not yet been loaded, load it
+          priorities = getXMLParser().parseTypePriorities(new XMLInputSource(location));
+          priorities.resolveImports(aResMgr);
+          typePriorities.put(location, priorities);
+        }
+
+        typePrioritiesList.add(priorities);
         LOG.debug("Detected type priorities at [{}]", location);
       } catch (IOException e) {
         throw new ResourceInitializationException(e);
@@ -176,12 +189,30 @@
       String[] typePrioritesLocations = typePrioritesLocationsByClassloader.get(cl);
       if (typePrioritesLocations == null) {
         typePrioritesLocations = scanDescriptors(MetaDataType.TYPE_PRIORITIES);
+        internTypePrioritiesLocations(typePrioritesLocations);
         typePrioritesLocationsByClassloader.put(cl, typePrioritesLocations);
       }
       return typePrioritesLocations;
     }
   }
 
+  private static void internTypePrioritiesLocations(String[] typeDescriptorLocations) {
+    // We "intern" the location strings because we will use them as keys in the WeakHashMap
+    // caching the parsed type priorities. As part of this process, we put a PLACEHOLDER into the
+    // map which is replaced when the type priorities are actually loaded
+    Map<String, String> locationStrings = new HashMap<>();
+    typePriorities.keySet().stream().forEach(loc -> locationStrings.put(loc, loc));
+    for (int i = 0; i < typeDescriptorLocations.length; i++) {
+      String existingLocString = locationStrings.get(typeDescriptorLocations[i]);
+      if (existingLocString == null) {
+        typePriorities.put(typeDescriptorLocations[i], PLACEHOLDER);
+        locationStrings.put(typeDescriptorLocations[i], typeDescriptorLocations[i]);
+      } else {
+        typeDescriptorLocations[i] = existingLocString;
+      }
+    }
+  }
+
   /**
    * Force rescan of type priorities descriptors. The next call to
    * {@link #scanTypePrioritiesDescriptors()} will rescan all auto-import locations.
@@ -190,6 +221,7 @@
     synchronized (SCAN_LOCK) {
       typePrioritesLocationsByClassloader.clear();
       typePrioritiesByClassloader.clear();
+      typePriorities.clear();
     }
   }
 }
diff --git a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
index 74de8fb..1b611d8 100644
--- a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
+++ b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
@@ -24,7 +24,9 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.ServiceLoader;
 import java.util.WeakHashMap;
 
@@ -51,11 +53,16 @@
 
   private static final Object CREATE_LOCK = new Object();
 
+  private static final TypeSystemDescription PLACEHOLDER = new TypeSystemDescription_impl();
+
+  private static WeakHashMap<String, TypeSystemDescription> typeDescriptors;
+
   private static WeakHashMap<ClassLoader, String[]> typeDescriptorLocationsByClassloader;
 
   private static WeakHashMap<ClassLoader, TypeSystemDescription> typeDescriptorByClassloader;
 
   static {
+    typeDescriptors = new WeakHashMap<>();
     typeDescriptorLocationsByClassloader = new WeakHashMap<>();
     typeDescriptorByClassloader = new WeakHashMap<>();
   }
@@ -124,13 +131,13 @@
     TypeSystemDescription tsd = typeDescriptorByClassloader.get(cl);
     if (tsd == null) {
       synchronized (CREATE_LOCK) {
+        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         List<TypeSystemDescription> tsdList = new ArrayList<>();
 
-        loadTypeSystemDescriptionsFromScannedLocations(tsdList);
+        loadTypeSystemDescriptionsFromScannedLocations(tsdList, resMgr);
         loadTypeSystemDescriptionsFromSPIs(tsdList);
 
         LOG.trace("Merging type systems and resolving imports...");
-        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         tsd = mergeTypeSystems(tsdList, resMgr);
         typeDescriptorByClassloader.put(cl, tsd);
       }
@@ -138,12 +145,20 @@
     return (TypeSystemDescription) tsd.clone();
   }
 
-  static void loadTypeSystemDescriptionsFromScannedLocations(List<TypeSystemDescription> tsdList)
-          throws ResourceInitializationException {
+  static void loadTypeSystemDescriptionsFromScannedLocations(List<TypeSystemDescription> tsdList,
+          ResourceManager aResMgr) throws ResourceInitializationException {
     for (String location : scanTypeDescriptors()) {
       try {
-        XMLInputSource xmlInputType1 = new XMLInputSource(location);
-        tsdList.add(getXMLParser().parseTypeSystemDescription(xmlInputType1));
+        TypeSystemDescription description = typeDescriptors.get(location);
+
+        if (description == PLACEHOLDER) {
+          // If the description has not yet been loaded, load it
+          description = getXMLParser().parseTypeSystemDescription(new XMLInputSource(location));
+          description.resolveImports(aResMgr);
+          typeDescriptors.put(location, description);
+        }
+
+        tsdList.add(description);
         LOG.debug("Detected type system at [{}]", location);
       } catch (IOException e) {
         throw new ResourceInitializationException(e);
@@ -178,14 +193,36 @@
     synchronized (SCAN_LOCK) {
       ClassLoader cl = ClassLoaderUtils.findClassloader();
       String[] typeDescriptorLocations = typeDescriptorLocationsByClassloader.get(cl);
+
       if (typeDescriptorLocations == null) {
         typeDescriptorLocations = scanDescriptors(MetaDataType.TYPE_SYSTEM);
+
+        internTypeDescriptorLocations(typeDescriptorLocations);
+
         typeDescriptorLocationsByClassloader.put(cl, typeDescriptorLocations);
       }
+
       return typeDescriptorLocations;
     }
   }
 
+  private static void internTypeDescriptorLocations(String[] typeDescriptorLocations) {
+    // We "intern" the location strings because we will use them as keys in the WeakHashMap
+    // caching the parsed type systems. As part of this process, we put a PLACEHOLDER into the
+    // map which is replaced when the type system is actually loaded
+    Map<String, String> locationStrings = new HashMap<>();
+    typeDescriptors.keySet().stream().forEach(loc -> locationStrings.put(loc, loc));
+    for (int i = 0; i < typeDescriptorLocations.length; i++) {
+      String existingLocString = locationStrings.get(typeDescriptorLocations[i]);
+      if (existingLocString == null) {
+        typeDescriptors.put(typeDescriptorLocations[i], PLACEHOLDER);
+        locationStrings.put(typeDescriptorLocations[i], typeDescriptorLocations[i]);
+      } else {
+        typeDescriptorLocations[i] = existingLocString;
+      }
+    }
+  }
+
   /**
    * Force rescan of type descriptors. The next call to {@link #scanTypeDescriptors()} will rescan
    * all auto-import locations.
@@ -195,6 +232,7 @@
     synchronized (SCAN_LOCK) {
       typeDescriptorLocationsByClassloader.clear();
       typeDescriptorByClassloader.clear();
+      typeDescriptors.clear();
     }
   }
 }
diff --git a/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java b/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
index f747d4a..13dc3cd 100644
--- a/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
+++ b/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
@@ -29,10 +29,6 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.junit.jupiter.api.BeforeEach;
 
-/**
- * 
- * 
- */
 public class ComponentTestBase {
 
   private static ThreadLocal<JCas> JCAS = new ThreadLocal<JCas>();
diff --git a/uimafit-core/src/test/java/org/apache/uima/fit/factory/FsIndexFactoryTest.java b/uimafit-core/src/test/java/org/apache/uima/fit/factory/FsIndexFactoryTest.java
index 9cfdaec..e9b659f 100644
--- a/uimafit-core/src/test/java/org/apache/uima/fit/factory/FsIndexFactoryTest.java
+++ b/uimafit-core/src/test/java/org/apache/uima/fit/factory/FsIndexFactoryTest.java
@@ -41,9 +41,11 @@
 import org.apache.uima.fit.descriptor.FsIndexCollection;
 import org.apache.uima.fit.descriptor.FsIndexKey;
 import org.apache.uima.fit.factory.spi.FsIndexCollectionProviderForTesting;
+import org.apache.uima.fit.internal.ResourceManagerFactory;
 import org.apache.uima.fit.type.Sentence;
 import org.apache.uima.fit.type.Token;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.resource.metadata.FsIndexDescription;
 import org.apache.uima.resource.metadata.FsIndexKeyDescription;
 import org.junit.jupiter.api.Test;
@@ -152,8 +154,10 @@
 
   @Test
   public void testLoadingFromScannedLocations() throws Exception {
+    ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
+
     List<FsIndexDescription> indexes = new ArrayList<>();
-    loadFsIndexCollectionsFromScannedLocations(indexes);
+    loadFsIndexCollectionsFromScannedLocations(indexes, resMgr);
     org.apache.uima.resource.metadata.FsIndexCollection fsIndexCollection = FsIndexFactory
             .createFsIndexCollection(indexes);
 
diff --git a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypePrioritiesFactoryTest.java b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypePrioritiesFactoryTest.java
index bb628e2..db317fd 100644
--- a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypePrioritiesFactoryTest.java
+++ b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypePrioritiesFactoryTest.java
@@ -31,9 +31,11 @@
 
 import org.apache.uima.cas.CAS;
 import org.apache.uima.fit.factory.spi.TypePrioritiesProviderForTesting;
+import org.apache.uima.fit.internal.ResourceManagerFactory;
 import org.apache.uima.fit.type.Sentence;
 import org.apache.uima.fit.type.Token;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.resource.metadata.TypePriorities;
 import org.apache.uima.resource.metadata.TypePriorityList;
 import org.apache.uima.util.CasCreationUtils;
@@ -88,8 +90,10 @@
 
   @Test
   public void testLoadingFromScannedLocations() throws Exception {
+    ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
+
     List<TypePriorities> allPrios = new ArrayList<>();
-    loadTypePrioritiesFromScannedLocations(allPrios);
+    loadTypePrioritiesFromScannedLocations(allPrios, resMgr);
     TypePriorities prios = CasCreationUtils.mergeTypePriorities(allPrios, null);
 
     assertThat(prios.getPriorityLists().length).isEqualTo(1);
diff --git a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
index 052c36b..e7768e4 100644
--- a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
+++ b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
@@ -27,9 +27,11 @@
 import java.util.List;
 
 import org.apache.uima.fit.factory.spi.TypeSystemDescriptionProviderForTesting;
+import org.apache.uima.fit.internal.ResourceManagerFactory;
 import org.apache.uima.fit.type.AnalyzedText;
 import org.apache.uima.fit.type.Sentence;
 import org.apache.uima.fit.type.Token;
+import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.apache.uima.util.CasCreationUtils;
 import org.junit.jupiter.api.Test;
@@ -54,8 +56,10 @@
 
   @Test
   public void testLoadingFromScannedLocations() throws Exception {
+    ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
+
     List<TypeSystemDescription> tsds = new ArrayList<>();
-    loadTypeSystemDescriptionsFromScannedLocations(tsds);
+    loadTypeSystemDescriptionsFromScannedLocations(tsds, resMgr);
     TypeSystemDescription tsd = CasCreationUtils.mergeTypeSystems(tsds);
 
     assertNotNull(tsd.getType(Token.class.getName()));