/* | |
* Licensed to the Apache Software Foundation (ASF) under one | |
* or more contributor license agreements. See the NOTICE file | |
* distributed with this work for additional information | |
* regarding copyright ownership. The ASF licenses this file | |
* to you under the Apache License, Version 2.0 (the | |
* "License"); you may not use this file except in compliance | |
* with the License. You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, | |
* software distributed under the License is distributed on an | |
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
* KIND, either express or implied. See the License for the | |
* specific language governing permissions and limitations | |
* under the License. | |
*/ | |
package org.apache.uima.tools.migration; | |
import java.io.BufferedReader; | |
import java.io.File; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.io.InputStreamReader; | |
import java.net.URL; | |
import java.util.ArrayList; | |
import java.util.Collection; | |
import java.util.HashSet; | |
import java.util.Iterator; | |
import java.util.List; | |
import java.util.Set; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import org.apache.uima.util.FileUtils; | |
/** | |
* Migration utility for converting from IBM UIMA to Apache UIMA. | |
* Updates package names and does various other string replacements. | |
* Should be run on java code, descriptors, and other files that may have UIMA | |
* package names in them (e.g., launch configurations, scripts). | |
*/ | |
public class IbmUimaToApacheUima { | |
private static List replacements = new ArrayList(); | |
private static int MAX_FILE_SIZE = 1000000; //don't update files bigger than this | |
private static Set extensions = new HashSet(); | |
private static int filesScanned = 0; | |
private static int filesModified = 0; | |
private static List filesNeedingManualAttention = new ArrayList(); | |
private static Set ibmPackageNames = new HashSet(); | |
private static final Pattern IMPORT_PATTERN = Pattern.compile("(?m)^\\s*import\\s+([^;]*);\\s*$"); | |
private static final Pattern CLASS_NAME_PATTERN = Pattern.compile("public\\s+(final\\s+|abstract\\s+)*class\\s+([A-Za-z0-9_]+)"); | |
private static final Pattern GET_NEXT_INDEX_PATTERN = Pattern.compile("JCas\\.getNextIndex\\(\\)"); | |
private static final Pattern THROW_FEAT_MISSING_PATTERN = Pattern.compile("JCas\\.throwFeatMissing"); | |
private static final Pattern PACKAGE_PATTERN = Pattern.compile("(?m)^\\s*package\\s+([A-Za-z0-9_.]+)\\s*;") ; | |
private static final Pattern GETDOCANNOT_PATTERN = Pattern.compile("[Jj][Cc][Aa][Ss](\\(\\))?\\s*\\.\\s*getDocumentAnnotation\\("); | |
/** | |
* Main program. Expects one argument, the name of a directory containing files to | |
* update. Subdirectories are processed recursively. | |
* @param args Command line arguments | |
* @throws IOException if an I/O error occurs | |
*/ | |
public static void main(String[] args) throws IOException{ | |
//parse command line | |
String dir = null; | |
for (int i = 0; i < args.length; i++) { | |
if (args[i].startsWith("-")) { | |
if (args[i].equals("-ext")) { | |
if (i + 1 >= args.length) { | |
printUsageAndExit(); | |
} | |
parseCommaSeparatedList(args[++i], extensions); | |
} | |
else { | |
System.err.println("Unknown switch " + args[i]); | |
printUsageAndExit(); | |
} | |
} | |
else { | |
if (dir != null) { | |
printUsageAndExit(); | |
} | |
else { | |
dir = args[i]; | |
} | |
} | |
} | |
if (dir == null) { | |
printUsageAndExit(); | |
} | |
//read resource files | |
//map from IBM UIMA package names to Apache UIMA package names | |
readMapping("packageMapping.txt", replacements, true); | |
//other string replacements | |
readMapping("stringReplacements.txt", replacements, false); | |
//from system property, get list of file extensions to exclude | |
//do the replacements | |
System.out.println("Migrating your files..."); | |
replaceInAllFiles(new File(args[0])); | |
System.out.println("Migration complete."); | |
System.out.println("Scanned " + filesScanned + " files. " + filesModified + " files modified."); | |
if (filesNeedingManualAttention.size() > 0) { | |
System.out.println("The following files may need manual attention:"); | |
for (int i = 0; i < filesNeedingManualAttention.size(); i++) { | |
System.out.println(" " + filesNeedingManualAttention.get(i)); | |
} | |
System.out.println("See the \"Migrating from IBM UIMA to Apache UIMA\" chapter in the " + | |
"\"UIMA Overview and Setup\" document for details."); | |
} | |
else { | |
System.out.println("No problems were detected. However, if the code does not compilie " + | |
"and run, see the \"Migrating from IBM UIMA to Apache UIMA\" chapter in the " + | |
"\"UIMA Overview and Setup\" document for assistance."); | |
} | |
} | |
/** | |
* Parses a comma separated list, entering each value into the results Collection. | |
* Trailing empty strings are included in the results Collection. | |
* @param string string to parse | |
* @param results Collection to which each value will be added | |
*/ | |
private static void parseCommaSeparatedList(String string, Collection results) { | |
String[] components = string.split(",",-1); | |
for (int i = 0; i < components.length; i++) { | |
results.add(components[i]); | |
} | |
} | |
private static void printUsageAndExit() { | |
System.err.println("Usage: java " + IbmUimaToApacheUima.class.getName() + " <directory> [-ext <fileExtensions>]"); | |
System.err.println("<fileExtensions> is a comma separated list of file extensions to process, e.g.: java,xml,properties"); | |
System.err.println("\tUse a trailing comma to include files with no extension (meaning their name contains no dot)"); | |
System.exit(1); | |
} | |
/** | |
* Applies the necessary replacements to all files in the given directory. | |
* Subdirectories are processed recursively. | |
* | |
* @param dir diretory containing files to replace | |
* @throws IOException if an I/O error occurs | |
*/ | |
private static void replaceInAllFiles(File dir) throws IOException { | |
File[] fileList = dir.listFiles(); | |
for (int i = 0; i < fileList.length; i++) { | |
File file = fileList[i]; | |
if (file.isFile()) { | |
//skip files with extensions specified in the excludes list | |
if (!extensions.isEmpty()) { | |
String filename = file.getName(); | |
String ext=""; | |
int lastDot = filename.lastIndexOf('.'); | |
if (lastDot > -1) { | |
ext = filename.substring(lastDot+1); | |
} | |
if (!extensions.contains(ext.toLowerCase())) { | |
continue; | |
} | |
} | |
//skip files that we can't read and write | |
if (!file.canRead()) { | |
System.err.println("Skipping unreadable file: " + file.getCanonicalPath()); | |
continue; | |
} | |
if (!file.canWrite()) { | |
System.err.println("Skipping unwritable file: " + file.getCanonicalPath()); | |
continue; | |
} | |
//skip files that are too big | |
if (file.length() > MAX_FILE_SIZE) { | |
System.out.println("Skipping file " + file.getCanonicalPath() + " with size: " + file.length() + " bytes"); | |
continue; | |
} | |
//do the replacements | |
replaceInFile(file); | |
} | |
//recursively call on subdirectories | |
if (file.isDirectory()) { | |
replaceInAllFiles(file); | |
} | |
} | |
} | |
/** | |
* Applies replacements to a single file. | |
* @param file the file to process | |
*/ | |
private static void replaceInFile(File file) throws IOException { | |
//read file | |
String original; | |
try { | |
original = FileUtils.file2String(file); | |
} | |
catch(IOException e) { | |
System.err.println("Error reading " + file.getCanonicalPath()); | |
System.err.println(e.getMessage()); | |
return; | |
} | |
String contents = original; | |
//apply replacements | |
Iterator iter = replacements.iterator(); | |
while (iter.hasNext()) { | |
Replacement replacement = (Replacement)iter.next(); | |
contents = contents.replaceAll(replacement.regex, replacement.replacementStr); | |
} | |
//for .java files do some additional processing | |
if (file.getName().endsWith(".java")) { | |
//updates for JCas/JCasRegistry refactoring | |
contents = applyJCasRefactoring(contents); | |
//remove duplicate imports (can be caused by some replacements) | |
contents = removeDuplicateImports(contents); | |
} | |
//write file if it changed | |
if (!contents.equals(original)) { | |
FileUtils.saveString2File(contents, file); | |
filesModified++; | |
} | |
filesScanned++; | |
//check for situations that may need manual attention, | |
//updates filesNeedingManualAttention field | |
checkForManualAttentionNeeded(file, original); | |
} | |
/* | |
* Applies changes needed due to JCas/JCasRegistry refactoring. These are a little | |
* more complicated than simple regex replacements. | |
* | |
* JCas.getNextIndex -> JCasRegistry.register(ThisClass.class) | |
* JCas.throwFeatMissing -> jcasType.jcas.throwFeatMissing [in cover class] | |
* JCas.throwFeatMissing -> jcas.throwFeatMissing [in _Type class] | |
*/ | |
private static String applyJCasRefactoring(String contents) { | |
//find the class name, we'll need it later | |
Matcher classNameMatcher = CLASS_NAME_PATTERN.matcher(contents); | |
if (!classNameMatcher.find()) | |
return contents; | |
String className = classNameMatcher.group(2); | |
//replace getNextIndex | |
Matcher getNextIndexMatcher = GET_NEXT_INDEX_PATTERN.matcher(contents); | |
String replacement = "org.apache.uima.jcas.JCasRegistry.register(" + className + ".class)"; | |
contents = getNextIndexMatcher.replaceAll(replacement); | |
//replace throwFeatMissing (replacement depends on whether we're in _Type object or not) | |
Matcher throwFeatMissingMatcher = THROW_FEAT_MISSING_PATTERN.matcher(contents); | |
if (className.endsWith("_Type")) { | |
contents = throwFeatMissingMatcher.replaceAll("this.jcas.throwFeatMissing"); | |
} | |
else { | |
contents = throwFeatMissingMatcher.replaceAll("this.jcasType.jcas.throwFeatMissing"); | |
} | |
return contents; | |
} | |
/** | |
* Remove duplicate imports from a Java source file. | |
*/ | |
private static String removeDuplicateImports(String contents) { | |
HashSet classes = new HashSet(); | |
Matcher matcher = IMPORT_PATTERN.matcher(contents); | |
int pos = 0; | |
int endOfLastDuplicate = 0; | |
StringBuffer result = null; | |
while (matcher.find(pos)) { | |
String className = matcher.group(1); | |
//account for whitespace in class name | |
className = className.replaceAll("\\s*",""); | |
if (!classes.add(className)) { | |
//duplicate import found. Do not append the import, | |
//but get everything else before it. | |
if (result == null) { | |
result = new StringBuffer(contents.length()); | |
} | |
result.append(contents.substring(endOfLastDuplicate, matcher.start())); | |
endOfLastDuplicate = matcher.end(); | |
} | |
pos = matcher.end(); | |
} | |
if (result == null) { | |
//no duplicates found | |
return contents; | |
} | |
else { | |
result.append(contents.substring(endOfLastDuplicate)); | |
return result.toString(); | |
} | |
} | |
/** | |
* Scans for certain patterns in the string that indicate situations | |
* that the migration tool doesn't resolve and may require user | |
* attention. Updated the filesNeedingManualAttention field with a String | |
* which is the file path plus the reason the file was flagged. | |
* | |
* @param contents string to scan | |
* @return true if the file needs manual attention | |
*/ | |
private static void checkForManualAttentionNeeded(File file, String contents) { | |
// UIMA package name (includes most common case of DocumentAnnotation) | |
Matcher packageNameMatcher = PACKAGE_PATTERN.matcher(contents); | |
if (packageNameMatcher.find()) { | |
String packageName = packageNameMatcher.group(1); | |
if (ibmPackageNames.contains(packageName)) { | |
filesNeedingManualAttention.add(file.getPath() + " (Uses an IBM UIMA Package Name)"); | |
return; | |
} | |
} | |
//JCas.getDocumentAnnotation (fuzzy, only matches if variable name / method | |
//ends with jcas) | |
if (GETDOCANNOT_PATTERN.matcher(contents).find()) { | |
filesNeedingManualAttention.add(file.getPath() + " (Calls JCas.getDocumentAnnotation())"); | |
return; | |
} | |
//xi:include | |
if (contents.indexOf("<xi:include") >= 0) { | |
filesNeedingManualAttention.add(file.getPath() + " (Uses xi:include)"); | |
return; | |
} | |
} | |
/** | |
* Reads a mapping from a resource file, and populates a List of | |
* Replacement objects. We don't use a Map because the order in which | |
* the replacements are applied can be important. | |
* | |
* @param fileName name of file to read from (looked up looking using Class.getResource()) | |
* @param mappings List to which Replacement objects will be added. | |
* Each object contains the regex to search for and the replacement string. | |
* @param treatAsPackageNames if true, the keys in the resource file will be considered | |
* package names, and this routine will produce regexes that replace any fully-qualified | |
* class name belonging to that package. Also in this case updates the | |
* static ibmPackageNames HashSet. | |
*/ | |
private static void readMapping(String fileName, List mappings, boolean treatAsPackageNames) throws IOException { | |
URL pkgListFile = IbmUimaToApacheUima.class.getResource(fileName); | |
InputStream inStream = pkgListFile.openStream(); | |
BufferedReader reader = new BufferedReader(new InputStreamReader(inStream)); | |
String line = reader.readLine(); | |
while (line != null) { | |
String[] mapping = line.split(" "); | |
String regex, replaceStr; | |
if (treatAsPackageNames) { | |
//we do special processing for package names to try to handle the case where | |
//user code exists in a package prefixed by com.ibm.uima. | |
//We only replace the package name when it appears as part of a fully-qualified | |
//class name in that package, not as a prefix of another package. | |
//turn package name into a regex (have to escape the . and, | |
//technically, should allow whitepsace around dots) | |
String pkgRegex = mapping[0].replaceAll("\\.", "\\\\s*\\\\.\\\\s*"); | |
//form regex that will find any fully-qualified class name in this package | |
regex = pkgRegex+"(\\.(\\*|[A-Z]\\w*))"; | |
replaceStr = mapping[1] + "$1"; | |
ibmPackageNames.add(mapping[0]); | |
} | |
else { | |
//form regex from src, by escaping dots and allowing whitespace | |
regex = mapping[0].replaceAll("\\.", "\\\\s*\\\\.\\\\s*"); | |
replaceStr = mapping[1]; | |
} | |
Replacement replacement = new Replacement(regex, replaceStr); | |
mappings.add(replacement); | |
line = reader.readLine(); | |
} | |
inStream.close(); | |
} | |
private static class Replacement { | |
String regex; | |
String replacementStr; | |
Replacement(String regex, String replacement) { | |
this.regex = regex; | |
this.replacementStr = replacement; | |
} | |
} | |
} |