/* $Id: RSSConnector.java 994959 2010-09-08 10:04:42Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.connectors.rss;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import org.apache.manifoldcf.crawler.system.Logging;
import org.apache.manifoldcf.crawler.system.ManifoldCF;
import org.apache.manifoldcf.core.fuzzyml.*;
import org.apache.manifoldcf.core.common.DateParser;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.client.RedirectException;
import org.apache.http.client.CircularRedirectException;
import org.apache.http.NoHttpResponseException;
import org.apache.http.HttpException;
import java.io.*;
import java.util.*;
import java.net.*;
import java.text.*;
import java.util.regex.*;
/** This is the RSS implementation of the IRepositoryConnector interface.
* This connector basically looks at an RSS document in order to seed the
* document queue. The document is always fetched from the same URL (it's
* specified in the configuration parameters). The documents subsequently
* crawled are not scraped for additional links; only the primary document is
* ingested. On the other hand, redirections ARE honored, so that various
* sites that use this trick can be supported (e.g., the BBC).
*
*/
public class RSSConnector extends org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
{
public static final String _rcsid = "@(#)$Id: RSSConnector.java 994959 2010-09-08 10:04:42Z kwright $";
protected final static String rssThrottleGroupType = "_RSS_";
// Usage flag values
protected static final int ROBOTS_NONE = 0;
protected static final int ROBOTS_DATA = 1;
protected static final int ROBOTS_ALL = 2;
/** Dechromed content mode - none */
public static final int DECHROMED_NONE = 0;
/** Dechromed content mode - description field */
public static final int DECHROMED_DESCRIPTION = 1;
/** Dechromed content mode - content field */
public static final int DECHROMED_CONTENT = 2;
/** Chromed suppression mode - use chromed content if dechromed content not available */
public static final int CHROMED_USE = 0;
/** Chromed suppression mode - skip documents if dechromed content not available */
public static final int CHROMED_SKIP = 1;
/** Chromed suppression mode - index metadata only if dechromed content not available */
public static final int CHROMED_METADATA_ONLY = 2;
/** Robots usage flag */
protected int robotsUsage = ROBOTS_ALL;
/** The user-agent for this connector instance */
protected String userAgent = null;
/** The email address for this connector instance */
protected String from = null;
/** The minimum milliseconds between fetches */
protected long minimumMillisecondsPerFetchPerServer = -1L;
/** The maximum open connections */
protected int maxOpenConnectionsPerServer = 0;
/** The minimum milliseconds between bytes */
protected double minimumMillisecondsPerBytePerServer = 0.0;
/** The throttle group name */
protected String throttleGroupName = null;
/** The proxy host */
protected String proxyHost = null;
/** The proxy port */
protected int proxyPort = -1;
/** Proxy auth domain */
protected String proxyAuthDomain = null;
/** Proxy auth username */
protected String proxyAuthUsername = null;
/** Proxy auth password */
protected String proxyAuthPassword = null;
/** The throttled fetcher used by this instance */
protected ThrottledFetcher fetcher = null;
/** The robots object used by this instance */
protected Robots robots = null;
/** Storage for fetcher objects */
protected static Map<String,ThrottledFetcher> fetcherMap = new HashMap<String,ThrottledFetcher>();
/** Storage for robots objects */
protected static Map<String,Robots> robotsMap = new HashMap<String,Robots>();
/** Flag indicating whether session data is initialized */
protected boolean isInitialized = false;
// A couple of very important points.
// The canonical document identifier is simply a URL.
// Versions of the document are calculated using a checksum technique
protected static DataCache cache = new DataCache();
protected static final Map<String,String> understoodProtocols = new HashMap<String,String>();
static
{
understoodProtocols.put("http","http");
understoodProtocols.put("https","https");
}
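// Version-string scheme (a summary of what getDocumentVersions() builds below):
// a version starting with '+' marks an ingestable document, and packs the acls,
// the deny token, the metadata, the ingest url, and the feed-supplied fields,
// ending with the content checksum; a version starting with '-' marks a feed,
// and packs the ETag and Last-Modified headers plus the checksum; an empty
// version marks a fetch error, so the document is retried rather than deleted.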
// Activity types
public final static String ACTIVITY_FETCH = "fetch";
public final static String ACTIVITY_ROBOTSPARSE = "robots parse";
/** Deny access token for default authority */
private final static String defaultAuthorityDenyToken = "DEAD_AUTHORITY";
/** Constructor.
*/
public RSSConnector()
{
}
/** Establish a session */
protected void getSession()
throws ManifoldCFException
{
if (!isInitialized)
{
String x;
String emailAddress = params.getParameter(RSSConfig.PARAMETER_EMAIL);
if (emailAddress == null)
throw new ManifoldCFException("Missing email address");
userAgent = "Mozilla/5.0 (ApacheManifoldCFRSSFeedReader; "+((emailAddress==null)?"":emailAddress)+")";
from = emailAddress;
String robotsUsageString = params.getParameter(RSSConfig.PARAMETER_ROBOTSUSAGE);
robotsUsage = ROBOTS_ALL;
if (robotsUsageString == null || robotsUsageString.length() == 0 || robotsUsageString.equals(RSSConfig.VALUE_ALL))
robotsUsage = ROBOTS_ALL;
else if (robotsUsageString.equals(RSSConfig.VALUE_NONE))
robotsUsage = ROBOTS_NONE;
else if (robotsUsageString.equals(RSSConfig.VALUE_DATA))
robotsUsage = ROBOTS_DATA;
proxyHost = params.getParameter(RSSConfig.PARAMETER_PROXYHOST);
String proxyPortString = params.getParameter(RSSConfig.PARAMETER_PROXYPORT);
proxyAuthDomain = params.getParameter(RSSConfig.PARAMETER_PROXYAUTHDOMAIN);
proxyAuthUsername = params.getParameter(RSSConfig.PARAMETER_PROXYAUTHUSERNAME);
proxyAuthPassword = params.getObfuscatedParameter(RSSConfig.PARAMETER_PROXYAUTHPASSWORD);
proxyPort = -1;
if (proxyPortString != null && proxyPortString.length() > 0)
{
try
{
proxyPort = Integer.parseInt(proxyPortString);
}
catch (NumberFormatException e)
{
throw new ManifoldCFException(e.getMessage(),e);
}
}
// Read throttling configuration parameters
minimumMillisecondsPerBytePerServer = 0.0;
maxOpenConnectionsPerServer = 10;
minimumMillisecondsPerFetchPerServer = 0L;
x = params.getParameter(RSSConfig.PARAMETER_BANDWIDTH);
if (x != null && x.length() > 0)
{
try
{
int maxKBytesPerSecondPerServer = Integer.parseInt(x);
if (maxKBytesPerSecondPerServer > 0)
minimumMillisecondsPerBytePerServer = 1.0/(double)maxKBytesPerSecondPerServer;
}
catch (NumberFormatException e)
{
throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
}
}
x = params.getParameter(RSSConfig.PARAMETER_MAXOPEN);
if (x != null && x.length() > 0)
{
try
{
maxOpenConnectionsPerServer = Integer.parseInt(x);
}
catch (NumberFormatException e)
{
throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
}
}
x = params.getParameter(RSSConfig.PARAMETER_MAXFETCHES);
if (x != null && x.length() > 0)
{
try
{
int maxFetches = Integer.parseInt(x);
if (maxFetches == 0)
maxFetches = 1;
minimumMillisecondsPerFetchPerServer = 60000L/((long)maxFetches);
}
catch (NumberFormatException e)
{
throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
}
}
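// Worked example of the arithmetic above: a bandwidth limit of 64 KB/sec yields
// 1.0/64 = 0.015625 ms per byte, and a limit of 12 fetches per minute yields
// 60000/12 = 5000 ms between fetches.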
IThrottleGroups tg = ThrottleGroupsFactory.make(currentContext);
// Create the throttle group
tg.createOrUpdateThrottleGroup(rssThrottleGroupType, throttleGroupName, new ThrottleSpec(maxOpenConnectionsPerServer,
minimumMillisecondsPerFetchPerServer, minimumMillisecondsPerBytePerServer));
isInitialized = true;
}
}
/** Return the list of activities that this connector supports (i.e. writes into the log).
*@return the list.
*/
@Override
public String[] getActivitiesList()
{
return new String[]{ACTIVITY_FETCH, ACTIVITY_ROBOTSPARSE};
}
/** Tell the world what model this connector uses for getDocumentIdentifiers().
* This must return a model value as specified above.
*@return the model type value.
*/
@Override
public int getConnectorModel()
{
// This connector is currently structured so that the RSS feeds are the seeds.
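// (MODEL_ALL means that each addSeedDocuments() pass is expected to supply the
// complete set of seeds, which is what lets the framework notice feeds that
// have left the specification.)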
return MODEL_ALL;
}
// All methods below this line will ONLY be called if a connect() call succeeded
// on this instance!
/** Connect. The configuration parameters are included.
*@param configParams are the configuration parameters for this connection.
* Note well: There are no exceptions allowed from this call, since it is expected to mainly establish connection parameters.
*/
@Override
public void connect(ConfigParams configParams)
{
super.connect(configParams);
// Do the necessary bookkeeping around connection counting
throttleGroupName = params.getParameter(RSSConfig.PARAMETER_THROTTLEGROUP);
if (throttleGroupName == null)
throttleGroupName = "";
fetcher = getFetcher();
robots = getRobots(fetcher);
// Let the system know we have a connection.
fetcher.noteConnectionEstablished();
robots.noteConnectionEstablished();
}
/** This method is periodically called for all connectors that are connected but not
* in active use.
*/
@Override
public void poll()
throws ManifoldCFException
{
fetcher.poll();
robots.poll();
}
/** Check status of connection.
*/
@Override
public String check()
throws ManifoldCFException
{
getSession();
return super.check();
}
/** Close the connection. Call this before discarding the repository connector.
*/
@Override
public void disconnect()
throws ManifoldCFException
{
isInitialized = false;
// Let the system know we are freeing the connection
robots.noteConnectionReleased();
fetcher.noteConnectionReleased();
userAgent = null;
from = null;
minimumMillisecondsPerFetchPerServer = -1L;
maxOpenConnectionsPerServer = 0;
minimumMillisecondsPerBytePerServer = 0.0;
throttleGroupName = null;
proxyHost = null;
proxyPort = -1;
proxyAuthDomain = null;
proxyAuthUsername = null;
proxyAuthPassword = null;
super.disconnect();
}
/** Get the bin name string for a document identifier. The bin name describes the queue to which the
* document will be assigned for throttling purposes. Throttling controls the rate at which items in a
* given queue are fetched; it does not say anything about the overall fetch rate, which may operate on
* multiple queues or bins.
* For example, if you implement a web crawler, a good choice of bin name would be the server name, since
* that is likely to correspond to a real resource that will need real throttle protection.
*@param documentIdentifier is the document identifier.
*@return the bin name.
*/
@Override
public String[] getBinNames(String documentIdentifier)
{
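// For example (hypothetical identifier): "http://news.example.com/stories/1.html"
// maps to the bin "news.example.com"; identifiers that fail to parse all share
// the catch-all "" bin.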
try
{
WebURL uri = new WebURL(documentIdentifier);
return new String[]{uri.getHost()};
}
catch (URISyntaxException e)
{
return new String[]{""};
}
}
/** Queue "seed" documents. Seed documents are the starting places for crawling activity. Documents
* are seeded when this method calls appropriate methods in the passed in ISeedingActivity object.
*
* This method can choose to find repository changes that happen only during the specified time interval.
* The seeds recorded by this method will be viewed by the framework based on what the
* getConnectorModel() method returns.
*
* It is not a big problem if the connector chooses to create more seeds than are
* strictly necessary; it is merely a question of overall work required.
*
* The times passed to this method may be interpreted for greatest efficiency. The time ranges
* any given job uses with this connector will not overlap, but will proceed starting at 0 and going
* to the "current time", each time the job is run. For continuous crawling jobs, this method will
* be called once, when the job starts, and at various periodic intervals as the job executes.
*
* When a job's specification is changed, the framework automatically resets the seeding start time to 0. The
* seeding start time may also be set to 0 on each job run, depending on the connector model returned by
* getConnectorModel().
*
* Note that it is always ok to send MORE documents rather than less to this method.
*@param activities is the interface this method should use to perform whatever framework actions are desired.
*@param spec is a document specification (that comes from the job).
*@param startTime is the beginning of the time range to consider, inclusive.
*@param endTime is the end of the time range to consider, exclusive.
*/
@Override
public void addSeedDocuments(ISeedingActivity activities, DocumentSpecification spec,
long startTime, long endTime)
throws ManifoldCFException, ServiceInterruption
{
getSession();
Filter f = new Filter(spec,true);
// Go through all the seeds.
Iterator iter = f.getSeeds();
while (iter.hasNext())
{
String canonicalURL = (String)iter.next();
activities.addSeedDocument(canonicalURL);
}
}
/** Convert an absolute or relative URL to a document identifier. This may involve several steps at some point,
* but right now it does NOT involve converting the host name to a canonical host name.
* (Doing so would destroy the ability of virtually hosted sites to do the right thing,
* since the original host name would be lost.) Thus, we do the conversion to IP address
* right before we actually fetch the document.
*@param policies are the canonicalization policies in effect.
*@param parentIdentifier the identifier of the document in which the raw url was found, or null if none.
*@param rawURL is the raw, un-normalized and un-canonicalized url.
*@return the canonical URL (the document identifier), or null if the url was illegal.
*/
protected static String makeDocumentIdentifier(CanonicalizationPolicies policies, String parentIdentifier, String rawURL)
throws ManifoldCFException
{
try
{
// First, find the matching canonicalization policy, if any
CanonicalizationPolicy p = policies.findMatch(rawURL);
// Filter out control characters
StringBuilder sb = new StringBuilder();
int i = 0;
while (i < rawURL.length())
{
char x = rawURL.charAt(i++);
// Only 7-bit ascii is allowed in URLs - and that has limits too (no control characters)
if (x >= ' ' && x < 128)
sb.append(x);
}
rawURL = sb.toString();
WebURL url;
if (parentIdentifier != null)
{
WebURL parentURL = new WebURL(parentIdentifier);
url = parentURL.resolve(rawURL);
}
else
url = new WebURL(rawURL);
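// For example (hypothetical urls): a raw link of "../stories/1.html" found in
// the parent "http://example.com/feeds/rss.xml" resolves to
// "http://example.com/stories/1.html" before canonicalization.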
String protocol = url.getScheme();
String host = url.getHost();
// The new URL better darn well have a host and a protocol, and we only know how to deal with
// http and https.
if (protocol == null || host == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Can't use url '"+rawURL+"' because it has no protocol or host");
return null;
}
if (understoodProtocols.get(protocol) == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Can't use url '"+rawURL+"' because it has an unsupported protocol '"+protocol+"'");
return null;
}
// Canonicalization procedure.
// The query part of the URL may contain bad parameters (session id's, for instance), or may be ordered in such a
// way as to prevent an effectively identical URL from being matched. The anchor part of the URL should also be stripped.
// This code performs both of these activities in a simple way; rewrites of various pieces may get more involved if we add
// the ability to perform mappings using criteria specified in the UI. Right now we don't.
String id = doCanonicalization(p,url);
if (id == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Can't use url '"+rawURL+"' because it could not be canonicalized");
return null;
}
// As a last basic legality check, go through looking for illegal characters.
i = 0;
while (i < id.length())
{
char x = id.charAt(i++);
// Only 7-bit ascii is allowed in URLs - and that has limits too (no control characters)
if (x < ' ' || x > 127)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Can't use url '"+rawURL+"' because it has illegal characters in it");
return null;
}
}
return id;
}
catch (java.net.URISyntaxException e)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Can't use url '"+rawURL+"' because it is badly formed: "+e.getMessage());
return null;
}
catch (java.lang.IllegalArgumentException e)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Can't use url '"+rawURL+"' because there was an argument error: "+e.getMessage(),e);
return null;
}
catch (java.lang.NullPointerException e)
{
// This gets tossed by url.toASCIIString() for reasons I don't understand, but which have to do with a malformed URL.
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Can't use url '"+rawURL+"' because it is missing fields: "+e.getMessage(),e);
return null;
}
}
/** Code to canonicalize a URL. If URL cannot be canonicalized (and is illegal) return null.
*/
protected static String doCanonicalization(CanonicalizationPolicy p, WebURL url)
throws ManifoldCFException, java.net.URISyntaxException
{
// Note well: The java.net.URI class mistreats the query part of the URI, near as I can tell, in the following ways:
// (1) It decodes the whole thing without regards to the argument interpretation, so the escaped ampersands etc in the arguments are converted
// to non-escaped ones (ugh). This is why I changed the code below to parse the RAW query string and decode it myself.
// (2) On reassembly of the query string, the class does not properly escape ":", "/", or a bunch of other characters the class description *says*
// it will escape. This means it creates URI's that are illegal according to RFC 2396 - although it is true that RFC 2396 also contains
// apparent errors.
//
// I've therefore opted to deal with this problem by doing much of the query string processing myself - including its final reassembly into the
// URI at the end of the processing.
//
// To make the url be canonical, we need to strip off everything after the #. We also need to order the arguments in a canonical
// way, and remove session identifiers of the types we know about.
String queryString = url.getRawQuery();
if (queryString != null)
{
// Rewrite the query string. To do this, we first parse it (by looking for ampersands and equal signs), and then
// we ditch any keys that we really don't want (session identifiers particularly). Finally, we go through the
// keys in sorted order and reassemble the query, making sure that any arguments that have the same name
// appear in the same order.
// I don't use the 'split' operation because I think it's a lot more oomph (and performance loss) than is needed
// for this simple parsing task.
// When reordering a url, the following is done:
// (1) The individual order of all arguments with the same name is preserved
// (2) The arguments themselves appear in sorted order, minus any arguments that should be removed because they
// are interpreted to be session arguments.
//
// When a url is NOT reordered, the following is done:
// (1) Each argument is examined IN TURN.
// (2) If the argument is a session argument and should be excluded, it is simply skipped.
// Canonicalization note: Broadvision
//
// The format of Broadvision's urls is as follows:
// http://blah/path/path?arg|arg|arg|BVSession@@@@=xxxx&more stuff
// The session identifier is the BVSession@@@@. In theory I could strip this away, but I've found that
// most Broadvision sites require session even for basic navigation!
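// Worked example (hypothetical query): under the default policy, reordering
// "b=2&a=1&a=0&PHPSESSID=q" produces "a=1&a=0&b=2": the PHPSESSID argument is
// dropped, the keys are sorted, and the two "a" arguments keep their original
// relative order.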
if (p == null || p.canReorder())
{
// Reorder the arguments.
HashMap argumentMap = new HashMap();
int index = 0;
while (index < queryString.length())
{
int newIndex = queryString.indexOf("&",index);
if (newIndex == -1)
newIndex = queryString.length();
String argument = queryString.substring(index,newIndex);
int valueIndex = argument.indexOf("=");
String key;
if (valueIndex == -1)
key = argument;
else
key = argument.substring(0,valueIndex);
// If this is a disallowed argument, simply don't include it in the final map.
boolean includeArgument = true;
if ((p == null || p.canRemovePhpSession()) && key.equals("PHPSESSID"))
includeArgument = false;
if ((p == null || p.canRemoveBvSession()) && key.indexOf("BVSession@@@@") != -1)
includeArgument = false;
if (includeArgument)
{
ArrayList list = (ArrayList)argumentMap.get(key);
if (list == null)
{
list = new ArrayList();
argumentMap.put(key,list);
}
list.add(argument);
}
if (newIndex < queryString.length())
index = newIndex + 1;
else
index = newIndex;
}
// Reassemble query string in sorted order
String[] sortArray = new String[argumentMap.size()];
int i = 0;
Iterator iter = argumentMap.keySet().iterator();
while (iter.hasNext())
{
sortArray[i++] = (String)iter.next();
}
java.util.Arrays.sort(sortArray);
StringBuilder newString = new StringBuilder();
boolean isFirst = true;
i = 0;
while (i < sortArray.length)
{
String key = sortArray[i++];
ArrayList list = (ArrayList)argumentMap.get(key);
int j = 0;
while (j < list.size())
{
if (isFirst == false)
{
newString.append("&");
}
else
isFirst = false;
newString.append((String)list.get(j++));
}
}
queryString = newString.toString();
}
else
{
// Do not reorder!
StringBuilder newString = new StringBuilder();
int index = 0;
boolean isFirst = true;
while (index < queryString.length())
{
int newIndex = queryString.indexOf("&",index);
if (newIndex == -1)
newIndex = queryString.length();
String argument = queryString.substring(index,newIndex);
int valueIndex = argument.indexOf("=");
String key;
if (valueIndex == -1)
key = argument;
else
key = argument.substring(0,valueIndex);
// If this is a disallowed argument, simply don't include it in the final query.
boolean includeArgument = true;
if ((p == null || p.canRemovePhpSession()) && key.equals("PHPSESSID"))
includeArgument = false;
if ((p == null || p.canRemoveBvSession()) && key.indexOf("BVSession@@@@") != -1)
includeArgument = false;
if (includeArgument)
{
if (!isFirst)
newString.append("&");
else
isFirst = false;
newString.append(argument);
}
if (newIndex < queryString.length())
index = newIndex + 1;
else
index = newIndex;
}
queryString = newString.toString();
}
}
// Now, rewrite path to get rid of jsessionid etc.
String pathString = url.getPath();
if (pathString != null)
{
int index = pathString.indexOf(";jsessionid=");
if ((p == null || p.canRemoveJavaSession()) && index != -1)
{
// There's a ";jsessionid="
// Strip the java session id
pathString = pathString.substring(0,index);
}
if ((p == null || p.canRemoveAspSession()) && pathString.startsWith("/s("))
{
// It's asp.net
index = pathString.indexOf(")");
if (index != -1)
pathString = pathString.substring(index+1);
}
}
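// For example (hypothetical paths): under the default policy,
// "/app/page;jsessionid=ABC123" becomes "/app/page", and the asp.net form
// "/s(xyz)/page.aspx" becomes "/page.aspx".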
// Put it back into the URL without the ref, and with the modified query and path parts.
url = new WebURL(url.getScheme(),url.getHost(),url.getPort(),pathString,queryString);
String rval = url.toASCIIString();
return rval;
}
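/** Illustrative sketch only; nothing in the connector calls this. It shows the
* combined effect of doCanonicalization() on a hypothetical url under the
* default policy (p == null): the java session id is stripped from the path,
* the PHPSESSID argument is removed, the remaining arguments are sorted, and
* the fragment is dropped, yielding "http://example.com/app?a=1&b=2".
*/
protected static String exampleCanonicalization()
  throws ManifoldCFException, java.net.URISyntaxException
{
  WebURL url = new WebURL("http://example.com/app;jsessionid=ABC123?b=2&a=1&PHPSESSID=xyz#frag");
  return doCanonicalization(null,url);
}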
/** Get document versions given an array of document identifiers.
* This method is called for EVERY document that is considered. It is
* therefore important to perform as little work as possible here.
*@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
*@param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
* A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
* had an empty version string.
*@param activities is the interface this method should use to perform whatever framework actions are desired.
*@param spec is the current document specification for the current job. If there is a dependency on this
* specification, then the version string should include the pertinent data, so that reingestion will occur
* when the specification changes. This is primarily useful for metadata.
*@param jobType is an integer describing how the job is being run, whether continuous or once-only.
*@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
*@return the corresponding version strings, with null in the places where the document no longer exists.
* Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
* will always be processed.
*/
@Override
public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
DocumentSpecification spec, int jobType, boolean usesDefaultAuthority)
throws ManifoldCFException, ServiceInterruption
{
getSession();
// The connection limit is designed to permit this connector to coexist with potentially other connectors, such as the web connector.
// There is currently no good way to enforce connection limits across all installed connectors - this will require considerably more
// thought to set up properly.
int connectionLimit = 200;
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In getDocumentVersions for "+Integer.toString(documentIdentifiers.length)+" documents");
Filter f = new Filter(spec,false);
String[] acls = f.getAcls();
// Sort it.
java.util.Arrays.sort(acls);
// Build a map of the metadata names and values from the spec
ArrayList namesAndValues = f.getMetadata();
// Create an array of name/value fixedlists
String[] metadata = new String[namesAndValues.size()];
int k = 0;
String[] fixedListStrings = new String[2];
while (k < metadata.length)
{
NameValue nv = (NameValue)namesAndValues.get(k);
String name = nv.getName();
String value = nv.getValue();
fixedListStrings[0] = name;
fixedListStrings[1] = value;
StringBuilder newsb = new StringBuilder();
packFixedList(newsb,fixedListStrings,'=');
metadata[k++] = newsb.toString();
}
java.util.Arrays.sort(metadata);
Logging.connectors.debug("RSS: Done setting up metadata version strings");
// NOTE: There are two kinds of documents in here; documents that are RSS feeds (that presumably have a content-type
// of text/xml), and documents that need to be indexed.
//
// For the latter, the metadata etc is part of the version string. For the former, the only thing that is part of the version string is the
// document's checksum.
//
// The need to exclude documents from fetch based on whether they match an expression causes some difficulties, because we really
// DON'T want this to apply to the feeds themselves. Since the distinguishing characteristic of a feed is that it is in the seed list,
// and that its content-type is text/xml, we could use either of these characteristics to treat feeds differently from
// fetchable urls. But the latter approach requires a fetch, which is forbidden. So - the spec will be used to characterize the url.
// However, the spec might change, and the url might be dropped from the list - and then what??
//
// The final solution is to simply not queue what cannot be mapped.
int feedTimeout = f.getFeedTimeoutValue();
// A preliminary fetch must be done in either case, because otherwise we cannot detect a deletion.
// But, since we don't want to fetch twice, write these urls to temporary files. The
// files will be cleaned up as soon as possible.
String[] rval = new String[documentIdentifiers.length];
// HttpClient client = new HttpClient(httpConMan.getManager());
// The document specification has already been used to trim out documents that are not
// allowed to appear in the queue. So even that check has already been done.
int i = 0;
while (i < documentIdentifiers.length)
{
// If it is in this list, we presume that it has been vetted against the map etc., so we don't do that again. We just fetch it.
// And, if the content type is xml, we calculate the version as if it is a feed rather than a document.
// Get the url
String urlValue = documentIdentifiers[i];
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Getting version string for '"+urlValue+"'");
// If there's a carrydown "data" value for this url, we use that value rather than actually fetching the document. This also means we don't need to
// do a robots check, because we aren't actually crawling anything. So, ALWAYS do this first...
CharacterInput[] dechromedData = activities.retrieveParentDataAsFiles(urlValue,"data");
try
{
if (dechromedData.length > 0)
{
// Data already available. The fetch cycle can be entirely avoided, as can the robots check.
String ingestURL = f.mapDocumentURL(urlValue);
if (ingestURL != null)
{
// Open up an input stream corresponding to the carrydown data. The stream will be encoded as utf-8.
try
{
InputStream is = dechromedData[0].getUtf8Stream();
try
{
StringBuilder sb = new StringBuilder();
long checkSum = cache.addData(activities,urlValue,"text/html",is);
// Grab what we need from the passed-down data for the document. These will all become part
// of the version string.
String[] pubDates = activities.retrieveParentData(urlValue,"pubdate");
String[] sources = activities.retrieveParentData(urlValue,"source");
String[] titles = activities.retrieveParentData(urlValue,"title");
String[] authorNames = activities.retrieveParentData(urlValue,"authorname");
String[] authorEmails = activities.retrieveParentData(urlValue,"authoremail");
String[] categories = activities.retrieveParentData(urlValue,"category");
String[] descriptions = activities.retrieveParentData(urlValue,"description");
java.util.Arrays.sort(pubDates);
java.util.Arrays.sort(sources);
java.util.Arrays.sort(titles);
java.util.Arrays.sort(authorNames);
java.util.Arrays.sort(authorEmails);
java.util.Arrays.sort(categories);
java.util.Arrays.sort(descriptions);
if (sources.length == 0)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Warning; URL '"+ingestURL+"' doesn't seem to have any RSS feed source!");
}
sb.append('+');
packList(sb,acls,'+');
if (acls.length > 0)
{
sb.append('+');
pack(sb,defaultAuthorityDenyToken,'+');
}
else
sb.append('-');
// Now, do the metadata
packList(sb,metadata,'+');
// The ingestion URL
pack(sb,ingestURL,'+');
// The pub dates
packList(sb,pubDates,'+');
// The titles
packList(sb,titles,'+');
// The sources
packList(sb,sources,'+');
// The categories
packList(sb,categories,'+');
// The descriptions
packList(sb,descriptions,'+');
// The author names
packList(sb,authorNames,'+');
// The author emails
packList(sb,authorEmails,'+');
// Do the checksum part, which does not need to be parseable.
sb.append(Long.toString(checkSum));
rval[i] = sb.toString();
}
finally
{
is.close();
}
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("IO exception reading data from string: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO exception reading data from string: "+e.getMessage(),e);
}
}
else
{
// Document is a seed or unmappable; just skip it
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Skipping carry-down document '"+urlValue+"' because it is unmappable or is a seed.");
}
}
else
{
// Get the old version string
String oldVersionString = oldVersions[i];
// Unpack the old version as much as possible.
// We are interested in what the ETag and Last-Modified headers were last time.
String lastETagValue = null;
String lastModifiedValue = null;
// Note well: Non-continuous jobs cannot use etag because the rss document MUST be fetched each time for such jobs,
// or the documents it points at would get deleted.
//
// NOTE: I disabled this code because we really need the feed's TTL value in order to reschedule properly. I can't get the
// TTL value without refetching the document - therefore ETag and Last-Modified cannot be used :-(
if (false && jobType == JOBMODE_CONTINUOUS && oldVersionString != null && oldVersionString.startsWith("-"))
{
// It's a feed, so the last etag and last-modified fields should be encoded in this version string.
StringBuilder lastETagBuffer = new StringBuilder();
int unpackPos = unpack(lastETagBuffer,oldVersionString,1,'+');
StringBuilder lastModifiedBuffer = new StringBuilder();
unpackPos = unpack(lastModifiedBuffer,oldVersionString,unpackPos,'+');
if (lastETagBuffer.length() > 0)
lastETagValue = lastETagBuffer.toString();
if (lastModifiedBuffer.length() > 0)
lastModifiedValue = lastModifiedBuffer.toString();
}
if (Logging.connectors.isDebugEnabled() && (lastETagValue != null || lastModifiedValue != null))
Logging.connectors.debug("RSS: Document '"+urlValue+"' was found to have a previous ETag value of '"+((lastETagValue==null)?"null":lastETagValue)+
"' and a previous Last-Modified value of '"+((lastModifiedValue==null)?"null":lastModifiedValue)+"'");
// Robots check. First, we need to separate the url into its components
try
{
URL url = new URL(urlValue);
String protocol = url.getProtocol();
int port = url.getPort();
String hostName = url.getHost();
String pathPart = url.getFile();
// Check with robots to see if it's allowed
if (robotsUsage >= ROBOTS_DATA && !robots.isFetchAllowed(currentContext,throttleGroupName,
protocol,port,hostName,url.getPath(),
userAgent,from,
proxyHost, proxyPort, proxyAuthDomain, proxyAuthUsername, proxyAuthPassword,
activities, connectionLimit))
{
activities.recordActivity(null,ACTIVITY_FETCH,
null,urlValue,Integer.toString(-2),"Robots exclusion",null);
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Skipping url '"+urlValue+"' because robots.txt says to");
rval[i] = null;
}
else
{
// Now, use the fetcher, and get the file.
IThrottledConnection connection = fetcher.createConnection(currentContext,
throttleGroupName,
hostName,
connectionLimit,
feedTimeout,
proxyHost,
proxyPort,
proxyAuthDomain,
proxyAuthUsername,
proxyAuthPassword);
try
{
// Begin the fetch
connection.beginFetch("Data");
try
{
// Execute the request.
// Use the connect timeout from the document specification!
int status = connection.executeFetch(protocol,port,pathPart,userAgent,from,
lastETagValue,lastModifiedValue);
switch (status)
{
case IThrottledConnection.STATUS_NOCHANGE:
rval[i] = oldVersionString;
break;
case IThrottledConnection.STATUS_OK:
try
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Successfully fetched "+urlValue);
// Document successfully fetched!
// If its content is xml, presume it's a feed...
String contentType = connection.getResponseHeader("Content-Type");
// Some sites have multiple content types. We just look at the LAST one in that case.
if (contentType != null)
{
String[] contentTypes = contentType.split(",");
if (contentTypes.length > 0)
contentType = contentTypes[contentTypes.length-1].trim();
else
contentType = null;
}
boolean isXML = (contentType != null &&
(contentType.startsWith("text/xml") ||
contentType.startsWith("application/rss+xml") ||
contentType.startsWith("application/xml") ||
contentType.startsWith("application/atom+xml") ||
contentType.startsWith("application/xhtml+xml") ||
contentType.startsWith("text/XML") ||
contentType.startsWith("application/rdf+xml") ||
contentType.startsWith("text/application") ||
contentType.startsWith("XML") ));
String ingestURL = null;
if (!isXML)
{
// If the chromed content mode is set to "skip", and we got here, it means
// we should not include the content.
if (f.getChromedContentMode() == CHROMED_SKIP)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Removing url '"+urlValue+"' because it no longer has dechromed content available");
rval[i] = null;
break;
}
// Decide whether to exclude this document based on what we see here.
// Basically, we want to get rid of everything that we don't know what
// to do with in the ingestion system.
if (!isContentInteresting(activities,contentType))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Removing url '"+urlValue+"' because it had the wrong content type: "+((contentType==null)?"null":"'"+contentType+"'"));
rval[i] = null;
break;
}
ingestURL = f.mapDocumentURL(urlValue);
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: The url '"+urlValue+"' is a feed");
if (!f.isSeed(urlValue))
{
// Remove the feed from consideration, since it has left the list of seeds
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Removing feed url '"+urlValue+"' because it is not a seed.");
rval[i] = null;
break;
}
}
InputStream is = connection.getResponseBodyStream();
try
{
long checkSum = cache.addData(activities,urlValue,contentType,is);
StringBuilder sb = new StringBuilder();
if (ingestURL != null)
{
// We think it is ingestable. The version string accordingly starts with a "+".
// Grab what we need from the passed-down data for the document. These will all become part
// of the version string.
String[] pubDates = activities.retrieveParentData(urlValue,"pubdate");
String[] sources = activities.retrieveParentData(urlValue,"source");
String[] titles = activities.retrieveParentData(urlValue,"title");
String[] authorNames = activities.retrieveParentData(urlValue,"authorname");
String[] authorEmails = activities.retrieveParentData(urlValue,"authoremail");
String[] categories = activities.retrieveParentData(urlValue,"category");
String[] descriptions = activities.retrieveParentData(urlValue,"description");
java.util.Arrays.sort(pubDates);
java.util.Arrays.sort(sources);
java.util.Arrays.sort(titles);
java.util.Arrays.sort(authorNames);
java.util.Arrays.sort(authorEmails);
java.util.Arrays.sort(categories);
java.util.Arrays.sort(descriptions);
if (sources.length == 0)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Warning; URL '"+ingestURL+"' doesn't seem to have any RSS feed source!");
}
sb.append('+');
packList(sb,acls,'+');
if (acls.length > 0)
{
sb.append('+');
pack(sb,defaultAuthorityDenyToken,'+');
}
else
sb.append('-');
// Now, do the metadata
packList(sb,metadata,'+');
// The ingestion URL
pack(sb,ingestURL,'+');
// The pub dates
packList(sb,pubDates,'+');
// The titles
packList(sb,titles,'+');
// The sources
packList(sb,sources,'+');
// The categories
packList(sb,categories,'+');
// The descriptions
packList(sb,descriptions,'+');
// The author names
packList(sb,authorNames,'+');
// The author emails
packList(sb,authorEmails,'+');
}
else
{
sb.append('-');
String etag = connection.getResponseHeader("ETag");
if (etag == null)
pack(sb,"",'+');
else
pack(sb,etag,'+');
String lastModified = connection.getResponseHeader("Last-Modified");
if (lastModified == null)
pack(sb,"",'+');
else
pack(sb,lastModified,'+');
}
// Do the checksum part, which does not need to be parseable.
sb.append(Long.toString(checkSum));
rval[i] = sb.toString();
}
finally
{
is.close();
}
}
catch (java.net.SocketTimeoutException e)
{
Logging.connectors.warn("RSS: Socket timeout exception fetching document contents '"+urlValue+"' - skipping: "+e.getMessage(), e);
rval[i] = null;
}
catch (ConnectTimeoutException e)
{
Logging.connectors.warn("RSS: Connecto timeout exception fetching document contents '"+urlValue+"' - skipping: "+e.getMessage(), e);
rval[i] = null;
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
Logging.connectors.warn("RSS: IO exception fetching document contents '"+urlValue+"' - skipping: "+e.getMessage(), e);
rval[i] = null;
}
break;
case IThrottledConnection.STATUS_SITEERROR:
case IThrottledConnection.STATUS_PAGEERROR:
default:
// Record an *empty* version.
// This signals the processDocuments() method that we really don't want to ingest this document, but we also don't
// want to blow the document out of the queue, since then we'd wind up perhaps fetching it multiple times.
rval[i] = "";
break;
}
}
finally
{
connection.doneFetch(activities);
}
}
finally
{
connection.close();
}
}
}
catch (MalformedURLException e)
{
Logging.connectors.debug("RSS: URL '"+urlValue+"' is malformed; skipping",e);
rval[i] = null;
}
}
}
finally
{
int j = 0;
while (j < dechromedData.length)
{
CharacterInput ci = (CharacterInput)dechromedData[j++];
if (ci != null)
ci.discard();
}
}
i++;
}
return rval;
}
/** Process a set of documents.
* This is the method that should cause each document to be fetched, processed, and the results either added
* to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
* The document specification allows this class to filter what is done based on the job.
*@param documentIdentifiers is the set of document identifiers to process.
*@param versions is the corresponding set of version strings, as returned by getDocumentVersions().
*@param activities is the interface this method should use to queue up new document references
* and ingest documents.
*@param spec is the document specification.
*@param scanOnly is an array corresponding to the document identifiers. It is set to true to indicate when the processing
* should only find other references, and should not actually call the ingestion methods.
*@param jobType is an integer describing how the job is being run, whether continuous or once-only.
*/
@Override
public void processDocuments(String[] documentIdentifiers, String[] versions, IProcessActivity activities,
DocumentSpecification spec, boolean[] scanOnly, int jobType)
throws ManifoldCFException, ServiceInterruption
{
getSession();
// The version strings contain the ingest url as well as everything we need to determine if this url is a feed or just a document.
// So, there is no need to reparse the specification.
Filter f = null;
String[] fixedList = new String[2];
int i = 0;
while (i < documentIdentifiers.length)
{
String urlValue = documentIdentifiers[i];
String version = versions[i];
if (version.length() == 0)
{
// This document had an http response incompatible with its existence on the web.
i++;
// Leave document in jobqueue, but do NOT get rid of it, or we will wind up seeing it queued again by
// somebody else. We *do* have to signal the document to be removed from the index, however, or it will
// stick around until the job is deleted.
activities.noDocument(urlValue,version);
continue;
}
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Processing '"+urlValue+"'");
// The only links we extract come from documents that we think are RSS feeds.
// When we think that's the case, we attempt to parse it as RSS XML.
if (version.startsWith("-"))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Interpreting document '"+urlValue+"' as a feed");
// We think it is a feed.
// The version string does not have anything useful in it.
if (f == null)
f = new Filter(spec,false);
// If this is a continuous job, AND scanonly is true, it means that the document was either identical to the
// previous fetch, or was not fetched at all. In that case, it may not even be there, and we *certainly* don't
// want to attempt to process it in any case.
//
// NOTE: I re-enabled the scan permanently because we need the TTL value to be set whatever the cost. If the
// TTL value is not set, we default to the specified job's feed-rescan time, which is not going to be current enough for some feeds.
if (true || scanOnly[i] == false || jobType != JOBMODE_CONTINUOUS)
{
handleRSSFeedSAX(urlValue,activities,f);
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Extraction of feed '"+urlValue+"' complete");
// Record the feed's version string, so we won't refetch unless needed.
// This functionality is required for the last ETag and Last-Modified fields to be sent to the rss server, and to
// keep track of the adaptive parameters.
activities.recordDocument(urlValue,version);
}
else
{
// The problem here is that we really do need to set the rescan time to something reasonable.
// But we might not even have read the feed! So what to do??
// One answer is to build a connector-specific table that carries the last value of every feed around.
// Another answer is to change the version code to always read the feed (and the heck with ETag and Last-Modified).
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Feed '"+urlValue+"' does not appear to differ from previous fetch for a continuous job; not extracting!");
long currentTime = System.currentTimeMillis();
Long defaultRescanTime = f.getDefaultRescanTime(currentTime);
if (defaultRescanTime != null)
{
Long minimumTime = f.getMinimumRescanTime(currentTime);
if (minimumTime != null)
{
if (defaultRescanTime.longValue() < minimumTime.longValue())
defaultRescanTime = minimumTime;
}
}
activities.setDocumentScheduleBounds(urlValue,defaultRescanTime,defaultRescanTime,null,null);
}
}
else if (scanOnly[i] == false && version.startsWith("+"))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Interpreting '"+urlValue+"' as a document");
if (isDataIngestable(activities,urlValue))
{
// Treat it as an ingestable document.
// Version *should* start with a "+".
ArrayList acls = new ArrayList();
StringBuilder denyAclBuffer = new StringBuilder();
int startPos = unpackList(acls,version,1,'+');
if (startPos < version.length() && version.charAt(startPos++) == '+')
{
startPos = unpack(denyAclBuffer,version,startPos,'+');
}
ArrayList metadata = new ArrayList();
startPos = unpackList(metadata,version,startPos,'+');
StringBuilder ingestUrlBuffer = new StringBuilder();
startPos = unpack(ingestUrlBuffer,version,startPos,'+');
String ingestURL = ingestUrlBuffer.toString();
ArrayList pubDates = new ArrayList();
startPos = unpackList(pubDates,version,startPos,'+');
ArrayList titles = new ArrayList();
startPos = unpackList(titles,version,startPos,'+');
ArrayList sources = new ArrayList();
startPos = unpackList(sources,version,startPos,'+');
ArrayList categories = new ArrayList();
startPos = unpackList(categories,version,startPos,'+');
ArrayList descriptions = new ArrayList();
startPos = unpackList(descriptions,version,startPos,'+');
ArrayList authorNames = new ArrayList();
startPos = unpackList(authorNames,version,startPos,'+');
ArrayList authorEmails = new ArrayList();
startPos = unpackList(authorEmails,version,startPos,'+');
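// Note: the unpack order above must mirror the pack order used in
// getDocumentVersions(): acls, deny token, metadata, ingest url, pub dates,
// titles, sources, categories, descriptions, author names, author emails.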
if (ingestURL.length() > 0)
{
long dataSize = cache.getDataLength(urlValue);
RepositoryDocument rd = new RepositoryDocument();
// Set content type
rd.setMimeType(cache.getContentType(urlValue));
// Turn into acls and add into description
String[] aclArray = new String[acls.size()];
int j = 0;
while (j < aclArray.length)
{
aclArray[j] = (String)acls.get(j);
j++;
}
rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,aclArray);
// Deny acl too
if (denyAclBuffer.length() > 0)
{
String[] denyAclArray = new String[]{denyAclBuffer.toString()};
rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,denyAclArray);
}
// Grab metadata
HashMap metaHash = new HashMap();
int k = 0;
while (k < metadata.size())
{
String metadataItem = (String)metadata.get(k++);
unpackFixedList(fixedList,metadataItem,0,'=');
HashMap hashValue = (HashMap)metaHash.get(fixedList[0]);
if (hashValue == null)
{
hashValue = new HashMap();
metaHash.put(fixedList[0],hashValue);
}
hashValue.put(fixedList[1],fixedList[1]);
}
Iterator metaIter = metaHash.keySet().iterator();
while (metaIter.hasNext())
{
String key = (String)metaIter.next();
HashMap metaList = (HashMap)metaHash.get(key);
String[] values = new String[metaList.size()];
Iterator iter = metaList.keySet().iterator();
k = 0;
while (iter.hasNext())
{
values[k] = (String)iter.next();
k++;
}
rd.addField(key,values);
}
// Loop through the titles to add those to the metadata
String[] titleValues = new String[titles.size()];
k = 0;
while (k < titleValues.length)
{
titleValues[k] = (String)titles.get(k);
k++;
}
if (k > 0)
rd.addField("title",titleValues);
// Loop through the author names to add those to the metadata
String[] authorNameValues = new String[authorNames.size()];
k = 0;
while (k < authorNameValues.length)
{
authorNameValues[k] = (String)authorNames.get(k);
k++;
}
if (k > 0)
rd.addField("authorname",authorNameValues);
// Loop through the author emails to add those to the metadata
String[] authorEmailValues = new String[authorEmails.size()];
k = 0;
while (k < authorEmailValues.length)
{
authorEmailValues[k] = (String)authorEmails.get(k);
k++;
}
if (k > 0)
rd.addField("authoremail",authorEmailValues);
// Loop through the descriptions to add those to the metadata
String[] descriptionValues = new String[descriptions.size()];
k = 0;
while (k < descriptionValues.length)
{
descriptionValues[k] = (String)descriptions.get(k);
k++;
}
if (k > 0)
rd.addField("summary",descriptionValues);
// Loop through the sources to add those to the metadata
String[] sourceValues = new String[sources.size()];
k = 0;
while (k < sourceValues.length)
{
sourceValues[k] = (String)sources.get(k);
k++;
}
if (k > 0)
rd.addField("source",sourceValues);
// Add the categories now
String[] categoryValues = new String[categories.size()];
k = 0;
while (k < categoryValues.length)
{
categoryValues[k] = (String)categories.get(k);
k++;
}
if (k > 0)
rd.addField("category",categoryValues);
// The pubdates are ms-since-epoch values; we want the minimum one for the origination time.
Long minimumOrigTime = null;
String[] pubDateValues = new String[pubDates.size()];
String[] pubDateValuesISO = new String[pubDates.size()];
TimeZone tz = TimeZone.getTimeZone("UTC");
DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm'Z'");
df.setTimeZone(tz);
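// For example, a carried-down pubdate of "0" (ms since epoch) formats as
// "1970-01-01T00:00Z" under this pattern.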
k = 0;
while (k < pubDates.size())
{
String pubDate = (String)pubDates.get(k);
pubDateValues[k] = pubDate;
try
{
Long pubDateLong = new Long(pubDate);
if (minimumOrigTime == null || pubDateLong.longValue() < minimumOrigTime.longValue())
minimumOrigTime = pubDateLong;
pubDateValuesISO[k] = df.format(new Date(pubDateLong.longValue()));
}
catch (NumberFormatException e)
{
// Do nothing; this pubdate value seems to not mean anything
}
k++;
}
if (k > 0)
{
rd.addField("pubdate",pubDateValues);
rd.addField("pubdateiso",pubDateValuesISO);
}
if (minimumOrigTime != null)
activities.setDocumentOriginationTime(urlValue,minimumOrigTime);
InputStream is = cache.getData(urlValue);
if (is != null)
{
try
{
rd.setBinary(is,dataSize);
try
{
activities.ingestDocumentWithException(urlValue,version,ingestURL,rd);
}
catch (IOException e)
{
handleIOException(e,"reading data");
}
}
finally
{
try
{
is.close();
}
catch (IOException e)
{
handleIOException(e,"closing stream");
}
}
}
}
}
else
{
activities.noDocument(urlValue,version);
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Skipping document '"+urlValue+"' because it cannot be indexed");
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Skipping document '"+urlValue+"' because it cannot have interesting links");
}
i++;
}
}
protected static void handleIOException(IOException e, String context)
throws ManifoldCFException, ServiceInterruption
{
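// Note: java.net.SocketTimeoutException extends InterruptedIOException, so it
// must be checked first; otherwise a timeout would be misreported as a thread
// interruption.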
if (e instanceof java.net.SocketTimeoutException)
throw new ManifoldCFException("IO error "+context+": "+e.getMessage(),e);
else if (e instanceof InterruptedIOException)
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
else
throw new ManifoldCFException("IO error "+context+": "+e.getMessage(),e);
}
/** Free a set of documents. This method is called for all documents whose versions have been fetched using
* the getDocumentVersions() method, including those that returned null versions. It may be used to free resources
* committed during the getDocumentVersions() method. It is guaranteed to be called AFTER any calls to
* processDocuments() for the documents in question.
*@param documentIdentifiers is the set of document identifiers.
*@param versions is the corresponding set of version identifiers (individual identifiers may be null).
*/
@Override
public void releaseDocumentVersions(String[] documentIdentifiers, String[] versions)
throws ManifoldCFException
{
int i = 0;
while (i < documentIdentifiers.length)
{
String version = versions[i];
if (version != null)
{
String urlValue = documentIdentifiers[i];
cache.deleteData(urlValue);
}
i++;
}
}
// UI support methods.
//
// These support methods come in two varieties. The first bunch is involved in setting up connection configuration information. The second bunch
// is involved in presenting and editing document specification information for a job. The two kinds of methods are accordingly treated differently,
// in that the first bunch cannot assume that the current connector object is connected, while the second bunch can. That is why the first bunch
// receives a thread context argument for all UI methods, while the second bunch does not need one (since it has already been applied via the connect()
// method, above).
/** Output the configuration header section.
* This method is called in the head section of the connector's configuration page. Its purpose is to add the required tabs to the list, and to output any
* javascript methods that might be needed by the configuration editing HTML.
*@param threadContext is the local thread context.
*@param out is the output to which any HTML should be sent.
*@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
*@param tabsArray is an array of tab names. Add to this array any tab names that are specific to the connector.
*/
@Override
public void outputConfigurationHeader(IThreadContext threadContext, IHTTPOutput out,
Locale locale, ConfigParams parameters, List<String> tabsArray)
throws ManifoldCFException, IOException
{
tabsArray.add(Messages.getString(locale,"RSSConnector.Email"));
tabsArray.add(Messages.getString(locale,"RSSConnector.Robots"));
tabsArray.add(Messages.getString(locale,"RSSConnector.Bandwidth"));
tabsArray.add(Messages.getString(locale,"RSSConnector.Proxy"));
out.print(
"<script type=\"text/javascript\">\n"+
"<!--\n"+
"function checkConfig()\n"+
"{\n"+
" if (editconnection.email.value != \"\" && editconnection.email.value.indexOf(\"@\") == -1)\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.NeedAValidEmailAddress")+"\");\n"+
" editconnection.email.focus();\n"+
" return false;\n"+
" }\n"+
" if (editconnection.bandwidth.value != \"\" && !isInteger(editconnection.bandwidth.value))\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.EnterAValidNumberOrBlankForNoLimit")+"\");\n"+
" editconnection.bandwidth.focus();\n"+
" return false;\n"+
" }\n"+
" if (editconnection.connections.value == \"\" || !isInteger(editconnection.connections.value))\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.EnterAValidNumberForTheMaxNumberOfOpenConnectionsPerServer")+"\");\n"+
" editconnection.connections.focus();\n"+
" return false;\n"+
" }\n"+
" if (editconnection.fetches.value != \"\" && !isInteger(editconnection.fetches.value))\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.EnterAValidNumberOrBlankForNoLimit")+"\");\n"+
" editconnection.fetches.focus();\n"+
" return false;\n"+
" }\n"+
" return true;\n"+
"}\n"+
"\n"+
"function checkConfigForSave()\n"+
"{\n"+
" if (editconnection.email.value == \"\")\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.EmailAddressRequiredToBeIncludedInAllRequestHeaders")+"\");\n"+
" SelectTab(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.Email")+"\");\n"+
" editconnection.email.focus();\n"+
" return false;\n"+
" }\n"+
" return true;\n"+
"}\n"+
"\n"+
"//-->\n"+
"</script>\n"
);
}
/** Output the configuration body section.
* This method is called in the body section of the connector's configuration page. Its purpose is to present the required form elements for editing.
* The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags. The name of the
* form is "editconnection".
*@param threadContext is the local thread context.
*@param out is the output to which any HTML should be sent.
*@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
*@param tabName is the current tab name.
*/
@Override
public void outputConfigurationBody(IThreadContext threadContext, IHTTPOutput out,
Locale locale, ConfigParams parameters, String tabName)
throws ManifoldCFException, IOException
{
String email = parameters.getParameter(RSSConfig.PARAMETER_EMAIL);
if (email == null)
email = "";
String robotsUsage = parameters.getParameter(RSSConfig.PARAMETER_ROBOTSUSAGE);
if (robotsUsage == null)
robotsUsage = RSSConfig.VALUE_ALL;
String bandwidth = parameters.getParameter(RSSConfig.PARAMETER_BANDWIDTH);
if (bandwidth == null)
bandwidth = "64";
String connections = parameters.getParameter(RSSConfig.PARAMETER_MAXOPEN);
if (connections == null)
connections = "2";
String fetches = parameters.getParameter(RSSConfig.PARAMETER_MAXFETCHES);
if (fetches == null)
fetches = "12";
String throttleGroup = parameters.getParameter(RSSConfig.PARAMETER_THROTTLEGROUP);
if (throttleGroup == null)
throttleGroup = "";
String proxyHost = parameters.getParameter(RSSConfig.PARAMETER_PROXYHOST);
if (proxyHost == null)
proxyHost = "";
String proxyPort = parameters.getParameter(RSSConfig.PARAMETER_PROXYPORT);
if (proxyPort == null)
proxyPort = "";
String proxyAuthDomain = parameters.getParameter(RSSConfig.PARAMETER_PROXYAUTHDOMAIN);
if (proxyAuthDomain == null)
proxyAuthDomain = "";
String proxyAuthUsername = parameters.getParameter(RSSConfig.PARAMETER_PROXYAUTHUSERNAME);
if (proxyAuthUsername == null)
proxyAuthUsername = "";
String proxyAuthPassword = parameters.getObfuscatedParameter(RSSConfig.PARAMETER_PROXYAUTHPASSWORD);
if (proxyAuthPassword == null)
proxyAuthPassword = "";
else
proxyAuthPassword = out.mapPasswordToKey(proxyAuthPassword);
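// A note on the password round trip above (describing the existing pattern, not adding
// behavior): the obfuscated password itself is never rendered. mapPasswordToKey() swaps it
// for an opaque key before it reaches the browser, and processConfigurationPost() later
// calls variableContext.mapKeyToPassword() to translate the posted key back, so an
// untouched form field leaves the stored password unchanged.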
// Email tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.Email")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.EmailAddressToContactColon") + "</nobr></td><td class=\"value\"><input type=\"text\" size=\"32\" name=\"email\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(email)+"\"/></td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"email\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(email)+"\"/>\n"
);
}
// Robots tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.Robots")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.RobotsTxtUsageColon") + "</nobr></td>\n"+
" <td class=\"value\">\n"+
" <select name=\"robotsusage\" size=\"3\">\n"+
" <option value=\"none\" "+(robotsUsage.equals(RSSConfig.VALUE_NONE)?"selected=\"selected\"":"")+">" + Messages.getBodyString(locale,"RSSConnector.DontLookAtRobotsTxt") + "</option>\n"+
" <option value=\"data\" "+(robotsUsage.equals(RSSConfig.VALUE_DATA)?"selected=\"selected\"":"")+">" + Messages.getBodyString(locale,"RSSConnector.ObeyRobotsTxtForDataFetchesOnly") + "</option>\n"+
" <option value=\"all\" "+(robotsUsage.equals(RSSConfig.VALUE_ALL)?"selected=\"selected\"":"")+">" + Messages.getBodyString(locale,"RSSConnector.ObeyRobotsTxtForAllFetches") + "</option>\n"+
" </select>\n"+
" </td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"robotsusage\" value=\""+robotsUsage+"\"/>\n"
);
}
// Bandwidth tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.Bandwidth")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.MaxKBytesPerSecondPerServerColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"6\" name=\"bandwidth\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(bandwidth)+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.MaxConnectionsPerServerColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"4\" name=\"connections\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(connections)+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.MaxFetchesPerMinutePerServerColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"4\" name=\"fetches\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(fetches)+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.ThrottleGroupNameColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"32\" name=\"throttlegroup\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(throttleGroup)+"\"/></td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"bandwidth\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(bandwidth)+"\"/>\n"+
"<input type=\"hidden\" name=\"connections\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(connections)+"\"/>\n"+
"<input type=\"hidden\" name=\"fetches\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(fetches)+"\"/>\n"+
"<input type=\"hidden\" name=\"throttlegroup\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(throttleGroup)+"\"/>\n"
);
}
// Proxy tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.Proxy")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.ProxyHostColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"40\" name=\"proxyhost\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyHost)+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.ProxyPortColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"5\" name=\"proxyport\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyPort)+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.ProxyAuthenticationDomainColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"32\" name=\"proxyauthdomain\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthDomain)+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.ProxyAuthenticationUserNameColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"32\" name=\"proxyauthusername\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthUsername)+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.ProxyAuthenticationPasswordColon") + "</nobr></td>\n"+
" <td class=\"value\"><input type=\"password\" size=\"16\" name=\"proxyauthpassword\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthPassword)+"\"/></td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"proxyhost\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyHost)+"\"/>\n"+
"<input type=\"hidden\" name=\"proxyport\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyPort)+"\"/>\n"+
"<input type=\"hidden\" name=\"proxyauthusername\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthUsername)+"\"/>\n"+
"<input type=\"hidden\" name=\"proxyauthdomain\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthDomain)+"\"/>\n"+
"<input type=\"hidden\" name=\"proxyauthpassword\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(proxyAuthPassword)+"\"/>\n"
);
}
}
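// Illustrative sketch of the tab pattern above (values hypothetical): only the active tab
// renders editable fields, while every other tab's state rides along as hidden inputs so a
// post from any tab still carries the complete configuration, e.g.
//   <input type="hidden" name="email" value="crawler@example.com"/>
//   <input type="hidden" name="bandwidth" value="64"/>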
/** Process a configuration post.
* This method is called at the start of the connector's configuration page, whenever there is a possibility that form data for a connection has been
* posted. Its purpose is to gather form information and modify the configuration parameters accordingly.
* The name of the posted form is "editconnection".
*@param threadContext is the local thread context.
*@param variableContext is the set of variables available from the post, including binary file post information.
*@param locale is the preferred locale for any output.
*@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
*@return null if all is well, or a string error message if there is an error that should prevent saving of the connection (and cause a redirection to an error page).
*/
@Override
public String processConfigurationPost(IThreadContext threadContext, IPostParameters variableContext,
Locale locale, ConfigParams parameters)
throws ManifoldCFException
{
String email = variableContext.getParameter("email");
if (email != null)
parameters.setParameter(RSSConfig.PARAMETER_EMAIL,email);
String robotsUsage = variableContext.getParameter("robotsusage");
if (robotsUsage != null)
parameters.setParameter(RSSConfig.PARAMETER_ROBOTSUSAGE,robotsUsage);
String bandwidth = variableContext.getParameter("bandwidth");
if (bandwidth != null)
parameters.setParameter(RSSConfig.PARAMETER_BANDWIDTH,bandwidth);
String connections = variableContext.getParameter("connections");
if (connections != null)
parameters.setParameter(RSSConfig.PARAMETER_MAXOPEN,connections);
String fetches = variableContext.getParameter("fetches");
if (fetches != null)
parameters.setParameter(RSSConfig.PARAMETER_MAXFETCHES,fetches);
String throttleGroup = variableContext.getParameter("throttlegroup");
if (throttleGroup != null)
parameters.setParameter(RSSConfig.PARAMETER_THROTTLEGROUP,throttleGroup);
String proxyHost = variableContext.getParameter("proxyhost");
if (proxyHost != null)
parameters.setParameter(RSSConfig.PARAMETER_PROXYHOST,proxyHost);
String proxyPort = variableContext.getParameter("proxyport");
if (proxyPort != null)
parameters.setParameter(RSSConfig.PARAMETER_PROXYPORT,proxyPort);
String proxyAuthDomain = variableContext.getParameter("proxyauthdomain");
if (proxyAuthDomain != null)
parameters.setParameter(RSSConfig.PARAMETER_PROXYAUTHDOMAIN,proxyAuthDomain);
String proxyAuthUsername = variableContext.getParameter("proxyauthusername");
if (proxyAuthUsername != null)
parameters.setParameter(RSSConfig.PARAMETER_PROXYAUTHUSERNAME,proxyAuthUsername);
String proxyAuthPassword = variableContext.getParameter("proxyauthpassword");
if (proxyAuthPassword != null)
parameters.setObfuscatedParameter(RSSConfig.PARAMETER_PROXYAUTHPASSWORD,variableContext.mapKeyToPassword(proxyAuthPassword));
return null;
}
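// Worked example (hypothetical post): email=crawler@example.com, robotsusage=all, and
// bandwidth=128 update PARAMETER_EMAIL, PARAMETER_ROBOTSUSAGE, and PARAMETER_BANDWIDTH
// respectively; any field absent from the post (for instance, when no form was actually
// submitted) is skipped, preserving the previously saved value.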
/** View configuration.
* This method is called in the body section of the connector's view configuration page. Its purpose is to present the connection information to the user.
* The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
*@param threadContext is the local thread context.
*@param out is the output to which any HTML should be sent.
*@param locale is the preferred locale for the output.
*@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
*/
@Override
public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out,
Locale locale, ConfigParams parameters)
throws ManifoldCFException, IOException
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr>\n"+
" <td class=\"description\" colspan=\"1\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.ParametersColon") + "</nobr></td>\n"+
" <td class=\"value\" colspan=\"3\">\n"
);
Iterator iter = parameters.listParameters();
while (iter.hasNext())
{
String param = (String)iter.next();
String value = parameters.getParameter(param);
if (param.length() >= "password".length() && param.substring(param.length()-"password".length()).equalsIgnoreCase("password"))
{
out.print(
" <nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(param)+"=********</nobr><br/>\n"
);
}
else if (param.length() >="keystore".length() && param.substring(param.length()-"keystore".length()).equalsIgnoreCase("keystore"))
{
IKeystoreManager kmanager = KeystoreManagerFactory.make("",value);
out.print(
" <nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(param)+"=<"+Integer.toString(kmanager.getContents().length)+Messages.getBodyString(locale,"RSSConnector.certificates")+"></nobr><br/>\n"
);
}
else
{
out.print(
" <nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(param)+"="+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(value)+"</nobr><br/>\n"
);
}
}
out.print(
" </td>\n"+
" </tr>\n"+
"</table>\n"
);
}
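// Display convention used above (a description of the loop, not new behavior): any
// parameter whose name ends in "password" is masked as ********, one ending in "keystore"
// is summarized by its certificate count, and everything else is printed verbatim, e.g.
// (hypothetical) email=crawler@example.com, proxyauthpassword=********.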
/** Output the specification header section.
* This method is called in the head section of a job page which has selected a repository connection of the current type. Its purpose is to add the required tabs
* to the list, and to output any javascript methods that might be needed by the job editing HTML.
*@param out is the output to which any HTML should be sent.
*@param locale is the preferred locale for the output.
*@param ds is the current document specification for this job.
*@param tabsArray is an array of tab names. Add to this array any tab names that are specific to the connector.
*/
@Override
public void outputSpecificationHeader(IHTTPOutput out, Locale locale, DocumentSpecification ds, List<String> tabsArray)
throws ManifoldCFException, IOException
{
tabsArray.add(Messages.getString(locale,"RSSConnector.URLs"));
tabsArray.add(Messages.getString(locale,"RSSConnector.Canonicalization"));
tabsArray.add(Messages.getString(locale,"RSSConnector.URLMappings"));
tabsArray.add(Messages.getString(locale,"RSSConnector.Exclusions"));
tabsArray.add(Messages.getString(locale,"RSSConnector.TimeValues"));
tabsArray.add(Messages.getString(locale,"RSSConnector.Security"));
tabsArray.add(Messages.getString(locale,"RSSConnector.Metadata"));
tabsArray.add(Messages.getString(locale,"RSSConnector.DechromedContent"));
out.print(
"<script type=\"text/javascript\">\n"+
"<!--\n"+
"function SpecOp(n, opValue, anchorvalue)\n"+
"{\n"+
" eval(\"editjob.\"+n+\".value = \\\"\"+opValue+\"\\\"\");\n"+
" postFormSetAnchor(anchorvalue);\n"+
"}\n"+
"\n"+
"function AddRegexp(anchorvalue)\n"+
"{\n"+
" if (editjob.rssmatch.value == \"\")\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.MatchMustHaveARegexpValue")+"\");\n"+
" editjob.rssmatch.focus();\n"+
" return;\n"+
" }\n"+
"\n"+
" SpecOp(\"rssop\",\"Add\",anchorvalue);\n"+
"}\n"+
"\n"+
"function RemoveRegexp(index, anchorvalue)\n"+
"{\n"+
" editjob.rssindex.value = index;\n"+
" SpecOp(\"rssop\",\"Delete\",anchorvalue);\n"+
"}\n"+
"\n"+
"function SpecAddToken(anchorvalue)\n"+
"{\n"+
" if (editjob.spectoken.value == \"\")\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.TypeInAnAccessToken")+"\");\n"+
" editjob.spectoken.focus();\n"+
" return;\n"+
" }\n"+
" SpecOp(\"accessop\",\"Add\",anchorvalue);\n"+
"}\n"+
"\n"+
"function SpecAddMetadata(anchorvalue)\n"+
"{\n"+
" if (editjob.specmetaname.value == \"\")\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.TypeInMetadataName")+"\");\n"+
" editjob.specmetaname.focus();\n"+
" return;\n"+
" }\n"+
" if (editjob.specmetavalue.value == \"\")\n"+
" {\n"+
" alert(\""+Messages.getString(locale,"RSSConnector.TypeInMetadataValue")+"\");\n"+
" editjob.specmetavalue.focus();\n"+
" return;\n"+
" }\n"+
" SpecOp(\"metadataop\",\"Add\",anchorvalue);\n"+
"}\n"+
"\n"+
"function URLRegexpDelete(index, anchorvalue)\n"+
"{\n"+
" editjob.urlregexpnumber.value = index;\n"+
" SpecOp(\"urlregexpop\",\"Delete\",anchorvalue);\n"+
"}\n"+
"\n"+
"function URLRegexpAdd(anchorvalue)\n"+
"{\n"+
" SpecOp(\"urlregexpop\",\"Add\",anchorvalue);\n"+
"}\n"+
"\n"+
"function checkSpecification()\n"+
"{\n"+
" if (editjob.feedtimeout.value == \"\" || !isInteger(editjob.feedtimeout.value))\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.ATimeoutValueInSecondsIsRequired")+"\");\n"+
" editjob.feedtimeout.focus();\n"+
" return false;\n"+
" }\n"+
" if (editjob.feedrefetch.value == \"\" || !isInteger(editjob.feedrefetch.value))\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.ARefetchIntervalInMinutesIsRequired")+"\");\n"+
" editjob.feedrefetch.focus();\n"+
" return false;\n"+
" }\n"+
" if (editjob.minfeedrefetch.value == \"\" || !isInteger(editjob.minfeedrefetch.value))\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.AMinimumRefetchIntervalInMinutesIsRequire")+"\");\n"+
" editjob.minfeedrefetch.focus();\n"+
" return false;\n"+
" }\n"+
" if (editjob.badfeedrefetch.value != \"\" && !isInteger(editjob.badfeedrefetch.value))\n"+
" {\n"+
" alert(\""+Messages.getBodyJavascriptString(locale,"RSSConnector.ABadFeedRefetchIntervalInMinutesIsRequired")+"\");\n"+
" editjob.badfeedrefetch.focus();\n"+
" return false;\n"+
" }\n"+
"\n"+
" return true;\n"+
"}\n"+
"\n"+
"//-->\n"+
"</script>\n"
);
}
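// Sketch of the SpecOp round trip above (describing the mechanism, not extending it): each
// row-level button stores its operation in a hidden field and re-posts the form; e.g.
// SpecOp("rssop","Delete",anchor) sets editjob.rssop to "Delete" so that
// processSpecificationPost() can read the op and apply it, and the page re-renders at the
// given anchor.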
/** Output the specification body section.
* This method is called in the body section of a job page which has selected a repository connection of the current type. Its purpose is to present the required form elements for editing.
* The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags. The name of the
* form is "editjob".
*@param out is the output to which any HTML should be sent.
*@param locale is the preferred locale for the output.
*@param ds is the current document specification for this job.
*@param tabName is the current tab name.
*/
@Override
public void outputSpecificationBody(IHTTPOutput out, Locale locale, DocumentSpecification ds, String tabName)
throws ManifoldCFException, IOException
{
int i;
int k;
// Build the url seed string, and the url regexp match and map
StringBuilder sb = new StringBuilder();
List<String> regexp = new ArrayList<String>();
List<String> matchStrings = new ArrayList<String>();
int feedTimeoutValue = 60;
int feedRefetchValue = 60;
int minFeedRefetchValue = 15;
Integer badFeedRefetchValue = null;
String exclusions = "";
// Now, loop through paths
i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_FEED))
{
String rssURL = sn.getAttributeValue(RSSConfig.ATTR_URL);
if (rssURL != null)
{
sb.append(rssURL).append("\n");
}
}
else if (sn.getType().equals(RSSConfig.NODE_EXCLUDES))
{
exclusions = sn.getValue();
if (exclusions == null)
exclusions = "";
}
else if (sn.getType().equals(RSSConfig.NODE_MAP))
{
String match = sn.getAttributeValue(RSSConfig.ATTR_MATCH);
String map = sn.getAttributeValue(RSSConfig.ATTR_MAP);
if (match != null)
{
regexp.add(match);
if (map == null)
map = "";
matchStrings.add(map);
}
}
else if (sn.getType().equals(RSSConfig.NODE_FEEDTIMEOUT))
{
String value = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
feedTimeoutValue = Integer.parseInt(value);
}
else if (sn.getType().equals(RSSConfig.NODE_FEEDRESCAN))
{
String value = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
feedRefetchValue = Integer.parseInt(value);
}
else if (sn.getType().equals(RSSConfig.NODE_MINFEEDRESCAN))
{
String value = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
minFeedRefetchValue = Integer.parseInt(value);
}
else if (sn.getType().equals(RSSConfig.NODE_BADFEEDRESCAN))
{
String value = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
badFeedRefetchValue = Integer.valueOf(value);
}
}
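// For orientation (a summary of the structure walked above, using the RSSConfig constant
// names rather than their literal values): the specification holds one NODE_FEED child
// with an ATTR_URL per seed URL, an optional NODE_EXCLUDES child whose value is the
// exclusion list, NODE_MAP children carrying ATTR_MATCH/ATTR_MAP pairs, and singleton
// NODE_FEEDTIMEOUT/NODE_FEEDRESCAN/NODE_MINFEEDRESCAN/NODE_BADFEEDRESCAN nodes whose
// ATTR_VALUE holds the corresponding interval.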
// URLs tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.URLs")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"value\" colspan=\"2\">\n"+
" <textarea rows=\"25\" cols=\"80\" name=\"rssurls\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(sb.toString())+"</textarea>\n"+
" </td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"rssurls\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(sb.toString())+"\"/>\n"
);
}
// Exclusions tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.Exclusions")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\" colspan=\"1\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.Exclude") + "</nobr></td>\n"+
" <td class=\"value\" colspan=\"1\">\n"+
" <textarea rows=\"25\" cols=\"60\" name=\"exclusions\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(exclusions)+"</textarea>\n"+
" </td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"exclusions\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(exclusions)+"\"/>\n"
);
}
// Canonicalization tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.Canonicalization")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"boxcell\" colspan=\"2\">\n"+
" <input type=\"hidden\" name=\"urlregexpop\" value=\"Continue\"/>\n"+
" <input type=\"hidden\" name=\"urlregexpnumber\" value=\"\"/>\n"+
" <table class=\"formtable\">\n"+
" <tr class=\"formheaderrow\">\n"+
" <td class=\"formcolumnheader\"></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.URLRegularExpression")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.Description")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.Reorder")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RemoveJSPSessions")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RemoveASPSessions")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RemovePHPSessions")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RemoveBVSessions")+"</nobr></td>\n"+
" </tr>\n"
);
int q = 0;
int l = 0;
while (q < ds.getChildCount())
{
SpecificationNode specNode = ds.getChild(q++);
if (specNode.getType().equals(RSSConfig.NODE_URLSPEC))
{
// Ok, this node matters to us
String regexpString = specNode.getAttributeValue(RSSConfig.ATTR_REGEXP);
String description = specNode.getAttributeValue(RSSConfig.ATTR_DESCRIPTION);
if (description == null)
description = "";
String allowReorder = specNode.getAttributeValue(RSSConfig.ATTR_REORDER);
if (allowReorder == null || allowReorder.length() == 0)
allowReorder = RSSConfig.VALUE_NO;
String allowJavaSessionRemoval = specNode.getAttributeValue(RSSConfig.ATTR_JAVASESSIONREMOVAL);
if (allowJavaSessionRemoval == null || allowJavaSessionRemoval.length() == 0)
allowJavaSessionRemoval = RSSConfig.VALUE_NO;
String allowASPSessionRemoval = specNode.getAttributeValue(RSSConfig.ATTR_ASPSESSIONREMOVAL);
if (allowASPSessionRemoval == null || allowASPSessionRemoval.length() == 0)
allowASPSessionRemoval = RSSConfig.VALUE_NO;
String allowPHPSessionRemoval = specNode.getAttributeValue(RSSConfig.ATTR_PHPSESSIONREMOVAL);
if (allowPHPSessionRemoval == null || allowPHPSessionRemoval.length() == 0)
allowPHPSessionRemoval = RSSConfig.VALUE_NO;
String allowBVSessionRemoval = specNode.getAttributeValue(RSSConfig.ATTR_BVSESSIONREMOVAL);
if (allowBVSessionRemoval == null || allowBVSessionRemoval.length() == 0)
allowBVSessionRemoval = RSSConfig.VALUE_NO;
out.print(
" <tr class=\""+(((l % 2)==0)?"evenformrow":"oddformrow")+"\">\n"+
" <td class=\"formcolumncell\">\n"+
" <a name=\""+"urlregexp_"+Integer.toString(l)+"\">\n"+
" <input type=\"button\" value=\"Delete\" alt=\""+Messages.getAttributeString(locale,"RSSConnector.DeleteUrlRegexp")+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(regexpString)+"\" onclick='javascript:URLRegexpDelete("+Integer.toString(l)+",\"urlregexp_"+Integer.toString(l)+"\");'/>\n"+
" </a>\n"+
" </td>\n"+
" <td class=\"formcolumncell\">\n"+
" <input type=\"hidden\" name=\""+"urlregexp_"+Integer.toString(l)+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(regexpString)+"\"/>\n"+
" <input type=\"hidden\" name=\""+"urlregexpdesc_"+Integer.toString(l)+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(description)+"\"/>\n"+
" <input type=\"hidden\" name=\""+"urlregexpreorder_"+Integer.toString(l)+"\" value=\""+allowReorder+"\"/>\n"+
" <input type=\"hidden\" name=\""+"urlregexpjava_"+Integer.toString(l)+"\" value=\""+allowJavaSessionRemoval+"\"/>\n"+
" <input type=\"hidden\" name=\""+"urlregexpasp_"+Integer.toString(l)+"\" value=\""+allowASPSessionRemoval+"\"/>\n"+
" <input type=\"hidden\" name=\""+"urlregexpphp_"+Integer.toString(l)+"\" value=\""+allowPHPSessionRemoval+"\"/>\n"+
" <input type=\"hidden\" name=\""+"urlregexpbv_"+Integer.toString(l)+"\" value=\""+allowBVSessionRemoval+"\"/>\n"+
" <nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(regexpString)+"</nobr>\n"+
" </td>\n"+
" <td class=\"formcolumncell\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(description)+"</td>\n"+
" <td class=\"formcolumncell\">"+allowReorder+"</td>\n"+
" <td class=\"formcolumncell\">"+allowJavaSessionRemoval+"</td>\n"+
" <td class=\"formcolumncell\">"+allowASPSessionRemoval+"</td>\n"+
" <td class=\"formcolumncell\">"+allowPHPSessionRemoval+"</td>\n"+
" <td class=\"formcolumncell\">"+allowBVSessionRemoval+"</td>\n"+
" </tr>\n"
);
l++;
}
}
if (l == 0)
{
out.print(
" <tr class=\"formrow\"><td colspan=\"8\" class=\"formcolumnmessage\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.NoCanonicalizationSpecified")+"</nobr></td></tr>\n"
);
}
out.print(
" <tr class=\"formrow\"><td colspan=\"8\" class=\"formseparator\"><hr/></td></tr>\n"+
" <tr class=\"formrow\">\n"+
" <td class=\"formcolumncell\">\n"+
" <a name=\""+"urlregexp_"+Integer.toString(l)+"\">\n"+
" <input type=\"button\" value=\"Add\" alt=\""+Messages.getAttributeString(locale,"RSSConnector.AddUlRegexp")+"\" onclick='javascript:URLRegexpAdd(\"urlregexp_"+Integer.toString(l+1)+"\");'/>\n"+
" <input type=\"hidden\" name=\"urlregexpcount\" value=\""+Integer.toString(l)+"\"/>\n"+
" </a>\n"+
" </td>\n"+
" <td class=\"formcolumncell\"><input type=\"text\" name=\"urlregexp\" size=\"30\" value=\"\"/></td>\n"+
" <td class=\"formcolumncell\"><input type=\"text\" name=\"urlregexpdesc\" size=\"30\" value=\"\"/></td>\n"+
" <td class=\"formcolumncell\"><input type=\"checkbox\" name=\"urlregexpreorder\" value=\"yes\"/></td>\n"+
" <td class=\"formcolumncell\"><input type=\"checkbox\" name=\"urlregexpjava\" value=\"yes\" checked=\"true\"/></td>\n"+
" <td class=\"formcolumncell\"><input type=\"checkbox\" name=\"urlregexpasp\" value=\"yes\" checked=\"true\"/></td>\n"+
" <td class=\"formcolumncell\"><input type=\"checkbox\" name=\"urlregexpphp\" value=\"yes\" checked=\"true\"/></td>\n"+
" <td class=\"formcolumncell\"><input type=\"checkbox\" name=\"urlregexpbv\" value=\"yes\" checked=\"true\"/></td>\n"+
" </tr>\n"+
" </table>\n"+
" </td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
// Carry the canonicalization specification through the post as hidden fields
int q = 0;
int l = 0;
while (q < ds.getChildCount())
{
SpecificationNode specNode = ds.getChild(q++);
if (specNode.getType().equals(RSSConfig.NODE_URLSPEC))
{
// Ok, this node matters to us
String regexpString = specNode.getAttributeValue(RSSConfig.ATTR_REGEXP);
String description = specNode.getAttributeValue(RSSConfig.ATTR_DESCRIPTION);
if (description == null)
description = "";
String allowReorder = specNode.getAttributeValue(RSSConfig.ATTR_REORDER);
if (allowReorder == null || allowReorder.length() == 0)
allowReorder = RSSConfig.VALUE_NO;
String allowJavaSessionRemoval = specNode.getAttributeValue(RSSConfig.ATTR_JAVASESSIONREMOVAL);
if (allowJavaSessionRemoval == null || allowJavaSessionRemoval.length() == 0)
allowJavaSessionRemoval = RSSConfig.VALUE_NO;
String allowASPSessionRemoval = specNode.getAttributeValue(RSSConfig.ATTR_ASPSESSIONREMOVAL);
if (allowASPSessionRemoval == null || allowASPSessionRemoval.length() == 0)
allowASPSessionRemoval = RSSConfig.VALUE_NO;
String allowPHPSessionRemoval = specNode.getAttributeValue(RSSConfig.ATTR_PHPSESSIONREMOVAL);
if (allowPHPSessionRemoval == null || allowPHPSessionRemoval.length() == 0)
allowPHPSessionRemoval = RSSConfig.VALUE_NO;
String allowBVSessionRemoval = specNode.getAttributeValue(RSSConfig.ATTR_BVSESSIONREMOVAL);
if (allowBVSessionRemoval == null || allowBVSessionRemoval.length() == 0)
allowBVSessionRemoval = RSSConfig.VALUE_NO;
out.print(
"<input type=\"hidden\" name=\""+"urlregexp_"+Integer.toString(l)+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(regexpString)+"\"/>\n"+
"<input type=\"hidden\" name=\""+"urlregexpdesc_"+Integer.toString(l)+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(description)+"\"/>\n"+
"<input type=\"hidden\" name=\""+"urlregexpreorder_"+Integer.toString(l)+"\" value=\""+allowReorder+"\"/>\n"+
"<input type=\"hidden\" name=\""+"urlregexpjava_"+Integer.toString(l)+"\" value=\""+allowJavaSessionRemoval+"\"/>\n"+
"<input type=\"hidden\" name=\""+"urlregexpasp_"+Integer.toString(l)+"\" value=\""+allowASPSessionRemoval+"\"/>\n"+
"<input type=\"hidden\" name=\""+"urlregexpphp_"+Integer.toString(l)+"\" value=\""+allowPHPSessionRemoval+"\"/>\n"+
"<input type=\"hidden\" name=\""+"urlregexpbv_"+Integer.toString(l)+"\" value=\""+allowBVSessionRemoval+"\"/>\n"
);
l++;
}
}
out.print(
"<input type=\"hidden\" name=\"urlregexpcount\" value=\""+Integer.toString(l)+"\"/>\n"
);
}
// Mappings tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.URLMappings")))
{
out.print(
"<input type=\"hidden\" name=\"rssop\" value=\"\"/>\n"+
"<input type=\"hidden\" name=\"rssindex\" value=\"\"/>\n"+
"<input type=\"hidden\" name=\"rssmapcount\" value=\""+Integer.toString(regexp.size())+"\"/>\n"+
"\n"+
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"4\"><hr/></td></tr>\n"
);
i = 0;
while (i < regexp.size())
{
String prefix = "rssregexp_"+Integer.toString(i)+"_";
out.print(
" <tr>\n"+
" <td class=\"value\">\n"+
" <a name=\""+"regexp_"+Integer.toString(i)+"\">\n"+
" <input type=\"button\" value=\"Remove\" onclick='javascript:RemoveRegexp("+Integer.toString(i)+",\"regexp_"+Integer.toString(i)+"\")' alt=\""+Messages.getAttributeString(locale,"RSSConnector.RemoveRegexp")+Integer.toString(i)+"\"/>\n"+
" </a>\n"+
" </td>\n"+
" <td class=\"value\"><input type=\"hidden\" name=\""+prefix+"match"+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape((String)regexp.get(i))+"\"/>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape((String)regexp.get(i))+"</td>\n"+
" <td class=\"value\">==></td>\n"+
" <td class=\"value\">\n"
);
String match = (String)matchStrings.get(i);
out.print(
" <input type=\"hidden\" name=\""+prefix+"map"+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(match)+"\"/>\n"
);
if (match.length() == 0)
{
out.print(
" &lt;as is&gt;\n"
);
}
else
{
out.print(
" "+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(match)+"\n"
);
}
out.print(
" </td>\n"+
" </tr>\n"
);
i++;
}
out.print(
" <tr>\n"+
" <td class=\"value\"><a name=\""+"regexp_"+Integer.toString(i)+"\"><input type=\"button\" value=\"Add\" onclick='javascript:AddRegexp(\"regexp_"+Integer.toString(i+1)+"\")' alt=\""+Messages.getAttributeString(locale,"RSSConnector.AddRegexp")+"\"/></a></td>\n"+
" <td class=\"value\"><input type=\"text\" name=\"rssmatch\" size=\"16\" value=\"\"/></td>\n"+
" <td class=\"value\">==></td>\n"+
" <td class=\"value\"><input type=\"text\" name=\"rssmap\" size=\"16\" value=\"\"/></td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"rssmapcount\" value=\""+Integer.toString(regexp.size())+"\"/>\n"
);
i = 0;
while (i < regexp.size())
{
String prefix = "rssregexp_"+Integer.toString(i)+"_";
String match = (String)matchStrings.get(i);
out.print(
"<input type=\"hidden\" name=\""+prefix+"match"+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape((String)regexp.get(i))+"\"/>\n"+
"<input type=\"hidden\" name=\""+prefix+"map"+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(match)+"\"/>\n"
);
i++;
}
}
// Timeout Value tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.TimeValues")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.FeedConnectTimeout")+"</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"5\" name=\"feedtimeout\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(Integer.toString(feedTimeoutValue))+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.DefaultFeedRefetchTime")+"</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"5\" name=\"feedrefetch\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(Integer.toString(feedRefetchValue))+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.MinimumFeedRefetchTime")+"</nobr></td>\n"+
" <td class=\"value\"><input type=\"text\" size=\"5\" name=\"minfeedrefetch\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(Integer.toString(minFeedRefetchValue))+"\"/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.BadFeedRefetchTime")+"</nobr></td>\n"+
" <td class=\"value\">\n"+
" <input type=\"hidden\" name=\"badfeedrefetch_present\" value=\"true\"/>\n"+
" <input type=\"text\" size=\"5\" name=\"badfeedrefetch\" value=\""+((badFeedRefetchValue==null)?"":org.apache.manifoldcf.ui.util.Encoder.attributeEscape(badFeedRefetchValue.toString()))+"\"/>\n"+
" </td>\n"+
" </tr>\n"+
"\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"feedtimeout\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(Integer.toString(feedTimeoutValue))+"\"/>\n"+
"<input type=\"hidden\" name=\"feedrefetch\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(Integer.toString(feedRefetchValue))+"\"/>\n"+
"<input type=\"hidden\" name=\"minfeedrefetch\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(Integer.toString(minFeedRefetchValue))+"\"/>\n"+
"<input type=\"hidden\" name=\"badfeedrefetch_present\" value=\"true\"/>\n"+
"<input type=\"hidden\" name=\"badfeedrefetch\" value=\""+((badFeedRefetchValue==null)?"":org.apache.manifoldcf.ui.util.Encoder.attributeEscape(badFeedRefetchValue.toString()))+"\"/>\n"
);
}
// Dechromed content tab
String dechromedMode = RSSConfig.VALUE_NONE;
String chromedMode = RSSConfig.VALUE_USE;
i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_DECHROMEDMODE))
dechromedMode = sn.getAttributeValue(RSSConfig.ATTR_MODE);
else if (sn.getType().equals(RSSConfig.NODE_CHROMEDMODE))
chromedMode = sn.getAttributeValue(RSSConfig.ATTR_MODE);
}
if (tabName.equals(Messages.getString(locale,"RSSConnector.DechromedContent")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"1\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"value\"><nobr><input type=\"radio\" name=\"dechromedmode\" value=\"none\" "+(dechromedMode.equals(RSSConfig.VALUE_NONE)?"checked=\"true\"":"")+"/>"+Messages.getBodyString(locale,"RSSConnector.NoDechromedContent")+"</nobr></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"value\"><nobr><input type=\"radio\" name=\"dechromedmode\" value=\"description\" "+(dechromedMode.equals(RSSConfig.VALUE_DESCRIPTION)?"checked=\"true\"":"")+"/>"+Messages.getBodyString(locale,"RSSConnector.DechromedContentIfPresentInDescriptionField")+"</nobr></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"value\"><nobr><input type=\"radio\" name=\"dechromedmode\" value=\"content\" "+(dechromedMode.equals(RSSConfig.VALUE_CONTENT)?"checked=\"true\"":"")+"/>"+Messages.getBodyString(locale,"RSSConnector.DechromedContentIfPresentInContentField")+"</nobr></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"separator\"><hr/></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"value\"><nobr><input type=\"radio\" name=\"chromedmode\" value=\"use\" "+(chromedMode.equals(RSSConfig.VALUE_USE)?"checked=\"true\"":"")+"/>"+Messages.getBodyString(locale,"RSSConnector.UseChromedContentIfNoDechromedContentFound")+"</nobr></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"value\"><nobr><input type=\"radio\" name=\"chromedmode\" value=\"skip\" "+(chromedMode.equals(RSSConfig.VALUE_SKIP)?"checked=\"true\"":"")+"/>"+Messages.getBodyString(locale,"RSSConnector.NeverUseChromedContent")+"</nobr></td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"value\"><nobr><input type=\"radio\" name=\"chromedmode\" value=\"metadata\" "+(chromedMode.equals(RSSConfig.VALUE_METADATA)?"checked=\"true\"":"")+"/>"+Messages.getBodyString(locale,"RSSConnector.NoContentMetadataOnly")+"</nobr></td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
out.print(
"<input type=\"hidden\" name=\"dechromedmode\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(dechromedMode)+"\"/>\n"+
"<input type=\"hidden\" name=\"chromedmode\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(chromedMode)+"\"/>\n"
);
}
// Security tab
// There is no native security, so all we care about are the tokens.
i = 0;
if (tabName.equals(Messages.getString(locale,"RSSConnector.Security")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"
);
// Go through forced ACL
i = 0;
k = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_ACCESS))
{
String accessDescription = "_"+Integer.toString(k);
String accessOpName = "accessop"+accessDescription;
String token = sn.getAttributeValue(RSSConfig.ATTR_TOKEN);
out.print(
" <tr>\n"+
" <td class=\"description\">\n"+
" <input type=\"hidden\" name=\""+accessOpName+"\" value=\"\"/>\n"+
" <input type=\"hidden\" name=\""+"spectoken"+accessDescription+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(token)+"\"/>\n"+
" <a name=\""+"token_"+Integer.toString(k)+"\">\n"+
" <input type=\"button\" value=\"Delete\" onClick='Javascript:SpecOp(\""+accessOpName+"\",\"Delete\",\"token_"+Integer.toString(k)+"\")' alt=\""+Messages.getAttributeString(locale,"RSSConnector.DeleteToken")+Integer.toString(k)+"\"/>\n"+
" </a>&nbsp;\n"+
" </td>\n"+
" <td class=\"value\">\n"+
" "+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(token)+"\n"+
" </td>\n"+
" </tr>\n"
);
k++;
}
}
if (k == 0)
{
out.print(
" <tr>\n"+
" <td class=\"message\" colspan=\"2\">" + Messages.getBodyString(locale,"RSSConnector.NoAccessTokensPresent") + "</td>\n"+
" </tr>\n"
);
}
out.print(
" <tr><td class=\"lightseparator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\">\n"+
" <input type=\"hidden\" name=\"tokencount\" value=\""+Integer.toString(k)+"\"/>\n"+
" <input type=\"hidden\" name=\"accessop\" value=\"\"/>\n"+
" <a name=\""+"token_"+Integer.toString(k)+"\">\n"+
" <input type=\"button\" value=\"Add\" onClick='Javascript:SpecAddToken(\"token_"+Integer.toString(k+1)+"\")' alt=\""+Messages.getAttributeString(locale,"RSSConnector.AddAccessToken")+"\"/>\n"+
" </a>&nbsp;\n"+
" </td>\n"+
" <td class=\"value\">\n"+
" <input type=\"text\" size=\"30\" name=\"spectoken\" value=\"\"/>\n"+
" </td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
// Finally, go through forced ACL
i = 0;
k = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_ACCESS))
{
String accessDescription = "_"+Integer.toString(k);
String token = sn.getAttributeValue(RSSConfig.ATTR_TOKEN);
out.print(
"<input type=\"hidden\" name=\""+"spectoken"+accessDescription+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(token)+"\"/>\n"
);
k++;
}
}
out.print(
"<input type=\"hidden\" name=\"tokencount\" value=\""+Integer.toString(k)+"\"/>\n"
);
}
// "Metadata" tab
if (tabName.equals(Messages.getString(locale,"RSSConnector.Metadata")))
{
out.print(
"<table class=\"displaytable\">\n"+
" <tr><td class=\"separator\" colspan=\"4\"><hr/></td></tr>\n"
);
// Go through metadata
i = 0;
k = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_METADATA))
{
String metadataDescription = "_"+Integer.toString(k);
String metadataOpName = "metadataop"+metadataDescription;
String name = sn.getAttributeValue(RSSConfig.ATTR_NAME);
String value = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
out.print(
" <tr>\n"+
" <td class=\"description\">\n"+
" <input type=\"hidden\" name=\""+metadataOpName+"\" value=\"\"/>\n"+
" <input type=\"hidden\" name=\""+"specmetaname"+metadataDescription+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(name)+"\"/>\n"+
" <input type=\"hidden\" name=\""+"specmetavalue"+metadataDescription+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(value)+"\"/>\n"+
" <a name=\""+"metadata_"+Integer.toString(k)+"\">\n"+
" <input type=\"button\" value=\"Delete\" onClick='Javascript:SpecOp(\""+metadataOpName+"\",\"Delete\",\"metadata_"+Integer.toString(k)+"\")' alt=\""+Messages.getAttributeString(locale,"RSSConnector.DeleteMetadata")+Integer.toString(k)+"\"/>\n"+
" </a>&nbsp;\n"+
" </td>\n"+
" <td class=\"value\">\n"+
" "+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(name)+"\n"+
" </td>\n"+
" <td class=\"value\">=</td>\n"+
" <td class=\"value\">\n"+
" "+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(value)+"\n"+
" </td>\n"+
" </tr>\n"
);
k++;
}
}
if (k == 0)
{
out.print(
" <tr>\n"+
" <td class=\"message\" colspan=\"4\">"+Messages.getBodyString(locale,"RSSConnector.NoMetadataPresent")+"</td>\n"+
" </tr>\n"
);
}
out.print(
" <tr><td class=\"lightseparator\" colspan=\"4\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\">\n"+
" <input type=\"hidden\" name=\"metadatacount\" value=\""+Integer.toString(k)+"\"/>\n"+
" <input type=\"hidden\" name=\"metadataop\" value=\"\"/>\n"+
" <a name=\""+"metadata_"+Integer.toString(k)+"\">\n"+
" <input type=\"button\" value=\"Add\" onClick='Javascript:SpecAddMetadata(\"metadata_"+Integer.toString(k+1)+"\")' alt=\""+Messages.getAttributeString(locale,"RSSConnector.AddMetadata")+"\"/>\n"+
" </a>&nbsp;\n"+
" </td>\n"+
" <td class=\"value\">\n"+
" <input type=\"text\" size=\"30\" name=\"specmetaname\" value=\"\"/>\n"+
" </td>\n"+
" <td class=\"value\">=</td>\n"+
" <td class=\"value\">\n"+
" <input type=\"text\" size=\"80\" name=\"specmetavalue\" value=\"\"/>\n"+
" </td>\n"+
" </tr>\n"+
"</table>\n"
);
}
else
{
// Finally, go through metadata
i = 0;
k = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_METADATA))
{
String metadataDescription = "_"+Integer.toString(k);
String name = sn.getAttributeValue(RSSConfig.ATTR_NAME);
String value = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
out.print(
"<input type=\"hidden\" name=\""+"specmetaname"+metadataDescription+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(name)+"\"/>\n"+
"<input type=\"hidden\" name=\""+"specmetavalue"+metadataDescription+"\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(value)+"\"/>\n"
);
k++;
}
}
out.print(
"<input type=\"hidden\" name=\"metadatacount\" value=\""+Integer.toString(k)+"\"/>\n"
);
}
}
/** Process a specification post.
* This method is called at the start of a job's edit or view page, whenever there is a possibility that form data for a connection has been
* posted. Its purpose is to gather form information and modify the document specification accordingly.
* The name of the posted form is "editjob".
*@param variableContext contains the post data, including binary file-upload information.
*@param locale is the preferred locale for any output.
*@param ds is the current document specification for this job.
*@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
*/
@Override
public String processSpecificationPost(IPostParameters variableContext, Locale locale, DocumentSpecification ds)
throws ManifoldCFException
{
// Get the map
String value = variableContext.getParameter("rssmapcount");
if (value != null)
{
int mapsize = Integer.parseInt(value);
// Clear it first
int j = 0;
while (j < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(j);
if (sn.getType().equals(RSSConfig.NODE_MAP))
ds.removeChild(j);
else
j++;
}
// Grab the map values
j = 0;
while (j < mapsize)
{
String prefix = "rssregexp_"+Integer.toString(j)+"_";
String match = variableContext.getParameter(prefix+"match");
String map = variableContext.getParameter(prefix+"map");
if (map == null)
map = "";
// Add to the document specification
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_MAP);
node.setAttribute(RSSConfig.ATTR_MATCH,match);
node.setAttribute(RSSConfig.ATTR_MAP,map);
ds.addChild(ds.getChildCount(),node);
j++;
}
}
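// Example of the row contract consumed above (field names from the form, values
// hypothetical): rssmapcount=2 announces two rows, posted as rssregexp_0_match /
// rssregexp_0_map and rssregexp_1_match / rssregexp_1_map; a missing map value simply
// means "pass the matched URL through as is".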
// Get the RSS feed URLs
String rssURLSequence = variableContext.getParameter("rssurls");
if (rssURLSequence != null)
{
// Delete all existing feed nodes first
int i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i);
if (sn.getType().equals(RSSConfig.NODE_FEED))
ds.removeChild(i);
else
i++;
}
try
{
java.io.Reader str = new java.io.StringReader(rssURLSequence);
try
{
java.io.BufferedReader is = new java.io.BufferedReader(str);
try
{
while (true)
{
String nextString = is.readLine();
if (nextString == null)
break;
if (nextString.length() == 0)
continue;
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_FEED);
node.setAttribute(RSSConfig.ATTR_URL,nextString);
ds.addChild(ds.getChildCount(),node);
}
}
finally
{
is.close();
}
}
finally
{
str.close();
}
}
catch (java.io.IOException e)
{
throw new ManifoldCFException("IO error",e);
}
}
// Read the url specs
String urlRegexpCount = variableContext.getParameter("urlregexpcount");
if (urlRegexpCount != null && urlRegexpCount.length() > 0)
{
int regexpCount = Integer.parseInt(urlRegexpCount);
int j = 0;
while (j < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(j);
if (sn.getType().equals(RSSConfig.NODE_URLSPEC))
ds.removeChild(j);
else
j++;
}
// Grab the operation and the index (if any)
String operation = variableContext.getParameter("urlregexpop");
if (operation == null)
operation = "Continue";
int opIndex = -1;
if (operation.equals("Delete"))
opIndex = Integer.parseInt(variableContext.getParameter("urlregexpnumber"));
// Reconstruct urlspec nodes
j = 0;
while (j < regexpCount)
{
// For each index, first look for a delete operation
if (!operation.equals("Delete") || j != opIndex)
{
// Add the jth node
String regexp = variableContext.getParameter("urlregexp_"+Integer.toString(j));
String regexpDescription = variableContext.getParameter("urlregexpdesc_"+Integer.toString(j));
String reorder = variableContext.getParameter("urlregexpreorder_"+Integer.toString(j));
String javaSession = variableContext.getParameter("urlregexpjava_"+Integer.toString(j));
String aspSession = variableContext.getParameter("urlregexpasp_"+Integer.toString(j));
String phpSession = variableContext.getParameter("urlregexpphp_"+Integer.toString(j));
String bvSession = variableContext.getParameter("urlregexpbv_"+Integer.toString(j));
SpecificationNode newSn = new SpecificationNode(RSSConfig.NODE_URLSPEC);
newSn.setAttribute(RSSConfig.ATTR_REGEXP,regexp);
if (regexpDescription != null && regexpDescription.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_DESCRIPTION,regexpDescription);
if (reorder != null && reorder.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_REORDER,reorder);
if (javaSession != null && javaSession.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_JAVASESSIONREMOVAL,javaSession);
if (aspSession != null && aspSession.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_ASPSESSIONREMOVAL,aspSession);
if (phpSession != null && phpSession.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_PHPSESSIONREMOVAL,phpSession);
if (bvSession != null && bvSession.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_BVSESSIONREMOVAL,bvSession);
ds.addChild(ds.getChildCount(),newSn);
}
j++;
}
if (operation.equals("Add"))
{
String regexp = variableContext.getParameter("urlregexp");
String regexpDescription = variableContext.getParameter("urlregexpdesc");
String reorder = variableContext.getParameter("urlregexpreorder");
String javaSession = variableContext.getParameter("urlregexpjava");
String aspSession = variableContext.getParameter("urlregexpasp");
String phpSession = variableContext.getParameter("urlregexpphp");
String bvSession = variableContext.getParameter("urlregexpbv");
// Add a new node at the end
SpecificationNode newSn = new SpecificationNode(RSSConfig.NODE_URLSPEC);
newSn.setAttribute(RSSConfig.ATTR_REGEXP,regexp);
if (regexpDescription != null && regexpDescription.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_DESCRIPTION,regexpDescription);
if (reorder != null && reorder.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_REORDER,reorder);
if (javaSession != null && javaSession.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_JAVASESSIONREMOVAL,javaSession);
if (aspSession != null && aspSession.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_ASPSESSIONREMOVAL,aspSession);
if (phpSession != null && phpSession.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_PHPSESSIONREMOVAL,phpSession);
if (bvSession != null && bvSession.length() > 0)
newSn.setAttribute(RSSConfig.ATTR_BVSESSIONREMOVAL,bvSession);
ds.addChild(ds.getChildCount(),newSn);
}
}
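// The canonicalization list above follows a clear-and-rebuild pattern: the posted hidden
// fields describe every row, so the code removes all NODE_URLSPEC children, re-adds each
// row except the one named by a Delete op, and appends a fresh node for an Add op. E.g.
// (hypothetical post) urlregexpcount=2 with urlregexpop=Delete and urlregexpnumber=0
// leaves only the former row 1.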
// Get the exclusions
String exclusions = variableContext.getParameter("exclusions");
if (exclusions != null)
{
// Delete existing exclusions record first
int i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i);
if (sn.getType().equals(RSSConfig.NODE_EXCLUDES))
ds.removeChild(i);
else
i++;
}
SpecificationNode cn = new SpecificationNode(RSSConfig.NODE_EXCLUDES);
cn.setValue(exclusions);
ds.addChild(ds.getChildCount(),cn);
}
// Read the feed timeout, if present
String feedTimeoutValue = variableContext.getParameter("feedtimeout");
if (feedTimeoutValue != null && feedTimeoutValue.length() > 0)
{
int j = 0;
while (j < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(j);
if (sn.getType().equals(RSSConfig.NODE_FEEDTIMEOUT))
ds.removeChild(j);
else
j++;
}
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_FEEDTIMEOUT);
node.setAttribute(RSSConfig.ATTR_VALUE,feedTimeoutValue);
ds.addChild(ds.getChildCount(),node);
}
// Read the feed refetch interval, if present
String feedRefetchValue = variableContext.getParameter("feedrefetch");
if (feedRefetchValue != null && feedRefetchValue.length() > 0)
{
int j = 0;
while (j < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(j);
if (sn.getType().equals(RSSConfig.NODE_FEEDRESCAN))
ds.removeChild(j);
else
j++;
}
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_FEEDRESCAN);
node.setAttribute(RSSConfig.ATTR_VALUE,feedRefetchValue);
ds.addChild(ds.getChildCount(),node);
}
// Read the minimum feed refetch interval, if present
String minFeedRefetchValue = variableContext.getParameter("minfeedrefetch");
if (minFeedRefetchValue != null && minFeedRefetchValue.length() > 0)
{
int j = 0;
while (j < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(j);
if (sn.getType().equals(RSSConfig.NODE_MINFEEDRESCAN))
ds.removeChild(j);
else
j++;
}
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_MINFEEDRESCAN);
node.setAttribute(RSSConfig.ATTR_VALUE,minFeedRefetchValue);
ds.addChild(ds.getChildCount(),node);
}
// Read the bad feed refetch interval (which is allowed to be null)
String badFeedRefetchValuePresent = variableContext.getParameter("badfeedrefetch_present");
if (badFeedRefetchValuePresent != null && badFeedRefetchValuePresent.length() > 0)
{
String badFeedRefetchValue = variableContext.getParameter("badfeedrefetch");
int k = 0;
while (k < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(k);
if (sn.getType().equals(RSSConfig.NODE_BADFEEDRESCAN))
ds.removeChild(k);
else
k++;
}
if (badFeedRefetchValue != null && badFeedRefetchValue.length() > 0)
{
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_BADFEEDRESCAN);
node.setAttribute(RSSConfig.ATTR_VALUE,badFeedRefetchValue);
ds.addChild(ds.getChildCount(),node);
}
}
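// The badfeedrefetch_present marker above distinguishes "posted empty" from "not posted at
// all": when the marker is present and the value is blank, the NODE_BADFEEDRESCAN node is
// removed (no bad-feed override); when the form was never rendered, the marker is absent
// and the existing setting survives untouched.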
// Read the dechromed mode
String dechromedMode = variableContext.getParameter("dechromedmode");
if (dechromedMode != null && dechromedMode.length() > 0)
{
int j = 0;
while (j < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(j);
if (sn.getType().equals(RSSConfig.NODE_DECHROMEDMODE))
ds.removeChild(j);
else
j++;
}
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_DECHROMEDMODE);
node.setAttribute(RSSConfig.ATTR_MODE,dechromedMode);
ds.addChild(ds.getChildCount(),node);
}
// Read the chromed mode
String chromedMode = variableContext.getParameter("chromedmode");
if (chromedMode != null && chromedMode.length() > 0)
{
int j = 0;
while (j < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(j);
if (sn.getType().equals(RSSConfig.NODE_CHROMEDMODE))
ds.removeChild(j);
else
j++;
}
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_CHROMEDMODE);
node.setAttribute(RSSConfig.ATTR_MODE,chromedMode);
ds.addChild(ds.getChildCount(),node);
}
// Now, do whatever action we were told to do.
String rssop = variableContext.getParameter("rssop");
if (rssop != null && rssop.equals("Add"))
{
// Add a match to the end
String match = variableContext.getParameter("rssmatch");
String map = variableContext.getParameter("rssmap");
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_MAP);
node.setAttribute(RSSConfig.ATTR_MATCH,match);
node.setAttribute(RSSConfig.ATTR_MAP,map);
ds.addChild(ds.getChildCount(),node);
}
else if (rssop != null && rssop.equals("Delete"))
{
int index = Integer.parseInt(variableContext.getParameter("rssindex"));
int j = 0;
while (j < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(j);
if (sn.getType().equals(RSSConfig.NODE_MAP))
{
if (index == 0)
{
ds.removeChild(j);
break;
}
index--;
}
j++;
}
}
String xc = variableContext.getParameter("tokencount");
if (xc != null)
{
// Delete all tokens first
int i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i);
if (sn.getType().equals(RSSConfig.NODE_ACCESS))
ds.removeChild(i);
else
i++;
}
int accessCount = Integer.parseInt(xc);
i = 0;
while (i < accessCount)
{
String accessDescription = "_"+Integer.toString(i);
String accessOpName = "accessop"+accessDescription;
xc = variableContext.getParameter(accessOpName);
if (xc != null && xc.equals("Delete"))
{
// Next row
i++;
continue;
}
// Get the stuff we need
String accessSpec = variableContext.getParameter("spectoken"+accessDescription);
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_ACCESS);
node.setAttribute(RSSConfig.ATTR_TOKEN,accessSpec);
ds.addChild(ds.getChildCount(),node);
i++;
}
String op = variableContext.getParameter("accessop");
if (op != null && op.equals("Add"))
{
String accessspec = variableContext.getParameter("spectoken");
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_ACCESS);
node.setAttribute(RSSConfig.ATTR_TOKEN,accessspec);
ds.addChild(ds.getChildCount(),node);
}
}
xc = variableContext.getParameter("metadatacount");
if (xc != null)
{
// Delete all metadata nodes first
int i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i);
if (sn.getType().equals(RSSConfig.NODE_METADATA))
ds.removeChild(i);
else
i++;
}
int metadataCount = Integer.parseInt(xc);
i = 0;
while (i < metadataCount)
{
String metadataDescription = "_"+Integer.toString(i);
String metadataOpName = "metadataop"+metadataDescription;
xc = variableContext.getParameter(metadataOpName);
if (xc != null && xc.equals("Delete"))
{
// Next row
i++;
continue;
}
// Get the stuff we need
String metaNameSpec = variableContext.getParameter("specmetaname"+metadataDescription);
String metaValueSpec = variableContext.getParameter("specmetavalue"+metadataDescription);
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_METADATA);
node.setAttribute(RSSConfig.ATTR_NAME,metaNameSpec);
node.setAttribute(RSSConfig.ATTR_VALUE,metaValueSpec);
ds.addChild(ds.getChildCount(),node);
i++;
}
String op = variableContext.getParameter("metadataop");
if (op != null && op.equals("Add"))
{
String metaNameSpec = variableContext.getParameter("specmetaname");
String metaValueSpec = variableContext.getParameter("specmetavalue");
SpecificationNode node = new SpecificationNode(RSSConfig.NODE_METADATA);
node.setAttribute(RSSConfig.ATTR_NAME,metaNameSpec);
node.setAttribute(RSSConfig.ATTR_VALUE,metaValueSpec);
ds.addChild(ds.getChildCount(),node);
}
}
return null;
}
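// End-to-end example of the op-field protocol handled above (hypothetical post):
// tokencount=2 with accessop_0=Delete drops the first access token while rebuilding the
// second, and accessop=Add with spectoken=engineering appends a new NODE_ACCESS node; the
// metadata block in the same method behaves identically via metadatacount, metadataop,
// specmetaname, and specmetavalue.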
/** View specification.
* This method is called in the body section of a job's view page. Its purpose is to present the document specification information to the user.
* The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
*@param out is the output to which any HTML should be sent.
*@param locale is the preferred locale for the output.
*@param ds is the current document specification for this job.
*/
@Override
public void viewSpecification(IHTTPOutput out, Locale locale, DocumentSpecification ds)
throws ManifoldCFException, IOException
{
String exclusions = "";
out.print(
"<table class=\"displaytable\">\n"
);
int i = 0;
boolean seenAny = false;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_FEED))
{
if (seenAny == false)
{
out.print(
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RSSUrls")+"</nobr></td>\n"+
" <td class=\"value\">\n"
);
seenAny = true;
}
out.print(
" <nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(sn.getAttributeValue(RSSConfig.ATTR_URL))+"</nobr><br/>\n"
);
}
else if (sn.getType().equals(RSSConfig.NODE_EXCLUDES))
{
exclusions = sn.getValue();
if (exclusions == null)
exclusions = "";
}
}
if (seenAny)
{
out.print(
" </td>\n"+
" </tr>\n"
);
}
else
{
out.print(
" <tr><td class=\"message\" colspan=\"2\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.NoRSSUrlsSpecified")+"</nobr></td></tr>\n"
);
}
out.print(
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"
);
i = 0;
int l = 0;
seenAny = false;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_URLSPEC))
{
if (l == 0)
{
out.print(
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.URLCanonicalization")+"</nobr></td>\n"+
" <td class=\"value\">\n"+
" <table class=\"formtable\">\n"+
" <tr class=\"formheaderrow\">\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.URLRegexp")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.Description")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.Reorder")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RemoveJSPSessions")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RemoveASPSessions")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RemovePHPSessions")+"</nobr></td>\n"+
" <td class=\"formcolumnheader\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.RemoveBVSessions")+"</nobr></td>\n"+
" </tr>\n"
);
}
String regexpString = sn.getAttributeValue(RSSConfig.ATTR_REGEXP);
String description = sn.getAttributeValue(RSSConfig.ATTR_DESCRIPTION);
if (description == null)
description = "";
String allowReorder = sn.getAttributeValue(RSSConfig.ATTR_REORDER);
if (allowReorder == null || allowReorder.length() == 0)
allowReorder = RSSConfig.VALUE_NO;
String allowJavaSessionRemoval = sn.getAttributeValue(RSSConfig.ATTR_JAVASESSIONREMOVAL);
if (allowJavaSessionRemoval == null || allowJavaSessionRemoval.length() == 0)
allowJavaSessionRemoval = RSSConfig.VALUE_NO;
String allowASPSessionRemoval = sn.getAttributeValue(RSSConfig.ATTR_ASPSESSIONREMOVAL);
if (allowASPSessionRemoval == null || allowASPSessionRemoval.length() == 0)
allowASPSessionRemoval = RSSConfig.VALUE_NO;
String allowPHPSessionRemoval = sn.getAttributeValue(RSSConfig.ATTR_PHPSESSIONREMOVAL);
if (allowPHPSessionRemoval == null || allowPHPSessionRemoval.length() == 0)
allowPHPSessionRemoval = RSSConfig.VALUE_NO;
String allowBVSessionRemoval = sn.getAttributeValue(RSSConfig.ATTR_BVSESSIONREMOVAL);
if (allowBVSessionRemoval == null || allowBVSessionRemoval.length() == 0)
allowBVSessionRemoval = RSSConfig.VALUE_NO;
out.print(
" <tr class=\""+(((l % 2)==0)?"evenformrow":"oddformrow")+"\">\n"+
" <td class=\"formcolumncell\"><nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(regexpString)+"</nobr></td>\n"+
" <td class=\"formcolumncell\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(description)+"</td>\n"+
" <td class=\"formcolumncell\"><nobr>"+allowReorder+"</nobr></td>\n"+
" <td class=\"formcolumncell\"><nobr>"+allowJavaSessionRemoval+"</nobr></td>\n"+
" <td class=\"formcolumncell\"><nobr>"+allowASPSessionRemoval+"</nobr></td>\n"+
" <td class=\"formcolumncell\"><nobr>"+allowPHPSessionRemoval+"</nobr></td>\n"+
" <td class=\"formcolumncell\"><nobr>"+allowBVSessionRemoval+"</nobr></td>\n"+
" </tr>\n"
);
l++;
}
}
if (l > 0)
{
out.print(
" </table>\n"+
" </td>\n"+
" </tr>\n"
);
}
else
{
out.print(
" <tr><td class=\"message\" colspan=\"2\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.NoCanonicalizationSpecified")+"</nobr></td></tr>\n"
);
}
out.print(
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"
);
i = 0;
seenAny = false;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_MAP))
{
if (seenAny == false)
{
out.print(
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.URLMappingsColon")+"</nobr></td>\n"+
" <td class=\"value\">\n"
);
seenAny = true;
}
String match = sn.getAttributeValue(RSSConfig.ATTR_MATCH);
String map = sn.getAttributeValue(RSSConfig.ATTR_MAP);
out.print(
" <nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(match)+"</nobr>\n"
);
if (map != null && map.length() > 0)
{
out.print(
" &nbsp;--&gt;&nbsp;<nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(map)+"</nobr>\n"
);
}
out.print(
" <br/>\n"
);
}
}
if (seenAny)
{
out.print(
" </td>\n"+
" </tr>\n"
);
}
else
{
out.print(
" <tr><td class=\"message\" colspan=\"2\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.NoMappingsSpecifiedWillAcceptAllUrls")+"</nobr></td></tr>\n"
);
}
out.print(
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.Exclude") + "</nobr></td>\n"+
" <td class=\"value\">\n"
);
try
{
java.io.Reader str = new java.io.StringReader(exclusions);
try
{
java.io.BufferedReader is = new java.io.BufferedReader(str);
try
{
while (true)
{
String nextString = is.readLine();
if (nextString == null)
break;
if (nextString.length() == 0)
continue;
out.print(
" <nobr>"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(nextString)+"</nobr><br/>\n"
);
}
}
finally
{
is.close();
}
}
finally
{
str.close();
}
}
catch (java.io.IOException e)
{
throw new ManifoldCFException("IO error: "+e.getMessage(),e);
}
out.print(
" </td>\n"+
" </tr>\n"
);
out.print(
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"
);
String feedTimeoutValue = "60";
String feedRefetchValue = "60";
String minFeedRefetchValue = "15";
String badFeedRefetchValue = null;
String dechromedMode = RSSConfig.VALUE_NONE;
String chromedMode = RSSConfig.VALUE_USE;
i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_FEEDTIMEOUT))
{
feedTimeoutValue = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
}
else if (sn.getType().equals(RSSConfig.NODE_FEEDRESCAN))
{
feedRefetchValue = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
}
else if (sn.getType().equals(RSSConfig.NODE_MINFEEDRESCAN))
{
minFeedRefetchValue = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
}
else if (sn.getType().equals(RSSConfig.NODE_BADFEEDRESCAN))
{
badFeedRefetchValue = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
}
else if (sn.getType().equals(RSSConfig.NODE_DECHROMEDMODE))
{
dechromedMode = sn.getAttributeValue(RSSConfig.ATTR_MODE);
}
else if (sn.getType().equals(RSSConfig.NODE_CHROMEDMODE))
{
chromedMode = sn.getAttributeValue(RSSConfig.ATTR_MODE);
}
}
out.print(
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.FeedConnectionTimeout")+"</nobr></td>\n"+
" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(feedTimeoutValue)+"</td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.DefaultFeedRescanInterval")+"</nobr></td>\n"+
" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(feedRefetchValue)+"</td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.MinimumFeedRescanInterval")+"</nobr></td>\n"+
" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(minFeedRefetchValue)+"</td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.BadFeedRescanInterval")+"</nobr></td>\n"+
" <td class=\"value\">"+((badFeedRefetchValue==null)?"(Default feed rescan value)":org.apache.manifoldcf.ui.util.Encoder.bodyEscape(badFeedRefetchValue))+"</td>\n"+
" </tr>\n"+
" \n"+
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"+
"\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.DechromedContentSource")+"</nobr></td>\n"+
" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(dechromedMode)+"</td>\n"+
" </tr>\n"+
" <tr>\n"+
" <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.ChromedContent")+"</nobr></td>\n"+
" <td class=\"value\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(chromedMode)+"</td>\n"+
" </tr>\n"+
"\n"
);
out.print(
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"
);
// Go through looking for access tokens
seenAny = false;
i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_ACCESS))
{
if (seenAny == false)
{
out.print(
" <tr><td class=\"description\"><nobr>"+Messages.getBodyString(locale,"RSSConnector.AccessTokens")+"</nobr></td>\n"+
" <td class=\"value\">\n"
);
seenAny = true;
}
String token = sn.getAttributeValue(RSSConfig.ATTR_TOKEN);
out.print(
" "+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(token)+"<br/>\n"
);
}
}
if (seenAny)
{
out.print(
" </td>\n"+
" </tr>\n"
);
}
else
{
out.print(
" <tr><td class=\"message\" colspan=\"2\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.NoAccessTokensSpecified") + "</nobr></td></tr>\n"
);
}
out.print(
" <tr><td class=\"separator\" colspan=\"2\"><hr/></td></tr>\n"
);
// Go through looking for metadata
seenAny = false;
i = 0;
while (i < ds.getChildCount())
{
SpecificationNode sn = ds.getChild(i++);
if (sn.getType().equals(RSSConfig.NODE_METADATA))
{
if (seenAny == false)
{
out.print(
" <tr><td class=\"description\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.MetadataColon") + "</nobr></td>\n"+
" <td class=\"value\">\n"
);
seenAny = true;
}
String name = sn.getAttributeValue(RSSConfig.ATTR_NAME);
String value = sn.getAttributeValue(RSSConfig.ATTR_VALUE);
out.print(
" "+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(name)+"&nbsp;=&nbsp;"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(value)+"<br/>\n"
);
}
}
if (seenAny)
{
out.print(
" </td>\n"+
" </tr>\n"
);
}
else
{
out.print(
" <tr><td class=\"message\" colspan=\"2\"><nobr>" + Messages.getBodyString(locale,"RSSConnector.NoMetadataSpecified") + "</nobr></td></tr>\n"
);
}
out.print(
"</table>\n"
);
}
/** Handle an RSS feed document, using SAX to limit the memory impact */
protected void handleRSSFeedSAX(String documentIdentifier, IProcessActivity activities, Filter filter)
throws ManifoldCFException, ServiceInterruption
{
// The SAX model uses parsing events to drive processing, which allows me to manage memory usage much better.
// This is essential for when a feed contains dechromed content as well as links.
// First, catch all flavors of IO exception, and handle them properly
try
{
// Open the input stream, and set up the parse
InputStream is = cache.getData(documentIdentifier);
if (is == null)
{
Logging.connectors.error("RSS: Document '"+documentIdentifier+"' should be in cache but isn't");
return;
}
try
{
Parser p = new Parser();
// Parse the document. This will cause various things to occur within the instantiated XMLParsingContext class.
XMLFuzzyHierarchicalParseState x = new XMLFuzzyHierarchicalParseState();
OuterContextClass c = new OuterContextClass(x,documentIdentifier,activities,filter);
x.setContext(c);
try
{
// Believe it or not, there are no parsing errors we can get back now.
p.parseWithCharsetDetection(null,is,x);
c.checkIfValidFeed();
c.setDefaultRescanTimeIfNeeded();
}
finally
{
x.cleanup();
}
}
finally
{
is.close();
}
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("Socket timeout error: "+e.getMessage(),e);
}
catch (ConnectTimeoutException e)
{
throw new ManifoldCFException("Socket connect timeout error: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO error: "+e.getMessage(),e);
}
}
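// For reference, the outer context below dispatches on the document's root tag.
// Illustrative (non-exhaustive) document shapes:
//   <rss version="2.0"><channel><item>...</item></channel></rss>        - RSS
//   <rdf:RDF ...><item>...</item></rdf:RDF>                             - RDF
//   <feed xmlns="http://www.w3.org/2005/Atom"><entry>...</entry></feed> - Atom
//   <urlset><url><loc>...</loc></url></urlset>                          - sitemap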
/** This class handles the outermost XML context for the feed document. */
protected class OuterContextClass extends XMLParsingContext
{
/** Keep track of the number of valid feed signals we saw */
protected int outerTagCount = 0;
/** The document identifier */
protected String documentIdentifier;
/** Activities interface */
protected IProcessActivity activities;
/** Filter */
protected Filter filter;
/** Flag indicating that the rescan time was set for this feed */
protected boolean rescanTimeSet = false;
public OuterContextClass(XMLFuzzyHierarchicalParseState theStream, String documentIdentifier, IProcessActivity activities, Filter filter)
{
super(theStream);
this.documentIdentifier = documentIdentifier;
this.activities = activities;
this.filter = filter;
}
/** Check if feed was valid */
public void checkIfValidFeed()
{
if (outerTagCount == 0)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: RSS document '"+documentIdentifier+"' does not have rss, feed, or rdf:RDF tag - not valid feed");
}
}
/** Check if the rescan flag was set or not, and if not, make sure it gets set properly */
public void setDefaultRescanTimeIfNeeded()
throws ManifoldCFException
{
if (rescanTimeSet == false)
{
// Set it!
// Need to set the requeue parameters appropriately, since otherwise the feed reverts to default document
// rescan or expire behavior.
long currentTime = System.currentTimeMillis();
Long rescanTime = filter.getBadFeedRescanTime(currentTime);
if (rescanTime == null)
rescanTime = filter.getDefaultRescanTime(currentTime);
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RSS document '"+documentIdentifier+"' setting default rescan time to "+((rescanTime==null)?"null":rescanTime.toString()));
activities.setDocumentScheduleBounds(documentIdentifier,rescanTime,rescanTime,null,null);
rescanTimeSet = true;
}
}
/** Handle the tag beginning to set the correct second-level parsing context */
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
if (localName.equals("rss"))
{
// RSS feed detected
outerTagCount++;
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Parsed bottom-level XML for RSS document '"+documentIdentifier+"'");
return new RSSContextClass(theStream,namespace,localName,qName,atts,documentIdentifier,activities,filter);
}
else if (localName.equals("RDF"))
{
// RDF feed detected
outerTagCount++;
return new RDFContextClass(theStream,namespace,localName,qName,atts,documentIdentifier,activities,filter);
}
else if (localName.equals("feed"))
{
// Atom feed detected
outerTagCount++;
return new FeedContextClass(theStream,namespace,localName,qName,atts,documentIdentifier,activities,filter);
}
else if (localName.equals("urlset") || localName.equals("sitemapindex"))
{
// Sitemap detected
outerTagCount++;
return new UrlsetContextClass(theStream,namespace,localName,qName,atts,documentIdentifier,activities,filter);
}
// The default action is to establish a new default context.
return super.beginTag(namespace,localName,qName,atts);
}
/** Handle the tag ending */
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext context = theStream.getContext();
String tagName = context.getLocalname();
if (tagName.equals("rss"))
{
rescanTimeSet = ((RSSContextClass)context).process();
}
else if (tagName.equals("RDF"))
{
rescanTimeSet = ((RDFContextClass)context).process();
}
else if (tagName.equals("feed"))
{
rescanTimeSet = ((FeedContextClass)context).process();
}
else if (tagName.equals("urlset") || tagName.equals("sitemapindex"))
{
rescanTimeSet = ((UrlsetContextClass)context).process();
}
else
super.endTag();
}
}
protected class RSSContextClass extends XMLParsingContext
{
/** The document identifier */
protected String documentIdentifier;
/** Activities interface */
protected IProcessActivity activities;
/** Filter */
protected Filter filter;
/** Rescan time set flag */
protected boolean rescanTimeSet = false;
public RSSContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts, String documentIdentifier, IProcessActivity activities, Filter filter)
{
super(theStream,namespace,localName,qName,atts);
this.documentIdentifier = documentIdentifier;
this.activities = activities;
this.filter = filter;
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// Handle each channel
if (localName.equals("channel"))
{
// Channel detected
return new RSSChannelContextClass(theStream,namespace,localName,qName,atts,documentIdentifier,activities,filter);
}
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
@Override
protected void endTag()
throws ManifoldCFException
{
// If it's our channel tag, process global channel information
XMLParsingContext context = theStream.getContext();
String tagName = context.getLocalname();
if (tagName.equals("channel"))
{
rescanTimeSet = ((RSSChannelContextClass)context).process();
}
else
super.endTag();
}
/** Process this data */
protected boolean process()
throws ManifoldCFException
{
return rescanTimeSet;
}
}
protected class RSSChannelContextClass extends XMLParsingContext
{
/** The document identifier */
protected String documentIdentifier;
/** Activities interface */
protected IProcessActivity activities;
/** Filter */
protected Filter filter;
/** TTL value is set on a per-channel basis */
protected String ttlValue = null;
public RSSChannelContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts, String documentIdentifier, IProcessActivity activities, Filter filter)
{
super(theStream,namespace,localName,qName,atts);
this.documentIdentifier = documentIdentifier;
this.activities = activities;
this.filter = filter;
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// The tags we care about are "ttl" and "item", nothing else.
if (localName.equals("ttl"))
{
// TTL value seen. Prepare to record it as a string.
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("item"))
{
// Item seen. We don't need any of the attributes etc., but we need to start a new context.
return new RSSItemContextClass(theStream,namespace,localName,qName,atts,filter.getDechromedContentMode());
}
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("ttl"))
// The current context must be the TTL one; record its data value.
ttlValue = ((XMLStringParsingContext)theContext).getValue();
else if (theTag.equals("item"))
{
// It's an item.
RSSItemContextClass itemContext = (RSSItemContextClass)theContext;
// Presumably, since we are done parsing, we've recorded all the information we need in the context object, including:
// (1) File name (if any), containing dechromed content
// (2) Link name(s)
// (3) Pubdate
// (4) Title
// The job now is to pull this info out and call the activities interface appropriately.
// NOTE: After this endTag() method is called, tagCleanup() will be called for the item context. This should clean up
// all dangling files etc. that need to be removed.
// If an exception or error is thrown during the parse, this endTag() method will NOT be called, but the tagCleanup()
// method will be called regardless.
itemContext.process(documentIdentifier,activities,filter);
}
else
super.endTag();
}
/** Process this data, return true if rescan time was set */
protected boolean process()
throws ManifoldCFException
{
// Deal with the ttl value, if it was found
// Use the ttl value as a signal for when we ought to look at this feed again. If not present, use the default.
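// Worked example: <ttl>30</ttl> yields rescanTime = currentTime + 30*60000 ms,
// which is then raised to the configured minimum rescan time if it falls below it.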
long currentTime = System.currentTimeMillis();
Long rescanTime = filter.getDefaultRescanTime(currentTime);
if (ttlValue != null)
{
try
{
int minutes = Integer.parseInt(ttlValue);
long nextTime = currentTime + minutes * 60000L;
rescanTime = new Long(nextTime);
// Set the upper bound time; we want to scan the feeds aggressively.
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RSS document '"+documentIdentifier+"', found a ttl value of "+ttlValue+"; setting refetch time accordingly");
}
catch (NumberFormatException e)
{
Logging.connectors.warn("RSS: RSS document '"+documentIdentifier+"' has illegal ttl value '"+ttlValue+"'");
}
}
if (rescanTime != null)
{
Long minimumTime = filter.getMinimumRescanTime(currentTime);
if (minimumTime != null)
{
if (rescanTime.longValue() < minimumTime.longValue())
rescanTime = minimumTime;
}
}
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RSS document '"+documentIdentifier+"' setting rescan time to "+((rescanTime==null)?"null":rescanTime.toString()));
activities.setDocumentScheduleBounds(documentIdentifier,rescanTime,rescanTime,null,null);
return true;
}
}
protected class RSSItemContextClass extends XMLParsingContext
{
protected int dechromedContentMode;
protected String guidField = null;
protected String linkField = null;
protected String pubDateField = null;
protected String titleField = null;
protected String descriptionField = null;
protected String authorEmailField = null;
protected String authorNameField = null;
protected ArrayList categoryField = new ArrayList();
protected File contentsFile = null;
public RSSItemContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts, int dechromedContentMode)
{
super(theStream,namespace,localName,qName,atts);
this.dechromedContentMode = dechromedContentMode;
}
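// Typical <item> shape this context consumes (illustrative only):
//   <item>
//     <title>...</title>
//     <link>http://example.com/story</link>
//     <guid>http://example.com/story</guid>
//     <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
//     <category>...</category>
//     <description>...</description>
//   </item>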
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// The tags we care about here are "link", "guid", "pubdate", "title", "category", "author", and "creator", plus the dechromed content tags handled below.
if (localName.equals("link"))
{
// "link" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("guid"))
{
// "guid" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("pubdate"))
{
// "pubDate" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("title"))
{
// "title" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("category"))
{
// "category" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("author"))
{
// "author" tag, which contains email
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("creator"))
{
// "creator" tag which contains name (like dc:creator)
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else
{
// Handle the potentially longer fields. Both "description" and "content" can be large; they are thus
// processed as temporary files. But the dance is complicated because (a) we only want one PRIMARY content source,
// and (b) we want access to the description field, if it is not used as primary content.
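// For instance, in DECHROMED_DESCRIPTION mode the <description> body is streamed to a temp
// file and later becomes the primary document content, while in DECHROMED_CONTENT mode
// <content> plays that role and <description> remains an ordinary metadata string.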
switch (dechromedContentMode)
{
case DECHROMED_NONE:
if (localName.equals("description"))
{
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
break;
case DECHROMED_DESCRIPTION:
if (localName.equals("description"))
{
try
{
File tempFile = File.createTempFile("_rssdata_","tmp");
return new XMLFileParsingContext(theStream,namespace,localName,qName,atts,tempFile);
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
}
break;
case DECHROMED_CONTENT:
if (localName.equals("content"))
{
try
{
File tempFile = File.createTempFile("_rssdata_","tmp");
return new XMLFileParsingContext(theStream,namespace,localName,qName,atts,tempFile);
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
}
else if (localName.equals("description"))
{
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
break;
default:
break;
}
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
}
/** Convert the individual sub-fields of the item context into their final forms */
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("link"))
{
linkField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("guid"))
{
guidField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("pubdate"))
{
pubDateField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("title"))
{
titleField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("category"))
{
categoryField.add(((XMLStringParsingContext)theContext).getValue());
}
else if (theTag.equals("author"))
{
authorEmailField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("creator"))
{
authorNameField = ((XMLStringParsingContext)theContext).getValue();
}
else
{
// What we want is: (a) if dechromed mode is NONE, just put the description data in the description field; (b)
// if dechromed mode is "description", put the description field in the primary content field; (c)
// if dechromed mode is "content", put the content field in the primary content field, and the description field in the description field.
switch (dechromedContentMode)
{
case DECHROMED_NONE:
if (theTag.equals("description"))
{
descriptionField = ((XMLStringParsingContext)theContext).getValue();
}
break;
case DECHROMED_DESCRIPTION:
if (theTag.equals("description"))
{
// Content file has been written; retrieve it (being sure not to leak any files already hanging around!)
tagCleanup();
contentsFile = ((XMLFileParsingContext)theContext).getCompletedFile();
return;
}
break;
case DECHROMED_CONTENT:
if (theTag.equals("content"))
{
tagCleanup();
// Retrieve content file
contentsFile = ((XMLFileParsingContext)theContext).getCompletedFile();
return;
}
else if (theTag.equals("description"))
{
descriptionField = ((XMLStringParsingContext)theContext).getValue();
}
break;
default:
break;
}
super.endTag();
}
}
protected void tagCleanup()
throws ManifoldCFException
{
// Delete the contents file if it is there.
if (contentsFile != null)
{
contentsFile.delete();
contentsFile = null;
}
}
/** Process the data accumulated for this item */
public void process(String documentIdentifier, IProcessActivity activities, Filter filter)
throws ManifoldCFException
{
if (linkField == null || linkField.length() == 0)
linkField = guidField;
if (linkField != null && linkField.length() > 0)
{
Date origDateDate = null;
if (pubDateField != null && pubDateField.length() > 0)
{
origDateDate = DateParser.parseRFC822Date(pubDateField);
// Special for China Daily News
if (origDateDate == null)
origDateDate = DateParser.parseChinaDate(pubDateField);
// Special for LL
if (origDateDate == null)
origDateDate = DateParser.parseISO8601Date(pubDateField);
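// Example inputs handled above: "Tue, 10 Jun 2003 04:00:00 GMT" (RFC 822)
// and "2003-06-10T04:00:00Z" (ISO 8601).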
}
Long origDate;
if (origDateDate != null)
origDate = new Long(origDateDate.getTime());
else
origDate = null;
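// The link field may hold several comma-separated URLs; split and process each one.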
String[] links = linkField.split(", ");
int l = 0;
while (l < links.length)
{
String rawURL = links[l++].trim();
// Process the link
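// makeDocumentIdentifier applies the job's canonicalization rules (argument reordering,
// session-id stripping) and mappings, returning null for URLs that cannot be accepted.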
String newIdentifier = makeDocumentIdentifier(filter.getCanonicalizationPolicies(),documentIdentifier,rawURL);
if (newIdentifier != null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RSS document '"+documentIdentifier+"', found a link to '"+newIdentifier+"', which has origination date "+
((origDate==null)?"null":origDate.toString()));
if (filter.isLegalURL(newIdentifier))
{
if (contentsFile == null && filter.getChromedContentMode() != CHROMED_METADATA_ONLY)
{
// It's a reference! Add it.
String[] dataNames = new String[]{"pubdate","title","source","authoremail","authorname","category","description"};
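// Carrydown index layout: [0]=pubdate, [1]=title, [2]=source, [3]=authoremail,
// [4]=authorname, [5]=category, [6]=description; a slot is filled only when a value exists.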
String[][] dataValues = new String[dataNames.length][];
if (origDate != null)
dataValues[0] = new String[]{origDate.toString()};
if (titleField != null)
dataValues[1] = new String[]{titleField};
dataValues[2] = new String[]{documentIdentifier};
if (authorEmailField != null)
dataValues[3] = new String[]{authorEmailField};
if (authorNameField != null)
dataValues[4] = new String[]{authorNameField};
dataValues[5] = new String[categoryField.size()];
int q = 0;
while (q < categoryField.size())
{
(dataValues[5])[q] = (String)categoryField.get(q);
q++;
}
if (descriptionField != null)
dataValues[6] = new String[]{descriptionField};
// Add document reference, not including the dechromed content data, but including a description
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
else
{
// The issue here is that if a document is ingested without a jobqueue entry, the document will not
// be cleaned up if the job is deleted; nor is there any expiration possibility. So, we really do need to make
// sure a jobqueue entry gets created somehow. Therefore I can't just ingest the document
// right here.
// Since the dechromed data is available from the feed, we pass it along as carrydown data instead.
// Now, set up the carrydown info
String[] dataNames = new String[]{"pubdate","title","source","authoremail","authorname","category","data","description"};
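// Carrydown index layout here: [0]=pubdate, [1]=title, [2]=source, [3]=authoremail,
// [4]=authorname, [5]=category, [6]=data (the dechromed content stream), [7]=description.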
Object[][] dataValues = new Object[dataNames.length][];
if (origDate != null)
dataValues[0] = new String[]{origDate.toString()};
if (titleField != null)
dataValues[1] = new String[]{titleField};
dataValues[2] = new String[]{documentIdentifier};
if (authorEmailField != null)
dataValues[3] = new String[]{authorEmailField};
if (authorNameField != null)
dataValues[4] = new String[]{authorNameField};
dataValues[5] = new String[categoryField.size()];
int q = 0;
while (q < categoryField.size())
{
(dataValues[5])[q] = (String)categoryField.get(q);
q++;
}
if (descriptionField != null)
dataValues[7] = new String[]{descriptionField};
if (contentsFile == null)
{
CharacterInput ci = new NullCharacterInput();
try
{
dataValues[6] = new Object[]{ci};
// Add document reference, including the data to pass down, and the dechromed content too
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
finally
{
ci.discard();
}
}
else
{
CharacterInput ci = new TempFileCharacterInput(contentsFile);
try
{
contentsFile = null;
dataValues[6] = new Object[]{ci};
// Add document reference, including the data to pass down, and the dechromed content too
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
finally
{
ci.discard();
}
}
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Identifier '"+newIdentifier+"' is excluded");
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RSS document '"+documentIdentifier+"', found an unincluded URL '"+rawURL+"'");
}
}
}
}
}
protected class RDFContextClass extends XMLParsingContext
{
/** The document identifier */
protected String documentIdentifier;
/** Activities interface */
protected IProcessActivity activities;
/** Filter */
protected Filter filter;
/** ttl value */
protected String ttlValue = null;
public RDFContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts, String documentIdentifier, IProcessActivity activities, Filter filter)
{
super(theStream,namespace,localName,qName,atts);
this.documentIdentifier = documentIdentifier;
this.activities = activities;
this.filter = filter;
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// The tags we care about are "ttl" and "item", nothing else.
if (localName.equals("ttl"))
{
// TTL value seen. Prepare to record it as a string.
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("item"))
{
// Item seen. We don't need any of the attributes etc., but we need to start a new context.
return new RDFItemContextClass(theStream,namespace,localName,qName,atts,filter.getDechromedContentMode());
}
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("ttl"))
// The current context must be the TTL one; record its data value.
ttlValue = ((XMLStringParsingContext)theContext).getValue();
else if (theTag.equals("item"))
{
// It's an item.
RDFItemContextClass itemContext = (RDFItemContextClass)theContext;
// Presumably, since we are done parsing, we've recorded all the information we need in the context object, including:
// (1) File name (if any), containing dechromed content
// (2) Link name(s)
// (3) Pubdate
// (4) Title
// The job now is to pull this info out and call the activities interface appropriately.
// NOTE: After this endTag() method is called, tagCleanup() will be called for the item context. This should clean up
// all dangling files etc. that need to be removed.
// If an exception or error is thrown during the parse, this endTag() method will NOT be called, but the tagCleanup()
// method will be called regardless.
itemContext.process(documentIdentifier,activities,filter);
}
else
super.endTag();
}
/** Process this data */
protected boolean process()
throws ManifoldCFException
{
// Deal with the ttl value, if it was found
// Use the ttl value as a signal for when we ought to look at this feed again. If not present, use the default.
long currentTime = System.currentTimeMillis();
Long rescanTime = filter.getDefaultRescanTime(currentTime);
if (ttlValue != null)
{
try
{
int minutes = Integer.parseInt(ttlValue);
long nextTime = currentTime + minutes * 60000L;
rescanTime = new Long(nextTime);
// Set the upper bound time; we want to scan the feeds aggressively.
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RDF document '"+documentIdentifier+"', found a ttl value of "+ttlValue+"; setting refetch time accordingly");
}
catch (NumberFormatException e)
{
Logging.connectors.warn("RSS: RDF document '"+documentIdentifier+"' has illegal ttl value '"+ttlValue+"'");
}
}
if (rescanTime != null)
{
Long minimumTime = filter.getMinimumRescanTime(currentTime);
if (minimumTime != null)
{
if (rescanTime.longValue() < minimumTime.longValue())
rescanTime = minimumTime;
}
}
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RDF document '"+documentIdentifier+"' setting rescan time to "+((rescanTime==null)?"null":rescanTime.toString()));
activities.setDocumentScheduleBounds(documentIdentifier,rescanTime,rescanTime,null,null);
return true;
}
}
protected class RDFItemContextClass extends XMLParsingContext
{
protected int dechromedContentMode;
protected String linkField = null;
protected String pubDateField = null;
protected String titleField = null;
protected String authorNameField = null;
protected String descriptionField = null;
protected File contentsFile = null;
public RDFItemContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts, int dechromedContentMode)
{
super(theStream,namespace,localName,qName,atts);
this.dechromedContentMode = dechromedContentMode;
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// The tags we care about here are "link", "date", "title", and "creator", plus the dechromed content tags handled below.
if (localName.equals("link"))
{
// "link" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("date"))
{
// "dc:date" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("title"))
{
// "title" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("creator"))
{
// "creator" tag (e.g. "dc:creator")
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else
{
switch (dechromedContentMode)
{
case DECHROMED_NONE:
if (localName.equals("description"))
{
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
break;
case DECHROMED_DESCRIPTION:
if (localName.equals("description"))
{
try
{
File tempFile = File.createTempFile("_rssdata_","tmp");
return new XMLFileParsingContext(theStream,namespace,localName,qName,atts,tempFile);
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
}
break;
case DECHROMED_CONTENT:
if (localName.equals("content"))
{
try
{
File tempFile = File.createTempFile("_rssdata_","tmp");
return new XMLFileParsingContext(theStream,namespace,localName,qName,atts,tempFile);
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
}
else if (localName.equals("description"))
{
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
break;
default:
break;
}
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
}
/** Convert the individual sub-fields of the item context into their final forms */
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("link"))
{
linkField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("date"))
{
pubDateField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("title"))
{
titleField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("creator"))
{
authorNameField = ((XMLStringParsingContext)theContext).getValue();
}
else
{
switch (dechromedContentMode)
{
case DECHROMED_NONE:
if (theTag.equals("description"))
{
descriptionField = ((XMLStringParsingContext)theContext).getValue();
}
break;
case DECHROMED_DESCRIPTION:
if (theTag.equals("description"))
{
// Content file has been written; retrieve it (being sure not to leak any files already hanging around!)
tagCleanup();
contentsFile = ((XMLFileParsingContext)theContext).getCompletedFile();
return;
}
break;
case DECHROMED_CONTENT:
if (theTag.equals("dc:content"))
{
// Retrieve content file
tagCleanup();
contentsFile = ((XMLFileParsingContext)theContext).getCompletedFile();
return;
}
else if (theTag.equals("description"))
{
descriptionField = ((XMLStringParsingContext)theContext).getValue();
}
break;
default:
break;
}
super.endTag();
}
}
protected void tagCleanup()
throws ManifoldCFException
{
// Delete the contents file if it is there.
if (contentsFile != null)
{
contentsFile.delete();
contentsFile = null;
}
}
/** Process the data accumulated for this item */
public void process(String documentIdentifier, IProcessActivity activities, Filter filter)
throws ManifoldCFException
{
if (linkField != null && linkField.length() > 0)
{
Date origDateDate = null;
if (pubDateField != null && pubDateField.length() > 0)
origDateDate = DateParser.parseISO8601Date(pubDateField);
Long origDate;
if (origDateDate != null)
origDate = new Long(origDateDate.getTime());
else
origDate = null;
String[] links = linkField.split(", ");
int l = 0;
while (l < links.length)
{
String rawURL = links[l++].trim();
// Process the link
String newIdentifier = makeDocumentIdentifier(filter.getCanonicalizationPolicies(),documentIdentifier,rawURL);
if (newIdentifier != null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RDF document '"+documentIdentifier+"', found a link to '"+newIdentifier+"', which has origination date "+
((origDate==null)?"null":origDate.toString()));
if (filter.isLegalURL(newIdentifier))
{
if (contentsFile == null && filter.getChromedContentMode() != CHROMED_METADATA_ONLY)
{
// It's a reference! Add it.
String[] dataNames = new String[]{"pubdate","title","source","authorname","description"};
String[][] dataValues = new String[dataNames.length][];
if (origDate != null)
dataValues[0] = new String[]{origDate.toString()};
if (titleField != null)
dataValues[1] = new String[]{titleField};
dataValues[2] = new String[]{documentIdentifier};
if (authorNameField != null)
dataValues[3] = new String[]{authorNameField};
if (descriptionField != null)
dataValues[4] = new String[]{descriptionField};
// Add document reference, including the data to pass down
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
else
{
// The issue here is that if a document is ingested without a jobqueue entry, the document will not
// be cleaned up if the job is deleted; nor is there any expiration possibility. So, we really do need to make
// sure a jobqueue entry gets created somehow. Therefore I can't just ingest the document
// right here.
// Now, set up the carrydown info
String[] dataNames = new String[]{"pubdate","title","source","authorname","data","description"};
Object[][] dataValues = new Object[dataNames.length][];
if (origDate != null)
dataValues[0] = new String[]{origDate.toString()};
if (titleField != null)
dataValues[1] = new String[]{titleField};
dataValues[2] = new String[]{documentIdentifier};
if (authorNameField != null)
dataValues[3] = new String[]{authorNameField};
if (descriptionField != null)
dataValues[5] = new String[]{descriptionField};
if (contentsFile == null)
{
CharacterInput ci = new NullCharacterInput();
try
{
dataValues[4] = new Object[]{ci};
// Add document reference, including the data to pass down, and the dechromed content too
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
finally
{
ci.discard();
}
}
else
{
CharacterInput ci = new TempFileCharacterInput(contentsFile);
try
{
contentsFile = null;
dataValues[4] = new Object[]{ci};
// Add document reference, including the data to pass down, and the dechromed content too
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
finally
{
ci.discard();
}
}
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Identifier '"+newIdentifier+"' is excluded");
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In RSS document '"+documentIdentifier+"', found an unincluded URL '"+rawURL+"'");
}
}
}
}
}
protected class FeedContextClass extends XMLParsingContext
{
/** The document identifier */
protected String documentIdentifier;
/** Activities interface */
protected IProcessActivity activities;
/** Filter */
protected Filter filter;
/** ttl value */
protected String ttlValue = null;
public FeedContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts, String documentIdentifier, IProcessActivity activities, Filter filter)
{
super(theStream,namespace,localName,qName,atts);
this.documentIdentifier = documentIdentifier;
this.activities = activities;
this.filter = filter;
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// The tags we care about are "ttl" and "entry", nothing else.
if (localName.equals("ttl"))
{
// TTL value seen. Prepare to record it as a string.
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("entry"))
{
// Item seen. We don't need any of the attributes etc., but we need to start a new context.
return new FeedItemContextClass(theStream,namespace,localName,qName,atts,filter.getDechromedContentMode());
}
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("ttl"))
// The current context must be the TTL one; record its data value.
ttlValue = ((XMLStringParsingContext)theContext).getValue();
else if (theTag.equals("entry"))
{
// It's an item.
FeedItemContextClass itemContext = (FeedItemContextClass)theContext;
// Presumably, since we are done parsing, we've recorded all the information we need in the context object, including:
// (1) File name (if any), containing dechromed content
// (2) Link name(s)
// (3) Pubdate
// (4) Title
// The job now is to pull this info out and call the activities interface appropriately.
// NOTE: After this endTag() method is called, tagCleanup() will be called for the item context. This should clean up
// all dangling files etc. that need to be removed.
// If an exception or error is thrown during the parse, this endTag() method will NOT be called, but the tagCleanup()
// method will be called regardless.
itemContext.process(documentIdentifier,activities,filter);
}
else
super.endTag();
}
/** Process this data */
protected boolean process()
throws ManifoldCFException
{
// Deal with the ttl value, if it was found
// Use the ttl value as a signal for when we ought to look at this feed again. If not present, use the default.
long currentTime = System.currentTimeMillis();
Long rescanTime = filter.getDefaultRescanTime(currentTime);
if (ttlValue != null)
{
try
{
int minutes = Integer.parseInt(ttlValue);
long nextTime = currentTime + minutes * 60000L;
rescanTime = new Long(nextTime);
// Set the upper bound time; we want to scan the feeds aggressively.
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In Atom document '"+documentIdentifier+"', found a ttl value of "+ttlValue+"; setting refetch time accordingly");
}
catch (NumberFormatException e)
{
Logging.connectors.warn("RSS: Atom document '"+documentIdentifier+"' has illegal ttl value '"+ttlValue+"'");
}
}
if (rescanTime != null)
{
Long minimumTime = filter.getMinimumRescanTime(currentTime);
if (minimumTime != null)
{
if (rescanTime.longValue() < minimumTime.longValue())
rescanTime = minimumTime;
}
}
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In Atom document '"+documentIdentifier+"' setting rescan time to "+((rescanTime==null)?"null":rescanTime.toString()));
activities.setDocumentScheduleBounds(documentIdentifier,rescanTime,rescanTime,null,null);
return true;
}
}
protected class FeedItemContextClass extends XMLParsingContext
{
protected int dechromedContentMode;
protected List<String> linkField = new ArrayList<String>();
protected String pubDateField = null;
protected String titleField = null;
protected String authorNameField = null;
protected String authorEmailField = null;
protected ArrayList categoryField = new ArrayList();
protected File contentsFile = null;
protected String descriptionField = null;
public FeedItemContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts, int dechromedContentMode)
{
super(theStream,namespace,localName,qName,atts);
this.dechromedContentMode = dechromedContentMode;
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// The tags we care about here are "link", "published", "updated", "title", "author", and "category", plus the dechromed content tags handled below.
if (localName.equals("link"))
{
// "link" tag
String ref = atts.get("href");
if (ref != null && ref.length() > 0)
linkField.add(ref);
return super.beginTag(namespace,localName,qName,atts);
}
else if (localName.equals("published") || localName.equals("updated"))
{
// "published" pr "updated" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("title"))
{
// "title" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("author"))
{
return new FeedAuthorContextClass(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("category"))
{
String category = atts.get("term");
if (category != null && category.length() > 0)
categoryField.add(category);
return super.beginTag(namespace,localName,qName,atts);
}
else
{
switch (dechromedContentMode)
{
case DECHROMED_NONE:
if (localName.equals("subtitle"))
{
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
break;
case DECHROMED_DESCRIPTION:
if (localName.equals("subtitle"))
{
try
{
File tempFile = File.createTempFile("_rssdata_","tmp");
return new XMLFileParsingContext(theStream,namespace,localName,qName,atts,tempFile);
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
}
break;
case DECHROMED_CONTENT:
if (localName.equals("content"))
{
try
{
File tempFile = File.createTempFile("_rssdata_","tmp");
return new XMLFileParsingContext(theStream,namespace,localName,qName,atts,tempFile);
}
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
}
}
else if (localName.equals("subtitle"))
{
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
break;
default:
break;
}
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
}
/** Convert the individual sub-fields of the item context into their final forms */
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("published") || theTag.equals("updated"))
{
pubDateField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("title"))
{
titleField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("author"))
{
FeedAuthorContextClass authorContext = (FeedAuthorContextClass)theContext;
authorEmailField = authorContext.getAuthorEmail();
authorNameField = authorContext.getAuthorName();
}
else
{
switch (dechromedContentMode)
{
case DECHROMED_NONE:
if (theTag.equals("subtitle"))
{
// The subtitle serves as the description in this mode; the title comes from the "title" tag.
descriptionField = ((XMLStringParsingContext)theContext).getValue();
}
break;
case DECHROMED_DESCRIPTION:
if (theTag.equals("subtitle"))
{
// Content file has been written; retrieve it (being sure not to leak any files already hanging around!)
tagCleanup();
contentsFile = ((XMLFileParsingContext)theContext).getCompletedFile();
return;
}
break;
case DECHROMED_CONTENT:
if (theTag.equals("content"))
{
// Retrieve content file
tagCleanup();
contentsFile = ((XMLFileParsingContext)theContext).getCompletedFile();
return;
}
else if (theTag.equals("subtitle"))
{
// As above, the subtitle serves as the description; the title comes from the "title" tag.
descriptionField = ((XMLStringParsingContext)theContext).getValue();
}
break;
default:
break;
}
super.endTag();
}
}
protected void tagCleanup()
throws ManifoldCFException
{
// Delete the contents file if it is there.
if (contentsFile != null)
{
contentsFile.delete();
contentsFile = null;
}
}
/** Process the data accumulated for this item */
public void process(String documentIdentifier, IProcessActivity activities, Filter filter)
throws ManifoldCFException
{
if (linkField.size() > 0)
{
Date origDateDate = null;
if (pubDateField != null && pubDateField.length() > 0)
origDateDate = DateParser.parseISO8601Date(pubDateField);
Long origDate;
if (origDateDate != null)
origDate = new Long(origDateDate.getTime());
else
origDate = null;
for (String linkValue : linkField)
{
String[] links = linkValue.split(", ");
int l = 0;
while (l < links.length)
{
String rawURL = links[l++].trim();
// Process the link
String newIdentifier = makeDocumentIdentifier(filter.getCanonicalizationPolicies(),documentIdentifier,rawURL);
if (newIdentifier != null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In Atom document '"+documentIdentifier+"', found a link to '"+newIdentifier+"', which has origination date "+
((origDate==null)?"null":origDate.toString()));
if (filter.isLegalURL(newIdentifier))
{
if (contentsFile == null && filter.getChromedContentMode() != CHROMED_METADATA_ONLY)
{
// It's a reference! Add it.
String[] dataNames = new String[]{"pubdate","title","source","category","description"};
String[][] dataValues = new String[dataNames.length][];
if (origDate != null)
dataValues[0] = new String[]{origDate.toString()};
if (titleField != null)
dataValues[1] = new String[]{titleField};
dataValues[2] = new String[]{documentIdentifier};
dataValues[3] = new String[categoryField.size()];
int q = 0;
while (q < categoryField.size())
{
(dataValues[3])[q] = (String)categoryField.get(q);
q++;
}
if (descriptionField != null)
dataValues[4] = new String[]{descriptionField};
// Add document reference, including the data to pass down
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
else
{
// The issue here is that if a document is ingested without a jobqueue entry, the document will not
// be cleaned up if the job is deleted; nor is there any expiration possibility. So, we really do need to make
// sure a jobqueue entry gets created somehow. Therefore I can't just ingest the document
// right here.
// Now, set up the carrydown info
String[] dataNames = new String[]{"pubdate","title","source","category","data","description"};
Object[][] dataValues = new Object[dataNames.length][];
if (origDate != null)
dataValues[0] = new String[]{origDate.toString()};
if (titleField != null)
dataValues[1] = new String[]{titleField};
dataValues[2] = new String[]{documentIdentifier};
dataValues[3] = new String[categoryField.size()];
int q = 0;
while (q < categoryField.size())
{
(dataValues[3])[q] = (String)categoryField.get(q);
q++;
}
if (descriptionField != null)
dataValues[5] = new String[]{descriptionField};
if (contentsFile == null)
{
CharacterInput ci = new NullCharacterInput();
try
{
dataValues[4] = new Object[]{ci};
// Add document reference, including the data to pass down, and the dechromed content too
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
finally
{
ci.discard();
}
}
else
{
CharacterInput ci = new TempFileCharacterInput(contentsFile);
try
{
contentsFile = null;
dataValues[4] = new Object[]{ci};
// Add document reference, including the data to pass down, and the dechromed content too
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
finally
{
ci.discard();
}
}
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Identifier '"+newIdentifier+"' is excluded");
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In Atom document '"+documentIdentifier+"', found an unincluded URL '"+rawURL+"'");
}
}
}
}
}
}
protected class FeedAuthorContextClass extends XMLParsingContext
{
protected String authorNameField = null;
protected String authorEmailField = null;
public FeedAuthorContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts)
{
super(theStream,namespace,localName,qName,atts);
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
if (localName.equals("name"))
{
// "name" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("email"))
{
// "email" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else
{
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
}
/** Convert the individual sub-fields of the item context into their final forms */
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("name"))
{
authorNameField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("email"))
{
authorEmailField = ((XMLStringParsingContext)theContext).getValue();
}
else
{
super.endTag();
}
}
public String getAuthorName()
{
return authorNameField;
}
public String getAuthorEmail()
{
return authorEmailField;
}
}
protected class UrlsetContextClass extends XMLParsingContext
{
/** The document identifier */
protected String documentIdentifier;
/** Activities interface */
protected IProcessActivity activities;
/** Filter */
protected Filter filter;
/** ttl value */
protected String ttlValue = null;
public UrlsetContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts, String documentIdentifier, IProcessActivity activities, Filter filter)
{
super(theStream,namespace,localName,qName,atts);
this.documentIdentifier = documentIdentifier;
this.activities = activities;
this.filter = filter;
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// The tags we care about are "url" and "sitemap", nothing else.
if (localName.equals("url") || localName.equals("sitemap"))
{
// Item seen. We don't need any of the attributes etc., but we need to start a new context.
return new UrlsetItemContextClass(theStream,namespace,localName,qName,atts);
}
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("url") || theTag.equals("sitemap"))
{
// It's an item.
UrlsetItemContextClass itemContext = (UrlsetItemContextClass)theContext;
// Presumably, since we are done parsing, we've recorded all the information we need in the context object, including:
// (1) File name (if any), containing dechromed content
// (2) Link name(s)
// (3) Pubdate
// (4) Title
// The job now is to pull this info out and call the activities interface appropriately.
// NOTE: After this endTag() method is called, tagCleanup() will be called for the item context. This should clean up
// all dangling files etc. that need to be removed.
// If an exception or error is thrown during the parse, this endTag() method will NOT be called, but the tagCleanup()
// method will be called regardless.
itemContext.process(documentIdentifier,activities,filter);
}
else
super.endTag();
}
/** Process this data */
protected boolean process()
throws ManifoldCFException
{
// Deal with the ttlvalue, if it was found
// Use the ttl value as a signal for when we ought to look at this feed again. If not present, use the default.
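// For example, a ttl value of "30" would suggest refetching this feed roughly 30 minutes (30 * 60000 ms) from now.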
long currentTime = System.currentTimeMillis();
Long rescanTime = filter.getDefaultRescanTime(currentTime);
if (ttlValue != null)
{
try
{
int minutes = Integer.parseInt(ttlValue);
long nextTime = currentTime + minutes * 60000L;
rescanTime = new Long(nextTime);
// Set the upper bound time; we want to scan the feeds aggressively.
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In SiteMap document '"+documentIdentifier+"', found a ttl value of "+ttlValue+"; setting refetch time accordingly");
}
catch (NumberFormatException e)
{
Logging.connectors.warn("RSS: SiteMap document '"+documentIdentifier+"' has illegal ttl value '"+ttlValue+"'");
}
}
if (rescanTime != null)
{
Long minimumTime = filter.getMinimumRescanTime(currentTime);
if (minimumTime != null)
{
if (rescanTime.longValue() < minimumTime.longValue())
rescanTime = minimumTime;
}
}
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In SiteMap document '"+documentIdentifier+"' setting rescan time to "+((rescanTime==null)?"null":rescanTime.toString()));
activities.setDocumentScheduleBounds(documentIdentifier,rescanTime,rescanTime,null,null);
return true;
}
}
protected class UrlsetItemContextClass extends XMLParsingContext
{
protected String linkField = null;
protected String pubDateField = null;
public UrlsetItemContextClass(XMLFuzzyHierarchicalParseState theStream, String namespace, String localName, String qName, Map<String,String> atts)
{
super(theStream,namespace,localName,qName,atts);
}
@Override
protected XMLParsingContext beginTag(String namespace, String localName, String qName, Map<String,String> atts)
throws ManifoldCFException
{
// The tags we care about are "loc" and "lastmod", nothing else.
if (localName.equals("loc"))
{
// "loc" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else if (localName.equals("lastmod"))
{
// "lastmod" tag
return new XMLStringParsingContext(theStream,namespace,localName,qName,atts);
}
else
{
// Skip everything else.
return super.beginTag(namespace,localName,qName,atts);
}
}
/** Convert the individual sub-fields of the item context into their final forms */
@Override
protected void endTag()
throws ManifoldCFException
{
XMLParsingContext theContext = theStream.getContext();
String theTag = theContext.getLocalname();
if (theTag.equals("loc"))
{
linkField = ((XMLStringParsingContext)theContext).getValue();
}
else if (theTag.equals("lastmod"))
{
pubDateField = ((XMLStringParsingContext)theContext).getValue();
}
else
{
super.endTag();
}
}
protected void tagCleanup()
throws ManifoldCFException
{
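// Nothing to clean up here: unlike the feed item context, this context holds only string fields and no temporary files.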
}
/** Process the data accumulated for this item */
public void process(String documentIdentifier, IProcessActivity activities, Filter filter)
throws ManifoldCFException
{
if (linkField != null && linkField.length() > 0)
{
Date origDateDate = null;
if (pubDateField != null && pubDateField.length() > 0)
origDateDate = DateParser.parseISO8601Date(pubDateField);
Long origDate;
if (origDateDate != null)
origDate = new Long(origDateDate.getTime());
else
origDate = null;
String[] links = linkField.split(", ");
int l = 0;
while (l < links.length)
{
String rawURL = links[l++].trim();
// Process the link
String newIdentifier = makeDocumentIdentifier(filter.getCanonicalizationPolicies(),documentIdentifier,rawURL);
if (newIdentifier != null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In SiteMap document '"+documentIdentifier+"', found a link to '"+newIdentifier+"', which has origination date "+
((origDate==null)?"null":origDate.toString()));
if (filter.isLegalURL(newIdentifier))
{
// It's a reference! Add it.
String[] dataNames = new String[]{"pubdate","source"};
String[][] dataValues = new String[dataNames.length][];
if (origDate != null)
dataValues[0] = new String[]{origDate.toString()};
dataValues[1] = new String[]{documentIdentifier};
// Add document reference, including the data to pass down
activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Identifier '"+newIdentifier+"' is excluded");
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: In SiteMap document '"+documentIdentifier+"', found an unincluded URL '"+rawURL+"'");
}
}
}
}
}
/** Get the maximum number of documents to amalgamate together into one batch, for this connector.
*@return the maximum number. 0 indicates "unlimited".
*/
public int getMaxDocumentRequest()
{
// RSS and the web in general do not batch well. Multiple chunks have no advantage over one-at-a-time requests.
return 1;
}
// Protected methods and classes
/** Code to check if data is interesting, based on response code and content type.
*/
protected boolean isContentInteresting(IFingerprintActivity activities, String contentType)
throws ServiceInterruption, ManifoldCFException
{
// Look at the content type and decide if it's a kind we want. This is defined
// as something we think we can either ingest, or extract links from.
// For now, we're only going to attempt to extract links from html. This will change eventually.
// But the check here is just what the content type is.
if (contentType == null)
return false;
int pos = contentType.indexOf(";");
if (pos != -1)
contentType = contentType.substring(0,pos);
contentType = contentType.trim();
return activities.checkMimeTypeIndexable(contentType);
}
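// Illustrative example (not from the original source): a header value such as
// "text/html; charset=UTF-8" is reduced to "text/html" above before being passed
// to activities.checkMimeTypeIndexable().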
/** Code to check if an already-fetched document should be ingested.
*/
protected boolean isDataIngestable(IFingerprintActivity activities, String documentIdentifier)
throws ServiceInterruption, ManifoldCFException
{
if (activities.checkLengthIndexable(cache.getDataLength(documentIdentifier)) == false)
return false;
if (activities.checkURLIndexable(documentIdentifier) == false)
return false;
// Check if it's a recognized content type
String contentType = cache.getContentType(documentIdentifier);
// Some sites have multiple content types. We just look at the LAST one in that case.
if (contentType != null)
{
String[] contentTypes = contentType.split(",");
if (contentTypes.length > 0)
contentType = contentTypes[contentTypes.length-1].trim();
else
contentType = null;
}
if (contentType == null)
return false;
int pos = contentType.indexOf(";");
if (pos != -1)
contentType = contentType.substring(0,pos);
contentType = contentType.trim();
return activities.checkMimeTypeIndexable(contentType);
}
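// Illustrative example (not from the original source): a cached content type of
// "application/octet-stream, text/html; charset=ISO-8859-1" yields "text/html"
// after the last-entry and parameter-stripping steps above.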
/** Given the current parameters, find the correct throttled fetcher object
* (or create one if not there).
*/
protected ThrottledFetcher getFetcher()
{
synchronized (fetcherMap)
{
ThrottledFetcher tf = fetcherMap.get(throttleGroupName);
if (tf == null)
{
tf = new ThrottledFetcher();
fetcherMap.put(throttleGroupName,tf);
}
return tf;
}
}
/** Read a string as a sequence of individual expressions, urls, etc.
*/
protected static ArrayList stringToArray(String input)
{
ArrayList list = new ArrayList();
try
{
java.io.Reader str = new java.io.StringReader(input);
try
{
java.io.BufferedReader is = new java.io.BufferedReader(str);
try
{
while (true)
{
String nextString = is.readLine();
if (nextString == null)
break;
nextString = nextString.trim();
if (nextString.length() == 0)
continue;
if (nextString.startsWith("#"))
continue;
list.add(nextString);
}
}
finally
{
is.close();
}
}
finally
{
str.close();
}
}
catch (java.io.IOException e)
{
// Eat the exception and exit.
}
return list;
}
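// Illustrative example (not from the original source): the input
// "foo\n# a comment\n\nbar " produces the list ["foo", "bar"]; blank lines and
// lines starting with '#' are skipped, and each remaining line is trimmed.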
/** Compile all regexp entries in the passed in list, and add them to the output
* list.
*/
protected static void compileList(ArrayList output, ArrayList input)
throws ManifoldCFException
{
int i = 0;
while (i < input.size())
{
String inputString = (String)input.get(i++);
try
{
output.add(Pattern.compile(inputString));
}
catch (PatternSyntaxException e)
{
throw new ManifoldCFException("Mapping regular expression '"+inputString+"' is illegal: "+e.getMessage(),e);
}
}
}
/** Given the current parameters, find the correct robots object (or create
* one if none found).
*/
protected Robots getRobots(ThrottledFetcher fetcher)
{
synchronized (robotsMap)
{
Robots r = (Robots)robotsMap.get(throttleGroupName);
if (r == null)
{
r = new Robots(fetcher);
robotsMap.put(throttleGroupName,r);
}
return r;
}
}
// Protected classes
/** The throttle specification class. Each server name is a different bin in this model.
*/
protected static class ThrottleSpec implements IThrottleSpec
{
protected final int maxOpenConnectionsPerServer;
protected final long minimumMillisecondsPerFetchPerServer;
protected final double minimumMillisecondsPerBytePerServer;
public ThrottleSpec(int maxOpenConnectionsPerServer, long minimumMillisecondsPerFetchPerServer,
double minimumMillisecondsPerBytePerServer)
{
this.maxOpenConnectionsPerServer = maxOpenConnectionsPerServer;
this.minimumMillisecondsPerFetchPerServer = minimumMillisecondsPerFetchPerServer;
this.minimumMillisecondsPerBytePerServer = minimumMillisecondsPerBytePerServer;
}
/** Given a bin name, find the max open connections to use for that bin.
*@return Integer.MAX_VALUE if no limit found.
*/
public int getMaxOpenConnections(String binName)
{
return maxOpenConnectionsPerServer;
}
/** Look up minimum milliseconds per byte for a bin.
*@return 0.0 if no limit found.
*/
public double getMinimumMillisecondsPerByte(String binName)
{
return minimumMillisecondsPerBytePerServer;
}
/** Look up minimum milliseconds for a fetch for a bin.
*@return 0 if no limit found.
*/
public long getMinimumMillisecondsPerFetch(String binName)
{
return minimumMillisecondsPerFetchPerServer;
}
}
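// Illustrative usage sketch (hypothetical values, not from the original source):
//
//   IThrottleSpec spec = new ThrottleSpec(2, 1000L, 0.5);
//
// This would permit at most 2 open connections per server, at least 1000 ms
// between successive fetches to the same server, and at least 0.5 ms per byte
// fetched (a bandwidth ceiling of roughly 2 KB/sec per server).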
/** Name/value class */
protected static class NameValue
{
protected String name;
protected String value;
public NameValue(String name, String value)
{
this.name = name;
this.value = value;
}
public String getName()
{
return name;
}
public String getValue()
{
return value;
}
}
/** Evaluator token.
*/
protected static class EvaluatorToken
{
public final static int TYPE_GROUP = 0;
public final static int TYPE_TEXT = 1;
public final static int TYPE_COMMA = 2;
public final static int GROUPSTYLE_NONE = 0;
public final static int GROUPSTYLE_LOWER = 1;
public final static int GROUPSTYLE_UPPER = 2;
public final static int GROUPSTYLE_MIXED = 3;
protected int type;
protected int groupNumber = -1;
protected int groupStyle = GROUPSTYLE_NONE;
protected String textValue = null;
public EvaluatorToken()
{
type = TYPE_COMMA;
}
public EvaluatorToken(int groupNumber, int groupStyle)
{
type = TYPE_GROUP;
this.groupNumber = groupNumber;
this.groupStyle = groupStyle;
}
public EvaluatorToken(String text)
{
type = TYPE_TEXT;
this.textValue = text;
}
public int getType()
{
return type;
}
public int getGroupNumber()
{
return groupNumber;
}
public int getGroupStyle()
{
return groupStyle;
}
public String getTextValue()
{
return textValue;
}
}
/** Token stream.
*/
protected static class EvaluatorTokenStream
{
protected String text;
protected int pos;
protected EvaluatorToken token = null;
/** Constructor.
*/
public EvaluatorTokenStream(String text)
{
this.text = text;
this.pos = 0;
}
/** Get current token.
*/
public EvaluatorToken peek()
throws ManifoldCFException
{
if (token == null)
{
token = nextToken();
}
return token;
}
/** Go on to next token.
*/
public void advance()
{
token = null;
}
protected EvaluatorToken nextToken()
throws ManifoldCFException
{
char x;
// Fetch the next token
while (true)
{
if (pos == text.length())
return null;
x = text.charAt(pos);
if (x > ' ')
break;
pos++;
}
StringBuilder sb;
if (x == '"')
{
// Parse text
pos++;
sb = new StringBuilder();
while (true)
{
if (pos == text.length())
break;
x = text.charAt(pos);
pos++;
if (x == '"')
{
break;
}
if (x == '\\')
{
if (pos == text.length())
break;
x = text.charAt(pos++);
}
sb.append(x);
}
return new EvaluatorToken(sb.toString());
}
if (x == ',')
{
pos++;
return new EvaluatorToken();
}
// Eat number at beginning
sb = new StringBuilder();
while (true)
{
if (pos == text.length())
break;
x = text.charAt(pos);
if (x >= '0' && x <= '9')
{
sb.append(x);
pos++;
continue;
}
break;
}
String numberValue = sb.toString();
int groupNumber = 0;
if (numberValue.length() > 0)
groupNumber = Integer.parseInt(numberValue);
// Save the next char position
int modifierPos = pos;
// Go to the end of the word
while (true)
{
if (pos == text.length())
break;
x = text.charAt(pos);
if (x == ',' || (x >= '0' && x <= '9') || x <= ' ')
break;
pos++;
}
int style = EvaluatorToken.GROUPSTYLE_NONE;
if (modifierPos != pos)
{
String modifier = text.substring(modifierPos,pos);
if (modifier.startsWith("u"))
style = EvaluatorToken.GROUPSTYLE_UPPER;
else if (modifier.startsWith("l"))
style = EvaluatorToken.GROUPSTYLE_LOWER;
else if (modifier.startsWith("m"))
style = EvaluatorToken.GROUPSTYLE_MIXED;
else
throw new ManifoldCFException("Unknown style: "+modifier);
}
return new EvaluatorToken(groupNumber,style);
}
}
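// Illustrative example (not from the original source): the expression
//
//   "http://",1l,"/index.html"
//
// tokenizes as TEXT("http://"), COMMA, GROUP(1, GROUPSTYLE_LOWER), COMMA,
// TEXT("/index.html"). A group token is a decimal group number optionally
// followed by a style word beginning with 'u' (upper), 'l' (lower), or 'm' (mixed).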
/** Class representing a URL regular expression match, for the purposes of determining canonicalization policy */
protected static class CanonicalizationPolicy
{
protected Pattern matchPattern;
protected boolean reorder;
protected boolean removeJavaSession;
protected boolean removeAspSession;
protected boolean removePhpSession;
protected boolean removeBVSession;
public CanonicalizationPolicy(Pattern matchPattern, boolean reorder, boolean removeJavaSession, boolean removeAspSession,
boolean removePhpSession, boolean removeBVSession)
{
this.matchPattern = matchPattern;
this.reorder = reorder;
this.removeJavaSession = removeJavaSession;
this.removeAspSession = removeAspSession;
this.removePhpSession = removePhpSession;
this.removeBVSession = removeBVSession;
}
public boolean checkMatch(String url)
{
Matcher matcher = matchPattern.matcher(url);
return matcher.find();
}
public boolean canReorder()
{
return reorder;
}
public boolean canRemoveJavaSession()
{
return removeJavaSession;
}
public boolean canRemoveAspSession()
{
return removeAspSession;
}
public boolean canRemovePhpSession()
{
return removePhpSession;
}
public boolean canRemoveBvSession()
{
return removeBVSession;
}
}
/** Class representing a list of canonicalization rules */
protected static class CanonicalizationPolicies
{
protected ArrayList rules = new ArrayList();
public CanonicalizationPolicies()
{
}
public void addRule(CanonicalizationPolicy rule)
{
rules.add(rule);
}
public CanonicalizationPolicy findMatch(String url)
{
int i = 0;
while (i < rules.size())
{
CanonicalizationPolicy rule = (CanonicalizationPolicy)rules.get(i++);
if (rule.checkMatch(url))
return rule;
}
return null;
}
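// Note that findMatch() returns the first rule, in insertion order, whose pattern
// is found anywhere in the url, so more specific rules should be added before
// more general ones.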
}
/** Class representing a mapping rule */
protected static class MappingRule
{
protected Pattern matchPattern;
protected String evalExpression;
public MappingRule(Pattern matchPattern, String evalExpression)
{
this.matchPattern = matchPattern;
this.evalExpression = evalExpression;
}
public boolean checkMatch(String url)
{
Matcher matcher = matchPattern.matcher(url);
return matcher.matches();
}
public String map(String url)
throws ManifoldCFException
{
// Create a matcher, and attempt to do a match
Matcher matcher = matchPattern.matcher(url);
if (!matcher.matches())
{
return null;
}
// A match! Now, interpret the output expression
if (evalExpression == null || evalExpression.length() == 0)
return url;
StringBuilder sb = new StringBuilder();
EvaluatorTokenStream et = new EvaluatorTokenStream(evalExpression);
while (true)
{
EvaluatorToken t = et.peek();
if (t == null)
break;
switch (t.getType())
{
case EvaluatorToken.TYPE_COMMA:
et.advance();
break;
case EvaluatorToken.TYPE_GROUP:
et.advance();
String groupValue = matcher.group(t.getGroupNumber());
switch (t.getGroupStyle())
{
case EvaluatorToken.GROUPSTYLE_NONE:
sb.append(groupValue);
break;
case EvaluatorToken.GROUPSTYLE_LOWER:
sb.append(groupValue.toLowerCase());
break;
case EvaluatorToken.GROUPSTYLE_UPPER:
sb.append(groupValue.toUpperCase());
break;
case EvaluatorToken.GROUPSTYLE_MIXED:
if (groupValue.length() > 0)
{
sb.append(groupValue.substring(0,1).toUpperCase());
sb.append(groupValue.substring(1).toLowerCase());
}
break;
default:
throw new ManifoldCFException("Illegal group style");
}
break;
case EvaluatorToken.TYPE_TEXT:
et.advance();
sb.append(t.getTextValue());
break;
default:
throw new ManifoldCFException("Illegal token type");
}
}
return sb.toString();
}
}
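// Illustrative example (not from the original source): a rule built as
//
//   new MappingRule(Pattern.compile("http://(.*)/article/(.*)"),
//     "\"http://\",1,\"/item/\",2")
//
// would map "http://example.com/article/42" to "http://example.com/item/42",
// while map() returns null for urls that do not match the pattern.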
/** Class that represents all mappings */
protected static class MappingRules
{
protected ArrayList mappings = new ArrayList();
public MappingRules()
{
}
public void add(MappingRule rule)
{
mappings.add(rule);
}
public boolean isMatch(String url)
{
if (mappings.size() == 0)
return true;
int i = 0;
while (i < mappings.size())
{
MappingRule p = (MappingRule)mappings.get(i);
if (p.checkMatch(url))
return true;
i++;
}
return false;
}
public String map(String url)
throws ManifoldCFException
{
if (mappings.size() == 0)
return url;
int i = 0;
while (i < mappings.size())
{
MappingRule p = (MappingRule)mappings.get(i);
String rval = p.map(url);
if (rval != null)
return rval;
i++;
}
return null;
}
}
/** Class that handles parsing and interpretation of the document specification.
* Note that I believe it to be faster to do this once, gathering all the data, than to scan the document specification multiple times.
* Therefore, this class contains the *entire* interpreted set of data from a document specification.
*/
protected static class Filter
{
protected MappingRules mappings = new MappingRules();
protected HashMap seeds = null;
protected Integer defaultRescanInterval = null;
protected Integer minimumRescanInterval = null;
protected Integer badFeedRescanInterval = null;
protected int dechromedContentMode = DECHROMED_NONE;
protected int chromedContentMode = CHROMED_USE;
protected int feedTimeoutValue = 60000;
protected ArrayList metadata = new ArrayList();
protected HashMap acls = new HashMap();
protected CanonicalizationPolicies canonicalizationPolicies = new CanonicalizationPolicies();
/** The arraylist of include patterns */
protected ArrayList includePatterns = new ArrayList();
/** The arraylist of exclude patterns */
protected ArrayList excludePatterns = new ArrayList();
/** Constructor. */
public Filter(DocumentSpecification spec, boolean warnOnBadSeed)
throws ManifoldCFException
{
String excludes = "";
// To save allocation, preallocate the seeds map assuming that it will require 1.5x the number of nodes in the spec
int initialSize = spec.getChildCount();
if (initialSize == 0)
initialSize = 1;
seeds = new HashMap((initialSize * 3) >> 1);
int i = 0;
// First pass. Find all of the rules (which are necessary to canonicalize the seeds, etc.)
while (i < spec.getChildCount())
{
SpecificationNode n = spec.getChild(i++);
if (n.getType().equals(RSSConfig.NODE_MAP))
{
String match = n.getAttributeValue(RSSConfig.ATTR_MATCH);
String map = n.getAttributeValue(RSSConfig.ATTR_MAP);
if (match != null && match.length() > 0)
{
Pattern p;
try
{
p = Pattern.compile(match);
}
catch (java.util.regex.PatternSyntaxException e)
{
throw new ManifoldCFException("Regular expression '"+match+"' is illegal: "+e.getMessage(),e);
}
if (map == null)
map = "";
mappings.add(new MappingRule(p,map));
}
}
else if (n.getType().equals(RSSConfig.NODE_EXCLUDES))
{
excludes = n.getValue();
if (excludes == null)
excludes = "";
}
else if (n.getType().equals(RSSConfig.NODE_URLSPEC))
{
String urlRegexp = n.getAttributeValue(RSSConfig.ATTR_REGEXP);
if (urlRegexp == null)
urlRegexp = "";
String reorder = n.getAttributeValue(RSSConfig.ATTR_REORDER);
boolean reorderValue = reorder != null && reorder.equals(RSSConfig.VALUE_YES);
String javaSession = n.getAttributeValue(RSSConfig.ATTR_JAVASESSIONREMOVAL);
boolean javaSessionValue = javaSession != null && javaSession.equals(RSSConfig.VALUE_YES);
String aspSession = n.getAttributeValue(RSSConfig.ATTR_ASPSESSIONREMOVAL);
boolean aspSessionValue = aspSession != null && aspSession.equals(RSSConfig.VALUE_YES);
String phpSession = n.getAttributeValue(RSSConfig.ATTR_PHPSESSIONREMOVAL);
boolean phpSessionValue = phpSession != null && phpSession.equals(RSSConfig.VALUE_YES);
String bvSession = n.getAttributeValue(RSSConfig.ATTR_BVSESSIONREMOVAL);
boolean bvSessionValue = bvSession != null && bvSession.equals(RSSConfig.VALUE_YES);
try
{
canonicalizationPolicies.addRule(new CanonicalizationPolicy(Pattern.compile(urlRegexp),reorderValue,javaSessionValue,aspSessionValue,
phpSessionValue, bvSessionValue));
}
catch (java.util.regex.PatternSyntaxException e)
{
throw new ManifoldCFException("Canonicalization regular expression '"+urlRegexp+"' is illegal: "+e.getMessage(),e);
}
}
}
compileList(excludePatterns,stringToArray(excludes));
// Second pass. Do the rest of the work.
i = 0;
while (i < spec.getChildCount())
{
SpecificationNode n = spec.getChild(i++);
if (n.getType().equals(RSSConfig.NODE_FEED))
{
String rssURL = n.getAttributeValue(RSSConfig.ATTR_URL);
if (rssURL != null && rssURL.length() > 0)
{
String canonicalURL = makeDocumentIdentifier(canonicalizationPolicies,null,rssURL);
if (canonicalURL != null)
{
seeds.put(canonicalURL,canonicalURL);
}
else
{
if (warnOnBadSeed)
Logging.connectors.warn("RSS: Illegal seed feed '"+rssURL+"'");
}
}
}
else if (n.getType().equals(RSSConfig.NODE_METADATA))
{
String name = n.getAttributeValue(RSSConfig.ATTR_NAME);
String value = n.getAttributeValue(RSSConfig.ATTR_VALUE);
if (name != null && name.length() > 0 && value != null && value.length() > 0)
metadata.add(new NameValue(name,value));
}
else if (n.getType().equals(RSSConfig.NODE_ACCESS))
{
String token = n.getAttributeValue(RSSConfig.ATTR_TOKEN);
acls.put(token,token);
}
else if (n.getType().equals(RSSConfig.NODE_FEEDRESCAN))
{
String interval = n.getAttributeValue(RSSConfig.ATTR_VALUE);
if (interval != null && interval.length() > 0)
{
try
{
defaultRescanInterval = new Integer(interval);
}
catch (NumberFormatException e)
{
throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
}
}
}
else if (n.getType().equals(RSSConfig.NODE_MINFEEDRESCAN))
{
String interval = n.getAttributeValue(RSSConfig.ATTR_VALUE);
if (interval != null && interval.length() > 0)
{
try
{
minimumRescanInterval = new Integer(interval);
}
catch (NumberFormatException e)
{
throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
}
}
}
else if (n.getType().equals(RSSConfig.NODE_BADFEEDRESCAN))
{
String interval = n.getAttributeValue(RSSConfig.ATTR_VALUE);
if (interval != null && interval.length() > 0)
{
try
{
badFeedRescanInterval = new Integer(interval);
}
catch (NumberFormatException e)
{
throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
}
}
}
else if (n.getType().equals(RSSConfig.NODE_FEEDTIMEOUT))
{
String value = n.getAttributeValue(RSSConfig.ATTR_VALUE);
if (value != null && value.length() > 0)
{
try
{
feedTimeoutValue = Integer.parseInt(value) * 1000;
}
catch (NumberFormatException e)
{
throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
}
}
}
else if (n.getType().equals(RSSConfig.NODE_DECHROMEDMODE))
{
String mode = n.getAttributeValue(RSSConfig.ATTR_MODE);
if (mode != null && mode.length() > 0)
{
if (mode.equals(RSSConfig.VALUE_NONE))
dechromedContentMode = DECHROMED_NONE;
else if (mode.equals(RSSConfig.VALUE_DESCRIPTION))
dechromedContentMode = DECHROMED_DESCRIPTION;
else if (mode.equals(RSSConfig.VALUE_CONTENT))
dechromedContentMode = DECHROMED_CONTENT;
}
}
else if (n.getType().equals(RSSConfig.NODE_CHROMEDMODE))
{
String mode = n.getAttributeValue(RSSConfig.ATTR_MODE);
if (mode != null && mode.length() > 0)
{
if (mode.equals(RSSConfig.VALUE_USE))
chromedContentMode = CHROMED_USE;
else if (mode.equals(RSSConfig.VALUE_SKIP))
chromedContentMode = CHROMED_SKIP;
else if (mode.equals(RSSConfig.VALUE_METADATA))
chromedContentMode = CHROMED_METADATA_ONLY;
}
}
}
}
/** Check if document is a seed */
public boolean isSeed(String canonicalUrl)
{
return seeds.get(canonicalUrl) != null;
}
/** Iterate over all canonicalized seeds */
public Iterator getSeeds()
{
return seeds.keySet().iterator();
}
/** Get the specified metadata */
public ArrayList getMetadata()
{
return metadata;
}
/** Get the acls */
public String[] getAcls()
{
String[] rval = new String[acls.size()];
Iterator iter = acls.keySet().iterator();
int i = 0;
while (iter.hasNext())
{
rval[i++] = (String)iter.next();
}
return rval;
}
/** Get the feed timeout value */
public int getFeedTimeoutValue()
{
return feedTimeoutValue;
}
/** Get the dechromed content mode */
public int getDechromedContentMode()
{
return dechromedContentMode;
}
/** Get the chromed content mode */
public int getChromedContentMode()
{
return chromedContentMode;
}
/** Get the next time (by default) a feed should be scanned */
public Long getDefaultRescanTime(long currentTime)
{
if (defaultRescanInterval == null)
return null;
return new Long(defaultRescanInterval.intValue() * 60000L + currentTime);
}
/** Get the minimum next time a feed should be scanned */
public Long getMinimumRescanTime(long currentTime)
{
if (minimumRescanInterval == null)
return null;
return new Long(minimumRescanInterval.intValue() * 60000L + currentTime);
}
/** Get the next time a "bad feed" should be rescanned */
public Long getBadFeedRescanTime(long currentTime)
{
if (badFeedRescanInterval == null)
return null;
return new Long(badFeedRescanInterval.intValue() * 60000L + currentTime);
}
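// Illustrative example (not from the original source): with a
// badFeedRescanInterval of 1440 (minutes), a bad feed would be rescheduled
// roughly 24 hours (1440 * 60000 ms) after the current time.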
/** Check for legality of a url.
* @return true if the passed-in url is either a seed, or a legal url, according to this
* filter.
*/
public boolean isLegalURL(String url)
{
if (seeds.get(url) != null)
return true;
if (mappings.isMatch(url) == false)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Url '"+url+"' is illegal because it did not match a mapping rule");
return false;
}
// Now make sure it's not in the exclude list.
int i = 0;
while (i < excludePatterns.size())
{
Pattern p = (Pattern)excludePatterns.get(i);
Matcher m = p.matcher(url);
if (m.find())
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("RSS: Url '"+url+"' is illegal because exclude pattern '"+p.toString()+"' matched it");
return false;
}
i++;
}
return true;
}
/** Map a document URL using the first mapping rule that matches.
* @return null if the url is a seed, matches no rule, or should not be ingested; otherwise the mapped string.
*/
public String mapDocumentURL(String url)
throws ManifoldCFException
{
if (seeds.get(url) != null)
return null;
return mappings.map(url);
}
/** Get canonicalization policies */
public CanonicalizationPolicies getCanonicalizationPolicies()
{
return canonicalizationPolicies;
}
}
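// Illustrative usage sketch for the Filter class above (hypothetical names, not
// from the original source):
//
//   Filter filter = new Filter(documentSpecification, true);
//   if (filter.isLegalURL(candidateUrl))
//   {
//     String mapped = filter.mapDocumentURL(candidateUrl);
//     ...
//   }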
}