| /* $Id: JobManager.java 998576 2010-09-19 01:11:02Z kwright $ */ |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.manifoldcf.crawler.jobs; |
| |
| import org.apache.manifoldcf.core.interfaces.*; |
| import org.apache.manifoldcf.agents.interfaces.*; |
| import org.apache.manifoldcf.crawler.interfaces.*; |
| import java.util.*; |
| import java.util.regex.*; |
| import org.apache.manifoldcf.crawler.system.Logging; |
| import org.apache.manifoldcf.crawler.system.ManifoldCF; |
| |
/** This is the main job manager. It provides methods that support both job definition and the threads that execute the jobs.
| */ |
| public class JobManager implements IJobManager |
| { |
| public static final String _rcsid = "@(#)$Id: JobManager.java 998576 2010-09-19 01:11:02Z kwright $"; |
| |
| protected static final String stufferLock = "_STUFFER_"; |
| protected static final String deleteStufferLock = "_DELETESTUFFER_"; |
| protected static final String expireStufferLock = "_EXPIRESTUFFER_"; |
| protected static final String cleanStufferLock = "_CLEANSTUFFER_"; |
| protected static final String jobStopLock = "_JOBSTOP_"; |
| protected static final String jobResumeLock = "_JOBRESUME_"; |
| protected static final String jobResetLock = "_JOBRESET_"; |
| protected static final String hopLock = "_HOPLOCK_"; |
| |
| // Member variables |
| protected final IDBInterface database; |
| protected final IOutputConnectionManager outputMgr; |
| protected final IRepositoryConnectionManager connectionMgr; |
| protected final ITransformationConnectionManager transformationMgr; |
| |
| protected final IOutputConnectorManager outputConnectorMgr; |
| protected final IConnectorManager connectorMgr; |
| protected final ITransformationConnectorManager transformationConnectorMgr; |
| |
| protected final IRepositoryConnectorPool repositoryConnectorPool; |
| protected final ILockManager lockManager; |
| protected final IThreadContext threadContext; |
| protected final JobQueue jobQueue; |
| protected final Jobs jobs; |
| protected final HopCount hopCount; |
| protected final Carrydown carryDown; |
| protected final EventManager eventManager; |
| |
| |
| protected static Random random = new Random(); |
| |
| /** Constructor. |
| *@param threadContext is the thread context. |
| *@param database is the database. |
| */ |
| public JobManager(IThreadContext threadContext, IDBInterface database) |
| throws ManifoldCFException |
| { |
| this.database = database; |
| this.threadContext = threadContext; |
| jobs = new Jobs(threadContext,database); |
| jobQueue = new JobQueue(threadContext,database); |
| hopCount = new HopCount(threadContext,database); |
| carryDown = new Carrydown(database); |
| eventManager = new EventManager(database); |
| outputMgr = OutputConnectionManagerFactory.make(threadContext); |
| connectionMgr = RepositoryConnectionManagerFactory.make(threadContext); |
| transformationMgr = TransformationConnectionManagerFactory.make(threadContext); |
| outputConnectorMgr = OutputConnectorManagerFactory.make(threadContext); |
| connectorMgr = ConnectorManagerFactory.make(threadContext); |
| transformationConnectorMgr = TransformationConnectorManagerFactory.make(threadContext); |
| repositoryConnectorPool = RepositoryConnectorPoolFactory.make(threadContext); |
| lockManager = LockManagerFactory.make(threadContext); |
| } |
| |
| /** Install. |
| */ |
| @Override |
| public void install() |
| throws ManifoldCFException |
| { |
| jobs.install(transformationMgr.getTableName(),transformationMgr.getConnectionNameColumn(), |
| outputMgr.getTableName(),outputMgr.getConnectionNameColumn(), |
| connectionMgr.getTableName(),connectionMgr.getConnectionNameColumn()); |
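// The job queue, hopcount, and carrydown tables are all keyed against the jobs table's ID field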
| jobQueue.install(jobs.getTableName(),jobs.idField); |
| hopCount.install(jobs.getTableName(),jobs.idField); |
| carryDown.install(jobs.getTableName(),jobs.idField); |
| eventManager.install(); |
| } |
| |
| /** Uninstall. |
| */ |
| @Override |
| public void deinstall() |
| throws ManifoldCFException |
| { |
| eventManager.deinstall(); |
| carryDown.deinstall(); |
| hopCount.deinstall(); |
| jobQueue.deinstall(); |
| jobs.deinstall(); |
| } |
| |
| /** Export configuration */ |
| @Override |
| public void exportConfiguration(java.io.OutputStream os) |
| throws java.io.IOException, ManifoldCFException |
| { |
// Write a version indicator (configuration format version 4)
| ManifoldCF.writeDword(os,4); |
| // Get the job list |
| IJobDescription[] list = getAllJobs(); |
// Write the number of jobs
| ManifoldCF.writeDword(os,list.length); |
// Loop through the list and write the individual job info
| for (IJobDescription job : list) |
| { |
| ManifoldCF.writeString(os,job.getConnectionName()); |
| ManifoldCF.writeString(os,job.getDescription()); |
| ManifoldCF.writeDword(os,job.getType()); |
| ManifoldCF.writeDword(os,job.getStartMethod()); |
| ManifoldCF.writeLong(os,job.getInterval()); |
| ManifoldCF.writeLong(os,job.getExpiration()); |
| ManifoldCF.writeLong(os,job.getReseedInterval()); |
| ManifoldCF.writeDword(os,job.getPriority()); |
| ManifoldCF.writeDword(os,job.getHopcountMode()); |
| ManifoldCF.writeString(os,job.getSpecification().toXML()); |
| |
| // Write schedule |
| int recCount = job.getScheduleRecordCount(); |
| ManifoldCF.writeDword(os,recCount); |
| for (int j = 0; j < recCount; j++) |
| { |
| ScheduleRecord sr = job.getScheduleRecord(j); |
| writeEnumeratedValues(os,sr.getDayOfWeek()); |
| writeEnumeratedValues(os,sr.getMonthOfYear()); |
| writeEnumeratedValues(os,sr.getDayOfMonth()); |
| writeEnumeratedValues(os,sr.getYear()); |
| writeEnumeratedValues(os,sr.getHourOfDay()); |
| writeEnumeratedValues(os,sr.getMinutesOfHour()); |
| ManifoldCF.writeString(os,sr.getTimezone()); |
| ManifoldCF.writeLong(os,sr.getDuration()); |
| ManifoldCF.writeByte(os,sr.getRequestMinimum()?1:0); |
| } |
| |
| // Write hop count filters |
| Map filters = job.getHopCountFilters(); |
| ManifoldCF.writeDword(os,filters.size()); |
| Iterator iter = filters.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| String linkType = (String)iter.next(); |
| Long hopcount = (Long)filters.get(linkType); |
| ManifoldCF.writeString(os,linkType); |
| ManifoldCF.writeLong(os,hopcount); |
| } |
| |
| // Write forced metadata information |
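// (Always zero entries: forced metadata is no longer supported, and importConfiguration discards any entries it reads.)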
| ManifoldCF.writeDword(os,0); |
| |
| // Write pipeline information |
| ManifoldCF.writeDword(os,job.countPipelineStages()); |
| for (int j = 0; j < job.countPipelineStages(); j++) |
| { |
| ManifoldCF.writeSdword(os,job.getPipelineStagePrerequisite(j)); |
| ManifoldCF.writeByte(os,job.getPipelineStageIsOutputConnection(j)?0x1:0x0); |
| ManifoldCF.writeString(os,job.getPipelineStageConnectionName(j)); |
| ManifoldCF.writeString(os,job.getPipelineStageDescription(j)); |
| ManifoldCF.writeString(os,job.getPipelineStageSpecification(j).toXML()); |
| } |
| } |
| } |
| |
| protected static void writeEnumeratedValues(java.io.OutputStream os, EnumeratedValues ev) |
| throws java.io.IOException |
| { |
| if (ev == null) |
| { |
| ManifoldCF.writeSdword(os,-1); |
| return; |
| } |
| int size = ev.size(); |
| ManifoldCF.writeSdword(os,size); |
| Iterator iter = ev.getValues(); |
| while (iter.hasNext()) |
| { |
| ManifoldCF.writeDword(os,((Integer)iter.next()).intValue()); |
| } |
| } |
| |
| /** Import configuration */ |
| @Override |
| public void importConfiguration(java.io.InputStream is) |
| throws java.io.IOException, ManifoldCFException |
| { |
| int version = ManifoldCF.readDword(is); |
| if (version != 2 && version != 3 && version != 4) |
| throw new java.io.IOException("Unknown job configuration version: "+Integer.toString(version)); |
| int count = ManifoldCF.readDword(is); |
| for (int i = 0; i < count; i++) |
| { |
| IJobDescription job = createJob(); |
| |
| String outputConnectionName = null; |
| String outputSpecification = null; |
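// Pre-version-4 files stored a single output connection name and output specification on the job itself;
// these are captured here and converted into a one-stage pipeline below.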
| |
| job.setConnectionName(ManifoldCF.readString(is)); |
| if (version < 4) |
| outputConnectionName = ManifoldCF.readString(is); |
| job.setDescription(ManifoldCF.readString(is)); |
| job.setType(ManifoldCF.readDword(is)); |
| job.setStartMethod(ManifoldCF.readDword(is)); |
| job.setInterval(ManifoldCF.readLong(is)); |
| job.setExpiration(ManifoldCF.readLong(is)); |
| job.setReseedInterval(ManifoldCF.readLong(is)); |
| job.setPriority(ManifoldCF.readDword(is)); |
| job.setHopcountMode(ManifoldCF.readDword(is)); |
| job.getSpecification().fromXML(ManifoldCF.readString(is)); |
| if (version < 4) |
| outputSpecification = ManifoldCF.readString(is); |
| |
| // Read schedule |
| int recCount = ManifoldCF.readDword(is); |
| for (int j = 0; j < recCount; j++) |
| { |
| EnumeratedValues dayOfWeek = readEnumeratedValues(is); |
| EnumeratedValues monthOfYear = readEnumeratedValues(is); |
| EnumeratedValues dayOfMonth = readEnumeratedValues(is); |
| EnumeratedValues year = readEnumeratedValues(is); |
| EnumeratedValues hourOfDay = readEnumeratedValues(is); |
| EnumeratedValues minutesOfHour = readEnumeratedValues(is); |
| String timezone = ManifoldCF.readString(is); |
| Long duration = ManifoldCF.readLong(is); |
| boolean requestMinimum; |
| if (version >= 3) |
| requestMinimum = (ManifoldCF.readByte(is) != 0); |
| else |
| requestMinimum = false; |
| |
| ScheduleRecord sr = new ScheduleRecord(dayOfWeek, monthOfYear, dayOfMonth, year, |
| hourOfDay, minutesOfHour, timezone, duration, requestMinimum); |
| job.addScheduleRecord(sr); |
| } |
| |
| // Read hop count filters |
| int hopFilterCount = ManifoldCF.readDword(is); |
| for (int j = 0; j < hopFilterCount; j++) |
| { |
| String linkType = ManifoldCF.readString(is); |
| Long hopcount = ManifoldCF.readLong(is); |
| job.addHopCountFilter(linkType,hopcount); |
| } |
| |
| if (version >= 4) |
| { |
| // Read forced metadata information |
| int paramCount = ManifoldCF.readDword(is); |
| for (int j = 0; j < paramCount; j++) |
| { |
| String key = ManifoldCF.readString(is); |
| int valueCount = ManifoldCF.readDword(is); |
| for (int k = 0; k < valueCount; k++) |
| { |
| String value = ManifoldCF.readString(is); |
| // Discard it; we don't support this anymore |
| } |
| } |
| |
| // Read pipeline information |
| int pipelineCount = ManifoldCF.readDword(is); |
| for (int j = 0; j < pipelineCount; j++) |
| { |
| int prerequisite = ManifoldCF.readSdword(is); |
| int isOutput = ManifoldCF.readByte(is); |
| String connectionName = ManifoldCF.readString(is); |
| String description = ManifoldCF.readString(is); |
| String specification = ManifoldCF.readString(is); |
| job.addPipelineStage(prerequisite,isOutput == 0x1,connectionName,description).fromXML(specification); |
| } |
| } |
| |
| if (outputConnectionName != null) |
| { |
| // Add a single pipeline stage for the output connection |
| job.addPipelineStage(-1,true,outputConnectionName,"").fromXML(outputSpecification); |
| } |
| |
| // Attempt to save this job |
| save(job); |
| } |
| } |
| |
| protected EnumeratedValues readEnumeratedValues(java.io.InputStream is) |
| throws java.io.IOException |
| { |
| int size = ManifoldCF.readSdword(is); |
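// A size of -1 is the sentinel written by writeEnumeratedValues() for a null (unrestricted) enumeration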
| if (size == -1) |
| return null; |
| int[] values = new int[size]; |
| int i = 0; |
| while (i < size) |
| { |
| values[i++] = ManifoldCF.readDword(is); |
| } |
| return new EnumeratedValues(values); |
| } |
| |
| /** Note the deregistration of a connector used by the specified connections. |
| * This method will be called when the connector is deregistered. Jobs that use these connections |
| * must therefore enter appropriate states. |
| *@param connectionNames is the set of connection names. |
| */ |
| @Override |
| public void noteConnectorDeregistration(String[] connectionNames) |
| throws ManifoldCFException |
| { |
| // For each connection, find the corresponding list of jobs. From these jobs, we want the job id and the status. |
| List<String> list = new ArrayList<String>(); |
| int maxCount = database.findConjunctionClauseMax(new ClauseDescription[]{}); |
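// Break the connection names into batches no larger than the database's maximum conjunction clause size,
// and handle each batch with a single query.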
| int currentCount = 0; |
| int i = 0; |
| while (i < connectionNames.length) |
| { |
| if (currentCount == maxCount) |
| { |
| noteConnectionDeregistration(list); |
| list.clear(); |
| currentCount = 0; |
| } |
| |
| list.add(connectionNames[i++]); |
| currentCount++; |
| } |
| if (currentCount > 0) |
| noteConnectionDeregistration(list); |
| } |
| |
| /** Note deregistration for a batch of connection names. |
| */ |
| protected void noteConnectionDeregistration(List<String> list) |
| throws ManifoldCFException |
| { |
| ArrayList newList = new ArrayList(); |
| String query = database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new MultiClause(jobs.connectionNameField,list)}); |
| // Query for the matching jobs, and then for each job potentially adjust the state |
| IResultSet set = database.performQuery("SELECT "+jobs.idField+","+jobs.statusField+" FROM "+ |
| jobs.getTableName()+" WHERE "+query+" FOR UPDATE", |
| newList,null,null); |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| int statusValue = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| jobs.noteConnectorDeregistration(jobID,statusValue); |
| } |
| } |
| |
| /** Note the registration of a connector used by the specified connections. |
* This method will be called when a connector on which the specified
* connections depend is registered.
| *@param connectionNames is the set of connection names. |
| */ |
| @Override |
| public void noteConnectorRegistration(String[] connectionNames) |
| throws ManifoldCFException |
| { |
| // For each connection, find the corresponding list of jobs. From these jobs, we want the job id and the status. |
| List<String> list = new ArrayList<String>(); |
| int maxCount = database.findConjunctionClauseMax(new ClauseDescription[]{}); |
| int currentCount = 0; |
| int i = 0; |
| while (i < connectionNames.length) |
| { |
| if (currentCount == maxCount) |
| { |
| noteConnectionRegistration(list); |
| list.clear(); |
| currentCount = 0; |
| } |
| |
| list.add(connectionNames[i++]); |
| currentCount++; |
| } |
| if (currentCount > 0) |
| noteConnectionRegistration(list); |
| } |
| |
| /** Note registration for a batch of connection names. |
| */ |
| protected void noteConnectionRegistration(List<String> list) |
| throws ManifoldCFException |
| { |
| // Query for the matching jobs, and then for each job potentially adjust the state |
| ArrayList newList = new ArrayList(); |
| String query = database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new MultiClause(jobs.connectionNameField,list)}); |
| IResultSet set = database.performQuery("SELECT "+jobs.idField+","+jobs.statusField+" FROM "+ |
| jobs.getTableName()+" WHERE "+query+" FOR UPDATE", |
| newList,null,null); |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| int statusValue = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| jobs.noteConnectorRegistration(jobID,statusValue); |
| } |
| } |
| |
| /** Note the deregistration of an output connector used by the specified connections. |
| * This method will be called when the connector is deregistered. Jobs that use these connections |
| * must therefore enter appropriate states. |
| *@param connectionNames is the set of connection names. |
| */ |
| @Override |
| public void noteOutputConnectorDeregistration(String[] connectionNames) |
| throws ManifoldCFException |
| { |
| // For each connection, find the corresponding list of jobs. From these jobs, we want the job id and the status. |
| List<String> list = new ArrayList<String>(); |
| int maxCount = database.findConjunctionClauseMax(new ClauseDescription[]{}); |
| int currentCount = 0; |
| int i = 0; |
| while (i < connectionNames.length) |
| { |
| if (currentCount == maxCount) |
| { |
| noteOutputConnectionDeregistration(list); |
| list.clear(); |
| currentCount = 0; |
| } |
| |
| list.add(connectionNames[i++]); |
| currentCount++; |
| } |
| if (currentCount > 0) |
| noteOutputConnectionDeregistration(list); |
| } |
| |
| /** Note deregistration for a batch of output connection names. |
| */ |
| protected void noteOutputConnectionDeregistration(List<String> list) |
| throws ManifoldCFException |
| { |
| // Query for the matching jobs, and then for each job potentially adjust the state |
| Long[] jobIDs = jobs.findJobsMatchingOutputs(list); |
| if (jobIDs.length == 0) |
| return; |
| |
| StringBuilder query = new StringBuilder(); |
| ArrayList newList = new ArrayList(); |
| |
| query.append("SELECT ").append(jobs.idField).append(",").append(jobs.statusField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new MultiClause(jobs.idField,jobIDs)})) |
| .append(" FOR UPDATE"); |
| IResultSet set = database.performQuery(query.toString(),newList,null,null); |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| int statusValue = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| jobs.noteOutputConnectorDeregistration(jobID,statusValue); |
| } |
| } |
| |
| /** Note the registration of an output connector used by the specified connections. |
* This method will be called when a connector on which the specified
* connections depend is registered.
| *@param connectionNames is the set of connection names. |
| */ |
| @Override |
| public void noteOutputConnectorRegistration(String[] connectionNames) |
| throws ManifoldCFException |
| { |
| // For each connection, find the corresponding list of jobs. From these jobs, we want the job id and the status. |
| List<String> list = new ArrayList<String>(); |
| int maxCount = database.findConjunctionClauseMax(new ClauseDescription[]{}); |
| int currentCount = 0; |
| int i = 0; |
| while (i < connectionNames.length) |
| { |
| if (currentCount == maxCount) |
| { |
| noteOutputConnectionRegistration(list); |
| list.clear(); |
| currentCount = 0; |
| } |
| |
| list.add(connectionNames[i++]); |
| currentCount++; |
| } |
| if (currentCount > 0) |
| noteOutputConnectionRegistration(list); |
| } |
| |
| /** Note registration for a batch of output connection names. |
| */ |
| protected void noteOutputConnectionRegistration(List<String> list) |
| throws ManifoldCFException |
| { |
| // Query for the matching jobs, and then for each job potentially adjust the state |
| Long[] jobIDs = jobs.findJobsMatchingOutputs(list); |
| if (jobIDs.length == 0) |
| return; |
| |
| StringBuilder query = new StringBuilder(); |
| ArrayList newList = new ArrayList(); |
| |
| query.append("SELECT ").append(jobs.idField).append(",").append(jobs.statusField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new MultiClause(jobs.idField,jobIDs)})) |
| .append(" FOR UPDATE"); |
| IResultSet set = database.performQuery(query.toString(),newList,null,null); |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| int statusValue = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| jobs.noteOutputConnectorRegistration(jobID,statusValue); |
| } |
| } |
| |
| /** Note the deregistration of a transformation connector used by the specified connections. |
| * This method will be called when the connector is deregistered. Jobs that use these connections |
| * must therefore enter appropriate states. |
| *@param connectionNames is the set of connection names. |
| */ |
| @Override |
| public void noteTransformationConnectorDeregistration(String[] connectionNames) |
| throws ManifoldCFException |
| { |
| // For each connection, find the corresponding list of jobs. From these jobs, we want the job id and the status. |
| List<String> list = new ArrayList<String>(); |
| int maxCount = database.findConjunctionClauseMax(new ClauseDescription[]{}); |
| int currentCount = 0; |
| int i = 0; |
| while (i < connectionNames.length) |
| { |
| if (currentCount == maxCount) |
| { |
noteTransformationConnectionDeregistration(list);
| list.clear(); |
| currentCount = 0; |
| } |
| |
| list.add(connectionNames[i++]); |
| currentCount++; |
| } |
| if (currentCount > 0) |
| noteTransformationConnectionDeregistration(list); |
| } |
| |
| /** Note deregistration for a batch of transformation connection names. |
| */ |
| protected void noteTransformationConnectionDeregistration(List<String> list) |
| throws ManifoldCFException |
| { |
| // Query for the matching jobs, and then for each job potentially adjust the state |
| Long[] jobIDs = jobs.findJobsMatchingTransformations(list); |
| if (jobIDs.length == 0) |
| return; |
| |
| StringBuilder query = new StringBuilder(); |
| ArrayList newList = new ArrayList(); |
| |
| query.append("SELECT ").append(jobs.idField).append(",").append(jobs.statusField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new MultiClause(jobs.idField,jobIDs)})) |
| .append(" FOR UPDATE"); |
| IResultSet set = database.performQuery(query.toString(),newList,null,null); |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| int statusValue = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| jobs.noteTransformationConnectorDeregistration(jobID,statusValue); |
| } |
| } |
| |
| /** Note the registration of a transformation connector used by the specified connections. |
* This method will be called when a connector on which the specified
* connections depend is registered.
| *@param connectionNames is the set of connection names. |
| */ |
| @Override |
| public void noteTransformationConnectorRegistration(String[] connectionNames) |
| throws ManifoldCFException |
| { |
| // For each connection, find the corresponding list of jobs. From these jobs, we want the job id and the status. |
| List<String> list = new ArrayList<String>(); |
| int maxCount = database.findConjunctionClauseMax(new ClauseDescription[]{}); |
| int currentCount = 0; |
| int i = 0; |
| while (i < connectionNames.length) |
| { |
| if (currentCount == maxCount) |
| { |
noteTransformationConnectionRegistration(list);
| list.clear(); |
| currentCount = 0; |
| } |
| |
| list.add(connectionNames[i++]); |
| currentCount++; |
| } |
| if (currentCount > 0) |
| noteTransformationConnectionRegistration(list); |
| } |
| |
| /** Note registration for a batch of transformation connection names. |
| */ |
| protected void noteTransformationConnectionRegistration(List<String> list) |
| throws ManifoldCFException |
| { |
| // Query for the matching jobs, and then for each job potentially adjust the state |
| Long[] jobIDs = jobs.findJobsMatchingTransformations(list); |
| if (jobIDs.length == 0) |
| return; |
| |
| StringBuilder query = new StringBuilder(); |
| ArrayList newList = new ArrayList(); |
| |
| query.append("SELECT ").append(jobs.idField).append(",").append(jobs.statusField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new MultiClause(jobs.idField,jobIDs)})) |
| .append(" FOR UPDATE"); |
| IResultSet set = database.performQuery(query.toString(),newList,null,null); |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| int statusValue = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| jobs.noteTransformationConnectorRegistration(jobID,statusValue); |
| } |
| } |
| |
| /** Note a change in connection configuration. |
| * This method will be called whenever a connection's configuration is modified, or when an external repository change |
| * is signalled. |
| */ |
| @Override |
| public void noteConnectionChange(String connectionName) |
| throws ManifoldCFException |
| { |
| jobs.noteConnectionChange(connectionName); |
| } |
| |
| /** Note a change in output connection configuration. |
| * This method will be called whenever a connection's configuration is modified, or when an external target config change |
| * is signalled. |
| */ |
| @Override |
| public void noteOutputConnectionChange(String connectionName) |
| throws ManifoldCFException |
| { |
| jobs.noteOutputConnectionChange(connectionName); |
| } |
| |
| /** Note a change in transformation connection configuration. |
| * This method will be called whenever a connection's configuration is modified. |
| */ |
| @Override |
| public void noteTransformationConnectionChange(String connectionName) |
| throws ManifoldCFException |
| { |
| jobs.noteTransformationConnectionChange(connectionName); |
| } |
| |
| /** Assess jobs marked to be in need of assessment for connector status changes. |
| */ |
| public void assessMarkedJobs() |
| throws ManifoldCFException |
| { |
| database.beginTransaction(); |
| try |
| { |
| // Query for all jobs marked "ASSESSMENT_UNKNOWN". |
| jobs.assessMarkedJobs(); |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (RuntimeException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| |
| /** Load a sorted list of job descriptions. |
| *@return the list, sorted by description. |
| */ |
| @Override |
| public IJobDescription[] getAllJobs() |
| throws ManifoldCFException |
| { |
| return jobs.getAll(); |
| } |
| |
| /** Create a new job. |
| *@return the new job. |
| */ |
| @Override |
| public IJobDescription createJob() |
| throws ManifoldCFException |
| { |
| return jobs.create(); |
| } |
| |
/** Get the hop lock name for a given job ID */
| protected String getHopLockName(Long jobID) |
| { |
| return hopLock + jobID; |
| } |
| |
| |
| |
| /** Delete a job. |
| *@param id is the job's identifier. This method will purge all the records belonging to the job from the database, as |
| * well as remove all documents indexed by the job from the index. |
| */ |
| @Override |
| public void deleteJob(Long id) |
| throws ManifoldCFException |
| { |
| database.beginTransaction(); |
| try |
| { |
| // If the job is running, throw an error |
| ArrayList list = new ArrayList(); |
| String query = database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,id)}); |
| IResultSet set = database.performQuery("SELECT "+jobs.statusField+" FROM "+ |
| jobs.getTableName()+" WHERE "+query+" FOR UPDATE",list,null,null); |
| if (set.getRowCount() == 0) |
| throw new ManifoldCFException("Attempting to delete a job that doesn't exist: "+id); |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus(row.getValue(jobs.statusField).toString()); |
| if (status == jobs.STATUS_ACTIVE || status == jobs.STATUS_ACTIVESEEDING || |
| status == jobs.STATUS_ACTIVE_UNINSTALLED || status == jobs.STATUS_ACTIVESEEDING_UNINSTALLED) |
| throw new ManifoldCFException("Job "+id+" is active; you must shut it down before deleting it"); |
| if (status != jobs.STATUS_INACTIVE) |
| throw new ManifoldCFException("Job "+id+" is busy; you must wait and/or shut it down before deleting it"); |
| jobs.writePermanentStatus(id,jobs.STATUS_READYFORDELETE,true); |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Job "+id+" marked for deletion"); |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (RuntimeException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| |
| } |
| |
| /** Load a job for editing. |
| *@param id is the job's identifier. |
| *@return null if the job doesn't exist. |
| */ |
| @Override |
| public IJobDescription load(Long id) |
| throws ManifoldCFException |
| { |
| return jobs.load(id,false); |
| } |
| |
| /** Load a job. |
| *@param id is the job's identifier. |
| *@param readOnly is true if a read-only object is desired. |
| *@return null if the job doesn't exist. |
| */ |
| @Override |
| public IJobDescription load(Long id, boolean readOnly) |
| throws ManifoldCFException |
| { |
| return jobs.load(id,readOnly); |
| } |
| |
| /** Save a job. |
| *@param jobDescription is the job description. |
| */ |
| @Override |
| public void save(IJobDescription jobDescription) |
| throws ManifoldCFException |
| { |
| ManifoldCF.noteConfigurationChange(); |
| jobs.save(jobDescription); |
| } |
| |
| /** See if there's a reference to a connection name. |
| *@param connectionName is the name of the connection. |
| *@return true if there is a reference, false otherwise. |
| */ |
| @Override |
| public boolean checkIfReference(String connectionName) |
| throws ManifoldCFException |
| { |
| return jobs.checkIfReference(connectionName); |
| } |
| |
| /** See if there's a reference to an output connection name. |
| *@param connectionName is the name of the connection. |
| *@return true if there is a reference, false otherwise. |
| */ |
| @Override |
| public boolean checkIfOutputReference(String connectionName) |
| throws ManifoldCFException |
| { |
| return jobs.checkIfOutputReference(connectionName); |
| } |
| |
| /** See if there's a reference to a transformation connection name. |
| *@param connectionName is the name of the connection. |
| *@return true if there is a reference, false otherwise. |
| */ |
| @Override |
| public boolean checkIfTransformationReference(String connectionName) |
| throws ManifoldCFException |
| { |
| return jobs.checkIfTransformationReference(connectionName); |
| } |
| |
/** Get the jobs associated with a given connection name.
*@param connectionName is the name of the connection.
*@return the set of job descriptions associated with that connection.
| */ |
| @Override |
| public IJobDescription[] findJobsForConnection(String connectionName) |
| throws ManifoldCFException |
| { |
| return jobs.findJobsForConnection(connectionName); |
| } |
| |
| /** Clear job seeding state. |
| *@param jobID is the job ID. |
| */ |
| @Override |
| public void clearJobSeedingState(Long jobID) |
| throws ManifoldCFException |
| { |
| jobs.clearSeedingState(jobID); |
| } |
| |
| // These methods cover activities that require interaction with the job queue. |
| // The job queue is maintained underneath this interface, and all threads that perform |
| // job activities need to go through this layer. |
| |
| /** Reset the job queue for an individual process ID. |
| * If a node was shut down in the middle of doing something, sufficient information should |
| * be around in the database to allow the node's activities to be cleaned up. |
| *@param processID is the process ID of the node we want to clean up after. |
| */ |
| @Override |
| public void cleanupProcessData(String processID) |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Cleaning up process data for process '"+processID+"'"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Clean up events |
| eventManager.restart(processID); |
| // Clean up job queue |
| jobQueue.restart(processID); |
| // Clean up jobs |
| jobs.restart(processID); |
| // Clean up hopcount stuff |
| hopCount.restart(processID); |
| // Clean up carrydown stuff |
| carryDown.restart(processID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| Logging.jobs.debug("Cleanup complete"); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting for restart: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset the job queue for all process IDs. |
| * If a node was shut down in the middle of doing something, sufficient information should |
| * be around in the database to allow the node's activities to be cleaned up. |
| */ |
| @Override |
| public void cleanupProcessData() |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Cleaning up all process data"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Clean up events |
| eventManager.restart(); |
| // Clean up job queue |
| jobQueue.restart(); |
| // Clean up jobs |
| jobs.restart(); |
| // Clean up hopcount stuff |
| hopCount.restart(); |
| // Clean up carrydown stuff |
| carryDown.restart(); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| Logging.jobs.debug("Cleanup complete"); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting for restart: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Prepare to start the entire cluster. |
| * If there are no other nodes alive, then at the time the first node comes up, we need to |
| * reset the job queue for ALL processes that had been running before. This method must |
| * be called in addition to cleanupProcessData(). |
| */ |
| @Override |
| public void prepareForClusterStart() |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Starting cluster"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Clean up events |
| eventManager.restartCluster(); |
| // Clean up job queue |
| jobQueue.restartCluster(); |
| // Clean up jobs |
| jobs.restartCluster(); |
| // Clean up hopcount stuff |
| hopCount.restartCluster(); |
| // Clean up carrydown stuff |
| carryDown.restartCluster(); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| Logging.jobs.debug("Cluster start complete"); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction starting cluster: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset as part of restoring document worker threads. |
| *@param processID is the current process ID. |
| */ |
| @Override |
| public void resetDocumentWorkerStatus(String processID) |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Resetting document active status"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobQueue.resetDocumentWorkerStatus(processID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting document active status: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| Logging.jobs.debug("Reset complete"); |
| } |
| |
| /** Reset as part of restoring seeding threads. |
| *@param processID is the current process ID. |
| */ |
| @Override |
| public void resetSeedingWorkerStatus(String processID) |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Resetting seeding status"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.resetSeedingWorkerStatus(processID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting seeding worker status: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| Logging.jobs.debug("Reset complete"); |
| } |
| |
| /** Reset as part of restoring doc delete threads. |
| *@param processID is the current process ID. |
| */ |
| @Override |
| public void resetDocDeleteWorkerStatus(String processID) |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Resetting doc deleting status"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobQueue.resetDocDeleteWorkerStatus(processID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting doc deleting worker status: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| Logging.jobs.debug("Reset complete"); |
| } |
| |
| /** Reset as part of restoring doc cleanup threads. |
| *@param processID is the current process ID. |
| */ |
| @Override |
| public void resetDocCleanupWorkerStatus(String processID) |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Resetting doc cleaning status"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobQueue.resetDocCleanupWorkerStatus(processID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting doc cleaning status: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| Logging.jobs.debug("Reset complete"); |
| } |
| |
| /** Reset as part of restoring delete startup threads. |
| *@param processID is the current process ID. |
| */ |
| @Override |
| public void resetDeleteStartupWorkerStatus(String processID) |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Resetting job delete starting up status"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.resetDeleteStartupWorkerStatus(processID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting job delete starting up status: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| Logging.jobs.debug("Reset complete"); |
| } |
| |
/** Reset as part of restoring notification threads.
*@param processID is the current process ID.
*/
| @Override |
| public void resetNotificationWorkerStatus(String processID) |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Resetting notification worker status"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.resetNotificationWorkerStatus(processID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting notification worker status: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| Logging.jobs.debug("Reset complete"); |
| } |
| |
/** Reset as part of restoring startup threads.
*@param processID is the current process ID.
*/
| @Override |
| public void resetStartupWorkerStatus(String processID) |
| throws ManifoldCFException |
| { |
| Logging.jobs.debug("Resetting job starting up status"); |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.resetStartupWorkerStatus(processID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting job starting up status: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| Logging.jobs.debug("Reset complete"); |
| } |
| |
| // These methods support job delete threads |
| |
| /** Delete ingested document identifiers (as part of deleting the owning job). |
| * The number of identifiers specified is guaranteed to be less than the maxInClauseCount |
| * for the database. |
| *@param identifiers is the set of document identifiers. |
| */ |
| @Override |
| public void deleteIngestedDocumentIdentifiers(DocumentDescription[] identifiers) |
| throws ManifoldCFException |
| { |
| jobQueue.deleteIngestedDocumentIdentifiers(identifiers); |
| // Hopcount rows get removed when the job itself is removed. |
| // carrydown records get removed when the job itself is removed. |
| } |
| |
| /** Get list of cleanable document descriptions. This list will take into account |
| * multiple jobs that may own the same document. All documents for which a description |
| * is returned will be transitioned to the "beingcleaned" state. Documents which are |
| * not in transition and are eligible, but are owned by other jobs, will have their |
| * jobqueue entries deleted by this method. |
| *@param processID is the current process ID. |
| *@param maxCount is the maximum number of documents to return. |
| *@param currentTime is the current time; some fetches do not occur until a specific time. |
| *@return the document descriptions for these documents. |
| */ |
| @Override |
| public DocumentSetAndFlags getNextCleanableDocuments(String processID, int maxCount, long currentTime) |
| throws ManifoldCFException |
| { |
| // The query will be built here, because it joins the jobs table against the jobqueue |
| // table. |
| // |
| // This query must only pick up documents that are not active in any job and |
| // which belong to a job that's in a "shutting down" state and are in |
| // a "purgatory" state. |
| // |
// We are in fact more conservative in this query than we need to be; some documents
// will be excluded merely because they match our criteria, which are designed to be
// fast rather than perfect. The match we make is: hashvalue against hashvalue, and
// different job id's.
| // |
| // SELECT id,jobid,docid FROM jobqueue t0 WHERE t0.status='P' AND EXISTS(SELECT 'x' FROM |
| // jobs t3 WHERE t0.jobid=t3.id AND t3.status='X') |
| // AND NOT EXISTS(SELECT 'x' FROM jobqueue t2 WHERE t0.hashval=t2.hashval AND t0.jobid!=t2.jobid |
| // AND t2.status IN ('A','F','B')) |
| // |
| |
| // Do a simple preliminary query, since the big query is currently slow, so that we don't waste time during stasis or |
| // ingestion. |
// Moved outside of the transaction, so we have no chance of locking up the job status cache key for an extended period of time.
| if (!jobs.cleaningJobsPresent()) |
| return new DocumentSetAndFlags(new DocumentDescription[0],new boolean[0]); |
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to find documents to put on the cleaning queue"); |
| } |
| |
| while (true) |
| { |
| long sleepAmt = 0L; |
| |
| // Enter a write lock. This means we don't need a FOR UPDATE on the query. |
| lockManager.enterWriteLock(cleanStufferLock); |
| try |
| { |
| database.beginTransaction(); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("After "+new Long(System.currentTimeMillis()-startTime).toString()+" ms, beginning query to look for documents to put on cleaning queue"); |
| |
| // Note: This query does not do "FOR UPDATE", because it is running under the only thread that can possibly change the document's state to "being cleaned". |
| ArrayList list = new ArrayList(); |
| |
| StringBuilder sb = new StringBuilder("SELECT "); |
| sb.append(jobQueue.idField).append(",") |
| .append(jobQueue.jobIDField).append(",") |
| .append(jobQueue.docHashField).append(",") |
| .append(jobQueue.docIDField).append(",") |
| .append(jobQueue.failTimeField).append(",") |
| .append(jobQueue.failCountField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t0 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause("t0."+jobQueue.statusField,jobQueue.statusToString(jobQueue.STATUS_PURGATORY))})).append(" AND ") |
| .append("(t0.").append(jobQueue.checkTimeField).append(" IS NULL OR t0.").append(jobQueue.checkTimeField).append("<=?) AND "); |
| |
| list.add(new Long(currentTime)); |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause("t1."+jobs.statusField,jobs.statusToString(jobs.STATUS_SHUTTINGDOWN)), |
| new JoinClause("t1."+jobs.idField,"t0."+jobQueue.jobIDField)})) |
| .append(") AND "); |
| |
| sb.append("NOT EXISTS(SELECT 'x' FROM ").append(jobQueue.getTableName()).append(" t2 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new JoinClause("t2."+jobQueue.docHashField,"t0."+jobQueue.docHashField)})).append(" AND ") |
| .append("t2.").append(jobQueue.statusField).append(" IN (?,?,?,?,?,?) AND ") |
| .append("t2.").append(jobQueue.jobIDField).append("!=t0.").append(jobQueue.jobIDField) |
| .append(") "); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)); |
| |
| sb.append(database.constructOffsetLimitClause(0,maxCount)); |
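// Limit the result set to at most maxCount rows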
| |
// The check for a null checktime field is for backwards compatibility
| IResultSet set = database.performQuery(sb.toString(),list,null,null,maxCount,null); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Done getting docs to cleaning queue after "+new Long(System.currentTimeMillis()-startTime).toString()+" ms."); |
| |
// We need to organize the returned set by connection name, so that we can efficiently
// use getUnindexableDocumentIdentifiers.
// This is a map keyed by connection name, where each value is a List containing DocumentDescription
// objects.
| Map<String,List<DocumentDescription>> connectionNameMap = new HashMap<String,List<DocumentDescription>>(); |
| Map<String,DocumentDescription> documentIDMap = new HashMap<String,DocumentDescription>(); |
| for (int i = 0; i < set.getRowCount(); i++) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(jobQueue.jobIDField); |
| String documentIDHash = (String)row.getValue(jobQueue.docHashField); |
| String documentID = (String)row.getValue(jobQueue.docIDField); |
| Long failTimeValue = (Long)row.getValue(jobQueue.failTimeField); |
| Long failCountValue = (Long)row.getValue(jobQueue.failCountField); |
| // Failtime is probably not useful in this context, but we'll bring it along for completeness |
| long failTime; |
| if (failTimeValue == null) |
| failTime = -1L; |
| else |
| failTime = failTimeValue.longValue(); |
| int failCount; |
| if (failCountValue == null) |
| failCount = 0; |
| else |
| failCount = (int)failCountValue.longValue(); |
| IJobDescription jobDesc = load(jobID); |
| String connectionName = jobDesc.getConnectionName(); |
| DocumentDescription dd = new DocumentDescription((Long)row.getValue(jobQueue.idField), |
| jobID,documentIDHash,documentID,failTime,failCount); |
| String compositeDocumentID = makeCompositeID(documentIDHash,connectionName); |
| documentIDMap.put(compositeDocumentID,dd); |
| List<DocumentDescription> x = connectionNameMap.get(connectionName); |
| if (x == null) |
| { |
| // New entry needed |
| x = new ArrayList<DocumentDescription>(); |
| connectionNameMap.put(connectionName,x); |
| } |
| x.add(dd); |
| } |
| |
| // For each bin, obtain a filtered answer, and enter all answers into a hash table. |
| // We'll then scan the result again to look up the right descriptions for return, |
| // and delete the ones that are owned multiply. |
| Map<String,String> allowedDocIds = new HashMap<String,String>(); |
| Iterator<String> iter = connectionNameMap.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| String connectionName = iter.next(); |
| List<DocumentDescription> x = connectionNameMap.get(connectionName); |
| // Do the filter query |
| DocumentDescription[] descriptions = new DocumentDescription[x.size()]; |
| for (int j = 0; j < descriptions.length; j++) |
| { |
| descriptions[j] = (DocumentDescription)x.get(j); |
| } |
| String[] docIDHashes = getUnindexableDocumentIdentifiers(descriptions,connectionName); |
| for (String docIDHash : docIDHashes) |
| { |
| String key = makeCompositeID(docIDHash,connectionName); |
| allowedDocIds.put(key,docIDHash); |
| } |
| } |
| |
| // Now, assemble a result, and change the state of the records accordingly |
| // First thing to do is order by document hash, so we reduce the risk of deadlock. |
| String[] compositeIDArray = new String[documentIDMap.size()]; |
| int j = 0; |
| iter = documentIDMap.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| compositeIDArray[j++] = iter.next(); |
| } |
| |
| java.util.Arrays.sort(compositeIDArray); |
| |
| DocumentDescription[] rval = new DocumentDescription[documentIDMap.size()]; |
| boolean[] rvalBoolean = new boolean[documentIDMap.size()]; |
| for (int i = 0; i < compositeIDArray.length; i++) |
| { |
| String compositeDocID = compositeIDArray[i]; |
| DocumentDescription dd = documentIDMap.get(compositeDocID); |
| // Determine whether we can delete it from the index or not |
| rvalBoolean[i] = (allowedDocIds.get(compositeDocID) != null); |
| // Set the record status to "being cleaned" and return it |
| rval[i] = dd; |
| jobQueue.setCleaningStatus(dd.getID(),processID); |
| } |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Done pruning unindexable docs after "+new Long(System.currentTimeMillis()-startTime).toString()+" ms."); |
| |
| return new DocumentSetAndFlags(rval,rvalBoolean); |
| |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction finding deleteable docs: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| finally |
| { |
| lockManager.leaveWriteLock(cleanStufferLock); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Create a composite document hash key. This consists of the document id hash plus the |
| * connection name. |
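* For example, a hash of "abc123" and a connection named "myconn" produce "abc123:myconn".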
| */ |
| protected static String makeCompositeID(String docIDHash, String connectionName) |
| { |
| return docIDHash + ":" + connectionName; |
| } |
| |
| /** Get list of deletable document descriptions. This list will take into account |
| * multiple jobs that may own the same document. All documents for which a description |
| * is returned will be transitioned to the "beingdeleted" state. Documents which are |
| * not in transition and are eligible, but are owned by other jobs, will have their |
| * jobqueue entries deleted by this method. |
| *@param processID is the current process ID. |
| *@param maxCount is the maximum number of documents to return. |
| *@param currentTime is the current time; some fetches do not occur until a specific time. |
| *@return the document descriptions for these documents. |
| */ |
| @Override |
| public DocumentDescription[] getNextDeletableDocuments(String processID, |
| int maxCount, long currentTime) |
| throws ManifoldCFException |
| { |
| // The query will be built here, because it joins the jobs table against the jobqueue |
| // table. |
| // |
| // This query must only pick up documents that are not active in any job and |
| // which either belong to a job that's in a "delete pending" state and are in |
| // a "complete", "purgatory", or "pendingpurgatory" state, OR belong to a job |
| // that's in a "shutting down" state and are in the "purgatory" state. |
| // |
// We are in fact more conservative in this query than we need to be; some documents
// will be excluded merely because they match our criteria, which are designed to be
// fast rather than perfect. The match we make is: hashvalue against hashvalue, and
// different job id's.
| // |
| // SELECT id,jobid,docid FROM jobqueue t0 WHERE t0.status IN ('C','P','G') AND EXISTS(SELECT 'x' FROM |
| // jobs t1 WHERE t0.jobid=t1.id AND t1.status='D') |
| // AND NOT EXISTS(SELECT 'x' FROM jobqueue t2 WHERE t0.hashval=t2.hashval AND t0.jobid!=t2.jobid |
| // AND t2.status IN ('A','F','B')) |
| // |
| |
| // Do a simple preliminary query first, since the big query is currently slow; that way we don't waste time during stasis or |
| // ingestion. |
| // Moved outside of the transaction, so we have no chance of locking up the job status cache key for an extended period of time. |
| if (!jobs.deletingJobsPresent()) |
| return new DocumentDescription[0]; |
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to find documents to put on the delete queue"); |
| } |
| |
| while (true) |
| { |
| long sleepAmt = 0L; |
| |
| // Enter a write lock so that multiple threads can't be in here at the same time |
| lockManager.enterWriteLock(deleteStufferLock); |
| try |
| { |
| database.beginTransaction(); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("After "+new Long(System.currentTimeMillis()-startTime).toString()+" ms, beginning query to look for documents to put on delete queue"); |
| |
| // Note: This query does not do "FOR UPDATE", because it is running under the only thread that can possibly change the document's state to "being deleted". |
| // When FOR UPDATE was included, deadlocks happened frequently. |
| ArrayList list = new ArrayList(); |
| StringBuilder sb = new StringBuilder("SELECT "); |
| sb.append(jobQueue.idField).append(",") |
| .append(jobQueue.jobIDField).append(",") |
| .append(jobQueue.docHashField).append(",") |
| .append(jobQueue.docIDField).append(",") |
| .append(jobQueue.failTimeField).append(",") |
| .append(jobQueue.failCountField).append(" FROM ").append(jobQueue.getTableName()).append(" t0 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause("t0."+jobQueue.statusField,jobQueue.statusToString(jobQueue.STATUS_ELIGIBLEFORDELETE))})).append(" AND ") |
| .append("t0.").append(jobQueue.checkTimeField).append("<=? AND "); |
| |
| list.add(new Long(currentTime)); |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause("t1."+jobs.statusField,jobs.statusToString(jobs.STATUS_DELETING)), |
| new JoinClause("t1."+jobs.idField,"t0."+jobQueue.jobIDField)})).append(") AND "); |
| |
| sb.append("NOT EXISTS(SELECT 'x' FROM ").append(jobQueue.getTableName()).append(" t2 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new JoinClause("t2."+jobQueue.docHashField,"t0."+jobQueue.docHashField)})).append(" AND ") |
| .append("t2.").append(jobQueue.statusField).append(" IN (?,?,?,?,?,?) AND ") |
| .append("t2.").append(jobQueue.jobIDField).append("!=t0.").append(jobQueue.jobIDField) |
| .append(") "); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)); |
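| // The six statuses bound above describe a record for the same document hash, in a different job, that is |
| // currently active or already being deleted/cleaned; any such record blocks this document from being returned. |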
| |
| sb.append(database.constructOffsetLimitClause(0,maxCount)); |
| |
| // The "checktime is null" check is for backwards compatibility |
| IResultSet set = database.performQuery(sb.toString(),list,null,null,maxCount,null); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Done getting docs to delete queue after "+new Long(System.currentTimeMillis()-startTime).toString()+" ms."); |
| |
| // We need to organize the returned set by connection name, so that we can efficiently |
| // use getUnindexableDocumentIdentifiers. |
| // This is a table keyed by connection name and containing an ArrayList, which in turn contains DocumentDescription |
| // objects. |
| Map<String,List<DocumentDescription>> connectionNameMap = new HashMap<String,List<DocumentDescription>>(); |
| Map<String,DocumentDescription> documentIDMap = new HashMap<String,DocumentDescription>(); |
| for (int i = 0; i < set.getRowCount(); i++) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(jobQueue.jobIDField); |
| String documentIDHash = (String)row.getValue(jobQueue.docHashField); |
| String documentID = (String)row.getValue(jobQueue.docIDField); |
| Long failTimeValue = (Long)row.getValue(jobQueue.failTimeField); |
| Long failCountValue = (Long)row.getValue(jobQueue.failCountField); |
| // Failtime is probably not useful in this context, but we'll bring it along for completeness |
| long failTime; |
| if (failTimeValue == null) |
| failTime = -1L; |
| else |
| failTime = failTimeValue.longValue(); |
| int failCount; |
| if (failCountValue == null) |
| failCount = 0; |
| else |
| failCount = (int)failCountValue.longValue(); |
| IJobDescription jobDesc = load(jobID); |
| String connectionName = jobDesc.getConnectionName(); |
| DocumentDescription dd = new DocumentDescription((Long)row.getValue(jobQueue.idField), |
| jobID,documentIDHash,documentID,failTime,failCount); |
| String compositeDocumentID = makeCompositeID(documentIDHash,connectionName); |
| documentIDMap.put(compositeDocumentID,dd); |
| List<DocumentDescription> x = connectionNameMap.get(connectionName); |
| if (x == null) |
| { |
| // New entry needed |
| x = new ArrayList<DocumentDescription>(); |
| connectionNameMap.put(connectionName,x); |
| } |
| x.add(dd); |
| } |
| |
| // For each bin, obtain a filtered answer, and enter all answers into a hash table. |
| // We'll then scan the result again to look up the right descriptions for return, |
| // and delete the ones that are owned by more than one job. |
| Map<String,String> allowedDocIds = new HashMap<String,String>(); |
| Iterator<String> iter = connectionNameMap.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| String connectionName = iter.next(); |
| List<DocumentDescription> x = connectionNameMap.get(connectionName); |
| // Do the filter query |
| DocumentDescription[] descriptions = new DocumentDescription[x.size()]; |
| for (int j = 0; j < descriptions.length; j++) |
| { |
| descriptions[j] = x.get(j); |
| } |
| String[] docIDHashes = getUnindexableDocumentIdentifiers(descriptions,connectionName); |
| for (int j = 0; j < docIDHashes.length; j++) |
| { |
| String docIDHash = docIDHashes[j]; |
| String key = makeCompositeID(docIDHash,connectionName); |
| allowedDocIds.put(key,docIDHash); |
| } |
| } |
| |
| // Now, assemble a result, and change the state of the records accordingly |
| // First thing to do is order by document hash to reduce chances of deadlock. |
| String[] compositeIDArray = new String[documentIDMap.size()]; |
| int j = 0; |
| iter = documentIDMap.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| compositeIDArray[j++] = iter.next(); |
| } |
| |
| java.util.Arrays.sort(compositeIDArray); |
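| // Because every thread that updates these rows sorts by the same composite key first, row locks are |
| // always acquired in a consistent order, which avoids lock-ordering deadlocks between threads. |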
| |
| DocumentDescription[] rval = new DocumentDescription[allowedDocIds.size()]; |
| j = 0; |
| for (int i = 0; i < compositeIDArray.length; i++) |
| { |
| String compositeDocumentID = compositeIDArray[i]; |
| DocumentDescription dd = documentIDMap.get(compositeDocumentID); |
| if (allowedDocIds.get(compositeDocumentID) == null) |
| { |
| // Delete this record and do NOT return it. |
| jobQueue.deleteRecord(dd.getID()); |
| // What should we do about hopcount here? |
| // We are deleting a record which belongs to a job that is being |
| // cleaned up. The job itself will go away when this is done, |
| // and so will all the hopcount stuff pertaining to it. So, the |
| // treatment I've chosen here is to leave the hopcount alone and |
| // let the job cleanup get rid of it at the right time. |
| // Note: carrydown records handled in the same manner... |
| //carryDown.deleteRecords(dd.getJobID(),new String[]{dd.getDocumentIdentifier()}); |
| } |
| else |
| { |
| // Set the record status to "being deleted" and return it |
| rval[j++] = dd; |
| jobQueue.setDeletingStatus(dd.getID(),processID); |
| } |
| } |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Done pruning unindexable docs after "+new Long(System.currentTimeMillis()-startTime).toString()+" ms."); |
| |
| return rval; |
| |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction finding deleteable docs: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| finally |
| { |
| lockManager.leaveWriteLock(deleteStufferLock); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Get a list of document identifiers that should actually be deleted from the index, from a list that |
| * might contain identifiers that are shared with other jobs that use the same connection. |
| * The input list is guaranteed to be smaller in size than maxInClauseCount for the database. |
| *@param documentIdentifiers is the set of document identifiers to consider. |
| *@param connectionName is the connection name for ALL the document identifiers. |
| *@return the set of documents which should be removed from the index, where there are no potential conflicts. |
| */ |
| protected String[] getUnindexableDocumentIdentifiers(DocumentDescription[] documentIdentifiers, String connectionName) |
| throws ManifoldCFException |
| { |
| // This is where we will count the individual document id's |
| Map<String,MutableInteger> countMap = new HashMap<String,MutableInteger>(); |
| |
| // First thing: Compute the set of document identifier hash values to query against |
| Set<String> map = new HashSet<String>(); |
| for (int i = 0; i < documentIdentifiers.length; i++) |
| { |
| String hash = documentIdentifiers[i].getDocumentIdentifierHash(); |
| map.add(hash); |
| countMap.put(hash,new MutableInteger(0)); |
| } |
| |
| if (map.size() == 0) |
| return new String[0]; |
| |
| // Build a query |
| StringBuilder sb = new StringBuilder(); |
| ArrayList list = new ArrayList(); |
| |
| List<String> docList = new ArrayList<String>(); |
| Iterator<String> iter = map.iterator(); |
| while (iter.hasNext()) |
| { |
| docList.add(iter.next()); |
| } |
| |
| // Note: There is a potential race condition here. One job may be running while another is in process of |
| // being deleted. If they share a document, then the delete task could decide to delete the document and do so right |
| // after the ingestion takes place in the running job, but right before the document's status is updated |
| // in the job queue [which would have prevented the deletion]. |
| // Unless a transaction is wrapped around the time ingestion is taking place (which would be a very bad idea), |
| // we are stuck with the possibility of this condition, which will essentially lead to a document being |
| // missing from the index. |
| // One way of dealing with this is to treat "active" documents as already ingested, for the purpose of |
| // reference counting. Then these documents will not be deleted. The risk then becomes that the "active" |
| // document entry will not be completed (say, because of a restart), and thus the corresponding document |
| // will never be removed from the index. |
| // |
| // Instead, the only solution is to not queue a document for any activity that is inconsistent with activities |
| // that may already be ongoing for that document. For this reason, I have introduced a "BEING_DELETED" |
| // and "BEING_CLEANED" state |
| // for a document. These states will allow the various queries that queue up activities to avoid documents that |
| // are currently being processed elsewhere. |
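| // In short: once a record is in the BEINGDELETED or BEINGCLEANED state, the queries elsewhere in this class |
| // that queue up work will skip it, so two conflicting activities can never be scheduled for the same document at once. |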
| |
| sb.append("SELECT t0.").append(jobQueue.docHashField).append(" FROM ").append(jobQueue.getTableName()).append(" t0 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause("t0."+jobQueue.docHashField,docList)})).append(" AND ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?,?,?,?) AND "); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_COMPLETE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_UNCHANGED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ELIGIBLEFORDELETE)); |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new JoinClause("t1."+jobs.idField,"t0."+jobQueue.jobIDField)})).append(" AND ") |
| .append("t1.").append(jobs.connectionNameField).append("=?)"); |
| |
| list.add(connectionName); |
| |
| // Do the query, and then count the number of times each document identifier occurs. |
| IResultSet results = database.performQuery(sb.toString(),list,null,null); |
| for (int i = 0; i < results.getRowCount(); i++) |
| { |
| IResultRow row = results.getRow(i); |
| String docIDHash = (String)row.getValue(jobQueue.docHashField); |
| MutableInteger mi = (MutableInteger)countMap.get(docIDHash); |
| if (mi != null) |
| mi.increment(); |
| } |
| |
| // Go through and count only those that have a count of 1. |
| int count = 0; |
| iter = countMap.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| String docIDHash = iter.next(); |
| MutableInteger mi = countMap.get(docIDHash); |
| if (mi.intValue() == 1) |
| count++; |
| } |
| |
| String[] rval = new String[count]; |
| iter = countMap.keySet().iterator(); |
| count = 0; |
| while (iter.hasNext()) |
| { |
| String docIDHash = iter.next(); |
| MutableInteger mi = countMap.get(docIDHash); |
| if (mi.intValue() == 1) |
| rval[count++] = docIDHash; |
| } |
| |
| return rval; |
| } |
| |
| // These methods support the reprioritization thread. |
| |
| /** Get a list of already-processed documents to reprioritize. Documents in all jobs will be |
| * returned by this method. Up to n document descriptions will be returned. |
| *@param currentTime is the current time stamp for this prioritization pass. Avoid |
| * picking up any documents that are labeled with this timestamp or after. |
| *@param n is the maximum number of document descriptions desired. |
| *@return the document descriptions. |
| */ |
| @Override |
| public DocumentDescription[] getNextAlreadyProcessedReprioritizationDocuments(long currentTime, int n) |
| throws ManifoldCFException |
| { |
| StringBuilder sb = new StringBuilder(); |
| ArrayList list = new ArrayList(); |
| |
| // The desired query is: |
| // SELECT docid FROM jobqueue WHERE prioritysettime < (currentTime) LIMIT (n) |
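| // The actual query built below also restricts the status to "complete", "unchanged", or "purgatory", |
| // and applies the limit via constructOffsetLimitClause. |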
| |
| sb.append("SELECT ") |
| .append(jobQueue.idField).append(",") |
| .append(jobQueue.docHashField).append(",") |
| .append(jobQueue.docIDField).append(",") |
| .append(jobQueue.jobIDField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE "); |
| |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(JobQueue.STATUS_COMPLETE), |
| jobQueue.statusToString(JobQueue.STATUS_UNCHANGED), |
| jobQueue.statusToString(JobQueue.STATUS_PURGATORY)}), |
| new UnitaryClause(jobQueue.prioritySetField,"<",new Long(currentTime))})).append(" "); |
| |
| sb.append(database.constructOffsetLimitClause(0,n)); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null,n,null); |
| |
| DocumentDescription[] rval = new DocumentDescription[set.getRowCount()]; |
| |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i); |
| rval[i] = new DocumentDescription((Long)row.getValue(jobQueue.idField), |
| (Long)row.getValue(jobQueue.jobIDField), |
| (String)row.getValue(jobQueue.docHashField), |
| (String)row.getValue(jobQueue.docIDField)); |
| i++; |
| } |
| |
| return rval; |
| } |
| |
| /** Get a list of not-yet-processed documents to reprioritize. Documents in all jobs will be |
| * returned by this method. Up to n document descriptions will be returned. |
| *@param currentTime is the current time stamp for this prioritization pass. Avoid |
| * picking up any documents that are labeled with this timestamp or after. |
| *@param n is the maximum number of document descriptions desired. |
| *@return the document descriptions. |
| */ |
| @Override |
| public DocumentDescription[] getNextNotYetProcessedReprioritizationDocuments(long currentTime, int n) |
| throws ManifoldCFException |
| { |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| // This query MUST return only documents that are in a pending state which belong to an active job!!! |
| |
| sb.append(jobQueue.idField).append(",") |
| .append(jobQueue.docHashField).append(",") |
| .append(jobQueue.docIDField).append(",") |
| .append(jobQueue.jobIDField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t0 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(jobQueue.statusField,new Object[]{ |
| JobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED), |
| JobQueue.statusToString(jobQueue.STATUS_PENDING), |
| JobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)}), |
| new UnitaryClause(jobQueue.prioritySetField,"<",new Long(currentTime))})).append(" AND ") |
| .append(jobQueue.checkActionField).append("=?").append(" AND "); |
| |
| list.add(jobQueue.actionToString(JobQueue.ACTION_RESCAN)); |
| |
| // Per CONNECTORS-290, we need to leave priorities blank for jobs that aren't using them, |
| // so this query does not include jobs whose document priorities have been reset to null. |
| // |
| // I've included ALL states that might have non-null doc priorities. This includes states |
| // corresponding to uninstalled connectors, since there is no transition that cleans out the |
| // document priorities in these states. The time during which a connector is uninstalled is |
| // expected to be short, because typically this state is the result of an installation procedure |
| // rather than willful action on the part of a user. |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause("t1."+jobs.statusField,new Object[]{ |
| Jobs.statusToString(Jobs.STATUS_STARTINGUP), |
| Jobs.statusToString(Jobs.STATUS_STARTINGUPMINIMAL), |
| Jobs.statusToString(Jobs.STATUS_ACTIVE), |
| Jobs.statusToString(Jobs.STATUS_ACTIVESEEDING), |
| Jobs.statusToString(Jobs.STATUS_ACTIVE_UNINSTALLED), |
| Jobs.statusToString(Jobs.STATUS_ACTIVESEEDING_UNINSTALLED) |
| }), |
| new JoinClause("t1."+jobs.idField,"t0."+jobQueue.jobIDField)})) |
| .append(") "); |
| |
| sb.append(database.constructOffsetLimitClause(0,n)); |
| |
| // Analyzing the jobqueue tables unconditionally used to be done here, since postgresql 8.3 is much more sensitive to stale statistics than earlier versions; the call is currently commented out. |
| //jobQueue.unconditionallyAnalyzeTables(); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null,n,null); |
| |
| DocumentDescription[] rval = new DocumentDescription[set.getRowCount()]; |
| |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i); |
| rval[i] = new DocumentDescription((Long)row.getValue(jobQueue.idField), |
| (Long)row.getValue(jobQueue.jobIDField), |
| (String)row.getValue(jobQueue.docHashField), |
| (String)row.getValue(jobQueue.docIDField)); |
| i++; |
| } |
| |
| return rval; |
| } |
| |
| /** Save a set of document priorities. If a document was eligible to have its |
| * priority set but is no longer eligible, the provided priority will not be written. |
| *@param currentTime is the time in milliseconds since epoch. |
| *@param documentDescriptions are the document descriptions. |
| *@param priorities are the desired priorities. |
| */ |
| @Override |
| public void writeDocumentPriorities(long currentTime, DocumentDescription[] documentDescriptions, IPriorityCalculator[] priorities) |
| throws ManifoldCFException |
| { |
| |
| // Retry loop - in case we get a deadlock despite our best efforts |
| while (true) |
| { |
| // This should be ordered by document identifier hash in order to prevent potential deadlock conditions |
| HashMap indexMap = new HashMap(); |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| |
| int i = 0; |
| while (i < documentDescriptions.length) |
| { |
| String documentIDHash = documentDescriptions[i].getDocumentIdentifierHash() + ":"+documentDescriptions[i].getJobID(); |
| docIDHashes[i] = documentIDHash; |
| indexMap.put(documentIDHash,new Integer(i)); |
| i++; |
| } |
| |
| java.util.Arrays.sort(docIDHashes); |
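| // Sorting the jobID-qualified hashes means every thread writing priorities touches rows in the same |
| // order, so concurrent calls cannot deadlock on the jobqueue row locks. |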
| |
| long sleepAmt = 0L; |
| |
| // Start the transaction now |
| database.beginTransaction(); |
| try |
| { |
| |
| // Need to order the writes by doc id. |
| i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| Integer x = (Integer)indexMap.remove(docIDHash); |
| if (x == null) |
| throw new ManifoldCFException("Assertion failure: duplicate document identifier jobid/hash detected!"); |
| int index = x.intValue(); |
| DocumentDescription dd = documentDescriptions[index]; |
| IPriorityCalculator priority = priorities[index]; |
| jobQueue.writeDocPriority(currentTime,dd.getID(),priority); |
| i++; |
| } |
| database.performCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction writing doc priorities: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Get up to the next n documents to be expired. |
| * This method marks the documents whose descriptions have been returned as "being processed", or active. |
| * The same marking is used as for documents that have been queued for worker threads; the model |
| * is thus identical. |
| * |
| *@param processID is the current process ID. |
| *@param n is the maximum number of records desired. |
| *@param currentTime is the current time. |
| *@return the array of document descriptions to expire. |
| */ |
| @Override |
| public DocumentSetAndFlags getExpiredDocuments(String processID, int n, long currentTime) |
| throws ManifoldCFException |
| { |
| // Screening query |
| // Moved outside of transaction, so there's less chance of keeping jobstatus cache key tied up |
| // for an extended period of time. |
| if (!jobs.activeJobsPresent()) |
| return new DocumentSetAndFlags(new DocumentDescription[0], new boolean[0]); |
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Beginning query to look for documents to expire"); |
| } |
| |
| // Put together a query with a limit of n |
| // Note well: This query does not do "FOR UPDATE". The reason is that only one thread can possibly change the document's state to active. |
| // If FOR UPDATE was included, deadlock conditions would be common because of the complexity of this query. |
| |
| ArrayList list = new ArrayList(); |
| |
| StringBuilder sb = new StringBuilder("SELECT "); |
| sb.append("t0.").append(jobQueue.idField).append(",") |
| .append("t0.").append(jobQueue.jobIDField).append(",") |
| .append("t0.").append(jobQueue.docHashField).append(",") |
| .append("t0.").append(jobQueue.docIDField).append(",") |
| .append("t0.").append(jobQueue.statusField).append(",") |
| .append("t0.").append(jobQueue.failTimeField).append(",") |
| .append("t0.").append(jobQueue.failCountField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t0 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause("t0."+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(JobQueue.STATUS_PENDING), |
| jobQueue.statusToString(JobQueue.STATUS_PENDINGPURGATORY)}), |
| new UnitaryClause("t0."+jobQueue.checkActionField,jobQueue.actionToString(JobQueue.ACTION_REMOVE)), |
| new UnitaryClause("t0."+jobQueue.checkTimeField,"<=",new Long(currentTime))})).append(" AND "); |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause("t1."+jobs.statusField,new Object[]{ |
| jobs.statusToString(jobs.STATUS_ACTIVE), |
| jobs.statusToString(jobs.STATUS_ACTIVESEEDING)}), |
| new JoinClause("t1."+jobs.idField,"t0."+jobQueue.jobIDField)})).append(") AND "); |
| |
| sb.append("NOT EXISTS(SELECT 'x' FROM ").append(jobQueue.getTableName()).append(" t2 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new JoinClause("t2."+jobQueue.docHashField,"t0."+jobQueue.docHashField)})).append(" AND ") |
| .append("t2.").append(jobQueue.statusField).append(" IN (?,?,?,?,?,?)").append(" AND ") |
| .append("t2.").append(jobQueue.jobIDField).append("!=t0.").append(jobQueue.jobIDField).append(") "); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)); |
| |
| sb.append(database.constructOffsetLimitClause(0,n)); |
| |
| String query = sb.toString(); |
| |
| // Analyzing the jobqueue tables unconditionally used to be done here, since postgresql 8.3 is much more sensitive to stale statistics than earlier versions; the call is currently commented out. |
| //jobQueue.unconditionallyAnalyzeTables(); |
| |
| ArrayList answers = new ArrayList(); |
| |
| int repeatCount = 0; |
| while (true) |
| { |
| long sleepAmt = 0L; |
| |
| // Enter a write lock, so only one thread can be doing this. That makes FOR UPDATE unnecessary. |
| lockManager.enterWriteLock(expireStufferLock); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| { |
| repeatCount++; |
| Logging.perf.debug(" Attempt "+Integer.toString(repeatCount)+" to expire documents, after "+ |
| new Long(System.currentTimeMillis() - startTime)+" ms"); |
| } |
| |
| database.beginTransaction(); |
| try |
| { |
| IResultSet set = database.performQuery(query,list,null,null,n,null); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug(" Expiring "+Integer.toString(set.getRowCount())+" documents"); |
| |
| // To avoid deadlock, we want to update the document id hashes in order. This means reading into a structure I can sort by docid hash, |
| // before updating any rows in jobqueue. |
| Map<String,List<DocumentDescription>> connectionNameMap = new HashMap<String,List<DocumentDescription>>(); |
| Map<String,DocumentDescription> documentIDMap = new HashMap<String,DocumentDescription>(); |
| Map<String,Integer> statusMap = new HashMap<String,Integer>(); |
| |
| for (int i = 0; i < set.getRowCount(); i++) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(jobQueue.jobIDField); |
| String documentIDHash = (String)row.getValue(jobQueue.docHashField); |
| String documentID = (String)row.getValue(jobQueue.docIDField); |
| int status = jobQueue.stringToStatus(row.getValue(jobQueue.statusField).toString()); |
| Long failTimeValue = (Long)row.getValue(jobQueue.failTimeField); |
| Long failCountValue = (Long)row.getValue(jobQueue.failCountField); |
| // Failtime is probably not useful in this context, but we'll bring it along for completeness |
| long failTime; |
| if (failTimeValue == null) |
| failTime = -1L; |
| else |
| failTime = failTimeValue.longValue(); |
| int failCount; |
| if (failCountValue == null) |
| failCount = 0; |
| else |
| failCount = (int)failCountValue.longValue(); |
| IJobDescription jobDesc = load(jobID); |
| String connectionName = jobDesc.getConnectionName(); |
| DocumentDescription dd = new DocumentDescription((Long)row.getValue(jobQueue.idField), |
| jobID,documentIDHash,documentID,failTime,failCount); |
| String compositeDocumentID = makeCompositeID(documentIDHash,connectionName); |
| documentIDMap.put(compositeDocumentID,dd); |
| statusMap.put(compositeDocumentID,new Integer(status)); |
| List<DocumentDescription> x = connectionNameMap.get(connectionName); |
| if (x == null) |
| { |
| // New entry needed |
| x = new ArrayList<DocumentDescription>(); |
| connectionNameMap.put(connectionName,x); |
| } |
| x.add(dd); |
| } |
| |
| // For each bin, obtain a filtered answer, and enter all answers into a hash table. |
| // We'll then scan the result again to look up the right descriptions for return, |
| // and delete the ones that are owned by more than one job. |
| Map<String,String> allowedDocIds = new HashMap<String,String>(); |
| Iterator<String> iter = connectionNameMap.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| String connectionName = iter.next(); |
| List<DocumentDescription> x = connectionNameMap.get(connectionName); |
| // Do the filter query |
| DocumentDescription[] descriptions = new DocumentDescription[x.size()]; |
| for (int j = 0; j < descriptions.length; j++) |
| { |
| descriptions[j] = x.get(j); |
| } |
| String[] docIDHashes = getUnindexableDocumentIdentifiers(descriptions,connectionName); |
| for (int j = 0; j < docIDHashes.length; j++) |
| { |
| String docIDHash = docIDHashes[j]; |
| String key = makeCompositeID(docIDHash,connectionName); |
| allowedDocIds.put(key,docIDHash); |
| } |
| } |
| |
| // Now, assemble a result, and change the state of the records accordingly |
| // First thing to do is order by document hash, so we reduce the risk of deadlock. |
| String[] compositeIDArray = new String[documentIDMap.size()]; |
| int j = 0; |
| iter = documentIDMap.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| compositeIDArray[j++] = iter.next(); |
| } |
| |
| java.util.Arrays.sort(compositeIDArray); |
| |
| DocumentDescription[] rval = new DocumentDescription[documentIDMap.size()]; |
| boolean[] rvalBoolean = new boolean[documentIDMap.size()]; |
| for (int i = 0; i < compositeIDArray.length; i++) |
| { |
| String compositeDocID = compositeIDArray[i]; |
| DocumentDescription dd = documentIDMap.get(compositeDocID); |
| // Determine whether we can delete it from the index or not |
| rvalBoolean[i] = (allowedDocIds.get(compositeDocID) != null); |
| // Set the record status to "being cleaned" and return it |
| rval[i] = dd; |
| jobQueue.updateActiveRecord(dd.getID(),statusMap.get(compositeDocID).intValue(),processID); |
| } |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| |
| return new DocumentSetAndFlags(rval, rvalBoolean); |
| |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction finding docs to expire: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| finally |
| { |
| lockManager.leaveWriteLock(expireStufferLock); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| // This method supports the "queue stuffer" thread |
| |
| /** Get up to the next n document(s) to be fetched and processed. |
| * This fetch returns records that contain the document identifier, plus all instructions |
| * pertaining to the document's handling (e.g. whether it should be refetched if the version |
| * has not changed). |
| * This method also marks the documents whose descriptions have been returned as "being processed". |
| *@param processID is the current process ID. |
| *@param n is the maximum number of records desired. |
| *@param currentTime is the current time; some fetches do not occur until a specific time. |
| *@param interval is the number of milliseconds that this set of documents should represent (for throttling). |
| *@param blockingDocuments is the place to record documents that were encountered, are eligible for reprioritization, |
| * but could not be queued due to throttling considerations. |
| *@param statistics are the current performance statistics per connection, which are used to balance the queue stuffing |
| * so that individual connections are not overwhelmed. |
| *@param scanRecord retains the bins from all documents encountered from the query, even those that were skipped due |
| * to being overcommitted. |
| *@return the array of document descriptions to fetch and process. |
| */ |
| @Override |
| public DocumentDescription[] getNextDocuments(String processID, |
| int n, long currentTime, long interval, |
| BlockingDocuments blockingDocuments, PerformanceStatistics statistics, |
| DepthStatistics scanRecord) |
| throws ManifoldCFException |
| { |
| // NOTE WELL: Jobs that are throttled must control the number of documents that are fetched in |
| // a given interval. Therefore, the returned result has the following constraints on it: |
| // 1) There must be no more than n documents returned total; |
| // 2) For any given job that is throttled, the total number of documents returned must be |
| // consistent with the time interval provided. |
| // In general, this requires the database layer to perform fairly advanced filtering on |
| // the result, far in excess of a simple count. An implementation of an interface is therefore |
| // going to need to be passed into the performQuery() operation, which prunes the resultset |
| // as it is being read into memory. That's a new feature that will need to be added to the |
| // database layer. |
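| // (That filtering interface is the ThrottleLimit instance built below; it is passed to performQuery() as its final argument in fetchAndProcessDocuments.) |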
| |
| // Screening query |
| // Moved outside of transaction, so there's less chance of keeping jobstatus cache key tied up |
| // for an extended period of time. |
| if (!jobs.activeJobsPresent()) |
| return new DocumentDescription[0]; |
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to find documents to queue"); |
| } |
| |
| // Below there used to be one large transaction, with multiple read sections and multiple write sections. |
| // As part of reducing the chance of postgresql encountering deadlock conditions, I wanted to break this |
| // transaction up. However, the correctness of the throttling depended on making sure |
| // that the throttles that were built were based on the same active jobs that the subsequent queries |
| // that did the stuffing relied upon. This made reorganization impossible until I realized that with |
| // Postgresql's way of doing transaction isolation this was going to happen anyway, so I needed a more |
| // robust solution. |
| // |
| // Specifically, I chose to change the way documents were queued so that only documents from properly |
| // throttled jobs could be queued. That meant I needed to add stuff to the ThrottleLimit class to track |
| // the very knowledge of an active job. This had the additional benefit of meaning there was no chance of |
| // a query occurring from inside a resultset filter. |
| // |
| // But, after I did this, it was no longer necessary to have such a large transaction either. |
| |
| |
| // Anything older than 10 minutes ago is considered eligible for reprioritization. |
| long prioritizationTime = currentTime - 60000L * 10L; |
| |
| ThrottleLimit vList = new ThrottleLimit(n,prioritizationTime); |
| |
| IResultSet jobconnections = jobs.getActiveJobConnections(); |
| HashMap connectionSet = new HashMap(); |
| int i = 0; |
| while (i < jobconnections.getRowCount()) |
| { |
| IResultRow row = jobconnections.getRow(i++); |
| Long jobid = (Long)row.getValue("jobid"); |
| String connectionName = (String)row.getValue("connectionname"); |
| vList.addJob(jobid,connectionName); |
| connectionSet.put(connectionName,connectionName); |
| } |
| |
| // Find the active connection names. We'll load these, and then get throttling info |
| // from each one. |
| String[] activeConnectionNames = new String[connectionSet.size()]; |
| Iterator iter = connectionSet.keySet().iterator(); |
| i = 0; |
| while (iter.hasNext()) |
| { |
| activeConnectionNames[i++] = (String)iter.next(); |
| } |
| IRepositoryConnection[] connections = connectionMgr.loadMultiple(activeConnectionNames); |
| |
| |
| // Accumulate a sum of the max_connection_count * avg_connection_rate values, so we can calculate the appropriate adjustment |
| // factor and set the connection limits. |
| HashMap rawFetchCounts = new HashMap(); |
| double rawFetchCountTotal = 0.0; |
| i = 0; |
| while (i < connections.length) |
| { |
| IRepositoryConnection connection = connections[i++]; |
| String connectionName = connection.getName(); |
| int maxConnections = connection.getMaxConnections(); |
| double avgFetchRate = statistics.calculateConnectionFetchRate(connectionName); |
| double weightedRawFetchCount = avgFetchRate * (double)maxConnections; |
| // Keep the avg rate for later use, since it may get updated before next time we need it. |
| rawFetchCounts.put(connectionName,new Double(weightedRawFetchCount)); |
| rawFetchCountTotal += weightedRawFetchCount; |
| } |
| |
| // Calculate an adjustment factor |
| double fetchCountAdjustmentFactor = ((double)n) / rawFetchCountTotal; |
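| // For example (illustrative numbers only): with n = 40 and two connections whose weighted raw fetch |
| // counts are 3.0 and 1.0, the factor is 40/4.0 = 10, giving adjusted counts of 30 and 10 before the |
| // +5 floor is applied below. |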
| |
| // For each job, we must amortize the maximum number of fetches per ms to the actual interval, |
| // and also randomly select an extra fetch based on the fractional probability. (This latter is |
| // necessary for the case where the maximum fetch rate is specified to be pretty low.) |
| // |
| i = 0; |
| while (i < connections.length) |
| { |
| IRepositoryConnection connection = connections[i++]; |
| String connectionName = connection.getName(); |
| // Check if throttled... |
| String[] throttles = connection.getThrottles(); |
| int k = 0; |
| while (k < throttles.length) |
| { |
| // The key is the regexp value itself |
| String throttle = throttles[k++]; |
| float throttleValue = connection.getThrottleValue(throttle); |
| // For the given connection, set the fetch limit per bin. This is calculated using the time interval |
| // and the desired fetch rate. The fractional remainder is used to conditionally provide an "extra fetch" |
| // on a weighted random basis. |
| // |
| // In the future, the connection may specify tuples which pair a regexp describing a set of bins against |
| // a fetch rate. In that case, each fetch rate would need to be turned into a precise maximum |
| // count. |
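| // For example (illustrative numbers only): a throttle of 0.01 fetches/ms over a 30000 ms interval |
| // yields 300.0, i.e. 300 guaranteed fetches; 0.00005 fetches/ms yields 1.5, i.e. 1 fetch plus a 50% |
| // chance of one extra. |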
| double fetchesPerTimeInterval = (double)throttleValue * (double)interval; |
| // Actual amount will be the integer value of this, plus an additional 1 if the random number aligns |
| int fetches = (int)fetchesPerTimeInterval; |
| fetchesPerTimeInterval -= (double)fetches; |
| if (random.nextDouble() <= fetchesPerTimeInterval) |
| fetches++; |
| // Save the limit in the ThrottleLimit structure |
| vList.addLimit(connectionName,throttle,fetches); |
| } |
| // For the overall connection, we also have a limit which is based on the number of connections there are actually available. |
| Double weightedRawFetchCount = (Double)rawFetchCounts.get(connectionName); |
| double adjustedFetchCount = weightedRawFetchCount.doubleValue() * fetchCountAdjustmentFactor; |
| |
| // Note well: Queuing starvation that results from there being very few available documents for high-priority connections is dealt with here by simply allowing |
| // the stuffer thread to keep queuing documents until there are enough. This will be pretty inefficient if there's a fast active connection with lots |
| // of available connection handles while the bulk of the activity is on slow, highly handle-limited connections, but I honestly can't think of a better way at the moment. |
| // One good way to correct a bit for this problem is to set a higher document count floor for each connection - say 5 documents - so we won't loop as much. |
| // |
| // Be off in the higher direction rather than the lower; this also prohibits zero values and sets a minimum. |
| int fetchCount = ((int)adjustedFetchCount) + 5; |
| |
| vList.setConnectionLimit(connectionName,fetchCount); |
| } |
| |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("After "+new Long(System.currentTimeMillis()-startTime).toString()+" ms, beginning query to look for documents to queue"); |
| |
| // System.out.println("Done building throttle structure"); |
| |
| // Locate records. |
| // Note that we do NOT want to get everything there is to know about the job |
| // using this query, since the file specification may be large and expensive |
| // to parse. We will load a (cached) copy of the job description for that purpose. |
| // |
| // NOTE: This query deliberately excludes documents which may be being processed by another job. |
| // (It actually excludes a bit more than that, because the exact query is impossible to write given |
| // the fact that document id's cannot be compared.) These are documents where there is ANOTHER |
| // document entry with the same hash value, a different job id, and a status which is either "active", |
| // "activepurgatory", or "beingdeleted". (It does not check whether the jobs have the same connection or |
| // whether the document id's are in fact the same, and therefore may temporarily block legitimate document |
| // activity under rare circumstances.) |
| // |
| // The query I want is: |
| // SELECT jobid,docid,status FROM jobqueue t0 WHERE status IN ('P','G') AND checktime <=xxx |
| // AND EXISTS(SELECT 'x' FROM |
| // jobs t1 WHERE t0.jobid=t1.id AND t1.status='A') |
| // AND NOT EXISTS(SELECT 'x' FROM jobqueue t2 WHERE t0.hashval=t2.hashval AND t0.jobid!=t2.jobid |
| // AND t2.status IN ('A','F','D')) |
| // ORDER BY docpriority ASC LIMIT xxx |
| // |
| |
| // NOTE WELL: The above query did just fine until adaptive recrawling was seriously tried. Then, because every |
| // document in a job was still active, it failed miserably, actually causing Postgresql to stop responding at |
| // one point. Why? Well, the key thing is the sort criteria - there just isn't any way to sort 1M documents |
| // without working with a monster resultset. |
| // |
| // I introduced a new index as a result - based solely on docpriority - and postgresql now correctly uses that index |
| // to pull its results in an ordered fashion |
| // |
| // |
| // Another subtlety is that I *must* mark the documents active as I find them, so that they do not |
| // have any chance of getting returned twice. |
| |
| // Accumulate the answers here |
| ArrayList answers = new ArrayList(); |
| |
| // The current time value |
| Long currentTimeValue = new Long(currentTime); |
| |
| // Analyzing jobqueue before this query used to be required; otherwise stuffing could get a bad plan, interfering with performance. |
| // This turned out to be needed in postgresql 8.3, even though 8.2 worked fine; the call is currently commented out. |
| //jobQueue.unconditionallyAnalyzeTables(); |
| |
| // Loop through priority values |
| int currentPriority = 1; |
| |
| boolean isDone = false; |
| |
| while (!isDone && currentPriority <= 10) |
| { |
| if (jobs.hasPriorityJobs(currentPriority)) |
| { |
| Long currentPriorityValue = new Long((long)currentPriority); |
| fetchAndProcessDocuments(answers,currentTimeValue,currentPriorityValue,vList,connections,processID); |
| isDone = !vList.checkContinue(); |
| } |
| currentPriority++; |
| } |
| |
| // Assert the blocking documents we discovered |
| vList.tallyBlockingDocuments(blockingDocuments); |
| |
| // Convert the saved answers to an array |
| DocumentDescription[] rval = new DocumentDescription[answers.size()]; |
| i = 0; |
| while (i < rval.length) |
| { |
| rval[i] = (DocumentDescription)answers.get(i); |
| i++; |
| } |
| |
| // After we're done pulling stuff from the queue, find the eligible row with the best priority on the queue, and save the bins for assessment. |
| // This is done to decide what the "floor" bincount should be - the idea being that it is wrong to assign priorities for new documents which are |
| // higher than the current level that is currently being dequeued. |
| // |
| // The complicating factor here is that there are indeed many potential *classes* of documents, each of which might have its own current |
| // document priority level. For example, documents could be classed by job, which might make sense because there is a possibility that two jobs' |
| // job priorities may differ. Also, because of document fetch scheduling, each time frame may represent a class in its own right as well. |
| // These classes would have to be associated with independent bin counts, if we were to make any use of them. Then, it would be also necessary |
| // to know what classes a document belonged to in order to be able to calculate its priority. |
| // |
| // An alternative way to proceed is to just have ONE class, and document priorities then get assigned without regard to job, queuing time, etc. |
| // That's the current reality. The code below works in that model, knowing full well that it is an approximation to an ideal. |
| |
| // Find the one row from a live job that has the best document priority, which is available within the current time window. |
| // Note that if there is NO such document, it means we were able to queue all eligible documents, and thus prioritization is probably not even |
| // germane at the moment. |
| |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobQueue.docPriorityField).append(",").append(jobQueue.jobIDField).append(",") |
| .append(jobQueue.docHashField).append(",").append(jobQueue.docIDField) |
| .append(" FROM ").append(jobQueue.getTableName()) |
| .append(" t0 ").append(jobQueue.getGetNextDocumentsIndexHint()).append(" WHERE "); |
| |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| //new UnitaryClause(jobQueue.docPriorityField,">=",new Long(0L)), |
| new MultiClause(jobQueue.statusField, |
| new Object[]{jobQueue.statusToString(JobQueue.STATUS_PENDING), |
| jobQueue.statusToString(JobQueue.STATUS_PENDINGPURGATORY)}), |
| new UnitaryClause(jobQueue.checkActionField,"=",jobQueue.actionToString(JobQueue.ACTION_RESCAN)), |
| new UnitaryClause(jobQueue.checkTimeField,"<=",currentTimeValue)})).append(" AND "); |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause("t1."+jobs.statusField,new Object[]{ |
| Jobs.statusToString(jobs.STATUS_ACTIVE), |
| Jobs.statusToString(jobs.STATUS_ACTIVESEEDING)}), |
| new JoinClause("t1."+jobs.idField,"t0."+jobQueue.jobIDField)})) |
| .append(") "); |
| |
| sb.append(" ").append(database.constructIndexOrderByClause(new String[]{ |
| jobQueue.docPriorityField, jobQueue.statusField, jobQueue.checkActionField, jobQueue.checkTimeField}, |
| true)).append(" ") |
| .append(database.constructOffsetLimitClause(0,1,true)); |
| |
| IResultSet set; |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| set = database.performQuery(sb.toString(),list,null,null,1,null); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction adding document bins: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| |
| if (set.getRowCount() > 0) |
| { |
| IResultRow row = set.getRow(0); |
| Double docPriority = (Double)row.getValue(jobQueue.docPriorityField); |
| if (docPriority != null && docPriority.doubleValue() < jobQueue.noDocPriorityValue) |
| scanRecord.addBins(docPriority); |
| } |
| return rval; |
| } |
| |
| /** Fetch and process documents matching the passed-in criteria */ |
| protected void fetchAndProcessDocuments(ArrayList answers, Long currentTimeValue, Long currentPriorityValue, |
| ThrottleLimit vList, IRepositoryConnection[] connections, String processID) |
| throws ManifoldCFException |
| { |
| |
| // Note well: This query does not do "FOR UPDATE". The reason is that only one thread can possibly change the document's state to active. |
| // When FOR UPDATE was included, deadlock conditions were common because of the complexity of this query. |
| // So, instead, as part of CONNECTORS-781, I've introduced a write lock for the pertinent section. |
| |
| ArrayList list = new ArrayList(); |
| |
| StringBuilder sb = new StringBuilder("SELECT t0."); |
| sb.append(jobQueue.idField).append(",t0."); |
| if (Logging.scheduling.isDebugEnabled()) |
| sb.append(jobQueue.docPriorityField).append(",t0."); |
| sb.append(jobQueue.jobIDField).append(",t0.") |
| .append(jobQueue.docHashField).append(",t0.") |
| .append(jobQueue.docIDField).append(",t0.") |
| .append(jobQueue.statusField).append(",t0.") |
| .append(jobQueue.failTimeField).append(",t0.") |
| .append(jobQueue.failCountField).append(",t0.") |
| .append(jobQueue.prioritySetField).append(" FROM ").append(jobQueue.getTableName()) |
| .append(" t0 ").append(jobQueue.getGetNextDocumentsIndexHint()).append(" WHERE "); |
| |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| //new UnitaryClause("t0."+jobQueue.docPriorityField,">=",new Long(0L)), |
| new MultiClause("t0."+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(JobQueue.STATUS_PENDING), |
| jobQueue.statusToString(JobQueue.STATUS_PENDINGPURGATORY)}), |
| new UnitaryClause("t0."+jobQueue.checkActionField,"=",jobQueue.actionToString(JobQueue.ACTION_RESCAN)), |
| new UnitaryClause("t0."+jobQueue.checkTimeField,"<=",currentTimeValue)})).append(" AND "); |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause("t1."+jobs.statusField,new Object[]{ |
| Jobs.statusToString(jobs.STATUS_ACTIVE), |
| Jobs.statusToString(jobs.STATUS_ACTIVESEEDING)}), |
| new JoinClause("t1."+jobs.idField,"t0."+jobQueue.jobIDField), |
| new UnitaryClause("t1."+jobs.priorityField,currentPriorityValue)})) |
| .append(") AND "); |
| |
| sb.append("NOT EXISTS(SELECT 'x' FROM ").append(jobQueue.getTableName()).append(" t2 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new JoinClause("t2."+jobQueue.docHashField,"t0."+jobQueue.docHashField)})).append(" AND ") |
| .append("t2.").append(jobQueue.statusField).append(" IN (?,?,?,?,?,?) AND ") |
| .append("t2.").append(jobQueue.jobIDField).append("!=t0.").append(jobQueue.jobIDField) |
| .append(") AND "); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)); |
| |
| // Prerequisite event clause: AND NOT EXISTS(SELECT 'x' FROM prereqevents t3,events t4 WHERE t3.ownerid=t0.id AND t3.name=t4.name) |
| sb.append("NOT EXISTS(SELECT 'x' FROM ").append(jobQueue.prereqEventManager.getTableName()).append(" t3,").append(eventManager.getTableName()).append(" t4 WHERE t0.") |
| .append(jobQueue.idField).append("=t3.").append(jobQueue.prereqEventManager.ownerField).append(" AND t3.") |
| .append(jobQueue.prereqEventManager.eventNameField).append("=t4.").append(eventManager.eventNameField) |
| .append(")"); |
| |
| sb.append(" ").append(database.constructIndexOrderByClause(new String[]{ |
| "t0."+jobQueue.docPriorityField, "t0."+jobQueue.statusField, "t0."+jobQueue.checkActionField, "t0."+jobQueue.checkTimeField}, |
| true)).append(" "); |
| |
| String query = sb.toString(); |
| |
| // Before entering the transaction, we must provide the throttlelimit object with all the connector |
| // instances it could possibly need. The purpose for doing this is to prevent a deadlock where |
| // connector starvation causes database lockup. |
| // |
| // The preallocation of multiple connector instances is certainly a worry. If any other part |
| // of the code allocates multiple connector instances also, the potential exists for this to cause |
| // deadlock all by itself. I've therefore built a "grab multiple" and a "release multiple" |
| // at the connector factory level to make sure these requests are properly ordered. |
| |
| String[] orderingKeys = new String[connections.length]; |
| int k = 0; |
| while (k < connections.length) |
| { |
| IRepositoryConnection connection = connections[k]; |
| orderingKeys[k] = connection.getName(); |
| k++; |
| } |
| |
| // Never sleep with a resource locked! |
| while (true) |
| { |
| long sleepAmt = 0L; |
| |
| // The write lock ensures that only one thread cluster-wide can be doing this at a given time, so FOR UPDATE is unneeded. |
| lockManager.enterWriteLock(stufferLock); |
| try |
| { |
| |
| IRepositoryConnector[] connectors = repositoryConnectorPool.grabMultiple(orderingKeys,connections); |
| try |
| { |
| // Hand the connectors off to the ThrottleLimit instance |
| k = 0; |
| while (k < connections.length) |
| { |
| vList.addConnectionName(connections[k].getName(),connectors[k]); |
| k++; |
| } |
| |
| // Now we can tack the limit onto the query. Before this point, remainingDocuments would not yet be meaningful. |
| int limitValue = vList.getRemainingDocuments(); |
| String finalQuery = query + database.constructOffsetLimitClause(0,limitValue,true); |
| |
| if (Logging.perf.isDebugEnabled()) |
| { |
| Logging.perf.debug("Queuing documents from time "+currentTimeValue.toString()+" job priority "+currentPriorityValue.toString()+ |
| " (up to "+Integer.toString(vList.getRemainingDocuments())+" documents)"); |
| } |
| |
| database.beginTransaction(); |
| try |
| { |
| IResultSet set = database.performQuery(finalQuery,list,null,null,-1,vList); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug(" Queuing "+Integer.toString(set.getRowCount())+" documents"); |
| |
| // To avoid deadlock, we want to update the document id hashes in order. This means reading into a structure I can sort by docid hash, |
| // before updating any rows in jobqueue. |
| String[] docIDHashes = new String[set.getRowCount()]; |
| Map storageMap = new HashMap(); |
| Map statusMap = new HashMap(); |
| |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i); |
| Long id = (Long)row.getValue(jobQueue.idField); |
| Long jobID = (Long)row.getValue(jobQueue.jobIDField); |
| String docIDHash = (String)row.getValue(jobQueue.docHashField); |
| String docID = (String)row.getValue(jobQueue.docIDField); |
| int status = jobQueue.stringToStatus(row.getValue(jobQueue.statusField).toString()); |
| Long failTimeValue = (Long)row.getValue(jobQueue.failTimeField); |
| Long failCountValue = (Long)row.getValue(jobQueue.failCountField); |
| long failTime; |
| if (failTimeValue == null) |
| failTime = -1L; |
| else |
| failTime = failTimeValue.longValue(); |
| int failCount; |
| if (failCountValue == null) |
| failCount = -1; |
| else |
| failCount = (int)failCountValue.longValue(); |
| |
| DocumentDescription dd = new DocumentDescription(id,jobID,docIDHash,docID,failTime,failCount); |
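| // Key by "hash:jobID" so entries remain unique per job and sort deterministically for the ordered updates below. |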
| docIDHashes[i] = docIDHash + ":" + jobID; |
| storageMap.put(docIDHashes[i],dd); |
| statusMap.put(docIDHashes[i],new Integer(status)); |
| if (Logging.scheduling.isDebugEnabled()) |
| { |
| Double docPriority = (Double)row.getValue(jobQueue.docPriorityField); |
| Logging.scheduling.debug("Stuffing document '"+docID+"' that has priority "+docPriority.toString()+" onto active list"); |
| } |
| i++; |
| } |
| |
| // No duplicates are possible here |
| java.util.Arrays.sort(docIDHashes); |
| |
| i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| DocumentDescription dd = (DocumentDescription)storageMap.get(docIDHash); |
| Long id = dd.getID(); |
| int status = ((Integer)statusMap.get(docIDHash)).intValue(); |
| |
| // Set status to "ACTIVE". |
| jobQueue.updateActiveRecord(id,status,processID); |
| |
| answers.add(dd); |
| |
| i++; |
| } |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction finding docs to queue: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| finally |
| { |
| repositoryConnectorPool.releaseMultiple(connections,connectors); |
| } |
| } |
| finally |
| { |
| lockManager.leaveWriteLock(stufferLock); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| // These methods support the individual fetch/process threads. |
| |
| /** Verify that a specific job is indeed still active. This is used to permit abort or pause to be relatively speedy. |
| * The query done within MUST be cached in order to not cause undue performance degradation. |
| *@param jobID is the job identifier. |
| *@return true if the job is in one of the "active" states. |
| */ |
| @Override |
| public boolean checkJobActive(Long jobID) |
| throws ManifoldCFException |
| { |
| return jobs.checkJobActive(jobID); |
| } |
| |
|   /** Check whether a job is still processing documents, i.e. whether it still has any outstanding active documents. |
|   *@param jobID is the job identifier. |
|   *@return true if the job still has outstanding active documents. |
|   */ |
| @Override |
| public boolean checkJobBusy(Long jobID) |
| throws ManifoldCFException |
| { |
| return jobQueue.checkJobBusy(jobID); |
| } |
| |
|   /** Note completion of processing of a set of documents by a job thread. |
|   * This method causes the state of the documents to be marked as "completed". |
| *@param documentDescriptions are the description objects for the documents that were processed. |
| */ |
| @Override |
| public void markDocumentCompletedMultiple(DocumentDescription[] documentDescriptions) |
| throws ManifoldCFException |
| { |
| // Before we can change a document status, we need to know the *current* status. Therefore, a SELECT xxx FOR UPDATE/UPDATE |
| // transaction is needed in order to complete these documents correctly. |
| // |
|     // Since we are therefore setting row locks on the jobqueue table, we need to work to avoid unnecessary deadlocking. To do that, we have to |
| // lock rows in document id hash order!! Luckily, the DocumentDescription objects have a document identifier buried within, which we can use to |
| // order the "select for update" operations appropriately. |
| // |
| |
| Map<String,Integer> indexMap = new HashMap<String,Integer>(); |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| |
| for (int i = 0; i < documentDescriptions.length; i++) |
| { |
| String documentIDHash = documentDescriptions[i].getDocumentIdentifierHash() + ":" + documentDescriptions[i].getJobID(); |
| docIDHashes[i] = documentIDHash; |
| indexMap.put(documentIDHash,new Integer(i)); |
| } |
| |
| java.util.Arrays.sort(docIDHashes); |
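|     // From this point on, the documents are handled strictly in this sorted order, so competing threads |
|     // acquire their row locks in the same sequence. |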
| |
| // Retry loop - in case we get a deadlock despite our best efforts |
| while (true) |
| { |
| long sleepAmt = 0L; |
| |
| // Start the transaction now |
| database.beginTransaction(); |
| try |
| { |
| // Do one row at a time, to avoid deadlocking things |
| for (String docIDHash : docIDHashes) |
| { |
| // Get the DocumentDescription object |
| DocumentDescription dd = documentDescriptions[indexMap.get(docIDHash).intValue()]; |
| |
| // Query for the status |
| ArrayList list = new ArrayList(); |
| String query = database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.idField,dd.getID())}); |
| TrackerClass.notePreread(dd.getID()); |
| IResultSet set = database.performQuery("SELECT "+jobQueue.statusField+" FROM "+jobQueue.getTableName()+" WHERE "+ |
| query+" FOR UPDATE",list,null,null); |
| TrackerClass.noteRead(dd.getID()); |
| if (set.getRowCount() > 0) |
| { |
| IResultRow row = set.getRow(0); |
| // Grab the status |
| int status = jobQueue.stringToStatus((String)row.getValue(jobQueue.statusField)); |
| // Update the jobqueue table |
| jobQueue.updateCompletedRecord(dd.getID(),status); |
| } |
| } |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction marking completed "+Integer.toString(docIDHashes.length)+ |
| " docs: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| catch (RuntimeException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
|   /** Note completion of processing of a document by a job thread. |
|   * This method causes the state of the document to be marked as "completed". |
| *@param documentDescription is the description object for the document that was processed. |
| */ |
| @Override |
| public void markDocumentCompleted(DocumentDescription documentDescription) |
| throws ManifoldCFException |
| { |
| markDocumentCompletedMultiple(new DocumentDescription[]{documentDescription}); |
| } |
| |
| /** Delete from queue as a result of processing of an active document. |
| * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, |
| * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN. The RESCAN variants are interpreted |
| * as meaning that the document should not be deleted, but should instead be popped back on the queue for |
| * a repeat processing attempt. |
| *@param documentDescriptions are the set of description objects for the documents that were processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] markDocumentDeletedMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| // It's no longer an issue to have to deal with documents being conditionally deleted; that's been |
| // taken over by the hopcountremoval method below. So just use the simple 'delete' functionality. |
| return doDeleteMultiple(jobID,legalLinkTypes,documentDescriptions,hopcountMethod); |
| } |
| |
| /** Delete from queue as a result of processing of an active document. |
| * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, |
| * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN. The RESCAN variants are interpreted |
| * as meaning that the document should not be deleted, but should instead be popped back on the queue for |
| * a repeat processing attempt. |
| *@param documentDescription is the description object for the document that was processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] markDocumentDeleted(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| return markDocumentDeletedMultiple(jobID,legalLinkTypes,new DocumentDescription[]{documentDescription},hopcountMethod); |
| } |
| |
| /** Mark hopcount removal from queue as a result of processing of an active document. |
| * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, |
| * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN. The RESCAN variants are interpreted |
| * as meaning that the document should not be marked as removed, but should instead be popped back on the queue for |
| * a repeat processing attempt. |
| *@param documentDescriptions are the set of description objects for the documents that were processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] markDocumentHopcountRemovalMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
|     // For each record, we're going to have to choose between marking it as "hopcount removed", and marking |
|     // it for rescan. So the basic flow will involve changing each document's status. |
| |
| // Before we can change a document status, we need to know the *current* status. Therefore, a SELECT xxx FOR UPDATE/UPDATE |
| // transaction is needed in order to complete these documents correctly. |
| // |
|     // Since we are therefore setting row locks on the jobqueue table, we need to work to avoid unnecessary deadlocking. To do that, we have to |
| // lock rows in document id hash order!! Luckily, the DocumentDescription objects have a document identifier buried within, which we can use to |
| // order the "select for update" operations appropriately. |
| // |
| |
| HashMap indexMap = new HashMap(); |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| |
| int i = 0; |
| while (i < documentDescriptions.length) |
| { |
| String documentIDHash = documentDescriptions[i].getDocumentIdentifierHash() + ":" + documentDescriptions[i].getJobID(); |
| docIDHashes[i] = documentIDHash; |
| indexMap.put(documentIDHash,new Integer(i)); |
| i++; |
| } |
| |
| java.util.Arrays.sort(docIDHashes); |
| |
| // Retry loop - in case we get a deadlock despite our best efforts |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| // Do one row at a time, to avoid deadlocking things |
| List<String> deleteList = new ArrayList<String>(); |
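|         // deleteList accumulates the simple document hashes (without the job ID suffix) of the rows that were |
|         // actually hopcount-removed, so that the affected carrydown children can be computed below. |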
| |
| i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| |
| // Get the DocumentDescription object |
| DocumentDescription dd = documentDescriptions[((Integer)indexMap.get(docIDHash)).intValue()]; |
| |
| // Query for the status |
| ArrayList list = new ArrayList(); |
| String query = database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.idField,dd.getID())}); |
| TrackerClass.notePreread(dd.getID()); |
| IResultSet set = database.performQuery("SELECT "+jobQueue.statusField+" FROM "+jobQueue.getTableName()+" WHERE "+ |
| query+" FOR UPDATE",list,null,null); |
| TrackerClass.noteRead(dd.getID()); |
| if (set.getRowCount() > 0) |
| { |
| IResultRow row = set.getRow(0); |
| // Grab the status |
| int status = jobQueue.stringToStatus((String)row.getValue(jobQueue.statusField)); |
| // Update the jobqueue table |
| boolean didDelete = jobQueue.updateOrHopcountRemoveRecord(dd.getID(),status); |
| if (didDelete) |
| { |
| deleteList.add(dd.getDocumentIdentifierHash()); |
| } |
| } |
| i++; |
| } |
| |
| String[] docIDSimpleHashes = new String[deleteList.size()]; |
| for (int j = 0; j < docIDSimpleHashes.length; j++) |
| { |
| docIDSimpleHashes[j] = deleteList.get(j); |
| } |
| |
| // Next, find the documents that are affected by carrydown deletion. |
| DocumentDescription[] rval = calculateAffectedDeleteCarrydownChildren(jobID,docIDSimpleHashes); |
| |
| // Since hopcount inheritance and prerequisites came from the addDocument() method, |
| // we don't delete them here. |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| return rval; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
|             Logging.perf.debug("Aborted transaction marking hopcount removal for "+Integer.toString(docIDHashes.length)+ |
|             " docs: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Mark hopcount removal from queue as a result of processing of an active document. |
| * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, |
| * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN. The RESCAN variants are interpreted |
| * as meaning that the document should not be marked as removed, but should instead be popped back on the queue for |
| * a repeat processing attempt. |
| *@param documentDescription is the description object for the document that was processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] markDocumentHopcountRemoval(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| return markDocumentHopcountRemovalMultiple(jobID,legalLinkTypes,new DocumentDescription[]{documentDescription},hopcountMethod); |
| } |
| |
| /** Delete from queue as a result of expiration of an active document. |
| * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, |
| * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN. Since the document expired, |
| * no special activity takes place as a result of the document being in a RESCAN state. |
| *@param documentDescriptions are the set of description objects for the documents that were processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] markDocumentExpiredMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| return doDeleteMultiple(jobID,legalLinkTypes,documentDescriptions,hopcountMethod); |
| } |
| |
| /** Delete from queue as a result of expiration of an active document. |
| * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, |
| * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN. Since the document expired, |
| * no special activity takes place as a result of the document being in a RESCAN state. |
| *@param documentDescription is the description object for the document that was processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] markDocumentExpired(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| return markDocumentExpiredMultiple(jobID,legalLinkTypes,new DocumentDescription[]{documentDescription},hopcountMethod); |
| } |
| |
| /** Delete from queue as a result of cleaning up an unreachable document. |
| * The document is expected to be in the PURGATORY state. There is never any need to reprocess the |
| * document. |
| *@param documentDescriptions are the set of description objects for the documents that were processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] markDocumentCleanedUpMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| return doDeleteMultiple(jobID,legalLinkTypes,documentDescriptions,hopcountMethod); |
| } |
| |
| /** Delete from queue as a result of cleaning up an unreachable document. |
| * The document is expected to be in the PURGATORY state. There is never any need to reprocess the |
| * document. |
| *@param documentDescription is the description object for the document that was processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] markDocumentCleanedUp(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| return markDocumentCleanedUpMultiple(jobID,legalLinkTypes,new DocumentDescription[]{documentDescription},hopcountMethod); |
| } |
| |
| /** Delete documents with no repercussions. We don't have to worry about the current state of each document, |
| * since the document is definitely going away. |
| *@param documentDescriptions are the set of description objects for the documents that were processed. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| protected DocumentDescription[] doDeleteMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| if (documentDescriptions.length == 0) |
| return new DocumentDescription[0]; |
| |
| // Order of locking is not normally important here, because documents that wind up being deleted are never being worked on by anything else. |
| // In all cases, the state of the document excludes other activity. |
| // The only tricky situation is when a thread is processing a document which happens to be getting deleted, while another thread is trying to add |
| // a reference for the very same document to the queue. Then, order of locking matters, so the deletions should happen in a specific order to avoid |
| // the possibility of deadlock. Nevertheless, this is enough of a risk that I've chosen to order the deletions by document id hash order, just like everywhere |
| // else. |
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to delete "+Integer.toString(documentDescriptions.length)+" docs and clean up hopcount for job "+jobID.toString()); |
| } |
| |
| HashMap indexMap = new HashMap(); |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| int i = 0; |
| while (i < documentDescriptions.length) |
| { |
| docIDHashes[i] = documentDescriptions[i].getDocumentIdentifierHash() + ":" + documentDescriptions[i].getJobID(); |
| indexMap.put(docIDHashes[i],new Integer(i)); |
| i++; |
| } |
| |
| // Sort by doc hash, to establish non-blocking lock order |
| java.util.Arrays.sort(docIDHashes); |
| |
| DocumentDescription[] rval; |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Waited "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to start deleting "+Integer.toString(docIDHashes.length)+ |
| " docs and clean up hopcount for job "+jobID.toString()); |
| |
| String[] docIDSimpleHashes = new String[docIDHashes.length]; |
| // Delete jobqueue rows FIRST. Even though we do this before assessing the carrydown implications, it is OK because it's the CHILDREN of these |
| // rows that might get affected by carrydown data deletion, not the rows themselves! |
| i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| DocumentDescription dd = documentDescriptions[((Integer)indexMap.get(docIDHash)).intValue()]; |
| // Individual operations are necessary so order can be controlled. |
| jobQueue.deleteRecord(dd.getID()); |
| docIDSimpleHashes[i] = dd.getDocumentIdentifierHash(); |
| i++; |
| } |
| |
| // Next, find the documents that are affected by carrydown deletion. |
| rval = calculateAffectedDeleteCarrydownChildren(jobID,docIDSimpleHashes); |
| |
| // Finally, delete the carrydown records in question. |
| carryDown.deleteRecords(jobID,docIDSimpleHashes); |
| if (legalLinkTypes.length > 0) |
| hopCount.deleteDocumentIdentifiers(jobID,legalLinkTypes,docIDSimpleHashes,hopcountMethod); |
| |
| database.performCommit(); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Took "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to delete "+Integer.toString(docIDHashes.length)+ |
| " docs and clean up hopcount for job "+jobID.toString()); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction deleting "+Integer.toString(docIDHashes.length)+ |
| " docs and clean up hopcount for job "+jobID.toString()+": "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| return rval; |
| } |
| |
| /** Helper method: Find the document descriptions that will be affected due to carrydown row deletions. |
| */ |
| protected DocumentDescription[] calculateAffectedDeleteCarrydownChildren(Long jobID, String[] docIDHashes) |
| throws ManifoldCFException |
| { |
| // Break the request into pieces, as needed, and throw everything into a hash for uniqueness. |
| // We are going to need to break up this query into a number of subqueries, each covering a subset of parent id hashes. |
| // The goal is to throw all the children into a hash, to make them unique at the end. |
| HashMap resultHash = new HashMap(); |
| ArrayList list = new ArrayList(); |
| int maxCount = maxClauseProcessDeleteHashSet(); |
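|     // maxCount is the largest number of parent hashes the database will accept in a single clause; larger |
|     // requests are broken into chunks of at most that size. |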
| int i = 0; |
| int z = 0; |
| while (i < docIDHashes.length) |
| { |
| if (z == maxCount) |
| { |
| processDeleteHashSet(jobID,resultHash,list); |
| list.clear(); |
| z = 0; |
| } |
| list.add(docIDHashes[i]); |
| i++; |
| z++; |
| } |
| |
| if (z > 0) |
| processDeleteHashSet(jobID,resultHash,list); |
| |
| // Now, put together the result document list from the hash. |
| DocumentDescription[] rval = new DocumentDescription[resultHash.size()]; |
| i = 0; |
| Iterator iter = resultHash.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| Long id = (Long)iter.next(); |
| DocumentDescription dd = (DocumentDescription)resultHash.get(id); |
| rval[i++] = dd; |
| } |
| return rval; |
| } |
| |
|   /** Get the maximum number of document ID hashes that can be passed to processDeleteHashSet() in a single query, |
|   * based on the database's clause limits. |
|   */ |
| protected int maxClauseProcessDeleteHashSet() |
| { |
| return database.findConjunctionClauseMax(new ClauseDescription[]{ |
| new JoinClause("t1."+carryDown.jobIDField,"t0."+jobQueue.jobIDField), |
| new JoinClause("t1."+carryDown.childIDHashField,"t0."+jobQueue.docHashField)}); |
| } |
| |
| /** Helper method: look up rows affected by a deleteRecords operation. |
| */ |
| protected void processDeleteHashSet(Long jobID, HashMap resultHash, ArrayList list) |
| throws ManifoldCFException |
| { |
| // The query here mirrors the carrydown.restoreRecords() delete query! However, it also fetches enough information to build a DocumentDescription |
| // object for return, and so a join is necessary against the jobqueue table. |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList newList = new ArrayList(); |
| |
| sb.append("t0.").append(jobQueue.idField).append(",") |
| .append("t0.").append(jobQueue.docHashField).append(",") |
| .append("t0.").append(jobQueue.docIDField) |
| .append(" FROM ").append(carryDown.getTableName()).append(" t1, ") |
| .append(jobQueue.getTableName()).append(" t0 WHERE "); |
| |
| sb.append(database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new UnitaryClause("t1."+carryDown.jobIDField,jobID), |
| new MultiClause("t1."+carryDown.parentIDHashField,list)})).append(" AND "); |
| |
| sb.append(database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new JoinClause("t0."+jobQueue.docHashField,"t1."+carryDown.childIDHashField), |
| new JoinClause("t0."+jobQueue.jobIDField,"t1."+carryDown.jobIDField)})); |
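|     // The assembled query has roughly this shape (actual column names come from the field constants): |
|     //   SELECT t0.id, t0.dochash, t0.docid FROM carrydown t1, jobqueue t0 |
|     //     WHERE t1.jobid = ? AND t1.parentidhash IN (...) AND t0.dochash = t1.childidhash AND t0.jobid = t1.jobid |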
| |
| /* |
| sb.append("t0.").append(jobQueue.idField).append(",") |
| .append("t0.").append(jobQueue.docHashField).append(",") |
| .append("t0.").append(jobQueue.docIDField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t0 WHERE ") |
| .append(database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new UnitaryClause("t0."+jobQueue.jobIDField,jobID)})).append(" AND "); |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(carryDown.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(newList,new ClauseDescription[]{ |
| new JoinClause("t1."+carryDown.jobIDField,"t0."+jobQueue.jobIDField), |
| new MultiClause("t1."+carryDown.parentIDHashField,list), |
| new JoinClause("t1."+carryDown.childIDHashField,"t0."+jobQueue.docHashField)})) |
| .append(")"); |
| */ |
| |
| IResultSet set = database.performQuery(sb.toString(),newList,null,null); |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long id = (Long)row.getValue(jobQueue.idField); |
| String documentIdentifierHash = (String)row.getValue(jobQueue.docHashField); |
| String documentIdentifier = (String)row.getValue(jobQueue.docIDField); |
| resultHash.put(id,new DocumentDescription(id,jobID,documentIdentifierHash,documentIdentifier)); |
| } |
| } |
| |
| |
| |
| /** Requeue a document for further processing in the future. |
| * This method is called after a document is processed, when the job is a "continuous" one. |
| * It is essentially equivalent to noting that the document processing is complete, except the |
| * document remains on the queue. |
|   *@param documentDescriptions is the set of description objects for the documents that were processed. |
| *@param executeTimes are the times that the documents should be rescanned. Null indicates "never". |
| *@param actions are what should be done when the time arrives. Choices are ACTION_RESCAN or ACTION_REMOVE. |
| */ |
| @Override |
| public void requeueDocumentMultiple(DocumentDescription[] documentDescriptions, Long[] executeTimes, |
| int[] actions) |
| throws ManifoldCFException |
| { |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| Long[] ids = new Long[documentDescriptions.length]; |
| Long[] executeTimesNew = new Long[documentDescriptions.length]; |
| int[] actionsNew = new int[documentDescriptions.length]; |
| |
| // First loop maps document identifier back to an index. |
| HashMap indexMap = new HashMap(); |
| int i = 0; |
| while (i < documentDescriptions.length) |
| { |
|       docIDHashes[i] = documentDescriptions[i].getDocumentIdentifierHash() + ":" + documentDescriptions[i].getJobID(); |
| indexMap.put(docIDHashes[i],new Integer(i)); |
| i++; |
| } |
| |
| // Sort! |
| java.util.Arrays.sort(docIDHashes); |
| |
| // Next loop populates the actual arrays we use to feed the operation so that the ordering is correct. |
| i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| Integer x = (Integer)indexMap.remove(docIDHash); |
| if (x == null) |
| throw new ManifoldCFException("Assertion failure: duplicate document identifier jobid/hash detected!"); |
| int index = x.intValue(); |
| ids[i] = documentDescriptions[index].getID(); |
| executeTimesNew[i] = executeTimes[index]; |
| actionsNew[i] = actions[index]; |
| i++; |
| } |
| |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Going through ids in order should greatly reduce or eliminate chances of deadlock occurring. We thus need to pay attention to the sorted order. |
| i = 0; |
| while (i < ids.length) |
| { |
| jobQueue.setRequeuedStatus(ids[i],executeTimesNew[i],actionsNew[i],-1L,-1); |
| i++; |
| } |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction requeuing documents: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Requeue a document for further processing in the future. |
| * This method is called after a document is processed, when the job is a "continuous" one. |
| * It is essentially equivalent to noting that the document processing is complete, except the |
| * document remains on the queue. |
| *@param documentDescription is the description object for the document that was processed. |
| *@param executeTime is the time that the document should be rescanned. Null indicates "never". |
| *@param action is what should be done when the time arrives. Choices include ACTION_RESCAN or ACTION_REMOVE. |
| */ |
| @Override |
| public void requeueDocument(DocumentDescription documentDescription, Long executeTime, int action) |
| throws ManifoldCFException |
| { |
| requeueDocumentMultiple(new DocumentDescription[]{documentDescription},new Long[]{executeTime},new int[]{action}); |
| } |
| |
| /** Reset a set of documents for further processing in the future. |
| * This method is called after some unknown number of the documents were processed, but then a service interruption occurred. |
| * Note well: The logic here basically presumes that we cannot know whether the documents were indeed processed or not. |
| * If we knew for a fact that none of the documents had been handled, it would be possible to look at the document's |
| * current status and decide what the new status ought to be, based on a true rollback scenario. Such cases, however, are rare enough so that |
| * special logic is probably not worth it. |
|   *@param documentDescriptions is the set of description objects for the documents that were processed. |
|   *@param executeTime is the time that the documents should be rescanned. |
|   *@param action is what should be done when the time arrives. Choices are ACTION_RESCAN or ACTION_REMOVE. |
|   *@param failTime is the time beyond which a service interruption will be considered a hard failure. |
|   *@param failCount is the number of retries beyond which a service interruption will be considered a hard failure. |
| */ |
| @Override |
| public void resetDocumentMultiple(DocumentDescription[] documentDescriptions, long executeTime, |
| int action, long failTime, int failCount) |
| throws ManifoldCFException |
| { |
| Long executeTimeLong = new Long(executeTime); |
| Long[] ids = new Long[documentDescriptions.length]; |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| Long[] executeTimes = new Long[documentDescriptions.length]; |
| int[] actions = new int[documentDescriptions.length]; |
| long[] failTimes = new long[documentDescriptions.length]; |
| int[] failCounts = new int[documentDescriptions.length]; |
| |
| // First loop maps document identifier back to an index. |
| HashMap indexMap = new HashMap(); |
| int i = 0; |
| while (i < documentDescriptions.length) |
| { |
|       docIDHashes[i] = documentDescriptions[i].getDocumentIdentifierHash() + ":" + documentDescriptions[i].getJobID(); |
| indexMap.put(docIDHashes[i],new Integer(i)); |
| i++; |
| } |
| |
| // Sort! |
| java.util.Arrays.sort(docIDHashes); |
| |
| // Next loop populates the actual arrays we use to feed the operation so that the ordering is correct. |
| i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| Integer x = (Integer)indexMap.remove(docIDHash); |
| if (x == null) |
| throw new ManifoldCFException("Assertion failure: duplicate document identifier jobid/hash detected!"); |
| int index = x.intValue(); |
| ids[i] = documentDescriptions[index].getID(); |
| executeTimes[i] = executeTimeLong; |
| actions[i] = action; |
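|       // Carry forward the earliest recorded fail time; for the retry count, decrement whatever remains and cap it |
|       // at the newly supplied failCount, so that repeated service interruptions eventually become hard failures. |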
| long oldFailTime = documentDescriptions[index].getFailTime(); |
| if (oldFailTime == -1L) |
| oldFailTime = failTime; |
| failTimes[i] = oldFailTime; |
| int oldFailCount = documentDescriptions[index].getFailRetryCount(); |
| if (oldFailCount == -1) |
| oldFailCount = failCount; |
| else |
| { |
| oldFailCount--; |
| if (failCount != -1 && oldFailCount > failCount) |
| oldFailCount = failCount; |
| } |
| failCounts[i] = oldFailCount; |
| i++; |
| } |
| |
| // Documents get marked PENDINGPURGATORY regardless of their current state; this is because we can't know at this point whether |
| // an ingestion attempt occurred or not, so we have to treat the documents as having been processed at least once. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Going through ids in order should greatly reduce or eliminate chances of deadlock occurring. We thus need to pay attention to the sorted order. |
| i = 0; |
| while (i < ids.length) |
| { |
| jobQueue.setRequeuedStatus(ids[i],executeTimes[i],actions[i],(failTimes==null)?-1L:failTimes[i],(failCounts==null)?-1:failCounts[i]); |
| i++; |
| } |
| |
| database.performCommit(); |
| break; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting documents: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| |
| /** Reset a set of cleaning documents for further processing in the future. |
| * This method is called after some unknown number of the documents were cleaned, but then an ingestion service interruption occurred. |
| * Note well: The logic here basically presumes that we cannot know whether the documents were indeed cleaned or not. |
| * If we knew for a fact that none of the documents had been handled, it would be possible to look at the document's |
| * current status and decide what the new status ought to be, based on a true rollback scenario. Such cases, however, are rare enough so that |
| * special logic is probably not worth it. |
|   *@param documentDescriptions is the set of description objects for the documents that were cleaned. |
| *@param checkTime is the minimum time for the next cleaning attempt. |
| */ |
| @Override |
| public void resetCleaningDocumentMultiple(DocumentDescription[] documentDescriptions, long checkTime) |
| throws ManifoldCFException |
| { |
| Long[] ids = new Long[documentDescriptions.length]; |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| |
| // First loop maps document identifier back to an index. |
| HashMap indexMap = new HashMap(); |
| int i = 0; |
| while (i < documentDescriptions.length) |
| { |
|       docIDHashes[i] = documentDescriptions[i].getDocumentIdentifierHash() + ":" + documentDescriptions[i].getJobID(); |
| indexMap.put(docIDHashes[i],new Integer(i)); |
| i++; |
| } |
| |
| // Sort! |
| java.util.Arrays.sort(docIDHashes); |
| |
| // Next loop populates the actual arrays we use to feed the operation so that the ordering is correct. |
| i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| Integer x = (Integer)indexMap.remove(docIDHash); |
| if (x == null) |
| throw new ManifoldCFException("Assertion failure: duplicate document identifier jobid/hash detected!"); |
| int index = x.intValue(); |
| ids[i] = documentDescriptions[index].getID(); |
| i++; |
| } |
| |
| // Documents get marked PURGATORY regardless of their current state; this is because we can't know at this point what the actual prior state was. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Going through ids in order should greatly reduce or eliminate chances of deadlock occurring. We thus need to pay attention to the sorted order. |
| i = 0; |
| while (i < ids.length) |
| { |
| jobQueue.setUncleaningStatus(ids[i],checkTime); |
| i++; |
| } |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting cleaning documents: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset a cleaning document back to its former state. |
| * This gets done when a deleting thread sees a service interruption, etc., from the ingestion system. |
| *@param documentDescription is the description of the document that was cleaned. |
| *@param checkTime is the minimum time for the next cleaning attempt. |
| */ |
| @Override |
| public void resetCleaningDocument(DocumentDescription documentDescription, long checkTime) |
| throws ManifoldCFException |
| { |
| resetCleaningDocumentMultiple(new DocumentDescription[]{documentDescription},checkTime); |
| } |
| |
| /** Reset a set of deleting documents for further processing in the future. |
| * This method is called after some unknown number of the documents were deleted, but then an ingestion service interruption occurred. |
| * Note well: The logic here basically presumes that we cannot know whether the documents were indeed processed or not. |
| * If we knew for a fact that none of the documents had been handled, it would be possible to look at the document's |
| * current status and decide what the new status ought to be, based on a true rollback scenario. Such cases, however, are rare enough so that |
| * special logic is probably not worth it. |
|   *@param documentDescriptions is the set of description objects for the documents that were processed. |
| *@param checkTime is the minimum time for the next cleaning attempt. |
| */ |
| @Override |
| public void resetDeletingDocumentMultiple(DocumentDescription[] documentDescriptions, long checkTime) |
| throws ManifoldCFException |
| { |
| Long[] ids = new Long[documentDescriptions.length]; |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| |
| // First loop maps document identifier back to an index. |
| HashMap indexMap = new HashMap(); |
| int i = 0; |
| while (i < documentDescriptions.length) |
| { |
|       docIDHashes[i] = documentDescriptions[i].getDocumentIdentifierHash() + ":" + documentDescriptions[i].getJobID(); |
| indexMap.put(docIDHashes[i],new Integer(i)); |
| i++; |
| } |
| |
| // Sort! |
| java.util.Arrays.sort(docIDHashes); |
| |
| // Next loop populates the actual arrays we use to feed the operation so that the ordering is correct. |
| i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| Integer x = (Integer)indexMap.remove(docIDHash); |
| if (x == null) |
| throw new ManifoldCFException("Assertion failure: duplicate document identifier jobid/hash detected!"); |
| int index = x.intValue(); |
| ids[i] = documentDescriptions[index].getID(); |
| i++; |
| } |
| |
| // Documents get marked COMPLETED regardless of their current state; this is because we can't know at this point what the actual prior state was. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Going through ids in order should greatly reduce or eliminate chances of deadlock occurring. We thus need to pay attention to the sorted order. |
| i = 0; |
| while (i < ids.length) |
| { |
| jobQueue.setUndeletingStatus(ids[i],checkTime); |
| i++; |
| } |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting documents: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset a deleting document back to its former state. |
| * This gets done when a deleting thread sees a service interruption, etc., from the ingestion system. |
| *@param documentDescription is the description object for the document that was cleaned. |
| *@param checkTime is the minimum time for the next cleaning attempt. |
| */ |
| @Override |
| public void resetDeletingDocument(DocumentDescription documentDescription, long checkTime) |
| throws ManifoldCFException |
| { |
| resetDeletingDocumentMultiple(new DocumentDescription[]{documentDescription},checkTime); |
| } |
| |
| |
| /** Reset an active document back to its former state. |
| * This gets done when there's a service interruption and the document cannot be processed yet. |
| * Note well: This method formerly presumed that a perfect rollback was possible, and that there was zero chance of any |
|   * processing activity occurring before it got called. That assumption appears incorrect, however, so I've opted to now |
| * presume that processing has perhaps occurred. Perfect rollback is thus no longer possible. |
| *@param documentDescription is the description object for the document that was processed. |
|   *@param executeTime is the time that the document should be rescanned. |
|   *@param action is what should be done when the time arrives. Choices are ACTION_RESCAN or ACTION_REMOVE. |
|   *@param failTime is the time that the document should be considered to have failed, if it has not been |
|   * successfully read until then. |
|   *@param failCount is the number of retries beyond which a service interruption will be considered a hard failure. |
| */ |
| @Override |
| public void resetDocument(DocumentDescription documentDescription, long executeTime, int action, long failTime, |
| int failCount) |
| throws ManifoldCFException |
| { |
| resetDocumentMultiple(new DocumentDescription[]{documentDescription},executeTime,action,failTime,failCount); |
| } |
| |
| /** Eliminate duplicates, and sort */ |
| protected static String[] eliminateDuplicates(String[] docIDHashes) |
| { |
| HashMap map = new HashMap(); |
| int i = 0; |
| while (i < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[i++]; |
| map.put(docIDHash,docIDHash); |
| } |
| String[] rval = new String[map.size()]; |
| i = 0; |
| Iterator iter = map.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| rval[i++] = (String)iter.next(); |
| } |
| java.util.Arrays.sort(rval); |
| return rval; |
| } |
| |
| /** Build a reorder map, describing how to convert an original index into a reordered index. */ |
| protected static HashMap buildReorderMap(String[] originalIDHashes, String[] reorderedIDHashes) |
| { |
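|     // Hypothetical example: originalIDHashes = {"b","a","b","c"} and reorderedIDHashes = {"a","b","c"} (sorted and |
|     // de-duplicated) produce the map {0->1, 1->0, 3->2}; the duplicate at original index 2 gets no entry. |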
| HashMap reorderSet = new HashMap(); |
| int i = 0; |
| while (i < reorderedIDHashes.length) |
| { |
| String reorderedIDHash = reorderedIDHashes[i]; |
| Integer position = new Integer(i); |
| reorderSet.put(reorderedIDHash,position); |
| i++; |
| } |
| |
| HashMap map = new HashMap(); |
| int j = 0; |
| while (j < originalIDHashes.length) |
| { |
| String originalIDHash = originalIDHashes[j]; |
| Integer position = (Integer)reorderSet.get(originalIDHash); |
| if (position != null) |
| { |
| map.put(new Integer(j),position); |
| // Remove, so that only one of each duplicate will have a place in the map |
| reorderSet.remove(originalIDHash); |
| } |
| j++; |
| } |
| |
| return map; |
| } |
| |
| // Retry methods. These set failTime and failCount. |
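|   // In each of these, the earliest recorded fail time is kept, and the remaining retry count is decremented |
|   // (and capped by the newly supplied failCount), so that repeated service interruptions eventually become hard failures. |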
| |
| /** Retry startup. |
|   *@param jsr is the current job start record. |
| *@param failTime is the new fail time (-1L if none). |
| *@param failCount is the new fail retry count (-1 if none). |
| */ |
| @Override |
| public void retryStartup(JobStartRecord jsr, long failTime, int failCount) |
| throws ManifoldCFException |
| { |
| Long jobID = jsr.getJobID(); |
| long oldFailTime = jsr.getFailTime(); |
| if (oldFailTime == -1L) |
| oldFailTime = failTime; |
| failTime = oldFailTime; |
| int oldFailCount = jsr.getFailRetryCount(); |
| if (oldFailCount == -1) |
| oldFailCount = failCount; |
| else |
| { |
| oldFailCount--; |
| if (failCount != -1 && oldFailCount > failCount) |
| oldFailCount = failCount; |
| } |
| failCount = oldFailCount; |
| |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.retryStartup(jobID,jsr.getRequestMinimum(),failTime,failCount); |
| database.performCommit(); |
| break; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting job startup: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Retry seeding. |
| *@param jsr is the current job seeding record. |
| *@param failTime is the new fail time (-1L if none). |
| *@param failCount is the new fail retry count (-1 if none). |
| */ |
| @Override |
| public void retrySeeding(JobSeedingRecord jsr, long failTime, int failCount) |
| throws ManifoldCFException |
| { |
| Long jobID = jsr.getJobID(); |
| long oldFailTime = jsr.getFailTime(); |
| if (oldFailTime == -1L) |
| oldFailTime = failTime; |
| failTime = oldFailTime; |
| int oldFailCount = jsr.getFailRetryCount(); |
| if (oldFailCount == -1) |
| oldFailCount = failCount; |
| else |
| { |
| oldFailCount--; |
| if (failCount != -1 && oldFailCount > failCount) |
| oldFailCount = failCount; |
| } |
| failCount = oldFailCount; |
| |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.retrySeeding(jobID,failTime,failCount); |
| database.performCommit(); |
| break; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting job seeding: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Retry notification. |
| *@param jnr is the current job notification record. |
| *@param failTime is the new fail time (-1L if none). |
| *@param failCount is the new fail retry count (-1 if none). |
| */ |
| @Override |
| public void retryNotification(JobNotifyRecord jnr, long failTime, int failCount) |
| throws ManifoldCFException |
| { |
| Long jobID = jnr.getJobID(); |
| long oldFailTime = jnr.getFailTime(); |
| if (oldFailTime == -1L) |
| oldFailTime = failTime; |
| failTime = oldFailTime; |
| int oldFailCount = jnr.getFailRetryCount(); |
| if (oldFailCount == -1) |
| oldFailCount = failCount; |
| else |
| { |
| oldFailCount--; |
| if (failCount != -1 && oldFailCount > failCount) |
| oldFailCount = failCount; |
| } |
| failCount = oldFailCount; |
| |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.retryNotification(jobID,failTime,failCount); |
| database.performCommit(); |
| break; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting job notification: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| } |
| |
| /** Retry delete notification. |
| *@param jnr is the current job notification record. |
| *@param failTime is the new fail time (-1L if none). |
| *@param failCount is the new fail retry count (-1 if none). |
| */ |
| @Override |
| public void retryDeleteNotification(JobNotifyRecord jnr, long failTime, int failCount) |
| throws ManifoldCFException |
| { |
| Long jobID = jnr.getJobID(); |
| long oldFailTime = jnr.getFailTime(); |
| if (oldFailTime == -1L) |
| oldFailTime = failTime; |
| failTime = oldFailTime; |
| int oldFailCount = jnr.getFailRetryCount(); |
| if (oldFailCount == -1) |
| oldFailCount = failCount; |
| else |
| { |
| oldFailCount--; |
| if (failCount != -1 && oldFailCount > failCount) |
| oldFailCount = failCount; |
| } |
| failCount = oldFailCount; |
| |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.retryDeleteNotification(jobID,failTime,failCount); |
| database.performCommit(); |
| break; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
|             Logging.perf.debug("Aborted transaction resetting job delete notification: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| } |
| |
| // Add documents methods |
| |
| /** Add an initial set of documents to the queue. |
| * This method is called during job startup, when the queue is being loaded. |
| * A set of document references is passed to this method, which updates the status of the document |
| * in the specified job's queue, according to specific state rules. |
| *@param processID is the current process ID. |
| *@param jobID is the job identifier. |
| *@param legalLinkTypes is the set of legal link types that this connector generates. |
|   *@param docIDHashes are the local document identifier hashes. |
|   *@param docIDs are the local document identifiers. |
| *@param overrideSchedule is true if any existing document schedule should be overridden. |
| *@param hopcountMethod is either accurate, nodelete, or neverdelete. |
| *@param currentTime is the current time in milliseconds since epoch. |
| *@param documentPriorities are the document priorities corresponding to the document identifiers. |
| *@param prereqEventNames are the events that must be completed before each document can be processed. |
| */ |
| @Override |
| public void addDocumentsInitial(String processID, Long jobID, String[] legalLinkTypes, |
| String[] docIDHashes, String[] docIDs, boolean overrideSchedule, |
| int hopcountMethod, long currentTime, IPriorityCalculator[] documentPriorities, |
| String[][] prereqEventNames) |
| throws ManifoldCFException |
| { |
| if (docIDHashes.length == 0) |
| return; |
| |
| // The document identifiers need to be sorted in a consistent fashion to reduce deadlock, and have duplicates removed, before going ahead. |
| // But, the documentPriorities and the return booleans need to correspond to the initial array. So, after we come up with |
| // our internal order, we need to construct a map that takes an original index and maps it to the reduced, reordered index. |
| String[] reorderedDocIDHashes = eliminateDuplicates(docIDHashes); |
| HashMap reorderMap = buildReorderMap(docIDHashes,reorderedDocIDHashes); |
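|     // reorderMap takes an index into the original docIDHashes array to the corresponding index in the |
|     // de-duplicated, sorted array; only the first occurrence of a duplicate identifier receives an entry. |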
| IPriorityCalculator[] reorderedDocumentPriorities = new IPriorityCalculator[reorderedDocIDHashes.length]; |
| String[][] reorderedDocumentPrerequisites = new String[reorderedDocIDHashes.length][]; |
| String[] reorderedDocumentIdentifiers = new String[reorderedDocIDHashes.length]; |
| int i = 0; |
| while (i < docIDHashes.length) |
| { |
| Integer newPosition = (Integer)reorderMap.get(new Integer(i)); |
| if (newPosition != null) |
| { |
| reorderedDocumentPriorities[newPosition.intValue()] = documentPriorities[i]; |
| if (prereqEventNames != null) |
| reorderedDocumentPrerequisites[newPosition.intValue()] = prereqEventNames[i]; |
| else |
| reorderedDocumentPrerequisites[newPosition.intValue()] = null; |
| reorderedDocumentIdentifiers[newPosition.intValue()] = docIDs[i]; |
| } |
| i++; |
| } |
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to add "+Integer.toString(reorderedDocIDHashes.length)+" initial docs and hopcounts for job "+jobID.toString()); |
| } |
| |
| // Postgres gets all screwed up if we permit multiple threads into the hopcount code, unless serialized |
| // transactions are used. But serialized transactions may require a retry in order |
| // to resolve transaction conflicts. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Waited "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to start adding "+Integer.toString(reorderedDocIDHashes.length)+ |
| " initial docs and hopcounts for job "+jobID.toString()); |
| |
| // Go through document id's one at a time, in order - mainly to prevent deadlock as much as possible. Search for any existing row in jobqueue first (for update) |
| int z = 0; |
| while (z < reorderedDocIDHashes.length) |
| { |
| String docIDHash = reorderedDocIDHashes[z]; |
| IPriorityCalculator docPriority = reorderedDocumentPriorities[z]; |
| String docID = reorderedDocumentIdentifiers[z]; |
| String[] docPrereqs = reorderedDocumentPrerequisites[z]; |
| |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobQueue.idField).append(",") |
| .append(jobQueue.statusField).append(",") |
| .append(jobQueue.checkTimeField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.docHashField,docIDHash), |
| new UnitaryClause(jobQueue.jobIDField,jobID)})); |
| |
| sb.append(" FOR UPDATE"); |
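|           // Roughly: SELECT id, status, checktime FROM jobqueue WHERE dochash = ? AND jobid = ? FOR UPDATE |
|           // (actual column names come from the field constants); this locks the row, if any, for this document within this job. |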
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| long executeTime = overrideSchedule?0L:-1L; |
| |
| if (set.getRowCount() > 0) |
| { |
| // Found a row, and it is now locked. |
| IResultRow row = set.getRow(0); |
| |
| // Decode the row |
| Long rowID = (Long)row.getValue(jobQueue.idField); |
| int status = jobQueue.stringToStatus((String)row.getValue(jobQueue.statusField)); |
| Long checkTimeValue = (Long)row.getValue(jobQueue.checkTimeField); |
| |
| jobQueue.updateExistingRecordInitial(rowID,status,checkTimeValue,executeTime,currentTime,docPriority,docPrereqs,processID); |
| } |
| else |
| { |
| // Not found. Attempt an insert instead. This may fail due to constraints, but if this happens, the whole transaction will be retried. |
| jobQueue.insertNewRecordInitial(jobID,docIDHash,docID,docPriority,executeTime,currentTime,docPrereqs,processID); |
| } |
| |
| z++; |
| } |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Took "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to add "+Integer.toString(reorderedDocIDHashes.length)+ |
| " initial docs for job "+jobID.toString()); |
| |
| if (legalLinkTypes.length > 0) |
| hopCount.recordSeedReferences(jobID,legalLinkTypes,reorderedDocIDHashes,hopcountMethod,processID); |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Took "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to add "+Integer.toString(reorderedDocIDHashes.length)+ |
| " initial docs and hopcounts for job "+jobID.toString()); |
| |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction adding "+Integer.toString(reorderedDocIDHashes.length)+ |
| " initial docs for job "+jobID.toString()+": "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Add an initial set of remaining documents to the queue. |
| * This method is called during job startup, when the queue is being loaded, to list documents that |
| * were NOT included by calling addDocumentsInitial(). Documents listed here are simply designed to |
| * enable the framework to get rid of old, invalid seeds. They are not queued for processing. |
| *@param processID is the current process ID. |
| *@param jobID is the job identifier. |
| *@param legalLinkTypes is the set of legal link types that this connector generates. |
| *@param docIDHashes are the local document identifier hashes. |
| *@param hopcountMethod is either accurate, nodelete, or neverdelete. |
| */ |
| @Override |
| public void addRemainingDocumentsInitial(String processID, |
| Long jobID, String[] legalLinkTypes, String[] docIDHashes, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| if (docIDHashes.length == 0) |
| return; |
| |
| String[] reorderedDocIDHashes = eliminateDuplicates(docIDHashes); |
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to add "+Integer.toString(reorderedDocIDHashes.length)+" remaining docs and hopcounts for job "+jobID.toString()); |
| } |
| |
| // Postgres gets all screwed up if we permit multiple threads into the hopcount code, because it allows one transaction to see |
| // the effects of another transaction before it's been committed. Serialized transactions are therefore used, and may require a retry to resolve conflicts. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Waited "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to start adding "+Integer.toString(reorderedDocIDHashes.length)+ |
| " remaining docs and hopcounts for job "+jobID.toString()); |
| |
| jobQueue.addRemainingDocumentsInitial(jobID,reorderedDocIDHashes,processID); |
| if (legalLinkTypes.length > 0) |
| hopCount.recordSeedReferences(jobID,legalLinkTypes,reorderedDocIDHashes,hopcountMethod,processID); |
| |
| database.performCommit(); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Took "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to add "+Integer.toString(reorderedDocIDHashes.length)+ |
| " remaining docs and hopcounts for job "+jobID.toString()); |
| |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction adding "+Integer.toString(reorderedDocIDHashes.length)+ |
| " remaining docs and hopcounts for job "+jobID.toString()+": "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Signal that a seeding pass has been done. |
| * Call this method at the end of a seeding pass. It is used to perform the bookkeeping necessary to |
| * maintain the hopcount table. |
| *@param jobID is the job identifier. |
| *@param legalLinkTypes is the set of legal link types that this connector generates. |
| *@param isPartial is set if the seeds provided are only a partial list. Some connectors cannot |
| * supply a full list of seeds on every seeding iteration; this acknowledges that limitation. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| */ |
| @Override |
| public void doneDocumentsInitial(Long jobID, String[] legalLinkTypes, boolean isPartial, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to finish initial docs and hopcounts for job "+jobID.toString()); |
| } |
| |
| // Postgres gets all screwed up if we permit multiple threads into the hopcount code, because it allows one transaction to see |
| // the effects of another transaction before it's been committed. Serialized transactions are therefore used, and may require a retry to resolve conflicts. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Waited "+new Long(System.currentTimeMillis()-startTime).toString()+ |
| " ms to start finishing initial docs and hopcounts for job "+jobID.toString()); |
| |
| jobQueue.doneDocumentsInitial(jobID,isPartial); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Took "+new Long(System.currentTimeMillis()-startTime).toString()+ |
| " ms to finish initial docs for job "+jobID.toString()); |
| |
| if (legalLinkTypes.length > 0) |
| hopCount.finishSeedReferences(jobID,legalLinkTypes,hopcountMethod); |
| |
| database.performCommit(); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Took "+new Long(System.currentTimeMillis()-startTime).toString()+ |
| " ms to finish initial docs and hopcounts for job "+jobID.toString()); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction finishing initial docs and hopcounts for job "+jobID.toString()+": "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Get the specified hop counts, with the limit as described. |
| *@param jobID is the job identifier. |
| *@param legalLinkTypes is the set of legal link types that this connector generates. |
| *@param docIDHashes are the hashes for the set of documents to find the hopcount for. |
| *@param linkType is the kind of link to find the hopcount for. |
| *@param limit is the limit, beyond which a negative distance may be returned. |
| *@param hopcountMethod is the method for managing hopcounts that is in effect. |
| *@return an array of booleans corresponding to the documents requested. A true value is returned |
| * if the document is within the specified limit, false otherwise. |
| */ |
| @Override |
| public boolean[] findHopCounts(Long jobID, String[] legalLinkTypes, String[] docIDHashes, String linkType, int limit, |
| int hopcountMethod) |
| throws ManifoldCFException |
| { |
| if (docIDHashes.length == 0) |
| return new boolean[0]; |
| |
| if (legalLinkTypes.length == 0) |
| throw new ManifoldCFException("Nonsensical request; asking for hopcounts where none are kept"); |
| |
| // The idea is to delay queue processing as much as possible, because that avoids having to wait |
| // on locks and having to repeat our evaluations. |
| // |
| // Luckily, we can glean a lot of information from what's hanging around. Specifically, whatever value |
| // we find in the table is an upper bound on the true hop distance value. So, only if we have documents |
| // that are outside the limit does the queue need to be processed. |
| // |
| // It is therefore really helpful to write in an estimated value for any newly created record, if possible. Even if the |
| // estimate is possibly greater than the true value, a great deal of locking and queue processing will be |
| // avoided. |
| |
| // The flow here is to: |
| // - grab the right hoplock |
| // - process the queue |
| // - if the queue is empty, get the hopcounts we wanted, otherwise release the lock and loop around |
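| // For example (hypothetical numbers): with limit = 5, a stored distance of 3 answers "true" immediately, because stored |
| // values can only overestimate the true distance. A stored value of 7, or no value at all (-1), is inconclusive, and only |
| // those documents force us into the queue-processing path below. |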
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Beginning work to get "+Integer.toString(docIDHashes.length)+" hopcounts for job "+jobID.toString()); |
| } |
| |
| // Make an answer array. |
| boolean[] rval = new boolean[docIDHashes.length]; |
| |
| // Make a hash of what we still need a definitive answer for. |
| HashMap badAnswers = new HashMap(); |
| int i = 0; |
| while (i < rval.length) |
| { |
| String docIDHash = docIDHashes[i]; |
| rval[i] = false; |
| badAnswers.put(docIDHash,new Integer(i)); |
| i++; |
| } |
| |
| int iterationCount = 0; |
| while (true) |
| { |
| // Ask only about documents we don't yet have a definitive answer for. |
| String[] askDocIDHashes = new String[badAnswers.size()]; |
| i = 0; |
| Iterator iter = badAnswers.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| askDocIDHashes[i++] = (String)iter.next(); |
| } |
| |
| int[] distances = hopCount.findHopCounts(jobID,askDocIDHashes,linkType); |
| i = 0; |
| while (i < distances.length) |
| { |
| int distance = distances[i]; |
| String docIDHash = askDocIDHashes[i]; |
| if (distance != -1 && distance <= limit) |
| { |
| // Found a usable value |
| rval[((Integer)badAnswers.remove(docIDHash)).intValue()] = true; |
| } |
| i++; |
| } |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Iteration "+Integer.toString(iterationCount++)+": After initial check, "+Integer.toString(badAnswers.size())+ |
| " hopcounts remain to be found for job "+jobID.toString()+", out of "+Integer.toString(docIDHashes.length)+ |
| " ("+new Long(System.currentTimeMillis()-startTime).toString()+" ms)"); |
| |
| if (badAnswers.size() == 0) |
| return rval; |
| |
| // It appears we need to process the queue. We need to enter the hoplock section |
| // to make sure only one player is updating values at a time. Then, before we exit, we get the |
| // remaining values. |
| |
| askDocIDHashes = new String[badAnswers.size()]; |
| i = 0; |
| iter = badAnswers.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| askDocIDHashes[i++] = (String)iter.next(); |
| } |
| |
| // Currently, only one thread can possibly process any of the queue at a given time. This is because the queue marks are not set to something |
| // other than the "in queue" value during processing. My instinct is that queue processing is likely to interfere with other queue processing, |
| // so I've taken the route of prohibiting more than one batch of queue processing at a time, for now. |
| |
| String hopLockName = getHopLockName(jobID); |
| long sleepAmt = 0L; |
| lockManager.enterWriteLock(hopLockName); |
| try |
| { |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Processing queue for job "+jobID.toString()+" ("+new Long(System.currentTimeMillis()-startTime).toString()+" ms)"); |
| |
| // The internal queue processing only does 200 at a time. This is a compromise between maximum efficiency (bigger number) |
| // and the requirement that database writes are effectively blocked for a while (which argues for a smaller number). |
| boolean definitive = hopCount.processQueue(jobID,legalLinkTypes,hopcountMethod); |
| // If definitive answers were not found, we leave the lock and go back to check on the status of the questions we were |
| // interested in. If the answers are all OK then we are done; if not, we need to process more queue, and keep doing that |
| // until we really ARE done. |
| if (!definitive) |
| { |
| // Sleep a little bit so another thread can have a whack at things |
| sleepAmt = 100L; |
| database.performCommit(); |
| continue; |
| } |
| |
| // Definitive answers found; continue through. |
| distances = hopCount.findHopCounts(jobID,askDocIDHashes,linkType); |
| database.performCommit(); |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction processing queue for job "+jobID.toString()+": "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| finally |
| { |
| lockManager.leaveWriteLock(hopLockName); |
| sleepFor(sleepAmt); |
| } |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Definitive answers found for "+Integer.toString(docIDHashes.length)+ |
| " hopcounts for job "+jobID.toString()+" ("+new Long(System.currentTimeMillis()-startTime).toString()+" ms)"); |
| |
| // All answers are guaranteed to be accurate now. |
| i = 0; |
| while (i < distances.length) |
| { |
| int distance = distances[i]; |
| String docIDHash = askDocIDHashes[i]; |
| if (distance != -1 && distance <= limit) |
| { |
| // Found a usable value |
| rval[((Integer)badAnswers.remove(docIDHash)).intValue()] = true; |
| } |
| i++; |
| } |
| return rval; |
| } |
| } |
| |
| /** Get all the current seeds. |
| * Returns the seed document identifiers for a job. |
| *@param jobID is the job identifier. |
| *@return the document identifiers that are currently considered to be seeds. |
| */ |
| @Override |
| public String[] getAllSeeds(Long jobID) |
| throws ManifoldCFException |
| { |
| return jobQueue.getAllSeeds(jobID); |
| } |
| |
| /** Add documents to the queue in bulk. |
| * This method is called during document processing, when a set of document references are discovered. |
| * The document references are passed to this method, which updates the status of the document(s) |
| * in the specified job's queue, according to specific state rules. |
| *@param processID is the process ID. |
| *@param jobID is the job identifier. |
| *@param legalLinkTypes is the set of legal link types that this connector generates. |
| *@param docIDHashes are the local document identifier hashes. |
| *@param docIDs are the local document identifiers, corresponding to docIDHashes. |
| *@param parentIdentifierHash is the optional parent identifier hash of this document. Pass null if none. |
| * MUST be present in the case of carrydown information. |
| *@param relationshipType is the optional link type between this document and its parent. Pass null if there |
| * is no relationship with a parent. |
| *@param hopcountMethod is the desired method for managing hopcounts. |
| *@param dataNames are the names of the data to carry down to the child from this parent. |
| *@param dataValues are the values to carry down to the child from this parent, corresponding to dataNames above. If CharacterInput objects are passed in here, |
| * it is the caller's responsibility to clean these up. |
| *@param currentTime is the time in milliseconds since epoch that will be recorded for this operation. |
| *@param documentPriorities are the desired document priorities for the documents. |
| *@param prereqEventNames are the events that must be completed before a document can be queued. |
| */ |
| @Override |
| public void addDocuments(String processID, |
| Long jobID, String[] legalLinkTypes, |
| String[] docIDHashes, String[] docIDs, |
| String parentIdentifierHash, String relationshipType, |
| int hopcountMethod, String[][] dataNames, Object[][][] dataValues, |
| long currentTime, IPriorityCalculator[] documentPriorities, |
| String[][] prereqEventNames) |
| throws ManifoldCFException |
| { |
| if (docIDs.length == 0) |
| return; |
| |
| // Sort the id hashes and eliminate duplicates. This will help avoid deadlock conditions. |
| // However, we also need to keep the carrydown data in synch, so track that around as well, and merge if there are |
| // duplicate document identifiers. |
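| // Sketch of the merge structure built below (hypothetical values): if the same docIDHash appears twice, each with a data |
| // name "author", the two value sets are merged into one map keyed by value hash, so each distinct value survives exactly once: |
| //   nameMap : docIDHash -> ( "author" -> ( hash("smith") -> "smith", hash("jones") -> "jones" ) ) |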
| HashMap nameMap = new HashMap(); |
| int k = 0; |
| while (k < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[k]; |
| // If there are duplicates, we need to merge them. |
| HashMap names = (HashMap)nameMap.get(docIDHash); |
| if (names == null) |
| { |
| names = new HashMap(); |
| nameMap.put(docIDHash,names); |
| } |
| |
| String[] nameList = dataNames[k]; |
| Object[][] dataList = dataValues[k]; |
| |
| int z = 0; |
| while (z < nameList.length) |
| { |
| String name = nameList[z]; |
| Object[] values = dataList[z]; |
| HashMap valueMap = (HashMap)names.get(name); |
| if (valueMap == null) |
| { |
| valueMap = new HashMap(); |
| names.put(name,valueMap); |
| } |
| int y = 0; |
| while (y < values.length) |
| { |
| // Calculate the value hash; that's the true key, and the one that cannot be duplicated. |
| String valueHash; |
| if (values[y] instanceof CharacterInput) |
| { |
| // It's a CharacterInput object. |
| valueHash = ((CharacterInput)values[y]).getHashValue(); |
| } |
| else |
| { |
| // It better be a String. |
| valueHash = ManifoldCF.hash((String)values[y]); |
| } |
| valueMap.put(valueHash,values[y]); |
| y++; |
| } |
| z++; |
| } |
| k++; |
| } |
| |
| String[] reorderedDocIDHashes = eliminateDuplicates(docIDHashes); |
| HashMap reorderMap = buildReorderMap(docIDHashes,reorderedDocIDHashes); |
| IPriorityCalculator[] reorderedDocumentPriorities = new IPriorityCalculator[reorderedDocIDHashes.length]; |
| String[][] reorderedDocumentPrerequisites = new String[reorderedDocIDHashes.length][]; |
| String[] reorderedDocumentIdentifiers = new String[reorderedDocIDHashes.length]; |
| boolean[] rval = new boolean[docIDHashes.length]; |
| int i = 0; |
| while (i < docIDHashes.length) |
| { |
| Integer newPosition = (Integer)reorderMap.get(new Integer(i)); |
| if (newPosition != null) |
| { |
| reorderedDocumentPriorities[newPosition.intValue()] = documentPriorities[i]; |
| if (prereqEventNames != null) |
| reorderedDocumentPrerequisites[newPosition.intValue()] = prereqEventNames[i]; |
| else |
| reorderedDocumentPrerequisites[newPosition.intValue()] = null; |
| reorderedDocumentIdentifiers[newPosition.intValue()] = docIDs[i]; |
| } |
| rval[i] = false; |
| i++; |
| } |
| |
| dataNames = new String[reorderedDocIDHashes.length][]; |
| String[][][] dataHashValues = new String[reorderedDocIDHashes.length][][]; |
| dataValues = new Object[reorderedDocIDHashes.length][][]; |
| |
| k = 0; |
| while (k < reorderedDocIDHashes.length) |
| { |
| String docIDHash = reorderedDocIDHashes[k]; |
| HashMap names = (HashMap)nameMap.get(docIDHash); |
| dataNames[k] = new String[names.size()]; |
| dataHashValues[k] = new String[names.size()][]; |
| dataValues[k] = new Object[names.size()][]; |
| Iterator iter = names.keySet().iterator(); |
| int z = 0; |
| while (iter.hasNext()) |
| { |
| String dataName = (String)iter.next(); |
| (dataNames[k])[z] = dataName; |
| HashMap values = (HashMap)names.get(dataName); |
| (dataHashValues[k])[z] = new String[values.size()]; |
| (dataValues[k])[z] = new Object[values.size()]; |
| Iterator iter2 = values.keySet().iterator(); |
| int y = 0; |
| while (iter2.hasNext()) |
| { |
| String dataValueHash = (String)iter2.next(); |
| Object dataValue = values.get(dataValueHash); |
| ((dataHashValues[k])[z])[y] = dataValueHash; |
| ((dataValues[k])[z])[y] = dataValue; |
| y++; |
| } |
| z++; |
| } |
| k++; |
| } |
| |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to add "+Integer.toString(reorderedDocIDHashes.length)+" docs and hopcounts for job "+jobID.toString()+" parent identifier "+parentIdentifierHash); |
| } |
| |
| // Postgres gets all screwed up if we permit multiple threads into the hopcount code, because it allows one transaction |
| // to see the effects of another transaction before it's been committed; hence the serialized transactions used here. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Waited "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to start adding "+Integer.toString(reorderedDocIDHashes.length)+ |
| " docs and hopcounts for job "+jobID.toString()+" parent identifier hash "+parentIdentifierHash); |
| |
| // Go through document id's one at a time, in order - mainly to prevent deadlock as much as possible. Search for any existing row in jobqueue first (for update) |
| Map<String,JobqueueRecord> existingRows = new HashMap<String,JobqueueRecord>(); |
| |
| for (int z = 0; z < reorderedDocIDHashes.length; z++) |
| { |
| String docIDHash = reorderedDocIDHashes[z]; |
| |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobQueue.idField).append(",") |
| .append(jobQueue.statusField).append(",") |
| .append(jobQueue.checkTimeField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.docHashField,docIDHash), |
| new UnitaryClause(jobQueue.jobIDField,jobID)})); |
| |
| sb.append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| if (set.getRowCount() > 0) |
| { |
| // Found a row, and it is now locked. |
| IResultRow row = set.getRow(0); |
| |
| // Decode the row |
| Long rowID = (Long)row.getValue(jobQueue.idField); |
| int status = jobQueue.stringToStatus((String)row.getValue(jobQueue.statusField)); |
| Long checkTimeValue = (Long)row.getValue(jobQueue.checkTimeField); |
| |
| existingRows.put(docIDHash,new JobqueueRecord(rowID,status,checkTimeValue)); |
| } |
| else |
| { |
| // Not found. Attempt an insert instead. This may fail due to constraints, but if this happens, the whole transaction will be retried. |
| jobQueue.insertNewRecord(jobID,docIDHash,reorderedDocumentIdentifiers[z],reorderedDocumentPriorities[z],0L,currentTime,reorderedDocumentPrerequisites[z]); |
| } |
| |
| } |
| |
| // Update all the carrydown data at once, for greatest efficiency. |
| boolean[] carrydownChangesSeen = carryDown.recordCarrydownDataMultiple(jobID,parentIdentifierHash,reorderedDocIDHashes,dataNames,dataHashValues,dataValues,processID); |
| |
| // Same with hopcount. |
| boolean[] hopcountChangesSeen = null; |
| if (parentIdentifierHash != null && relationshipType != null) |
| hopcountChangesSeen = hopCount.recordReferences(jobID,legalLinkTypes,parentIdentifierHash,reorderedDocIDHashes,relationshipType,hopcountMethod,processID); |
| |
| boolean reactivateRemovedHopcountRecords = false; |
| |
| for (int z = 0; z < reorderedDocIDHashes.length; z++) |
| { |
| String docIDHash = reorderedDocIDHashes[z]; |
| JobqueueRecord jr = existingRows.get(docIDHash); |
| if (jr != null) |
| { |
| // It was an existing row; do the update logic |
| // The hopcountChangesSeen array describes whether each reference is a new one. This |
| // helps us determine whether we're going to need to "flip" HOPCOUNTREMOVED documents |
| // to the PENDING state: if any new link ended at an existing record, then they all need to be flipped. |
| jobQueue.updateExistingRecord(jr.getRecordID(),jr.getStatus(),jr.getCheckTimeValue(), |
| 0L,currentTime,carrydownChangesSeen[z] || (hopcountChangesSeen!=null && hopcountChangesSeen[z]), |
| reorderedDocumentPriorities[z],reorderedDocumentPrerequisites[z]); |
| // Signal if we need to perform the flip |
| if (hopcountChangesSeen != null && hopcountChangesSeen[z]) |
| reactivateRemovedHopcountRecords = true; |
| } |
| } |
| |
| if (reactivateRemovedHopcountRecords) |
| jobQueue.reactivateHopcountRemovedRecords(jobID); |
| |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Took "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to add "+Integer.toString(reorderedDocIDHashes.length)+ |
| " docs and hopcounts for job "+jobID.toString()+" parent identifier hash "+parentIdentifierHash); |
| |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| sleepAmt = getRandomAmount(); |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction adding "+Integer.toString(reorderedDocIDHashes.length)+ |
| " docs and hopcounts for job "+jobID.toString()+" parent identifier hash "+parentIdentifierHash+": "+e.getMessage()+"; sleeping for "+new Long(sleepAmt).toString()+" ms",e); |
| continue; |
| } |
| throw e; |
| } |
| catch (RuntimeException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| |
| /** Add a document to the queue. |
| * This method is called during document processing, when a document reference is discovered. |
| * The document reference is passed to this method, which updates the status of the document |
| * in the specified job's queue, according to specific state rules. |
| *@param processID is the process ID. |
| *@param jobID is the job identifier. |
| *@param legalLinkTypes is the set of legal link types that this connector generates. |
| *@param docIDHash is the local document identifier hash value. |
| *@param docID is the local document identifier. |
| *@param parentIdentifierHash is the optional parent identifier hash of this document. Pass null if none. |
| * MUST be present in the case of carrydown information. |
| *@param relationshipType is the optional link type between this document and its parent. Pass null if there |
| * is no relationship with a parent. |
| *@param hopcountMethod is the desired method for managing hopcounts. |
| *@param dataNames are the names of the data to carry down to the child from this parent. |
| *@param dataValues are the values to carry down to the child from this parent, corresponding to dataNames above. |
| *@param currentTime is the time in milliseconds since epoch that will be recorded for this operation. |
| *@param priority is the desired document priority for the document. |
| *@param prereqEventNames are the events that must be completed before the document can be processed. |
| */ |
| @Override |
| public void addDocument(String processID, |
| Long jobID, String[] legalLinkTypes, String docIDHash, String docID, |
| String parentIdentifierHash, String relationshipType, |
| int hopcountMethod, String[] dataNames, Object[][] dataValues, |
| long currentTime, IPriorityCalculator priority, String[] prereqEventNames) |
| throws ManifoldCFException |
| { |
| addDocuments(processID,jobID,legalLinkTypes, |
| new String[]{docIDHash},new String[]{docID}, |
| parentIdentifierHash,relationshipType,hopcountMethod,new String[][]{dataNames}, |
| new Object[][][]{dataValues},currentTime,new IPriorityCalculator[]{priority},new String[][]{prereqEventNames}); |
| } |
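| // A minimal sketch of a call to the method above (all values hypothetical; in practice connectors report discovered |
| // references through IProcessActivity, and the framework makes this call on their behalf via an IJobManager handle): |
| // |
| //   String childDocID = "http://example.com/page2"; |
| //   String childHash = ManifoldCF.hash(childDocID); |
| //   jobManager.addDocument(processID, jobID, legalLinkTypes, childHash, childDocID, |
| //     parentHash, "link", hopcountMethod, new String[0], new Object[0][], |
| //     System.currentTimeMillis(), priorityCalculator, null); |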
| |
| /** Undo the addition of child documents to the queue, for a set of documents. |
| * This method is called at the end of document processing, to back out any incomplete additions to the queue, and restore |
| * the status quo ante prior to the incomplete additions. Call this method instead of finishDocuments() if the |
| * addition of documents was not completed. |
| *@param jobID is the job identifier. |
| *@param legalLinkTypes is the set of legal link types that this connector generates. |
| *@param parentIdentifierHashes are the hashes of the document identifiers for whom child link extraction just took place. |
| */ |
| @Override |
| public void revertDocuments(Long jobID, String[] legalLinkTypes, |
| String[] parentIdentifierHashes) |
| throws ManifoldCFException |
| { |
| if (parentIdentifierHashes.length == 0) |
| return; |
| |
| if (legalLinkTypes.length == 0) |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| // Revert carrydown records |
| carryDown.revertRecords(jobID,parentIdentifierHashes); |
| database.performCommit(); |
| break; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (RuntimeException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| else |
| { |
| // Revert both hopcount and carrydown |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| carryDown.revertRecords(jobID,parentIdentifierHashes); |
| hopCount.revertParents(jobID,parentIdentifierHashes); |
| database.performCommit(); |
| break; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (RuntimeException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| } |
| |
| /** Complete adding child documents to the queue, for a set of documents. |
| * This method is called at the end of document processing, to help the hopcount tracking engine do its bookkeeping. |
| *@param jobID is the job identifier. |
| *@param legalLinkTypes is the set of legal link types that this connector generates. |
| *@param parentIdentifierHashes are the document identifier hashes for whom child link extraction just took place. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@return the set of documents for which carrydown data was changed by this operation. These documents are likely |
| * to be requeued as a result of the change. |
| */ |
| @Override |
| public DocumentDescription[] finishDocuments(Long jobID, String[] legalLinkTypes, String[] parentIdentifierHashes, int hopcountMethod) |
| throws ManifoldCFException |
| { |
| if (parentIdentifierHashes.length == 0) |
| return new DocumentDescription[0]; |
| |
| DocumentDescription[] rval; |
| |
| if (legalLinkTypes.length == 0) |
| { |
| // Must at least end the carrydown transaction. By itself, this does not need a serialized transaction; however, occasional |
| // deadlock is possible when a document shares multiple parents, so do the whole retry drill |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| // A certain set of carrydown records are going to be deleted by the ensuing restoreRecords command. Calculate that set of records! |
| rval = calculateAffectedRestoreCarrydownChildren(jobID,parentIdentifierHashes); |
| carryDown.restoreRecords(jobID,parentIdentifierHashes); |
| database.performCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction finishing "+ |
| Integer.toString(parentIdentifierHashes.length)+" doc carrydown records for job "+jobID.toString()+": "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (RuntimeException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| else |
| { |
| long startTime = 0L; |
| if (Logging.perf.isDebugEnabled()) |
| { |
| startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Waiting to finish "+Integer.toString(parentIdentifierHashes.length)+" doc hopcounts for job "+jobID.toString()); |
| } |
| |
| // Postgres gets all screwed up if we permit multiple threads into the hopcount code, because it allows one transaction |
| // to see the effects of another transaction before it's been committed; hence the serialized transactions used here. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| // A certain set of carrydown records are going to be deleted by the ensuing restoreRecords command. Calculate that set of records! |
| rval = calculateAffectedRestoreCarrydownChildren(jobID,parentIdentifierHashes); |
| |
| carryDown.restoreRecords(jobID,parentIdentifierHashes); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Waited "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to start finishing "+ |
| Integer.toString(parentIdentifierHashes.length)+" doc hopcounts for job "+jobID.toString()); |
| |
| hopCount.finishParents(jobID,legalLinkTypes,parentIdentifierHashes,hopcountMethod); |
| database.performCommit(); |
| |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Took "+new Long(System.currentTimeMillis()-startTime).toString()+" ms to finish "+ |
| Integer.toString(parentIdentifierHashes.length)+" doc hopcounts for job "+jobID.toString()); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction finishing "+ |
| Integer.toString(parentIdentifierHashes.length)+" doc hopcounts for job "+jobID.toString()+": "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (RuntimeException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| return rval; |
| } |
| |
| /** Helper method: Calculate the unique set of affected carrydown children resulting from a "restoreRecords" operation. |
| */ |
| protected DocumentDescription[] calculateAffectedRestoreCarrydownChildren(Long jobID, String[] parentIDHashes) |
| throws ManifoldCFException |
| { |
| // We are going to need to break up this query into a number of subqueries, each covering a subset of parent id hashes. |
| // The goal is to throw all the children into a hash, to make them unique at the end. |
| HashMap resultHash = new HashMap(); |
| ArrayList list = new ArrayList(); |
| int maxCount = database.getMaxOrClause(); |
| int i = 0; |
| int z = 0; |
| while (i < parentIDHashes.length) |
| { |
| if (z == maxCount) |
| { |
| processParentHashSet(jobID,resultHash,list); |
| list.clear(); |
| z = 0; |
| } |
| list.add(parentIDHashes[i]); |
| i++; |
| z++; |
| } |
| |
| if (z > 0) |
| processParentHashSet(jobID,resultHash,list); |
| |
| // Now, put together the result document list from the hash. |
| DocumentDescription[] rval = new DocumentDescription[resultHash.size()]; |
| i = 0; |
| Iterator iter = resultHash.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| Long id = (Long)iter.next(); |
| DocumentDescription dd = (DocumentDescription)resultHash.get(id); |
| rval[i++] = dd; |
| } |
| return rval; |
| } |
| |
| /** Helper method: look up rows affected by a restoreRecords operation. |
| */ |
| protected void processParentHashSet(Long jobID, HashMap resultHash, ArrayList list) |
| throws ManifoldCFException |
| { |
| // The query here mirrors the carrydown.restoreRecords() delete query! However, it also fetches enough information to build a DocumentDescription |
| // object for return, and so a join is necessary against the jobqueue table. |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList newlist = new ArrayList(); |
| |
| sb.append("t0.").append(jobQueue.idField).append(",") |
| .append("t0.").append(jobQueue.docHashField).append(",") |
| .append("t0.").append(jobQueue.docIDField) |
| .append(" FROM ").append(carryDown.getTableName()).append(" t1, ") |
| .append(jobQueue.getTableName()).append(" t0 WHERE "); |
| |
| sb.append(database.buildConjunctionClause(newlist,new ClauseDescription[]{ |
| new UnitaryClause("t1."+carryDown.jobIDField,jobID), |
| new MultiClause("t1."+carryDown.parentIDHashField,list)})).append(" AND "); |
| |
| sb.append(database.buildConjunctionClause(newlist,new ClauseDescription[]{ |
| new JoinClause("t0."+jobQueue.docHashField,"t1."+carryDown.childIDHashField), |
| new JoinClause("t0."+jobQueue.jobIDField,"t1."+carryDown.jobIDField)})).append(" AND "); |
| |
| sb.append("t1.").append(carryDown.newField).append("=?"); |
| newlist.add(carryDown.statusToString(carryDown.ISNEW_BASE)); |
| |
| /* |
| sb.append("t0.").append(jobQueue.idField).append(",") |
| .append("t0.").append(jobQueue.docHashField).append(",") |
| .append("t0.").append(jobQueue.docIDField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t0 WHERE ") |
| .append(database.buildConjunctionClause(newlist,new ClauseDescription[]{ |
| new UnitaryClause("t0."+jobQueue.jobIDField,jobID)})).append(" AND "); |
| |
| sb.append("EXISTS(SELECT 'x' FROM ").append(carryDown.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(newlist,new ClauseDescription[]{ |
| new JoinClause("t1."+carryDown.jobIDField,"t0."+jobQueue.jobIDField), |
| new MultiClause("t1."+carryDown.parentIDHashField,list), |
| new JoinClause("t1."+carryDown.childIDHashField,"t0."+jobQueue.docHashField)})).append(" AND ") |
| .append("t1.").append(carryDown.newField).append("=?") |
| .append(")"); |
| |
| newlist.add(carryDown.statusToString(carryDown.ISNEW_BASE)); |
| */ |
| |
| IResultSet set = database.performQuery(sb.toString(),newlist,null,null); |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long id = (Long)row.getValue(jobQueue.idField); |
| String documentIdentifierHash = (String)row.getValue(jobQueue.docHashField); |
| String documentIdentifier = (String)row.getValue(jobQueue.docIDField); |
| resultHash.put(id,new DocumentDescription(id,jobID,documentIdentifierHash,documentIdentifier)); |
| } |
| } |
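| // For reference, the statement assembled above has roughly this shape (table and column names are resolved here purely |
| // for illustration; the real names come from the jobqueue and carrydown table classes): |
| // |
| //   SELECT t0.id, t0.dochash, t0.docid |
| //   FROM carrydown t1, jobqueue t0 |
| //   WHERE t1.jobid=? AND t1.parentidhash IN (?,...) |
| //     AND t0.dochash=t1.childidhash AND t0.jobid=t1.jobid |
| //     AND t1.isnew=? |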
| |
| /** Begin an event sequence. |
| *@param processID is the current process ID. |
| *@param eventName is the name of the event. |
| *@return true if the event could be created, or false if it's already there. |
| */ |
| @Override |
| public boolean beginEventSequence(String processID, String eventName) |
| throws ManifoldCFException |
| { |
| try |
| { |
| eventManager.createEvent(eventName,processID); |
| return true; |
| } |
| catch (ManifoldCFException e) |
| { |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| return false; |
| throw e; |
| } |
| } |
| |
| /** Complete an event sequence. |
| *@param eventName is the name of the event. |
| */ |
| @Override |
| public void completeEventSequence(String eventName) |
| throws ManifoldCFException |
| { |
| eventManager.destroyEvent(eventName); |
| } |
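| // A minimal sketch of how these two calls are typically paired by a caller (hypothetical event name; a false return |
| // from beginEventSequence means another process already owns the event, so the guarded work should be skipped): |
| // |
| //   if (jobManager.beginEventSequence(processID, "host-throttle:example.com")) |
| //   { |
| //     try |
| //     { |
| //       // ... perform the activity guarded by the event ... |
| //     } |
| //     finally |
| //     { |
| //       jobManager.completeEventSequence("host-throttle:example.com"); |
| //     } |
| //   } |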
| |
| |
| /** Requeue a document set because of carrydown changes. |
| * This method is called when carrydown data is modified for a set of documents. The documents must be requeued for immediate reprocessing, even to the |
| * extent that if one is *already* being processed, it will need to be done over again. |
| *@param documentDescriptions is the set of description objects for the documents that have had their parent carrydown information changed. |
| *@param docPriorities are the document priorities to assign to the documents, if needed. |
| */ |
| @Override |
| public void carrydownChangeDocumentMultiple(DocumentDescription[] documentDescriptions, long currentTime, IPriorityCalculator[] docPriorities) |
| throws ManifoldCFException |
| { |
| if (documentDescriptions.length == 0) |
| return; |
| |
| // Order the updates by document hash, to prevent deadlock as much as possible. |
| |
| // This map contains the original index of the document id hash. |
| HashMap docHashMap = new HashMap(); |
| |
| String[] docIDHashes = new String[documentDescriptions.length]; |
| int i = 0; |
| while (i < documentDescriptions.length) |
| { |
| docIDHashes[i] = documentDescriptions[i].getDocumentIdentifier() + ":" + documentDescriptions[i].getJobID(); |
| docHashMap.put(docIDHashes[i],new Integer(i)); |
| i++; |
| } |
| |
| // Sort the hashes |
| java.util.Arrays.sort(docIDHashes); |
| |
| // Enter transaction and prepare to look up document states in dochash order |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| // This is the map that will contain the rows we found, keyed by docIDHash. |
| HashMap existingRows = new HashMap(); |
| |
| // Loop through hashes in order |
| int j = 0; |
| while (j < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[j]; |
| // Get the index |
| int originalIndex = ((Integer)docHashMap.get(docIDHash)).intValue(); |
| // Lookup document description |
| DocumentDescription dd = documentDescriptions[originalIndex]; |
| // Do the query. We can base this on the id column since we have that. |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobQueue.idField).append(",") |
| .append(jobQueue.statusField).append(",") |
| .append(jobQueue.checkTimeField) |
| .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.idField,dd.getID())})).append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| // If the row is there, we use its current info to requeue it properly. |
| if (set.getRowCount() > 0) |
| { |
| // Found a row, and it is now locked. |
| IResultRow row = set.getRow(0); |
| |
| // Decode the row |
| Long rowID = (Long)row.getValue(jobQueue.idField); |
| int status = jobQueue.stringToStatus((String)row.getValue(jobQueue.statusField)); |
| Long checkTimeValue = (Long)row.getValue(jobQueue.checkTimeField); |
| |
| existingRows.put(docIDHash,new JobqueueRecord(rowID,status,checkTimeValue)); |
| } |
| j++; |
| } |
| |
| // Ok, existingRows contains all the rows we want to try to update. Go through these and update. |
| j = 0; |
| while (j < docIDHashes.length) |
| { |
| String docIDHash = docIDHashes[j]; |
| int originalIndex = ((Integer)docHashMap.get(docIDHash)).intValue(); |
| |
| JobqueueRecord jr = (JobqueueRecord)existingRows.get(docIDHash); |
| if (jr != null) |
| // It was an existing row; do the update logic; use the 'carrydown changes' flag = true all the time. |
| jobQueue.updateExistingRecord(jr.getRecordID(),jr.getStatus(),jr.getCheckTimeValue(), |
| 0L,currentTime,true,docPriorities[originalIndex],null); |
| j++; |
| } |
| database.performCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction handling "+Integer.toString(docIDHashes.length)+" carrydown changes: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Requeue a document because of carrydown changes. |
| * This method is called when carrydown data is modified for a document. The document must be requeued for immediate reprocessing, even to the |
| * extent that if it is *already* being processed, it will need to be done over again. |
| *@param documentDescription is the description object for the document that has had its parent carrydown information changed. |
| *@param docPriority is the document priority to assign to the document, if needed. |
| */ |
| @Override |
| public void carrydownChangeDocument(DocumentDescription documentDescription, long currentTime, IPriorityCalculator docPriority) |
| throws ManifoldCFException |
| { |
| carrydownChangeDocumentMultiple(new DocumentDescription[]{documentDescription},currentTime,new IPriorityCalculator[]{docPriority}); |
| } |
| |
| /** Get the random amount of time to sleep after a transaction abort. |
| */ |
| protected long getRandomAmount() |
| { |
| return database.getSleepAmt(); |
| } |
| |
| protected void sleepFor(long amt) |
| throws ManifoldCFException |
| { |
| database.sleepFor(amt); |
| } |
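| // These two helpers back the serialized-transaction retry idiom used throughout this class. A condensed sketch of that |
| // idiom (not a real method; shown only to document the pattern the methods above rely on): |
| // |
| //   while (true) |
| //   { |
| //     long sleepAmt = 0L; |
| //     database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| //     try |
| //     { |
| //       // ... do the work ... |
| //       database.performCommit(); |
| //       break; |
| //     } |
| //     catch (ManifoldCFException e) |
| //     { |
| //       database.signalRollback(); |
| //       if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| //       { |
| //         sleepAmt = getRandomAmount();  // random back-off, then retry |
| //         continue; |
| //       } |
| //       throw e; |
| //     } |
| //     finally |
| //     { |
| //       database.endTransaction(); |
| //       sleepFor(sleepAmt); |
| //     } |
| //   } |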
| |
| /** Retrieve specific parent data for a given document. |
| *@param jobID is the job identifier. |
| *@param docIDHash is the document identifier hash value. |
| *@param dataName is the kind of data to retrieve. |
| *@return the unique data values. |
| */ |
| @Override |
| public String[] retrieveParentData(Long jobID, String docIDHash, String dataName) |
| throws ManifoldCFException |
| { |
| return carryDown.getDataValues(jobID,docIDHash,dataName); |
| } |
| |
| /** Retrieve specific parent data for a given document. |
| *@param jobID is the job identifier. |
| *@param docIDHash is the document identifier hash value. |
| *@param dataName is the kind of data to retrieve. |
| *@return the unique data values. |
| */ |
| @Override |
| public CharacterInput[] retrieveParentDataAsFiles(Long jobID, String docIDHash, String dataName) |
| throws ManifoldCFException |
| { |
| return carryDown.getDataValuesAsFiles(jobID,docIDHash,dataName); |
| } |
| |
| // These methods support the job threads (which start jobs and end jobs) |
| // There is one thread that starts jobs. It simply looks for jobs which are ready to |
| // start, and changes their state accordingly. |
| // There is also a pool of threads that end jobs. These threads wait for a job that |
| // looks like it is done, and do completion processing if it is. |
| |
| /** Start all jobs in need of starting. |
| * This method marks all the appropriate jobs as "in progress", which is all that should be |
| * needed to start them. |
| * It's also the case that the start event should be logged in the event log. In order to make it possible for |
| * the caller to do this logging, the affected job IDs are filled in to the unwaitList parameter rather than |
| * being returned. |
| *@param currentTime is the current time in milliseconds since epoch. |
| *@param unwaitList is filled in with the set of job ID objects that were resumed. |
| */ |
| @Override |
| public void startJobs(long currentTime, ArrayList unwaitList) |
| throws ManifoldCFException |
| { |
| // This method should compare the lasttime field against the current time, for all |
| // "not active" jobs, and see if a job should be started. |
| // |
| // If a job is to be started, then the following occurs: |
| // (1) If the job is "full scan", then all COMPLETED jobqueue entries are converted to |
| // PURGATORY. |
| // (2) The job is labeled as "ACTIVE". |
| // (3) The starttime field is set. |
| // (4) The endtime field is nulled out. |
| // |
| // This method also assesses jobs that are ACTIVE or PAUSED to see if they should be |
| // converted to ACTIVEWAIT or PAUSEDWAIT. This would happen if the current time exceeded |
| // the value in the "windowend" field for the job. |
| // |
| // Finally, jobs in ACTIVEWAIT or PAUSEDWAIT are assessed to see if they should become |
| // ACTIVE or PAUSED. This will occur if we have entered a new window for the job. |
| |
| // Note well: We can't combine locks across both our lock manager and the database unless we do it consistently. The |
| // consistent practice throughout CF is to do the external locks first, then the database locks. This particular method |
| // thus cannot use cached job description information, because it must throw database locks first against the jobs table. |
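| // Worked illustration (hypothetical times, in ms since epoch): suppose lasttime = 1,000,000 and currentTime = 1,500,000, |
| // and one schedule record matches at matchTime = 1,200,000 with a duration of 600,000. The job's run window is then |
| // [1,200,000 .. 1,800,000); since currentTime falls inside it, the job is started (or un-waited) with windowEnd = 1,800,000. |
| // For a START_WINDOWINSIDE job the trial start interval is first pushed back to currentTime - duration = 900,000, so that |
| // a window already in progress can still be matched. |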
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // First, query the appropriate fields of all jobs. |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(",") |
| .append(jobs.lastTimeField).append(",") |
| .append(jobs.statusField).append(",") |
| .append(jobs.startMethodField).append(",") |
| .append(jobs.connectionNameField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(jobs.statusField,new Object[]{ |
| jobs.statusToString(jobs.STATUS_INACTIVE), |
| jobs.statusToString(jobs.STATUS_ACTIVEWAIT), |
| jobs.statusToString(jobs.STATUS_ACTIVEWAITSEEDING), |
| jobs.statusToString(jobs.STATUS_PAUSEDWAIT), |
| jobs.statusToString(jobs.STATUS_PAUSEDWAITSEEDING)})})).append(" AND ") |
| .append(jobs.startMethodField).append("!=? FOR UPDATE"); |
| |
| list.add(jobs.startMethodToString(IJobDescription.START_DISABLE)); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| // Next, we query for the schedule information. In order to do that, we amass a list of job identifiers that we want schedule info |
| // for. |
| Long[] jobIDSet = new Long[set.getRowCount()]; |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i); |
| jobIDSet[i++] = (Long)row.getValue(jobs.idField); |
| } |
| |
| ScheduleRecord[][] srSet = jobs.readScheduleRecords(jobIDSet); |
| |
| i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i); |
| |
| Long jobID = (Long)row.getValue(jobs.idField); |
| int startMethod = jobs.stringToStartMethod((String)row.getValue(jobs.startMethodField)); |
| String connectionName = (String)row.getValue(jobs.connectionNameField); |
| ScheduleRecord[] thisSchedule = srSet[i++]; |
| |
| // Run at specific times |
| |
| // We need to start with the start time as given, plus one |
| long startInterval = ((Long)row.getValue(jobs.lastTimeField)).longValue() + 1; |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Checking if job "+jobID.toString()+" needs to be started; it was last checked at "+ |
| new Long(startInterval).toString()+", and now it is "+new Long(currentTime).toString()); |
| |
| // Proceed to the current time, and find a match if there is one to be found. |
| // If not -> continue |
| |
| // We go through *all* the schedule records. The one that matches that has the latest |
| // end time is the one we take. |
| Long matchTime = null; |
| Long duration = null; |
| boolean requestMinimum = false; |
| |
| for (int l = 0; l < thisSchedule.length; l++) |
| { |
| long trialStartInterval = startInterval; |
| ScheduleRecord sr = thisSchedule[l]; |
| Long thisDuration = sr.getDuration(); |
| if (startMethod == IJobDescription.START_WINDOWINSIDE && |
| thisDuration != null) |
| { |
| // Bump the start interval back before the beginning of the current interval. |
| // This will guarantee a start as long as there is time in the window. |
| long trialStart = currentTime - thisDuration.longValue(); |
| if (trialStart < trialStartInterval) |
| trialStartInterval = trialStart; |
| } |
| |
| Long thisMatchTime = checkTimeMatch(trialStartInterval,currentTime, |
| sr.getDayOfWeek(), |
| sr.getDayOfMonth(), |
| sr.getMonthOfYear(), |
| sr.getYear(), |
| sr.getHourOfDay(), |
| sr.getMinutesOfHour(), |
| sr.getTimezone(), |
| thisDuration); |
| |
| if (thisMatchTime == null) |
| { |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug(" No time match found within interval "+new Long(trialStartInterval).toString()+ |
| " to "+new Long(currentTime).toString()); |
| continue; |
| } |
| |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug(" Time match FOUND within interval "+new Long(trialStartInterval).toString()+ |
| " to "+new Long(currentTime).toString()); |
| |
| if (matchTime == null || thisDuration == null || |
| (duration != null && thisMatchTime.longValue() + thisDuration.longValue() > |
| matchTime.longValue() + duration.longValue())) |
| { |
| matchTime = thisMatchTime; |
| duration = thisDuration; |
| requestMinimum = sr.getRequestMinimum(); |
| } |
| } |
| |
| if (matchTime == null) |
| { |
| jobs.updateLastTime(jobID,currentTime); |
| continue; |
| } |
| |
| int status = jobs.stringToStatus(row.getValue(jobs.statusField).toString()); |
| |
| |
| // Calculate the end of the window |
| Long windowEnd = null; |
| if (duration != null) |
| { |
| windowEnd = new Long(matchTime.longValue()+duration.longValue()); |
| } |
| |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job '"+jobID+"' is within run window at "+new Long(currentTime).toString()+" ms. (which starts at "+ |
| matchTime.toString()+" ms."+((duration==null)?"":(" and goes for "+duration.toString()+" ms."))+")"); |
| } |
| |
| int newJobState; |
| switch (status) |
| { |
| case Jobs.STATUS_INACTIVE: |
| // If job was formerly "inactive", do the full startup. |
| // Start this job! but with no end time. |
| // This does not get logged because the startup thread does the logging. |
| jobs.startJob(jobID,windowEnd,requestMinimum); |
| jobQueue.clearFailTimes(jobID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Signalled for job start for job "+jobID); |
| } |
| break; |
| case Jobs.STATUS_ACTIVEWAIT: |
| unwaitList.add(jobID); |
| jobs.unwaitJob(jobID,Jobs.STATUS_RESUMING,windowEnd); |
| jobQueue.clearFailTimes(jobID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Un-waited job "+jobID); |
| } |
| break; |
| case Jobs.STATUS_ACTIVEWAITSEEDING: |
| unwaitList.add(jobID); |
| jobs.unwaitJob(jobID,Jobs.STATUS_RESUMINGSEEDING,windowEnd); |
| jobQueue.clearFailTimes(jobID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Un-waited job "+jobID); |
| } |
| break; |
| case Jobs.STATUS_PAUSEDWAIT: |
| unwaitList.add(jobID); |
| jobs.unwaitJob(jobID,jobs.STATUS_PAUSED,windowEnd); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Un-waited (but still paused) job "+jobID); |
| } |
| break; |
| case Jobs.STATUS_PAUSEDWAITSEEDING: |
| unwaitList.add(jobID); |
| jobs.unwaitJob(jobID,jobs.STATUS_PAUSEDSEEDING,windowEnd); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Un-waited (but still paused) job "+jobID); |
| } |
| break; |
| case Jobs.STATUS_PAUSINGWAITING: |
| unwaitList.add(jobID); |
| jobs.unwaitJob(jobID,jobs.STATUS_PAUSING,windowEnd); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Un-waited (but still paused) job "+jobID); |
| } |
| break; |
| case Jobs.STATUS_PAUSINGWAITINGSEEDING: |
| unwaitList.add(jobID); |
| jobs.unwaitJob(jobID,jobs.STATUS_PAUSINGSEEDING,windowEnd); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Un-waited (but still paused) job "+jobID); |
| } |
| break; |
| default: |
| break; |
| } |
| |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction resetting for restart: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Put active or paused jobs in wait state, if they've exceeded their window. |
| *@param currentTime is the current time in milliseconds since epoch. |
| *@param waitList is filled in with the set of job ID's that were put into a wait state. |
| */ |
| @Override |
| public void waitJobs(long currentTime, ArrayList waitList) |
| throws ManifoldCFException |
| { |
| // This method assesses jobs that are ACTIVE or PAUSED to see if they should be |
| // converted to ACTIVEWAIT or PAUSEDWAIT. This would happen if the current time exceeded |
| // the value in the "windowend" field for the job. |
| // |
| database.beginTransaction(); |
| try |
| { |
| // First, query the appropriate fields of all jobs. |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(",") |
| .append(jobs.statusField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(jobs.statusField,new Object[]{ |
| jobs.statusToString(jobs.STATUS_ACTIVE), |
| jobs.statusToString(jobs.STATUS_ACTIVESEEDING), |
| jobs.statusToString(jobs.STATUS_ACTIVE_UNINSTALLED), |
| jobs.statusToString(jobs.STATUS_ACTIVESEEDING_UNINSTALLED), |
| jobs.statusToString(jobs.STATUS_PAUSED), |
| jobs.statusToString(jobs.STATUS_PAUSEDSEEDING)})})).append(" AND ") |
| .append(jobs.windowEndField).append("<? FOR UPDATE"); |
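| // (The statement built above expands to roughly: SELECT id,status FROM jobs WHERE status IN (...) AND windowend<? FOR UPDATE; |
| // the actual table and column names come from the Jobs class, and the exact text from the IDBInterface in use.) |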
| |
| list.add(new Long(currentTime)); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| |
| Long jobID = (Long)row.getValue(jobs.idField); |
| waitList.add(jobID); |
| |
| int status = jobs.stringToStatus(row.getValue(jobs.statusField).toString()); |
| |
| // Make the job wait. |
| switch (status) |
| { |
| case Jobs.STATUS_ACTIVE: |
| case Jobs.STATUS_ACTIVE_UNINSTALLED: |
| jobs.waitJob(jobID,Jobs.STATUS_ACTIVEWAITING); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" now in 'wait' state due to window end"); |
| } |
| break; |
| case Jobs.STATUS_ACTIVESEEDING: |
| case Jobs.STATUS_ACTIVESEEDING_UNINSTALLED: |
| jobs.waitJob(jobID,Jobs.STATUS_ACTIVEWAITINGSEEDING); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" now in 'wait' state due to window end"); |
| } |
| break; |
| case Jobs.STATUS_PAUSED: |
| jobs.waitJob(jobID,Jobs.STATUS_PAUSEDWAIT); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" now in 'wait paused' state due to window end"); |
| } |
| break; |
| case Jobs.STATUS_PAUSEDSEEDING: |
| jobs.waitJob(jobID,Jobs.STATUS_PAUSEDWAITSEEDING); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" now in 'wait paused' state due to window end"); |
| } |
| break; |
| case Jobs.STATUS_PAUSING: |
| jobs.waitJob(jobID,Jobs.STATUS_PAUSINGWAITING); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" now in 'wait paused' state due to window end"); |
| } |
| break; |
| case Jobs.STATUS_PAUSINGSEEDING: |
| jobs.waitJob(jobID,Jobs.STATUS_PAUSINGWAITINGSEEDING); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" now in 'wait paused' state due to window end"); |
| } |
| break; |
| default: |
| break; |
| } |
| |
| } |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| |
| /** Reset job schedule. This re-evaluates whether the job should be started now. This method would typically |
| * be called after a job's scheduling window has been changed. |
| *@param jobID is the job identifier. |
| */ |
| @Override |
| public void resetJobSchedule(Long jobID) |
| throws ManifoldCFException |
| { |
| // Note: This is problematic; the expected behavior is for the job to start if "we are within the window", |
| // but not to start if the transition to active status was long enough ago. |
| // Since there's no "right" way to do this, do nothing for now. |
| |
| // This explicitly did NOT work - it caused the job to refire every time it was saved. |
| // jobs.updateLastTime(jobID,0L); |
| } |
| |
| /** Check if the specified job parameters have a 'hit' within the specified interval. |
| *@param startTime is the start time. |
| *@param currentTimestamp is the end time. |
| *@param daysOfWeek is the enumerated days of the week, or null. |
| *@param daysOfMonth is the enumerated days of the month, or null. |
| *@param months is the enumerated months, or null. |
| *@param years is the enumerated years, or null. |
| *@param hours is the enumerated hours, or null. |
| *@param minutes is the enumerated minutes, or null. |
| *@param timezone is the time zone name in which to evaluate the schedule, or null for the default zone. |
| *@param duration is the length of the run window in milliseconds, or null if there is no window restriction. |
| *@return null if there is NO hit within the interval; otherwise the actual time of the hit in milliseconds |
| * from epoch is returned. |
| */ |
| protected static Long checkTimeMatch(long startTime, long currentTimestamp, |
| EnumeratedValues daysOfWeek, |
| EnumeratedValues daysOfMonth, |
| EnumeratedValues months, |
| EnumeratedValues years, |
| EnumeratedValues hours, |
| EnumeratedValues minutes, |
| String timezone, |
| Long duration) |
| { |
| // What we do here is start with the previous timestamp, and advance until we |
| // either encounter a match, or we exceed the current timestamp. |
| |
| Calendar c; |
| if (timezone == null) |
| { |
| c = Calendar.getInstance(); |
| } |
| else |
| { |
| c = Calendar.getInstance(TimeZone.getTimeZone(timezone)); |
| } |
| |
| // Get the current starting time |
| c.setTimeInMillis(startTime); |
| |
| // If there's a duration value, we can't match unless we're within the window. |
| // That means we find a match, and then verify that the end time (match time plus duration) is greater than the current timestamp. |
| // If not, we move on (by incrementing). |
| |
| // The main loop works off of the calendar and these values. |
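| // For example (a minimal illustration, not an exhaustive description): with hours={3}, minutes={30}, and all other |
| // constraints null, a startTime of Tuesday 14:05:07.123 is advanced by this loop to Wednesday 03:30:00.000 in the |
| // schedule's time zone; that value is returned provided it precedes currentTimestamp and, when a duration is given, |
| // provided matchTime+duration still exceeds currentTimestamp. |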
| while (c.getTimeInMillis() < currentTimestamp) |
| { |
| // Round up to the nearest minute, unless at 0 already |
| int x = c.get(Calendar.MILLISECOND); |
| if (x != c.getMinimum(Calendar.MILLISECOND)) |
| { |
| int amtToAdd = c.getLeastMaximum(Calendar.MILLISECOND)+1-x; |
| if (amtToAdd < 1) |
| amtToAdd = 1; |
| c.add(Calendar.MILLISECOND,amtToAdd); |
| continue; |
| } |
| x = c.get(Calendar.SECOND); |
| if (x != c.getMinimum(Calendar.SECOND)) |
| { |
| int amtToAdd = c.getLeastMaximum(Calendar.SECOND)+1-x; |
| if (amtToAdd < 1) |
| amtToAdd = 1; |
| c.add(Calendar.SECOND,amtToAdd); |
| continue; |
| } |
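| // startedToCareYet tracks whether some finer-grained field has been explicitly constrained; once it is true, |
| // coarser fields that have no constraint are left alone instead of being rolled forward to their minimum |
| // (e.g. an unconstrained hour is not forced to midnight once a minute constraint has matched). |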
| boolean startedToCareYet = false; |
| x = c.get(Calendar.MINUTE); |
| // If we care about minutes, round up, otherwise go to the 0 value |
| if (minutes == null) |
| { |
| if (x != c.getMinimum(Calendar.MINUTE)) |
| { |
| int amtToAdd = c.getLeastMaximum(Calendar.MINUTE)+1-x; |
| if (amtToAdd < 1) |
| amtToAdd = 1; |
| c.add(Calendar.MINUTE,amtToAdd); |
| continue; |
| } |
| } |
| else |
| { |
| // See if it is a legit value. |
| if (!minutes.checkValue(x-c.getMinimum(Calendar.MINUTE))) |
| { |
| // Advance to next legit value |
| // We could be clever, but we just advance one |
| c.add(Calendar.MINUTE,1); |
| continue; |
| } |
| startedToCareYet = true; |
| } |
| // Hours |
| x = c.get(Calendar.HOUR_OF_DAY); |
| if (hours == null) |
| { |
| if (!startedToCareYet && x != c.getMinimum(Calendar.HOUR_OF_DAY)) |
| { |
| int amtToAdd = c.getLeastMaximum(Calendar.HOUR_OF_DAY)+1-x; |
| if (amtToAdd < 1) |
| amtToAdd = 1; |
| c.add(Calendar.HOUR_OF_DAY,amtToAdd); |
| continue; |
| } |
| } |
| else |
| { |
| if (!hours.checkValue(x-c.getMinimum(Calendar.HOUR_OF_DAY))) |
| { |
| // next hour |
| c.add(Calendar.HOUR_OF_DAY,1); |
| continue; |
| } |
| startedToCareYet = true; |
| } |
| // Days of month and days of week are at the same level; |
| // these advance concurrently. However, if NEITHER is specified, and nothing |
| // earlier was, then we do the 1st of the month. |
| x = c.get(Calendar.DAY_OF_WEEK); |
| if (daysOfWeek != null) |
| { |
| if (!daysOfWeek.checkValue(x-c.getMinimum(Calendar.DAY_OF_WEEK))) |
| { |
| // next day |
| c.add(Calendar.DAY_OF_WEEK,1); |
| continue; |
| } |
| startedToCareYet = true; |
| } |
| x = c.get(Calendar.DAY_OF_MONTH); |
| if (daysOfMonth == null) |
| { |
| // If nothing is specified but the month or the year, do it on the 1st. |
| if (!startedToCareYet && x != c.getMinimum(Calendar.DAY_OF_MONTH)) |
| { |
| // Move as rapidly as possible towards the first of the month. But in no case, increment |
| // less than one day. |
| int amtToAdd = c.getLeastMaximum(Calendar.DAY_OF_MONTH)+1-x; |
| if (amtToAdd < 1) |
| amtToAdd = 1; |
| c.add(Calendar.DAY_OF_MONTH,amtToAdd); |
| continue; |
| } |
| } |
| else |
| { |
| if (!daysOfMonth.checkValue(x-c.getMinimum(Calendar.DAY_OF_MONTH))) |
| { |
| // next day |
| c.add(Calendar.DAY_OF_MONTH,1); |
| continue; |
| } |
| startedToCareYet = true; |
| } |
| x = c.get(Calendar.MONTH); |
| if (months == null) |
| { |
| if (!startedToCareYet && x != c.getMinimum(Calendar.MONTH)) |
| { |
| int amtToAdd = c.getLeastMaximum(Calendar.MONTH)+1-x; |
| if (amtToAdd < 1) |
| amtToAdd = 1; |
| c.add(Calendar.MONTH,amtToAdd); |
| continue; |
| } |
| } |
| else |
| { |
| if (!months.checkValue(x-c.getMinimum(Calendar.MONTH))) |
| { |
| c.add(Calendar.MONTH,1); |
| continue; |
| } |
| startedToCareYet = true; |
| } |
| x = c.get(Calendar.YEAR); |
| if (years != null) |
| { |
| if (!years.checkValue(x)) |
| { |
| c.add(Calendar.YEAR,1); |
| continue; |
| } |
| startedToCareYet = true; |
| } |
| |
| // Looks like a match. |
| // Last check is to be sure we are in the window, if any. If we are outside the window, |
| // must skip forward. |
| if (duration != null && c.getTimeInMillis() + duration.longValue() <= currentTimestamp) |
| { |
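| // Nudge forward by getLeastMaximum(MILLISECOND) (999 ms); the round-up logic at the top of the loop then |
| // advances to the next whole-minute candidate. |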
| c.add(Calendar.MILLISECOND,c.getLeastMaximum(Calendar.MILLISECOND)); |
| continue; |
| } |
| |
| return new Long(c.getTimeInMillis()); |
| } |
| return null; |
| } |
| |
| /** Manually start a job. The specified job will be run REGARDLESS of the timed windows, and |
| * will not cease until complete. If the job is already running, an exception is thrown. |
| * The job can be manually paused, or manually aborted. |
| *@param jobID is the ID of the job to start. |
| */ |
| @Override |
| public void manualStart(Long jobID) |
| throws ManifoldCFException |
| { |
| manualStart(jobID,false); |
| } |
| |
| /** Manually start a job. The specified job will be run REGARDLESS of the timed windows, and |
| * will not cease until complete. If the job is already running, an exception is thrown. |
| * The job can be manually paused, or manually aborted. |
| *@param jobID is the ID of the job to start. |
| *@param requestMinimum is true if a minimal job run is requested. |
| */ |
| @Override |
| public void manualStart(Long jobID, boolean requestMinimum) |
| throws ManifoldCFException |
| { |
| database.beginTransaction(); |
| try |
| { |
| // First, query the appropriate fields of all jobs. |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.statusField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,jobID)})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| if (set.getRowCount() < 1) |
| throw new ManifoldCFException("No such job: "+jobID); |
| |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus(row.getValue(jobs.statusField).toString()); |
| if (status != Jobs.STATUS_INACTIVE) |
| throw new ManifoldCFException("Job "+jobID+" is already running"); |
| |
| IJobDescription jobDescription = jobs.load(jobID,true); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Manually starting job "+jobID); |
| } |
| // Start this job! but with no end time. |
| jobs.startJob(jobID,null,requestMinimum); |
| jobQueue.clearFailTimes(jobID); |
| |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Manual job start signal for job "+jobID+" successfully sent"); |
| } |
| |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| } |
| } |
| |
| /** Note job delete started. |
| *@param jobID is the job id. |
| *@param startTime is the job delete start time. |
| */ |
| @Override |
| public void noteJobDeleteStarted(Long jobID, long startTime) |
| throws ManifoldCFException |
| { |
| jobs.noteJobDeleteStarted(jobID,startTime); |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Job "+jobID+" delete is now started"); |
| } |
| |
| /** Note job started. |
| *@param jobID is the job id. |
| *@param startTime is the job start time. |
| *@param seedingVersion is the seeding version string to record for the job. |
| */ |
| @Override |
| public void noteJobStarted(Long jobID, long startTime, String seedingVersion) |
| throws ManifoldCFException |
| { |
| jobs.noteJobStarted(jobID,startTime,seedingVersion); |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Job "+jobID+" is now started"); |
| } |
| |
| /** Note job seeded. |
| *@param jobID is the job id. |
| *@param seedingVersion is the job seeding version string to record. |
| */ |
| @Override |
| public void noteJobSeeded(Long jobID, String seedingVersion) |
| throws ManifoldCFException |
| { |
| jobs.noteJobSeeded(jobID,seedingVersion); |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Job "+jobID+" has been successfully reseeded"); |
| } |
| |
| /** Prepare for a delete scan. |
| *@param jobID is the job id. |
| */ |
| @Override |
| public void prepareDeleteScan(Long jobID) |
| throws ManifoldCFException |
| { |
| // No special treatment needed for hopcount or carrydown, since these all get deleted at once |
| // at the end of the job delete process. |
| TrackerClass.notePrecommit(); |
| jobQueue.prepareDeleteScan(jobID); |
| TrackerClass.noteCommit(); |
| } |
| |
| /** Prepare a job to be run. |
| * This method is called regardless of the details of the job; what differs is only the flags that are passed in. |
| * The code inside will determine the appropriate procedures. |
| * (This method replaces prepareFullScan() and prepareIncrementalScan(). ) |
| *@param jobID is the job id. |
| *@param legalLinkTypes are the link types allowed for the job. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| *@param connectorModel is the model used by the connector for the job. |
| *@param continuousJob is true if the job is a continuous one. |
| *@param fromBeginningOfTime is true if the job is running starting from time 0. |
| *@param requestMinimum is true if the minimal amount of work is requested for the job run. |
| */ |
| @Override |
| public void prepareJobScan(Long jobID, String[] legalLinkTypes, int hopcountMethod, |
| int connectorModel, boolean continuousJob, boolean fromBeginningOfTime, |
| boolean requestMinimum) |
| throws ManifoldCFException |
| { |
| |
| // (1) If the connector has MODEL_ADD_CHANGE_DELETE, then |
| // we let the connector run the show; there's no purge phase, and therefore the |
| // documents are left in a COMPLETED state if they don't show up in the list |
| // of seeds that require the attention of the connector. However, we do need to |
| // preload the queue with all the existing documents, if there was any change to the |
| // specification information (which will mean that fromBeginningOfTime is set). |
| // |
| // (2) If the connector has MODEL_ALL, then it's a full crawl no matter what, so |
| // we do a full scan initialization. |
| // |
| // (3) If the connector has some other model, we look at the start time. A start |
| // time of 0 implies a full scan, while any other start time implies an incremental |
| // scan. |
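| // For example (an illustrative case, not an exhaustive list): a connector reporting MODEL_ADD on a |
| // non-continuous job whose specification is unchanged (fromBeginningOfTime == false) falls through to |
| // prepareIncrementalScan() below, so previously-seen documents are re-queued in order to detect changes |
| // and deletions that such a connector cannot report on its own. |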
| |
| // Always reset document schedules for those documents already pending! |
| jobQueue.resetPendingDocumentSchedules(jobID); |
| |
| // Complete connector model is told everything, so no delete phase. |
| if (connectorModel == IRepositoryConnector.MODEL_ADD_CHANGE_DELETE) |
| { |
| if (fromBeginningOfTime) |
| queueAllExisting(jobID,legalLinkTypes); |
| return; |
| } |
| |
| // If the connector model is complete via chaining, then we just need to make |
| // sure discovery works to queue the changes. |
| if (connectorModel == IRepositoryConnector.MODEL_CHAINED_ADD_CHANGE_DELETE) |
| { |
| if (fromBeginningOfTime) |
| queueAllExisting(jobID,legalLinkTypes); |
| else |
| jobQueue.preparePartialScan(jobID); |
| return; |
| } |
| |
| // Look for a minimum crawl. |
| // Minimum crawls do only what is seeded, in general. These are partial scans, always. MODEL_ALL disables this |
| // functionality, as does a scan from the beginning of time (after the job spec has been changed). |
| if (requestMinimum && connectorModel != IRepositoryConnector.MODEL_ALL && !fromBeginningOfTime) |
| { |
| // Minimum crawl requested. |
| // If it is a chained model, do the partial prep. If it's a non-chained model, do nothing for prep; the seeding |
| // will flag the documents we want to look at. |
| if (connectorModel == IRepositoryConnector.MODEL_CHAINED_ADD || |
| connectorModel == IRepositoryConnector.MODEL_CHAINED_ADD_CHANGE) |
| jobQueue.preparePartialScan(jobID); |
| return; |
| } |
| |
| if (!continuousJob && connectorModel != IRepositoryConnector.MODEL_PARTIAL && |
| (connectorModel == IRepositoryConnector.MODEL_ALL || fromBeginningOfTime)) |
| { |
| // Prepare for a full scan if: |
| // (a) not a continuous job, and |
| // (b) not a partial model (which always disables full scans), and |
| // (c) either MODEL_ALL or from the beginning of time (which are essentially equivalent) |
| prepareFullScan(jobID,legalLinkTypes,hopcountMethod); |
| } |
| else |
| { |
| // Map COMPLETE and UNCHANGED to PENDINGPURGATORY, if: |
| // (a) job is continuous, OR |
| // (b) MODEL_PARTIAL, OR |
| // (c) not MODEL_ALL AND not from beginning of time |
| // This causes all existing documents to be rechecked! This is needed because the model is not |
| // complete at this point; we have ADD but we don't have either CHANGE or DELETE. |
| jobQueue.prepareIncrementalScan(jobID); |
| } |
| } |
| |
| /** Queue all existing. |
| *@param jobID is the job id. |
| *@param legalLinkTypes are the link types allowed for the job. |
| */ |
| protected void queueAllExisting(Long jobID, String[] legalLinkTypes) |
| throws ManifoldCFException |
| { |
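| // Retry loop: if the database reports DATABASE_TRANSACTION_ABORT (e.g. a deadlock or serialization failure), |
| // the transaction is rolled back and retried after a small random back-off from getRandomAmount(); on success |
| // the loop exits and sleepFor(0L) in the finally clause is effectively a no-op. |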
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| if (legalLinkTypes.length > 0) |
| { |
| jobQueue.reactivateHopcountRemovedRecords(jobID); |
| } |
| |
| jobQueue.queueAllExisting(jobID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction during queueAllExisting: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| } |
| |
| /** Prepare for a full scan. |
| *@param jobID is the job id. |
| *@param legalLinkTypes are the link types allowed for the job. |
| *@param hopcountMethod describes how to handle deletions for hopcount purposes. |
| */ |
| protected void prepareFullScan(Long jobID, String[] legalLinkTypes, int hopcountMethod) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| // Since we delete documents here, we need to manage the hopcount part of the world too. |
| database.beginTransaction(database.TRANSACTION_SERIALIZED); |
| try |
| { |
| // Delete the documents we have never fetched, including any hopcount records we've calculated. |
| if (legalLinkTypes.length > 0) |
| { |
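| // "t99" is the alias under which the job queue table participates in the matching-document delete |
| // that HopCount.deleteMatchingDocuments() constructs from these arguments. |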
| ArrayList list = new ArrayList(); |
| String query = database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause("t99."+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED)})}); |
| hopCount.deleteMatchingDocuments(jobID,legalLinkTypes,jobQueue.getTableName()+" t99", |
| "t99."+jobQueue.docHashField,"t99."+jobQueue.jobIDField, |
| query,list, |
| hopcountMethod); |
| } |
| |
| jobQueue.prepareFullScan(jobID); |
| TrackerClass.notePrecommit(); |
| database.performCommit(); |
| TrackerClass.noteCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction preparing full scan: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| TrackerClass.noteRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Manually abort a running job. The job will be permanently stopped, and will not run again until |
| * automatically started based on schedule, or manually started. |
| *@param jobID is the job to abort. |
| */ |
| @Override |
| public void manualAbort(Long jobID) |
| throws ManifoldCFException |
| { |
| // Just whack status back to "INACTIVE". The active documents will continue to be processed until done, |
| // but that's fine. There will be no finishing stage, obviously. |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Manually aborting job "+jobID); |
| } |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.abortJob(jobID,null); |
| database.performCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction aborting job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" abort signal successfully sent"); |
| } |
| } |
| |
| /** Manually restart a running job. The job will be stopped and restarted. Any schedule affinity will be lost, |
| * until the job finishes on its own. |
| *@param jobID is the job to abort. |
| *@param requestMinimum is true if a minimal job run is requested. |
| */ |
| @Override |
| public void manualAbortRestart(Long jobID, boolean requestMinimum) |
| throws ManifoldCFException |
| { |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Manually restarting job "+jobID); |
| } |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.abortRestartJob(jobID,requestMinimum); |
| database.performCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction restarting job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" restart signal successfully sent"); |
| } |
| } |
| |
| /** Manually restart a running job. The job will be stopped and restarted. Any schedule affinity will be lost, |
| * until the job finishes on its own. |
| *@param jobID is the job to abort. |
| */ |
| @Override |
| public void manualAbortRestart(Long jobID) |
| throws ManifoldCFException |
| { |
| manualAbortRestart(jobID,false); |
| } |
| |
| /** Abort a running job due to a fatal error condition. |
| *@param jobID is the job to abort. |
| *@param errorText is the error text. |
| *@return true if this is the first logged abort request for this job. |
| */ |
| @Override |
| public boolean errorAbort(Long jobID, String errorText) |
| throws ManifoldCFException |
| { |
| // Just whack status back to "INACTIVE". The active documents will continue to be processed until done, |
| // but that's fine. There will be no finishing stage, obviously. |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Aborting job "+jobID+" due to error '"+errorText+"'"); |
| } |
| boolean rval; |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| rval = jobs.abortJob(jobID,errorText); |
| database.performCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction aborting job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| if (rval && Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" abort signal successfully sent"); |
| } |
| return rval; |
| } |
| |
| /** Pause a job. |
| *@param jobID is the job identifier to pause. |
| */ |
| @Override |
| public void pauseJob(Long jobID) |
| throws ManifoldCFException |
| { |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Manually pausing job "+jobID); |
| } |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.pauseJob(jobID); |
| database.performCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction pausing job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" successfully paused"); |
| } |
| |
| } |
| |
| /** Restart a paused job. |
| *@param jobID is the job identifier to restart. |
| */ |
| @Override |
| public void restartJob(Long jobID) |
| throws ManifoldCFException |
| { |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Manually restarting paused job "+jobID); |
| } |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| jobs.restartJob(jobID); |
| jobQueue.clearFailTimes(jobID); |
| database.performCommit(); |
| break; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted transaction restarting paused job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" successfully restarted"); |
| } |
| } |
| |
| /** Get the list of jobs that are ready for seeding. |
| *@param processID is the current process ID. |
| *@param currentTime is the current time in milliseconds since epoch. |
| *@return jobs that are active and are running in adaptive mode. These will be seeded |
| * based on what the connector says should be added to the queue. |
| */ |
| @Override |
| public JobSeedingRecord[] getJobsReadyForSeeding(String processID, long currentTime) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Do the query |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(",") |
| .append(jobs.seedingVersionField).append(",") |
| .append(jobs.failTimeField).append(",") |
| .append(jobs.failCountField).append(",") |
| .append(jobs.reseedIntervalField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.statusField,jobs.statusToString(jobs.STATUS_ACTIVE))})).append(" AND ") |
| .append(jobs.typeField).append("=? AND ") |
| .append("(").append(jobs.reseedTimeField).append(" IS NULL OR ").append(jobs.reseedTimeField).append("<=?)") |
| .append(" FOR UPDATE"); |
| |
| list.add(jobs.typeToString(jobs.TYPE_CONTINUOUS)); |
| list.add(new Long(currentTime)); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| // Update them all |
| JobSeedingRecord[] rval = new JobSeedingRecord[set.getRowCount()]; |
| int i = 0; |
| while (i < rval.length) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| String seedingVersionString = (String)row.getValue(jobs.seedingVersionField); |
| |
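| // Compute the next reseed deadline from the job's reseed interval; a null interval leaves the reseed |
| // time unset, which the eligibility query above treats as "always eligible". |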
| Long r = (Long)row.getValue(jobs.reseedIntervalField); |
| Long reseedTime; |
| if (r != null) |
| reseedTime = new Long(currentTime + r.longValue()); |
| else |
| reseedTime = null; |
| |
| Long failTimeLong = (Long)row.getValue(jobs.failTimeField); |
| Long failRetryCountLong = (Long)row.getValue(jobs.failCountField); |
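| // A null fail time / fail count in the database indicates no recorded failure state; the JobSeedingRecord |
| // uses -1 as the corresponding "no value" sentinel. |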
| long failTime; |
| if (failTimeLong == null) |
| failTime = -1L; |
| else |
| failTime = failTimeLong.longValue(); |
| int failRetryCount; |
| if (failRetryCountLong == null) |
| failRetryCount = -1; |
| else |
| failRetryCount = (int)failRetryCountLong.longValue(); |
| |
| // Mark status of job as "active/seeding". Special status is needed so that abort |
| // will not complete until seeding is completed. |
| jobs.writeTransientStatus(jobID,jobs.STATUS_ACTIVESEEDING,reseedTime,processID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Marked job "+jobID+" for seeding"); |
| } |
| rval[i] = new JobSeedingRecord(jobID,seedingVersionString,failTime,failRetryCount); |
| i++; |
| } |
| database.performCommit(); |
| return rval; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted getting jobs ready for seeding: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Get the list of jobs that are ready for delete cleanup. |
| *@param processID is the current process ID. |
| *@return jobs that were in the "readyfordelete" state. |
| */ |
| @Override |
| public JobDeleteRecord[] getJobsReadyForDeleteCleanup(String processID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Do the query |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.statusField,jobs.statusToString(jobs.STATUS_READYFORDELETE))})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| // Update them all |
| JobDeleteRecord[] rval = new JobDeleteRecord[set.getRowCount()]; |
| int i = 0; |
| while (i < rval.length) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| |
| // Mark status of job as "starting delete" |
| jobs.writeTransientStatus(jobID,jobs.STATUS_DELETESTARTINGUP,processID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Marked job "+jobID+" for delete startup"); |
| } |
| |
| rval[i] = new JobDeleteRecord(jobID); |
| i++; |
| } |
| database.performCommit(); |
| return rval; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted getting jobs ready for delete cleanup: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Get the list of jobs that are ready for startup. |
| *@param processID is the current process ID. |
| *@return jobs that were in the "readyforstartup" state. These will be marked as being in the "starting up" state. |
| */ |
| @Override |
| public JobStartRecord[] getJobsReadyForStartup(String processID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Do the query |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(",") |
| .append(jobs.failTimeField).append(",") |
| .append(jobs.failCountField).append(",") |
| .append(jobs.seedingVersionField).append(",") |
| .append(jobs.statusField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(jobs.statusField,new Object[]{ |
| jobs.statusToString(jobs.STATUS_READYFORSTARTUP), |
| jobs.statusToString(jobs.STATUS_READYFORSTARTUPMINIMAL)})})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| // Update them all |
| JobStartRecord[] rval = new JobStartRecord[set.getRowCount()]; |
| int i = 0; |
| while (i < rval.length) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| String seedingVersionString = (String)row.getValue(jobs.seedingVersionField); |
| int status = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| Long failTimeLong = (Long)row.getValue(jobs.failTimeField); |
| Long failRetryCountLong = (Long)row.getValue(jobs.failCountField); |
| long failTime; |
| if (failTimeLong == null) |
| failTime = -1L; |
| else |
| failTime = failTimeLong.longValue(); |
| int failRetryCount; |
| if (failRetryCountLong == null) |
| failRetryCount = -1; |
| else |
| failRetryCount = (int)failRetryCountLong.longValue(); |
| |
| boolean requestMinimum = (status == jobs.STATUS_READYFORSTARTUPMINIMAL); |
| |
| // Mark status of job as "starting" |
| jobs.writeTransientStatus(jobID,requestMinimum?jobs.STATUS_STARTINGUPMINIMAL:jobs.STATUS_STARTINGUP,processID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Marked job "+jobID+" for startup"); |
| } |
| |
| rval[i] = new JobStartRecord(jobID,seedingVersionString,requestMinimum,failTime,failRetryCount); |
| i++; |
| } |
| database.performCommit(); |
| return rval; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted getting jobs ready for startup: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Inactivate a job, from the notification state. |
| *@param jobID is the ID of the job to inactivate. |
| */ |
| @Override |
| public void inactivateJob(Long jobID) |
| throws ManifoldCFException |
| { |
| // There is no known flow that can leave the job in the wrong state when this gets called, but as a |
| // precaution the work is done inside a transaction and the job's state is checked first. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Check job status |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,jobID)})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| if (set.getRowCount() == 0) |
| throw new ManifoldCFException("No such job: "+jobID); |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| |
| switch (status) |
| { |
| case Jobs.STATUS_NOTIFYINGOFCOMPLETION: |
| jobs.notificationComplete(jobID); |
| break; |
| default: |
| throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status)); |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted clearing notification state for job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Remove a job, from the notification state. |
| *@param jobID is the ID of the job to remove. |
| */ |
| @Override |
| public void removeJob(Long jobID) |
| throws ManifoldCFException |
| { |
| // There is no known flow that can leave the job in the wrong state when this gets called, but as a |
| // precaution the work is done inside a transaction and the job's state is checked first. |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Check job status |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,jobID)})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| if (set.getRowCount() == 0) |
| // Presume already removed! |
| return; |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| |
| switch (status) |
| { |
| case Jobs.STATUS_NOTIFYINGOFDELETION: |
| ManifoldCF.noteConfigurationChange(); |
| // Remove documents from job queue |
| jobQueue.deleteAllJobRecords(jobID); |
| // Remove carrydowns for the job |
| carryDown.deleteOwner(jobID); |
| // Nothing is in a critical section - so this should be OK. |
| hopCount.deleteOwner(jobID); |
| jobs.delete(jobID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Removed job "+jobID); |
| } |
| break; |
| default: |
| throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status)); |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted clearing delete notification state for job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset a job starting for delete back to "ready for delete" |
| * state. |
| *@param jobID is the job id. |
| */ |
| @Override |
| public void resetStartDeleteJob(Long jobID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Check job status |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,jobID)})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| if (set.getRowCount() == 0) |
| throw new ManifoldCFException("No such job: "+jobID); |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| |
| switch (status) |
| { |
| case Jobs.STATUS_DELETESTARTINGUP: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'ReadyForDelete' state"); |
| |
| // Set the state of the job back to "ReadyForDelete" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_READYFORDELETE,true); |
| break; |
| default: |
| throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status)); |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted resetting start delete job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset a job that is notifying back to "ready for notify" |
| * state. |
| *@param jobID is the job id. |
| */ |
| @Override |
| public void resetNotifyJob(Long jobID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Check job status |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,jobID)})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| if (set.getRowCount() == 0) |
| throw new ManifoldCFException("No such job: "+jobID); |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| |
| switch (status) |
| { |
| case Jobs.STATUS_NOTIFYINGOFCOMPLETION: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'ReadyForNotify' state"); |
| |
| // Set the state of the job back to "ReadyForNotify" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_READYFORNOTIFY,true); |
| break; |
| default: |
| throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status)); |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted resetting notify job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset a job that is delete notifying back to "ready for delete notify" |
| * state. |
| *@param jobID is the job id. |
| */ |
| @Override |
| public void resetDeleteNotifyJob(Long jobID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Check job status |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,jobID)})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| if (set.getRowCount() == 0) |
| throw new ManifoldCFException("No such job: "+jobID); |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| |
| switch (status) |
| { |
| case Jobs.STATUS_NOTIFYINGOFDELETION: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'ReadyForDeleteNotify' state"); |
| |
| // Set the state of the job back to "ReadyForDeleteNotify" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_READYFORDELETENOTIFY,true); |
| break; |
| default: |
| throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status)); |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted resetting delete notify job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset a starting job back to "ready for startup" state. |
| *@param jobID is the job id. |
| */ |
| @Override |
| public void resetStartupJob(Long jobID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Check job status |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,jobID)})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| if (set.getRowCount() == 0) |
| throw new ManifoldCFException("No such job: "+jobID); |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| |
| switch (status) |
| { |
| case Jobs.STATUS_STARTINGUP: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'ReadyForStartup' state"); |
| |
| // Set the state of the job back to "ReadyForStartup" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_READYFORSTARTUP,true); |
| break; |
| case Jobs.STATUS_STARTINGUPMINIMAL: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'ReadyForStartupMinimal' state"); |
| |
| // Set the state of the job back to "ReadyForStartupMinimal" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_READYFORSTARTUPMINIMAL,true); |
| break; |
| case Jobs.STATUS_ABORTINGSTARTINGUPFORRESTART: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" to 'AbortingForRestart' state"); |
| jobs.writePermanentStatus(jobID,jobs.STATUS_ABORTINGFORRESTART,true); |
| break; |
| case Jobs.STATUS_ABORTINGSTARTINGUPFORRESTARTMINIMAL: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" to 'AbortingForRestartMinimal' state"); |
| jobs.writePermanentStatus(jobID,jobs.STATUS_ABORTINGFORRESTARTMINIMAL,true); |
| break; |
| |
| case Jobs.STATUS_READYFORSTARTUP: |
| case Jobs.STATUS_READYFORSTARTUPMINIMAL: |
| case Jobs.STATUS_ABORTING: |
| case Jobs.STATUS_ABORTINGFORRESTART: |
| case Jobs.STATUS_ABORTINGFORRESTARTMINIMAL: |
| // ok |
| break; |
| default: |
| throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status)); |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted resetting startup job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Reset a seeding job back to "active" state. |
| *@param jobID is the job id. |
| */ |
| @Override |
| public void resetSeedJob(Long jobID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Check job status |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.idField,jobID)})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| if (set.getRowCount() == 0) |
| throw new ManifoldCFException("No such job: "+jobID); |
| IResultRow row = set.getRow(0); |
| int status = jobs.stringToStatus((String)row.getValue(jobs.statusField)); |
| switch (status) |
| { |
| case Jobs.STATUS_ACTIVESEEDING_UNINSTALLED: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'Active_Uninstalled' state"); |
| |
| // Set the state of the job back to "Active" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_ACTIVE_UNINSTALLED); |
| break; |
| case Jobs.STATUS_ACTIVESEEDING: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'Active' state"); |
| |
| // Set the state of the job back to "Active" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_ACTIVE); |
| break; |
| case Jobs.STATUS_ACTIVEWAITSEEDING: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'ActiveWait' state"); |
| |
| // Set the state of the job back to "ActiveWait" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_ACTIVEWAIT); |
| break; |
| case Jobs.STATUS_PAUSEDSEEDING: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'Paused' state"); |
| |
| // Set the state of the job back to "Paused" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_PAUSED); |
| break; |
| case Jobs.STATUS_PAUSEDWAITSEEDING: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'PausedWait' state"); |
| |
| // Set the state of the job back to "PausedWait" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_PAUSEDWAIT); |
| break; |
| |
| case Jobs.STATUS_ABORTINGFORRESTARTSEEDING: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'AbortingForRestart' state"); |
| |
| // Set the state of the job back to "AbortingForRestart" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_ABORTINGFORRESTART); |
| break; |
| |
| case Jobs.STATUS_ABORTINGFORRESTARTSEEDINGMINIMAL: |
| if (Logging.jobs.isDebugEnabled()) |
| Logging.jobs.debug("Setting job "+jobID+" back to 'AbortingForRestartMinimal' state"); |
| |
| // Set the state of the job back to "AbortingForRestartMinimal" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_ABORTINGFORRESTARTMINIMAL); |
| break; |
| |
| case Jobs.STATUS_ABORTING: |
| case Jobs.STATUS_ABORTINGFORRESTART: |
| case Jobs.STATUS_ABORTINGFORRESTARTMINIMAL: |
| case Jobs.STATUS_ACTIVE: |
| case Jobs.STATUS_ACTIVE_UNINSTALLED: |
| case Jobs.STATUS_PAUSED: |
| case Jobs.STATUS_ACTIVEWAIT: |
| case Jobs.STATUS_PAUSEDWAIT: |
| // ok |
| break; |
| default: |
| throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status)); |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted resetting seeding job: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| |
| /** Delete jobs in need of being deleted (which are marked "ready for delete"). |
| * This method is meant to be called periodically to perform delete processing on jobs. |
| */ |
| @Override |
| public void deleteJobsReadyForDelete() |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| // This method must find only jobs that have nothing hanging around in their jobqueue that represents an ingested |
| // document. Any jobqueue entries which are in a state to interfere with the delete will be cleaned up by other |
| // threads, so eventually a job will become eligible. This happens when there are no records that have an ingested |
| // status: complete, purgatory, being-cleaned, being-deleted, or pending purgatory. |
| database.beginTransaction(); |
| try |
| { |
| // The original query was: |
| // |
| // SELECT id FROM jobs t0 WHERE status='D' AND NOT EXISTS(SELECT 'x' FROM jobqueue t1 WHERE t0.id=t1.jobid AND |
| // t1.status IN ('C', 'F', 'G')) |
| // |
| // However, this did not work well with Postgres when the tables got big. So I revised things to do the following multi-stage process: |
| // (1) The query should be broken up, such that n queries are done: |
| // (a) the first one should get all candidate jobs (those that have the right state) |
| // (b) there should be a query for each job of roughly this form: SELECT id FROM jobqueue WHERE jobid=xxx AND status IN (...) LIMIT 1 |
| // This will work way better than postgresql currently works, because neither the cost-based analysis nor the actual NOT clause seem to allow |
| // early exit!! |
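| // The per-job confirmation query built below comes out roughly as: |
| // SELECT id FROM jobqueue WHERE jobid=? AND status IN (?,?) LIMIT 1 |
| // (the status values are whatever jobQueue.statusToString() yields for ELIGIBLEFORDELETE and BEINGDELETED, |
| // and the LIMIT syntax is whatever constructOffsetLimitClause() emits for the database in use). |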
| |
| // Do the first query, getting the candidate jobs to be considered |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.statusField,jobs.statusToString(jobs.STATUS_DELETING))})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| // Now, loop through this list. For each one, verify that it's okay to delete it |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| |
| list.clear(); |
| sb = new StringBuilder("SELECT "); |
| |
| sb.append(jobQueue.idField).append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.jobIDField,jobID), |
| new MultiClause(jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_ELIGIBLEFORDELETE), |
| jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED)})})) |
| .append(" ").append(database.constructOffsetLimitClause(0,1)); |
| |
| IResultSet confirmSet = database.performQuery(sb.toString(),list,null,null,1,null); |
| |
| if (confirmSet.getRowCount() > 0) |
| continue; |
| |
| jobs.finishJobCleanup(jobID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" cleanup is now completed"); |
| } |
| |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted deleting jobs ready for delete: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Put all eligible jobs in the "shutting down" state. |
| */ |
| @Override |
| public void finishJobs() |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| // The jobs we should transition: |
| // - are active |
| // - have no ACTIVE, ACTIVENEEDRESCAN, PENDING, ACTIVEPURGATORY, ACTIVENEEDRESCANPURGATORY, or PENDINGPURGATORY records |
| database.beginTransaction(); |
| try |
| { |
| // The query I used to emit was: |
| // SELECT jobid FROM jobs t0 WHERE t0.status='A' AND NOT EXISTS(SELECT 'x' FROM jobqueue t1 WHERE |
| // t0.id=t1.jobid AND t1.status IN ('A','P','F','G')) |
| |
| // This did not get along well with Postgresql, so instead the query is broken up into several queries: |
| // (a) the first one gets all candidate jobs (those that have the right state); |
| // (b) there is then a query for each job of roughly this form: SELECT id FROM jobqueue WHERE jobid=xxx AND status IN (...) LIMIT 1 |
| // This performs far better, because neither PostgreSQL's cost-based analysis nor the NOT EXISTS clause allows an early exit. |
| |
| // Do the first query, getting the candidate jobs to be considered |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(jobs.statusField,new Object[]{ |
| jobs.statusToString(jobs.STATUS_ACTIVE), |
| jobs.statusToString(jobs.STATUS_ACTIVEWAIT), |
| jobs.statusToString(jobs.STATUS_ACTIVE_UNINSTALLED)})})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| |
| // Check to be sure the job is a candidate for shutdown |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| |
| sb.append(jobQueue.idField).append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.jobIDField,jobID), |
| new MultiClause(jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVE), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN), |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)})})) |
| .append(" ").append(database.constructOffsetLimitClause(0,1)); |
| |
| IResultSet confirmSet = database.performQuery(sb.toString(),list,null,null,1,null); |
| |
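| // If any such row exists, the job still has outstanding documents and cannot be shut down yet; move on to the next candidate. |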
| if (confirmSet.getRowCount() > 0) |
| continue; |
| |
| // Mark status of job as "finishing" |
| jobs.writePermanentStatus(jobID,jobs.STATUS_SHUTTINGDOWN,true); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Marked job "+jobID+" for shutdown"); |
| } |
| |
| } |
| database.performCommit(); |
| return; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted finishing jobs: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Find the list of jobs that need to have their connectors notified of job completion. |
| *@param processID is the process ID. |
| *@return the IDs of jobs that need their output connectors notified in order to become inactive. |
| */ |
| @Override |
| public JobNotifyRecord[] getJobsReadyForInactivity(String processID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Do the query |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(",").append(jobs.failTimeField).append(",").append(jobs.failCountField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.statusField,jobs.statusToString(jobs.STATUS_READYFORNOTIFY))})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| // Return them all |
| JobNotifyRecord[] rval = new JobNotifyRecord[set.getRowCount()]; |
| int i = 0; |
| while (i < rval.length) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| Long failTimeLong = (Long)row.getValue(jobs.failTimeField); |
| Long failRetryCountLong = (Long)row.getValue(jobs.failCountField); |
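| // Null fail time and fail count values are passed along as -1 (no limit set) in the JobNotifyRecord. |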
| long failTime; |
| if (failTimeLong == null) |
| failTime = -1L; |
| else |
| failTime = failTimeLong.longValue(); |
| int failRetryCount; |
| if (failRetryCountLong == null) |
| failRetryCount = -1; |
| else |
| failRetryCount = (int)failRetryCountLong.longValue(); |
| |
| // Mark status of job as "starting delete" |
| jobs.writeTransientStatus(jobID,jobs.STATUS_NOTIFYINGOFCOMPLETION,processID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Found job "+jobID+" in need of notification"); |
| } |
| rval[i++] = new JobNotifyRecord(jobID,failTime,failRetryCount); |
| } |
| database.performCommit(); |
| return rval; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted getting jobs ready for notify: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Find the list of jobs that need to have their connectors notified of job deletion. |
| *@param processID is the process ID. |
| *@return the IDs of jobs that need their output connectors notified in order to be removed. |
| */ |
| @Override |
| public JobNotifyRecord[] getJobsReadyForDelete(String processID) |
| throws ManifoldCFException |
| { |
| while (true) |
| { |
| long sleepAmt = 0L; |
| database.beginTransaction(); |
| try |
| { |
| // Do the query |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(",").append(jobs.failTimeField).append(",").append(jobs.failCountField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.statusField,jobs.statusToString(jobs.STATUS_READYFORDELETENOTIFY))})) |
| .append(" FOR UPDATE"); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| // Return them all |
| JobNotifyRecord[] rval = new JobNotifyRecord[set.getRowCount()]; |
| int i = 0; |
| while (i < rval.length) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| Long failTimeLong = (Long)row.getValue(jobs.failTimeField); |
| Long failRetryCountLong = (Long)row.getValue(jobs.failCountField); |
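| // Null fail time and fail count values are passed along as -1 (no limit set) in the JobNotifyRecord. |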
| long failTime; |
| if (failTimeLong == null) |
| failTime = -1L; |
| else |
| failTime = failTimeLong.longValue(); |
| int failRetryCount; |
| if (failRetryCountLong == null) |
| failRetryCount = -1; |
| else |
| failRetryCount = (int)failRetryCountLong.longValue(); |
| |
| // Mark status of job as "starting delete" |
| jobs.writeTransientStatus(jobID,jobs.STATUS_NOTIFYINGOFDELETION,processID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Found job "+jobID+" in need of delete notification"); |
| } |
| rval[i++] = new JobNotifyRecord(jobID,failTime,failRetryCount); |
| } |
| database.performCommit(); |
| return rval; |
| } |
| catch (ManifoldCFException e) |
| { |
| database.signalRollback(); |
| if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug("Aborted getting jobs ready for notify: "+e.getMessage()); |
| sleepAmt = getRandomAmount(); |
| continue; |
| } |
| throw e; |
| } |
| catch (Error e) |
| { |
| database.signalRollback(); |
| throw e; |
| } |
| finally |
| { |
| database.endTransaction(); |
| sleepFor(sleepAmt); |
| } |
| } |
| } |
| |
| /** Complete the sequence that resumes jobs, either from a pause or from a scheduling window |
| * wait. The logic will restore the job to an active state (many possibilities depending on |
| * connector status), and will record the jobs that have been so modified. |
| *@param timestamp is the current time in milliseconds since epoch. |
| *@param modifiedJobs is filled in with the set of IJobDescription objects that were resumed. |
| */ |
| @Override |
| public void finishJobResumes(long timestamp, ArrayList modifiedJobs) |
| throws ManifoldCFException |
| { |
| // Alternative to using a write lock here: Put this in a transaction, with a "FOR UPDATE" on the first query. |
| // I think that still causes way too much locking, though, on some databases. |
| lockManager.enterWriteLock(jobResumeLock); |
| try |
| { |
| // Do the first query, getting the candidate jobs to be considered |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(jobs.statusField,new Object[]{ |
| jobs.statusToString(jobs.STATUS_RESUMING), |
| jobs.statusToString(jobs.STATUS_RESUMINGSEEDING) |
| })})); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| |
| // There are no secondary checks that need to be made; just resume |
| IJobDescription jobDesc = jobs.load(jobID,true); |
| modifiedJobs.add(jobDesc); |
| |
| jobs.finishResumeJob(jobID,timestamp); |
| |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Resumed job "+jobID); |
| } |
| } |
| } |
| finally |
| { |
| lockManager.leaveWriteLock(jobResumeLock); |
| } |
| } |
| |
| /** Complete the sequence that stops jobs, either for abort, pause, or because of a scheduling |
| * window. The logic will move the job to its next state (INACTIVE, PAUSED, ACTIVEWAIT), |
| * and will record the jobs that have been so modified. |
| *@param timestamp is the current time in milliseconds since epoch. |
| *@param modifiedJobs is filled in with the set of IJobDescription objects that were stopped. |
| */ |
| @Override |
| public void finishJobStops(long timestamp, ArrayList modifiedJobs) |
| throws ManifoldCFException |
| { |
| // Alternative to using a write lock here: Put this in a transaction, with a "FOR UPDATE" on the first query. |
| // I think that still causes way too much locking, though, on some databases. |
| lockManager.enterWriteLock(jobStopLock); |
| try |
| { |
| // The query I used to emit was: |
| // SELECT jobid FROM jobs t0 WHERE t0.status='X' AND NOT EXISTS(SELECT 'x' FROM jobqueue t1 WHERE |
| // t0.id=t1.jobid AND t1.status IN ('A','F')) |
| // Now the query is broken up so that Postgresql behaves more efficiently. |
| |
| // Do the first query, getting the candidate jobs to be considered |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField) |
| .append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(jobs.statusField,new Object[]{ |
| jobs.statusToString(jobs.STATUS_ABORTING), |
| jobs.statusToString(jobs.STATUS_ABORTINGFORRESTART), |
| jobs.statusToString(jobs.STATUS_ABORTINGFORRESTARTMINIMAL), |
| jobs.statusToString(jobs.STATUS_ABORTINGSHUTTINGDOWN), |
| jobs.statusToString(jobs.STATUS_PAUSING), |
| jobs.statusToString(jobs.STATUS_PAUSINGSEEDING), |
| jobs.statusToString(jobs.STATUS_ACTIVEWAITING), |
| jobs.statusToString(jobs.STATUS_ACTIVEWAITINGSEEDING), |
| jobs.statusToString(jobs.STATUS_PAUSINGWAITING), |
| jobs.statusToString(jobs.STATUS_PAUSINGWAITINGSEEDING) |
| })})); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| |
| sb.append(jobQueue.idField).append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.jobIDField,jobID), |
| new MultiClause(jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVE), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)})})) |
| .append(" ").append(database.constructOffsetLimitClause(0,1)); |
| |
| IResultSet confirmSet = database.performQuery(sb.toString(),list,null,null,1,null); |
| |
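| // If any such row exists, the job still has active or being-cleaned documents, so its stop cannot be completed yet. |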
| if (confirmSet.getRowCount() > 0) |
| continue; |
| |
| // All the job's documents need to have their docpriority set to null, to clear dead wood out of the docpriority index. |
| // See CONNECTORS-290. |
| // We do this BEFORE updating the job state. |
| jobQueue.clearDocPriorities(jobID); |
| |
| IJobDescription jobDesc = jobs.load(jobID,true); |
| modifiedJobs.add(jobDesc); |
| |
| jobs.finishStopJob(jobID,timestamp); |
| |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Stopped job "+jobID); |
| } |
| } |
| } |
| finally |
| { |
| lockManager.leaveWriteLock(jobStopLock); |
| } |
| } |
| |
| /** Reset eligible jobs either back to the "inactive" state, or make them active again. The |
| * latter will occur if the cleanup phase of the job generated more pending documents. |
| * |
| * This method is used to pick up all jobs in the shutting down state |
| * whose purgatory or being-cleaned records have all been processed. |
| * |
| *@param currentTime is the current time in milliseconds since epoch. |
| *@param resetJobs is filled in with the set of IJobDescription objects that were reset. |
| */ |
| @Override |
| public void resetJobs(long currentTime, ArrayList resetJobs) |
| throws ManifoldCFException |
| { |
| // Alternative to using a write lock here: Put this in a transaction, with a "FOR UPDATE" on the first query. |
| // I think that still causes way too much locking, though, on some databases. |
| lockManager.enterWriteLock(jobResetLock); |
| try |
| { |
| // Query for all jobs that fulfill the criteria |
| // The query used to look like: |
| // |
| // SELECT id FROM jobs t0 WHERE status='D' AND NOT EXISTS(SELECT 'x' FROM jobqueue t1 WHERE |
| // t0.id=t1.jobid AND t1.status='P') |
| // |
| // Now, the query is broken up, for performance |
| |
| // Do the first query, getting the candidate jobs to be considered |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(jobs.idField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobs.statusField,jobs.statusToString(jobs.STATUS_SHUTTINGDOWN))})); |
| |
| IResultSet set = database.performQuery(sb.toString(),list,null,null); |
| |
| int i = 0; |
| while (i < set.getRowCount()) |
| { |
| IResultRow row = set.getRow(i++); |
| Long jobID = (Long)row.getValue(jobs.idField); |
| |
| // Check to be sure the job is a candidate for shutdown |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| |
| sb.append(jobQueue.idField).append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.jobIDField,jobID), |
| new MultiClause(jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)})})) |
| .append(" ").append(database.constructOffsetLimitClause(0,1)); |
| |
| IResultSet confirmSet = database.performQuery(sb.toString(),list,null,null,1,null); |
| |
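| // If any such row exists, purgatory or being-cleaned documents remain, and the job's shutdown phase is not yet complete. |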
| if (confirmSet.getRowCount() > 0) |
| continue; |
| |
| // The shutting-down phase is complete. However, we need to check if there are any outstanding |
| // PENDING or PENDINGPURGATORY records before we can decide what to do. |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| |
| sb.append(jobQueue.idField).append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobQueue.jobIDField,jobID), |
| new MultiClause(jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)})})) |
| .append(" ").append(database.constructOffsetLimitClause(0,1)); |
| |
| confirmSet = database.performQuery(sb.toString(),list,null,null,1,null); |
| |
| if (confirmSet.getRowCount() > 0) |
| { |
| // This job needs to re-enter the active state. Make that happen. |
| jobs.returnJobToActive(jobID); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" is re-entering active state"); |
| } |
| } |
| else |
| { |
| // This job should be marked as finished. |
| IJobDescription jobDesc = jobs.load(jobID,true); |
| resetJobs.add(jobDesc); |
| |
| jobs.finishJob(jobID,currentTime); |
| if (Logging.jobs.isDebugEnabled()) |
| { |
| Logging.jobs.debug("Job "+jobID+" now completed"); |
| } |
| } |
| } |
| } |
| finally |
| { |
| lockManager.leaveWriteLock(jobResetLock); |
| } |
| } |
| |
| |
| // Status reports |
| |
| /** Get the status of a job. |
| *@return the status object for the specified job. |
| */ |
| @Override |
| public JobStatus getStatus(Long jobID) |
| throws ManifoldCFException |
| { |
| return getStatus(jobID,true); |
| } |
| |
| /** Get a list of all jobs, and their status information. |
| *@return an ordered array of job status objects. |
| */ |
| @Override |
| public JobStatus[] getAllStatus() |
| throws ManifoldCFException |
| { |
| return getAllStatus(true); |
| } |
| |
| /** Get a list of running jobs. This is for status reporting. |
| *@return an array of the job status objects. |
| */ |
| @Override |
| public JobStatus[] getRunningJobs() |
| throws ManifoldCFException |
| { |
| return getRunningJobs(true); |
| } |
| |
| /** Get a list of completed jobs, and their statistics. |
| *@return an array of the job status objects. |
| */ |
| @Override |
| public JobStatus[] getFinishedJobs() |
| throws ManifoldCFException |
| { |
| return getFinishedJobs(true); |
| } |
| |
| /** Get the status of a job. |
| *@param jobID is the job ID. |
| *@param includeCounts is true if document counts should be included. |
| *@return the status object for the specified job. |
| */ |
| @Override |
| public JobStatus getStatus(Long jobID, boolean includeCounts) |
| throws ManifoldCFException |
| { |
| return getStatus(jobID, includeCounts, Integer.MAX_VALUE); |
| } |
| |
| /** Get a list of all jobs, and their status information. |
| *@param includeCounts is true if document counts should be included. |
| *@return an ordered array of job status objects. |
| */ |
| @Override |
| public JobStatus[] getAllStatus(boolean includeCounts) |
| throws ManifoldCFException |
| { |
| return getAllStatus(includeCounts, Integer.MAX_VALUE); |
| } |
| |
| /** Get a list of running jobs. This is for status reporting. |
| *@param includeCounts is true if document counts should be included. |
| *@return an array of the job status objects. |
| */ |
| @Override |
| public JobStatus[] getRunningJobs(boolean includeCounts) |
| throws ManifoldCFException |
| { |
| return getRunningJobs(includeCounts, Integer.MAX_VALUE); |
| } |
| |
| /** Get a list of completed jobs, and their statistics. |
| *@param includeCounts is true if document counts should be included. |
| *@return an array of the job status objects. |
| */ |
| @Override |
| public JobStatus[] getFinishedJobs(boolean includeCounts) |
| throws ManifoldCFException |
| { |
| return getFinishedJobs(includeCounts, Integer.MAX_VALUE); |
| } |
| |
| /** Get the status of a job. |
| *@param jobID is the job ID. |
| *@param includeCounts is true if document counts should be included. |
| *@param maxCount is the maximum number of documents we want to count for each status. |
| *@return the status object for the specified job. |
| */ |
| @Override |
| public JobStatus getStatus(Long jobID, boolean includeCounts, int maxCount) |
| throws ManifoldCFException |
| { |
| ArrayList list = new ArrayList(); |
| String whereClause = Jobs.idField+"=?"; |
| list.add(jobID); |
| JobStatus[] records = makeJobStatus(whereClause,list,includeCounts,maxCount); |
| if (records.length == 0) |
| return null; |
| return records[0]; |
| } |
| |
| |
| /** Get a list of all jobs, and their status information. |
| *@param includeCounts is true if document counts should be included. |
| *@param maxCount is the maximum number of documents we want to count for each status. |
| *@return an ordered array of job status objects. |
| */ |
| @Override |
| public JobStatus[] getAllStatus(boolean includeCounts, int maxCount) |
| throws ManifoldCFException |
| { |
| return makeJobStatus(null,null,includeCounts,maxCount); |
| } |
| |
| /** Get a list of running jobs. This is for status reporting. |
| *@param includeCounts is true if document counts should be included. |
| *@param maxCount is the maximum number of documents we want to count for each status. |
| *@return an array of the job status objects. |
| */ |
| @Override |
| public JobStatus[] getRunningJobs(boolean includeCounts, int maxCount) |
| throws ManifoldCFException |
| { |
| ArrayList whereParams = new ArrayList(); |
| |
| String whereClause = database.buildConjunctionClause(whereParams,new ClauseDescription[]{ |
| new MultiClause(Jobs.statusField,new Object[]{ |
| Jobs.statusToString(Jobs.STATUS_ACTIVE), |
| Jobs.statusToString(Jobs.STATUS_ACTIVESEEDING), |
| Jobs.statusToString(Jobs.STATUS_ACTIVE_UNINSTALLED), |
| Jobs.statusToString(Jobs.STATUS_ACTIVESEEDING_UNINSTALLED), |
| Jobs.statusToString(Jobs.STATUS_PAUSED), |
| Jobs.statusToString(Jobs.STATUS_PAUSEDSEEDING), |
| Jobs.statusToString(Jobs.STATUS_ACTIVEWAIT), |
| Jobs.statusToString(Jobs.STATUS_ACTIVEWAITSEEDING), |
| Jobs.statusToString(Jobs.STATUS_PAUSEDWAIT), |
| Jobs.statusToString(Jobs.STATUS_PAUSEDWAITSEEDING), |
| Jobs.statusToString(Jobs.STATUS_PAUSING), |
| Jobs.statusToString(Jobs.STATUS_PAUSINGSEEDING), |
| Jobs.statusToString(Jobs.STATUS_ACTIVEWAITING), |
| Jobs.statusToString(Jobs.STATUS_ACTIVEWAITINGSEEDING), |
| Jobs.statusToString(Jobs.STATUS_PAUSINGWAITING), |
| Jobs.statusToString(Jobs.STATUS_PAUSINGWAITINGSEEDING), |
| Jobs.statusToString(Jobs.STATUS_RESUMING), |
| Jobs.statusToString(Jobs.STATUS_RESUMINGSEEDING) |
| })}); |
| |
| return makeJobStatus(whereClause,whereParams,includeCounts,maxCount); |
| } |
| |
| /** Get a list of completed jobs, and their statistics. |
| *@param includeCounts is true if document counts should be included. |
| *@param maxCount is the maximum number of documents we want to count for each status. |
| *@return an array of the job status objects. |
| */ |
| @Override |
| public JobStatus[] getFinishedJobs(boolean includeCounts, int maxCount) |
| throws ManifoldCFException |
| { |
| StringBuilder sb = new StringBuilder(); |
| ArrayList whereParams = new ArrayList(); |
| |
| sb.append(database.buildConjunctionClause(whereParams,new ClauseDescription[]{ |
| new UnitaryClause(Jobs.statusField,Jobs.statusToString(Jobs.STATUS_INACTIVE))})).append(" AND ") |
| .append(Jobs.endTimeField).append(" IS NOT NULL"); |
| |
| return makeJobStatus(sb.toString(),whereParams,includeCounts,maxCount); |
| } |
| |
| // Protected methods and classes |
| |
| /** Make a job status array from a query result. |
| *@param whereClause is the where clause for the jobs we are interested in. |
| *@param whereParams are the parameters for the where clause. |
| *@param includeCounts is true if document counts should be included. |
| *@param maxCount is the maximum number of documents we want to count for each status. |
| *@return the status array. |
| */ |
| protected JobStatus[] makeJobStatus(String whereClause, ArrayList whereParams, boolean includeCounts, int maxCount) |
| throws ManifoldCFException |
| { |
| IResultSet set = database.performQuery("SELECT t0."+ |
| Jobs.idField+",t0."+ |
| Jobs.descriptionField+",t0."+ |
| Jobs.statusField+",t0."+ |
| Jobs.startTimeField+",t0."+ |
| Jobs.endTimeField+",t0."+ |
| Jobs.errorField+ |
| " FROM "+jobs.getTableName()+" t0 "+((whereClause==null)?"":(" WHERE "+whereClause))+" ORDER BY "+Jobs.descriptionField+" ASC", |
| whereParams,null,null); |
| |
| // Build count hashes: set2 = total documents per job, set3 = outstanding documents, set4 = processed documents |
| Map<Long,Long> set2Hash = new HashMap<Long,Long>(); |
| Map<Long,Long> set3Hash = new HashMap<Long,Long>(); |
| Map<Long,Long> set4Hash = new HashMap<Long,Long>(); |
| Map<Long,Boolean> set2Exact = new HashMap<Long,Boolean>(); |
| Map<Long,Boolean> set3Exact = new HashMap<Long,Boolean>(); |
| Map<Long,Boolean> set4Exact = new HashMap<Long,Boolean>(); |
| |
| if (includeCounts) |
| { |
| // If we are counting all of them anyway, do this via GROUP BY, since that will be the fastest. |
| // Otherwise, fire off individual limited queries, one per job. |
| if (maxCount == Integer.MAX_VALUE) |
| { |
| buildCountsUsingGroupBy(whereClause,whereParams,set2Hash,set3Hash,set4Hash,set2Exact,set3Exact,set4Exact); |
| } |
| else |
| { |
| // Check whether the total number of matching jobqueue rows exceeds the limit. If not, we can still use the cheaper GROUP BY query. |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(database.constructCountClause("t2.x")).append(" AS doccount") |
| .append(" FROM (SELECT 'x' AS x FROM ").append(jobQueue.getTableName()).append(" t1"); |
| addWhereClause(sb,list,whereClause,whereParams,false); |
| sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false)) |
| .append(") t2"); |
| IResultSet countResult = database.performQuery(sb.toString(),list,null,null); |
| if (countResult.getRowCount() > 0 && ((Long)countResult.getRow(0).getValue("doccount")).longValue() > maxCount) |
| { |
| // Too many items in queue; do it the hard way |
| buildCountsUsingIndividualQueries(whereClause,whereParams,maxCount,set2Hash,set3Hash,set4Hash,set2Exact,set3Exact,set4Exact); |
| } |
| else |
| { |
| // Cheap way should still work. |
| buildCountsUsingGroupBy(whereClause,whereParams,set2Hash,set3Hash,set4Hash,set2Exact,set3Exact,set4Exact); |
| } |
| } |
| } |
| |
| JobStatus[] rval = new JobStatus[set.getRowCount()]; |
| for (int i = 0; i < rval.length; i++) |
| { |
| IResultRow row = set.getRow(i); |
| Long jobID = (Long)row.getValue(Jobs.idField); |
| String description = row.getValue(Jobs.descriptionField).toString(); |
| int status = Jobs.stringToStatus(row.getValue(Jobs.statusField).toString()); |
| Long startTimeValue = (Long)row.getValue(Jobs.startTimeField); |
| long startTime = -1; |
| if (startTimeValue != null) |
| startTime = startTimeValue.longValue(); |
| Long endTimeValue = (Long)row.getValue(Jobs.endTimeField); |
| long endTime = -1; |
| if (endTimeValue != null) |
| endTime = endTimeValue.longValue(); |
| String errorText = (String)row.getValue(Jobs.errorField); |
| if (errorText != null && errorText.length() == 0) |
| errorText = null; |
| int rstatus = JobStatus.JOBSTATUS_NOTYETRUN; |
| |
| switch (status) |
| { |
| case Jobs.STATUS_INACTIVE: |
| if (errorText != null) |
| rstatus = JobStatus.JOBSTATUS_ERROR; |
| else |
| { |
| if (startTime >= 0) |
| rstatus = JobStatus.JOBSTATUS_COMPLETED; |
| else |
| rstatus = JobStatus.JOBSTATUS_NOTYETRUN; |
| } |
| break; |
| case Jobs.STATUS_ACTIVE_UNINSTALLED: |
| case Jobs.STATUS_ACTIVESEEDING_UNINSTALLED: |
| rstatus = JobStatus.JOBSTATUS_RUNNING_UNINSTALLED; |
| break; |
| case Jobs.STATUS_ACTIVE: |
| case Jobs.STATUS_ACTIVESEEDING: |
| rstatus = JobStatus.JOBSTATUS_RUNNING; |
| break; |
| case Jobs.STATUS_SHUTTINGDOWN: |
| rstatus = JobStatus.JOBSTATUS_JOBENDCLEANUP; |
| break; |
| case Jobs.STATUS_READYFORNOTIFY: |
| case Jobs.STATUS_NOTIFYINGOFCOMPLETION: |
| case Jobs.STATUS_READYFORDELETENOTIFY: |
| case Jobs.STATUS_NOTIFYINGOFDELETION: |
| rstatus = JobStatus.JOBSTATUS_JOBENDNOTIFICATION; |
| break; |
| case Jobs.STATUS_ABORTING: |
| case Jobs.STATUS_ABORTINGSHUTTINGDOWN: |
| rstatus = JobStatus.JOBSTATUS_ABORTING; |
| break; |
| case Jobs.STATUS_ABORTINGFORRESTART: |
| case Jobs.STATUS_ABORTINGFORRESTARTMINIMAL: |
| case Jobs.STATUS_ABORTINGFORRESTARTSEEDING: |
| case Jobs.STATUS_ABORTINGFORRESTARTSEEDINGMINIMAL: |
| case Jobs.STATUS_ABORTINGSTARTINGUPFORRESTART: |
| case Jobs.STATUS_ABORTINGSTARTINGUPFORRESTARTMINIMAL: |
| rstatus = JobStatus.JOBSTATUS_RESTARTING; |
| break; |
| case Jobs.STATUS_PAUSING: |
| case Jobs.STATUS_PAUSINGSEEDING: |
| case Jobs.STATUS_ACTIVEWAITING: |
| case Jobs.STATUS_ACTIVEWAITINGSEEDING: |
| case Jobs.STATUS_PAUSINGWAITING: |
| case Jobs.STATUS_PAUSINGWAITINGSEEDING: |
| rstatus = JobStatus.JOBSTATUS_STOPPING; |
| break; |
| case Jobs.STATUS_RESUMING: |
| case Jobs.STATUS_RESUMINGSEEDING: |
| rstatus = JobStatus.JOBSTATUS_RESUMING; |
| break; |
| case Jobs.STATUS_PAUSED: |
| case Jobs.STATUS_PAUSEDSEEDING: |
| rstatus = JobStatus.JOBSTATUS_PAUSED; |
| break; |
| case Jobs.STATUS_ACTIVEWAIT: |
| case Jobs.STATUS_ACTIVEWAITSEEDING: |
| rstatus = JobStatus.JOBSTATUS_WINDOWWAIT; |
| break; |
| case Jobs.STATUS_PAUSEDWAIT: |
| case Jobs.STATUS_PAUSEDWAITSEEDING: |
| rstatus = JobStatus.JOBSTATUS_PAUSED; |
| break; |
| case Jobs.STATUS_STARTINGUP: |
| case Jobs.STATUS_STARTINGUPMINIMAL: |
| case Jobs.STATUS_READYFORSTARTUP: |
| case Jobs.STATUS_READYFORSTARTUPMINIMAL: |
| rstatus = JobStatus.JOBSTATUS_STARTING; |
| break; |
| case Jobs.STATUS_DELETESTARTINGUP: |
| case Jobs.STATUS_READYFORDELETE: |
| case Jobs.STATUS_DELETING: |
| case Jobs.STATUS_DELETING_NOOUTPUT: |
| rstatus = JobStatus.JOBSTATUS_DESTRUCTING; |
| break; |
| default: |
| break; |
| } |
| |
| Long set2Value = set2Hash.get(jobID); |
| Long set3Value = set3Hash.get(jobID); |
| Long set4Value = set4Hash.get(jobID); |
| Boolean set2ExactValue = set2Exact.get(jobID); |
| Boolean set3ExactValue = set3Exact.get(jobID); |
| Boolean set4ExactValue = set4Exact.get(jobID); |
| |
| rval[i] = new JobStatus(jobID.toString(),description,rstatus,((set2Value==null)?0L:set2Value.longValue()), |
| ((set3Value==null)?0L:set3Value.longValue()), |
| ((set4Value==null)?0L:set4Value.longValue()), |
| ((set2ExactValue==null)?true:set2ExactValue.booleanValue()), |
| ((set3ExactValue==null)?true:set3ExactValue.booleanValue()), |
| ((set4ExactValue==null)?true:set4ExactValue.booleanValue()), |
| startTime,endTime,errorText); |
| } |
| return rval; |
| } |
| |
| protected static ClauseDescription buildOutstandingClause() |
| throws ManifoldCFException |
| { |
| return new MultiClause(JobQueue.statusField,new Object[]{ |
| JobQueue.statusToString(JobQueue.STATUS_ACTIVE), |
| JobQueue.statusToString(JobQueue.STATUS_ACTIVENEEDRESCAN), |
| JobQueue.statusToString(JobQueue.STATUS_PENDING), |
| JobQueue.statusToString(JobQueue.STATUS_ACTIVEPURGATORY), |
| JobQueue.statusToString(JobQueue.STATUS_ACTIVENEEDRESCANPURGATORY), |
| JobQueue.statusToString(JobQueue.STATUS_PENDINGPURGATORY)}); |
| } |
| |
| protected static ClauseDescription buildProcessedClause() |
| throws ManifoldCFException |
| { |
| return new MultiClause(JobQueue.statusField,new Object[]{ |
| JobQueue.statusToString(JobQueue.STATUS_COMPLETE), |
| JobQueue.statusToString(JobQueue.STATUS_UNCHANGED), |
| JobQueue.statusToString(JobQueue.STATUS_PURGATORY), |
| JobQueue.statusToString(JobQueue.STATUS_ACTIVEPURGATORY), |
| JobQueue.statusToString(JobQueue.STATUS_ACTIVENEEDRESCANPURGATORY), |
| JobQueue.statusToString(JobQueue.STATUS_PENDINGPURGATORY)}); |
| } |
| |
| protected void buildCountsUsingIndividualQueries(String whereClause, ArrayList whereParams, int maxCount, |
| Map<Long,Long> set2Hash, Map<Long,Long> set3Hash, Map<Long,Long> set4Hash, |
| Map<Long,Boolean> set2Exact, Map<Long,Boolean> set3Exact, Map<Long,Boolean> set4Exact) |
| throws ManifoldCFException |
| { |
| // Fire off an individual query with a limit for each job |
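| // Each count query below is limited to maxCount+1 rows; if that limit is reached, the count is reported as maxCount and flagged as inexact. |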
| |
| // First, get the list of jobs that we are interested in. |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(Jobs.idField).append(" FROM ").append(jobs.getTableName()).append(" t0"); |
| if (whereClause != null) |
| { |
| sb.append(" WHERE ") |
| .append(whereClause); |
| if (whereParams != null) |
| list.addAll(whereParams); |
| } |
| |
| IResultSet jobSet = database.performQuery(sb.toString(),list,null,null); |
| |
| // Scan the set of jobs |
| for (int i = 0; i < jobSet.getRowCount(); i++) |
| { |
| IResultRow row = jobSet.getRow(i); |
| Long jobID = (Long)row.getValue(Jobs.idField); |
| |
| // Now, for each job, fire off a separate, limited, query for each count we care about |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| sb.append(database.constructCountClause("t2.x")).append(" AS doccount") |
| .append(" FROM (SELECT 'x' AS x FROM ").append(jobQueue.getTableName()).append(" WHERE "); |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{new UnitaryClause(JobQueue.jobIDField,jobID)})); |
| sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false)) |
| .append(") t2"); |
| |
| IResultSet totalSet = database.performQuery(sb.toString(),list,null,null); |
| if (totalSet.getRowCount() > 0) |
| { |
| long rowCount = ((Long)totalSet.getRow(0).getValue("doccount")).longValue(); |
| if (rowCount > maxCount) |
| { |
| set2Hash.put(jobID,new Long(maxCount)); |
| set2Exact.put(jobID,new Boolean(false)); |
| } |
| else |
| { |
| set2Hash.put(jobID,new Long(rowCount)); |
| set2Exact.put(jobID,new Boolean(true)); |
| } |
| } |
| |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| sb.append(database.constructCountClause("t2.x")).append(" AS doccount") |
| .append(" FROM (SELECT 'x' AS x FROM ").append(jobQueue.getTableName()).append(" WHERE "); |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{new UnitaryClause(JobQueue.jobIDField,jobID)})); |
| sb.append(" AND "); |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{buildOutstandingClause()})); |
| sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false)) |
| .append(") t2"); |
| |
| IResultSet outstandingSet = database.performQuery(sb.toString(),list,null,null); |
| if (outstandingSet.getRowCount() > 0) |
| { |
| long rowCount = ((Long)outstandingSet.getRow(0).getValue("doccount")).longValue(); |
| if (rowCount > maxCount) |
| { |
| set3Hash.put(jobID,new Long(maxCount)); |
| set3Exact.put(jobID,new Boolean(false)); |
| } |
| else |
| { |
| set3Hash.put(jobID,new Long(rowCount)); |
| set3Exact.put(jobID,new Boolean(true)); |
| } |
| } |
| |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| sb.append(database.constructCountClause("t2.x")).append(" AS doccount") |
| .append(" FROM (SELECT 'x' AS x FROM ").append(jobQueue.getTableName()).append(" WHERE "); |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{new UnitaryClause(JobQueue.jobIDField,jobID)})); |
| sb.append(" AND "); |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{buildProcessedClause()})); |
| sb.append(" ").append(database.constructOffsetLimitClause(0,maxCount+1,false)) |
| .append(") t2"); |
| |
| IResultSet processedSet = database.performQuery(sb.toString(),list,null,null); |
| if (processedSet.getRowCount() > 0) |
| { |
| long rowCount = ((Long)processedSet.getRow(0).getValue("doccount")).longValue(); |
| if (rowCount > maxCount) |
| { |
| set4Hash.put(jobID,new Long(maxCount)); |
| set4Exact.put(jobID,new Boolean(false)); |
| } |
| else |
| { |
| set4Hash.put(jobID,new Long(rowCount)); |
| set4Exact.put(jobID,new Boolean(true)); |
| } |
| } |
| } |
| } |
| |
| protected void buildCountsUsingGroupBy(String whereClause, ArrayList whereParams, |
| Map<Long,Long> set2Hash, Map<Long,Long> set3Hash, Map<Long,Long> set4Hash, |
| Map<Long,Boolean> set2Exact, Map<Long,Boolean> set3Exact, Map<Long,Boolean> set4Exact) |
| throws ManifoldCFException |
| { |
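| // Three GROUP BY queries are used: all documents per job (set2), outstanding documents (set3), and processed documents (set4). GROUP BY counts are always exact. |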
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append(JobQueue.jobIDField).append(",") |
| .append(database.constructCountClause(JobQueue.docHashField)).append(" AS doccount") |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t1"); |
| addWhereClause(sb,list,whereClause,whereParams,false); |
| sb.append(" GROUP BY ").append(JobQueue.jobIDField); |
| |
| IResultSet set2 = database.performQuery(sb.toString(),list,null,null); |
| |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| |
| sb.append(JobQueue.jobIDField).append(",") |
| .append(database.constructCountClause(JobQueue.docHashField)).append(" AS doccount") |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{buildOutstandingClause()})); |
| addWhereClause(sb,list,whereClause,whereParams,true); |
| sb.append(" GROUP BY ").append(JobQueue.jobIDField); |
| |
| IResultSet set3 = database.performQuery(sb.toString(),list,null,null); |
| |
| sb = new StringBuilder("SELECT "); |
| list.clear(); |
| |
| sb.append(JobQueue.jobIDField).append(",") |
| .append(database.constructCountClause(JobQueue.docHashField)).append(" AS doccount") |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{buildProcessedClause()})); |
| addWhereClause(sb,list,whereClause,whereParams,true); |
| sb.append(" GROUP BY ").append(JobQueue.jobIDField); |
| |
| IResultSet set4 = database.performQuery(sb.toString(),list,null,null); |
| |
| for (int j = 0; j < set2.getRowCount(); j++) |
| { |
| IResultRow row = set2.getRow(j); |
| Long jobID = (Long)row.getValue(JobQueue.jobIDField); |
| set2Hash.put(jobID,(Long)row.getValue("doccount")); |
| set2Exact.put(jobID,new Boolean(true)); |
| } |
| for (int j = 0; j < set3.getRowCount(); j++) |
| { |
| IResultRow row = set3.getRow(j); |
| Long jobID = (Long)row.getValue(JobQueue.jobIDField); |
| set3Hash.put(jobID,(Long)row.getValue("doccount")); |
| set3Exact.put(jobID,new Boolean(true)); |
| } |
| for (int j = 0; j < set4.getRowCount(); j++) |
| { |
| IResultRow row = set4.getRow(j); |
| Long jobID = (Long)row.getValue(JobQueue.jobIDField); |
| set4Hash.put(jobID,(Long)row.getValue("doccount")); |
| set4Exact.put(jobID,new Boolean(true)); |
| } |
| } |
| |
| protected void addWhereClause(StringBuilder sb, ArrayList list, String whereClause, ArrayList whereParams, boolean wherePresent) |
| { |
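| // Wrap the caller's jobs-table where clause in an EXISTS correlated on the job id, restricting jobqueue rows (t1) to the selected jobs (t0). |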
| if (whereClause != null) |
| { |
| if (wherePresent) |
| sb.append(" AND"); |
| else |
| sb.append(" WHERE"); |
| |
| sb.append(" EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t0 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new JoinClause("t0."+Jobs.idField,"t1."+JobQueue.jobIDField)})).append(" AND ") |
| .append(whereClause) |
| .append(")"); |
| if (whereParams != null) |
| list.addAll(whereParams); |
| } |
| } |
| |
| // These methods generate reports for direct display in the UI. |
| |
| /** Run a 'document status' report. |
| *@param connectionName is the name of the connection. |
| *@param filterCriteria are the criteria used to limit the records considered for the report. |
| *@param sortOrder is the specified sort order of the final report. |
| *@param startRow is the first row to include. |
| *@param rowCount is the number of rows to include. |
| *@return the results, with the following columns: identifier, job, state, status, scheduled, action, retrycount, retrylimit. The "scheduled" column and the |
| * "retrylimit" column are long values representing a time; all other values will be user-friendly strings. |
| */ |
| @Override |
| public IResultSet genDocumentStatus(String connectionName, StatusFilterCriteria filterCriteria, SortOrder sortOrder, |
| int startRow, int rowCount) |
| throws ManifoldCFException |
| { |
| // Build the query. |
| Long currentTime = new Long(System.currentTimeMillis()); |
| |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append("t0.").append(jobQueue.idField).append(" AS id,") |
| .append("t0.").append(jobQueue.docIDField).append(" AS identifier,") |
| .append("t1.").append(jobs.descriptionField).append(" AS job,") |
| .append("CASE") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Not yet processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Not yet processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Not yet processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Processed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Being removed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Being removed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Being removed'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append("=? THEN 'Out of scope'") |
| .append(" ELSE 'Unknown'") |
| .append(" END AS state,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?,?)") |
| .append(" THEN 'Inactive'") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append("t0.").append(jobQueue.checkActionField).append("=?") |
| .append(" AND t0.").append(jobQueue.checkTimeField).append("<=").append(currentTime.toString()) |
| .append(" THEN 'Ready for processing'") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append("t0.").append(jobQueue.checkActionField).append("=?") |
| .append(" AND t0.").append(jobQueue.checkTimeField).append("<=").append(currentTime.toString()) |
| .append(" THEN 'Ready for expiration'") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append("t0.").append(jobQueue.checkActionField).append("=?") |
| .append(" AND t0.").append(jobQueue.checkTimeField).append(">").append(currentTime.toString()) |
| .append(" THEN 'Waiting for processing'") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append("t0.").append(jobQueue.checkActionField).append("=?") |
| .append(" AND t0.").append(jobQueue.checkTimeField).append(">").append(currentTime.toString()) |
| .append(" THEN 'Waiting for expiration'") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append("t0.").append(jobQueue.checkTimeField).append(" IS NULL") |
| .append(" THEN 'Waiting forever'") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append("=?") |
| .append(" THEN 'Hopcount exceeded'") |
| .append(" WHEN ").append("t0.").append(jobQueue.statusField).append(" IN (?,?,?)") |
| .append(" THEN 'Deleting'") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?,?,?)") |
| .append(" AND ").append("t0.").append(jobQueue.checkActionField).append("=?") |
| .append(" THEN 'Processing'") |
| .append(" WHEN ") |
| .append("t0.").append(jobQueue.statusField).append(" IN (?,?,?,?)") |
| .append(" AND ").append("t0.").append(jobQueue.checkActionField).append("=?") |
| .append(" THEN 'Expiring'") |
| .append(" ELSE 'Unknown'") |
| .append(" END AS status,") |
| .append("t0.").append(jobQueue.checkTimeField).append(" AS scheduled,") |
| .append("CASE") |
| .append(" WHEN ").append("t0.").append(jobQueue.checkActionField).append("=? THEN 'Process'") |
| .append(" WHEN ").append("t0.").append(jobQueue.checkActionField).append("=? THEN 'Expire'") |
| .append(" ELSE 'Unknown'") |
| .append(" END AS action,") |
| .append("t0.").append(jobQueue.failCountField).append(" AS retrycount,") |
| .append("t0.").append(jobQueue.failTimeField).append(" AS retrylimit") |
| .append(" FROM ").append(jobQueue.getTableName()).append(" t0,").append(jobs.getTableName()).append(" t1 WHERE ") |
| .append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new JoinClause("t0."+jobQueue.jobIDField,"t1."+jobs.idField)})); |
| |
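| // The parameter values below must be added in exactly the order the '?' placeholders appear in the CASE expressions above. |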
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_COMPLETE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_UNCHANGED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ELIGIBLEFORDELETE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_COMPLETE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_UNCHANGED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PURGATORY)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_RESCAN)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_REMOVE)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_RESCAN)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_REMOVE)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ELIGIBLEFORDELETE)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_RESCAN)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_REMOVE)); |
| |
| list.add(jobQueue.actionToString(jobQueue.ACTION_RESCAN)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_REMOVE)); |
| |
| addCriteria(sb,list,"t0.",connectionName,filterCriteria,true); |
| // The intrinsic ordering is provided by the "id" column, and nothing else. |
| addOrdering(sb,new String[]{"id"},sortOrder); |
| addLimits(sb,startRow,rowCount); |
| return database.performQuery(sb.toString(),list,null,null,rowCount,null); |
| } |
| |
| /** Run a 'queue status' report. |
| *@param connectionName is the name of the connection. |
| *@param filterCriteria are the criteria used to limit the records considered for the report. |
| *@param sortOrder is the specified sort order of the final report. |
| *@param idBucketDescription is the bucket description for generating the identifier class. |
| *@param startRow is the first row to include. |
| *@param rowCount is the number of rows to include. |
| *@return the results, with the following columns: idbucket, inactive, processing, expiring, deleting, |
| processready, expireready, processwaiting, expirewaiting |
| */ |
| @Override |
| public IResultSet genQueueStatus(String connectionName, StatusFilterCriteria filterCriteria, SortOrder sortOrder, |
| BucketDescription idBucketDescription, int startRow, int rowCount) |
| throws ManifoldCFException |
| { |
| // SELECT substring(docid FROM '<id_regexp>') AS idbucket, |
| // SUM(CASE WHEN status='C' THEN 1 ELSE 0 END) AS inactive, ... |
| // FROM jobqueue WHERE <criteria> |
| // GROUP BY idbucket |
| |
| Long currentTime = new Long(System.currentTimeMillis()); |
| |
| StringBuilder sb = new StringBuilder("SELECT "); |
| ArrayList list = new ArrayList(); |
| |
| sb.append("t1.idbucket,SUM(t1.inactive) AS inactive,SUM(t1.processing) AS processing,SUM(t1.expiring) AS expiring,SUM(t1.deleting) AS deleting,") |
| .append("SUM(t1.processready) AS processready,SUM(t1.expireready) AS expireready,SUM(t1.processwaiting) AS processwaiting,SUM(t1.expirewaiting) AS expirewaiting,") |
| .append("SUM(t1.waitingforever) AS waitingforever,SUM(t1.hopcountexceeded) AS hopcountexceeded FROM (SELECT "); |
| |
| addBucketExtract(sb,list,"",jobQueue.docIDField,idBucketDescription); |
| |
| sb.append(" AS idbucket,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?,?)") |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" AS inactive,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?,?,?)") |
| .append(" AND ").append(jobQueue.checkActionField).append("=?") |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as processing,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?,?,?)") |
| .append(" AND ").append(jobQueue.checkActionField).append("=?") |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as expiring,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?,?)") |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as deleting,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append(jobQueue.checkActionField).append("=?") |
| .append(" AND ").append(jobQueue.checkTimeField).append("<=").append(currentTime.toString()) |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as processready,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append(jobQueue.checkActionField).append("=?") |
| .append(" AND ").append(jobQueue.checkTimeField).append("<=").append(currentTime.toString()) |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as expireready,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append(jobQueue.checkActionField).append("=?") |
| .append(" AND ").append(jobQueue.checkTimeField).append(">").append(currentTime.toString()) |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as processwaiting,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append(jobQueue.checkActionField).append("=?") |
| .append(" AND ").append(jobQueue.checkTimeField).append(">").append(currentTime.toString()) |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as expirewaiting,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append(" IN (?,?)") |
| .append(" AND ").append(jobQueue.checkTimeField).append(" IS NULL") |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as waitingforever,") |
| .append("CASE") |
| .append(" WHEN ") |
| .append(jobQueue.statusField).append("=?") |
| .append(" THEN 1 ELSE 0") |
| .append(" END") |
| .append(" as hopcountexceeded"); |
| sb.append(" FROM ").append(jobQueue.getTableName()); |
| |
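| // The parameter values below must be added in exactly the order the '?' placeholders appear in the CASE expressions above. |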
| list.add(jobQueue.statusToString(jobQueue.STATUS_COMPLETE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_UNCHANGED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PURGATORY)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_RESCAN)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVE)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_REMOVE)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_ELIGIBLEFORDELETE)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_RESCAN)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_REMOVE)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_RESCAN)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| list.add(jobQueue.actionToString(jobQueue.ACTION_REMOVE)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDING)); |
| list.add(jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)); |
| |
| list.add(jobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED)); |
| |
| addCriteria(sb,list,"",connectionName,filterCriteria,false); |
| sb.append(") t1 GROUP BY idbucket"); |
| addOrdering(sb,new String[]{"idbucket","inactive","processing","expiring","deleting","processready","expireready","processwaiting","expirewaiting","waitingforever","hopcountexceeded"},sortOrder); |
| addLimits(sb,startRow,rowCount); |
| return database.performQuery(sb.toString(),list,null,null,rowCount,null); |
| } |
| |
| // Protected methods for report generation |
| |
| /** Turn a bucket description into a return column. |
  * This is complicated by the fact that the extraction code is inherently case sensitive. So if
  * case-insensitive matching is desired, we fold the whole column value to lower case before doing the match.
| */ |
| protected void addBucketExtract(StringBuilder sb, ArrayList list, String columnPrefix, String columnName, BucketDescription bucketDesc) |
| { |
| boolean isSensitive = bucketDesc.isSensitive(); |
| list.add(bucketDesc.getRegexp()); |
| sb.append(database.constructSubstringClause(columnPrefix+columnName,"?",!isSensitive)); |
| } |
| |
| /** Add criteria clauses to query. |
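  * Emits job, document-identifier, document-state, and document-status restrictions as WHERE/AND
  * clauses, accumulating the corresponding substitution parameters in the supplied list.
  *@return true if a WHERE clause has been emitted, either previously or by this method.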
| */ |
| protected boolean addCriteria(StringBuilder sb, ArrayList list, String fieldPrefix, String connectionName, StatusFilterCriteria criteria, boolean whereEmitted) |
| throws ManifoldCFException |
| { |
| Long[] matchingJobs = criteria.getJobs(); |
| |
| if (matchingJobs != null) |
| { |
| whereEmitted = emitClauseStart(sb,whereEmitted); |
| if (matchingJobs.length == 0) |
| { |
| sb.append("0>1"); |
| } |
| else |
| { |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.jobIDField,matchingJobs)})); |
| } |
| } |
| |
| RegExpCriteria identifierRegexp = criteria.getIdentifierMatch(); |
| if (identifierRegexp != null) |
| { |
| whereEmitted = emitClauseStart(sb,whereEmitted); |
| list.add(identifierRegexp.getRegexpString()); |
| sb.append(database.constructRegexpClause(fieldPrefix+jobQueue.docIDField,"?",identifierRegexp.isInsensitive())); |
| } |
| |
| Long nowTime = new Long(criteria.getNowTime()); |
| int[] states = criteria.getMatchingStates(); |
| int[] statuses = criteria.getMatchingStatuses(); |
| if (states.length == 0 || statuses.length == 0) |
| { |
| whereEmitted = emitClauseStart(sb,whereEmitted); |
| sb.append("0>1"); |
| return whereEmitted; |
| } |
| |
| // Iterate through the specified states, and emit a series of OR clauses, one for each state. The contents of the clause will be complex. |
| whereEmitted = emitClauseStart(sb,whereEmitted); |
| sb.append("("); |
| int k = 0; |
| while (k < states.length) |
| { |
| int stateValue = states[k]; |
| if (k > 0) |
| sb.append(" OR "); |
| switch (stateValue) |
| { |
| case DOCSTATE_NEVERPROCESSED: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVE), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN)})})); |
| break; |
| case DOCSTATE_PREVIOUSLYPROCESSED: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_ELIGIBLEFORDELETE), |
| jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED), |
| jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED), |
| jobQueue.statusToString(jobQueue.STATUS_COMPLETE), |
| jobQueue.statusToString(jobQueue.STATUS_UNCHANGED), |
| jobQueue.statusToString(jobQueue.STATUS_PURGATORY)})})); |
| break; |
| case DOCSTATE_OUTOFSCOPE: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED)})})); |
| break; |
| } |
| k++; |
| } |
| sb.append(")"); |
| |
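    // Now emit an OR group for the requested document statuses; each status maps onto one or more
    // jobqueue status values, in some cases further constrained by the check action and check time.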
| whereEmitted = emitClauseStart(sb,whereEmitted); |
| sb.append("("); |
| k = 0; |
| while (k < statuses.length) |
| { |
| int stateValue = statuses[k]; |
| if (k > 0) |
| sb.append(" OR "); |
| switch (stateValue) |
| { |
| case DOCSTATUS_INACTIVE: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_COMPLETE), |
| jobQueue.statusToString(jobQueue.STATUS_UNCHANGED), |
| jobQueue.statusToString(jobQueue.STATUS_PURGATORY)})})); |
| break; |
| case DOCSTATUS_PROCESSING: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVE), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)}), |
| new UnitaryClause(fieldPrefix+jobQueue.checkActionField,jobQueue.actionToString(jobQueue.ACTION_RESCAN))})); |
| break; |
| case DOCSTATUS_EXPIRING: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVE), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCAN), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVEPURGATORY), |
| jobQueue.statusToString(jobQueue.STATUS_ACTIVENEEDRESCANPURGATORY)}), |
| new UnitaryClause(fieldPrefix+jobQueue.checkActionField,jobQueue.actionToString(jobQueue.ACTION_REMOVE))})); |
| break; |
| case DOCSTATUS_DELETING: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_BEINGDELETED), |
| jobQueue.statusToString(jobQueue.STATUS_BEINGCLEANED), |
| jobQueue.statusToString(jobQueue.STATUS_ELIGIBLEFORDELETE)})})); |
| break; |
| case DOCSTATUS_READYFORPROCESSING: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)}), |
| new UnitaryClause(fieldPrefix+jobQueue.checkActionField,jobQueue.actionToString(jobQueue.ACTION_RESCAN)), |
| new UnitaryClause(fieldPrefix+jobQueue.checkTimeField,"<=",nowTime)})); |
| break; |
| case DOCSTATUS_READYFOREXPIRATION: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)}), |
| new UnitaryClause(fieldPrefix+jobQueue.checkActionField,jobQueue.actionToString(jobQueue.ACTION_REMOVE)), |
| new UnitaryClause(fieldPrefix+jobQueue.checkTimeField,"<=",nowTime)})); |
| break; |
| case DOCSTATUS_WAITINGFORPROCESSING: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)}), |
| new UnitaryClause(fieldPrefix+jobQueue.checkActionField,jobQueue.actionToString(jobQueue.ACTION_RESCAN)), |
| new UnitaryClause(fieldPrefix+jobQueue.checkTimeField,">",nowTime)})); |
| break; |
| case DOCSTATUS_WAITINGFOREXPIRATION: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)}), |
| new UnitaryClause(fieldPrefix+jobQueue.checkActionField,jobQueue.actionToString(jobQueue.ACTION_REMOVE)), |
| new UnitaryClause(fieldPrefix+jobQueue.checkTimeField,">",nowTime)})); |
| break; |
| case DOCSTATUS_WAITINGFOREVER: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_PENDING), |
| jobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)})})) |
| .append(" AND ").append(fieldPrefix).append(jobQueue.checkTimeField).append(" IS NULL"); |
| break; |
| case DOCSTATUS_HOPCOUNTEXCEEDED: |
| sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{ |
| new MultiClause(fieldPrefix+jobQueue.statusField,new Object[]{ |
| jobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED)})})); |
| break; |
| } |
| k++; |
| } |
| sb.append(")"); |
| |
| return whereEmitted; |
| } |
| |
  /** Emit a WHERE or an AND, depending on whether a WHERE has already been emitted.
| */ |
| protected boolean emitClauseStart(StringBuilder sb, boolean whereEmitted) |
| { |
| if (whereEmitted) |
| sb.append(" AND "); |
| else |
| sb.append(" WHERE "); |
| return true; |
| } |
| |
| /** Add ordering. |
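  * The explicitly requested sort columns are emitted first; every remaining column in the complete
  * field list is then appended in descending order, so that LIMIT/OFFSET paging remains deterministic.
  * For example (hypothetical values), a single ascending sort on "processing" over the field list
  * {"idbucket","processing"} yields " ORDER BY processing ASC,idbucket DESC".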
| */ |
| protected void addOrdering(StringBuilder sb, String[] completeFieldList, SortOrder sort) |
| { |
| // Keep track of the fields we've seen |
| Map hash = new HashMap(); |
| |
| // Emit the "Order by" |
| sb.append(" ORDER BY "); |
| |
| // Go through the specified list |
| int i = 0; |
| int count = sort.getCount(); |
| while (i < count) |
| { |
| if (i > 0) |
| sb.append(","); |
| String column = sort.getColumn(i); |
| sb.append(column); |
      if (sort.getDirection(i) == SortOrder.SORT_ASCENDING)
| sb.append(" ASC"); |
| else |
| sb.append(" DESC"); |
| hash.put(column,column); |
| i++; |
| } |
| |
| // Now, go through the complete field list, and emit sort criteria for everything |
| // not actually specified. This is so LIMIT and OFFSET give consistent results. |
| |
| int j = 0; |
| while (j < completeFieldList.length) |
| { |
| String field = completeFieldList[j]; |
| if (hash.get(field) == null) |
| { |
| if (i > 0) |
| sb.append(","); |
| sb.append(field); |
| sb.append(" DESC"); |
| //if (j == 0) |
| // sb.append(" DESC"); |
| //else |
| // sb.append(" ASC"); |
| i++; |
| } |
| j++; |
| } |
| } |
| |
| /** Add limit and offset. |
| */ |
| protected void addLimits(StringBuilder sb, int startRow, int maxRowCount) |
| { |
| sb.append(" ").append(database.constructOffsetLimitClause(startRow,maxRowCount)); |
| } |
| |
| |
| /** Class for tracking existing jobqueue row data */ |
| protected static class JobqueueRecord |
| { |
| protected Long recordID; |
| protected int status; |
| protected Long checkTimeValue; |
| |
| public JobqueueRecord(Long recordID, int status, Long checkTimeValue) |
| { |
| this.recordID = recordID; |
| this.status = status; |
| this.checkTimeValue = checkTimeValue; |
| } |
| |
| public Long getRecordID() |
| { |
| return recordID; |
| } |
| |
| public int getStatus() |
| { |
| return status; |
| } |
| |
| public Long getCheckTimeValue() |
| { |
| return checkTimeValue; |
| } |
| } |
| |
| /** We go through 2x the number of documents we should need if we were perfect at setting document priorities. */ |
  private static final int EXTRA_FACTOR = 2;
| |
| /** This class provides the throttling limits for the job queueing query. |
| */ |
| protected static class ThrottleLimit implements ILimitChecker |
| { |
| // For each connection, there is (a) a number (which is the maximum per bin), and (b) |
| // a current running count per bin. These are stored as elements in a hash map. |
| protected HashMap connectionMap = new HashMap(); |
| |
    // The maximum number of document chunks we need; once the total chunk count across all
    // jobs exceeds this, no further rows need to be examined.
    protected int n;
| |
| // This is the hash table that maps a job ID to the object that tracks the number |
| // of documents already accumulated for this resultset. The count of the number |
| // of queue records we have is tallied by going through each job in this table |
| // and adding the records outstanding for it. |
| protected HashMap jobQueueHash = new HashMap(); |
| |
| // This is the map from jobid to connection name |
| protected HashMap jobConnection = new HashMap(); |
| |
| // This is the set of allowed connection names. We discard all documents that are |
| // not from that set. |
| protected HashMap activeConnections = new HashMap(); |
| |
| // This is the number of documents per set per connection. |
| protected HashMap setSizes = new HashMap(); |
| |
| // These are the individual connection maximums, keyed by connection name. |
| protected HashMap maxConnectionCounts = new HashMap(); |
| |
| // This is the maximum number of documents per set over all the connections we are looking at. This helps us establish a sanity limit. |
| protected int maxSetSize = 0; |
| |
| // This is the number of documents processed so far |
| protected int documentsProcessed = 0; |
| |
| // This is where we accumulate blocking documents. This is an arraylist of DocumentDescription objects. |
| protected ArrayList blockingDocumentArray = new ArrayList(); |
| |
| // Cutoff time for documents eligible for prioritization |
| protected long prioritizationTime; |
| |
| /** Constructor. |
    * Most of this class is built up piecemeal after construction; the constructor only records the
    * chunk target and the prioritization cutoff.
    *@param n is the maximum number of full job descriptions we want at this time.
    *@param prioritizationTime is the cutoff time for documents eligible for prioritization.
| */ |
| public ThrottleLimit(int n, long prioritizationTime) |
| { |
| this.n = n; |
| this.prioritizationTime = prioritizationTime; |
| Logging.perf.debug("Limit instance created"); |
| } |
| |
| /** Transfer blocking documents discovered to BlockingDocuments object */ |
| public void tallyBlockingDocuments(BlockingDocuments blockingDocuments) |
| { |
| int i = 0; |
| while (i < blockingDocumentArray.size()) |
| { |
| DocumentDescription dd = (DocumentDescription)blockingDocumentArray.get(i++); |
| blockingDocuments.addBlockingDocument(dd); |
| } |
| blockingDocumentArray.clear(); |
| } |
| |
| /** Add a job/connection name map entry. |
| *@param jobID is the job id. |
| *@param connectionName is the connection name. |
| */ |
| public void addJob(Long jobID, String connectionName) |
| { |
| jobConnection.put(jobID,connectionName); |
| } |
| |
| /** Add an active connection. This is the pool of active connections that will be used for the lifetime of this operation. |
| *@param connectionName is the connection name. |
| */ |
| public void addConnectionName(String connectionName, IRepositoryConnector connectorInstance) |
| throws ManifoldCFException |
| { |
| activeConnections.put(connectionName,connectorInstance); |
| int setSize = connectorInstance.getMaxDocumentRequest(); |
| setSizes.put(connectionName,new Integer(setSize)); |
| if (setSize > maxSetSize) |
| maxSetSize = setSize; |
| } |
| |
    /** Add a fetch limit for a specified connection. The limit applies to each bin matching the
    * expression; once an individual matching bin reaches that limit, documents that belong to that bin
    * will be excluded.
    *@param connectionName is the connection name.
    *@param regexp is the regular expression, which we will match against various bins.
    *@param upperLimit is the maximum number of fetches allowed for each bin matching the expression.
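    * For example, addLimit(connectionName,"^[^\\.]",8) would cap each bin that does not begin with
    * a '.' (i.e. each full host-name bin) at 8 fetches for this query.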
| */ |
| public void addLimit(String connectionName, String regexp, int upperLimit) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug(" Adding fetch limit of "+Integer.toString(upperLimit)+" fetches for expression '"+regexp+"' for connection '"+connectionName+"'"); |
| |
| ThrottleJobItem ji = (ThrottleJobItem)connectionMap.get(connectionName); |
| if (ji == null) |
| { |
| ji = new ThrottleJobItem(); |
| connectionMap.put(connectionName,ji); |
| } |
| ji.addLimit(regexp,upperLimit); |
| } |
| |
| /** Set a connection-based total document limit. |
| */ |
| public void setConnectionLimit(String connectionName, int maxDocuments) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug(" Setting connection limit of "+Integer.toString(maxDocuments)+" for connection "+connectionName); |
| maxConnectionCounts.put(connectionName,new MutableInteger(maxDocuments)); |
| } |
| |
| /** See if this class can be legitimately compared against another of |
| * the same type. |
| *@return true if comparisons will ever return "true". |
| */ |
| public boolean doesCompareWork() |
| { |
| return false; |
| } |
| |
| /** Create a duplicate of this class instance. All current state should be preserved. |
| * NOTE: Since doesCompareWork() returns false, queries using this limit checker cannot |
| * be cached, and therefore duplicate() is never called from the query executor. But it can |
| * be called from other places. |
| *@return the duplicate. |
| */ |
| public ILimitChecker duplicate() |
| { |
| return makeDeepCopy(); |
| } |
| |
| /** Make a deep copy */ |
| public ThrottleLimit makeDeepCopy() |
| { |
| ThrottleLimit rval = new ThrottleLimit(n,prioritizationTime); |
| // Create a true copy of all the structures in which counts are kept. The referential structures (e.g. connection hashes) |
| // do not need a deep copy. |
| rval.activeConnections = activeConnections; |
| rval.setSizes = setSizes; |
| rval.maxConnectionCounts = maxConnectionCounts; |
| rval.maxSetSize = maxSetSize; |
| rval.jobConnection = jobConnection; |
| // The structures where counts are maintained DO need a deep copy. |
| rval.documentsProcessed = documentsProcessed; |
| Iterator iter; |
| iter = connectionMap.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| Object key = iter.next(); |
| rval.connectionMap.put(key,((ThrottleJobItem)connectionMap.get(key)).duplicate()); |
| } |
| iter = jobQueueHash.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| Object key = iter.next(); |
| rval.jobQueueHash.put(key,((QueueHashItem)jobQueueHash.get(key)).duplicate()); |
| } |
| return rval; |
| } |
| |
| /** Find the hashcode for this class. This will only ever be used if |
| * doesCompareWork() returns true. |
| *@return the hashcode. |
| */ |
| public int hashCode() |
| { |
| return 0; |
| } |
| |
| /** Compare two objects and see if equal. This will only ever be used |
| * if doesCompareWork() returns true. |
| *@param object is the object to compare against. |
| *@return true if equal. |
| */ |
| public boolean equals(Object object) |
| { |
| return false; |
| } |
| |
| /** Get the remaining documents we should query for. |
| *@return the maximal remaining count. |
| */ |
| public int getRemainingDocuments() |
| { |
| return EXTRA_FACTOR * n * maxSetSize - documentsProcessed; |
| } |
| |
| /** See if a result row should be included in the final result set. |
| *@param row is the result row to check. |
| *@return true if it should be included, false otherwise. |
| */ |
| public boolean checkInclude(IResultRow row) |
| throws ManifoldCFException |
| { |
      // Note: This method does two things: First, it ensures that the number of documents per bin (for each
      // connection) does not exceed the calculated throttle number. Second, it keeps track of how many document
      // queue items will be needed, so we can stop when we've got enough for the moment.
| Logging.perf.debug("Checking if row should be included"); |
      // This is where the actual work gets done.
| // The row passed in has the following jobqueue columns: idField, jobIDField, docIDField, and statusField |
| Long jobIDValue = (Long)row.getValue(JobQueue.jobIDField); |
| |
| // Get the connection name for this row |
| String connectionName = (String)jobConnection.get(jobIDValue); |
| if (connectionName == null) |
| { |
| Logging.perf.debug(" Row does not have an eligible job - excluding"); |
| return false; |
| } |
| IRepositoryConnector connectorInstance = (IRepositoryConnector)activeConnections.get(connectionName); |
| if (connectorInstance == null) |
| { |
| Logging.perf.debug(" Row does not have an eligible connector instance - excluding"); |
| return false; |
| } |
| |
| // Find the connection limit for this document |
| MutableInteger connectionLimit = (MutableInteger)maxConnectionCounts.get(connectionName); |
| if (connectionLimit != null) |
| { |
| if (connectionLimit.intValue() == 0) |
| { |
| Logging.perf.debug(" Row exceeds its connection limit - excluding"); |
| return false; |
| } |
| connectionLimit.decrement(); |
| } |
| |
| // Tally this item in the job queue hash, so we can detect when to stop |
| QueueHashItem queueItem = (QueueHashItem)jobQueueHash.get(jobIDValue); |
| if (queueItem == null) |
| { |
        // Look up the maximum number of documents per chunk for this connection (obtained from the connector earlier)
| int maxCount = ((Integer)setSizes.get(connectionName)).intValue(); |
| queueItem = new QueueHashItem(maxCount); |
        jobQueueHash.put(jobIDValue,queueItem);
      }
| |
| String docIDHash = (String)row.getValue(JobQueue.docHashField); |
| String docID = (String)row.getValue(JobQueue.docIDField); |
| |
| // Figure out what the right bins are, given the data we have. |
| // This will involve a call to the connector. |
| String[] binNames = ManifoldCF.calculateBins(connectorInstance,docID); |
| // Keep the running count, so we can abort without going through the whole set. |
| documentsProcessed++; |
| //scanRecord.addBins(binNames); |
| |
| ThrottleJobItem item = (ThrottleJobItem)connectionMap.get(connectionName); |
| |
| // If there is no schedule-based throttling on this connection, we're done. |
| if (item == null) |
| { |
| queueItem.addDocument(); |
| Logging.perf.debug(" Row has no throttling - including"); |
| return true; |
| } |
| |
| |
| int j = 0; |
| while (j < binNames.length) |
| { |
| if (item.isEmpty(binNames[j])) |
| { |
| if (Logging.perf.isDebugEnabled()) |
| Logging.perf.debug(" Bin "+binNames[j]+" has no more available fetches - excluding"); |
| |
| Object o = row.getValue(JobQueue.prioritySetField); |
| if (o == null || ((Long)o).longValue() <= prioritizationTime) |
| { |
| // Need to add a document descriptor based on this row to the blockingDocuments object! |
| // This will cause it to be reprioritized preferentially, getting it out of the way if it shouldn't |
| // be there. |
| Long id = (Long)row.getValue(JobQueue.idField); |
| Long jobID = (Long)row.getValue(JobQueue.jobIDField); |
| DocumentDescription dd = new DocumentDescription(id,jobID,docIDHash,docID); |
| blockingDocumentArray.add(dd); |
| } |
| |
| return false; |
| } |
| j++; |
| } |
| j = 0; |
| while (j < binNames.length) |
| { |
| item.decrement(binNames[j++]); |
| } |
| queueItem.addDocument(); |
| Logging.perf.debug(" Including!"); |
| return true; |
| } |
| |
| /** See if we should examine another row. |
| *@return true if we need to keep going, or false if we are done. |
| */ |
| public boolean checkContinue() |
| throws ManifoldCFException |
| { |
| if (documentsProcessed >= EXTRA_FACTOR * n * maxSetSize) |
| return false; |
| |
| // If the number of chunks exceeds n, we are done |
| Iterator iter = jobQueueHash.keySet().iterator(); |
| int count = 0; |
| while (iter.hasNext()) |
| { |
| Long jobID = (Long)iter.next(); |
| QueueHashItem item = (QueueHashItem)jobQueueHash.get(jobID); |
| count += item.getChunkCount(); |
| if (count > n) |
| return false; |
| } |
| return true; |
| } |
| |
| |
| |
| } |
| |
| /** This class contains information per job on how many queue items have so far been accumulated. |
| */ |
| protected static class QueueHashItem |
| { |
| // The number of items per chunk for this job |
| int itemsPerChunk; |
| // The number of chunks so far, INCLUDING incomplete chunks |
| int chunkCount = 0; |
| // The number of documents in the current incomplete chunk |
| int currentDocumentCount = 0; |
| |
| /** Construct. |
| *@param itemsPerChunk is the number of items per chunk for this job. |
| */ |
| public QueueHashItem(int itemsPerChunk) |
| { |
| this.itemsPerChunk = itemsPerChunk; |
| } |
| |
| /** Duplicate. */ |
| public QueueHashItem duplicate() |
| { |
| QueueHashItem rval = new QueueHashItem(itemsPerChunk); |
| rval.chunkCount = chunkCount; |
| rval.currentDocumentCount = currentDocumentCount; |
| return rval; |
| } |
| |
| /** Add a document to this job. |
| */ |
| public void addDocument() |
| { |
| currentDocumentCount++; |
| if (currentDocumentCount == 1) |
| chunkCount++; |
| if (currentDocumentCount == itemsPerChunk) |
| currentDocumentCount = 0; |
| } |
| |
| /** Get the number of chunks. |
| *@return the number of chunks. |
| */ |
| public int getChunkCount() |
| { |
| return chunkCount; |
| } |
| } |
| |
  /** This class represents the throttling information stored per connection (one instance per connection name).
| * In this structure, "remaining" counts are kept for each bin. When the bin becomes empty, |
| * then no more documents that would map to that bin will be returned, for this query. |
| * |
| * The way in which the maximum count per bin is determined is not part of this class. |
| */ |
| protected static class ThrottleJobItem |
| { |
| /** These are the bin limits. This is an array of ThrottleLimitSpec objects. */ |
| protected ArrayList throttleLimits = new ArrayList(); |
    /** This is a map of the bins and their current remaining counts. If an entry doesn't exist, the count
    * is taken to be the bin's maximum, as computed by findMaxCount(). */
| protected HashMap binCounts = new HashMap(); |
| |
| /** Constructor. */ |
| public ThrottleJobItem() |
| { |
| } |
| |
| /** Add a bin limit. |
    *@param regexp is the regular expression describing the bins to which the limit applies.
| *@param maxCount is the maximum number of fetches allowed for that bin. |
| */ |
| public void addLimit(String regexp, int maxCount) |
| { |
| try |
| { |
| throttleLimits.add(new ThrottleLimitSpec(regexp,maxCount)); |
| } |
| catch (PatternSyntaxException e) |
| { |
| // Ignore the bad entry; it just won't contribute any throttling. |
| } |
| } |
| |
| /** Create a duplicate of this item. |
| *@return the duplicate. |
| */ |
| public ThrottleJobItem duplicate() |
| { |
| ThrottleJobItem rval = new ThrottleJobItem(); |
| rval.throttleLimits = throttleLimits; |
| Iterator iter = binCounts.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| String key = (String)iter.next(); |
        rval.binCounts.put(key,((MutableInteger)binCounts.get(key)).duplicate());
| } |
| return rval; |
| } |
| |
| /** Check if the specified bin is empty. |
| *@param binName is the bin name. |
| *@return true if empty. |
| */ |
| public boolean isEmpty(String binName) |
| { |
| MutableInteger value = (MutableInteger)binCounts.get(binName); |
| int remaining; |
| if (value == null) |
| { |
| int x = findMaxCount(binName); |
| if (x == -1) |
| return false; |
| remaining = x; |
| } |
| else |
| remaining = value.intValue(); |
| return (remaining == 0); |
| } |
| |
| /** Decrement specified bin. |
| *@param binName is the bin name. |
| */ |
| public void decrement(String binName) |
| { |
| MutableInteger value = (MutableInteger)binCounts.get(binName); |
| if (value == null) |
| { |
| int x = findMaxCount(binName); |
| if (x == -1) |
| return; |
| value = new MutableInteger(x); |
| binCounts.put(binName,value); |
| } |
| value.decrement(); |
| } |
| |
| /** Given a bin name, find the max value for it using the regexps that are in place. |
| *@param binName is the bin name. |
| *@return the max count for that bin, or -1 if infinite. |
| */ |
| protected int findMaxCount(String binName) |
| { |
| // Each connector generates a set of bins per descriptor, e.g. "", ".com", ".metacarta.com", "foo.metacarta.com" |
| // |
| // We want to be able to do a couple of different kinds of things easily. For example, we want to: |
| // - be able to "turn off" or restrict fetching for a given domain, to a lower value than for other domains |
| // - be able to control fetch rates of .com, .metacarta.com, and foo.metacarta.com such that we |
| // can establish a faster rate for .com than for foo.metacarta.com |
| // |
| // The standard case is to limit fetch rate for all terminal domains (e.g. foo.metacarta.com) to some number: |
| // ^[^\.] = 8 |
| // |
| // To apply an additional limit restriction on a specific domain easily requires that the MINIMUM rate |
| // value be chosen when more than one regexp match is found: |
| // ^[^\.] = 8 |
| // ^foo\.metacarta\.com = 4 |
| // |
| // To apply different rates for different levels: |
| // ^[^\.] = 8 |
| // ^\.[^\.]*\.[^\.]*$ = 20 |
| // ^\.[^\.]*$ = 40 |
| // |
| |
| // If the same bin is matched by more than one regexp, I now take the MINIMUM value, since this seems to be |
| // more what the world wants to do (restrict, rather than increase, fetch rates). |
| int maxCount = -1; |
| int i = 0; |
| while (i < throttleLimits.size()) |
| { |
| ThrottleLimitSpec spec = (ThrottleLimitSpec)throttleLimits.get(i++); |
| Pattern p = spec.getRegexp(); |
| Matcher m = p.matcher(binName); |
| if (m.find()) |
| { |
| int limit = spec.getMaxCount(); |
| if (maxCount == -1 || limit < maxCount) |
| maxCount = limit; |
| } |
| } |
| |
| return maxCount; |
| } |
| } |
| |
| /** This is a class which describes an individual throttle limit, in fetches. */ |
| protected static class ThrottleLimitSpec |
| { |
| /** Regexp */ |
| protected Pattern regexp; |
| /** The fetch limit for all bins matching that regexp */ |
| protected int maxCount; |
| |
| /** Constructor */ |
| public ThrottleLimitSpec(String regexp, int maxCount) |
| throws PatternSyntaxException |
| { |
| this.regexp = Pattern.compile(regexp); |
| this.maxCount = maxCount; |
| } |
| |
| /** Get the regexp. */ |
| public Pattern getRegexp() |
| { |
| return regexp; |
| } |
| |
| /** Get the max count */ |
| public int getMaxCount() |
| { |
| return maxCount; |
| } |
| } |
| |
| /** Mutable integer class. |
| */ |
| protected static class MutableInteger |
| { |
| int value; |
| |
| /** Construct. |
| */ |
| public MutableInteger(int value) |
| { |
| this.value = value; |
| } |
| |
| /** Duplicate */ |
| public MutableInteger duplicate() |
| { |
| return new MutableInteger(value); |
| } |
| |
| /** Decrement. |
| */ |
| public void decrement() |
| { |
| value--; |
| } |
| |
| /** Increment. |
| */ |
| public void increment() |
| { |
| value++; |
| } |
| |
| /** Get value. |
| */ |
| public int intValue() |
| { |
| return value; |
| } |
| } |
| |
| |
| } |
| |