| /* $Id: Carrydown.java 988245 2010-08-23 18:39:35Z kwright $ */ |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.manifoldcf.crawler.jobs; |
| |
| import java.util.*; |
| |
| import org.apache.manifoldcf.core.interfaces.*; |
| import org.apache.manifoldcf.crawler.interfaces.*; |
| import org.apache.manifoldcf.crawler.system.Logging; |
| import org.apache.manifoldcf.crawler.system.ManifoldCF; |
| |
| /** This class manages the table that keeps track of intrinsic relationships between documents. |
| * |
| * <br><br> |
| * <b>carrydown</b> |
| * <table border="1" cellpadding="3" cellspacing="0" summary=""> |
| * <tr class="TableHeadingColor"> |
| * <th>Field</th><th>Type</th><th>Description </th> |
| * <tr><td>jobid</td><td>BIGINT</td><td>Reference:jobs.id</td></tr> |
| * <tr><td>parentidhash</td><td>VARCHAR(40)</td><td></td></tr> |
| * <tr><td>childidhash</td><td>VARCHAR(40)</td><td></td></tr> |
| * <tr><td>dataname</td><td>VARCHAR(255)</td><td></td></tr> |
| * <tr><td>datavaluehash</td><td>VARCHAR(40)</td><td></td></tr> |
| * <tr><td>datavalue</td><td>LONGTEXT</td><td></td></tr> |
| * <tr><td>isnew</td><td>CHAR(1)</td><td></td></tr> |
| * <tr><td>processid</td><td>VARCHAR(16)</td><td></td></tr> |
| * </table> |
| * <br><br> |
| * |
| */ |
| public class Carrydown extends org.apache.manifoldcf.core.database.BaseTable |
| { |
| public static final String _rcsid = "@(#)$Id: Carrydown.java 988245 2010-08-23 18:39:35Z kwright $"; |
| |
| // Field names |
| public static final String jobIDField = "jobid"; |
| public static final String parentIDHashField = "parentidhash"; |
| public static final String childIDHashField = "childidhash"; |
| public static final String dataNameField = "dataname"; |
| public static final String dataValueHashField = "datavaluehash"; |
| public static final String dataValueField = "datavalue"; |
| public static final String newField = "isnew"; |
| public static final String processIDField = "processid"; |
| |
| /** The standard value for the "isnew" field. Means that the link existed prior to this scan, and no new link |
| * was found yet. */ |
| protected static final int ISNEW_BASE = 0; |
| /** This value means that the link is brand-new; it did not exist before this pass. */ |
| protected static final int ISNEW_NEW = 1; |
| /** This value means that the link existed before, and has been found during this scan. */ |
| protected static final int ISNEW_EXISTING = 2; |
| |
| // Map from string character to link status |
| protected static Map isNewMap; |
| static |
| { |
| isNewMap = new HashMap(); |
| isNewMap.put("B",new Integer(ISNEW_BASE)); |
| isNewMap.put("N",new Integer(ISNEW_NEW)); |
| isNewMap.put("E",new Integer(ISNEW_EXISTING)); |
| } |
| |
/** Constructor.
* Binds this manager to the underlying "carrydown" table in the given database.
*@param database is the database handle.
*/
public Carrydown(IDBInterface database)
throws ManifoldCFException
{
super(database,"carrydown");
}
| |
| /** Install or upgrade. |
| */ |
| public void install(String jobsTable, String jobsColumn) |
| throws ManifoldCFException |
| { |
| // Standard practice: Outer loop, to support upgrade requirements. |
| while (true) |
| { |
| Map existing = getTableSchema(null,null); |
| if (existing == null) |
| { |
| // I'm going to allow the parent to be null, which basically will be able to represent carry-down from the seeding |
| // process to the seed, in case this ever arises. |
| // |
| // I am also going to allow null data values. |
| HashMap map = new HashMap(); |
| map.put(jobIDField,new ColumnDescription("BIGINT",false,false,jobsTable,jobsColumn,false)); |
| map.put(parentIDHashField,new ColumnDescription("VARCHAR(40)",false,true,null,null,false)); |
| map.put(childIDHashField,new ColumnDescription("VARCHAR(40)",false,false,null,null,false)); |
| map.put(dataNameField,new ColumnDescription("VARCHAR(255)",false,false,null,null,false)); |
| map.put(dataValueHashField,new ColumnDescription("VARCHAR(40)",false,true,null,null,false)); |
| map.put(dataValueField,new ColumnDescription("LONGTEXT",false,true,null,null,false)); |
| map.put(newField,new ColumnDescription("CHAR(1)",false,true,null,null,false)); |
| map.put(processIDField,new ColumnDescription("VARCHAR(16)",false,true,null,null,false)); |
| |
| performCreate(map,null); |
| |
| } |
| else |
| { |
| // Upgrade code goes here, if needed. |
| } |
| |
| // Now do index management |
| |
| IndexDescription uniqueIndex = new IndexDescription(true,new String[]{jobIDField,parentIDHashField,childIDHashField,dataNameField,dataValueHashField}); |
| IndexDescription jobChildDataIndex = new IndexDescription(false,new String[]{jobIDField,childIDHashField,dataNameField}); |
| IndexDescription newIndex = new IndexDescription(false,new String[]{newField,processIDField}); |
| |
| Map indexes = getTableIndexes(null,null); |
| Iterator iter = indexes.keySet().iterator(); |
| while (iter.hasNext()) |
| { |
| String indexName = (String)iter.next(); |
| IndexDescription id = (IndexDescription)indexes.get(indexName); |
| |
| if (uniqueIndex != null && id.equals(uniqueIndex)) |
| uniqueIndex = null; |
| else if (jobChildDataIndex != null && id.equals(jobChildDataIndex)) |
| jobChildDataIndex = null; |
| else if (newIndex != null && id.equals(newIndex)) |
| newIndex = null; |
| else if (indexName.indexOf("_pkey") == -1) |
| // This index shouldn't be here; drop it |
| performRemoveIndex(indexName); |
| } |
| |
| // Create the indexes we are missing |
| |
| if (jobChildDataIndex != null) |
| performAddIndex(null,jobChildDataIndex); |
| |
| if (newIndex != null) |
| performAddIndex(null,newIndex); |
| |
| // This index is the constraint. Only one row per job,dataname,datavalue,parent,and child. |
| if (uniqueIndex != null) |
| performAddIndex(null,uniqueIndex); |
| |
| // Install/upgrade complete |
| break; |
| } |
| |
| } |
| |
/** Uninstall.
* Drops the carrydown table entirely; its indexes are removed along with it.
*/
public void deinstall()
throws ManifoldCFException
{
performDrop(null);
}
| |
| /** Analyze job tables that need analysis. |
| */ |
| public void analyzeTables() |
| throws ManifoldCFException |
| { |
| long startTime = System.currentTimeMillis(); |
| Logging.perf.debug("Beginning to analyze carrydown table"); |
| analyzeTable(); |
| Logging.perf.debug("Done analyzing carrydown table in "+new Long(System.currentTimeMillis()-startTime)+" ms"); |
| } |
| |
| /** Delete an owning job (and clean up the corresponding carrydown rows). |
| */ |
| public void deleteOwner(Long jobID) |
| throws ManifoldCFException |
| { |
| ArrayList list = new ArrayList(); |
| String query = buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID)}); |
| performDelete("WHERE "+query,list,null); |
| } |
| |
| // The strategy here is to leave all rows that have a given document as a parent labelled as "BASE" at the start of the |
| // processing of that parent. As data are encountered, the values get written as "NEW" or flipped to "EXISTING". |
| // When the document's processing has been completed, another method is called |
| // that will remove all rows that belong to the parent which are still labelled "BASE", and will map the other rows that |
| // belong to the parent back to the "BASE" state. |
| // |
| // If the daemon is aborted and restarted, the "new" rows should be deleted, and the EXISTING rows should be reset to |
| // BASE, in order to restore the system to a good base state. |
| // |
| |
| /** Reset, at startup time. |
| *@param processID is the process ID. |
| */ |
| public void restart(String processID) |
| throws ManifoldCFException |
| { |
| // Delete "new" rows |
| HashMap map = new HashMap(); |
| ArrayList list = new ArrayList(); |
| String query = buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(newField,statusToString(ISNEW_NEW)), |
| new UnitaryClause(processIDField,processID)}); |
| performDelete("WHERE "+query,list,null); |
| |
| // Convert "existing" rows to base |
| map.put(newField,statusToString(ISNEW_BASE)); |
| list.clear(); |
| query = buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(newField,statusToString(ISNEW_EXISTING)), |
| new UnitaryClause(processIDField,processID)}); |
| performUpdate(map,"WHERE "+query,list,null); |
| } |
| |
| /** Clean up after all process IDs. |
| */ |
| public void restart() |
| throws ManifoldCFException |
| { |
| // Delete "new" rows |
| HashMap map = new HashMap(); |
| ArrayList list = new ArrayList(); |
| String query = buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(newField,statusToString(ISNEW_NEW))}); |
| performDelete("WHERE "+query,list,null); |
| |
| // Convert "existing" rows to base |
| map.put(newField,statusToString(ISNEW_BASE)); |
| list.clear(); |
| query = buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(newField,statusToString(ISNEW_EXISTING))}); |
| performUpdate(map,"WHERE "+query,list,null); |
| } |
| |
/** Reset, at startup time, entire cluster.
* Intentionally a no-op: carrydown cleanup is handled per-process by restart(processID),
* so there is no additional cluster-wide state to reset here.
*/
public void restartCluster()
throws ManifoldCFException
{
// Does nothing
}
| |
| /** Add carrydown data for a given parent/child pair. |
| * |
| *@return true if new carrydown data was recorded; false otherwise. |
| */ |
| public boolean recordCarrydownData(Long jobID, String parentDocumentIDHash, String childDocumentIDHash, |
| String[] documentDataNames, String[][] documentDataValueHashes, Object[][] documentDataValues, String processID) |
| throws ManifoldCFException |
| { |
| return recordCarrydownDataMultiple(jobID,parentDocumentIDHash,new String[]{childDocumentIDHash}, |
| new String[][]{documentDataNames},new String[][][]{documentDataValueHashes},new Object[][][]{documentDataValues},processID)[0]; |
| } |
| |
| /** Add carrydown data to the table. |
| */ |
| public boolean[] recordCarrydownDataMultiple(Long jobID, String parentDocumentIDHash, String[] childDocumentIDHashes, |
| String[][] dataNames, String[][][] dataValueHashes, Object[][][] dataValues, String processID) |
| throws ManifoldCFException |
| { |
| |
| // Need to go into a transaction because we need to distinguish between update and insert. |
| Set<ValueRecord> duplicateRemoval = new HashSet<>(); |
| Set<ValueRecord> presentSet = new HashSet<>(); |
| |
| int maxClause = getMaxOrClause(); |
| StringBuilder sb = new StringBuilder(); |
| ArrayList list = new ArrayList(); |
| int i = 0; |
| int k = 0; |
| // Keep track of the data items that have been seen vs. those that were unseen. |
| while (k < childDocumentIDHashes.length) |
| { |
| String childDocumentIDHash = childDocumentIDHashes[k]; |
| |
| // Loop through data names and values for this document |
| String[] documentDataNames = dataNames[k]; |
| String[][] documentDataValueHashes = dataValueHashes[k]; |
| Object[][] documentDataValues = dataValues[k]; |
| k++; |
| |
| int q = 0; |
| while (q < documentDataNames.length) |
| { |
| String documentDataName = documentDataNames[q]; |
| String[] documentDataValueHashSet = documentDataValueHashes[q]; |
| Object[] documentDataValueSet = documentDataValues[q]; |
| q++; |
| |
| if (documentDataValueHashSet != null) |
| { |
| int p = 0; |
| while (p < documentDataValueHashSet.length) |
| { |
| String documentDataValueHash = documentDataValueHashSet[p]; |
| Object documentDataValue = documentDataValueSet[p]; |
| // blank values equivalent to null |
| if (documentDataValueHash != null && documentDataValueHash.length() == 0) |
| documentDataValueHash = null; |
| // Build a hash record |
| ValueRecord vr = new ValueRecord(childDocumentIDHash, |
| documentDataName,documentDataValueHash,documentDataValue); |
| if (duplicateRemoval.contains(vr)) |
| continue; |
| duplicateRemoval.add(vr); |
| if (i == maxClause) |
| { |
| // Do the query and record the results |
| performExistsCheck(presentSet,sb.toString(),list); |
| i = 0; |
| sb.setLength(0); |
| list.clear(); |
| } |
| if (i > 0) |
| sb.append(" OR "); |
| |
| sb.append(buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new UnitaryClause(parentIDHashField,parentDocumentIDHash), |
| new UnitaryClause(childIDHashField,childDocumentIDHash), |
| new UnitaryClause(dataNameField,documentDataName), |
| (documentDataValueHash==null)? |
| new NullCheckClause(dataValueHashField,true): |
| new UnitaryClause(dataValueHashField,documentDataValueHash)})); |
| |
| i++; |
| p++; |
| } |
| } |
| } |
| } |
| if (i > 0) |
| performExistsCheck(presentSet,sb.toString(),list); |
| |
| // Go through the list again, and based on the results above, decide to do either an insert or |
| // an update. Keep track of this information also, so we can build the return array when done. |
| |
| Map<String, Boolean> insertHappened = new HashMap<>(); |
| |
| int j = 0; |
| Iterator<ValueRecord> iter = duplicateRemoval.iterator(); |
| while (iter.hasNext()) |
| { |
| ValueRecord childDocumentRecord = iter.next(); |
| |
| String childDocumentIDHash = childDocumentRecord.getDocumentIDHash(); |
| |
| HashMap map = new HashMap(); |
| String dataName = childDocumentRecord.getDataName(); |
| String dataValueHash = childDocumentRecord.getDataValueHash(); |
| Object dataValue = childDocumentRecord.getDataValue(); |
| |
| if (!presentSet.contains(childDocumentRecord)) |
| { |
| map.put(jobIDField,jobID); |
| map.put(parentIDHashField,parentDocumentIDHash); |
| map.put(childIDHashField,childDocumentIDHash); |
| map.put(dataNameField,dataName); |
| if (dataValueHash != null) |
| { |
| map.put(dataValueHashField,dataValueHash); |
| map.put(dataValueField,dataValue); |
| } |
| |
| map.put(newField,statusToString(ISNEW_NEW)); |
| map.put(processIDField,processID); |
| performInsert(map,null); |
| noteModifications(1,0,0); |
| insertHappened.put(childDocumentIDHash,new Boolean(true)); |
| } |
| else |
| { |
| sb = new StringBuilder("WHERE "); |
| ArrayList updateList = new ArrayList(); |
| sb.append(buildConjunctionClause(updateList,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new UnitaryClause(parentIDHashField,parentDocumentIDHash), |
| new UnitaryClause(childIDHashField,childDocumentIDHash), |
| new UnitaryClause(dataNameField,dataName), |
| (dataValueHash==null)? |
| new NullCheckClause(dataValueHashField,true): |
| new UnitaryClause(dataValueHashField,dataValueHash)})); |
| |
| map.put(newField,statusToString(ISNEW_EXISTING)); |
| map.put(processIDField,processID); |
| performUpdate(map,sb.toString(),updateList,null); |
| noteModifications(0,1,0); |
| } |
| } |
| |
| boolean[] rval = new boolean[childDocumentIDHashes.length]; |
| i = 0; |
| while (i < rval.length) |
| { |
| String childDocumentIDHash = childDocumentIDHashes[i]; |
| rval[i++] = (insertHappened.get(childDocumentIDHash) != null); |
| } |
| |
| return rval; |
| } |
| |
| /** Do the exists check, in batch. */ |
| protected void performExistsCheck(Set<ValueRecord> presentSet, String query, ArrayList list) |
| throws ManifoldCFException |
| { |
| // Note well: presentMap is only checked for the *existence* of a record, so we do not need to populate the datavalue field! |
| // This is crucial, because otherwise we'd either be using an undetermined amount of memory, or we'd need to read into a temporary file. |
| IResultSet result = performQuery("SELECT "+childIDHashField+","+dataNameField+","+dataValueHashField+" FROM "+getTableName()+" WHERE "+query+" FOR UPDATE",list,null,null); |
| int i = 0; |
| while (i < result.getRowCount()) |
| { |
| IResultRow row = result.getRow(i++); |
| String documentIDHash = (String)row.getValue(childIDHashField); |
| String dataName = (String)row.getValue(dataNameField); |
| String dataValueHash = (String)row.getValue(dataValueHashField); |
| //String dataValue = (String)row.getValue(dataValueField); |
| ValueRecord vr = new ValueRecord(documentIDHash,dataName,dataValueHash,null); |
| |
| presentSet.add(vr); |
| } |
| } |
| |
| /** Revert all records belonging to the specified parent documents to their original, |
| * pre-modified, state. |
| */ |
| public void revertRecords(Long jobID, String[] parentDocumentIDHashes) |
| throws ManifoldCFException |
| { |
| int maxClause = getMaxInClause(); |
| StringBuilder sb = new StringBuilder(); |
| List<String> list = new ArrayList<String>(); |
| int k = 0; |
| for (String parentDocumentIDHash : parentDocumentIDHashes) |
| { |
| if (k == maxClause) |
| { |
| performRevertRecords(sb.toString(),jobID,list); |
| sb.setLength(0); |
| list.clear(); |
| k = 0; |
| } |
| if (k > 0) |
| sb.append(","); |
| sb.append("?"); |
| list.add(parentDocumentIDHash); |
| k++; |
| } |
| |
| if (k > 0) |
| performRevertRecords(sb.toString(),jobID,list); |
| } |
| |
| protected void performRevertRecords(String query, Long jobID, List<String> list) |
| throws ManifoldCFException |
| { |
| // Delete new records |
| StringBuilder sb = new StringBuilder("WHERE "); |
| ArrayList newList = new ArrayList(); |
| |
| sb.append(buildConjunctionClause(newList,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new MultiClause(parentIDHashField,list)})).append(" AND "); |
| |
| sb.append(newField).append("=?"); |
| newList.add(statusToString(ISNEW_NEW)); |
| performDelete(sb.toString(),newList,null); |
| |
| // Restore old values |
| sb = new StringBuilder("WHERE "); |
| newList.clear(); |
| |
| sb.append(buildConjunctionClause(newList,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new MultiClause(parentIDHashField,list)})).append(" AND "); |
| |
| sb.append(newField).append("=?"); |
| newList.add(statusToString(ISNEW_EXISTING)); |
| |
| HashMap map = new HashMap(); |
| map.put(newField,statusToString(ISNEW_BASE)); |
| map.put(processIDField,null); |
| performUpdate(map,sb.toString(),newList,null); |
| |
| noteModifications(0,list.size(),0); |
| } |
| |
| /** Return all records belonging to the specified parent documents to the base state, |
| * and delete the old (eliminated) child records. |
| */ |
| public void restoreRecords(Long jobID, String[] parentDocumentIDHashes) |
| throws ManifoldCFException |
| { |
| int maxClause = getMaxInClause(); |
| StringBuilder sb = new StringBuilder(); |
| List<String> list = new ArrayList<String>(); |
| int k = 0; |
| for (String parentDocumentIDHash : parentDocumentIDHashes) |
| { |
| if (k == maxClause) |
| { |
| performRestoreRecords(sb.toString(),jobID,list); |
| sb.setLength(0); |
| list.clear(); |
| k = 0; |
| } |
| if (k > 0) |
| sb.append(","); |
| sb.append("?"); |
| list.add(parentDocumentIDHash); |
| k++; |
| } |
| |
| if (k > 0) |
| performRestoreRecords(sb.toString(),jobID,list); |
| } |
| |
| protected void performRestoreRecords(String query, Long jobID, List<String> list) |
| throws ManifoldCFException |
| { |
| // Delete |
| StringBuilder sb = new StringBuilder("WHERE "); |
| ArrayList newList = new ArrayList(); |
| |
| sb.append(buildConjunctionClause(newList,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new MultiClause(parentIDHashField,list)})).append(" AND "); |
| |
| sb.append(newField).append("=?"); |
| newList.add(statusToString(ISNEW_BASE)); |
| performDelete(sb.toString(),newList,null); |
| |
| // Restore new values |
| sb = new StringBuilder("WHERE "); |
| newList.clear(); |
| |
| sb.append(buildConjunctionClause(newList,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new MultiClause(parentIDHashField,list)})).append(" AND "); |
| |
| sb.append(newField).append(" IN (?,?)"); |
| newList.add(statusToString(ISNEW_EXISTING)); |
| newList.add(statusToString(ISNEW_NEW)); |
| |
| HashMap map = new HashMap(); |
| map.put(newField,statusToString(ISNEW_BASE)); |
| map.put(processIDField,null); |
| performUpdate(map,sb.toString(),newList,null); |
| |
| noteModifications(0,list.size(),0); |
| } |
| |
| /** Delete all records that mention a particular set of document identifiers. |
| */ |
| public void deleteRecords(Long jobID, String[] documentIDHashes) |
| throws ManifoldCFException |
| { |
| int maxClause = maxClausePerformDeleteRecords(jobID); |
| List<String> list = new ArrayList<String>(); |
| int k = 0; |
| for (String documentIDHash : documentIDHashes) |
| { |
| if (k == maxClause) |
| { |
| performDeleteRecords(jobID,list); |
| list.clear(); |
| k = 0; |
| } |
| list.add(documentIDHash); |
| k++; |
| } |
| |
| if (k > 0) |
| performDeleteRecords(jobID,list); |
| } |
| |
/** Calculate the maximum number of document ID hashes that a single delete query can
* carry, given that a jobID clause is always also present.
*@param jobID is the job identifier included in every delete clause.
*@return the maximum batch size.
*/
protected int maxClausePerformDeleteRecords(Long jobID)
{
return findConjunctionClauseMax(new ClauseDescription[]{
new UnitaryClause(jobIDField,jobID)});
}
| |
| protected void performDeleteRecords(Long jobID, List<String> list) |
| throws ManifoldCFException |
| { |
| StringBuilder sb = new StringBuilder("WHERE "); |
| ArrayList newList = new ArrayList(); |
| |
| sb.append(buildConjunctionClause(newList,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new MultiClause(childIDHashField,list)})); |
| performDelete(sb.toString(),newList,null); |
| |
| sb = new StringBuilder("WHERE "); |
| newList.clear(); |
| |
| sb.append(buildConjunctionClause(newList,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new MultiClause(parentIDHashField,list)})); |
| performDelete(sb.toString(),newList,null); |
| |
| noteModifications(0,0,list.size()*2); |
| } |
| |
| /** Get unique values given a document identifier, data name, an job identifier */ |
| public String[] getDataValues(Long jobID, String documentIdentifierHash, String dataName) |
| throws ManifoldCFException |
| { |
| ArrayList list = new ArrayList(); |
| String query = buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new UnitaryClause(childIDHashField,documentIdentifierHash), |
| new UnitaryClause(dataNameField,dataName)}); |
| |
| IResultSet set = getDBInterface().performQuery("SELECT "+dataValueHashField+","+dataValueField+" FROM "+getTableName()+" WHERE "+ |
| query+" ORDER BY 1 ASC",list,null,null,-1,null,new ResultDuplicateEliminator()); |
| |
| String[] rval = new String[set.getRowCount()]; |
| int i = 0; |
| while (i < rval.length) |
| { |
| IResultRow row = set.getRow(i); |
| rval[i] = (String)row.getValue(dataValueField); |
| if (rval[i] == null) |
| rval[i] = ""; |
| i++; |
| } |
| return rval; |
| } |
| |
| /** Get unique values given a document identifier, data name, an job identifier */ |
| public CharacterInput[] getDataValuesAsFiles(Long jobID, String documentIdentifierHash, String dataName) |
| throws ManifoldCFException |
| { |
| ArrayList list = new ArrayList(); |
| String query = buildConjunctionClause(list,new ClauseDescription[]{ |
| new UnitaryClause(jobIDField,jobID), |
| new UnitaryClause(childIDHashField,documentIdentifierHash), |
| new UnitaryClause(dataNameField,dataName)}); |
| |
| ResultSpecification rs = new ResultSpecification(); |
| rs.setForm(dataValueField,ResultSpecification.FORM_STREAM); |
| IResultSet set = getDBInterface().performQuery("SELECT "+dataValueHashField+","+dataValueField+" FROM "+getTableName()+" WHERE "+ |
| query+" ORDER BY 1 ASC",list,null,null,-1,rs,new ResultDuplicateEliminator()); |
| |
| CharacterInput[] rval = new CharacterInput[set.getRowCount()]; |
| int i = 0; |
| while (i < rval.length) |
| { |
| IResultRow row = set.getRow(i); |
| rval[i] = (CharacterInput)row.getValue(dataValueField); |
| i++; |
| } |
| return rval; |
| } |
| |
| /** Convert string to link status. */ |
| public static int stringToStatus(String status) |
| { |
| Integer value = (Integer)isNewMap.get(status); |
| return value.intValue(); |
| } |
| |
| /** Convert link status to string */ |
| public static String statusToString(int status) |
| { |
| switch (status) |
| { |
| case ISNEW_BASE: |
| return "B"; |
| case ISNEW_NEW: |
| return "N"; |
| case ISNEW_EXISTING: |
| return "E"; |
| default: |
| return null; |
| } |
| } |
| |
| /** Limit checker which removes duplicate rows, based on datavaluehash */ |
| protected static class ResultDuplicateEliminator implements ILimitChecker |
| { |
| // The last value of data hash |
| protected String currentDataHashValue = null; |
| |
| public ResultDuplicateEliminator() |
| { |
| } |
| |
| public boolean doesCompareWork() |
| { |
| return false; |
| } |
| |
| public ILimitChecker duplicate() |
| { |
| return null; |
| } |
| |
| public int hashCode() |
| { |
| return 0; |
| } |
| |
| public boolean equals(Object object) |
| { |
| return false; |
| } |
| |
| /** See if a result row should be included in the final result set. |
| *@param row is the result row to check. |
| *@return true if it should be included, false otherwise. |
| */ |
| public boolean checkInclude(IResultRow row) |
| throws ManifoldCFException |
| { |
| // Check to be sure that this row is different from the last; only then agree to include it. |
| String value = (String)row.getValue(dataValueHashField); |
| if (value == null) |
| value = ""; |
| if (currentDataHashValue == null || !value.equals(currentDataHashValue)) |
| { |
| currentDataHashValue = value; |
| return true; |
| } |
| return false; |
| } |
| |
| /** See if we should examine another row. |
| *@return true if we need to keep going, or false if we are done. |
| */ |
| public boolean checkContinue() |
| throws ManifoldCFException |
| { |
| return true; |
| } |
| } |
| |
| protected static class ValueRecord |
| { |
| protected String documentIdentifierHash; |
| protected String dataName; |
| protected String dataValueHash; |
| // This value may be null, if we're simply using this record as a key |
| protected Object dataValue; |
| |
| public ValueRecord(String documentIdentifierHash, String dataName, String dataValueHash, Object dataValue) |
| { |
| this.documentIdentifierHash = documentIdentifierHash; |
| this.dataName = dataName; |
| this.dataValueHash = dataValueHash; |
| this.dataValue = dataValue; |
| } |
| |
| public String getDocumentIDHash() |
| { |
| return documentIdentifierHash; |
| } |
| |
| public String getDataName() |
| { |
| return dataName; |
| } |
| |
| public String getDataValueHash() |
| { |
| return dataValueHash; |
| } |
| |
| public Object getDataValue() |
| { |
| return dataValue; |
| } |
| |
| public int hashCode() |
| { |
| return documentIdentifierHash.hashCode() + dataName.hashCode() + ((dataValueHash == null)?0:dataValueHash.hashCode()); |
| } |
| |
| public boolean equals(Object o) |
| { |
| if (!(o instanceof ValueRecord)) |
| return false; |
| ValueRecord v = (ValueRecord)o; |
| if (!documentIdentifierHash.equals(v.documentIdentifierHash)) |
| return false; |
| if (!dataName.equals(v.dataName)) |
| return false; |
| if (dataValueHash == null && v.dataValueHash != null) |
| return false; |
| if (dataValueHash != null && v.dataValueHash == null) |
| return false; |
| if (dataValueHash == null) |
| return true; |
| return dataValueHash.equals(v.dataValueHash); |
| } |
| } |
| |
| // This class filters an ordered resultset to return only the duplicates |
| protected static class DuplicateFinder implements ILimitChecker |
| { |
| protected Long prevJobID = null; |
| protected String prevParentIDHash = null; |
| protected String prevChildIDHash = null; |
| protected String prevDataName = null; |
| protected String prevDataValue = null; |
| |
| public DuplicateFinder() |
| { |
| } |
| |
| /** See if this class can be legitimately compared against another of |
| * the same type. |
| *@return true if comparisons will ever return "true". |
| */ |
| public boolean doesCompareWork() |
| { |
| return false; |
| } |
| |
| /** Create a duplicate of this class instance. All current state should be preserved. |
| * NOTE: Since doesCompareWork() returns false, queries using this limit checker cannot |
| * be cached, and therefore duplicate() is never called from the query executor. |
| *@return the duplicate. |
| */ |
| public ILimitChecker duplicate() |
| { |
| DuplicateFinder df = new DuplicateFinder(); |
| df.prevJobID = prevJobID; |
| df.prevParentIDHash = prevParentIDHash; |
| df.prevChildIDHash = prevChildIDHash; |
| df.prevDataName = prevDataName; |
| df.prevDataValue = prevDataValue; |
| return df; |
| } |
| |
| /** Find the hashcode for this class. This will only ever be used if |
| * doesCompareWork() returns true. |
| *@return the hashcode. |
| */ |
| public int hashCode() |
| { |
| return 0; |
| } |
| |
| /** Compare two objects and see if equal. This will only ever be used |
| * if doesCompareWork() returns true. |
| *@param object is the object to compare against. |
| *@return true if equal. |
| */ |
| public boolean equals(Object object) |
| { |
| return false; |
| } |
| |
| /** See if a result row should be included in the final result set. |
| *@param row is the result row to check. |
| *@return true if it should be included, false otherwise. |
| */ |
| public boolean checkInclude(IResultRow row) |
| throws ManifoldCFException |
| { |
| Long jobID = (Long)row.getValue(jobIDField); |
| String parentIDHash = (String)row.getValue(parentIDHashField); |
| if (parentIDHash == null) |
| parentIDHash = ""; |
| String childIDHash = (String)row.getValue(childIDHashField); |
| if (childIDHash == null) |
| childIDHash = ""; |
| String dataName = (String)row.getValue(dataNameField); |
| String dataValue = (String)row.getValue(dataValueField); |
| if (dataValue == null) |
| dataValue = ""; |
| |
| // If this is a duplicate, we want to keep it! |
| if (prevJobID != null && jobID.equals(prevJobID) && dataName.equals(prevDataName) && dataValue.equals(prevDataValue) && parentIDHash.equals(prevParentIDHash) && childIDHash.equals(prevChildIDHash)) |
| return true; |
| prevJobID = jobID; |
| prevDataName = dataName; |
| prevParentIDHash = parentIDHash; |
| prevChildIDHash = childIDHash; |
| prevDataValue = dataValue; |
| return false; |
| } |
| |
| /** See if we should examine another row. |
| *@return true if we need to keep going, or false if we are done. |
| */ |
| public boolean checkContinue() |
| throws ManifoldCFException |
| { |
| return true; |
| } |
| } |
| |
| } |