Moving scotchArgs.R to STOUT REPO

Merged SCO+CH-STOUT branch with Master, in SCO+CH Repo. Then I added scotchArgs.R into STOUT repo
diff --git a/scotchArgs.R b/scotchArgs.R
new file mode 100644
index 0000000..9e7a275
--- /dev/null
+++ b/scotchArgs.R
@@ -0,0 +1,680 @@
+#!/usr/bin/env Rscript
+# Usage: This script reads in static files produced by STOUT that include form data from other services (e.g., SurveyMonkey)
+#and experiment data from STOUT, and timing data collected through a STOUT User ALE instance. 
+##It runs:
+###basic "truthing" (is form entry right or wrong), 
+###"timing corrections" (client (STOUT time) vs. 3rd party timing), and
+###post-processing operations (scale mean calculation, basic statistics)
+###prints new version of STOUT data with appended fields (variables), and values.
+# Data Ingest 
+##Read in static files from STOUT
+# Rscript --vanilla scotchArgs.R working.merge.csv xdata.codebook.yr3.v2.scales.csv xdata.codebook.yr3.v2.items.csv aggCheck.csv MasterAnswerTable.csv
+args <- commandArgs(trailingOnly = TRUE)
+# input files (defaults; replaced by positional arguments 1-3 when supplied):
+matDataFile <- "working.merge.csv"
+codeBookScalesFile <- "xdata.codebook.MOTv4.scales.csv"
+codeBookItemsFile <- "xdata.codebook.MOTv4.items.csv"
+# output files (defaults; replaced by positional arguments 4-5 when supplied):
+aggCheckFile <- "aggCheck.csv"
+matFile <- "MasterAnswerTable.csv"
+# Positional overrides. Arguments are order-dependent: to override a later
+# file name, every earlier argument must be given as well. Each guard is
+# independent, which is equivalent to the former nested pyramid because
+# length(args) >= k implies length(args) >= k - 1.
+if (length(args) >= 1) {
+  matDataFile <- args[1]
+}
+if (length(args) >= 2) {
+  codeBookScalesFile <- args[2]
+}
+if (length(args) >= 3) {
+  codeBookItemsFile <- args[3]
+}
+if (length(args) >= 4) {
+  aggCheckFile <- args[4]
+}
+if (length(args) >= 5) {
+  matFile <- args[5]
+}
+# raw data file as .csv into Data Frames <- read.csv(matDataFile, header=TRUE, stringsAsFactors=FALSE)
+colnames([colnames("user_hash"] <- "SYS.IND.SESS."
+codebook.scales <- read.csv(codeBookScalesFile, header=TRUE, stringsAsFactors=FALSE)      # create dataframe to hold codebook subscale metadata
+row.names(codebook.scales) <- make.names(codebook.scales[,"varnames"], unique=TRUE)
+codebook.items <- read.csv(codeBookItemsFile, header=TRUE, stringsAsFactors=FALSE)           # create dataframe to hold codebook items metadata
+row.names(codebook.items) <- make.names(codebook.items[,"varnames"], unique=TRUE)
+#Truthing Script
+##This script compares correct answers from Codebook to responses collected through form data
+OTdataRaw.CP1 <- cbind([,"SYS.IND.SESS."], subset(, select = (grepl("TSK.PRB.CP1.",names( # create working dataset from raw
+names(OTdataRaw.CP1)[1] <- "SYS.IND.SESS."
+OTdataRaw.CP2 <- cbind([,"SYS.IND.SESS."], subset(, select = (grepl("TSK.PRB.CP2.",names( # create working dataset from raw
+names(OTdataRaw.CP2)[1] <- "SYS.IND.SESS."
+OTdataRaw.CP3 <- cbind([,"SYS.IND.SESS."], subset(, select = (grepl("TSK.PRB.CP3.",names( # create working dataset from raw
+names(OTdataRaw.CP3)[1] <- "SYS.IND.SESS."
+OTdataRaw.CP4 <- cbind([,"SYS.IND.SESS."], subset(, select = (grepl("TSK.PRB.CP4.",names( # create working dataset from raw
+names(OTdataRaw.CP4)[1] <- "SYS.IND.SESS."
+OTdataRaw.CP5 <- cbind([,"SYS.IND.SESS."], subset(, select = (grepl("TSK.PRB.CP5.",names( # create working dataset from raw
+names(OTdataRaw.CP5)[1] <- "SYS.IND.SESS."
+# write truthing dataframe for each challenge problem, with common case index with working dataset, and name index column. <-[,"SYS.IND.SESS."], "TSK.PRB.ANS.CP1.OT1.001." = 0,"TSK.PRB.ANS.CP1.OT1.002." = 0,"TSK.PRB.ANS.CP1.OT1.003." = 0,"TSK.PRB.ANS.CP1.OT1.004." =0, "TSK.PRB.ANS.CP1.OT1.005." =0,"TSK.PRB.ANS.CP1.OT2.001." = 0,"TSK.PRB.ANS.CP1.OT2.003." = 0,"TSK.PRB.ANS.CP1.OT2.004." = 0,"TSK.PRB.ANS.CP1.OT2.005." = 0), stringsAsFactors=FALSE) <-[,"SYS.IND.SESS."], "TSK.PRB.ANS.CP2.OT1.001." = 0,"TSK.PRB.ANS.CP2.OT1.002." = 0,"TSK.PRB.ANS.CP2.OT1.003." = 0,"TSK.PRB.ANS.CP2.OT1.004." =0, "TSK.PRB.ANS.CP2.OT1.005." =0,"TSK.PRB.ANS.CP2.OT2.001." = 0,"TSK.PRB.ANS.CP2.OT2.002." = 0,"TSK.PRB.ANS.CP2.OT2.003." = 0,"TSK.PRB.ANS.CP2.OT2.004." = 0,"TSK.PRB.ANS.CP2.OT2.005." = 0), stringsAsFactors=FALSE) <-[,"SYS.IND.SESS."], "TSK.PRB.ANS.CP3.OT1.005.CGBI." = 0, "TSK.PRB.ANS.CP3.OT1.005.AERG." = 0, "TSK.PRB.ANS.CP3.OT1.005.MMTRS." = 0, "TSK.PRB.ANS.CP3.OT1.005.PGFY." = 0, "TSK.PRB.ANS.CP3.OT1.001." = 0, "TSK.PRB.ANS.CP3.OT1.004." = 0, "TSK.PRB.ANS.CP3.OT1.002." = 0, "TSK.PRB.ANS.CP3.OT1.003." = 0, "TSK.PRB.ANS.CP3.OT2.001.1ST." = 0, "TSK.PRB.ANS.CP3.OT2.001.2ND." = 0, "TSK.PRB.ANS.CP3.OT2.002.PZOO." = 0, "TSK.PRB.ANS.CP3.OT2.002.QMCI." = 0, "TSK.PRB.ANS.CP3.OT2.002.IMLE." = 0, "TSK.PRB.ANS.CP3.OT2.002.IMMB." = 0, "TSK.PRB.ANS.CP3.OT2.002.AHII." = 0, "TSK.PRB.ANS.CP3.OT2.002.GOOO." = 0, "TSK.PRB.ANS.CP3.OT2.002.FNRG." = 0, "TSK.PRB.ANS.CP3.OT2.003." = 0, "TSK.PRB.ANS.CP3.OT2.004.JAN." = 0, "TSK.PRB.ANS.CP3.OT2.004.FEB." = 0, "TSK.PRB.ANS.CP3.OT2.004.MAR." = 0, "TSK.PRB.ANS.CP3.OT2.004.APR." = 0, "TSK.PRB.ANS.CP3.OT2.004.MAY." = 0, "TSK.PRB.ANS.CP3.OT2.004.JUN." = 0, "TSK.PRB.ANS.CP3.OT2.004.JUL." = 0, "TSK.PRB.ANS.CP3.OT2.004.AUG." = 0, "TSK.PRB.ANS.CP3.OT2.004.SEP." = 0, "TSK.PRB.ANS.CP3.OT2.004.OCT." = 0, "TSK.PRB.ANS.CP3.OT2.004.NOV." = 0, "TSK.PRB.ANS.CP3.OT2.004.DEC." = 0, "TSK.PRB.ANS.CP3.OT2.005.GHIL." = 0, "TSK.PRB.ANS.CP3.OT2.005.IFLM." 
= 0, "TSK.PRB.ANS.CP3.OT2.005.FNRG." = 0, "TSK.PRB.ANS.CP3.OT2.005.CTOT." = 0, "TSK.PRB.ANS.CP3.OT2.005.CMGO." = 0, "TSK.PRB.ANS.CP3.OT2.005.MYRY." = 0), stringsAsFactors=FALSE) <-[,"SYS.IND.SESS."], "TSK.PRB.ANS.CP4.OT1.001." = 0,"TSK.PRB.ANS.CP4.OT1.002." = 0,"TSK.PRB.ANS.CP4.OT1.003." = 0,"TSK.PRB.ANS.CP4.OT2.001." = 0,"TSK.PRB.ANS.CP4.OT2.002." = 0,"TSK.PRB.ANS.CP4.OT2.003." = 0), stringsAsFactors=FALSE) <-[,"SYS.IND.SESS."], "TSK.PRB.ANS.CP5.OT1.001." = 0,"TSK.PRB.ANS.CP5.OT1.002." = 0,"TSK.PRB.ANS.CP5.OT1.003." = 0,"TSK.PRB.ANS.CP5.OT1.004." =0, "TSK.PRB.ANS.CP5.OT2.001." = 0,"TSK.PRB.ANS.CP5.OT2.002." = 0,"TSK.PRB.ANS.CP5.OT2.003." = 0,"TSK.PRB.ANS.CP5.OT2.004." = 0,"TSK.PRB.ANS.CP5.OT2.005." = 0), stringsAsFactors=FALSE)
+names([1] <- "SYS.IND.SESS."
+names([1] <- "SYS.IND.SESS."
+names([1] <- "SYS.IND.SESS."
+names([1] <- "SYS.IND.SESS."
+names([1] <- "SYS.IND.SESS."
+truthCalc <- function(ind, ans, rawData, codebook, truthData) {
+  # Marks an answer as correct (1) when the response contains any of the
+  # codebook's accepted strings as a whole word.
+  # arguments:
+  #   ind = the variable being checked (column of rawData, row of codebook)
+  #   ans = the variable name (column of truthData) receiving the truth flag
+  #   rawData = dataframe holding participant data being checked
+  #   codebook = dataframe whose "truth" column holds the accepted answers,
+  #              comma-separated; any one of them counts as correct
+  #   truthData = dataframe holding the truthed data (same row order as rawData)
+  # returns: truthData with truthData[i, ans] set to 1 for every matching row;
+  #          all other cells are left untouched
+  truths <- strsplit(codebook[ind, "truth"], split = ",")[[1]]
+  truths <- truths[!is.na(truths)]   # an item without a truth can never match
+  responses <- rawData[, ind]
+  isCorrect <- rep(FALSE, nrow(rawData))   # wrong until a match is found
+  for (truth in truths) {
+    # \\<word\\> anchors the truth string on word boundaries.
+    # NOTE(review): the tail of this grepl() call was truncated in the copy
+    # under review; ignore.case = TRUE is presumed - confirm against the
+    # original script.
+    isCorrect <- isCorrect |
+      grepl(paste0("\\<", truth, "\\>"), responses, ignore.case = TRUE)
+  }
+  isCorrect <- isCorrect & !is.na(responses)   # empty cells are never correct
+  if (any(isCorrect)) {
+    truthData[isCorrect, ans] <- 1
+  }
+  truthData   # return the truth data
+}
+truthCalcExactMatch <- function(ind, ans, rawData, codebook, truthData) {
+  # Marks an answer as correct (1) when the whole response equals one of the
+  # codebook's accepted strings (pattern anchored with ^ and $).
+  # arguments:
+  #   ind = the variable being checked (column of rawData, row of codebook)
+  #   ans = the variable name (column of truthData) receiving the truth flag
+  #   rawData = dataframe holding participant data being checked
+  #   codebook = dataframe whose "truth" column holds the accepted answers,
+  #              comma-separated; any one of them counts as correct
+  #   truthData = dataframe holding the truthed data (same row order as rawData)
+  # returns: truthData with truthData[i, ans] set to 1 for every matching row;
+  #          all other cells are left untouched
+  truths <- strsplit(codebook[ind, "truth"], split = ",")[[1]]
+  truths <- truths[!is.na(truths)]   # an item without a truth can never match
+  responses <- rawData[, ind]
+  isCorrect <- rep(FALSE, nrow(rawData))   # wrong until a match is found
+  # Iterating over the strings directly also covers an empty truth cell,
+  # which the former 1:length(...) loop did not (truthCalc guards it explicitly).
+  for (truth in truths) {
+    # NOTE(review): the tail of this grepl() call was truncated in the copy
+    # under review; ignore.case = TRUE is presumed - confirm against the
+    # original script.
+    isCorrect <- isCorrect |
+      grepl(paste0("^", truth, "$"), responses, ignore.case = TRUE)
+  }
+  isCorrect <- isCorrect & !is.na(responses)   # empty cells are never correct
+  if (any(isCorrect)) {
+    truthData[isCorrect, ans] <- 1
+  }
+  truthData   # return the truth data
+}
+# CP1. Population Movements
+ = truthCalc("TSK.PRB.CP1.OT1.001.", "TSK.PRB.ANS.CP1.OT1.001.", OTdataRaw.CP1, codebook.items, = truthCalc("TSK.PRB.CP1.OT1.002.", "TSK.PRB.ANS.CP1.OT1.002.", OTdataRaw.CP1, codebook.items, = truthCalc("TSK.PRB.CP1.OT1.003.", "TSK.PRB.ANS.CP1.OT1.003.", OTdataRaw.CP1, codebook.items, = truthCalc("TSK.PRB.CP1.OT1.004.", "TSK.PRB.ANS.CP1.OT1.004.", OTdataRaw.CP1, codebook.items, = truthCalc("TSK.PRB.CP1.OT1.005.", "TSK.PRB.ANS.CP1.OT1.005.", OTdataRaw.CP1, codebook.items,
+ = truthCalc("TSK.PRB.CP1.OT2.001.", "TSK.PRB.ANS.CP1.OT2.001.", OTdataRaw.CP1, codebook.items, = truthCalc("TSK.PRB.CP1.OT2.002.", "TSK.PRB.ANS.CP1.OT2.002.", OTdataRaw.CP1, codebook.items, = truthCalc("TSK.PRB.CP1.OT2.003.", "TSK.PRB.ANS.CP1.OT2.003.", OTdataRaw.CP1, codebook.items, = truthCalc("TSK.PRB.CP1.OT2.004.", "TSK.PRB.ANS.CP1.OT2.004.", OTdataRaw.CP1, codebook.items, = truthCalc("TSK.PRB.CP1.OT2.005.", "TSK.PRB.ANS.CP1.OT2.005.", OTdataRaw.CP1, codebook.items,
+#write.csv(, file = "CP1_Truthv3_Check.csv", row.names=FALSE) # write out the data
+# CP2. Dealiasing
+ = truthCalc("TSK.PRB.CP2.OT1.001.", "TSK.PRB.ANS.CP2.OT1.001.", OTdataRaw.CP2, codebook.items, = truthCalc("TSK.PRB.CP2.OT1.002.", "TSK.PRB.ANS.CP2.OT1.002.", OTdataRaw.CP2, codebook.items, = truthCalc("TSK.PRB.CP2.OT1.003.", "TSK.PRB.ANS.CP2.OT1.003.", OTdataRaw.CP2, codebook.items, = truthCalc("TSK.PRB.CP2.OT1.004.", "TSK.PRB.ANS.CP2.OT1.004.", OTdataRaw.CP2, codebook.items, = truthCalc("TSK.PRB.CP2.OT1.005.", "TSK.PRB.ANS.CP2.OT1.005.", OTdataRaw.CP2, codebook.items,
+ = truthCalc("TSK.PRB.CP2.OT2.001.", "TSK.PRB.ANS.CP2.OT2.001.", OTdataRaw.CP2, codebook.items, = truthCalc("TSK.PRB.CP2.OT2.002.", "TSK.PRB.ANS.CP2.OT2.002.", OTdataRaw.CP2, codebook.items, = truthCalc("TSK.PRB.CP2.OT2.003.", "TSK.PRB.ANS.CP2.OT2.003.", OTdataRaw.CP2, codebook.items, = truthCalc("TSK.PRB.CP2.OT2.004.", "TSK.PRB.ANS.CP2.OT2.004.", OTdataRaw.CP2, codebook.items, = truthCalc("TSK.PRB.CP2.OT2.005.", "TSK.PRB.ANS.CP2.OT2.005.", OTdataRaw.CP2, codebook.items,
+#write.csv(, file = "CP2_Truthv3_Check.csv", row.names=FALSE)
+# CP3. Financial
+ = truthCalc("TSK.PRB.CP3.OT1.005.CGBI.", "TSK.PRB.ANS.CP3.OT1.005.CGBI.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT1.005.AERG.", "TSK.PRB.ANS.CP3.OT1.005.AERG.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT1.005.MMTRS.", "TSK.PRB.ANS.CP3.OT1.005.MMTRS.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT1.005.PGFY.", "TSK.PRB.ANS.CP3.OT1.005.PGFY.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT1.001.", "TSK.PRB.ANS.CP3.OT1.001.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT1.004.", "TSK.PRB.ANS.CP3.OT1.004.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT1.002.", "TSK.PRB.ANS.CP3.OT1.002.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT1.003.", "TSK.PRB.ANS.CP3.OT1.003.", OTdataRaw.CP3, codebook.items,
+# truthing for TSK.PRB.CP3.OT2.001.1ST.: parses each response as a date (mdY digits, then m/d/Y) and flags it correct when it falls in 2014-01-01..2014-05-31 inclusive
+for(i in 1:nrow(OTdataRaw.CP3)){
+  if(is.null(OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.1ST."])==FALSE){ # NOTE(review): a single data-frame cell is never NULL (missing values are NA), so this guard always passes
+    dateTemp1 = as.Date("2000-01-01") # sentinel outside the accepted window, so an unparsed response is never flagged correct
+    if(nchar(OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.1ST."])<8){ OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.1ST."] = paste("0", OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.1ST."], sep = "")} # left-pad with one "0" when the entry has fewer than 8 characters
+    if(![i,"TSK.PRB.CP3.OT2.001.1ST."], "%m%d%Y"))){ # NOTE(review): condition is truncated in this copy - presumably an is.na(as.Date(...)) check; confirm against the original script
+      dateTemp1 = as.Date(OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.1ST."], "%m%d%Y")
+    } else if(![i,"TSK.PRB.CP3.OT2.001.1ST."], "%m/%d/%Y"))){ 
+      dateTemp1 = as.Date(OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.1ST."], "%m/%d/%Y")
+    }
+    if((dateTemp1 >= as.Date("2014-01-01")) && (dateTemp1 <= as.Date("2014-05-31"))){ # accepted window, both ends inclusive
+[i, "TSK.PRB.ANS.CP3.OT2.001.1ST."] = 1 # NOTE(review): the assignment target is missing in this copy
+    }
+  }
+# truthing for TSK.PRB.CP3.OT2.001.2ND.: parses each response as a date (mdY digits, then m/d/Y) and flags it correct when it falls in 2015-01-01..2015-03-31 inclusive
+for(i in 1:nrow(OTdataRaw.CP3)){
+  if(is.null(OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.2ND."])==FALSE){ # NOTE(review): a single data-frame cell is never NULL (missing values are NA), so this guard always passes
+    dateTemp1 = as.Date("2000-01-01") # sentinel outside the accepted window, so an unparsed response is never flagged correct
+    if(nchar(OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.2ND."])<8){ OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.2ND."] = paste("0", OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.2ND."], sep = "")} # left-pad with one "0" when the entry has fewer than 8 characters
+    if(![i,"TSK.PRB.CP3.OT2.001.2ND."], "%m%d%Y"))){ # NOTE(review): condition is truncated in this copy - presumably an is.na(as.Date(...)) check; confirm against the original script
+      dateTemp1 = as.Date(OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.2ND."], "%m%d%Y")
+    } else if(![i,"TSK.PRB.CP3.OT2.001.2ND."], "%m/%d/%Y"))){ 
+      dateTemp1 = as.Date(OTdataRaw.CP3[i,"TSK.PRB.CP3.OT2.001.2ND."], "%m/%d/%Y")
+    }
+    if((dateTemp1 >= as.Date("2015-01-01")) && (dateTemp1 <= as.Date("2015-03-31"))){ # accepted window, both ends inclusive
+      truthData[i, "TSK.PRB.ANS.CP3.OT2.001.2ND."] = 1 # NOTE(review): truthData is only a function parameter in the visible script, not a top-level object - confirm the intended target
+    }
+  }
+ = truthCalcExactMatch("TSK.PRB.CP3.OT2.002.PZOO.", "TSK.PRB.ANS.CP3.OT2.002.PZOO.", OTdataRaw.CP3, codebook.items, = truthCalcExactMatch("TSK.PRB.CP3.OT2.002.QMCI.", "TSK.PRB.ANS.CP3.OT2.002.QMCI.", OTdataRaw.CP3, codebook.items, = truthCalcExactMatch("TSK.PRB.CP3.OT2.002.IMLE.", "TSK.PRB.ANS.CP3.OT2.002.IMLE.", OTdataRaw.CP3, codebook.items, = truthCalcExactMatch("TSK.PRB.CP3.OT2.002.IMMB.", "TSK.PRB.ANS.CP3.OT2.002.IMMB.", OTdataRaw.CP3, codebook.items, = truthCalcExactMatch("TSK.PRB.CP3.OT2.002.AHII.", "TSK.PRB.ANS.CP3.OT2.002.AHII.", OTdataRaw.CP3, codebook.items, = truthCalcExactMatch("TSK.PRB.CP3.OT2.002.GOOO.", "TSK.PRB.ANS.CP3.OT2.002.GOOO.", OTdataRaw.CP3, codebook.items, = truthCalcExactMatch("TSK.PRB.CP3.OT2.002.FNRG.", "TSK.PRB.ANS.CP3.OT2.002.FNRG.", OTdataRaw.CP3, codebook.items,
+ = truthCalc("TSK.PRB.CP3.OT2.003.", "TSK.PRB.ANS.CP3.OT2.003.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.JAN.", "TSK.PRB.ANS.CP3.OT2.004.JAN.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.FEB.", "TSK.PRB.ANS.CP3.OT2.004.FEB.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.MAR.", "TSK.PRB.ANS.CP3.OT2.004.MAR.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.APR.", "TSK.PRB.ANS.CP3.OT2.004.APR.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.MAY.", "TSK.PRB.ANS.CP3.OT2.004.MAY.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.JUN.", "TSK.PRB.ANS.CP3.OT2.004.JUN.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.JUL.", "TSK.PRB.ANS.CP3.OT2.004.JUL.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.AUG.", "TSK.PRB.ANS.CP3.OT2.004.AUG.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.SEP.", "TSK.PRB.ANS.CP3.OT2.004.SEP.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.OCT.", "TSK.PRB.ANS.CP3.OT2.004.OCT.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.NOV.", "TSK.PRB.ANS.CP3.OT2.004.NOV.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.004.DEC.", "TSK.PRB.ANS.CP3.OT2.004.DEC.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.005.GHIL.", "TSK.PRB.ANS.CP3.OT2.005.GHIL.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.005.IFLM.", "TSK.PRB.ANS.CP3.OT2.005.IFLM.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.005.FNRG.", "TSK.PRB.ANS.CP3.OT2.005.FNRG.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.005.CTOT.", "TSK.PRB.ANS.CP3.OT2.005.CTOT.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.005.CMGO.", "TSK.PRB.ANS.CP3.OT2.005.CMGO.", OTdataRaw.CP3, codebook.items, = truthCalc("TSK.PRB.CP3.OT2.005.MYRY.", "TSK.PRB.ANS.CP3.OT2.005.MYRY.", OTdataRaw.CP3, codebook.items,
+ = cbind(,rowSums(subset(, select = c(TSK.PRB.ANS.CP3.OT1.005.CGBI.,TSK.PRB.ANS.CP3.OT1.005.PGFY.), na.rm = TRUE)))
+names([ncol(] <- "TSK.PRB.ANS.CP3.OT1.005." = cbind(,rowSums(subset(, select = (grepl("TSK.PRB.ANS.CP3.OT2.001.", names(, na.rm = TRUE))
+names([ncol(] <- "TSK.PRB.ANS.CP3.OT2.001." = cbind(,rowSums(subset(, select = (grepl("TSK.PRB.ANS.CP3.OT2.002.", names(, na.rm = TRUE))
+names([ncol(] <- "TSK.PRB.ANS.CP3.OT2.002." = cbind(,rowSums(subset(, select = c(TSK.PRB.ANS.CP3.OT2.004.OCT.,TSK.PRB.ANS.CP3.OT2.004.NOV.), na.rm = TRUE)))
+names([ncol(] <- "TSK.PRB.ANS.CP3.OT2.004." = cbind(,rowSums(subset(, select = (grepl("TSK.PRB.ANS.CP3.OT2.005.", names(, na.rm = TRUE))
+names([ncol(] <- "TSK.PRB.ANS.CP3.OT2.005."
+ <- subset(, select= unlist(lapply(gregexpr("\\.",names(,length)) < 7)
+#write.csv(, file = "CP3_Truthv3_Check.csv", row.names=FALSE)
+# CP4. Population Movements for GEQE
+ = truthCalc("TSK.PRB.CP4.OT1.001.1ST.", "TSK.PRB.ANS.CP4.OT1.001.1ST.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT1.001.2ND.", "TSK.PRB.ANS.CP4.OT1.001.2ND.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT1.002.", "TSK.PRB.ANS.CP4.OT1.002.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT1.003.", "TSK.PRB.ANS.CP4.OT1.003.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT1.004.", "TSK.PRB.ANS.CP4.OT1.004.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT1.005.", "TSK.PRB.ANS.CP4.OT1.005.", OTdataRaw.CP4, codebook.items,
+ = truthCalc("TSK.PRB.CP4.OT2.001.1ST.", "TSK.PRB.ANS.CP4.OT2.001.1ST.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT2.001.2ND.", "TSK.PRB.ANS.CP4.OT2.001.2ND.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT2.002.", "TSK.PRB.ANS.CP4.OT2.002.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT2.003.", "TSK.PRB.ANS.CP4.OT2.003.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT2.004.", "TSK.PRB.ANS.CP4.OT2.004.", OTdataRaw.CP4, codebook.items, = truthCalc("TSK.PRB.CP4.OT2.005.", "TSK.PRB.ANS.CP4.OT2.005.", OTdataRaw.CP4, codebook.items,
+#write.csv(, file = "CP1_Truthv3_Check.csv", row.names=FALSE) # write out the data
+# CP5. Population Movements for NEON (NYC Only)
+ = truthCalc("TSK.PRB.CP5.OT1.001.", "TSK.PRB.ANS.CP5.OT1.001.", OTdataRaw.CP5, codebook.items, = truthCalc("TSK.PRB.CP5.OT1.002.", "TSK.PRB.ANS.CP5.OT1.002.", OTdataRaw.CP5, codebook.items, = truthCalc("TSK.PRB.CP5.OT1.003.", "TSK.PRB.ANS.CP5.OT1.003.", OTdataRaw.CP5, codebook.items, = truthCalc("TSK.PRB.CP5.OT1.004.", "TSK.PRB.ANS.CP5.OT1.004.", OTdataRaw.CP5, codebook.items, = truthCalc("TSK.PRB.CP5.OT1.005.", "TSK.PRB.ANS.CP5.OT1.005.", OTdataRaw.CP5, codebook.items,
+ = truthCalc("TSK.PRB.CP5.OT2.001.", "TSK.PRB.ANS.CP5.OT2.001.", OTdataRaw.CP5, codebook.items, = truthCalc("TSK.PRB.CP5.OT2.002.", "TSK.PRB.ANS.CP5.OT2.002.", OTdataRaw.CP5, codebook.items, = truthCalc("TSK.PRB.CP5.OT2.003.", "TSK.PRB.ANS.CP5.OT2.003.", OTdataRaw.CP5, codebook.items, = truthCalc("TSK.PRB.CP5.OT2.004.", "TSK.PRB.ANS.CP5.OT2.004.", OTdataRaw.CP5, codebook.items, = truthCalc("TSK.PRB.CP5.OT2.005.", "TSK.PRB.ANS.CP5.OT2.005.", OTdataRaw.CP5, codebook.items,
+#write.csv(, file = "CP1_Truthv3_Check.csv", row.names=FALSE) # write out the data
+#Merge Operations across new Data Frames
+ <- subset(,[,"SYS.IND.SESS."])==FALSE) <- subset(,[,"SYS.IND.SESS."])==FALSE) <- subset(,[,"SYS.IND.SESS."])==FALSE) <- subset(,[,"SYS.IND.SESS."])==FALSE) <- subset(,[,"SYS.IND.SESS."])==FALSE)
+ =list(,,,, #add truthed dataframes to single list = Reduce(function(...) merge(..., by = "SYS.IND.SESS.",all.y = TRUE), #simultaneously merge all dataframes indexed by case identifier
+#Merge with Raw Data
+ <- merge(,,by= "SYS.IND.SESS.", all =TRUE)
+# merge with old User-Ale log data
+# comment out if no longer needed <- read.csv("xdatatimelog.csv", header=TRUE, stringsAsFactors=FALSE)
+colnames([colnames("sessionID"] <- "SYS.IND.SESS."<[!duplicated(["SYS.IND.SESS."]),] #remove dupe cases by SESS ID <- merge(,,by= "SYS.IND.SESS.", all.x =TRUE)["SYS.FIL.STD."][["SYS.FIL.STD."])] <- as.character(["timestamp"][["SYS.FIL.STD."])])
+# save the raw plus truthed data
+#write.csv(, file = "", row.names=FALSE)
+#Temporal Variables Computation; Authors: Joshua C. Poore, Eric M. Jones. 
+#This script ingests stout start time output, adjusts time synchronization between STOUT and other procs and prepares it for additional processing within R
+#v1 Eric Jones: 
+#v2 Joshua Poore: Updated data handling, subsetting based on varname parsing. Generalized code to work for numerous variables, rather than 2.
+#v3 Joshua Poore, Fei Sun: Updated Sync Operations to include data from STOUT-USER ALE for timing params. Updated data handling for fewer loops.
+##Paste Index Function
+Paste.Index = function(, index){ #this function pastes a new index term to a text value. Inputs = (value I want to print to), index (new index I want to print onto name)
+  paste(,index, sep = "", collapse = "")
+##Time Extract Function
+# Parses a timestamp written like "2015-12-28 21:48:34" and returns it as a character string.
+# Inputs = time.value (value I want formatted), timezone (timezone the value is expressed in)
+# Returns NA when time.value does not match the "%Y-%m-%d %H:%M:%S" layout.
+# format() is called without an explicit format string, so R's default POSIXct formatting applies.
+Time.Extract <- function(time.value, timezone) {
+  parsed <- strptime(time.value, "%Y-%m-%d %H:%M:%S", tz = timezone)
+  format(as.POSIXct(parsed))
+}
+# Parses an ISO-8601 style timestamp written like "2016-03-26T19:31:46.562Z" and returns it as a character string.
+# Inputs = time.value (value I want formatted), timezone (timezone the value is expressed in)
+# Only the "%Y-%m-%dT%H:%M:%S" prefix is parsed; strptime() ignores trailing characters,
+# so fractional seconds and the "Z" suffix are dropped. Returns NA when the prefix does not match.
+Timez.Extract <- function(time.value, timezone) {
+  parsed <- strptime(time.value, "%Y-%m-%dT%H:%M:%S", tz = timezone)
+  format(as.POSIXct(parsed))
+}
+Sync.Conversion = function(time.value,, timezone){
+  format(as.POSIXct(time.value, tz = timezone) -
+ = "utc" = "utc"
+#Data Ingest
+#Create new dataframe with index values (SessID and STOUT Session Time) and times for converstion<- cbind(subset(, select = c(SYS.IND.SESS.,SYS.FIL.STD.)),subset(, select = (grepl("TSK.FIL.STD.",names( | (grepl("TSK.FIL.END.",names( ==TRUE)))) #subset and bind dataframe together
+colnames([3:ncol(] <- sapply(colnames([3:ncol(],Paste.Index, "CORR.") #apply new index on colnames
+[,3:ncol(] = sapply([,3:ncol(],Time.Extract, #reformat into POSIX time format (gmt)[,"SYS.FIL.STD."] = sapply([,"SYS.FIL.STD."],Timez.Extract, #reformat STOUT Session time
+for (i in 1:nrow({
+  times.order<-order([i,3:ncol(],decreasing = FALSE, na.last = NA) #find the "start date" for non-client times (e.g., forms), should be first for each session
+  # for missing time data, not able to sort, times.order=length(0)
+  if(length(times.order)<=0) {
+    times.order<-c(1);
+  }
+  sync.diff<- as.numeric(difftime([i,"SYS.FIL.STD."],[i,2+(times.order[1])], units="secs")) #calculate the difference in time by subtracting non-client "start date" from client start date (SYS.FIL.STD.) 
+[i,3:ncol(] = sapply([i,3:ncol(],Sync.Conversion, sync.diff, #substracts the sync difference from each value in, prints new values over old.
+#Computes delta between end and start times.
+#new data frame from END times, start times will be subtracted from these values. *MOVE TO SAPPLY LATER <- cbind(subset(, select= c(SYS.IND.SESS.,SYS.FIL.STD.)), subset(, select = (grepl("TSK.FIL.END.",names(
+for (i in 3:ncol({ #Assign new column names to the new data frame to dindicate they are deltas
+  colnames([i] <- paste("TSK.TIME.DIFF.",substr(colnames([i]),start=13,stop=24), sep = "", collapse = "")
+for(j in names([3:ncol(]){
+  #if([,j])==FALSE){
+  x =[,paste("TSK.FIL.STD.",substr(colnames([j]),start=15,stop=29),"CORR.",sep = "", collapse = "")]
+  y =[,j]
+[,j] = as.numeric(difftime(y,x,units="secs"))
+#merge data files and write out
+colnames([colnames("SYS.FIL.STD."] <- "SYS.FIL.STD.UTC." <- subset(, select=-c(SYS.FIL.STD.)) = list(,, #add truthed dataframes to single list = Reduce(function(...) merge(..., by = "SYS.IND.SESS."), <-, stringsAsFactors = FALSE)
+#write.csv(, file = "", row.names=FALSE)
+#Scale Computation Operations + Codebook Handling (SCO+CH); Authors:Joshua C. Poore, Eric M. Jones. 
+#This script produces aggregates across questionnaire data, such as scale and subscale means. Ends with reporting out descriptive statistics.
+#v1 Joshua Poore: Core data ingest, variable substring decomposition, variable substring matching, core means loop, output bind to dataframes, write out functions, documentation 
+#v2 Eric Jones:   Created dataPull() function, wrote reverse scoring code, incorporated dataPull() function in code to calculate scale and ...
+#                 subscale means, wrote code to make sure no redundant columns are appended to the intake data, documentation
+#v3 Joshua Poore: Added row indexing for easy reference by PID & Variable, removed dataPull function in favor of base R subsetting functions;
+#                 added codebook ingest and output to reverse coding code and weighting; additional loop for computing scales from subscales; documentation
+#v5 Eric Jones:   wrote and tested Reverse Coding, Weighting, and Aggregated Means functions
+#v6 Joshua Poore: Integrated sequential aggregation, "saverage,ssum" operation functionality. Revised object naming conventions for clarity.
+#Dependencies: coefficientalpha(resm,lavaan),xlsx(rJava,xlsxjars),write.xls{xlsReadWrite},gdata
+# Reverse Coding Function
+# if a variable is flagged for reverse scoring (as indicated in the codebook), reverse the scores, replacing the values in the column
+# Arguments:
+    # data - dataframe of raw (all) data; its column names are looked up as row names of the codebook
+    # metadata - dataframe of codebook data
+    # varCol - name of the column in the codebook that holds the variable names (run through make.names() to form row names)
+    # revCol - name of the column in the codebook that flags a variable for reverse coding (1 = reverse; blank/other = leave as is)
+    # scaleMaxCol - name of the column in the codebook that holds the maximum scale value for each variable
+# Returns: data, with every flagged column replaced by (scale maximum + 1 - raw value); missing values stay missing
+reverseCode <- function(data, metadata, varCol, revCol, scaleMaxCol) {
+  row.names(metadata) <- make.names(metadata[, varCol], unique = TRUE)
+  for (j in names(data)) {                      # for each column [j], i.e., for each variable
+    revFlag <- metadata[j, revCol]              # NA when the flag is blank or the variable is not in the codebook
+    if (is.na(revFlag) || revFlag != 1) {
+      next                                      # not designated for reverse coding
+    }
+    present <- !is.na(data[[j]])                # only non-missing elements are recoded
+    scaleMax <- as.numeric(metadata[j, scaleMaxCol])
+    # reverse code by subtracting the raw value from 1 + the scale maximum;
+    # done on the whole column at once, which also works for a zero-row data frame
+    data[[j]][present] <- scaleMax + 1 - as.numeric(data[[j]][present])
+  }
+  # return the modified set of data
+  data
+}
+# Weighting Function
+# if a variable is flagged for weighting (as indicated in the codebook), multiply the value by the weight
+# Arguments:
+    # data - dataframe of raw (all) data; its column names are looked up as row names of the codebook
+    # metadata - dataframe of codebook data
+    # varCol - name of the column in the codebook that holds the variable names (run through make.names() to form row names)
+    # wtCol - name of the column in the codebook that holds the weighting value for each variable (blank = no weighting)
+# Returns: data, with every weighted column multiplied by its weight; missing values stay missing
+weighting <- function(data, metadata, varCol, wtCol) {
+  row.names(metadata) <- make.names(metadata[, varCol], unique = TRUE)
+  for (j in names(data)) {                      # for each column [j], i.e., for each variable
+    wt <- metadata[j, wtCol]                    # NA when the weight is blank or the variable is not in the codebook
+    if (is.na(wt)) {
+      next                                      # only weights that are not missing are applied
+    }
+    present <- !is.na(data[[j]])                # only non-missing elements are weighted
+    # weight the items by multiplying them by the weight; done on the whole
+    # column at once, which also works for a zero-row data frame
+    data[[j]][present] <- as.numeric(data[[j]][present]) * as.numeric(wt)
+  }
+  # return the modified set of data
+  data
+}
+# Scale Computation Function
+# Based on index terms embedded in variable names in a codebook, function will extract the correct data, and aggregate them appropriately.
+# Arguments:
+    # data - dataframe of raw (all) data
+    # varIndices - a list containing an indeterminate number of indices, separated by a delimiter, that are contained in all the variables across which we want to calculate a mean
+    # opNames - name of the column in the codebook that holds the operation to be performed for each variable
+    # checkMat - returns a matrix of booleans indicated which variable in the raw dataset were included for aggregation
+calcScales = function(data, varIndices, opNames, checkMat){
+# initialize dataframes to hold the data the data to aggregate and the aggregates
+VarsToAgg = data.frame(matrix(0, nrow(data),1)) # variables extracted for aggregation
+ScaleAgg = data.frame(matrix(0, nrow(data),1)) # aggregated variables
+#create a flag to check if the variable name contains all the desired descriptors, and should be selected for aggregation.
+VarIndCheck = TRUE
+for (col in 1:ncol(data)){          # for all the columns in the data matrix
+  for(m in 1:length(varIndices)){    # for all the descriptors that I wish to match
+    # if I previously determined that a desciptor is not in the variable name, or if the current descriptor is not in the variable name 
+    if(VarIndCheck == FALSE | grepl(varIndices[m], names(data[col])) == FALSE){ 
+      VarIndCheck = FALSE   # set my flag to false because the variable does not meet my criteria
+    }
+  }
+  if(VarIndCheck == TRUE){   # if my flag is true, meaning the variable does meet my criteria
+    VarsToAgg=cbind(VarsToAgg,data[,col])                   # extract relevant data columns pull the data from that column and put in the new matrix
+    names(VarsToAgg)[ncol(VarsToAgg)] = names(data)[col]    # change the name of that new column
+    checkMat[paste(varIndices, sep="", collapse = ""), names(data)[col]] = checkMat[paste(varIndices, sep="", collapse = ""), names(data)[col]] + 1 
+  }
+  # reset the flag for the next variable
+  VarIndCheck = TRUE
+  if(ncol(VarsToAgg) >= 3){                             # if more than 2 columns of data are pulled 
+    VarsToAgg = VarsToAgg[,2:ncol(VarsToAgg)]   # remove the column of zeroes that was created when initializing the temporary dataframe
+    if(opNames == "average"){
+      ScaleAgg = rowMeans(VarsToAgg, na.rm = TRUE)        # calculate the row means and append the column of means to the raw data file
+    }
+    else if(opNames == "sum"){
+      ScaleAgg = rowSums(VarsToAgg, na.rm = TRUE)        # calculate the row means and append the column of means to the raw data file
+    }  
+    else if(opNames == "saverage"){
+      ScaleIndexLength <- length(gregexpr("\\.", paste(varIndices,sep='',collapse=''))[[1]]) #find the number of indices in the scale variable (to compute) by index delimiter (".") 
+      AggIndices <- rev(1:max(unlist(lapply(gregexpr("\\.",names(VarsToAgg)),length)))) #find the number of indices in the strings of variables selected to aggregate as array      
+      for (i in AggIndices){
+        if(i == max(AggIndices)){ # if this is the first step of aggregation (Step1) 
+          SortAggVars <-, stringsToFactors = FALSE) #sort subset of vars to aggregate by name, ascending, coerce to dataframe for reference
+          CommonVarInd <-substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[",i-1)) #extract common strings amongst variable selections
+          SubsetIndToAgg <-subset(CommonVarInd,duplicated(substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[", i-1)))==FALSE) #remove duplicates from CommonVarInd
+          ScaleAggStep <- data.frame(matrix(0, nrow(data), ncol = length(SubsetIndToAgg))) #initialize dataframe for step-wise aggregates
+          names(ScaleAggStep) <- SubsetIndToAgg[1:length(SubsetIndToAgg)] #give names to dataframe columns for step-wise aggregates
+          for(k in 1:length(SubsetIndToAgg)){ # for each element in subset array of indices for step 1 aggregation  
+            VarsAggStep <- subset(data, select = (grepl(SubsetIndToAgg[k], names(data))==TRUE)) # raw data variables names subset from Step 1 aggregation vars
+            AggStep <-, na.rm = TRUE), stringsAsFactors=FALSE) #format averages from RowMeans across Step 1[1] vars into new column
+            names(AggStep) = SubsetIndToAgg[k]
+            ScaleAggStep[,names(AggStep)]<-AggStep
+          }
+        }  
+        else if(i < max(AggIndices) & i > ScaleIndexLength + 1){
+          SortAggVars <-, stringsToFactors = FALSE) #sort subset of vars to aggregate by name, ascending, coerce to dataframe for reference
+          CommonVarInd <-substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[",i-1)) #extract common strings amongst Step 1 selections
+          SubsetIndToAgg <-subset(CommonVarInd,duplicated(substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[", i-1)))==FALSE) #remove duplicates from CommonVarInd
+          for(k in 1:length(SubsetIndToAgg)){ # for each element in subset array of indices for step n aggregation  
+            VarsAggStep <- subset(ScaleAggStep, select = (grepl(SubsetIndToAgg[k], names(ScaleAggStep))==TRUE)) # raw data variables names subset from Step 1 aggregation vars
+            AggStep <-, na.rm = TRUE), stringsAsFactors=FALSE) #format averages from RowMeans across Step 1[1] vars into new column
+            names(AggStep) = SubsetIndToAgg[k]
+            ScaleAggStep[,names(AggStep)]<-AggStep
+          }
+        }
+        else if(i == ScaleIndexLength + 1){
+          SortAggVars <-, stringsToFactors = FALSE) #sort subset of vars to aggregate by name, ascending, coerce to working.proc.dataframe for reference
+          CommonVarInd <-substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[",i)) #extract common strings amongst Step 1 selections
+          SubsetIndToAgg <-subset(CommonVarInd,duplicated(substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[", i)))==FALSE) #remove duplicates from CommonVarInd
+          VarsAggStep <- subset(ScaleAggStep, select = names(ScaleAggStep) %in% SubsetIndToAgg) # raw variables names subset from Step 1 aggregation vars
+          AggStep <-, na.rm = TRUE), stringsAsFactors=FALSE) #format averages from RowMeans across Step 1[1] vars into new column
+          names(AggStep) = paste(varIndices,sep='',collapse='')
+          ScaleAggStep[,names(AggStep)]<-AggStep
+          }
+        else if(i == ScaleIndexLength | i < ScaleIndexLength){
+        ScaleAgg = ScaleAggStep[,paste(varIndices,sep='',collapse='')]
+        #write.csv(ScaleAggStep,paste("saverage_",paste(varIndices,sep='',collapse=''),".csv",sep='',collapse=''), append = TRUE)
+        }
+      }
+    }
+    else if(opNames == "ssum"){
+    ScaleIndexLength <- length(gregexpr("\\.", paste(varIndices,sep='',collapse=''))[[1]]) #find the number of indices in the scale variable (to compute) by index delimiter (".") 
+    AggIndices <- rev(1:max(unlist(lapply(gregexpr("\\.",names(VarsToAgg)),length)))) #find the number of indices in the strings of variables selected to aggregate as array      
+     for (i in AggIndices){
+        if(i == max(AggIndices)){ # if this is the first step of aggregation  
+          SortAggVars <-, stringsToFactors = FALSE) #sort subset of vars to aggregate by name, ascending, coerce to dataframe for reference
+          CommonVarInd <-substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[",i-1)) #extract common strings amongst variable selections
+          SubsetIndToAgg <-subset(CommonVarInd,duplicated(substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[", i-1)))==FALSE) #remove duplicates from CommonVarInd
+          ScaleAggStep <- data.frame(matrix(0, nrow(data), ncol = length(SubsetIndToAgg))) #initialize dataframe for 
+          names(ScaleAggStep) <- SubsetIndToAgg[1:length(SubsetIndToAgg)]
+          for(k in 1:length(SubsetIndToAgg)){ # for each element in subset array of indices for step 1 aggregation  
+            VarsAggStep <- subset(data, select = (grepl(SubsetIndToAgg[k], names(data))==TRUE)) # raw data variables names subset from Step 1 aggregation vars
+            AggStep <-, na.rm = TRUE), stringsAsFactors=FALSE) #format averages from RowMeans across Step 1[1] vars into new column
+            names(AggStep) = SubsetIndToAgg[k]
+            ScaleAggStep[,names(AggStep)]<-AggStep
+          }
+        }
+        else if(i < max(AggIndices) & i > ScaleIndexLength +1 ){
+          SortAggVars <-, stringsToFactors = FALSE) #sort subset of vars to aggregate by name, ascending, coerce to dataframe for reference
+          CommonVarInd <-substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[",i-1)) #extract common strings amongst Step 1 selections
+          SubsetIndToAgg <-subset(CommonVarInd,duplicated(substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[", i-1)))==FALSE) #remove duplicates from CommonVarInd
+          for(k in 1:length(SubsetIndToAgg)){ # for each element in subset array of indices for step 1 aggregation  
+            VarsAggStep <- subset(ScaleAggStep, select = (grepl(SubsetIndToAgg[k], names(ScaleAggStep))==TRUE)) # raw data variables names subset from Step 1 aggregation vars
+            AggStep <-, na.rm = TRUE), stringsAsFactors=FALSE) #format averages from RowMeans across Step 1[1] vars into new column
+            names(AggStep) = SubsetIndToAgg[k]
+            ScaleAggStep[,names(AggStep)]<-AggStep
+          }
+        }
+        else if(i == ScaleIndexLength + 1){
+          SortAggVars <-, stringsToFactors = FALSE) #sort subset of vars to aggregate by name, ascending, coerce to working.proc.dataframe for reference
+          CommonVarInd <-substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[",i)) #extract common strings amongst Step 1 selections
+          SubsetIndToAgg <-subset(CommonVarInd,duplicated(substr(SortAggVars[,1],start=1,stop=sapply(gregexpr("\\.",SortAggVars[,1]),"[[", i)))==FALSE) #remove duplicates from CommonVarInd
+          VarsAggStep <- subset(ScaleAggStep, select = names(ScaleAggStep) %in% SubsetIndToAgg) # raw variables names subset from Step 1 aggregation vars
+          AggStep <-, na.rm = TRUE), stringsAsFactors=FALSE) #format averages from RowMeans across Step 1[1] vars into new column
+          names(AggStep) = paste(varIndices,sep='',collapse='')
+          ScaleAggStep[,names(AggStep)]<-AggStep
+          }
+        else if(i == ScaleIndexLength | i < ScaleIndexLength){
+        ScaleAgg = ScaleAggStep[,paste(varIndices,sep='',collapse='')]
+        #write.csv(ScaleAggStep,paste("saverage_",paste(varIndices,sep='',collapse=''),".csv",sep='',collapse=''), append = TRUE)
+        }
+      }
+    }
+  else if(ncol(VarsToAgg) == 2){     # if only 1 column of data is pulled
+      ScaleAgg = VarsToAgg[,2]    # append that column of data to the raw data file
+    }  
+  }
+  calcScales = list(ScaleAgg, checkMat)
+# Reverse Code - Function Call = reverseCode(, codebook.items, "varnames", "reverse.code", "scale.max")
+# Item Weighting - Function Call = weighting(, codebook.items, "varnames", "weight")
+# Aggregated Means Using Codebook
+ =    # create a new dataframe which will hold the means
+varIndices = list()          # initialize a list containing the descriptors that will be found
+aggCheck = matrix(0, length(codebook.scales[,1]), length(names(
+rownames(aggCheck) = codebook.scales[,1]
+colnames(aggCheck) = names(
+for(k in 1:nrow(codebook.scales)){    # for each row (scale variable) of the scales codebook
+  if ((codebook.scales[k,1] %in% names( )) == FALSE ){    # if the scale variable is not already a column of the dataset, compute its aggregate and append the column
+    delimiter = gregexpr("\\.", codebook.scales[k,1])                 # determine the locations of all the delimiters (periods)
+    for(p in 1:length(delimiter[[1]])){   # for as many indices as there are in the variable name
+      if(p == 1){                       # special case when pulling out the first descriptor
+        varIndices = c(varIndices, substr(codebook.scales[k,1],start=1,stop=delimiter[[1]][[p]]))   # pull out the first descriptor (start of the name through the first period, inclusive) and add it to the list
+      }
+      else{
+        varIndices = c(varIndices, substr(codebook.scales[k,1],start=delimiter[[1]][[p-1]]+1,stop=delimiter[[1]][[p]]))   # pull out the descriptor after the previous period through the current period (inclusive) and add it to the list
+      }
+    }
+    listReturn = calcScales(, varIndices, codebook.scales[k, "operation"], aggCheck)
+    # append the aggregated column (first element of the list returned by calcScales; the operation comes from the codebook's "operation" column) to the working dataset
+ = cbind(, listReturn[[1]])
+    names([ncol(] = codebook.scales[k,1]     # change the name of the column
+    aggCheck = listReturn[[2]]
+    # reset the list that contains the indices
+    varIndices = list()
+  }
+write.csv(, file = matFile, row.names=FALSE)
+write.csv(, file="MasterAnswerTable.csv", row.names=FALSE)   # NOTE(review): hard-coded path ignores the matFile argument; redundant with the write above when matFile is left at its default - confirm whether this second write is intended
+#write.csv(aggCheck, file = paste(Sys.time(),"aggCheck.csv",sep = "_", collapse=""))
+#write.csv(aggCheck, file = "aggCheck.csv", collapse="")
+write.csv(aggCheck, file = aggCheckFile)
+# Save a simple version in JSON format for D3 histogram
+cat( toJSON(unname(split(histData, 1:nrow(histData)))) )