| <?php |
| |
| /** |
| * Main Runner which handles triggering individual tasks based on |
| * the cmdline params specifying what to run for what day |
| * |
| */ |
| class Runner { |
| |
| static $currentDate; |
| static $grabbedLogFile; |
| static $product; |
| |
| |
| /** |
| * Run |
| * @param string $source Resource to run |
| * @return void |
| */ |
| public static function run($source) { |
| $availaleSources = array('dlc', 'nb', 'vvm'); |
| // prepare the queue of products to run for |
| if ($source == 'all') { |
| $runSources = $availaleSources; |
| } else { |
| if (in_array($source, $availaleSources)) { |
| $runSources = array($source); |
| } else { |
| //unknown product |
| throw new Exception('Unknown source specified ' . $source); |
| } |
| } |
| self::lock(); |
| foreach ($runSources as $p) { |
| self::procesSource($p); |
| } |
| self::unlock(); |
| } |
| |
| private function procesSource($source) { |
| Logger::write(Logger::INFO, 'Starting processing soucre: ' . $source); |
| if (self::grabSourceLogfile($source) == true) { |
| // reset Db counter |
| Db::$counter = 0; |
| self::parseLogfile($source); |
| self::removeLogfile(); |
| self::incrementRunDate($source); |
| } |
| } |
| |
| //dsd |
| |
| private function parseLogfile($source) { |
| $importCounter = 0; |
| Logger::write(Logger::INFO, 'Starting parsing of the ' . $source . ' logfile ' . self::$grabbedLogFile); |
| $months = array( |
| 'Jan' => '01', |
| 'Feb' => '02', |
| 'Mar' => '03', |
| 'Apr' => '04', |
| 'May' => '05', |
| 'Jun' => '06', |
| 'Jul' => '07', |
| 'Aug' => '08', |
| 'Sep' => '09', |
| 'Oct' => '10', |
| 'Nov' => '11', |
| 'Dec' => '12' |
| ); |
| $handle = fopen(self::$grabbedLogFile, 'r'); |
| if ($handle) { |
| $nl = $nok = 0; |
| while (($line = fgets($handle)) !== false) { |
| $hit = false; |
| // only interested in requests that include the 'unique' identifier as they represent the AU pings |
| if (!strstr($line, '?unique=') || strstr($line, '/hotfixes/') || strstr($line, '/thirdparty/')) { |
| $nl++; |
| continue; |
| } |
| switch ($source) { |
| case 'dlc': |
| $line=str_replace('<%JSON:httpd_access%> ','', $line); |
| $jsonLog = json_decode($line, true); |
| $preg='/^(.+?)\?unique=(unique%3D)?([_A-Z-]+)?(0)?([a-f0-9-]+)(_[a-f0-9-]+)?(.*)?/'; |
| $m = array(); |
| $hit = preg_match($preg, $jsonLog['request'], $match); |
| if($hit) { |
| $ip = $jsonLog['clientip']; |
| $date = date('Y-m-d', strtotime($jsonLog['time'])); |
| $time = date('H:i:s', strtotime($jsonLog['time'])); |
| //$timestamp = $match[1].' '.$match[2]; |
| $path = $jsonLog['uri']; |
| $product_id = $match[3]; |
| $user_id = $match[5]; |
| if (isset($match[6]) && substr($match[6], 0, 1) == '_') { |
| $super_id = substr($match[6], 1); |
| } else { |
| $super_id = ""; |
| } |
| $response = $jsonLog['status']; |
| $bytes = $jsonLog['bytes']; |
| } |
| break; |
| |
| } |
| // let's parse it and fill these: $ip,$date $time,$path,$product_id,$user_id,$super_id,$response,$bytes |
| if($hit) { |
| |
| // check the unique ID format |
| // prior to NB 5.5.1, the ID was just a timestamp (current time in millis), |
| // since NB 5.5.1, the ID is a standard UUID: 01234567-89ab-cdef-0123-456789abcdef |
| if (!preg_match('/^(([0-9]{5,12})|([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}))/', $user_id, $match)) { |
| Logger::write(Logger::DEBUG, self::$grabbedLogFile . ": suspicious user ID '$user_id' in line: $nl - $line"); |
| } else { |
| // in 6.5, a truly unique user ID (super ID) has been added, |
| // check the validity of this ID in UUID format, if available |
| if (strlen($super_id) > 0 && !preg_match('/^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/', $super_id, $match)) { |
| Logger::write(Logger::DEBUG, self::$grabbedLogFile . ": suspicious super ID '$super_id' in line: $nl - $line"); |
| // log a warning, but don't stop here, if the super ID is invalid, |
| // just ignore it |
| $super_id = ""; |
| } |
| // return data found, ignore invalid requests (404) |
| if (strcmp($response, "404")) { |
| try { |
| // import it to DB |
| if (Importer::import(array('ip' => $ip, 'ts' => $date . " " . $time, 'path' => $path, 'distro' => $product_id, 'user_id' => $user_id, 'user2_id' => $super_id, 'response' => $response, 'size' => $bytes), $source) == true) { |
| $importCounter++; |
| } |
| } catch (Exception $e) { |
| Logger::write(Logger::ERROR, 'Error happened during log entry import: ' . $e->getMessage()); |
| } |
| $nok++; |
| } |
| } |
| |
| } |
| $nl++; |
| } |
| Logger::write(Logger::INFO, 'Parsed ' . $nl . ' lines of the log, used ' . $nok . ' for importing'); |
| Logger::write(Logger::INFO, 'Really inported into DB were ' . $importCounter . ' AU hits (needed ' . Db::$counter . ' queries)'); |
| } else { |
| throw new Exception('Unable to open decompressed logfile for parsing ' . self::$grabbedLogFile); |
| } |
| return true; |
| } |
| |
| private function grabSourceLogfile($source) { |
| // get the current date |
| $currentDate = strtotime('+1 day', self::getLastRunDate($source)); |
| Logger::write(Logger::INFO, 'Current date set to ' . date('Y-m-d', $currentDate)); |
| // put together the path to the source logfile |
| switch ($source) { |
| case 'nb': // CONSTANT to avoid typo! |
| $filename = 'access_' . date('Ymd', $currentDate); |
| $url = URL_PREFIX_NB . date('Y_m', $currentDate) . '/' . $filename . '.gz'; |
| break; |
| case 'vvm': |
| $next_date = mktime(0, 0, 0, date("m", $currentDate), date("d", $currentDate) + 1, date("Y", $currentDate)); |
| $filename = date('Ymd', $currentDate) . "-" . date('Ymd', $next_date) . ".log"; |
| $url = URL_PREFIX_VVM . $filename . ".gz"; |
| break; |
| case 'dlc': |
| $filename = 'netbeans-vm.apache.org_access.log_' . date('Ymd', $currentDate); |
| $url = URL_PREFIX_DLC . date('Y_m', $currentDate) . '/' . $filename . '.gz'; |
| break; |
| default: |
| throw new Exception('Unknown source: ' . $source); |
| } |
| self::$grabbedLogFile = $filename; |
| self::$currentDate = $currentDate; |
| Logger::write(Logger::INFO, 'Going to download the source logfile: ' . $url); |
| // grab it using wget |
| system("wget --quiet $url", $returnVal); |
| if ($returnVal === 0) { |
| Logger::write(Logger::INFO, 'Source logfile downloaded'); |
| // decompress it |
| system("gzip -fd $filename.gz", $returnVal); |
| if ($returnVal === 0) { |
| Logger::write(Logger::INFO, 'Source logfile decompressed'); |
| return true; |
| } |
| } else { |
| Logger::write(Logger::INFO, 'Source logfile not available'); |
| return false; |
| } |
| } |
| |
| private function getLastRunDate($source) { |
| $ld = file(LAST_DATE_FILE_PREFIX . $source); |
| if ($ld) { |
| Logger::write(Logger::INFO, 'Last run date identified as ' . trim($ld[0], "\n")); |
| return strtotime(trim($ld[0], "\n")); |
| } else { |
| throw new Exception('Unable to get the last run date from ' . LAST_DATE_FILE_PREFIX . $source); |
| } |
| } |
| |
| private function incrementRunDate($source) { |
| if (file_put_contents(LAST_DATE_FILE_PREFIX . $source, date('Y-m-d', self::$currentDate)) != false) { |
| Logger::write(Logger::INFO, 'Setting the last run date to ' . date('Y-m-d', self::$currentDate)); |
| } else { |
| throw new Exception('Unable to set the last run date into ' . LAST_DATE_FILE_PREFIX . $source); |
| } |
| } |
| |
| private function lock() { |
| if (file_exists(LOCKFILE)) { |
| throw new Exception('Previous run still runnig, lockfile ' . LOCKFILE . ' from ' . date("F d Y H:i:s.", filemtime(LOCKFILE)) . ". Remove lockfile first\n"); |
| } |
| system('touch ' . LOCKFILE, $retval); |
| if ($retval === 0) { |
| Logger::write(Logger::INFO, 'Lockfile created'); |
| } else { |
| throw new Exception('Can\'t create lockfile ' . LOCKFILE . "\n"); |
| } |
| } |
| |
| private function unlock() { |
| if (unlink(LOCKFILE) == true) { |
| Logger::write(Logger::INFO, 'Lockfile removed'); |
| } else { |
| throw new Exception('Not possible to remove lockfile ' . LOCKFILE); |
| } |
| } |
| |
| private function removeLogfile() { |
| if (unlink(self::$grabbedLogFile) == true) { |
| Logger::write(Logger::INFO, 'Source logfile removed'); |
| } else { |
| throw new Exception('Not possible to remove source logfile ' . self::$grabbedLogFile); |
| } |
| } |
| |
| } |
| |
| ?> |