| // Copyright 2013 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Author: bvb@google.com (Ben VanBerkum) |
| // Author: sligocki@google.com (Shawn Ligocki) |
| |
| #include "pagespeed/kernel/base/statistics_logger.h" |
| |
| #include <cstddef> |
| #include <map> |
| #include <set> |
| #include <utility> // for pair |
| #include <vector> |
| |
| #include "net/instaweb/util/public/escaping.h" |
| #include "pagespeed/kernel/base/abstract_mutex.h" |
| #include "pagespeed/kernel/base/basictypes.h" |
| #include "pagespeed/kernel/base/file_writer.h" |
| #include "pagespeed/kernel/base/file_system.h" |
| #include "pagespeed/kernel/base/message_handler.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/base/timer.h" |
| #include "pagespeed/kernel/base/writer.h" |
| #include "pagespeed/kernel/html/html_keywords.h" |
| |
| namespace net_instaweb { |
| |
| StatisticsLogger::StatisticsLogger( |
| int64 update_interval_ms, int64 max_logfile_size_kb, |
| const StringPiece& logfile_name, MutexedVariable* last_dump_timestamp, |
| MessageHandler* message_handler, Statistics* stats, |
| FileSystem* file_system, Timer* timer) |
| : last_dump_timestamp_(last_dump_timestamp), |
| message_handler_(message_handler), |
| statistics_(stats), |
| file_system_(file_system), |
| timer_(timer), |
| update_interval_ms_(update_interval_ms), |
| max_logfile_size_kb_(max_logfile_size_kb) { |
| logfile_name.CopyToString(&logfile_name_); |
| } |
| |
| StatisticsLogger::~StatisticsLogger() { |
| } |
| |
| void StatisticsLogger::UpdateAndDumpIfRequired() { |
| int64 current_time_ms = timer_->NowMs(); |
| AbstractMutex* mutex = last_dump_timestamp_->mutex(); |
| if (mutex == NULL) { |
| return; |
| } |
| // Avoid blocking if the dump is already happening in another thread/process. |
| if (mutex->TryLock()) { |
| if (current_time_ms >= |
| (last_dump_timestamp_->GetLockHeld() + update_interval_ms_)) { |
| // It's possible we'll need to do some of the following here for |
| // cross-process consistency: |
| // - flush the logfile before unlock to force out buffered data |
| FileSystem::OutputFile* statistics_log_file = |
| file_system_->OpenOutputFileForAppend( |
| logfile_name_.c_str(), message_handler_); |
| if (statistics_log_file != NULL) { |
| FileWriter statistics_writer(statistics_log_file); |
| statistics_->DumpConsoleVarsToWriter( |
| current_time_ms, &statistics_writer, message_handler_); |
| statistics_writer.Flush(message_handler_); |
| file_system_->Close(statistics_log_file, message_handler_); |
| |
| // Trim logfile if it's over max size. |
| TrimLogfileIfNeeded(); |
| } else { |
| message_handler_->Message(kError, |
| "Error opening statistics log file %s.", |
| logfile_name_.c_str()); |
| } |
| // Update timestamp regardless of file write so we don't hit the same |
| // error many times in a row. |
| last_dump_timestamp_->SetLockHeld(current_time_ms); |
| } |
| mutex->Unlock(); |
| } |
| } |
| |
| void StatisticsLogger::TrimLogfileIfNeeded() { |
| int64 size_bytes; |
| if (file_system_->Size(logfile_name_, &size_bytes, message_handler_) && |
| size_bytes > max_logfile_size_kb_ * 1024) { |
| // TODO(sligocki): Rotate a set of logfiles and just delete the |
| // oldest one each time instead of deleting the entire log. |
| // NOTE: Update SharedMemStatisticsTestBase.TestLogfileTrimming when we |
| // make this change so that it correctly tests the total log size. |
| file_system_->RemoveFile(logfile_name_.c_str(), message_handler_); |
| } |
| } |
| |
| void StatisticsLogger::DumpJSON( |
| const std::set<GoogleString>& var_titles, |
| int64 start_time, int64 end_time, int64 granularity_ms, |
| Writer* writer, MessageHandler* message_handler) const { |
| FileSystem::InputFile* log_file = |
| file_system_->OpenInputFile(logfile_name_.c_str(), message_handler); |
| VarMap parsed_var_data; |
| std::vector<int64> list_of_timestamps; |
| StatisticsLogfileReader reader(log_file, start_time, end_time, |
| granularity_ms, message_handler); |
| ParseDataFromReader(var_titles, &reader, &list_of_timestamps, |
| &parsed_var_data); |
| PrintJSON(list_of_timestamps, parsed_var_data, writer, message_handler); |
| file_system_->Close(log_file, message_handler); |
| } |
| |
| void StatisticsLogger::ParseDataFromReader( |
| const std::set<GoogleString>& var_titles, |
| StatisticsLogfileReader* reader, |
| std::vector<int64>* timestamps, VarMap* var_values) const { |
| // curr_timestamp starts as 0 because we need to compare it to the first |
| // timestamp pulled from the file. The current timestamp should always be |
| // less than the timestamp after it due to the fact that the logfile dumps |
| // data periodically. This makes sure that all timestamps are >= 0. |
| int64 curr_timestamp = 0; |
| // Stores data associated with current timestamp. |
| GoogleString data; |
| while (reader->ReadNextDataBlock(&curr_timestamp, &data)) { |
| // Parse variable data. |
| std::map<StringPiece, StringPiece> parsed_var_data; |
| ParseVarDataIntoMap(data, &parsed_var_data); |
| |
| timestamps->push_back(curr_timestamp); |
| // Push all variable values. Note: We only save the variables listed in |
| // var_titles, the rest are disregarded. |
| for (std::set<GoogleString>::const_iterator iter = var_titles.begin(); |
| iter != var_titles.end(); ++iter) { |
| const GoogleString& var_title = *iter; |
| |
| std::map<StringPiece, StringPiece>::const_iterator value_iter = |
| parsed_var_data.find(var_title); |
| if (value_iter != parsed_var_data.end()) { |
| (*var_values)[var_title].push_back(value_iter->second.as_string()); |
| } else { |
| // If data is not available in this segment, we just push 0 as a place |
| // holder. We must push something or else it will be ambiguous which |
| // timestamp corresponds to which variable values. |
| (*var_values)[var_title].push_back("0"); |
| } |
| } |
| } |
| } |
| |
| void StatisticsLogger::ParseVarDataIntoMap( |
| StringPiece logfile_var_data, |
| std::map<StringPiece, StringPiece>* parsed_var_data) const { |
| std::vector<StringPiece> lines; |
| SplitStringPieceToVector(logfile_var_data, "\n", &lines, true); |
| for (size_t i = 0; i < lines.size(); ++i) { |
| size_t end_index_of_name = lines[i].find_first_of(":"); |
| StringPiece var_name = lines[i].substr(0, end_index_of_name); |
| StringPiece var_value_string = lines[i].substr(end_index_of_name + 2); |
| (*parsed_var_data)[var_name] = var_value_string; |
| } |
| } |
| |
| void StatisticsLogger::PrintJSON( |
| const std::vector<int64>& list_of_timestamps, |
| const VarMap& parsed_var_data, |
| Writer* writer, MessageHandler* message_handler) const { |
| writer->Write("{", message_handler); |
| writer->Write("\"timestamps\": [", message_handler); |
| PrintTimestampListAsJSON(list_of_timestamps, writer, message_handler); |
| writer->Write("],", message_handler); |
| writer->Write("\"variables\": {", message_handler); |
| PrintVarDataAsJSON(parsed_var_data, writer, message_handler); |
| writer->Write("}", message_handler); |
| writer->Write("}", message_handler); |
| } |
| |
| void StatisticsLogger::PrintTimestampListAsJSON( |
| const std::vector<int64>& list_of_timestamps, Writer* writer, |
| MessageHandler* message_handler) const { |
| for (size_t i = 0; i < list_of_timestamps.size(); ++i) { |
| writer->Write(Integer64ToString(list_of_timestamps[i]), message_handler); |
| if (i != list_of_timestamps.size() - 1) { |
| writer->Write(", ", message_handler); |
| } |
| } |
| } |
| |
| void StatisticsLogger::PrintVarDataAsJSON( |
| const VarMap& parsed_var_data, Writer* writer, |
| MessageHandler* message_handler) const { |
| for (VarMap::const_iterator iterator = |
| parsed_var_data.begin(); iterator != parsed_var_data.end(); |
| ++iterator) { |
| StringPiece var_name = iterator->first; |
| VariableInfo info = iterator->second; |
| // If we are at the last entry in the map, we do not |
| // want the trailing comma as per JSON format. |
| if (iterator != parsed_var_data.begin()) { |
| writer->Write(",", message_handler); |
| } |
| GoogleString html_name, json_name; |
| HtmlKeywords::Escape(var_name, &html_name); |
| EscapeToJsStringLiteral(html_name, true /* add_quotes*/, &json_name); |
| |
| writer->Write(json_name, message_handler); |
| writer->Write(": [", message_handler); |
| for (size_t i = 0; i < info.size(); ++i) { |
| writer->Write(info[i], message_handler); |
| // If we are at the last recorded variable, we do not want the |
| // trailing comma as per JSON format. |
| if (i != info.size() - 1) { |
| writer->Write(", ", message_handler); |
| } |
| } |
| writer->Write("]", message_handler); |
| } |
| } |
| |
| StatisticsLogfileReader::StatisticsLogfileReader( |
| FileSystem::InputFile* file, int64 start_time, int64 end_time, |
| int64 granularity_ms, MessageHandler* message_handler) |
| : file_(file), |
| start_time_(start_time), |
| end_time_(end_time), |
| granularity_ms_(granularity_ms), |
| message_handler_(message_handler) { |
| } |
| |
| StatisticsLogfileReader::~StatisticsLogfileReader() { |
| } |
| |
| bool StatisticsLogfileReader::ReadNextDataBlock(int64* timestamp, |
| GoogleString* data) { |
| if (buffer_.size() < 1) { |
| FeedBuffer(); |
| } |
| size_t offset = 0; |
| // The first line should always be "timestamp: xxx". |
| // If it's not, we're done; otherwise, we grab the timestamp value. |
| while (StringPiece(buffer_).substr(offset).starts_with("timestamp: ")) { |
| int64 old_timestamp = *timestamp; |
| // If the timestamp was cut off in the middle of the buffer, we need to |
| // read more into the buffer. |
| size_t newline_pos = BufferFind("\n", offset); |
| // Separate the current timestamp from the rest of the data in the buffer. |
| size_t timestamp_size = STATIC_STRLEN("timestamp: "); |
| StringPiece timestamp_str = StringPiece(buffer_).substr( |
| offset + timestamp_size, newline_pos - timestamp_size); |
| StringToInt64(timestamp_str, timestamp); |
| // Before ReadNextDataBlock returns, it finds the next occurrence of the |
| // full string "timestamp: " so that it knows it contains the full data |
| // block. It then separates the data block from the rest of the data, |
| // meaning that "timestamp: " should always be the first thing in the |
| // buffer. |
| size_t next_timestamp_pos = BufferFind("timestamp: ", |
| offset + newline_pos + 1); |
| // Check to make sure that this timestamp fits the criteria. If it doesn't, |
| // the loop restarts. |
| if (*timestamp >= start_time_ && *timestamp <= end_time_ && |
| *timestamp >= old_timestamp + granularity_ms_) { |
| *data = buffer_.substr(newline_pos + 1, |
| next_timestamp_pos - (newline_pos + 1)); |
| buffer_.erase(0, next_timestamp_pos); |
| return true; |
| } else { |
| *timestamp = old_timestamp; |
| offset = next_timestamp_pos; |
| } |
| } |
| return false; |
| } |
| |
| // Finds the string constant search_for in the buffer, continually reading more |
| // into the buffer if search_for is not found and trying again. Returns the |
| // position of first occurrence of search_for. If search_for is not found, |
| // npos is returned. |
| size_t StatisticsLogfileReader::BufferFind(const char* search_for, |
| size_t start_at) { |
| size_t position = buffer_.find(search_for, start_at); |
| while (position == buffer_.npos) { |
| int read = FeedBuffer(); |
| if (read == 0) { |
| return buffer_.npos; |
| } |
| position = buffer_.find(search_for, |
| buffer_.size() - read - strlen(search_for)); |
| } |
| return position; |
| } |
| |
| int StatisticsLogfileReader::FeedBuffer() { |
| const int kChunkSize = 3000; |
| char buf[kChunkSize]; |
| int num_read = file_->Read(buf, kChunkSize, message_handler_); |
| StrAppend(&buffer_, StringPiece(buf, num_read)); |
| return num_read; |
| } |
| |
| } // namespace net_instaweb |