| /* |
| * Copyright 2011 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Authors: nforman@google.com (Naomi Forman) |
| // jefftk@google.com (Jeff Kaufman) |
| // |
| // Implements the insert_ga_snippet filter, which inserts the Google Analytics |
| // tracking snippet into html pages. When experiments are enabled, also inserts |
| // snippets to report experiment status back. |
| |
| #include "net/instaweb/rewriter/public/insert_ga_filter.h" |
| |
| #include "base/logging.h" |
| #include "net/instaweb/rewriter/public/experiment_util.h" |
| #include "net/instaweb/rewriter/public/rewrite_driver.h" |
| #include "net/instaweb/rewriter/public/rewrite_options.h" |
| #include "net/instaweb/rewriter/public/server_context.h" |
| #include "pagespeed/kernel/base/message_handler.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/html/html_element.h" |
| #include "pagespeed/kernel/html/html_name.h" |
| #include "pagespeed/kernel/html/html_node.h" |
| #include "pagespeed/kernel/http/google_url.h" |
| #include "pagespeed/kernel/js/js_keywords.h" |
| #include "pagespeed/kernel/js/js_tokenizer.h" |
| |
namespace {

// Name of the statistics variable that is bumped each time this filter
// inserts a GA snippet into a page.
const char kInsertedGaSnippets[] = "inserted_ga_snippets";

}  // namespace
| |
| namespace net_instaweb { |
| |
| // This filter primarily exists to support PageSpeed experiments that report |
| // back to Google Analytics for reporting. You can also use it just to insert |
| // the Google Analytics tracking snippet, though. |
| // |
| // GA had a rewrite recently, switching from ga.js to analytics.js with a new |
| // API. They also released support for content experiments. The older style of |
| // reporting is to use a custom variable. This filter can report to a content |
| // experiment with either ga.js or analytics.js; with ga.js reporting to a |
| // custom variable is still supported. |
| // |
| // If no GA snippet is present on the page then PageSpeed will insert one. |
| // Additionally, if you're running an experiment then PageSpeed will insert the |
| // JS necessary to report details back to GA. This can look like any of these |
| // three things: |
| // |
| // ga.js + custom variables: |
| // <script>kGAExperimentSnippet |
| // kGAJsSnippet</script> [ possibly existing ] |
| // |
| // ga.js + content experiments: |
| // <script src="kContentExperimentsJsClientUrl"></script> |
| // <script>kContentExperimentsSetChosenVariationSnippet |
| // kGAJsSnippet</script> [ possibly existing ] |
| // |
| // analytics.js + content experiments: |
| // <script>kAnalyticsJsSnippet</script> [ possibly existing ] |
| // kContentExperimentsSetExpAndVariantSnippet goes inside the analytics js |
| // snippet, just before the ga(send, pageview) call. |
| |
// ga.js snippet that reports experiment state through a custom variable (the
// older reporting style).  printf-style arguments, in order:
//   %s: optional snippet that raises the site speed sample rate (may be "").
//   %u: the ga.js custom variable slot to report to.
//   %s: experiment spec string, shown in the GA UI.
extern const char kGAExperimentSnippet[] =
    "var _gaq = _gaq || [];"
    "%s"  // Optional site speed tracking snippet.
    "_gaq.push(['_setCustomVar', %u, 'ExperimentState', '%s']);";
| |
// Asynchronous ga.js loader snippet, including the _trackPageview call.  The
// whole snippet only runs in the top-level window.  printf-style arguments,
// in order: GA account number, page domain name, optional site speed snippet.
extern const char kGAJsSnippet[] =
    "if (window.parent == window) {"
    "var _gaq = _gaq || [];"
    // %s: the GA account number.
    "_gaq.push(['_setAccount', '%s']);"
    // %s: the domain name.
    "_gaq.push(['_setDomainName', '%s']);"
    "_gaq.push(['_setAllowLinker', true]);"
    // %s: optional snippet to increase site speed tracking.
    "%s"
    "_gaq.push(['_trackPageview']);"
    "(function() {"
    "var ga = document.createElement('script'); "
    "ga.type = 'text/javascript';"
    "ga.async = true;"
    "ga.src = 'https://ssl.google-analytics.com/ga.js';"
    "var s = document.getElementsByTagName('script')[0];"
    "s.parentNode.insertBefore(ga, s);"
    "})();"
    "}";
| |
// Google Universal Analytics (analytics.js) snippet.  The whole snippet only
// runs in the top-level window.  printf-style arguments, in order:
//   %s: the GA account number.
//   %s: extra ga('create') argument used to raise site speed tracking
//       (kAnalyticsJsIncreaseSiteSpeedTracking), or nothing.
//   %s: kContentExperimentsSetExpAndVariantSnippet (already formatted), or
//       nothing.  It lands just before the ga('send', 'pageview') call.
extern const char kAnalyticsJsSnippet[] =
    "if (window.parent == window) {"
    "(function(i,s,o,g,r,a,m){"
    "i['GoogleAnalyticsObject']=r;"
    "i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},"
    "i[r].l=1*new Date();"
    "a=s.createElement(o), m=s.getElementsByTagName(o)[0];"
    "a.async=1;a.src=g;m.parentNode.insertBefore(a,m)"
    "})(window,document,'script','//www.google-analytics.com/analytics.js',"
    "'ga');"
    "ga('create', '%s', 'auto'%s);"
    "%s"
    "ga('send', 'pageview');"
    "}";
| |
// Fragments that raise site speed tracking to 100% with analytics.js.
//
// The first form is a complete fields object, appended as an additional
// ga('create') argument.  It is used when we insert the snippet ourselves, or
// when the existing snippet's ga('create') call has no fields object.
extern const char kAnalyticsJsIncreaseSiteSpeedTracking[] =
    ", {'siteSpeedSampleRate': 100}";

// The second form is a single field, placed at the start of a fields object
// that an existing snippet already passes to ga('create').
extern const char kAnalyticsJsIncreaseSiteSpeedTrackingMinimal[] =
    "'siteSpeedSampleRate': 100,";
| |
// URL of the content experiments client library.  With ga.js, content
// experiments need a synchronous load of /cx/api.js before anything else.
extern const char kContentExperimentsJsClientUrl[] =
    "//www.google-analytics.com/cx/api.js";
| |
// Call that must run after /cx/api.js has loaded and before ga.js loads when
// using content experiments with ga.js.  printf-style arguments, in order:
//   %d: the (numeric) variant id.
//   %s: the experiment id.
extern const char kContentExperimentsSetChosenVariationSnippet[] =
    "cxApi.setChosenVariation(%d, '%s');";
| |
// JS comment injected instead of the setChosenVariation call when the user
// asked for a non-numeric variant id with ga.js, which requires a numeric one.
// %s is the offending variant id.  The newlines on both ends keep the comment
// from getting lost in a wall of JavaScript.
extern const char kContentExperimentsNonNumericVariantComment[] =
    "\n"
    "/* mod_pagespeed cannot inject experiment variant '%s' "
    "because it's not a number */"
    "\n";
| |
// Calls needed for content experiments with analytics.js.  They go after
// ga('create', ...) and before ga('[...].send', 'pageview').  printf-style
// arguments, in order: experiment id, variant id.
extern const char kContentExperimentsSetExpAndVariantSnippet[] =
    "ga('set', 'expId', '%s');"    // %s: experiment id.
    "ga('set', 'expVar', '%s');";  // %s: variant id.
| |
// ga.js call that raises the site speed sample rate to 100%.
// TODO(nforman): Allow this to be configurable through RewriteOptions.
extern const char kGASpeedTracking[] =
    "_gaq.push(['_setSiteSpeedSampleRate', 100]);";
| |
| InsertGAFilter::InsertGAFilter(RewriteDriver* rewrite_driver) |
| : CommonFilter(rewrite_driver), |
| script_element_(NULL), |
| added_analytics_js_(false), |
| added_experiment_snippet_(false), |
| ga_id_(rewrite_driver->options()->ga_id()), |
| found_snippet_(false), |
| increase_speed_tracking_( |
| rewrite_driver->options()->increase_speed_tracking()), |
| seen_sync_ga_js_(false) { |
| Statistics* stats = driver()->statistics(); |
| inserted_ga_snippets_count_ = stats->GetVariable(kInsertedGaSnippets); |
| DCHECK(!ga_id_.empty()) << "Enabled ga insertion, but did not provide ga id."; |
| } |
| |
| void InsertGAFilter::InitStats(Statistics* stats) { |
| stats->AddVariable(kInsertedGaSnippets); |
| } |
| |
| InsertGAFilter::~InsertGAFilter() {} |
| |
| bool InsertGAFilter::StringLiteralMatches(StringPiece literal, |
| StringPiece desired) { |
| // Literal includes the beginning and ending quotes; need to exclude them. |
| return literal.substr(1, literal.size() - 2) == desired; |
| } |
| |
| bool InsertGAFilter::StringLiteralEndsWith(StringPiece literal, |
| StringPiece desired) { |
| // Literal includes the beginning and ending quotes; need to exclude them. |
| return literal.substr(1, literal.size() - 2).ends_with(desired); |
| } |
| |
| void InsertGAFilter::StartDocumentImpl() { |
| found_snippet_ = false; |
| script_element_ = NULL; |
| added_analytics_js_ = false; |
| added_experiment_snippet_ = false; |
| if (driver()->options()->running_experiment()) { |
| driver()->message_handler()->Message( |
| kInfo, "run_experiment: %s", |
| driver()->options()->ToExperimentDebugString().c_str()); |
| } |
| } |
| |
| // Start looking for ga snippet. |
| void InsertGAFilter::StartElementImpl(HtmlElement* element) { |
| if (!ga_id_.empty() && |
| !found_snippet_ && |
| element->keyword() == HtmlName::kScript && |
| script_element_ == NULL) { |
| script_element_ = element; |
| } |
| } |
| |
| // This isn't perfect but matches all the cases we've found. It's ok if it has |
| // some false positives; the later check is more thorough. |
| InsertGAFilter::AnalyticsStatus InsertGAFilter::FindSnippetInScript( |
| const GoogleString& s) { |
| // dc.js is a synonym for old-style ga.js |
| if (!seen_sync_ga_js_ && |
| (s.find("google-analytics.com/ga.js") != GoogleString::npos || |
| s.find("stats.g.doubleclick.net/dc.js") != GoogleString::npos)) { |
| // The synchronous snippet has two parts: first one with |
| // [google-analytics.com/ga.js] (no initial dot) and then a later one with |
| // ga_id, _getTracker, and _trackPageview. Track that we've seen what is |
| // probably the first snippet, and then if we later get what could be the |
| // second snippet we'll check below. |
| seen_sync_ga_js_ = true; |
| } |
| if (s.find(StrCat("'", ga_id_, "'")) == GoogleString::npos && |
| s.find(StrCat("\"", ga_id_, "\"")) == GoogleString::npos) { |
| return kNoSnippetFound; |
| } |
| if (s.find(".google-analytics.com/urchin.js") != GoogleString::npos) { |
| return kUnusableSnippetFound; // urchin.js is too old. |
| } else if (s.find(".google-analytics.com/ga.js") != GoogleString::npos || |
| s.find("stats.g.doubleclick.net/dc.js") != GoogleString::npos) { |
| // With the async snippet there is one part that first loads ga.js |
| // (using [.google-analytics.com/ga.js], with initial dot) and then has the |
| // ga_id (which we checked for above). |
| return kGaJs; // Asynchronous ga.js |
| } else if (seen_sync_ga_js_ && |
| s.find("_getTracker") != GoogleString::npos && |
| s.find("_trackPageview") != GoogleString::npos) { |
| // Synchronous ga.js was split over two script tags: first one to do the |
| // loading then one to do the initialization and page tracking. We want to |
| // process the second one. |
| return kGaJs; // Syncronous ga.js |
| } else if (s.find(".google-analytics.com/analytics.js")) { |
| return kAnalyticsJs; |
| } |
| return kUnusableSnippetFound; |
| } |
| |
| GoogleString InsertGAFilter::AnalyticsJsExperimentSnippet() const { |
| return StringPrintf( |
| kContentExperimentsSetExpAndVariantSnippet, |
| driver()->options()->content_experiment_id().c_str(), |
| driver()->options()->content_experiment_variant_id().c_str()); |
| } |
| |
| GoogleString InsertGAFilter::GaJsExperimentSnippet() const { |
| // ga.js requires a numeric variant id. Attempt to convert the string |
| // variant ID to int and use that. |
| const char* variant_id = |
| driver()->options()->content_experiment_variant_id().c_str(); |
| int numeric_variant_id; |
| if (StringToInt(variant_id, &numeric_variant_id)) { |
| return StringPrintf( |
| kContentExperimentsSetChosenVariationSnippet, numeric_variant_id, |
| driver()->options()->content_experiment_id().c_str()); |
| } else { |
| // Variant ID was non-numeric, so inject a warning. |
| return StringPrintf(kContentExperimentsNonNumericVariantComment, |
| variant_id); |
| } |
| } |
| |
| // * If we've already inserted any GA snippet or if we found a GA snippet in the |
| // original page, don't do anything. |
| // * If we haven't found anything, and haven't inserted anything yet, insert the |
| // GA js snippet. |
| // |
| // Caveat: The snippet should ideally be placed in <head> for accurate |
| // collection of data (e.g. pageviews etc.). We place it at the end of the |
| // document so that we won't add duplicate analytics js code for any page. |
| // |
| // For pages which don't already have analytics js, this might result in some |
| // data being lost. |
| void InsertGAFilter::EndDocument() { |
| if (found_snippet_ || added_analytics_js_ || ga_id_.empty()) { |
| return; |
| } |
| |
| // No snippets have been found, and we haven't added any snippets yet, so add |
| // one now. Include experiment setup if experiments are on. |
| |
| GoogleString js_text; |
| GoogleString experiment_snippet; |
| const char* speed_tracking = ""; |
| if (driver()->options()->use_analytics_js()) { |
| if (increase_speed_tracking_) { |
| speed_tracking = kAnalyticsJsIncreaseSiteSpeedTracking; |
| } |
| if (ShouldInsertExperimentTracking(true /* analytics.js */)) { |
| experiment_snippet = AnalyticsJsExperimentSnippet(); |
| } |
| js_text = StringPrintf( |
| kAnalyticsJsSnippet, |
| ga_id_.c_str(), |
| speed_tracking, |
| experiment_snippet.c_str()); |
| } else { |
| if (ShouldInsertExperimentTracking(false /* ga.js */)) { |
| if (driver()->options()->is_content_experiment()) { |
| HtmlElement* cxapi = driver()->NewElement(NULL, HtmlName::kScript); |
| driver()->AddAttribute( |
| cxapi, HtmlName::kSrc, kContentExperimentsJsClientUrl); |
| InsertNodeAtBodyEnd(cxapi); |
| experiment_snippet = GaJsExperimentSnippet(); |
| } else { |
| experiment_snippet = StringPrintf( |
| kGAExperimentSnippet, |
| "" /* don't change speed tracking here, we add it below */, |
| driver()->options()->experiment_ga_slot(), |
| driver()->options()->ToExperimentString().c_str()); |
| } |
| } |
| |
| // Domain for this html page. |
| GoogleString domain = driver()->google_url().Host().as_string(); |
| if (increase_speed_tracking_) { |
| speed_tracking = kGASpeedTracking; |
| } |
| js_text = StrCat(experiment_snippet, |
| StringPrintf(kGAJsSnippet, |
| ga_id_.c_str(), |
| domain.c_str(), |
| speed_tracking)); |
| } |
| |
| HtmlElement* script_element = driver()->NewElement(NULL, HtmlName::kScript); |
| InsertNodeAtBodyEnd(script_element); |
| HtmlNode* snippet = driver()->NewCharactersNode(script_element, js_text); |
| driver()->AppendChild(script_element, snippet); |
| |
| added_analytics_js_ = true; |
| inserted_ga_snippets_count_->Add(1); |
| } |
| |
| bool InsertGAFilter::ShouldInsertExperimentTracking(bool is_analytics_js) { |
| if (driver()->options()->running_experiment()) { |
| if (is_analytics_js && !driver()->options()->is_content_experiment()) { |
| driver()->WarningHere("Experiment framework requires a content experiment" |
| " when used with analytics.js."); |
| return false; |
| } |
| |
| int experiment_state = driver()->options()->experiment_id(); |
| if (experiment_state != experiment::kExperimentNotSet && |
| experiment_state != experiment::kNoExperiment) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| void InsertGAFilter::RewriteInlineScript(HtmlCharactersNode* characters) { |
| AnalyticsStatus analytics_status = |
| FindSnippetInScript(characters->contents()); |
| if (analytics_status == kNoSnippetFound) { |
| return; // This inline script isn't for GA; nothing to change. |
| } |
| |
| found_snippet_ = true; |
| |
| if (!ShouldInsertExperimentTracking(analytics_status == kAnalyticsJs)) { |
| return; // GA script found, but we don't need to change it. |
| } |
| |
| if (analytics_status == kUnusableSnippetFound) { |
| driver()->InfoHere("Page contains unusual Google Analytics snippet that" |
| " we're not able to modify to add experiment tracking."); |
| return; |
| } |
| |
| if (analytics_status == kAnalyticsJs) { |
| GoogleString rewritten; |
| StringPiece token; |
| pagespeed::JsKeywords::Type token_type; |
| pagespeed::js::JsTokenizer tokenizer( |
| server_context()->js_tokenizer_patterns(), characters->contents()); |
| ParseState state = kInitial; |
| |
| // Go through the tokens, appending them to rewritten. We need to find the |
| // ga(create) call so we can increase speed tracking. Then we need to find |
| // the ga(send pageview) call so we can insert our experiment snippet. |
| |
| // When we find a ga(send pageview) call it won't be obvious what we've |
| // found until we're several tokens along. So save the offset of each ga |
| // function call when we find it so we can later insert before if need be. |
| int ga_send_pageview_offset = -1; |
| bool inserted_speed_tracking = false; |
| |
| while ((token_type = tokenizer.NextToken(&token)) != |
| pagespeed::JsKeywords::kEndOfInput) { |
| if (token_type == pagespeed::JsKeywords::kError) { |
| driver()->InfoHere("Got invalid js in Google Analytics snippet"); |
| return; |
| } |
| if (token_type == pagespeed::JsKeywords::kComment || |
| token_type == pagespeed::JsKeywords::kWhitespace || |
| token_type == pagespeed::JsKeywords::kLineSeparator) { |
| // All states allow these, so stay in the same state. kLineSeparator is |
| // specifically for newlines that don't trigger semicolon insertion. |
| } else if (state == kInitial && |
| token_type == pagespeed::JsKeywords::kIdentifier && |
| token == "ga") { |
| ga_send_pageview_offset = rewritten.size(); |
| state = kGotGa; |
| } else if (state == kGotGa && |
| token_type == pagespeed::JsKeywords::kOperator && |
| token == "(") { |
| state = kGotGaFuncCall; |
| } else if (state == kGotGaFuncCall && |
| token_type == pagespeed::JsKeywords::kStringLiteral && |
| StringLiteralMatches(token, "create")) { |
| state = kGotGaCreate; |
| } else if (state == kGotGaFuncCall && |
| token_type == pagespeed::JsKeywords::kStringLiteral && |
| (StringLiteralMatches(token, "send") || |
| StringLiteralEndsWith(token, ".send"))) { |
| state = kGotGaSend; |
| } else if (state == kGotGaCreate && |
| token_type == pagespeed::JsKeywords::kOperator && |
| token == ",") { |
| state = kGotGaCreateComma; |
| } else if (state == kGotGaCreate && |
| token_type == pagespeed::JsKeywords::kOperator && |
| token == ")") { |
| // Saw end of function call without any fields object. Insert |
| // standard speed tracking here. |
| if (increase_speed_tracking_) { |
| rewritten.append(kAnalyticsJsIncreaseSiteSpeedTracking); |
| inserted_speed_tracking = true; |
| } |
| state = kInitial; |
| } else if (state == kGotGaCreateComma && |
| token_type == pagespeed::JsKeywords::kStringLiteral) { |
| // Ignore any string arguments after create, just let them pass. |
| state = kGotGaCreate; |
| } else if (state == kGotGaCreateComma && |
| token_type == pagespeed::JsKeywords::kOperator && |
| token == "{") { |
| state = kGotFieldsObject; |
| } else if (state == kGotFieldsObject) { |
| // Add our field setting before any of the others. |
| if (increase_speed_tracking_) { |
| rewritten.append(kAnalyticsJsIncreaseSiteSpeedTrackingMinimal); |
| inserted_speed_tracking = true; |
| } |
| state = kInitial; |
| } else if (state == kGotGaSend && |
| token_type == pagespeed::JsKeywords::kOperator && |
| token == ",") { |
| state = kGotGaSendComma; |
| } else if (state == kGotGaSendComma && |
| token_type == pagespeed::JsKeywords::kStringLiteral && |
| StringLiteralMatches(token, "pageview")) { |
| state = kGotGaSendPageview; |
| } else if (state == kGotGaSendPageview && |
| token_type == pagespeed::JsKeywords::kOperator && |
| (token == "," || token == ")")) { |
| CHECK(ga_send_pageview_offset != -1); |
| rewritten.insert(ga_send_pageview_offset, |
| AnalyticsJsExperimentSnippet()); |
| state = kSuccess; |
| } else if (state == kSuccess) { |
| // Pass the remaining tokens through, we already made our changes. |
| } else { |
| // Any token we weren't expecting puts us back into looking for "ga". |
| state = kInitial; |
| } |
| |
| rewritten.append(token.as_string()); |
| } |
| if (state == kSuccess) { |
| (*characters->mutable_contents()) = rewritten; |
| added_experiment_snippet_ = true; |
| |
| if (increase_speed_tracking_ && !inserted_speed_tracking) { |
| driver()->InfoHere("Failed to increase site speed tracking."); |
| } |
| } else { |
| driver()->InfoHere( |
| "Failed to add experiment tracking to existing snippet."); |
| } |
| } else { |
| DCHECK(analytics_status == kGaJs); |
| |
| if (driver()->options()->is_content_experiment()) { |
| // The API for content experiments with ga.js unfortunately requires a |
| // synchronous script load first. Ideally people would switch to |
| // analytics.js, which doesn't have this problem, but we need to support |
| // people who haven't switched as well. |
| // |
| // We can't do InsertBeforeCurrent here, because we could be in the |
| // horrible case where "<script>" has been flushed and now we're |
| // rewriting the script body. So the best we can do is: |
| // * Blank out this script. |
| // * Append the blocking external script load. |
| // * Append the edited body of the original script tag as a new |
| // inline script. |
| postponed_script_body_ = characters->contents(); |
| characters->mutable_contents()->clear(); |
| } else { |
| const char* speed_tracking = |
| increase_speed_tracking_ ? kGASpeedTracking : ""; |
| GoogleString snippet_text = StringPrintf( |
| kGAExperimentSnippet, |
| speed_tracking, |
| driver()->options()->experiment_ga_slot(), |
| driver()->options()->ToExperimentString().c_str()); |
| GoogleString* script = characters->mutable_contents(); |
| // Prepend snippet_text to the script block. |
| script->insert(0, snippet_text); |
| added_experiment_snippet_ = true; |
| } |
| } |
| } |
| |
| // If RewriteInlineScript decided to insert any new script nodes, do that |
| // insertion here. |
| void InsertGAFilter::HandleEndScript(HtmlElement* script) { |
| if (!postponed_script_body_.empty()) { |
| DCHECK(script == script_element_); |
| driver()->InsertScriptAfterCurrent( |
| kContentExperimentsJsClientUrl, true /* external */); |
| driver()->InsertScriptAfterCurrent( |
| StrCat(GaJsExperimentSnippet(), postponed_script_body_), |
| false /* inline */); |
| added_experiment_snippet_ = true; |
| postponed_script_body_.clear(); |
| } |
| script_element_ = NULL; |
| } |
| |
| void InsertGAFilter::EndElementImpl(HtmlElement* element) { |
| if (ga_id_.empty()) { |
| // We only DCHECK that it's non-empty above, but there's nothing useful we |
| // can do if it hasn't been set. Checking here means we'll make no changes. |
| return; |
| } |
| if (element->keyword() == HtmlName::kScript) { |
| HandleEndScript(element); |
| } |
| } |
| |
| void InsertGAFilter::Characters(HtmlCharactersNode* characters) { |
| if (script_element_ != NULL && !found_snippet_ && |
| !added_experiment_snippet_) { |
| RewriteInlineScript(characters); |
| } |
| } |
| |
| } // namespace net_instaweb |