/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: jmarantz@google.com (Joshua Marantz)

#include "net/instaweb/rewriter/public/url_partnership.h"

#include <cstddef>

#include "base/logging.h"
#include "net/instaweb/rewriter/public/domain_lawyer.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/url_namer.h"
#include "pagespeed/kernel/base/message_handler.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/stl_util.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/http/google_url.h"

namespace net_instaweb {

// Binds a new, empty partnership to the configuration of |driver|.
//
// Caches two pointers obtained from the driver: its RewriteOptions and the
// UrlNamer of its ServerContext.  |driver| and its server context are
// dereferenced unconditionally, so both must be non-NULL.  The destructor
// in this file does not delete either cached pointer.
UrlPartnership::UrlPartnership(const RewriteDriver* driver)
    : rewrite_options_(driver->options()),
      url_namer_(driver->server_context()->url_namer()) {
}

// Frees the GoogleUrl objects held in url_vector_.  AddUrl() stores them as
// raw pointers released from a scoped_ptr, so they are deleted here.
UrlPartnership::~UrlPartnership() {
  STLDeleteElements(&url_vector_);
}

// Adds a URL to a combination.  If it can be legally added, consulting
// the DomainLawyer, then true is returned.  AddUrl cannot be called
// after Resolve (CHECK failure).
bool UrlPartnership::AddUrl(const StringPiece& untrimmed_resource_url,
                            MessageHandler* handler) {
  GoogleString resource_url, mapped_domain_name;
  bool ret = false;
  TrimWhitespace(untrimmed_resource_url, &resource_url);

  if (resource_url.empty()) {
    handler->Message(
        kInfo, "Cannot rewrite empty URL relative to %s",
        original_origin_and_path_.spec_c_str());
  } else if (!original_origin_and_path_.IsWebValid()) {
    handler->Message(
        kInfo, "Cannot rewrite %s relative to invalid url %s",
        resource_url.c_str(),
        original_origin_and_path_.spec_c_str());
  } else {
    // First resolve the original request to ensure that it is allowed by the
    // options.
    scoped_ptr<GoogleUrl> resolved_request(
        new GoogleUrl(original_origin_and_path_, resource_url));
    if (!resolved_request->IsWebValid()) {
      handler->Message(
          kInfo, "URL %s cannot be resolved relative to base URL %s",
          resource_url.c_str(),
          original_origin_and_path_.spec_c_str());
    } else if (!rewrite_options_->IsAllowed(resolved_request->Spec())) {
      handler->Message(kInfo,
                       "Rewriting URL %s is disallowed via configuration",
                       resolved_request->spec_c_str());
    } else if (FindResourceDomain(original_origin_and_path_,
                                  url_namer_,
                                  rewrite_options_,
                                  resolved_request.get(),
                                  &mapped_domain_name,
                                  handler)) {
      if (url_vector_.empty()) {
        domain_and_path_prefix_.swap(mapped_domain_name);
        ret = true;
      } else {
        GoogleUrl domain_url(domain_and_path_prefix_);
        GoogleUrl mapped_url(mapped_domain_name);
        ret = (domain_url.Origin() == mapped_url.Origin());
        if (ret && !rewrite_options_->combine_across_paths()) {
          ret = (ResolvedBase() == resolved_request->AllExceptLeaf());
        }
      }

      if (ret) {
        url_vector_.push_back(resolved_request.release());
        int index = url_vector_.size() - 1;
        IncrementalResolve(index);
      }
    }
  }
  return ret;
}

bool UrlPartnership::FindResourceDomain(const GoogleUrl& base_url,
                                        const UrlNamer* url_namer,
                                        const RewriteOptions* rewrite_options,
                                        GoogleUrl* resource,
                                        GoogleString* domain,
                                        MessageHandler* handler) {
  bool ret = false;
  GoogleString resource_url;
  if (url_namer->Decode(*resource, rewrite_options, &resource_url)) {
    resource->Reset(resource_url);
    ret = resource->IsWebValid();
    resource->Origin().CopyToString(domain);
  } else {
    ret = rewrite_options->domain_lawyer()->MapRequestToDomain(
        base_url, resource->Spec(), domain,
        resource, handler);
  }
  return ret;
}

// Removes the most recently added URL from the partnership and frees it.
// CHECK-fails if the partnership is empty.
void UrlPartnership::RemoveLast() {
  CHECK(!url_vector_.empty());
  delete url_vector_.back();
  url_vector_.pop_back();

  // The removed URL may have shortened the common prefix, so rebuild it from
  // scratch by replaying the remaining URLs in their original order.
  common_components_.clear();
  const int remaining = static_cast<int>(url_vector_.size());
  for (int pos = 0; pos < remaining; ++pos) {
    IncrementalResolve(pos);
  }
}

// Empties the partnership and re-bases it on |original_request|.
//
// All stored URLs are deleted and the common path components are discarded.
// The new base is |original_request| with its leaf removed.
void UrlPartnership::Reset(const GoogleUrl& original_request) {
  common_components_.clear();
  STLDeleteElements(&url_vector_);
  url_vector_.clear();

  if (!original_request.IsWebValid()) {
    // NOTE(review): an invalid request leaves the previously stored base in
    // place rather than clearing it; confirm that this is intended.
    return;
  }
  original_origin_and_path_.Reset(original_request.AllExceptLeaf());
}

void UrlPartnership::IncrementalResolve(int index) {
  CHECK_LE(0, index);
  CHECK_LT(index, static_cast<int>(url_vector_.size()));

  // When tokenizing a URL, we don't want to omit empty segments
  // because we need to avoid aliasing "http://x" with "/http:/x".
  bool omit_empty = false;
  StringPieceVector components;

  if (index == 0) {
    StringPiece base = url_vector_[0]->AllExceptLeaf();
    SplitStringPieceToVector(base, "/", &components, omit_empty);
    components.pop_back();            // base ends with "/"
    CHECK_LE(3U, components.size());  // expect {"http:", "", "x"...}
    for (size_t i = 0; i < components.size(); ++i) {
      const StringPiece& sp = components[i];
      common_components_.push_back(GoogleString(sp.data(), sp.size()));
    }
  } else {
    // Split each string on / boundaries, then compare these path elements
    // until one doesn't match, then shortening common_components.
    StringPiece all_but_leaf = url_vector_[index]->AllExceptLeaf();
    SplitStringPieceToVector(all_but_leaf, "/", &components, omit_empty);
    components.pop_back();            // base ends with "/"
    CHECK_LE(3U, components.size());  // expect {"http:", "", "x"...}

    if (components.size() < common_components_.size()) {
      common_components_.resize(components.size());
    }
    for (size_t c = 0; c < common_components_.size(); ++c) {
      if (common_components_[c] != components[c]) {
        common_components_.resize(c);
        break;
      }
    }
  }
}

// Returns the common base of all URLs in the partnership: every entry of
// common_components_ followed by a '/'.  The result is empty when there are
// no common components.
GoogleString UrlPartnership::ResolvedBase() const {
  GoogleString base;
  const size_t num_components = common_components_.size();
  for (size_t i = 0; i < num_components; ++i) {
    // The first segment is the scheme ("http:"), so the separator goes after
    // each segment rather than before it.
    base.append(common_components_[i]);
    base.push_back('/');
  }
  return base;
}

// Returns the path of the URL at |index| relative to ResolvedBase(), i.e.
// the URL's spec with the common base prefix removed.
//
// CHECK-fails if |index| does not refer to a URL in the partnership, or if
// the URL's spec does not start with ResolvedBase().
GoogleString UrlPartnership::RelativePath(int index) const {
  // Validate the index before subscripting url_vector_, matching the guards
  // used by IncrementalResolve().
  CHECK_LE(0, index);
  CHECK_LT(index, static_cast<int>(url_vector_.size()));

  const GoogleString resolved_base = ResolvedBase();
  const StringPiece spec = url_vector_[index]->Spec();
  CHECK_GE(spec.size(), resolved_base.size());
  CHECK_EQ(StringPiece(spec.data(), resolved_base.size()),
           StringPiece(resolved_base));
  return GoogleString(spec.data() + resolved_base.size(),
                      spec.size() - resolved_base.size());
}

}  // namespace net_instaweb
