blob: 1183197f4ae40d420fae81186fd5903adcc8e092 [file] [log] [blame]
// Copyright 2012 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: morlovich@google.com (Maksim Orlovich)
#include "pagespeed/system/loopback_route_fetcher.h"
#include "base/logging.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/http/public/url_async_fetcher.h"
#include "net/instaweb/rewriter/public/domain_lawyer.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/http_names.h"
#include "pagespeed/kernel/http/request_headers.h"
#include "apr_network_io.h"
namespace net_instaweb {
class MessageHandler;
// Constructs a fetcher that forwards requests to backend_fetcher, rewriting
// the URL to point at own_ip:own_port when Fetch() decides the origin is not
// one it was told about.
//
//   options         - consulted in Fetch() for its domain lawyer.
//   own_ip          - textual IP address to route loopback fetches to; an
//                     empty string selects "127.0.0.1".
//   own_port        - port to route loopback fetches to.
//   backend_fetcher - fetcher that performs the actual request.
LoopbackRouteFetcher::LoopbackRouteFetcher(
    const RewriteOptions* options,
    const GoogleString& own_ip,
    int own_port,
    UrlAsyncFetcher* backend_fetcher)
    : options_(options),
      own_ip_(own_ip),
      own_port_(own_port),
      backend_fetcher_(backend_fetcher) {
  if (own_ip_.empty()) {
    own_ip_ = "127.0.0.1";
  } else if (own_ip_.find(':') != GoogleString::npos && own_ip_[0] != '[') {
    // A ':' in the address means an IPv6 literal.  Fetch() splices own_ip_
    // straight into the authority part of a URL, where an IPv6 literal must
    // be enclosed in brackets; otherwise its colons are indistinguishable
    // from the port separator.  Already-bracketed input is left untouched.
    own_ip_ = StrCat("[", own_ip_, "]");
  }
}
// The destructor body is intentionally empty: it performs no explicit
// cleanup of its own.
LoopbackRouteFetcher::~LoopbackRouteFetcher() {}
void LoopbackRouteFetcher::Fetch(const GoogleString& original_url,
MessageHandler* message_handler,
AsyncFetch* fetch) {
GoogleString url = original_url;
GoogleUrl parsed_url(original_url);
if (!parsed_url.IsWebValid()) {
// Fail immediately in case we can't parse the URL, rather than risk
// getting weird handling due to inconsistencies in parsing between us
// and backend_fetcher_.
LOG(WARNING) << "Can't parse URL:" << original_url;
fetch->Done(false);
return;
}
RequestHeaders* request_headers = fetch->request_headers();
// Check to see if the URL we hand to the backend has an origin we were never
// explicitly told of, and if so just talk to loopback.
// Note that in case of an origin mapping the parsed_url will contain the
// fetch host, not the original host, so the domain_lawyer will know about it
// and the if body will not run.
if (!options_->domain_lawyer()->IsOriginKnown(parsed_url) &&
!fetch->request_context()->IsSessionAuthorizedFetchOrigin(
parsed_url.Origin().as_string())) {
// If there is no host header, make sure to add one, since we are about
// to munge the URL.
if (request_headers->Lookup1(HttpAttributes::kHost) == NULL) {
request_headers->Replace(HttpAttributes::kHost, parsed_url.HostAndPort());
}
GoogleString path_and_leaf;
// Includes leading slash.
parsed_url.PathAndLeaf().CopyToString(&path_and_leaf);
StringPiece scheme = parsed_url.Scheme();
GoogleString port_section = "";
if (!((own_port_ == 80 && scheme == "http") ||
(own_port_ == 443 && scheme == "https"))) {
port_section = StrCat(":", IntegerToString(own_port_));
}
url = StrCat(scheme, "://", own_ip_, port_section, path_and_leaf);
// Note that we end up with host: containing the actual URL's host, but
// the URL containing just our IP. This is technically wrong, but the
// Serf fetcher will interpret it in the way we want it to --- it will
// connect to our IP, pass only the path portion to the host, and
// keep the host: header matching what's in the request_headers.
}
backend_fetcher_->Fetch(url, message_handler, fetch);
}
bool LoopbackRouteFetcher::IsLoopbackAddr(const apr_sockaddr_t* addr) {
if (addr->family == APR_INET) {
// 127.0.0.0/8 is the IPv4 loopback.
// Note: is network byte order, so we can do char-wide indexing into it
// consistently (but not look at the whole thing).
const char* ipbytes = reinterpret_cast<const char*>(
&addr->sa.sin.sin_addr.s_addr);
return (ipbytes[0] == 127);
} else if (addr->family == APR_INET6) {
const in6_addr& addr_v6 = addr->sa.sin6.sin6_addr;
// There are a couple of ways we can see loopbacks in IPv6: as the
// proper IPv6 loopback, ::1, or as "IPv4-compatible IPv6 address"
// of the IPv4 loopback, ::FFFF:127.x.y.z.
// Regardless, the first 10 bytes ought to be 0.
for (int b = 0; b < 10; ++b) {
if (addr_v6.s6_addr[b] != 0) {
return false;
}
}
// If first 10 are OK, check the last 6 bytes for the 2 options.
return (addr_v6.s6_addr[10] == 0xFF &&
addr_v6.s6_addr[11] == 0xFF &&
addr_v6.s6_addr[12] == 127) ||
(addr_v6.s6_addr[10] == 0 &&
addr_v6.s6_addr[11] == 0 &&
addr_v6.s6_addr[12] == 0 &&
addr_v6.s6_addr[13] == 0 &&
addr_v6.s6_addr[14] == 0 &&
addr_v6.s6_addr[15] == 1);
} else {
return false;
}
}
} // namespace net_instaweb