blob: 0007260dafbfecaca229d95a56f8c3cc99ae2d95 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_UTIL_URL_PARSER_H
#define IMPALA_UTIL_URL_PARSER_H
#include "runtime/string-value.h"
#include "runtime/string-search.h"
namespace impala {
/// TODO: For now, our parse_url may not behave exactly like Hive
/// when given malformed URLs.
/// If necessary, we can closely follow Java's URL implementation
/// to behave exactly like Hive.
/// Example for explaining URL parts:
/// http://user:pass@example.com:80/docs/books/tutorial/index.html?name=networking#DOWNLOADING
/// PROTOCOL = http
/// AUTHORITY = user:pass@example.com:80
/// HOST = example.com
/// PATH = /docs/books/tutorial/index.html
/// QUERY = name=networking
/// FILE = /docs/books/tutorial/index.html?name=networking
/// REF = DOWNLOADING
/// USERINFO = user:pass
/// More details on what exactly the URL parts mean can be found here:
/// http://docs.oracle.com/javase/tutorial/networking/urls/urlInfo.html
class UrlParser {
 public:
  /// Selects which component of a URL a caller is asking for.
  /// INVALID is the value GetUrlPart() yields for a part name it does not recognize.
  enum UrlPart {
    INVALID,
    AUTHORITY,
    FILE,
    HOST,
    PATH,
    PROTOCOL,
    QUERY,
    REF,
    USERINFO
  };

  /// Translates a part name into its UrlPart enumerator by comparing 'part' against
  /// the url_* constants declared below (url_authority, url_file, url_host, ...).
  /// Yields INVALID when 'part' matches none of those constants.
  static UrlPart GetUrlPart(const StringValue& part);

  /// Attempts to extract the component 'part' from 'url', storing it in '*result'.
  /// Returns true on success. Returns false when 'url' is malformed or 'part' is
  /// invalid; in that case the contents of '*result' are undefined.
  static bool ParseUrl(const StringValue& url, UrlPart part, StringValue* result);

  /// Attempts to parse 'key' from 'url', storing the outcome in '*result'.
  /// Returns true on success. Returns false when 'url' is malformed or 'part' is
  /// invalid; in that case the contents of '*result' are undefined.
  /// NOTE(review): presumably this looks 'key' up inside the component selected by
  /// 'part' -- confirm against the implementation.
  static bool ParseUrlKey(
      const StringValue& url,
      UrlPart part,
      const StringValue& key,
      StringValue* result);

 private:
  // Names of the URL parts; GetUrlPart() compares its argument against these.
  // Only declared here -- the values are defined outside this header.
  static const StringValue url_authority;
  static const StringValue url_file;
  static const StringValue url_host;
  static const StringValue url_path;
  static const StringValue url_protocol;
  static const StringValue url_query;
  static const StringValue url_ref;
  static const StringValue url_userinfo;

  // Constants used while searching a URL for its parts. Each StringValue is listed
  // next to the StringSearch object that shares its name; both are defined outside
  // this header.
  static const StringValue protocol;
  static const StringSearch protocol_search;

  static const StringValue at;
  static const StringSearch at_search;

  static const StringValue slash;
  static const StringSearch slash_search;

  static const StringValue colon;
  static const StringSearch colon_search;

  static const StringValue question;
  static const StringSearch question_search;

  static const StringValue hash;
  static const StringSearch hash_search;
};
}
#endif