blob: c94c6fe54361a162e7c94e73db526403d3a1a720 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _DECAF_NET_URL_H_
#define _DECAF_NET_URL_H_
#include <decaf/util/Config.h>
#include <decaf/lang/String.h>
#include <decaf/io/InputStream.h>
#include <string>
namespace decaf {
namespace net {
// Forward declarations. This header only names these types in member and
// function declarations (pointers, parameters, and return types), so complete
// definitions are not required here.
class Proxy;
class URI;
class URLConnection;
class URLImpl;
class URLStreamHandler;
class URLStreamHandlerFactory;
/**
* Class URL represents a Uniform Resource Locator, a pointer to a "resource"
* on the World Wide Web. A resource can be something as simple as a file
* or a directory, or it can be a reference to a more complicated object,
* such as a query to a database or to a search engine. More information on
* the types of URLs and their formats can be found at:
*
* http://www.ksc.nasa.gov/facts/internet/url-primer.html
*
* In general, a URL can be broken into several parts. The previous example
* of a URL indicates that the protocol to use is http (HyperText Transfer
* Protocol) and that the information resides on a host machine named
* www.ksc.nasa.gov. The information on that host machine is named
* /facts/internet/url-primer.html. The exact meaning of this name on the
* host machine is both protocol dependent and host dependent. The information
* normally resides in a file, but it could be generated on the fly. This
* component of the URL is called the path component.
*
* A URL can optionally specify a "port", which is the port number to which
* the TCP connection is made on the remote host machine. If the port is not
* specified, the default port for the protocol is used instead. For example,
* the default port for http is 80. An alternative port could be specified as:
*
* http://www.ksc.nasa.gov:80/facts/internet/url-primer.html
*
* The syntax of URL is defined by RFC 2396: Uniform Resource Identifiers (URI):
* Generic Syntax, amended by RFC 2732: Format for Literal IPv6 Addresses in URLs.
* The Literal IPv6 address format also supports scope_ids; the scoped address
* architecture behind scope_ids is described in RFC 4007.
*
* A URL may have appended to it a "fragment", also known as a "ref" or a
* "reference". The fragment is indicated by the sharp sign character "#" followed
* by more characters. For example,
*
* http://www.apache.org/cms/index.html#chapter1
*
* This fragment is not technically part of the URL. Rather, it indicates that
* after the specified resource is retrieved, the application is specifically
* interested in that part of the document that has the tag chapter1 attached to
* it. The meaning of a tag is resource specific.
*
* An application can also specify a "relative URL", which contains only enough
* information to reach the resource relative to another URL. Relative URLs
* are frequently used within HTML pages. For example, if the contents of the URL:
*
* http://www.apache.org/cms/index.html
*
* contained within it the relative URL:
*
* FAQ.html
*
* it would be a shorthand for:
*
* http://www.apache.org/cms/FAQ.html
*
* The relative URL need not specify all the components of a URL. If the protocol,
* host name, or port number is missing, the value is inherited from the fully
* specified URL. The file component must be specified. The optional fragment is
* not inherited.
*
* The URL class does not itself encode or decode any URL components according
* to the escaping mechanism defined in RFC 2396. It is the responsibility of the
* caller to encode any fields that need to be escaped before passing them to URL,
* and also to decode any escaped fields that are returned from URL. Furthermore,
* because URL has no knowledge of URL escaping, it does not recognise equivalence
* between the encoded and decoded forms of the same URL. For example, the two URLs:
*
* http://foo.com/hello world/ and http://foo.com/hello%20world/
*
* would be considered not equal to each other.
*
* Note, the URI class does perform escaping of its component fields in certain
* circumstances. The recommended way to manage the encoding and decoding of URLs
* is to use URI, and to convert between these two classes using toURI() and
* URI.toURL().
*
* The URLEncoder and URLDecoder classes can also be used, but only for HTML form
* encoding, which is not the same as the encoding scheme defined in RFC2396.
*
* @since 1.0
*/
class DECAF_API URL {
private:
URLImpl* impl;
public:
/**
* Creates a URL object from the String representation.
*
* This constructor is equivalent to a call to the two-argument constructor with
* a empty first argument.
*
* @param url
* the String to parse as a URL.
*
* @throws MalformedURLException If the string specifies an unknown protocol.
*/
URL(const decaf::lang::String& url);
/**
* Creates a URL by parsing the given spec within a specified context. The new URL is
* created from the given context URL and the spec argument as described in RFC2396
* "Uniform Resource Identifiers : Generic * Syntax" :
*
* <scheme>://<authority><path>?<query>#<fragment>
*
* The reference is parsed into the scheme, authority, path, query and fragment parts.
* If the path component is empty and the scheme, authority, and query components are
* undefined, then the new URL is a reference to the current document. Otherwise, the
* fragment and query parts present in the spec are used in the new URL.
*
* If the scheme component is defined in the given spec and does not match the scheme
* of the context, then the new URL is created as an absolute URL based on the spec alone.
* Otherwise the scheme component is inherited from the context URL.
*
* If the authority component is present in the spec then the spec is treated as absolute
* and the spec authority and path will replace the context authority and path. If the
* authority component is absent in the spec then the authority of the new URL will be
* inherited from the context.
*
* If the spec's path component begins with a slash character "/" then the path is treated
* as absolute and the spec path replaces the context path.
*
* Otherwise, the path is treated as a relative path and is appended to the context path,
* as described in RFC2396. Also, in this case, the path is canonicalized through the
* removal of directory changes made by occurrences of ".." and ".".
*
* For a more detailed description of URL parsing, refer to RFC2396.
*
* @param context
* the URL which is used as the context.
* @param spec
* the URL string representation which has to be parsed.
*
* @throws MalformedURLException
* if the given string spec could not be parsed as a URL or an invalid
* protocol has been found.
*/
URL(const URL& context, const decaf::lang::String& spec);
/**
* Creates a URL object from the specified protocol, host, port number, file, and
* handler. Specifying a port number of -1 indicates that the URL should use the
* default port for the protocol. Specifying a handler of null indicates that the
* URL should use a default stream handler for the protocol, as outlined for:
*
* URL(const String&, const String&, int, const String&)
*
* If a URLStreamHandler instance is provided then this class will take ownership
* of the object and delete at a later time.
*
* @param protocol
* the name of the protocol to use.
* @param host
* the name of the host.
* @param port
* the port number on the host.
* @param file
* the file on the host
* @param handler
* the stream handler for the URL.
*
* @throws MalformedURLException if an unknown protocol is specified.
*/
URL(const decaf::lang::String& protocol, const decaf::lang::String& host, int port,
const decaf::lang::String& file, URLStreamHandler* handler);
/**
* Creates a URL from the specified protocol name, host name, and file name. The default
* port for the specified protocol is used.
*
* This method is equivalent to calling the four-argument constructor with the arguments
* being protocol, host, -1, and file. No validation of the inputs is performed by this
* constructor.
*
* @param protocol
* the name of the protocol to use.
* @param host
* the name of the host.
* @param file
* the file on the host
*
* @throws MalformedURLException if an unknown protocol is specified.
*/
URL(const decaf::lang::String& protocol, const decaf::lang::String& host, const decaf::lang::String& file);
/**
* Creates a new URL instance using the given arguments. The URL uses the
* specified port instead of the default port for the given protocol.
*
* @param protocol
* the name of the protocol to use.
* @param host
* the name of the host.
* @param port
* the specific port number of the URL. Value of -1 represents the
* default port of the protocol.
* @param file
* the name of the resource.
*
* @throws MalformedURLException
* if the combination of all arguments do not represent a valid
* URL or the protocol is invalid.
*/
URL(const decaf::lang::String& protocol, const decaf::lang::String& host, int port, const decaf::lang::String& file);
/**
* Creates a URL by parsing the given spec with the specified handler within a
* specified context. If the handler is NULL, the parsing occurs as with the two
* argument constructor.
*
* If a stream handler instance is passed then this object takes ownership of it
* and will destroy the resources when no longer needed.
*
* @param context
* the URL which is used as the context.
* @param spec
* the URL string representation which has to be parsed.
* @param handler
* the stream handler for the URL.
*
* @throws MalformedURLException if an unknown protocol is specified.
*/
URL(const URL& context, const decaf::lang::String& spec, URLStreamHandler* handler);
virtual ~URL();
public:
/**
* Compares this URL for equality with another URL.
*
* Two URL objects are equal if they have the same protocol, reference equivalent
* hosts, have the same port number on the host, and the same file and fragment
* of the file.
*
* Two hosts are considered equivalent if both host names can be resolved into the
* same IP addresses; else if either host name can't be resolved, the host names
* must be equal without regard to case; or both host names equal to empty string.
*
* Since hosts comparison requires name resolution, this operation is a blocking operation.
*
* @return true if this URL is considered equal to the given URL instance.
*/
bool equals(const URL& other) const;
/**
* Gets the authority part of this URL.
*
* @return the authority part of this URL.
*/
decaf::lang::String getAuthority() const;
/**
* Gets the default port number of the protocol associated with this URL. If the URL
* scheme or the URLStreamHandler for the URL do not define a default port number, then
* -1 is returned.
*
* @return the default port for the given scheme.
*/
int getDefaultPort() const;
/**
* Gets the file name of this URL. The returned file portion will be the same as getPath(),
* plus the concatenation of the value of getQuery(), if any. If there is no query portion,
* this method and getPath() will return identical results.
*
* @return the file name associated with this URL.
*/
decaf::lang::String getFile() const;
/**
* Gets the host name of this URL, if applicable. The format of the host conforms to
* RFC 2732, i.e. for a literal IPv6 address, this method will return the IPv6 address
* enclosed in square brackets ('[' and ']').
*
* @return the host name for this URL.
*/
decaf::lang::String getHost() const;
/**
* Gets the path part of this URL.
*
* @return the path part of this URL.
*/
decaf::lang::String getPath() const;
/**
* Gets the user Info part of this URL.
*
* @return the user info part of this URL.
*/
decaf::lang::String getUserInfo() const;
/**
* Gets the port of this URL.
*
* @return the port of this URL or -1 if not set.
*/
int getPort() const;
/**
* Gets the protocol of this URL.
*
* @return the path part of this URL.
*/
decaf::lang::String getProtocol() const;
/**
* Gets the query part of this URL.
*
* @return the query part of this URL or empty string if not set.
*/
decaf::lang::String getQuery() const;
/**
* Gets the anchor or "reference" portion of this URL.
*
* @return the anchor or "reference" portion of this URL.
*/
decaf::lang::String getRef() const;
/**
* Creates an integer hash code for this URL which is used in hash based collections.
*
* The hash code is based upon all the URL components relevant for URL comparison which
* means that the host resolution may cause this operation to block.
*
* @return the integer has code for this URL.
*/
int hashCode() const;
/**
* Returns a URLConnection object that represents a connection to the remote object
* referred to by the URL.
*
* A new connection is opened every time by calling the openConnection method of the
* protocol handler for this URL.
*
* If for the URL's protocol (such as HTTP), there exists a public, specialized
* URLConnection subclass belonging to one of the following packages or one of their
* subpackages: java.lang, java.io, java.util, java.net, the connection returned will
* be of that subclass. For example, for HTTP an HttpURLConnection will be returned,
* and for JAR a JarURLConnection will be returned.
*
* @return a new URLConnection instance for this URL.
*
* @throws IOException if an error occurs while opening the connection.
*/
URLConnection* openConnection();
/**
* Same basic functionality as openConnection() is provided here, except that the connection
* will be made through the specified proxy; Protocol handlers that do not support proxing
* will ignore the proxy parameter and make a normal connection.
*
* @param proxy
* The proxy instance to use to make the connection.
*
* @return a new URLConnection instance for this URL.
*
* @throws IOException if an error occurs while opening the connection.
* @throws IllegalArgumentException if proxy is null, or proxy has the wrong type.
* @throws UnsupportedOperationException if this method is not supported.
*/
URLConnection* openConnection(const Proxy* proxy);
/**
* Shortcut method to opens a connection to this URL and fetch an InputStream
* for reading from that connection.
*
* @return an InputStream that reads from this URL's location.
*
* @throws IOException if an error occurs.
*/
decaf::io::InputStream* openStream();
/**
* Compares this URL to the other ignoring the fragment portion to determine if both
* reference the same remote object.
*
* @return true if both URL's reference the same external object.
*/
bool sameFile(const URL& other) const;
/**
* Constructs a string representation of this URL, by calling the toExternalForm
* method of the stream protocol handler for this object.
*
* @return the string representation of this URL.
*/
decaf::lang::String toExternalForm() const;
/**
* Calls toExternalForm to create a string representation of this URL.
*
* @return the string representation of this URL.
*/
std::string toString() const;
/**
* Returns a URI instance that is the equivalent of this URL.
*
* @return the URI that is the equivalent of this URL.
*/
URI toURI() const;
public:
/**
* Sets an application's URLStreamHandlerFactory. This method can be called at most once.
*
* The URLStreamHandlerFactory instance is used to construct a stream protocol handler
* from a protocol name. The provided factory becomes the property of this runtime and
* will be deleted at shutdown.
*
* @param factory
* the desired factory.
*
* @throws Exception if there is already a set factory.
*/
static void setURLStreamHandlerFactory(URLStreamHandlerFactory* factory);
private:
/**
* Sets the fields of the URL. This is not a public method so that only URLStreamHandlers
* can modify URL fields. URLs are otherwise constant.
*
* @param protocol
* the name of the protocol to use.
* @param host
* the name of the host.
* @param port
* the specific port number of the URL. Value of -1 represents the
* default port of the protocol.
* @param file
* the file value.
* @param ref
* the internal reference in the URL
*/
void set(const decaf::lang::String& protocol, const decaf::lang::String& host, int port,
const decaf::lang::String& file, const decaf::lang::String& ref);
/**
* Sets the fields of the URL. This is not a public method so that only URLStreamHandlers
* can modify URL fields. URLs are otherwise constant.
*
* @param protocol
* the name of the protocol to use.
* @param host
* the name of the host.
* @param port
* the specific port number of the URL. Value of -1 represents the
* default port of the protocol.
* @param authority
* the authority value.
* @param userInfo
* the user info value.
* @param path
* the path value.
* @param query
* the query value.
* @param ref
* the internal reference in the URL
*/
void set(const decaf::lang::String& protocol, const decaf::lang::String& host, int port,
const decaf::lang::String& authority, const decaf::lang::String& userInfo,
const decaf::lang::String& path, const decaf::lang::String& query,
const decaf::lang::String& ref);
/**
* Returns the URLStreamHandler configured for this URL, used to validate that
* the operations performed by a protocol handler are only done on its own URL.
*/
URLStreamHandler* getURLStreamHandler() const;
private:
void initialize(const URL* context, const decaf::lang::String& theSpec, URLStreamHandler* handler);
void initialize(const decaf::lang::String& protocol, const decaf::lang::String& host, int port,
const decaf::lang::String& file, URLStreamHandler* handler);
friend class URLStreamHandler;
};
}}
#endif /*_DECAF_NET_URL_H_*/