blob: 442bff39986f7982638af95c836528747080bdfc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "URIHelper.h"
#include <decaf/lang/Integer.h>
#include <decaf/lang/Character.h>
#include <decaf/lang/Exception.h>
#include <decaf/internal/net/URIEncoderDecoder.h>
#include <decaf/util/StringTokenizer.h>
#include <decaf/lang/exceptions/NumberFormatException.h>
using namespace decaf;
using namespace decaf::lang;
using namespace decaf::lang::exceptions;
using namespace decaf::util;
using namespace decaf::net;
using namespace decaf::internal;
using namespace decaf::internal::net;
////////////////////////////////////////////////////////////////////////////////
URIHelper::URIHelper( const std::string& unreserved,
const std::string& punct,
const std::string& reserved,
const std::string& someLegal,
const std::string& allLegal )
: unreserved( unreserved ),
punct( punct ),
reserved( reserved ),
someLegal( someLegal ),
allLegal( allLegal ) {
}
////////////////////////////////////////////////////////////////////////////////
URIHelper::URIHelper() : unreserved( "_-!.~\'()*" ),
punct( ",;:$&+=" ),
reserved( ",;:$&+=?/[]@" ),
someLegal( "_-!.~\'()*,;:$&+=" ),
allLegal( "_-!.~\'()*,;:$&+=?/[]@" ) {
}
////////////////////////////////////////////////////////////////////////////////
URIType URIHelper::parseURI( const std::string& uri, bool forceServer ) {
URIType result( uri );
std::string temp = uri;
std::size_t index, index1, index2, index3;
// parse into Fragment, Scheme, and SchemeSpecificPart
// then parse SchemeSpecificPart if necessary
// Fragment
index = temp.find( '#' );
if( index != std::string::npos ) {
// remove the fragment from the end
result.setFragment( temp.substr( index + 1, std::string::npos ) );
validateFragment( uri, result.getFragment(), index + 1 );
temp = temp.substr( 0, index );
}
// Scheme and SchemeSpecificPart
index = index1 = temp.find( ':' );
index2 = temp.find( '/' );
index3 = temp.find( '?' );
// if a '/' or '?' occurs before the first ':' the uri has no
// specified scheme, and is therefore not absolute
if( index != std::string::npos &&
( index2 >= index || index2 == std::string::npos ) &&
( index3 >= index || index3 == std::string::npos ) ) {
// the characters up to the first ':' comprise the scheme
result.setAbsolute( true );
result.setScheme( temp.substr( 0, index ) );
if( result.getScheme() == "" ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Scheme not specified.", (int)index );
}
validateScheme( uri, result.getScheme(), 0 );
result.setSchemeSpecificPart( temp.substr( index + 1, std::string::npos ) );
if( result.getSchemeSpecificPart() == "" ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Scheme specific part is invalid..", (int)index + 1 );
}
} else {
result.setAbsolute( false );
result.setSchemeSpecificPart( temp );
}
if( result.getScheme() == "" ||
( !result.getSchemeSpecificPart().empty() &&
result.getSchemeSpecificPart().at( 0 ) == '/' ) ) {
result.setOpaque( false );
// the URI is hierarchical
// Query
temp = result.getSchemeSpecificPart();
index = temp.find( '?' );
if( index != std::string::npos ) {
result.setQuery( temp.substr( index + 1, std::string::npos ) );
temp = temp.substr( 0, index );
validateQuery( uri, result.getQuery(), index2 + 1 + index );
}
// Authority and Path
if( temp.size() >= 2 && temp.at(0) == '/' && temp.at(1) == '/' ) {
index = temp.find( '/', 2 );
if( index != std::string::npos ) {
result.setAuthority( temp.substr( 2, index - 2 ) );
result.setPath( temp.substr( index, std::string::npos ) );
} else {
result.setAuthority( temp.substr( 2, std::string::npos ) );
if( result.getAuthority() == "" &&
result.getQuery() == "" && result.getFragment() == "" ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Scheme specific part is invalid..", (int)uri.length() );
}
}
if( result.getAuthority() != "" ) {
validateAuthority( uri, result.getAuthority(), index1 + 3 );
}
} else { // no authority specified
result.setPath( temp );
}
std::size_t pathIndex = 0;
if( index2 != std::string::npos ) {
pathIndex += index2;
}
if( index != std::string::npos ) {
pathIndex += index;
}
validatePath( uri, result.getPath(), pathIndex );
} else { // if not hierarchical, URI is opaque
result.setOpaque( true );
validateSsp( uri, result.getSchemeSpecificPart(), index2 + 2 + index );
}
URIType authority = parseAuthority( forceServer, result.getAuthority() );
// Authority was valid, so we capture the results
if( authority.isValid() ) {
result.setUserInfo( authority.getUserInfo() );
result.setHost( authority.getHost() );
result.setPort( authority.getPort() );
result.setServerAuthority( true );
}
return result;
}
////////////////////////////////////////////////////////////////////////////////
void URIHelper::validateScheme( const std::string& uri, const std::string& scheme, int index ) {
// first char needs to be an alpha char
if( !Character::isLetter( scheme.at(0) ) ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Schema must start with a Letter.", (int)index );
}
try {
URIEncoderDecoder::validateSimple( scheme, "+-." );
} catch( URISyntaxException& e ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Invalid Schema", (int)index + e.getIndex() );
}
}
////////////////////////////////////////////////////////////////////////////////
void URIHelper::validateSsp( const std::string& uri, const std::string& ssp,
std::size_t index ) {
try {
URIEncoderDecoder::validate( ssp, allLegal );
} catch( URISyntaxException& e ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Invalid URI Ssp", (int)index + e.getIndex() );
}
}
////////////////////////////////////////////////////////////////////////////////
void URIHelper::validateAuthority( const std::string& uri, const std::string& authority,
std::size_t index ) {
try {
// "@[]" + someLegal
URIEncoderDecoder::validate( authority, "@[]" + someLegal );
} catch( URISyntaxException& e ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Invalid URI Authority", (int)index + e.getIndex() );
}
}
////////////////////////////////////////////////////////////////////////////////
void URIHelper::validatePath( const std::string& uri, const std::string& path,
std::size_t index ) {
try {
// "/@" + someLegal
URIEncoderDecoder::validate( path, "/@" + someLegal );
} catch( URISyntaxException& e ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Invalid URI Path", (int)index + e.getIndex() );
}
}
////////////////////////////////////////////////////////////////////////////////
void URIHelper::validateQuery( const std::string& uri, const std::string& query,
std::size_t index ) {
try {
URIEncoderDecoder::validate( query, allLegal );
} catch( URISyntaxException& e ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Invalid URI Query", (int)index + e.getIndex() );
}
}
////////////////////////////////////////////////////////////////////////////////
void URIHelper::validateFragment( const std::string& uri, const std::string& fragment,
std::size_t index ) {
try {
URIEncoderDecoder::validate( fragment, allLegal );
} catch( URISyntaxException& e ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "Invalid URI Fragment", (int)index + e.getIndex() );
}
}
////////////////////////////////////////////////////////////////////////////////
URIType URIHelper::parseAuthority( bool forceServer, const std::string& authority ) {
try{
URIType result( authority );
if( authority == "" ) {
return result;
}
std::string temp, tempUserinfo = "", tempHost = "";
std::size_t index, hostindex = 0;
int tempPort = -1;
temp = authority;
index = temp.find( '@' );
if( index != std::string::npos ) {
// remove user info
tempUserinfo = temp.substr( 0, index );
validateUserinfo( authority, tempUserinfo, 0 );
temp = temp.substr( index + 1, std::string::npos ); // host[:port] is left
hostindex = index + 1;
}
index = temp.rfind( ':' );
std::size_t endindex = temp.find( ']' );
if( index != std::string::npos && ( endindex < index || endindex == std::string::npos ) ){
// determine port and host
tempHost = temp.substr( 0, index );
if( index < ( temp.length() - 1 ) ) { // port part is not empty
try {
tempPort = Integer::parseInt( temp.substr( index + 1, std::string::npos ) );
if( tempPort < 0 ) {
if( forceServer ) {
throw URISyntaxException(
__FILE__, __LINE__,
authority, "Port number is missing",
(int)hostindex + (int)index + 1 );
}
return result;
}
} catch( NumberFormatException& e ) {
if( forceServer ) {
throw URISyntaxException(
__FILE__, __LINE__,
authority, "Port number is malformed.",
(int)hostindex + (int)index + 1 );
}
return result;
}
}
} else {
tempHost = temp;
}
if( tempHost == "" ) {
if( forceServer ) {
throw URISyntaxException(
__FILE__, __LINE__,
authority, "Host name is empty", (int)hostindex );
}
return result;
}
if( !isValidHost( forceServer, tempHost ) ) {
return result;
}
// this is a server based uri,
// fill in the userinfo, host and port fields
result.setUserInfo( tempUserinfo );
result.setHost( tempHost );
result.setPort( tempPort );
result.setServerAuthority( true );
// We know its valid now so tag it.
result.setValid( true );
return result;
}
DECAF_CATCH_RETHROW( URISyntaxException )
DECAF_CATCH_EXCEPTION_CONVERT( Exception, URISyntaxException )
DECAF_CATCHALL_THROW( URISyntaxException )
}
////////////////////////////////////////////////////////////////////////////////
void URIHelper::validateUserinfo( const std::string& uri, const std::string& userinfo,
std::size_t index ) {
for( std::size_t i = 0; i < userinfo.length(); i++ ) {
char ch = userinfo.at( i );
if( ch == ']' || ch == '[' ) {
throw URISyntaxException(
__FILE__, __LINE__,
uri, "User Info cannot contain '[' or ']'", (int)( index + i ) );
}
}
}
////////////////////////////////////////////////////////////////////////////////
bool URIHelper::isValidHost( bool forceServer, const std::string& host ) {
try{
if( host.at( 0 ) == '[' ) {
// ipv6 address
if( host.at( host.length() - 1 ) != ']' ) {
throw URISyntaxException(
__FILE__, __LINE__,
host, "Host address does not end in ']'", 0 );
}
if( !isValidIP6Address( host ) ) {
throw URISyntaxException(
__FILE__, __LINE__,
host, "Host IPv6 address is not valid" );
}
return true;
}
// '[' and ']' can only be the first char and last char
// of the host name
if( host.find( '[' ) != std::string::npos ||
host.find( ']' ) != std::string::npos ) {
throw URISyntaxException(
__FILE__, __LINE__,
host, "Unexpected '[' or ']' found in address" );
}
std::size_t index = host.find_last_of( '.' );
if( index == std::string::npos || index == host.length() - 1 ||
!Character::isDigit( host.at( index + 1 ) ) ) {
// domain name
if( isValidDomainName( host ) ) {
return true;
}
if( forceServer ) {
throw URISyntaxException(
__FILE__, __LINE__,
host, "Host address is not valid" );
}
return false;
}
// IPv4 address
if( isValidIPv4Address( host ) ) {
return true;
}
if( forceServer ) {
throw URISyntaxException(
__FILE__, __LINE__,
host, "Host IPv4 address is not valid" );
}
return false;
}
DECAF_CATCH_RETHROW( URISyntaxException )
DECAF_CATCH_EXCEPTION_CONVERT( Exception, URISyntaxException )
DECAF_CATCHALL_THROW( URISyntaxException )
}
////////////////////////////////////////////////////////////////////////////////
bool URIHelper::isValidDomainName( const std::string& host ) {
try {
URIEncoderDecoder::validateSimple( host, "-." );
} catch( URISyntaxException& e ) {
return false;
}
std::string label = "";
StringTokenizer st( host, "." );
while( st.hasMoreTokens() ) {
label = st.nextToken();
if( *(label.begin()) == '-' || *(label.rbegin()) == '-' ) {
return false;
}
}
if( label != host ) {
if( Character::isDigit( label.at( 0 ) ) ) {
return false;
}
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
bool URIHelper::isValidIPv4Address( const std::string& host ) {
std::size_t index;
std::size_t index2;
try {
int num;
index = host.find( '.' );
num = Integer::parseInt( host.substr( 0, index ) );
if( num < 0 || num > 255 ) {
return false;
}
index2 = host.find( '.', index + 1 );
num = Integer::parseInt( host.substr( index + 1, index2 - index - 1 ) );
if( num < 0 || num > 255 ) {
return false;
}
index = host.find( '.', index2 + 1 );
num = Integer::parseInt( host.substr( index2 + 1, index - index2 - 1 ) );
if( num < 0 || num > 255 ) {
return false;
}
num = Integer::parseInt( host.substr( index + 1, std::string::npos ) );
if( num < 0 || num > 255 ) {
return false;
}
} catch( Exception& e ) {
return false;
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
bool URIHelper::isValidIP6Address( const std::string& ipAddress ) {
std::size_t length = ipAddress.length();
bool doubleColon = false;
int numberOfColons = 0;
int numberOfPeriods = 0;
std::string word = "";
char c = 0;
char prevChar = 0;
int offset = 0; // offset for [] ip addresses
if( length < 2 ) {
return false;
}
for( std::size_t i = 0; i < length; i++ ) {
prevChar = c;
c = ipAddress.at( i );
switch( c ) {
// case for an open bracket [x:x:x:...x]
case '[':
if( i != 0 ) {
return false; // must be first character
}
if( ipAddress.at( length - 1 ) != ']' ) {
return false; // must have a close ]
}
if( ( ipAddress.at( 1 ) == ':' ) && ( ipAddress.at( 2 )
!= ':' ) ) {
return false;
}
offset = 1;
if( length < 4 ) {
return false;
}
break;
// case for a closed bracket at end of IP [x:x:x:...x]
case ']':
if( i != length - 1 ) {
return false; // must be last character
}
if( ipAddress.at( 0 ) != '[' ) {
return false; // must have a open [
}
break;
// case for the last 32-bits represented as IPv4
// x:x:x:x:x:x:d.d.d.d
case '.':
numberOfPeriods++;
if( numberOfPeriods > 3 ) {
return false;
}
if( !isValidIP4Word( word ) ) {
return false;
}
if( numberOfColons != 6 && !doubleColon ) {
return false;
}
// a special case ::1:2:3:4:5:d.d.d.d allows 7 colons
// with
// an IPv4 ending, otherwise 7 :'s is bad
if( numberOfColons == 7 &&
ipAddress.at( 0 + offset ) != ':' &&
ipAddress.at( 1 + offset ) != ':' ) {
return false;
}
word = "";
break;
case ':':
numberOfColons++;
if( numberOfColons > 7 ) {
return false;
}
if( numberOfPeriods > 0 ) {
return false;
}
if( prevChar == ':' ) {
if( doubleColon ) {
return false;
}
doubleColon = true;
}
word = "";
break;
default:
if( word.length() > 3 ) {
return false;
}
if( !isValidHexChar( c ) ) {
return false;
}
word += c;
}
}
// Check if we have an IPv4 ending
if( numberOfPeriods > 0 ) {
if( numberOfPeriods != 3 || !isValidIP4Word( word ) ) {
return false;
}
} else {
// If we're at then end and we haven't had 7 colons then there
// is a problem unless we encountered a doubleColon
if( numberOfColons != 7 && !doubleColon ) {
return false;
}
// If we have an empty word at the end, it means we ended in
// either a : or a .
// If we did not end in :: then this is invalid
if( word == "" && ipAddress.at( length - 1 - offset ) != ':' &&
ipAddress.at( length - 2 - offset ) != ':' ) {
return false;
}
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
bool URIHelper::isValidIP4Word( const std::string& word ) {
if( word.length() < 1 || word.length() > 3 ) {
return false;
}
for( std::size_t i = 0; i < word.length(); i++ ) {
if( !Character::isDigit( word.at( i ) ) ) {
return false;
}
}
if( Integer::parseInt( word ) > 255 ) {
return false;
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
bool URIHelper::isValidHexChar( char c ) {
return (c >= '0' && c <= '9') ||
(c >= 'A' && c <= 'F') ||
(c >= 'a' && c <= 'f');
}