blob: 4f68375ba4200a31c6c1aeb2bfb357c367589ed1 [file] [log] [blame]
/*
* Copyright 2011 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "domain_registry/private/assert.h"
#include "domain_registry/private/string_util.h"
#include "domain_registry/private/trie_search.h"
#include <stdlib.h>
#include <string.h>
/*
* Helper macro that chooses the node half-way between the start and
* end nodes. Used for binary search.
*/
#define MIDDLE(start, end) ((start) + ((((end) - (start)) + 1) / 2));
/*
* Global data structures used to perform the search. Should be
* populated once at startup by a call to SetRegistryTables.
*/
static const char* g_string_table = NULL;
static const struct TrieNode* g_node_table = NULL;
static size_t g_num_root_children = 0;
static const REGISTRY_U16* g_leaf_node_table = NULL;
static size_t g_leaf_node_table_offset = 0;
/*
* Create an "exception" version of the given component. For instance
* if component is "foo", will return "!foo". The caller is
* responsible for freeing the returned memory.
*/
static char* StrDupExceptionComponent(const char* component) {
/*
* TODO(bmcquade): could use thread-local storage of sufficient size
* to avoid this allocation. This should be invoked infrequently
* enough that it's probably fine for us to perform the allocation.
*/
const size_t component_len = strlen(component);
char* exception_component = malloc(component_len + 2);
if (exception_component == NULL) {
return NULL;
}
memcpy(exception_component + 1, component, component_len);
exception_component[0] = '!';
exception_component[component_len + 1] = 0;
return exception_component;
}
/*
* Performs a binary search looking for value, between the nodes start
* and end, inclusive. Would normally have static linkage but is made
* public for testing.
*/
const struct TrieNode* FindNodeInRange(
const char* value,
const struct TrieNode* start,
const struct TrieNode* end) {
DCHECK(value != NULL);
DCHECK(start != NULL);
DCHECK(end != NULL);
if (start > end) return NULL;
while (1) {
const struct TrieNode* candidate;
const char* candidate_str;
int result;
DCHECK(start <= end);
candidate = MIDDLE(start, end);
candidate_str = g_string_table + candidate->string_table_offset;
result = HostnamePartCmp(value, candidate_str);
if (result == 0) return candidate;
if (result > 0) {
if (end == candidate) return NULL;
start = candidate + 1;
} else {
if (start == candidate) return NULL;
end = candidate - 1;
}
}
}
/*
* Performs a binary search looking for value, between the nodes start
* and end, inclusive. Would normally have static linkage but is made
* public for testing.
*/
const char* FindLeafNodeInRange(
const char* value,
const REGISTRY_U16* start,
const REGISTRY_U16* end) {
DCHECK(value != NULL);
DCHECK(start != NULL);
DCHECK(end != NULL);
if (start > end) return NULL;
while (1) {
const REGISTRY_U16* candidate;
const char* candidate_str;
int result;
DCHECK(start <= end);
candidate = MIDDLE(start, end);
candidate_str = g_string_table + *candidate;
result = HostnamePartCmp(value, candidate_str);
if (result == 0) return candidate_str;
if (result > 0) {
if (end == candidate) return NULL;
start = candidate + 1;
} else {
if (start == candidate) return NULL;
end = candidate - 1;
}
}
}
/*
* Searches to find a registry node with the given component
* identifier and the given parent node. If parent is null, searches
* starting from the root node.
*/
const struct TrieNode* FindRegistryNode(const char* component,
const struct TrieNode* parent) {
const struct TrieNode* start;
const struct TrieNode* end;
const struct TrieNode* current;
const struct TrieNode* exception;
DCHECK(g_string_table != NULL);
DCHECK(g_node_table != NULL);
DCHECK(g_leaf_node_table != NULL);
DCHECK(component != NULL);
if (IsInvalidComponent(component)) {
return NULL;
}
if (parent == NULL) {
/* If parent is NULL, start the search at the root node. */
start = g_node_table;
end = start + (g_num_root_children - 1);
} else {
if (HasLeafChildren(parent) != 0) {
/*
* If the parent has leaf children, FindRegistryLeafNode should
* have been called instead.
*/
DCHECK(0);
return NULL;
}
/* We'll be searching the specified parent node's children. */
start = g_node_table + parent->first_child_offset;
end = start + ((int) parent->num_children - 1);
}
current = FindNodeInRange(component, start, end);
if (current != NULL) {
/* Found a match. Return it. */
return current;
}
/*
* We didn't find an exact match, so see if there's a wildcard
* match. From http://publicsuffix.org/format/: "The wildcard
* character * (asterisk) matches any valid sequence of characters
* in a hostname part. (Note: the list uses Unicode, not Punycode
* forms, and is encoded using UTF-8.) Wildcards may only be used to
* wildcard an entire level. That is, they must be surrounded by
* dots (or implicit dots, at the beginning of a line)."
*/
current = FindNodeInRange("*", start, end);
if (current != NULL) {
/*
* If there was a wildcard match, see if there is a wildcard
* exception match, and prefer it if so. From
* http://publicsuffix.org/format/: "An exclamation mark (!) at
* the start of a rule marks an exception to a previous wildcard
* rule. An exception rule takes priority over any other matching
* rule.".
*/
char* exception_component = StrDupExceptionComponent(component);
if (exception_component == NULL) {
return NULL;
}
exception = FindNodeInRange(exception_component,
start,
end);
free(exception_component);
if (exception != NULL) {
current = exception;
}
}
return current;
}
const char* FindRegistryLeafNode(const char* component,
const struct TrieNode* parent) {
size_t offset;
const REGISTRY_U16* leaf_start;
const REGISTRY_U16* leaf_end;
const char* match;
const char* exception;
DCHECK(g_string_table != NULL);
DCHECK(g_node_table != NULL);
DCHECK(g_leaf_node_table != NULL);
DCHECK(component != NULL);
DCHECK(parent != NULL);
DCHECK(HasLeafChildren(parent) != 0);
if (parent == NULL) {
return NULL;
}
if (HasLeafChildren(parent) == 0) {
return NULL;
}
if (IsInvalidComponent(component)) {
return NULL;
}
offset = parent->first_child_offset - g_leaf_node_table_offset;
leaf_start = g_leaf_node_table + offset;
leaf_end = leaf_start + ((int) parent->num_children - 1);
match = FindLeafNodeInRange(component,
leaf_start,
leaf_end);
if (match != NULL) {
return match;
}
/*
* We didn't find an exact match, so see if there's a wildcard
* match. From http://publicsuffix.org/format/: "The wildcard
* character * (asterisk) matches any valid sequence of characters
* in a hostname part. (Note: the list uses Unicode, not Punycode
* forms, and is encoded using UTF-8.) Wildcards may only be used to
* wildcard an entire level. That is, they must be surrounded by
* dots (or implicit dots, at the beginning of a line)."
*/
match = FindLeafNodeInRange("*", leaf_start, leaf_end);
if (match != NULL) {
/*
* There was a wildcard match, so see if there is a wildcard
* exception match, and prefer it if so. From
* http://publicsuffix.org/format/: "An exclamation mark (!) at
* the start of a rule marks an exception to a previous wildcard
* rule. An exception rule takes priority over any other matching
* rule.".
*/
char* exception_component = StrDupExceptionComponent(component);
if (exception_component == NULL) {
return NULL;
}
exception = FindLeafNodeInRange(exception_component,
leaf_start,
leaf_end);
free(exception_component);
if (exception != NULL) {
match = exception;
}
}
return match;
}
const char* GetHostnamePart(size_t offset) {
DCHECK(g_string_table != NULL);
return g_string_table + offset;
}
int HasLeafChildren(const struct TrieNode* node) {
if (node->first_child_offset < g_leaf_node_table_offset) return 0;
return 1;
}
void SetRegistryTables(const char* string_table,
const struct TrieNode* node_table,
size_t num_root_children,
const REGISTRY_U16* leaf_node_table,
size_t leaf_node_table_offset) {
g_string_table = string_table;
g_node_table = node_table;
g_num_root_children = num_root_children;
g_leaf_node_table = leaf_node_table;
g_leaf_node_table_offset = leaf_node_table_offset;
}