blob: 08d37a53048b9afd1d05ec82c033b687044c65c9 [file] [log] [blame]
#!/usr/bin/python2.4
#
# Copyright 2011 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Builds node tables. See class comment for more details."""
__author__ = 'bmcquade@google.com (Bryan McQuade)'
def _NodeHasAllLeafChildren(node):
"""Determines whether the given node's children are all leaf nodes.
Note that a node with no children does *not* have all leaf children.
"""
if not node.HasChildren():
return False
for child in node.GetChildren():
if child.HasChildren():
return False
return True
def _ComputeLeafChildCacheKey(node):
"""Computes a key that uniquely identifies the node, or None if no key."""
if not _NodeHasAllLeafChildren(node):
# We do not compute cache keys for nodes that have children, since
# there is a low hitrate for such duplicates and since the cache
# keys would be substantially more complex.
raise ValueError('Node has non-leaf children.')
return tuple([n.GetName() for n in node.GetChildren()])
class _NodeTable(object):
"""Stores a table of TrieNodes and provides efficent lookup of node offset."""
def __init__(self, cache_key_func=None):
"""Instantiates a new _NodeTable.
Args:
cache_key_func: The function to use to compute the cache key for
the given node, if any. If unspecified, no cache
is used.
"""
# Table of TrieNodes, in breadth-first order, with siblings
# ordered lexicographically. For instance the hostnames
# 'foo.com.au' and 'edu.au' would be ordered in the table as: [
# 'au', 'com', 'edu', 'foo' ].
self._nodes = []
# Map from a node's hostname identifier (e.g. 'edu.au') to the
# offset of that node's first child in the _nodes member. Enables
# fast lookup of the index of a node's children.
self._first_child_offset_map = {}
# Used to find duplicate entries that can be reused to save space,
# instead of adding the same entries multiple times.
self._cache = {}
# Function used to uniquely identify nodes in the cache.
self._cache_key_func = cache_key_func
def GetNodeList(self):
"""Get the list of nodes populated by calls to AddChildren()."""
return self._nodes
def GetFirstChildOffset(self, node):
"""Get the offset of this node's first child in the table."""
identifier = node.GetIdentifier('.')
if identifier not in self._first_child_offset_map:
raise ValueError('No offset for specified node.')
return self._first_child_offset_map[node.GetIdentifier('.')]
def AddChildren(self, node):
"""Add the children of this node to the appropriate table."""
cache_key = None
if self._cache_key_func:
cache_key = self._cache_key_func(node)
if cache_key in self._cache:
identifier = node.GetIdentifier('.')
self._first_child_offset_map[identifier] = self._cache[cache_key]
return
# Cache miss. Add the children to this _NodeTable.
self._DoAddChildren(node)
if cache_key:
self._cache[cache_key] = self.GetFirstChildOffset(node)
def _DoAddChildren(self, node):
"""Add this node's children to the node table."""
children = node.GetChildren()
offset_of_children = len(self._nodes)
self._first_child_offset_map[node.GetIdentifier('.')] = offset_of_children
for child in children:
self._nodes.append(child)
class NodeTableBuilder(object):
"""Builds node tables that allow for trie-based lookup of registry suffixes.
NodeTableBuilder builds a table-based representation of the registry
suffix trie. We construct two different node tables: the 'main' node
table, and the leaf child node table. The leaf child node table
contains nodes whose siblings are all leaf children in the trie. The
leaf child table exists separately from the main table only because
many nodes fit this criteria (all siblings are leaves) and these
nodes can be represented more efficiently than nodes in the main
table (2 bytes per node, instead of 5 bytes per node).
"""
def __init__(self):
self._node_table = _NodeTable()
self._leaf_child_node_table = _NodeTable(_ComputeLeafChildCacheKey)
def BuildNodeTables(self, node):
"""Constructs the node tables for the given root trie node."""
if _NodeHasAllLeafChildren(node):
self._leaf_child_node_table.AddChildren(node)
elif node.HasChildren():
self._node_table.AddChildren(node)
children = node.GetChildren()
for child in children:
self.BuildNodeTables(child)
def GetNodeTable(self):
"""Return the generated node table."""
return self._node_table.GetNodeList()
def GetLeafNodeTable(self):
"""Return the generated leaf node table."""
return self._leaf_child_node_table.GetNodeList()
def GetChildNodeOffset(self, node):
"""Return the index of the node's first child.
If the node has only leaf children, the index will be offset by
the number of nodes in the non-leaf table, as this allows us to
differentiate between references to the node table and the leaf
table.
"""
if _NodeHasAllLeafChildren(node):
return (len(self._node_table.GetNodeList()) +
self._leaf_child_node_table.GetFirstChildOffset(node))
else:
return self._node_table.GetFirstChildOffset(node)