blob: 7fbfe92ddddf882cd6ff2912a73e9b98fb2452c4 [file] [log] [blame]
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Line too long - pylint: disable=C0301
# Invalid name - pylint: disable=C0103
"""
parseutils.py
Routines to parse "flexible" configuration files for tools like
gpaddmirrors, gprecoverseg, gpexpand, etc.
"""
import sys
from gppylib.mainUtils import ExceptionNoStackTraceNeeded
from gppylib.gplog import get_default_logger, logging_is_verbose
logger = get_default_logger()
def caller():
    """
    Return the name of the function that invoked caller(), followed by '()'.

    The frame inspection is only performed when verbose logging is enabled;
    in every other case an empty string is returned.
    """
    if not logging_is_verbose():
        return ''
    # Frame 1 is the immediate caller of this function.
    calling_frame = sys._getframe(1)
    return calling_frame.f_code.co_name + '()'
def canonicalize_address(addr):
    """
    Wrap an address in [ ] per RFC 2732 when it needs escaping.

    This is generally used to protect ':' characters, which are also often
    used as field delimiters.

    An address without any ':' is returned unchanged.  An address which
    contains ':' and already contains a '[' is also returned unchanged, on
    the assumption that it has been escaped already.  Any other address is
    returned with a '[' prefix and a ']' suffix added.

    Examples
    --------
    >>> canonicalize_address('myhost')
    'myhost'
    >>> canonicalize_address('127.0.0.1')
    '127.0.0.1'
    >>> canonicalize_address('::1')
    '[::1]'
    >>> canonicalize_address('[::1]')
    '[::1]'
    >>> canonicalize_address('2620:0:170:610::13')
    '[2620:0:170:610::13]'
    >>> canonicalize_address('[2620:0:170:610::13]')
    '[2620:0:170:610::13]'

    @param addr: the address to possibly encase in [ ]
    @returns: the address, encased in [ ] if necessary
    """
    needs_brackets = ':' in addr and '[' not in addr
    if needs_brackets:
        return '[' + addr + ']'
    return addr
#
# line parsing
#
def consume_to(delimiter, rest):
    """
    Discard characters from the rest string up to and including the delimiter.

    Returns (None, after, None) where after holds the characters following
    the delimiter, or (None, rest, 'does not contain '+delimiter) when the
    delimiter is not encountered.

    The delimiter may be longer than one character; the whole delimiter is
    skipped.

    Examples
    --------
    >>> consume_to('=', 'abc=def:ghi')
    (None, 'def:ghi', None)
    >>> consume_to('::', 'abc::def')
    (None, 'def', None)
    >>> consume_to('=', 'abc')
    (None, 'abc', 'does not contain =')

    @param delimiter: the delimiter string
    @param rest: the string to read such as 'abc:def:ghi'
    @returns: (None, after, reason) tuple such as (None, 'def:ghi', None)
    """
    pos = rest.find(delimiter)
    if pos < 0:
        return None, rest, 'does not contain '+delimiter
    # Step over the entire delimiter rather than only its first character.
    # An empty delimiter still advances by one, matching earlier behaviour.
    skip = len(delimiter) or 1
    return None, rest[pos + skip:], None
def read_to(delimiter, rest):
    """
    Read characters from the rest string until the delimiter is encountered.

    Separates the string into the characters 'before' and 'after' the
    delimiter.  If no delimiter is found, the entire string is assigned to
    'before' and None to 'after'.

    The delimiter may be longer than one character; the whole delimiter is
    excluded from both 'before' and 'after'.

    Examples
    --------
    >>> read_to(':', 'abc:def:ghi')
    ('abc', 'def:ghi', None)
    >>> read_to(':', 'abc:def')
    ('abc', 'def', None)
    >>> read_to(':', 'abc')
    ('abc', None, None)
    >>> read_to(':', '')
    ('', None, None)
    >>> read_to('::', 'abc::def')
    ('abc', 'def', None)

    Note this returns a 3-tuple for compatibility with other routines
    which use the third element as an error message.

    @param delimiter: the delimiter string
    @param rest: the string to read such as 'abc:def:ghi'
    @returns: (before, after, None) tuple such as ('abc', 'def:ghi', None)
    """
    pos = rest.find(delimiter)
    if pos < 0:
        return rest, None, None
    # Step over the entire delimiter rather than only its first character.
    # An empty delimiter still advances by one, matching earlier behaviour.
    skip = len(delimiter) or 1
    return rest[:pos], rest[pos + skip:], None
def read_to_bracketed(delimiter, rest):
    """
    Read a field from the rest string which is expected to start with a '['.

    If rest does not start with '[', return a tuple (None, rest, 'does not begin with [').
    If rest starts with a '[', read until the first ']' is found.
    If no ']' is found, return a tuple (None, rest, 'does not contain ending ]').

    Otherwise separate the string into 'before', the characters between
    '[' and ']', and 'after', the characters following the ']', and check
    that the text immediately following the ']' is the delimiter.

    If there are no characters after the ']', return a tuple (before, None, None).
    If the characters after the ']' do not start with the delimiter, return a tuple
    (None, rest, 'characters not allowed after ending ]').
    Otherwise return a tuple (before, after, None) where after contains the
    characters following the ']' and the delimiter.

    The delimiter may be longer than one character.

    This function avoids raising exceptions for these particular cases of
    malformed input since they are easier to report in the calling function.

    Examples
    --------
    >>> read_to_bracketed(':', '[abc:def]')
    ('abc:def', None, None)
    >>> read_to_bracketed(':', '[abc]:def:ghi')
    ('abc', 'def:ghi', None)
    >>> read_to_bracketed(':', '[abc:def]:ghi:jkl')
    ('abc:def', 'ghi:jkl', None)
    >>> read_to_bracketed(':', 'abc:def:ghi:jkl')
    (None, 'abc:def:ghi:jkl', 'does not begin with [')
    >>> read_to_bracketed(':', '[abc:def:ghi:jkl')
    (None, '[abc:def:ghi:jkl', 'does not contain ending ]')
    >>> read_to_bracketed(':', '[abc]extra:def:ghi:jkl')
    (None, '[abc]extra:def:ghi:jkl', 'characters not allowed after ending ]')
    >>> read_to_bracketed('::', '[abc:def]::ghi')
    ('abc:def', 'ghi', None)

    @param delimiter: the delimiter string
    @param rest: the string to read such as '[abc:def]:ghi'
    @returns: (before, after, reason) tuple such as ('abc:def', 'ghi', None)
    """
    if not rest.startswith('['):
        return None, rest, 'does not begin with ['
    closing = rest.find(']')
    if closing < 0:
        return None, rest, 'does not contain ending ]'
    inner = rest[1:closing]
    tail_start = closing + 1
    if tail_start >= len(rest):
        # Nothing follows the closing bracket: this was the last field.
        return inner, None, None
    # Match the whole delimiter after the ']' so that delimiters longer than
    # one character work.  An empty delimiter can never match, as before.
    if not delimiter or not rest.startswith(delimiter, tail_start):
        return None, rest, 'characters not allowed after ending ]'
    return inner, rest[tail_start + len(delimiter):], None
def read_to_possibly_bracketed(delimiter, rest):
    """
    Read the next field, honouring an optional leading '[' ... ']' escape.

    Delegates to read_to_bracketed when rest starts with a '[', and to
    read_to otherwise.  This is intended to support fields which may
    contain an IPv6 address, an IPv4 address or a hostname.

    Examples
    --------
    >>> read_to_possibly_bracketed(':', 'abc:def:ghi')
    ('abc', 'def:ghi', None)
    >>> read_to_possibly_bracketed(':', '[abc]:def:ghi')
    ('abc', 'def:ghi', None)
    >>> read_to_possibly_bracketed(':', '[abc:def]:ghi')
    ('abc:def', 'ghi', None)
    >>> read_to_possibly_bracketed(':', '[]:ghi')
    ('', 'ghi', None)
    >>> read_to_possibly_bracketed(':', ':ghi')
    ('', 'ghi', None)
    >>> read_to_possibly_bracketed(':', '[ghi]')
    ('ghi', None, None)
    >>> read_to_possibly_bracketed(':', '[]')
    ('', None, None)
    >>> read_to_possibly_bracketed(':', '')
    ('', None, None)

    @param delimiter: the delimiter string
    @param rest: the string to read such as '[abc:def]:ghi'
    @returns: (before, after, reason) tuple such as ('abc:def', 'ghi', None)
    """
    reader = read_to_bracketed if rest.startswith('[') else read_to
    return reader(delimiter, rest)
class LineParser:
    """
    Manage state to parse a single line, generally from a configuration
    file with fields delimited by colons.

    self.line always holds the complete original line (used in error
    messages) while self.rest holds the portion not yet consumed; self.rest
    becomes None once the final field has been read.
    """

    def __init__(self, caller, filename, lineno, line):
        """
        Initialize the parser.

        @param caller: name of the parsing routine, used in error messages
        @param filename: name of the file being parsed, used in error messages
        @param lineno: line number within the file, used in error messages
        @param line: the text of the line to parse
        """
        self.caller = caller
        self.filename = filename
        self.lineno = lineno
        self.line = line
        self.rest = line
        self.error = None
        self.logger = logger
        if logging_is_verbose():
            self.logger.debug("%s:%s" % (filename, lineno))

    def _fail(self, msg):
        "Raise ExceptionNoStackTraceNeeded locating msg by file, line number, caller and line text"
        raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (self.filename, self.lineno, self.caller, self.line, msg))

    def ensure_more_to_process(self, name):
        "Raise an exception if we've exhausted the input line"
        if self.rest is None:
            self._fail("out of values (reading %s)" % name)

    def read_delimited_field(self, delimiter, name="next field", reader=read_to):
        """
        Attempts to read the next field in the line up to the specified delimiter
        using the specified reading method, raising any error encountered as an
        exception.  Returns the read field when successful.

        @param delimiter: the delimiter string passed to reader
        @param name: description of the field, used in error and debug messages
        @param reader: function taking (delimiter, rest) and returning a
                       (value, rest, error) tuple
        """
        self.ensure_more_to_process(name)
        value, self.rest, error = reader(delimiter, self.rest)
        if error is not None:
            # self.rest has already been replaced by what the reader returned.
            self._fail("%s (reading %s) >>%s" % (error, name, self.rest))
        if logging_is_verbose():
            self.logger.debug(" name=%-30s delimiter='%s' value=%s" % (name, delimiter, value))
        return value

    def does_starts_with(self, expected):
        "Returns true if the original line starts with the expected value, false otherwise"
        return self.line.startswith(expected)

    def ensure_starts_with(self, expected):
        """
        Raise an exception unless the original line starts with the expected
        value; otherwise drop len(expected) characters from the unread text.
        """
        if not self.does_starts_with(expected):
            self._fail("does not start with %s" % expected)
        self.rest = self.rest[len(expected):]

    def handle_field(self, name, dst=None, delimiter=':', stripchars=None):
        """
        Attempts to read the next field up to a given delimiter.
        Names starting with '[' indicate that the field should use the bracketed parsing logic.
        If dst is not none, also assigns the value to dst[name].
        If stripchars is not none, value is first stripped of leading and trailing stripchars.

        @param name: the field name, optionally wrapped in [ ]; the brackets
                     are removed before the name is used as a key or in messages
        @param dst: optional dictionary receiving the parsed value
        @param delimiter: the delimiter ending the field
        @param stripchars: optional characters to strip from both ends of the value
        @returns: the parsed (and possibly stripped) value
        """
        # startswith() rather than name[0] so that an empty field name is
        # treated as an ordinary field instead of raising IndexError.
        if name.startswith('['):
            name = name.strip('[]')
            value = self.read_delimited_field(delimiter, name, read_to_possibly_bracketed)
        else:
            value = self.read_delimited_field(delimiter, name)
        if stripchars is not None:
            value = value.strip(stripchars)
        if dst is not None:
            dst[name] = value
        return value
#
# file parsing
#
def line_reader(f):
    """
    Iterate over the given input and generate a (line number, line) tuple
    for every line which is neither blank nor a comment.  Line numbers start
    at 1 and lines are stripped of surrounding whitespace; a line is a
    comment when its first non-blank character is '#'.

    >>> [l for l in line_reader(['', '# test', 'abc:def'])]
    [(3, 'abc:def')]
    """
    for lineno, raw in enumerate(f, 1):
        text = raw.strip()
        if text and not text.startswith('#'):
            yield lineno, text
################
# gpfilespace format
#
# The first line in the file is the filespace name; the remaining lines
# specify a hostname, dbid, and path:
#
# filespace:name
# hostname:dbid:path
# ...
################
def parse_fspacename(filename, lineno, line):
    """
    Parse the 'filespace:' line which appears at the beginning of the
    gpfilespace configuration file and return the filespace name.

    Raises ExceptionNoStackTraceNeeded when the line does not start with
    'filespace:' or when further ':' delimited text follows the name.

    >>> parse_fspacename('file', 1, 'filespace:blah')
    'blah'
    """
    parser = LineParser(caller(), filename, lineno, line)
    parser.ensure_starts_with('filespace:')
    name = parser.read_delimited_field(':')
    if parser.rest is None:
        return name
    detail = "unexpected characters after filespace name >>%s" % parser.rest
    raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, detail))
def parse_dfs_url(filename, lineno, line):
    """
    Parse a 'dfs_url::' line of the gpfilespace configuration file and
    return the url which follows the prefix.

    The field is delimited by '::' rather than ':' because the url itself
    normally contains single ':' characters (for example host:port).

    Raises ExceptionNoStackTraceNeeded when the line does not start with
    'dfs_url::' or when a further '::' delimiter follows the url.

    >>> parse_dfs_url('file', 1, 'dfs_url::localhost:9000/gpsql')
    'localhost:9000/gpsql'
    """
    p = LineParser(caller(), filename, lineno, line)
    p.ensure_starts_with('dfs_url::')
    dfs_url = p.read_delimited_field('::')
    if p.rest is not None:
        # Report the problem in terms of the dfs url; the earlier wording
        # referred to a "filespace name", which this line does not contain.
        msg = "unexpected characters after dfs url >>%s" % p.rest
        raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, msg))
    return dfs_url
def parse_fspacesys(filename, lineno, line):
    """
    Parse the filesystem name: the optional second line in the gpfilespace
    configuration file.

    Returns None when the line does not start with 'fsysname:', otherwise
    the text following that prefix.  Raises ExceptionNoStackTraceNeeded when
    further ':' delimited text follows the value.

    >>> parse_fspacesys('file', 2, 'fsysname:local|filesystem_name')
    'local|filesystem_name'
    """
    prefix = 'fsysname:'
    parser = LineParser(caller(), filename, lineno, line)
    if parser.does_starts_with(prefix):
        # Known to succeed here; called to advance past the prefix.
        parser.ensure_starts_with(prefix)
        value = parser.read_delimited_field(':')
        if parser.rest is not None:
            detail = "unexpected characters after filespace type >>%s" % parser.rest
            raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, detail))
        return value
    return None
def parse_fspacereplica(filename, lineno, line):
    """
    Parse the filespace replica: the optional third line in the gpfilespace
    configuration file.

    Returns None when the line does not start with 'fsreplica:', otherwise
    the text following that prefix.  Raises ExceptionNoStackTraceNeeded when
    further ':' delimited text follows the value.

    >>> parse_fspacereplica('file', 3, 'fsreplica:repnum')
    'repnum'
    """
    prefix = 'fsreplica:'
    parser = LineParser(caller(), filename, lineno, line)
    if parser.does_starts_with(prefix):
        # Known to succeed here; called to advance past the prefix.
        parser.ensure_starts_with(prefix)
        value = parser.read_delimited_field(':')
        if parser.rest is not None:
            detail = "unexpected characters after filespace replica >>%s" % parser.rest
            raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, detail))
        return value
    return None
def parse_gpfilespace_line(filename, lineno, line):
    """
    Parse a hostname:dbid:path line of the gpfilespace configuration file
    and return the three fields as a (host, dbid, path) tuple.

    Raises ExceptionNoStackTraceNeeded when a field is missing or malformed,
    or when text remains after the path.

    >>> parse_gpfilespace_line('file', 1, '[::1]:dbid:path')
    ('::1', 'dbid', 'path')
    >>> parse_gpfilespace_line('file', 1, 'host:dbid:path')
    ('host', 'dbid', 'path')
    """
    parser = LineParser(caller(), filename, lineno, line)
    # Bracketed names allow a '[...]' escaped value: the host may be an IPv6
    # address and the path may be a url containing ':'.
    values = [parser.handle_field(field) for field in ('[host]', 'dbid', '[path]')]
    if parser.rest is None:
        return tuple(values)
    detail = "unexpected characters after path name >>%s" % parser.rest
    raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, detail))
################
# gpexpand segment file format:
#
# Form of file is hostname:address:port:datadir:dbid:contentId:role[:replicationPort]
################
def parse_gpexpand_segment_line(filename, lineno, line):
    """
    Parse a line of the gpexpand configuration file and return an 8-tuple
    (hostname, address, port, datadir, dbid, contentId, role, replicationPort).

    replicationPort is None when the line ends after the role field.
    Raises ExceptionNoStackTraceNeeded when a field is missing or malformed,
    or when text remains after the replication port.

    >>> parse_gpexpand_segment_line('file', 1, "localhost:[::1]:40001:/Users/ctaylor/data/p2/gpseg1:4:1:p")
    ('localhost', '::1', '40001', '/Users/ctaylor/data/p2/gpseg1', '4', '1', 'p', None)
    >>> parse_gpexpand_segment_line('file', 1, "localhost:[::1]:40001:/Users/ctaylor/data/p2/gpseg1:4:1:p:41001")
    ('localhost', '::1', '40001', '/Users/ctaylor/data/p2/gpseg1', '4', '1', 'p', '41001')
    """
    # [host] and [address] are bracketed to allow for IPv6 addresses.
    required = ('[host]', '[address]', 'port', 'datadir', 'dbid', 'contentId', 'role')
    parser = LineParser(caller(), filename, lineno, line)
    values = [parser.handle_field(field) for field in required]

    replication_port = None
    if parser.rest is not None:
        replication_port = parser.handle_field('replicationPort')
    if parser.rest is not None:
        detail = "unexpected characters after replicationPort >>%s" % parser.rest
        raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, detail))
    return tuple(values) + (replication_port,)
################
# gpaddmirrors format:
#
# filespaceOrder=[filespace1_fsname[:filespace2_fsname:...]]
# mirror[content]=content:address:port:mir_replication_port:pri_replication_port:fselocation[:fselocation:...]
#
################
def parse_filespace_order(filename, lineno, line):
    """
    Parse the filespaceOrder= line appearing at the beginning of the
    gpaddmirrors, gpmovemirrors and gprecoverseg configuration files and
    return the list of ':' separated filespace names which follows it.

    Raises ExceptionNoStackTraceNeeded when the line does not start with
    'filespaceOrder='.

    >>> parse_filespace_order('file', 1, "filespaceOrder=fs1:fs2:fs3")
    ['fs1', 'fs2', 'fs3']
    >>> parse_filespace_order('file', 1, "filespaceOrder=")
    []
    """
    parser = LineParser(caller(), filename, lineno, line)
    parser.ensure_starts_with('filespaceOrder=')
    names = []
    while True:
        # Stop once the unread text is exhausted (None) or empty.
        if not parser.rest:
            return names
        names.append(parser.read_delimited_field(':', 'next filespace'))
def parse_gpaddmirrors_line(filename, lineno, line, fslist):
    """
    Parse a mirror line of the gpaddmirrors configuration file.

    Returns a (fixed, flexible) pair of dictionaries: fixed maps the
    standard field names to their values and flexible maps each filespace
    name in fslist to the location read for it.

    >>> line = "mirror0=0:[::1]:40001:50001:60001:/Users/ctaylor/data/p2/gpseg1"
    >>> fixed, flex = parse_gpaddmirrors_line('file', 1, line, [])
    >>> fixed["address"], fixed["contentId"], fixed["dataDirectory"]
    ('::1', '0', '/Users/ctaylor/data/p2/gpseg1')
    """
    parser = LineParser(caller(), filename, lineno, line)
    parser.ensure_starts_with('mirror')
    # Discard everything up to and including the '='.
    parser.read_delimited_field('=', 'content id', consume_to)

    fixed = {}
    # [address] is bracketed to allow for an IPv6 address.
    fixed_fields = ('contentId', '[address]', 'port', 'replicationPort',
                    'primarySegmentReplicationPort', 'dataDirectory')
    for field_name in fixed_fields:
        parser.handle_field(field_name, fixed)

    flexible = {}
    for filespace in fslist:
        parser.handle_field('[' + filespace + ']', flexible)
    return fixed, flexible
################
# gpmovemirrors format:
#
# This is basically the same as the gprecoverseg format (since gpmovemirrors ultimately just
# passes the input file after validating it) but the field names are slightly different.
#
# filespaceOrder=[filespace1_fsname[:filespace2_fsname:...]]
# old_address:port:datadir new_address:port:replication_port:datadir[:fselocation:...]
# ^
# note space
################
def parse_gpmovemirrors_line(filename, lineno, line, fslist):
    """
    Parse a mirror line of the gpmovemirrors configuration file.

    The line must consist of exactly two whitespace separated groups of
    fields describing the old and the new mirror.  Returns a
    (fixed, flexible) pair of dictionaries: fixed maps the standard field
    names to their values and flexible maps each filespace name in fslist
    to the location read for it.

    Raises ExceptionNoStackTraceNeeded when the number of groups is not two,
    when a field is missing or malformed, or when text remains after the
    last expected field.

    >>> line = "[::1]:40001:/Users/ctaylor/data/m2/gpseg1 [::2]:40101:50101:/Users/ctaylor/data/m2/gpseg1:/fs1"
    >>> fixed, flex = parse_gpmovemirrors_line('file', 1, line, ['fs1'])
    >>> fixed["oldAddress"], fixed["newAddress"]
    ('::1', '::2')
    >>> flex
    {'fs1': '/fs1'}
    """
    group_count = len(line.split())
    if group_count != 2:
        detail = "need two groups of fields delimited by a space for old and new mirror, not %d" % group_count
        raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, detail))

    # (field name, extra handle_field options) in the order the fields appear.
    # Bracketed names allow for IPv6 addresses.  MPP-15675: the old data
    # directory ends at the space and whitespace is stripped around it and
    # around the new address.
    steps = (
        ('[oldAddress]', {}),
        ('oldPort', {}),
        ('oldDataDirectory', {'delimiter': ' ', 'stripchars': ' \t'}),
        ('[newAddress]', {'stripchars': ' \t'}),
        ('newPort', {}),
        ('newReplicationPort', {}),
        ('newDataDirectory', {}),
    )
    fixed = {}
    flexible = {}
    parser = LineParser(caller(), filename, lineno, line)
    for field_name, options in steps:
        parser.handle_field(field_name, fixed, **options)
    for filespace in fslist:
        parser.handle_field(filespace, flexible)
    if parser.rest is None:
        return fixed, flexible
    detail = "unexpected characters after mirror fields >>%s" % parser.rest
    raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, detail))
################
# gprecoverseg format:
#
# filespaceOrder=[filespace1_fsname[:filespace2_fsname:...]]
# failed_host_address:port:datadir [recovery_host_address:port:replication_port:datadir[:fselocation:...]]
# ^
# note space
#
# filespace locations are only present at the end of the other fields when there
# are two groups of fields separated by a space. If there is only one group of
# fields then we assume the entire line is only three fields as below with no
# filespace locations:
#
# failed_host_address:port:datadir
################
def parse_gprecoverseg_line(filename, lineno, line, fslist):
    """
    Parse a segment line of the gprecoverseg configuration file.

    The line consists of one or two whitespace separated groups of fields.
    With one group only the failed segment's address, port and data
    directory are read.  With two groups the recovery segment's fields and
    one location per filespace name in fslist are read as well.

    Returns a (fixed, flexible) pair of dictionaries: fixed maps the
    standard field names to their values and flexible maps each filespace
    name to its location (empty when the line has a single group).

    Raises ExceptionNoStackTraceNeeded when the number of groups is neither
    one nor two, or when a field is missing or malformed.

    >>> line = "[::1]:40001:/Users/ctaylor/data/m2/gpseg1"
    >>> fixed, flex = parse_gprecoverseg_line('file', 1, line, [])
    >>> fixed["failedAddress"], fixed["failedPort"], fixed["failedDataDirectory"]
    ('::1', '40001', '/Users/ctaylor/data/m2/gpseg1')
    >>> line = "[::1]:40001:/Users/ctaylor/data/m2/gpseg1 [::2]:40101:50101:/Users/ctaylor/data/m2/gpseg1:/fs1"
    >>> fixed, flex = parse_gprecoverseg_line('file', 1, line, ['fs1'])
    >>> fixed["newAddress"], fixed["newPort"], fixed["newReplicationPort"], fixed["newDataDirectory"]
    ('::2', '40101', '50101', '/Users/ctaylor/data/m2/gpseg1')
    >>> flex
    {'fs1': '/fs1'}
    """
    group_count = len(line.split())
    if group_count != 1 and group_count != 2:
        detail = "only one or two groups of fields delimited by a space, not %d" % group_count
        raise ExceptionNoStackTraceNeeded("%s:%s:%s LINE >>%s\n%s" % (filename, lineno, caller(), line, detail))

    fixed = {}
    flexible = {}
    parser = LineParser(caller(), filename, lineno, line)
    # Bracketed names allow for IPv6 addresses.
    parser.handle_field('[failedAddress]', fixed)
    parser.handle_field('failedPort', fixed)

    if group_count == 1:
        # Single group: the data directory is the last field on the line.
        parser.handle_field('failedDataDirectory', fixed)
        return fixed, flexible

    # Two groups.  MPP-15675: the failed data directory ends at the space and
    # whitespace is stripped around it and around the new address.
    parser.handle_field('failedDataDirectory', fixed, delimiter=' ', stripchars=' \t')
    parser.handle_field('[newAddress]', fixed, stripchars=' \t')
    for field_name in ('newPort', 'newReplicationPort', 'newDataDirectory'):
        parser.handle_field(field_name, fixed)
    for filespace in fslist:
        parser.handle_field('[' + filespace + ']', flexible)
    return fixed, flexible
# When run as a script, execute the doctest examples embedded in the
# docstrings of this module.
if __name__ == '__main__':
    import doctest
    doctest.testmod()