src/ports/postgres/modules/pmml/formula.py_in - madlib - Git at Google

 import plpy
 import re

 class Formula(object):
     """Names of the response and predictor variables of a fitted model.

     Attributes set by the constructor:
         n_coef: number of coefficients, i.e. how many predictor names
             ``x`` must hold.
         y: response name with double quotes removed.
         x: list of ``n_coef`` predictor names (see ``parse``).
     """

     # ``array[...]`` expression (case-insensitive); group 1 is the text
     # between the brackets.  The brackets are backslash-escaped rather than
     # written as ``[[]`` / ``[]]`` because the ``[[]`` form triggers
     # "FutureWarning: Possible nested set" on Python 3.7+.
     _ARRAY_EXPR = re.compile(r'array\[(["a-z0-9_, .]+)\]', flags=re.I)
     # Text that begins with identifier characters and/or double quotes.
     _SIMPLE_COL = re.compile(r'["a-z0-9_]+', flags=re.I)

     def __init__(self, y_str, x_str, coef_len):
         """Build the default names.

         Args:
             y_str: response expression; double quotes are stripped.
             x_str: predictor expression, interpreted by ``parse``.
             coef_len: number of coefficients in the model.
         """
         self.n_coef = coef_len
         self.y = y_str.replace('"', '')
         self.x = self.parse(x_str)

     def parse(self, x_str):
         """Derive the predictor names from ``x_str``.

         Returns:
             A list of ``n_coef`` names.  When ``x_str`` starts with an
             ``array[...]`` expression that lists exactly ``n_coef`` items,
             those items are returned (stripped, double quotes removed).
             Otherwise the names are ``prefix[1] .. prefix[n_coef]`` where
             ``prefix`` is ``x_str`` without double quotes if it starts like
             a plain column name, and ``'x'`` in every other case.
         """
         prefix = 'x'
         if self._ARRAY_EXPR.match(x_str) is not None:
             x_csv = self._ARRAY_EXPR.sub(r'\1', x_str)
             names = [s.strip().replace('"', '') for s in x_csv.split(',')]
             if len(names) == self.n_coef:
                 return names
             # Item count does not match the coefficients: keep prefix 'x'.
         elif self._SIMPLE_COL.match(x_str) is not None:
             prefix = x_str.replace('"', '')
         return ["{0}[{1}]".format(prefix, i + 1) for i in range(self.n_coef)]

     def _warn_unexpected(self, spec):
         """Emit the standard warning for a name spec that cannot be used."""
         plpy.warning("PMML warning: unexpected namespec '" +
                      str(spec) + "', using default names")

     def rename(self, spec):
         """Replace ``y`` / ``x`` according to a user-supplied name spec.

         Args:
             spec: one of
                 * a string starting with ``{``: braces are removed and the
                   comma-separated items are handled as a sequence;
                 * a string ``'y ~ x1 + x2'`` or ``'x1, x2'``: the part
                   before ``~`` (if any) is the response name; predictors
                   are split on ``+`` when one is present, else on ``,``;
                 * a sequence of ``n_coef + 1`` names (response first) or
                   of ``n_coef`` names (predictors only).

         A spec that does not yield ``n_coef`` predictor names, or a string
         with more than one ``~``, leaves the current names untouched and
         reports the problem through ``plpy.warning``.
         """
         if not isinstance(spec, str):
             if len(spec) == self.n_coef + 1:
                 self.y = spec[0]
                 self.x = list(spec[1:])
             elif len(spec) == self.n_coef:
                 # Copy so later changes to the caller's list do not leak in.
                 self.x = list(spec)
             else:
                 self._warn_unexpected(spec)
             return

         if spec.startswith('{'):
             items = spec.replace('{', '').replace('}', '').split(',')
             return self.rename([s.strip() for s in items])

         y = self.y
         rhs = spec
         if '~' in spec:
             parts = spec.split('~')
             if len(parts) != 2:
                 # More than one '~' used to escape as a ValueError from
                 # tuple unpacking; treat it like any other unusable spec.
                 self._warn_unexpected(spec)
                 return
             y = parts[0].strip()
             rhs = parts[1]

         separator = '+' if '+' in rhs else ','
         x = [s.strip() for s in rhs.split(separator)]
         if len(x) != self.n_coef:
             # Report the full spec, not just the part after '~'.
             self._warn_unexpected(spec)
         else:
             self.y = y
             self.x = x
	import plpy
	import re

	class Formula(object):
	    """Names of the response and predictor variables of a fitted model.

	    Attributes set by the constructor:
	        n_coef: number of coefficients, i.e. how many predictor names
	            ``x`` must hold.
	        y: response name with double quotes removed.
	        x: list of ``n_coef`` predictor names (see ``parse``).
	    """

	    # ``array[...]`` expression (case-insensitive); group 1 is the text
	    # between the brackets.  The brackets are backslash-escaped rather than
	    # written as ``[[]`` / ``[]]`` because the ``[[]`` form triggers
	    # "FutureWarning: Possible nested set" on Python 3.7+.
	    _ARRAY_EXPR = re.compile(r'array\[(["a-z0-9_, .]+)\]', flags=re.I)
	    # Text that begins with identifier characters and/or double quotes.
	    _SIMPLE_COL = re.compile(r'["a-z0-9_]+', flags=re.I)

	    def __init__(self, y_str, x_str, coef_len):
	        """Build the default names.

	        Args:
	            y_str: response expression; double quotes are stripped.
	            x_str: predictor expression, interpreted by ``parse``.
	            coef_len: number of coefficients in the model.
	        """
	        self.n_coef = coef_len
	        self.y = y_str.replace('"', '')
	        self.x = self.parse(x_str)

	    def parse(self, x_str):
	        """Derive the predictor names from ``x_str``.

	        Returns:
	            A list of ``n_coef`` names.  When ``x_str`` starts with an
	            ``array[...]`` expression that lists exactly ``n_coef`` items,
	            those items are returned (stripped, double quotes removed).
	            Otherwise the names are ``prefix[1] .. prefix[n_coef]`` where
	            ``prefix`` is ``x_str`` without double quotes if it starts like
	            a plain column name, and ``'x'`` in every other case.
	        """
	        prefix = 'x'
	        if self._ARRAY_EXPR.match(x_str) is not None:
	            x_csv = self._ARRAY_EXPR.sub(r'\1', x_str)
	            names = [s.strip().replace('"', '') for s in x_csv.split(',')]
	            if len(names) == self.n_coef:
	                return names
	            # Item count does not match the coefficients: keep prefix 'x'.
	        elif self._SIMPLE_COL.match(x_str) is not None:
	            prefix = x_str.replace('"', '')
	        return ["{0}[{1}]".format(prefix, i + 1) for i in range(self.n_coef)]

	    def _warn_unexpected(self, spec):
	        """Emit the standard warning for a name spec that cannot be used."""
	        plpy.warning("PMML warning: unexpected namespec '" +
	                     str(spec) + "', using default names")

	    def rename(self, spec):
	        """Replace ``y`` / ``x`` according to a user-supplied name spec.

	        Args:
	            spec: one of
	                * a string starting with ``{``: braces are removed and the
	                  comma-separated items are handled as a sequence;
	                * a string ``'y ~ x1 + x2'`` or ``'x1, x2'``: the part
	                  before ``~`` (if any) is the response name; predictors
	                  are split on ``+`` when one is present, else on ``,``;
	                * a sequence of ``n_coef + 1`` names (response first) or
	                  of ``n_coef`` names (predictors only).

	        A spec that does not yield ``n_coef`` predictor names, or a string
	        with more than one ``~``, leaves the current names untouched and
	        reports the problem through ``plpy.warning``.
	        """
	        if not isinstance(spec, str):
	            if len(spec) == self.n_coef + 1:
	                self.y = spec[0]
	                self.x = list(spec[1:])
	            elif len(spec) == self.n_coef:
	                # Copy so later changes to the caller's list do not leak in.
	                self.x = list(spec)
	            else:
	                self._warn_unexpected(spec)
	            return

	        if spec.startswith('{'):
	            items = spec.replace('{', '').replace('}', '').split(',')
	            return self.rename([s.strip() for s in items])

	        y = self.y
	        rhs = spec
	        if '~' in spec:
	            parts = spec.split('~')
	            if len(parts) != 2:
	                # More than one '~' used to escape as a ValueError from
	                # tuple unpacking; treat it like any other unusable spec.
	                self._warn_unexpected(spec)
	                return
	            y = parts[0].strip()
	            rhs = parts[1]

	        separator = '+' if '+' in rhs else ','
	        x = [s.strip() for s in rhs.split(separator)]
	        if len(x) != self.n_coef:
	            # Report the full spec, not just the part after '~'.
	            self._warn_unexpected(spec)
	        else:
	            self.y = y
	            self.x = x