blob: a9c9c6fe64ac4504e8f819a898411a01cc0f46e0 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# code for dealing with CQL's syntax, rules, interpretation
# i.e., stuff that's not necessarily cqlsh-specific
import traceback
import cassandra
from . import pylexotron, util
# Convenience re-export: completion code in this module returns Hint objects
# (see pylexotron) to describe suggestions that should be displayed, not inserted.
Hint = pylexotron.Hint
# Reserved keywords in CQL: these may not be parsed/completed as simple
# identifiers (see CASSANDRA-10415).
# Derived from .../cassandra/src/java/org/apache/cassandra/cql3/ReservedKeywords.java
cql_keywords_reserved = {
    'add', 'allow', 'alter', 'and', 'apply', 'asc', 'authorize', 'batch',
    'begin', 'by', 'columnfamily', 'create', 'default', 'delete', 'desc',
    'describe', 'drop', 'entries', 'execute', 'from', 'full', 'grant', 'if',
    'in', 'index', 'infinity', 'insert', 'into', 'is', 'keyspace', 'limit',
    'materialized', 'mbean', 'mbeans', 'modify', 'nan', 'norecursive', 'not',
    'null', 'of', 'on', 'or', 'order', 'primary', 'rename', 'replace',
    'revoke', 'schema', 'select', 'set', 'table', 'to', 'token', 'truncate',
    'unlogged', 'unset', 'update', 'use', 'using', 'view', 'where', 'with',
}
class CqlParsingRuleSet(pylexotron.ParsingRuleSet):
    """
    A pylexotron ParsingRuleSet specialized for CQL: installs the reserved
    keywords as a grammar rule, massages raw token streams into statements,
    and drives tab-completion (candidate matching, quoting, hints).
    """

    # Well-known compression class names — presumably offered as completion
    # candidates for table compression options (usage is outside this chunk).
    available_compression_classes = (
        'DeflateCompressor',
        'SnappyCompressor',
        'LZ4Compressor',
    )

    # Well-known compaction strategy class names (same presumed use).
    available_compaction_classes = (
        'LeveledCompactionStrategy',
        'SizeTieredCompactionStrategy',
        'DateTieredCompactionStrategy',
        'TimeWindowCompactionStrategy'
    )

    # Replication strategy class names for keyspace options.
    replication_strategies = (
        'SimpleStrategy',
        'OldNetworkTopologyStrategy',
        'NetworkTopologyStrategy'
    )

    # Strategies that accept a 'replication_factor' option, in both short
    # and fully-qualified spellings.
    replication_factor_strategies = (
        'SimpleStrategy',
        'org.apache.cassandra.locator.SimpleStrategy',
        'OldNetworkTopologyStrategy',
        'org.apache.cassandra.locator.OldNetworkTopologyStrategy'
    )
    def __init__(self, *args, **kwargs):
        # Explicit base-class call kept as-is: the base may be a classic
        # (old-style) Python 2 class, where super() would not work.
        pylexotron.ParsingRuleSet.__init__(self, *args, **kwargs)
        # Commands that are terminated by a newline rather than ';'.
        # note: commands_end_with_newline may be extended by callers.
        self.commands_end_with_newline = set()
        self.set_reserved_keywords()
def set_reserved_keywords(self):
"""
We cannot let reserved cql keywords be simple 'identifier' since this caused
problems with completion, see CASSANDRA-10415
"""
cassandra.metadata.cql_keywords_reserved = cql_keywords_reserved
syntax = '<reserved_identifier> ::= /(' + '|'.join(r'\b{}\b'.format(k) for k in cql_keywords_reserved) + ')/ ;'
self.append_rules(syntax)
def completer_for(self, rulename, symname):
def registrator(f):
def completerwrapper(ctxt):
cass = ctxt.get_binding('cassandra_conn', None)
if cass is None:
return ()
return f(ctxt, cass)
completerwrapper.func_name = 'completerwrapper_on_' + f.func_name
self.register_completer(completerwrapper, rulename, symname)
return completerwrapper
return registrator
def explain_completion(self, rulename, symname, explanation=None):
if explanation is None:
explanation = '<%s>' % (symname,)
@self.completer_for(rulename, symname)
def explainer(ctxt, cass):
return [Hint(explanation)]
return explainer
    def cql_massage_tokens(self, toklist):
        """
        Normalize a raw lexer token stream: drop 'endline' tokens, except
        that a newline terminates the current statement (is rewritten to an
        'endtoken') when the statement's command word is registered in
        self.commands_end_with_newline.
        """
        curstmt = []     # tokens of the statement currently being collected
        output = []
        term_on_nl = False   # does a newline end the current statement?
        for t in toklist:
            if t[0] == 'endline':
                if term_on_nl:
                    t = ('endtoken',) + t[1:]
                else:
                    # don't put any 'endline' tokens in output
                    continue

            # Convert all unicode tokens to ascii, where possible. This
            # helps avoid problems with performing unicode-incompatible
            # operations on tokens (like .lower()). See CASSANDRA-9083
            # for one example of this.  (Python 2 only: `unicode` type.)
            str_token = t[1]
            if isinstance(str_token, unicode):
                try:
                    str_token = str_token.encode('ascii')
                    t = (t[0], str_token) + t[2:]
                except UnicodeEncodeError:
                    # non-ascii token text: keep it as unicode
                    pass

            curstmt.append(t)
            if t[0] == 'endtoken':
                term_on_nl = False
                output.extend(curstmt)
                curstmt = []
            else:
                if len(curstmt) == 1:
                    # first token in statement; command word
                    cmd = t[1].lower()
                    term_on_nl = bool(cmd in self.commands_end_with_newline)
        # flush any trailing, unterminated statement
        output.extend(curstmt)
        return output
def cql_parse(self, text, startsymbol='Start'):
tokens = self.lex(text)
tokens = self.cql_massage_tokens(tokens)
return self.parse(startsymbol, tokens, init_bindings={'*SRC*': text})
    def cql_whole_parse_tokens(self, toklist, srcstr=None, startsymbol='Start'):
        # Match the entire (already-massaged) token list against the grammar
        # from `startsymbol`.  `srcstr` is the original source text the
        # tokens came from — presumably used by the matcher for span
        # extraction/diagnostics (confirm in pylexotron.whole_match).
        return self.whole_match(startsymbol, toklist, srcstr=srcstr)
    def cql_split_statements(self, text):
        """
        Lex `text` and split the massaged token stream into statements at
        'endtoken' boundaries, keeping BEGIN BATCH ... APPLY BATCH together
        as one statement.

        Returns (statements, in_unterminated): statements is a list of token
        lists; in_unterminated is true when the text ends inside an open
        batch or inside an unclosed $$-style (pg) string.
        """
        tokens = self.lex(text)
        tokens = self.cql_massage_tokens(tokens)
        stmts = util.split_list(tokens, lambda t: t[0] == 'endtoken')
        output = []
        in_batch = False
        # an unclosed $$-string shows up as a single 'unclosedPgString' token
        in_pg_string = len([st for st in tokens if len(st) > 0 and st[0] == 'unclosedPgString']) == 1
        for stmt in stmts:
            if in_batch:
                # statements inside a batch are folded into the statement
                # that opened the batch
                output[-1].extend(stmt)
            else:
                output.append(stmt)
            if len(stmt) > 2:
                # stmt[-3] is presumably the 'APPLY' of a trailing
                # 'APPLY BATCH ;' — closes a batch; a leading 'BEGIN' opens one.
                if stmt[-3][1].upper() == 'APPLY':
                    in_batch = False
                elif stmt[0][1].upper() == 'BEGIN':
                    in_batch = True
        return output, in_batch or in_pg_string
def cql_complete_single(self, text, partial, init_bindings={}, ignore_case=True,
startsymbol='Start'):
tokens = (self.cql_split_statements(text)[0] or [[]])[-1]
bindings = init_bindings.copy()
# handle some different completion scenarios- in particular, completing
# inside a string literal
prefix = None
dequoter = util.identity
lasttype = None
if tokens:
lasttype = tokens[-1][0]
if lasttype == 'unclosedString':
prefix = self.token_dequote(tokens[-1])
tokens = tokens[:-1]
partial = prefix + partial
dequoter = self.dequote_value
requoter = self.escape_value
elif lasttype == 'unclosedName':
prefix = self.token_dequote(tokens[-1])
tokens = tokens[:-1]
partial = prefix + partial
dequoter = self.dequote_name
requoter = self.escape_name
elif lasttype == 'unclosedComment':
return []
bindings['partial'] = partial
bindings['*LASTTYPE*'] = lasttype
bindings['*SRC*'] = text
# find completions for the position
completions = self.complete(startsymbol, tokens, bindings)
hints, strcompletes = util.list_bifilter(pylexotron.is_hint, completions)
# it's possible to get a newline token from completion; of course, we
# don't want to actually have that be a candidate, we just want to hint
if '\n' in strcompletes:
strcompletes.remove('\n')
if partial == '':
hints.append(Hint('<enter>'))
# find matches with the partial word under completion
if ignore_case:
partial = partial.lower()
f = lambda s: s and dequoter(s).lower().startswith(partial)
else:
f = lambda s: s and dequoter(s).startswith(partial)
candidates = filter(f, strcompletes)
if prefix is not None:
# dequote, re-escape, strip quotes: gets us the right quoted text
# for completion. the opening quote is already there on the command
# line and not part of the word under completion, and readline
# fills in the closing quote for us.
candidates = [requoter(dequoter(c))[len(prefix) + 1:-1] for c in candidates]
# the above process can result in an empty string; this doesn't help for
# completions
candidates = filter(None, candidates)
# prefix a space when desirable for pleasant cql formatting
if tokens:
newcandidates = []
for c in candidates:
if self.want_space_between(tokens[-1], c) \
and prefix is None \
and not text[-1].isspace() \
and not c[0].isspace():
c = ' ' + c
newcandidates.append(c)
candidates = newcandidates
# append a space for single, complete identifiers
if len(candidates) == 1 and candidates[0][-1].isalnum() \
and lasttype != 'unclosedString' \
and lasttype != 'unclosedName':
candidates[0] += ' '
return candidates, hints
@staticmethod
def want_space_between(tok, following):
if following in (',', ')', ':'):
return False
if tok[0] == 'op' and tok[1] in (',', ')', '='):
return True
if tok[0] == 'stringLiteral' and following[0] != ';':
return True
if tok[0] == 'star' and following[0] != ')':
return True
if tok[0] == 'endtoken':
return True
if tok[1][-1].isalnum() and following[0] != ',':
return True
return False
def cql_complete(self, text, partial, cassandra_conn=None, ignore_case=True, debug=False,
startsymbol='Start'):
init_bindings = {'cassandra_conn': cassandra_conn}
if debug:
init_bindings['*DEBUG*'] = True
print "cql_complete(%r, partial=%r)" % (text, partial)
completions, hints = self.cql_complete_single(text, partial, init_bindings,
startsymbol=startsymbol)
if hints:
hints = [h.text for h in hints]
hints.append('')
if len(completions) == 1 and len(hints) == 0:
c = completions[0]
if debug:
print "** Got one completion: %r. Checking for further matches...\n" % (c,)
if not c.isspace():
new_c = self.cql_complete_multiple(text, c, init_bindings, startsymbol=startsymbol)
completions = [new_c]
if debug:
print "** New list of completions: %r" % (completions,)
return hints + completions
def cql_complete_multiple(self, text, first, init_bindings, startsymbol='Start'):
debug = init_bindings.get('*DEBUG*', False)
try:
completions, hints = self.cql_complete_single(text + first, '', init_bindings,
startsymbol=startsymbol)
except Exception:
if debug:
print "** completion expansion had a problem:"
traceback.print_exc()
return first
if hints:
if not first[-1].isspace():
first += ' '
if debug:
print "** completion expansion found hints: %r" % (hints,)
return first
if len(completions) == 1 and completions[0] != '':
if debug:
print "** Got another completion: %r." % (completions[0],)
if completions[0][0] in (',', ')', ':') and first[-1] == ' ':
first = first[:-1]
first += completions[0]
else:
common_prefix = util.find_common_prefix(completions)
if common_prefix == '':
return first
if common_prefix[0] in (',', ')', ':') and first[-1] == ' ':
first = first[:-1]
if debug:
print "** Got a partial completion: %r." % (common_prefix,)
return first + common_prefix
if debug:
print "** New total completion: %r. Checking for further matches...\n" % (first,)
return self.cql_complete_multiple(text, first, init_bindings, startsymbol=startsymbol)
@staticmethod
def cql_extract_orig(toklist, srcstr):
# low end of span for first token, to high end of span for last token
return srcstr[toklist[0][2][0]:toklist[-1][2][1]]
@staticmethod
def token_dequote(tok):
if tok[0] == 'unclosedName':
# strip one quote
return tok[1][1:].replace('""', '"')
if tok[0] == 'quotedStringLiteral':
# strip quotes
return tok[1][1:-1].replace("''", "'")
if tok[0] == 'unclosedString':
# strip one quote
return tok[1][1:].replace("''", "'")
if tok[0] == 'unclosedComment':
return ''
return tok[1]
@staticmethod
def token_is_word(tok):
return tok[0] == 'identifier'