cpp/build-support/lintutils.py - arrow - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import multiprocessing as mp
 import os
 from fnmatch import fnmatch
 from subprocess import Popen


 def chunk(seq, n):
     """
     divide a sequence into equal sized chunks
     (the last chunk may be smaller, but won't be empty)

     seq may be any iterable; n is the chunk size and must be at least 1.
     returns a list of lists; an empty seq gives an empty list.
     raises ValueError when n < 1, because no such chunk size can honour
     the "equal sized, never empty" contract.
     """
     if n < 1:
         raise ValueError("chunk size must be at least 1, got %r" % (n,))
     chunks = []
     some = []
     for element in seq:
         some.append(element)
         # close the chunk the moment it is full, so that an empty
         # chunk can never end up in the result
         if len(some) == n:
             chunks.append(some)
             some = []
     if some:
         chunks.append(some)
     return chunks


 def dechunk(chunks):
     "concatenate the elements of every chunk, in order, into one new list"
     return [element for piece in chunks for element in piece]


 def run_parallel(cmds, **kwargs):
     """
     Launch every command in cmds through subprocess.Popen, forwarding
     the same **kwargs to each call, and wait for all of them to finish.

     Commands are started in batches of multiprocessing.cpu_count() * 2;
     a new batch is only started once communicate() has returned for
     every process of the previous one.

     Returns a list with one (returncode, stdout, stderr) tuple per
     command, in the order of cmds; stdout and stderr are whatever
     Popen.communicate() returned for that process.
     """
     batch_size = mp.cpu_count() * 2
     results = []
     for batch in chunk(cmds, batch_size):
         # start the whole batch before waiting on any of its members
         running = []
         for command in batch:
             running.append(Popen(command, **kwargs))
         for process in running:
             out, err = process.communicate()
             results.append((process.returncode, out, err))
     return results


 _source_extensions = '''
 .h
 .cc
 .cpp
 '''.split()


 def get_sources(source_dir, exclude_globs=None):
     """
     walk source_dir recursively and return a list with the absolute
     path of every file whose extension is in _source_extensions

     exclude_globs is an optional iterable of fnmatch patterns; a file
     whose absolute path matches any of them is left out of the result.
     """
     # None stands in for "no globs" so that no mutable default object
     # is shared between calls
     if exclude_globs is None:
         exclude_globs = ()
     sources = []
     for directory, _subdirs, basenames in os.walk(source_dir):
         for basename in basenames:
             path = os.path.join(directory, basename)

             # filter out non-source files
             if os.path.splitext(path)[1] not in _source_extensions:
                 continue

             # the globs are matched against the absolute path
             path = os.path.abspath(path)

             # filter out files that match one of the exclude globs
             if any(fnmatch(path, glob) for glob in exclude_globs):
                 continue

             sources.append(path)
     return sources


 def stdout_pathcolonline(completed_process, filenames):
     """
     given a completed process which may have reported some files as problematic
     by printing the path name followed by ':' then a line number, examine
     stdout and find the file names that were actually reported

     completed_process is a (returncode, stdout, stderr) tuple with stdout
     as bytes; filenames is an iterable of str paths to look for.

     returns a tuple (problem_files, stdout): problem_files is the set of
     entries of filenames for which some stdout line starts with the
     utf-8 encoded name followed by ':', and stdout is handed back unchanged.
     """
     _returncode, stdout, _stderr = completed_process
     # map each b'<path>:' line prefix back to the name it was built from,
     # so that the reported names do not carry the trailing ':'
     pending = {}
     for filename in filenames:
         pending[filename.encode('utf-8') + b':'] = filename
     problem_files = set()
     for line in stdout.splitlines():
         matched = next(
             (prefix for prefix in pending if line.startswith(prefix)), None)
         if matched is not None:
             # a file is reported once; stop testing later lines against it
             problem_files.add(pending.pop(matched))
     return problem_files, stdout
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import multiprocessing as mp
	import os
	from fnmatch import fnmatch
	from subprocess import Popen


	def chunk(seq, n):
	    """
	    divide a sequence into equal sized chunks
	    (the last chunk may be smaller, but won't be empty)

	    seq may be any iterable; n is the chunk size and must be at least 1.
	    returns a list of lists; an empty seq gives an empty list.
	    raises ValueError when n < 1, because no such chunk size can honour
	    the "equal sized, never empty" contract.
	    """
	    if n < 1:
	        raise ValueError("chunk size must be at least 1, got %r" % (n,))
	    chunks = []
	    some = []
	    for element in seq:
	        some.append(element)
	        # close the chunk the moment it is full, so that an empty
	        # chunk can never end up in the result
	        if len(some) == n:
	            chunks.append(some)
	            some = []
	    if some:
	        chunks.append(some)
	    return chunks


	def dechunk(chunks):
	    "concatenate the elements of every chunk, in order, into one new list"
	    return [element for piece in chunks for element in piece]


	def run_parallel(cmds, **kwargs):
	    """
	    Run each of cmds (with shared **kwargs) using subprocess.Popen
	    then wait for all of them to complete.
	    Runs batches of multiprocessing.cpu_count() * 2 from cmds;
	    a new batch is only started once communicate() has returned for
	    every process of the previous one.
	    returns a list of tuples containing each process'
	    returncode, stdout, stderr (in the order of cmds)
	    """
	    complete = []
	    for cmds_batch in chunk(cmds, mp.cpu_count() * 2):
	        # start the whole batch before waiting on any of its members
	        procs_batch = [Popen(cmd, **kwargs) for cmd in cmds_batch]
	        for proc in procs_batch:
	            stdout, stderr = proc.communicate()
	            complete.append((proc.returncode, stdout, stderr))
	    return complete


	_source_extensions = '''
	.h
	.cc
	.cpp
	'''.split()


	def get_sources(source_dir, exclude_globs=None):
	    """
	    walk source_dir recursively and return a list with the absolute
	    path of every file whose extension is in _source_extensions

	    exclude_globs is an optional iterable of fnmatch patterns; a file
	    whose absolute path matches any of them is left out of the result.
	    """
	    # None stands in for "no globs" so that no mutable default object
	    # is shared between calls
	    if exclude_globs is None:
	        exclude_globs = ()
	    sources = []
	    for directory, _subdirs, basenames in os.walk(source_dir):
	        for basename in basenames:
	            path = os.path.join(directory, basename)

	            # filter out non-source files
	            if os.path.splitext(path)[1] not in _source_extensions:
	                continue

	            # the globs are matched against the absolute path
	            path = os.path.abspath(path)

	            # filter out files that match one of the exclude globs
	            if any(fnmatch(path, glob) for glob in exclude_globs):
	                continue

	            sources.append(path)
	    return sources


	def stdout_pathcolonline(completed_process, filenames):
	    """
	    given a completed process which may have reported some files as problematic
	    by printing the path name followed by ':' then a line number, examine
	    stdout and find the file names that were actually reported

	    completed_process is a (returncode, stdout, stderr) tuple with stdout
	    as bytes; filenames is an iterable of str paths to look for.

	    returns a tuple (problem_files, stdout): problem_files is the set of
	    entries of filenames for which some stdout line starts with the
	    utf-8 encoded name followed by ':', and stdout is handed back unchanged.
	    """
	    _returncode, stdout, _stderr = completed_process
	    # map each b'<path>:' line prefix back to the name it was built from,
	    # so that the reported names do not carry the trailing ':'
	    pending = {}
	    for filename in filenames:
	        pending[filename.encode('utf-8') + b':'] = filename
	    problem_files = set()
	    for line in stdout.splitlines():
	        matched = next(
	            (prefix for prefix in pending if line.startswith(prefix)), None)
	        if matched is not None:
	            # a file is reported once; stop testing later lines against it
	            problem_files.add(pending.pop(matched))
	    return problem_files, stdout