blob: c37774d21010fd52287c641b8ee3bff66bb119e0 [file] [log] [blame]
load("//:plugin.bzl", "ProtoPluginInfo")
ProtoCompileInfo = provider(fields = {
"label": "label object",
"plugins": "ProtoPluginInfo object",
"descriptor": "descriptor set file",
"outputs": "generated protoc outputs",
"files": "final generated files",
"protos": "generated protos (copies)",
"args": "proto arguments",
"tools": "proto tools",
"verbose": "verbose level",
def _get_plugin_out(ctx, plugin):
if not plugin.out:
return None
filename = plugin.out
filename = filename.replace("{name}",
return filename
# From
_VIRTUAL_IMPORTS = "/_virtual_imports/"
def _strip_virtual_import(path):
pos = path.find(_VIRTUAL_IMPORTS)
path = path[pos + len(_VIRTUAL_IMPORTS):]
return path.split("/", 1)[-1]
def _get_proto_filename(src):
"""Assemble the filename for a proto
src: the .proto <File>
<string> of the filename.
parts = src.short_path.split("/")
if len(parts) > 1 and parts[0] == "..":
return "/".join(parts[2:])
return src.short_path
def _apply_plugin_transitivity_rules(ctx, targets, plugin):
"""Process the proto target list according to plugin transitivity rules
ctx: the <ctx> object
targets: the dict<string,File> of .proto files that we intend to compile.
plugin: the <PluginInfo> object.
<list<File>> the possibly filtered list of .proto <File>s
# Iterate transitivity rules like '{ "google/protobuf": "exclude" }'. The
# only rule type implemented is "exclude", which checks if the pathname or
# dirname ends with the given pattern. If so, remove that item in the
# targets list.
# Why does this feature exist? Well, library rules like C# require all the
# proto files to be present during the compilation (collected via transitive
# sources). However, since the well-known types are already present in the
# library dependencies, we don't actually want to compile well-known types
# (but do want to compile everything else).
transitivity = {}
for pattern, rule in transitivity.items():
if rule == "exclude":
for key, target in targets.items():
if ctx.attr.verbose > 2:
print("Checking '%s' endswith '%s'" % (target.short_path, pattern))
if target.dirname.endswith(pattern) or target.path.endswith(pattern):
if ctx.attr.verbose > 2:
print("Removing '%s' from the list of files to compile as plugin '%s' excluded it" % (target.short_path,
elif ctx.attr.verbose > 2:
print("Keeping '%s' (not excluded)" % (target.short_path))
elif rule == "include":
for key, target in targets.items():
if target.dirname.endswith(pattern) or target.path.endswith(pattern):
if ctx.attr.verbose > 2:
print("Keeping '%s' (explicitly included)" % (target.short_path))
if ctx.attr.verbose > 2:
print("Removing '%s' from the list of files to compile as plugin '%s' did not include it" % (target.short_path,
fail("Unknown transitivity rule '%s'" % rule)
return targets
def get_plugin_outputs(ctx, descriptor, outputs, src, proto, plugin):
"""Get the predicted generated outputs for a given plugin
ctx: the <ctx> object
descriptor: the descriptor <Generated File>
outputs: the list of outputs.
src: the orginal .proto source <Source File>.
proto: the copied .proto <Generated File> (the one in the package 'staging area')
plugin: the <PluginInfo> object.
<list<Generated File>> the augmented list of files that will be generated
for output in plugin.outputs:
filename = _get_output_filename(src, plugin, output)
if not filename:
sibling = _get_output_sibling_file(output, proto, descriptor)
outputs.append(ctx.actions.declare_file(filename, sibling = sibling))
return outputs
def _get_output_filename(src, plugin, pattern):
"""Build the predicted filename for file generated by the given plugin.
A 'proto_plugin' rule allows one to define the predicted outputs. For
flexibility, we allow special tokens in the output filename that get
replaced here. The overall pattern is '{token}' mimicking the python
'format' feature.
Additionally, there are '|' characters like '{basename|pascal}' that can be
read as 'take the basename and pipe that through the pascal function'.
src: the .proto <File>
plugin: the <PluginInfo> object.
pattern: the input pattern string
the replaced string
# If output to srcjar, don't emit a per-proto output file.
if plugin.out:
return None
# Slice off this prefix if it exists, we don't use it here.
if pattern.startswith("{package}/"):
pattern = pattern[len("{package}/"):]
basename = src.basename
if basename.endswith(".proto"):
basename = basename[:-6]
elif basename.endswith(".protodevel"):
basename = basename[:-11]
filename = basename
if pattern.find("{basename}") != -1:
filename = pattern.replace("{basename}", basename)
elif pattern.find("{basename|pascal}") != -1:
filename = pattern.replace("{basename|pascal}", _pascal_case(basename))
elif pattern.find("{basename|pascal|objc}") != -1:
filename = pattern.replace("{basename|pascal|objc}", _pascal_objc(basename))
elif pattern.find("{basename|rust_keyword}") != -1:
filename = pattern.replace("{basename|rust_keyword}", _rust_keyword(basename))
filename = basename + pattern
return filename
def _get_output_sibling_file(pattern, proto, descriptor):
"""Get the correct place to
The ctx.actions.declare_file has a 'sibling = <File>' feature that allows
one to declare files in the same directory as the sibling.
This function checks for the prefix special token '{package}' and, if true,
uses the descriptor as the sibling (which declares the output file will be
in the root of the generated tree).
pattern: the input filename pattern <string>
proto: the .proto <Generated File> (in the staging area)
descriptor: the descriptor <File> that marks the staging root.
the <File> to be used as the correct sibling.
if pattern.startswith("{package}/"):
return descriptor
return proto
rust_keywords = {
"as": True,
"break": True,
"const": True,
"continue": True,
"crate": True,
"else": True,
"enum": True,
"extern": True,
"false": True,
"fn": True,
"for": True,
"if": True,
"impl": True,
"let": True,
"loop": True,
"match": True,
"mod": True,
"move": True,
"mut": True,
"pub": True,
"ref": True,
"return": True,
"self": True,
"Self": True,
"static": True,
"struct": True,
"super": True,
"trait": True,
"true": True,
"type": True,
"unsafe": True,
"use": True,
"where": True,
"while": True,
objc_upper_segments = {
"url": "URL",
"http": "HTTP",
"https": "HTTPS",
def _capitalize(s):
"""Capitalize a string - only first letter
s (string): The input string to be capitalized.
(string): The capitalized string.
return s[0:1].upper() + s[1:]
def get_plugin_out_arg(ctx, outdir, plugin, plugin_outfiles):
"""Build the --java_out argument
ctx: the <ctx> object
outdir: the package output directory <string>
plugin: the <PluginInfo> object.
plugin_outfiles: The <dict<string,<File>>. For example, {closure: "library.js"}
<string> for the protoc arg list.
arg = outdir
if plugin.outdir:
arg = plugin.outdir.replace("{name}", outdir)
elif plugin.out:
outfile = plugin_outfiles[]
arg = outfile.path
# Collate a list of options from the plugin itself PLUS options from the
# global plugin_options list (if they exist)
options = []
options += getattr(plugin, "options", [])
options += getattr(ctx.attr, "plugin_options", [])
if options:
arg = "%s:%s" % (",".join(_get_plugin_options(ctx, options)), arg)
return "--%s_out=%s" % (, arg)
def _get_plugin_options(ctx, options):
"""Build a plugin option list
ctx: the <ctx> object
options: list<string> options from the <PluginInfo>
<string> for the --plugin_out= arg
return [_get_plugin_option(ctx, option) for option in options]
def _get_plugin_option(ctx, option):
"""Build a plugin option
ctx: the <ctx> object
option: string from the <PluginInfo>
<string> for the --plugin_out= arg
return option.replace("{name}",
def _rust_keyword(s):
"""Check if arg is a rust keyword and append '_pb' if true.
s (string): The input string to be capitalized.
(string): The appended string.
return s + "_pb" if rust_keywords.get(s) else s
def _pascal_objc(s):
"""Convert pascal_case -> PascalCase
Objective C uses pascal case, but there are e exceptions that it uppercases
the entire segment: url, http, and https.
s (string): The input string to be capitalized.
Returns: (string): The capitalized string.
segments = []
for segment in s.split("_"):
repl = objc_upper_segments.get(segment)
if repl:
segment = repl
segment = _capitalize(segment)
return "".join(segments)
def _pascal_case(s):
"""Convert pascal_case -> PascalCase
s (string): The input string to be capitalized.
(string): The capitalized string.
return "".join([_capitalize(part) for part in s.split("_")])
def is_in_virtual_imports(source_file, virtual_folder = _VIRTUAL_IMPORTS):
"""Determines if source_file is virtual (is placed in _virtual_imports
subdirectory). The output of all proto_library targets which use
import_prefix and/or strip_import_prefix arguments is placed under
_virtual_imports directory.
source_file: A proto file.
virtual_folder: The virtual folder name (is set to "_virtual_imports"
by default)
True if source_file is located under _virtual_imports, False otherwise.
return not source_file.is_source and virtual_folder in source_file.path
def copy_proto(ctx, descriptor, src):
"""Copy a proto to the 'staging area'
ctx: the <ctx> object
descriptor: the descriptor <File> that marks the root of the 'staging area'.
src: the source .proto <File>
<Generated File> for the copied .proto
if is_in_virtual_imports(src):
proto_rpath = _strip_virtual_import(src.path)
proto_rpath = _get_proto_filename(src)
proto_copy_path = "/".join([descriptor.dirname, proto_rpath])
proto = ctx.actions.declare_file(proto_rpath, sibling = descriptor)
mnemonic = "CopyProto",
inputs = [src],
outputs = [proto],
command = "cp %s %s" % (src.path, proto_copy_path),
return proto
def _copy_jar_to_srcjar(ctx, jar):
"""Copy .jar to .srcjar
ctx: the <ctx> object
jar: the <Generated File> of a jar containing source files.
<Generated File> for the renamed file
srcjar = ctx.actions.declare_file("%s/%s.srcjar" % (,
mnemonic = "CopySrcjar",
inputs = [jar],
outputs = [srcjar],
command = "mv %s %s" % (jar.path, srcjar.path),
return srcjar
def get_plugin_runfiles(tool):
"""Gather runfiles for a plugin.
files = []
if not tool:
return files
info = tool[DefaultInfo]
if not info:
return files
if info.files:
files += info.files.to_list()
if info.default_runfiles:
runfiles = info.default_runfiles
if runfiles.files:
files += runfiles.files.to_list()
if info.data_runfiles:
runfiles = info.data_runfiles
if runfiles.files:
files += runfiles.files.to_list()
return files
def proto_compile_impl(ctx):
### Part 1: setup variables used in scope
# <int> verbose level
verbose = ctx.attr.verbose
# <File> the protoc tool
protoc = ctx.executable.protoc
# <File> for the output descriptor. Often used as the sibling in
# 'declare_file' actions.
descriptor = ctx.outputs.descriptor
# <string> The directory where that generated descriptor is.
outdir = descriptor.dirname
# <list<ProtoInfo>> A list of ProtoInfo
deps = [dep[ProtoInfo] for dep in ctx.attr.deps]
# <list<PluginInfo>> A list of PluginInfo
plugins = [plugin[ProtoPluginInfo] for plugin in ctx.attr.plugins]
# <list<File>> The list of .proto files that will exist in the 'staging
# area'. We copy them from their source location into place such that a
# single '-I.' at the package root will satisfy all import paths.
protos = []
# <dict<string,File>> The set of .proto files to compile, used as the final
# list of arguments to protoc. This is a subset of the 'protos' list that
# are directly specified in the proto_library deps, but excluding other
# transitive .protos. For example, even though we might transitively depend
# on 'google/protobuf/any.proto', we don't necessarily want to actually
# generate artifacts for it when compiling 'foo.proto'. Maintained as a dict
# for set semantics. The key is the value from File.path.
targets = {}
# <dict<string,File>> A mapping from plugin name to the plugin tool. Used to
# generate the --plugin=protoc-gen-KEY=VALUE args
plugin_tools = {}
# <dict<string,<File> A mapping from to File. In the case
# of plugins that specify a single output 'archive' (like java), we gather
# them in this dict. It is used to generate args like
# '--java_out=libjava.jar'.
plugin_outfiles = {}
# <list<File>> The list of srcjars that we're generating (like
# 'foo.srcjar').
srcjars = []
# <list<File>> The list of generated artifacts like '' that we
# expect to be produced.
outputs = []
# Additional data files from needed by plugin tools that are not
# single binaries.
data = []
### Part 2: gather plugin.out artifacts
# Some protoc plugins generate a set of output files (like python) while
# others generate a single 'archive' file that contains the individual
# outputs (like java). This first loop is for the latter type. In this
# scenario, the PluginInfo.out attribute will exist; the predicted file
# output location is relative to the package root, marked by the descriptor
# file. Jar outputs are gathered as a special case as we need to
# post-process them to have a 'srcjar' extension (java_library rules don't
# accept source jars with a 'jar' extension)
for plugin in plugins:
if plugin.executable:
plugin_tools[] = plugin.executable
data += + get_plugin_runfiles(plugin.tool)
filename = _get_plugin_out(ctx, plugin)
if not filename:
out = ctx.actions.declare_file(filename, sibling = descriptor)
plugin_outfiles[] = out
if out.path.endswith(".jar"):
srcjar = _copy_jar_to_srcjar(ctx, out)
### Part 3a: Gather generated artifacts for each dependency .proto source file.
for dep in deps:
# Iterate all the directly specified .proto files. If we have already
# processed this one, skip it to avoid declaring duplicate outputs.
# Create an action to copy the proto into our staging area. Consult the
# plugin to assemble the actual list of predicted generated artifacts
# and save these in the 'outputs' list.
for src in dep.direct_sources:
if targets.get(src.path):
proto = copy_proto(ctx, descriptor, src)
targets[src] = proto
# Iterate all transitive .proto files. If we already processed in the
# loop above, skip it. Otherwise add a copy action to get it into the
# 'staging area'
for src in dep.transitive_sources.to_list():
if targets.get(src):
if verbose > 2:
print("transitive source: %r" % src)
proto = copy_proto(ctx, descriptor, src)
if ctx.attr.transitive:
targets[src] = proto
### Part 3b: apply transitivity rules
# If the 'transitive = true' was enabled, we collected all the protos into
# the 'targets' list.
# At this point we want to post-process that list and remove any protos that
# might be incompatible with the plugin transitivity rules.
if ctx.attr.transitive:
for plugin in plugins:
targets = _apply_plugin_transitivity_rules(ctx, targets, plugin)
### Part 3c: collect generated artifacts for all in the target list of protos to compile
for src, proto in targets.items():
for plugin in plugins:
outputs = get_plugin_outputs(ctx, descriptor, outputs, src, proto, plugin)
### Part 4: build list of arguments for protoc
args = ["--descriptor_set_out=%s" % descriptor.path]
# By default we have a single 'proto_path' argument at the 'staging area'
# root.
args += ["--proto_path=%s" % outdir]
if ctx.attr.include_imports:
args += ["--include_imports"]
if ctx.attr.include_source_info:
args += ["--include_source_info"]
for plugin in plugins:
args += [get_plugin_out_arg(ctx, outdir, plugin, plugin_outfiles)]
args += ["--plugin=protoc-gen-%s=%s" % (k, v.path) for k, v in plugin_tools.items()]
args += [proto.path for proto in targets.values()]
### Part 5: build the final protoc command and declare the action
mnemonic = "ProtoCompile"
command = " ".join([protoc.path] + args)
if verbose > 0:
print("%s: %s" % (mnemonic, command))
if verbose > 1:
command += " && echo '\n##### SANDBOX AFTER RUNNING PROTOC' && find ."
if verbose > 2:
command = "echo '\n##### SANDBOX BEFORE RUNNING PROTOC' && find . && " + command
if verbose > 3:
command = "env && " + command
for f in outputs:
print("expected output: ", f.path)
tools = [protoc] + plugin_tools.values()
inputs = protos + data
outs = outputs + [descriptor] + ctx.outputs.outputs
if verbose > 3:
for s in args:
print("ARG: %s" % s)
for k, f in targets.items():
print("TARGET: %s=%s" % (k, f))
for f in tools:
print("TOOL: %s" % f.path)
for f in inputs:
print("INPUT: %s" % f.path)
for f in outs:
print("OUTPUT: %s" % f.path)
mnemonic = mnemonic,
command = command,
inputs = inputs,
outputs = outs,
tools = tools,
### Part 6: assemble output providers
# The files for 'DefaultInfo' include any explicit outputs for the rule. If
# we are generating srcjars, use those as the final outputs rather than
# their '.jar' intermediates. Otherwise include all the file outputs.
# NOTE: this looks a little wonky here. It probably works in simple cases
# where there list of plugins has length 1 OR all outputting to jars OR all
# not outputting to jars. Probably would break here if they were mixed.
files = [] + ctx.outputs.outputs
if len(srcjars) > 0:
files += srcjars
files += outputs
if len(plugin_outfiles) > 0:
files += plugin_outfiles.values()
return [ProtoCompileInfo(
label = ctx.label,
plugins = plugins,
protos = protos,
outputs = outputs,
files = files,
tools = plugin_tools,
args = args,
descriptor = descriptor,
), DefaultInfo(files = depset(files))]
proto_compile = rule(
implementation = proto_compile_impl,
attrs = {
"deps": attr.label_list(
doc = "proto_library dependencies",
mandatory = True,
providers = [ProtoInfo],
"plugins": attr.label_list(
doc = "List of protoc plugins to apply",
providers = [ProtoPluginInfo],
mandatory = True,
"plugin_options": attr.string_list(
doc = "List of additional 'global' options to add (applies to all plugins)",
"outputs": attr.output_list(
doc = "Escape mechanism to explicitly declare files that will be generated",
"protoc": attr.label(
doc = "The protoc tool",
default = "@com_google_protobuf//:protoc",
cfg = "host",
executable = True,
doc = "Increase verbose level for more debugging",
"include_imports": attr.bool(
doc = "Pass the --include_imports argument to the protoc_plugin",
default = True,
"include_source_info": attr.bool(
doc = "Pass the --include_source_info argument to the protoc_plugin",
default = True,
"transitive": attr.bool(
doc = "Emit transitive artifacts",
"transitivity": attr.string_dict(
doc = "Transitive rules. When the 'transitive' property is enabled, this string_dict can be used to exclude protos from the compilation list",
# TODO(pcj) remove this
outputs = {
"descriptor": "%{name}/descriptor.source.bin",
output_to_genfiles = True,