blob: e8d2f4f45edce795c15e55d15a7dbbe7c7c362ff [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bufmodulebuild
import (
"context"
"encoding/hex"
"errors"
)
import (
"go.uber.org/zap"
"golang.org/x/crypto/sha3"
)
import (
"github.com/apache/dubbo-kubernetes/pkg/bufman/bufpkg/bufmodule"
)
type moduleFileSetBuilder struct {
logger *zap.Logger
moduleReader bufmodule.ModuleReader
}
func newModuleFileSetBuilder(
logger *zap.Logger,
moduleReader bufmodule.ModuleReader,
) *moduleFileSetBuilder {
return &moduleFileSetBuilder{
logger: logger,
moduleReader: moduleReader,
}
}
func (m *moduleFileSetBuilder) Build(
ctx context.Context,
module bufmodule.Module,
options ...BuildModuleFileSetOption,
) (bufmodule.ModuleFileSet, error) {
buildModuleFileSetOptions := &buildModuleFileSetOptions{}
for _, option := range options {
option(buildModuleFileSetOptions)
}
return m.build(
ctx,
module,
buildModuleFileSetOptions.workspace,
)
}
func (m *moduleFileSetBuilder) build(
ctx context.Context,
module bufmodule.Module,
workspace bufmodule.Workspace,
) (bufmodule.ModuleFileSet, error) {
var dependencyModules []bufmodule.Module
hashes := make(map[string]struct{})
moduleHash, err := protoPathsHash(ctx, module)
if err != nil {
return nil, err
}
hashes[moduleHash] = struct{}{}
if workspace != nil {
// From the perspective of the ModuleFileSet, we include all of the files
// specified in the workspace. When we build the Image from the ModuleFileSet,
// we construct it based on the TargetFileInfos, and thus only include the files
// in the transitive closure.
//
// This is defensible as we're saying that everything in the workspace is a potential
// dependency, even if some are not actual dependencies of this specific module. In this
// case, the extra modules are no different than unused dependencies in a buf.yaml/buf.lock.
//
// By including all the Modules from the workspace, we are potentially including the input
// Module itself. This is bad, and will result in errors when using the result ModuleFileSet.
// The ModuleFileSet expects a Module, and its dependency Modules, but it is not OK for
// a Module to both be the input Module and a dependency Module. We have no concept
// of Module "ID" - a Module may have a ModuleIdentity and commit associated with it,
// but there is no guarantee of this. To get around this, we do a hash of the .proto file
// paths within a Module, and say that Modules are equivalent if they contain the exact
// same .proto file paths. If they have the same .proto file paths, then we do not
// add the Module as a dependency.
//
// We know from bufmodule.Workspace that no two Modules in a Workspace will have overlapping
// file paths, therefore if the Module is in the Workspace and has equivalent file paths,
// we know that it must be the same module. If the Module is not in the workspace...why
// did we provide a workspace?
//
// We could use other methods for equivalence or to say "do not add":
//
// - If there are any overlapping files: for example, one module has a.proto, one module
// has b.proto, and both have c.proto. We don't use this heuristic as what we are looking
// for here is a situation where based on our Module construction, we have two actually-equivalent
// Modules. The existence of any overlapping files will result in an error during build, which
// is what we want. This would also indicate this Module did not come from the Workspace, given
// the property of file uniqueness in Workspaces.
// - Golang object equivalence: for example, doing "module != potentialDependencyModule". This
// happens to work since we only construct Modules once, but it's error-prone: it's totally
// possible to create two Module objects from the same source, and if they represent the
// same Module on disk/in the BSR, we don't want to include these as duplicates.
// - Full module digest and/or proto file content: We could include buf.yaml, buf.lock,
// README.md, etc, and also hash the actual content of the .proto files, but we're saying
// that this doesn't help us any more than just comparing .proto files, and may lead to
// false negatives. However, this is the most likely candidate as an alternative, as you
// could argue that at the ModuleFileSetBuilder level, we should say "assume any difference
// is a real difference".
//
// We could also determine which modules could be omitted here, but it would incur
// the cost of parsing the target files and detecting exactly which imports are
// used. We already get this for free in Image construction, so it's simplest and
// most efficient to bundle all of the modules together like so.
for _, potentialDependencyModule := range workspace.GetModules() {
potentialDependencyModuleHash, err := protoPathsHash(ctx, potentialDependencyModule)
if err != nil {
return nil, err
}
if _, ok := hashes[potentialDependencyModuleHash]; !ok {
dependencyModules = append(dependencyModules, potentialDependencyModule)
} else {
hashes[potentialDependencyModuleHash] = struct{}{}
}
}
}
// We know these are unique by remote, owner, repository and
// contain all transitive dependencies.
for _, dependencyModulePin := range module.DependencyModulePins() {
if workspace != nil {
if _, ok := workspace.GetModule(dependencyModulePin); ok {
// This dependency is already provided by the workspace, so we don't
// need to consult the ModuleReader.
continue
}
}
dependencyModule, err := m.moduleReader.GetModule(ctx, dependencyModulePin)
if err != nil {
return nil, err
}
dependencyModuleHash, err := protoPathsHash(ctx, dependencyModule)
if err != nil {
return nil, err
}
// At this point, this is really just a safety check.
if _, ok := hashes[dependencyModuleHash]; ok {
return nil, errors.New("module declared in DependencyModulePins but not in workspace was already added to the dependency Module set, this is a system error")
}
dependencyModules = append(dependencyModules, dependencyModule)
hashes[dependencyModuleHash] = struct{}{}
}
return bufmodule.NewModuleFileSet(module, dependencyModules), nil
}
// protoPathsHash returns a hash representing the paths of the .proto files within the Module.
func protoPathsHash(ctx context.Context, module bufmodule.Module) (string, error) {
fileInfos, err := module.SourceFileInfos(ctx)
if err != nil {
return "", err
}
shakeHash := sha3.NewShake256()
for _, fileInfo := range fileInfos {
_, err := shakeHash.Write([]byte(fileInfo.Path()))
if err != nil {
return "", err
}
}
data := make([]byte, 64)
if _, err := shakeHash.Read(data); err != nil {
return "", err
}
return hex.EncodeToString(data), nil
}