chore(license): add license checker for copyright and fix license files (cherry-pick #1674) (#1677)
https://github.com/apache/incubator-pegasus/issues/1676
There are several tasks/steps for this PR:
- develop a script to check the consistency between `.licenserc.yaml` and all files of the project.
- according to the check result, fix `.licenserc.yaml`.
- according to the fixed `.licenserc.yaml`, amend `LICENSE`.
The license checker can be run simply with `python3 scripts/check_license.py`.
This PR is to cherry-pick #1674 into v2.5 to solve issue #1676.
diff --git a/.licenserc.yaml b/.licenserc.yaml
index c6f63af..543a7c0 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -20,64 +20,37 @@
copyright-owner: Apache Software Foundation
paths-ignore:
+ # Configuration files to which copyright info cannot be added (it would otherwise lead to errors).
+ - '.rat-excludes'
+ - '**/*.csv'
+ - '**/*.json'
# All the type of licenses of this project should be added to LICENSE.
+ - 'DISCLAIMER-WIP'
- 'LICENSE'
- 'NOTICE'
- - '.github/pull_request_template.md'
+ # Template files of issues and pull requests for Github.
- '.github/ISSUE_TEMPLATE/bug_report.md'
- '.github/ISSUE_TEMPLATE/feature-request.md'
- '.github/ISSUE_TEMPLATE/general_question.md'
- - '.rat-excludes'
- - 'DISCLAIMER-WIP'
- - 'python-client/requirement.txt'
- - '.devcontainer/devcontainer.json'
- # TODO(yingchun): shell/* files are import from thirdparties, we can move them to thirdparty later.
- - 'src/shell/argh.h'
- - 'src/shell/linenoise/linenoise.c'
- - 'src/shell/linenoise/linenoise.h'
- - 'src/shell/sds/sds.c'
- - 'src/shell/sds/sds.h'
- - 'src/shell/sds/sdsalloc.h'
+ - '.github/pull_request_template.md'
+ # Image files for docs.
+ - '**/*.jpg'
+ - '**/*.png'
+ # Files in pdf format.
+ - '**/*.pdf'
+ # Special files for golang.
- '**/go.sum'
- - '**/*.csv'
- - '**/*.json'
+ # TODO(wangdan): Generated files for go client, could generate dynamically?
+ - 'go-client/idl/base/GoUnusedProtection__.go'
- 'go-client/idl/base/dsn_err_string.go'
- 'go-client/idl/base/rocskdb_err_string.go'
- - 'go-client/idl/base/GoUnusedProtection__.go'
+ # Special files for nodejs.
- '**/.npmigonre'
- # Copyright (c) Facebook, Inc
- - 'src/utils/TokenBucket.h'
- - 'src/utils/test/TokenBucketTest.cpp'
- - 'src/utils/test/TokenBucketTest.h'
- # https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE
- - 'src/utils/hpc_locks/autoresetevent.h'
- - 'src/utils/hpc_locks/autoreseteventcondvar.h'
- - 'src/utils/hpc_locks/benaphore.h'
- - 'src/utils/hpc_locks/bitfield.h'
- - 'src/utils/hpc_locks/readme.txt'
- - 'src/utils/hpc_locks/rwlock.h'
- - 'src/utils/hpc_locks/sema.h'
- # Copyright (c) xxxx The Chromium Authors
- - 'src/utils/safe_strerror_posix.h'
- - 'src/runtime/build_config.h'
- - 'src/utils/test/autoref_ptr_test.cpp'
- - 'src/utils/safe_strerror_posix.cpp'
- # Copyright 2017 The Abseil Authors
- - 'src/utils/absl/base/internal/invoke.h'
- - 'src/utils/absl/utility/utility.h'
- - 'src/utils/smart_pointers.h'
- - 'src/utils/string_view.h'
- - 'src/utils/test/memutil_test.cpp'
- - 'src/utils/test/string_view_test.cpp'
- - 'src/utils/test/smart_pointers_test.cpp'
- - 'src/utils/memutil.h'
- - 'src/utils/string_view.cpp'
- # Copyright (c) 2010-2011, Rob Jansen
- - 'cmake_modules/FindRT.cmake'
- - 'cmake_modules/FindDL.cmake'
- # Copyright (c) 2017 Guillaume Papin
- - 'scripts/run-clang-format.py'
- # need manual fix
+ # Special files for python.
+ - 'python-client/requirement.txt'
+ # Text files used for tests, to which copyright info cannot be added (it would otherwise lead to errors).
+ - 'src/aio/test/copy_source.txt'
+ - 'src/runtime/test/command.txt'
- 'src/failure_detector/test/gtest.filter'
- 'src/meta/test/meta_state/gtest.filter'
- 'src/meta/test/suite1'
@@ -85,6 +58,13 @@
- 'src/nfs/test/nfs_test_file1'
- 'src/nfs/test/nfs_test_file2'
- 'src/runtime/test/gtest.filter'
+ # Used for tests and should be empty, or ignore all comment lines (otherwise would lead to error).
+ - 'src/utils/test/config-empty.ini'
+ # Binary files used for tests, to which copyright info cannot be added (it would otherwise lead to errors).
+ - 'src/replica/duplication/test/log.1.0.handle_real_private_log'
+ - 'src/replica/duplication/test/log.1.0.handle_real_private_log2'
+ - 'src/replica/duplication/test/log.1.0.all_loaded_are_write_empties'
+ # Used for patches for thirdparties.
- 'thirdparty/fix_fds_for_macos.patch'
- 'thirdparty/fix_jemalloc_for_m1_on_macos.patch'
- 'thirdparty/fix_libevent_for_macos.patch'
@@ -92,8 +72,52 @@
- 'thirdparty/fix_s2_for_aarch64.patch'
- 'thirdparty/fix_thrift_for_cpp11.patch'
- 'thirdparty/rocksdb_fix_atomic_flush_0879c240.patch'
- # should be empty, or ignore all comment lines
- - 'src/utils/test/config-empty.ini'
+ # TODO(yingchun): shell/* files are imported from thirdparties, we can move them to thirdparty later.
+ # Copyright (c) 2016, Adi Shavit
+ - 'src/shell/argh.h'
+ # Copyright (c) 2010-2016, Salvatore Sanfilippo, etc.
+ - 'src/shell/linenoise/linenoise.c'
+ # Copyright (c) 2010-2014, Salvatore Sanfilippo, etc.
+ - 'src/shell/linenoise/linenoise.h'
+ # Copyright (c) 2006-2015, Salvatore Sanfilippo, etc.
+ - 'src/shell/sds/sds.c'
+ - 'src/shell/sds/sds.h'
+ - 'src/shell/sds/sdsalloc.h'
+ # Copyright (c) Facebook, Inc
+ - 'src/utils/TokenBucket.h'
+ - 'src/utils/test/TokenBucketTest.cpp'
+ - 'src/utils/test/TokenBucketTest.h'
+ # https://github.com/preshing/modern-cpp-threading/blob/master/LICENSE
+ - 'src/utils/hpc_locks/autoreseteventcondvar.h'
+ # https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE
+ - 'src/utils/hpc_locks/autoresetevent.h'
+ - 'src/utils/hpc_locks/benaphore.h'
+ - 'src/utils/hpc_locks/bitfield.h'
+ - 'src/utils/hpc_locks/readme.txt'
+ - 'src/utils/hpc_locks/rwlock.h'
+ - 'src/utils/hpc_locks/sema.h'
+ # Copyright (c) 2011 The Chromium Authors
+ - 'src/utils/safe_strerror_posix.h'
+ # Copyright (c) 2012 The Chromium Authors
+ - 'src/runtime/build_config.h'
+ - 'src/utils/test/autoref_ptr_test.cpp'
+ # Copyright (c) 2006-2009 The Chromium Authors
+ - 'src/utils/safe_strerror_posix.cpp'
+ # Copyright 2017 The Abseil Authors
+ - 'src/utils/absl/base/internal/invoke.h'
+ - 'src/utils/absl/utility/utility.h'
+ - 'src/utils/memutil.h'
+ - 'src/utils/smart_pointers.h'
+ - 'src/utils/string_view.cpp'
+ - 'src/utils/string_view.h'
+ - 'src/utils/test/memutil_test.cpp'
+ - 'src/utils/test/smart_pointers_test.cpp'
+ - 'src/utils/test/string_view_test.cpp'
+ # Copyright (c) 2010-2011, Rob Jansen
+ - 'cmake_modules/FindRT.cmake'
+ - 'cmake_modules/FindDL.cmake'
+ # Copyright (c) 2017 Guillaume Papin
+ - 'scripts/run-clang-format.py'
# The MIT License (MIT), Copyright (c) 2015 Microsoft Corporation
- 'cmake_modules/BaseFunctions.cmake'
- 'docs/rdsn-README.md'
@@ -105,7 +129,6 @@
- 'idl/replica_admin.thrift'
- 'scripts/compile_thrift.py'
- 'scripts/learn_stat.py'
- - 'src/common/api_common.h'
- 'src/runtime/api_layer1.h'
- 'src/runtime/api_task.h'
- 'src/utils/api_utilities.h'
@@ -113,7 +136,6 @@
- 'src/common/json_helper.h'
- 'src/runtime/rpc/rpc_stream.h'
- 'src/runtime/rpc/serialization.h'
- - 'src/common/serialization_helper/dsn.layer2_types.h'
- 'src/common/serialization_helper/dsn_types.h'
- 'src/common/serialization_helper/thrift_helper.h'
- 'src/runtime/serverlet.h'
@@ -131,7 +153,6 @@
- 'src/client/partition_resolver.h'
- 'src/replica/replica_base.h'
- 'src/common/replica_envs.h'
- - 'src/replica/replica_test_utils.h'
- 'src/common/replication.codes.h'
- 'src/replica/replication_app_base.h'
- 'src/client/replication_ddl_client.h'
@@ -187,7 +208,6 @@
- 'src/utils/configuration.h'
- 'src/utils/crc.h'
- 'src/utils/customizable_id.h'
- - 'src/utils/dlib.h'
- 'src/utils/enum_helper.h'
- 'src/utils/error_code.h'
- 'src/utils/errors.h'
@@ -220,7 +240,6 @@
- 'src/aio/test/aio.cpp'
- 'src/aio/test/clear.sh'
- 'src/aio/test/config.ini'
- - 'src/aio/test/copy_source.txt'
- 'src/aio/test/run.sh'
- 'src/block_service/test/config-test.ini'
- 'src/client/CMakeLists.txt'
@@ -363,7 +382,6 @@
- 'src/replica/replica_learn.cpp'
- 'src/replica/replica_stub.cpp'
- 'src/replica/replica_stub.h'
- - 'src/replica/replica_test_utils.cpp'
- 'src/replica/replication_app_base.cpp'
- 'src/replica/replication_service_app.cpp'
- 'src/replica/split/test/config-test.ini'
@@ -514,7 +532,6 @@
- 'src/replica/test/run.sh'
- 'src/runtime/CMakeLists.txt'
- 'src/runtime/core_main.cpp'
- - 'src/runtime/dsn.layer2_types.cpp'
- 'src/runtime/env.sim.cpp'
- 'src/runtime/env.sim.h'
- 'src/runtime/fault_injector.cpp'
@@ -569,7 +586,6 @@
- 'src/runtime/test/address_test.cpp'
- 'src/runtime/test/async_call.cpp'
- 'src/runtime/test/clear.sh'
- - 'src/runtime/test/command.txt'
- 'src/runtime/test/config-test-corrupt-message.ini'
- 'src/runtime/test/config-test-sim.ini'
- 'src/runtime/test/config-test.ini'
diff --git a/LICENSE b/LICENSE
index f686259..06453d0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -231,7 +231,8 @@
--------------------------------------------------------------------------------
-src/shell/linenoise/* - BSD-2-Clause License
+src/shell/linenoise/linenoise.h - BSD-2-Clause License
+src/shell/linenoise/LICENSE
Copyright (c) 2010-2014, Salvatore Sanfilippo <antirez at gmail dot com>
Copyright (c) 2010-2013, Pieter Noordhuis <pcnoordhuis at gmail dot com>
@@ -263,6 +264,38 @@
--------------------------------------------------------------------------------
+src/shell/linenoise/linenoise.c - BSD-2-Clause License
+
+ Copyright (c) 2010-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ Copyright (c) 2010-2013, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
src/shell/sds/* - BSD-2-Clause License
Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
@@ -327,12 +360,12 @@
--------------------------------------------------------------------------------
-src/utils/smart_pointers.h - Apache 2.0 License
-src/utils/string_view.h
-src/utils/absl/base/internal/invoke.h
+src/utils/absl/base/internal/invoke.h - Apache 2.0 License
src/utils/absl/utility/utility.h
src/utils/memutil.h
+src/utils/smart_pointers.h
src/utils/string_view.cpp
+src/utils/string_view.h
src/utils/test/memutil_test.cpp
src/utils/test/smart_pointers_test.cpp
src/utils/test/string_view_test.cpp
@@ -406,11 +439,12 @@
--------------------------------------------------------------------------------
src/utils/hpc_locks/autoreseteventcondvar.h - zlib License
-src/utils/hpc_locks/rwlock.h
src/utils/hpc_locks/autoresetevent.h
-src/utils/hpc_locks/sema.h
-src/utils/hpc_locks/bitfield.h
src/utils/hpc_locks/benaphore.h
+src/utils/hpc_locks/bitfield.h
+src/utils/hpc_locks/readme.txt
+src/utils/hpc_locks/rwlock.h
+src/utils/hpc_locks/sema.h
Copyright (c) 2015 Jeff Preshing
diff --git a/scripts/check_license.py b/scripts/check_license.py
new file mode 100755
index 0000000..8151979
--- /dev/null
+++ b/scripts/check_license.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import pprint
+
+PRJ_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))  # Parent of the directory holding this script.
+YML_PATH = os.path.join(PRJ_PATH, '.licenserc.yaml')  # The .licenserc.yaml located right under PRJ_PATH.
+
+IGNORED_STARTS_WITH = ['.git/', '.idea/']  # A path starting with any of these is skipped by is_path_ignored().
+IGNORED_ENDS_WITH = ['.swp', '.npmigonre', 'go.sum', '.csv', '.json', '.pdf', '.jpg', '.png']  # Same, for endings.
+IGNORED_NAMES = {'.licenserc.yaml', 'LICENSE', 'tags'}  # File names skipped by is_name_ignored().
+
+COPYRIGHT_MARKERS = [  # Substrings searched in the lines of the project files and of .licenserc.yaml.
+ "Copyright (c) 2016, Adi Shavit",
+ "Copyright (c) 2010-2016, Salvatore Sanfilippo",
+ "Copyright (c) 2010-2014, Salvatore Sanfilippo",
+ "Copyright (c) 2006-2015, Salvatore Sanfilippo",
+ "Copyright (c) Facebook, Inc",
+ "https://github.com/preshing/modern-cpp-threading",
+ "https://github.com/preshing/cpp11-on-multicore",
+ "Copyright (c) 2011 The Chromium Authors",
+ "Copyright (c) 2012 The Chromium Authors",
+ "Copyright (c) 2006-2009 The Chromium Authors",
+ "Copyright 2017 The Abseil Authors",
+ "Copyright (c) 2010-2011, Rob Jansen",
+ "Copyright (c) 2017 Guillaume Papin",
+ "Copyright (c) 2015 Microsoft Corporation",
+]
+IGNORED_COPYRIGHT_MARKERS = ["http://www.apache.org/licenses/LICENSE-2.0"]  # Files holding one are not reported.
+
+NO_COPYRIGHT_MARKER_KEY = "NO_COPYRIGHT_MARKER"  # Groups the files in which no marker is found.
+IGNORED_COPYRIGHT_MARKER_KEY = "IGNORED_COPYRIGHT_MARKER"  # Returned by mark_file() for an ignored marker.
+
+
+def mark_file(path):
+    """
+    Return the first copyright marker found in the file, IGNORED_COPYRIGHT_MARKER_KEY for an
+    ignored one, or NO_COPYRIGHT_MARKER_KEY if none is found before EOF or a decoding error.
+    """
+    # Decode as UTF-8, not the locale's default encoding, so the result is the same everywhere.
+    with open(path, encoding='utf-8') as f:
+        try:
+            for line in f:
+                if any(marker in line for marker in IGNORED_COPYRIGHT_MARKERS):
+                    return IGNORED_COPYRIGHT_MARKER_KEY
+                for marker in COPYRIGHT_MARKERS:
+                    if marker in line:
+                        return marker
+        except UnicodeDecodeError:
+            pass  # Some files might be binary: they are marked with no copyright.
+    return NO_COPYRIGHT_MARKER_KEY
+
+
+def is_path_ignored(path):
+    """
+    Tell whether a path is excluded from the license check.
+
+    A path is excluded when it begins with any entry of IGNORED_STARTS_WITH or
+    finishes with any entry of IGNORED_ENDS_WITH.
+    """
+    # str.startswith()/endswith() accept a tuple of candidates and match any of them.
+    prefixes, suffixes = tuple(IGNORED_STARTS_WITH), tuple(IGNORED_ENDS_WITH)
+    return path.startswith(prefixes) or path.endswith(suffixes)
+
+
+def is_name_ignored(name):  # True if the bare file name `name` is one of IGNORED_NAMES.
+ return name in IGNORED_NAMES
+
+
+def classify_files():
+    """
+    Scan all the files of the project and group them by the copyright marker they carry.
+
+    Returns a dict mapping each marker (or NO_COPYRIGHT_MARKER_KEY) to the set of paths,
+    relative to PRJ_PATH, of the files classified under it. Files matched by the ignore
+    rules and files marked with IGNORED_COPYRIGHT_MARKER_KEY are left out.
+    """
+    marked_files = {}
+    ignored_prefixes = tuple(IGNORED_STARTS_WITH)
+
+    for abs_dir, sub_dirs, file_names in os.walk(PRJ_PATH):
+        rel_dir = os.path.relpath(abs_dir, PRJ_PATH)
+        if rel_dir == '.':
+            # Drop the possible prefixed './' for the relative paths.
+            rel_dir = ''
+
+        # Prune the sub-directories whose every file would be ignored by prefix anyway
+        # (such as .git/), so that os.walk() does not descend into them at all.
+        sub_dirs[:] = [d for d in sub_dirs
+                       if not os.path.join(rel_dir, d, '').startswith(ignored_prefixes)]
+
+        for name in file_names:
+            rel_path = os.path.join(rel_dir, name)
+
+            # Some kinds of dirs/files should be ignored.
+            if is_name_ignored(name) or is_path_ignored(rel_path):
+                continue
+
+            # Some kinds of copyright could be ignored, such as Apache LICENSE-2.0.
+            marker = mark_file(os.path.join(abs_dir, name))
+            if marker != IGNORED_COPYRIGHT_MARKER_KEY:
+                marked_files.setdefault(marker, set()).add(rel_path)
+
+    return marked_files
+
+
+def parse_yml():
+    """
+    Collect the files listed in .licenserc.yaml, grouped by the copyright marker above them.
+
+    A line holding one of COPYRIGHT_MARKERS opens a group, which is closed by any line that
+    holds no quoted path; the paths outside every group go to NO_COPYRIGHT_MARKER_KEY, and
+    the paths matched by the ignore rules are skipped.
+
+    Returns a dict mapping each marker to the set of its paths. Raises ValueError for a
+    path whose closing quote is missing.
+    """
+    marked_files = {}
+    # The files without copyright info are marked with the specific key.
+    current_marker = NO_COPYRIGHT_MARKER_KEY
+
+    with open(YML_PATH, encoding='utf-8') as f:
+        for line_no, line in enumerate(f, start=1):
+            marker = next((m for m in COPYRIGHT_MARKERS if m in line), None)
+            if marker is not None:
+                # Files in following lines would belong to this copyright.
+                current_marker = marker
+                continue
+
+            # A comment never holds a file, even if it happens to contain a quote.
+            begin_idx = -1 if line.lstrip().startswith('#') else line.find("'")
+            if begin_idx < 0:
+                # There's no file in this line, thus copyright would be reset.
+                current_marker = NO_COPYRIGHT_MARKER_KEY
+                continue
+
+            end_idx = line.find("'", begin_idx + 1)
+            if end_idx < 0:
+                raise ValueError("Invalid file path line {line_no} in {yml_path}".format(
+                    line_no=line_no, yml_path=YML_PATH))
+
+            path = line[begin_idx + 1:end_idx]
+            if not is_name_ignored(os.path.basename(path)) and not is_path_ignored(path):
+                marked_files.setdefault(current_marker, set()).add(path)
+
+    return marked_files
+
+
+def check_diff():
+    """
+    Report every inconsistency between .licenserc.yaml and the real files of the project.
+
+    For each marker of .licenserc.yaml, print the files found only in .licenserc.yaml (after
+    a run of '+') and the ones found only in the project (after a run of '-'); at last print
+    the markers that are found in the project but missing from .licenserc.yaml.
+    """
+    yml_marked_files = parse_yml()
+    marked_files = classify_files()
+
+    for yml_marker in yml_marked_files:
+        names = {'yml_marker': yml_marker, 'yml_path': YML_PATH}
+        if yml_marker not in marked_files:
+            print("marker {yml_marker} in {yml_path} not found in any file of the project".format(**names))
+            continue
+
+        # Once compared, the marker is dropped so that only the unmatched ones are left at last.
+        files = marked_files.pop(yml_marker)
+        yml_files = yml_marked_files[yml_marker]
+        # Files in .licenserc.yaml, but not in the project.
+        yml_plus = yml_files - files
+        # Files in the project, but not in .licenserc.yaml.
+        yml_minus = files - yml_files
+
+        if yml_plus or yml_minus:
+            print("Diff found for marker '{yml_marker}' in {yml_path}:".format(**names))
+        else:
+            # .licenserc.yaml is consistent with the project.
+            print("No diff found for marker '{yml_marker}' in {yml_path}".format(**names))
+        if yml_plus:
+            print("{plus}: {yml_plus}".format(plus='+' * len(yml_plus), yml_plus=yml_plus))
+        if yml_minus:
+            print("{minus}: {yml_minus}".format(minus='-' * len(yml_minus), yml_minus=yml_minus))
+
+    # Whatever is left has been found in the project, but is missing from .licenserc.yaml.
+    if marked_files:
+        print("markers in some files of the project not found in {yml_path}:".format(yml_path=YML_PATH))
+        pprint.pprint(marked_files)
+
+
+def main():  # Entry point of the script: runs the consistency check, which prints its findings.
+ check_diff()
+
+
+if __name__ == '__main__':  # Run the check only when this file is executed as a script.
+ main()