| // |
| // Licensed to Apache Software Foundation (ASF) under one or more contributor |
| // license agreements. See the NOTICE file distributed with |
| // this work for additional information regarding copyright |
| // ownership. Apache Software Foundation (ASF) licenses this file to you under |
| // the Apache License, Version 2.0 (the "License"); you may |
| // not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| package license |
| |
| import ( |
| "reflect" |
| "regexp" |
| "runtime" |
| "strings" |
| |
| "github.com/apache/skywalking-eyes/license-eye/internal/logger" |
| ) |
| |
// Normalizer is a single text transformation step: it receives a piece of text and
// returns the transformed text.
type Normalizer func(text string) string
| |
| var ( |
| // normalizers is a list of Normalizer that can be applied to the license text, yet doesn't change the license's |
| // meanings, according to the matching guide in https://spdx.dev/license-list/matching-guidelines. |
| // The order matters. |
| normalizers = []Normalizer{ |
| OneLineNormalizer, |
| FlattenSpaceNormalizer, |
| SubstantiveTextsNormalizer, |
| FlattenSpaceNormalizer, |
| strings.ToLower, |
| strings.TrimSpace, |
| } |
| |
| // 6. Code Comment Indicators (https://spdx.dev/license-list/matching-guidelines.) |
| commentIndicators = []*regexp.Regexp{ |
| regexp.MustCompile(`(?m)^\s*#+`), // # |
| regexp.MustCompile(`(?m)^\s*//+`), // // |
| regexp.MustCompile(`(?m)^\s*"""+`), // """ |
| regexp.MustCompile(`(?m)^\s*\(\*+`), // (* |
| regexp.MustCompile(`(?m)^\s*;+`), // ; |
| |
| regexp.MustCompile(`(?m)^\s*/\*+`), // /* |
| regexp.MustCompile(`(?m)^\s*\*+/`), // */ |
| regexp.MustCompile(`(?m)^\s*\*+`), // * |
| |
| regexp.MustCompile(`(?m)^\s*<!--+`), // <!-- |
| regexp.MustCompile(`(?m)^\s*--+>`), // --> |
| regexp.MustCompile(`(?m)^\s*--+`), // -- |
| regexp.MustCompile(`(?m)^\s*~+`), // ~ |
| |
| regexp.MustCompile(`(?m)^\s*{-+`), // {- |
| regexp.MustCompile(`(?m)^\s*-}+`), // -} |
| |
| regexp.MustCompile(`(?m)^\s*::`), // :: |
| regexp.MustCompile(`(?m)^\s*\.\.`), // .. |
| regexp.MustCompile(`(?mi)^\s*@?REM`), // @REM |
| regexp.MustCompile(`(?mi)^\s*%+`), // % e.g. matlab |
| regexp.MustCompile(`(?m)^\s*{#+`), // {# |
| regexp.MustCompile(`(?m)^\s*#+}`), // #} |
| regexp.MustCompile(`(?m)^\s*{\*+`), // {* |
| regexp.MustCompile(`(?m)^\s*\*+}`), // *} |
| regexp.MustCompile(`(?m)^\s*'+`), // ' |
| } |
| |
| flattenSpace = regexp.MustCompile(`\s+`) |
| |
| substitutableTexts = []struct { |
| regex *regexp.Regexp |
| replacement string |
| }{ |
| {regexp.MustCompile(`(?i)\backnowledgement\b`), "acknowledgment"}, |
| {regexp.MustCompile(`(?i)\banalog\b`), "analogue"}, |
| {regexp.MustCompile(`(?i)\banalyze\b`), "analyse"}, |
| {regexp.MustCompile(`(?i)\bartifact\b`), "artefact"}, |
| {regexp.MustCompile(`(?i)\bauthorization\b`), "authorisation"}, |
| {regexp.MustCompile(`(?i)\bauthorized\b`), "authorised"}, |
| {regexp.MustCompile(`(?i)\bcaliber\b`), "calibre"}, |
| {regexp.MustCompile(`(?i)\bcanceled\b`), "cancelled"}, |
| {regexp.MustCompile(`(?i)\bcapitalizations\b`), "capitalisations"}, |
| {regexp.MustCompile(`(?i)\bcatalog\b`), "catalogue"}, |
| {regexp.MustCompile(`(?i)\bcategorize\b`), "categorise"}, |
| {regexp.MustCompile(`(?i)\bcenter\b`), "centre"}, |
| {regexp.MustCompile(`(?i)\bcopyright holder\b`), "copyright owner"}, |
| {regexp.MustCompile(`(?i)\bemphasized\b`), "emphasised"}, |
| {regexp.MustCompile(`(?i)\bfavor\b`), "favour"}, |
| {regexp.MustCompile(`(?i)\bfavorite\b`), "favourite"}, |
| {regexp.MustCompile(`(?i)\bfulfill\b`), "fulfil"}, |
| {regexp.MustCompile(`(?i)\bfulfillment\b`), "fulfilment"}, |
| {regexp.MustCompile(`(?i)\binitialize\b`), "initialise"}, |
| {regexp.MustCompile(`(?i)\bjudgement\b`), "judgment"}, |
| {regexp.MustCompile(`(?i)\blabeling\b`), "labelling"}, |
| {regexp.MustCompile(`(?i)\blabor\b`), "labour"}, |
| {regexp.MustCompile(`(?i)\blicence\b`), "license"}, |
| {regexp.MustCompile(`(?i)\bmaximize\b`), "maximise"}, |
| {regexp.MustCompile(`(?i)\bmodeled\b`), "modelled"}, |
| {regexp.MustCompile(`(?i)\bmodeling\b`), "modelling"}, |
| {regexp.MustCompile(`(?i)\bnoncommercial\b`), "non-commercial"}, |
| {regexp.MustCompile(`(?i)\boffense\b`), "offence"}, |
| {regexp.MustCompile(`(?i)\boptimize\b`), "optimise"}, |
| {regexp.MustCompile(`(?i)\borganization\b`), "organisation"}, |
| {regexp.MustCompile(`(?i)\borganize\b`), "organise"}, |
| {regexp.MustCompile(`(?i)\bpercent\b`), "per cent"}, |
| {regexp.MustCompile(`(?i)\bpractice\b`), "practise"}, |
| {regexp.MustCompile(`(?i)\bprogram\b`), "programme"}, |
| {regexp.MustCompile(`(?i)\brealize\b`), "realise"}, |
| {regexp.MustCompile(`(?i)\brecognize\b`), "recognise"}, |
| {regexp.MustCompile(`(?i)\bsignaling\b`), "signalling"}, |
| {regexp.MustCompile(`(?i)\bsub licen[sc]e\b`), "sublicense"}, |
| {regexp.MustCompile(`(?i)\bsub-licen[sc]e\b`), "sublicense"}, |
| {regexp.MustCompile(`(?i)\butilization\b`), "utilisation"}, |
| {regexp.MustCompile(`(?i)\bwhile\b`), "whilst"}, |
| {regexp.MustCompile(`(?i)\bwilfull\b`), "wilful"}, |
| |
| {regexp.MustCompile(`©`), "Copyright "}, |
| {regexp.MustCompile(`\(([cC])\)`), "Copyright "}, |
| {regexp.MustCompile(`\bhttps://`), "http://"}, |
| |
| {regexp.MustCompile(`“+`), `'`}, |
| {regexp.MustCompile(`”+`), `'`}, |
| {regexp.MustCompile(`’+`), "'"}, |
| {regexp.MustCompile("`+"), "'"}, |
| {regexp.MustCompile(`"+`), "'"}, |
| {regexp.MustCompile(`'+`), "'"}, |
| |
| {regexp.MustCompile(`(?i)\b(the )?Apache Software Foundation( \(ASF\))?`), "the ASF"}, |
| |
| // Prettier chars |
| {regexp.MustCompile(`[-=*]{3,}`), ""}, |
| |
| // Mozilla Public License, Version 2.0 |
| // Mozilla Public License Version 2.0 |
| { |
| regexp.MustCompile(`(?i)Mozilla Public License version 2\.0`), |
| "Mozilla Public License, Version 2.0", |
| }, |
| // Mozilla Public License, v. 2.0 |
| // ... |
| { |
| regexp.MustCompile(`(?i)Mozilla Public License,? v\. ?2\.0`), |
| "Mozilla Public License, v. 2.0", |
| }, |
| |
| { |
| regexp.MustCompile(`(?i)IN NO EVENT SHALL (.+?) BE LIABLE`), |
| "IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE", |
| }, |
| { |
| regexp.MustCompile(`(?i)The names of (its|the) contributors may not be used to endorse`), |
| "Neither the name of the copyright holder nor the names of its contributors may be used to endorse", |
| }, |
| { |
| regexp.MustCompile(`(?i)The name (.+?) may not be used to endorse`), |
| "Neither the name of the copyright holder nor the names of its contributors may be used to endorse", |
| }, |
| { |
| regexp.MustCompile(`(?i)(neither the name of) (.+?) (nor the names of)`), |
| "$1 the copyright holder $3", |
| }, |
| { |
| regexp.MustCompile(`(?i)you may not use this (file|library) except`), |
| "you may not use this file except", |
| }, |
| |
| { |
| regexp.MustCompile(`(?i)THIS SOFTWARE IS PROVIDED BY (.+?)'AS IS'`), |
| `THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'`, |
| }, |
| |
| { |
| regexp.MustCompile(`(?im)\(including the next paragraph\)`), |
| "", |
| }, |
| } |
| |
| lineProcessors = []struct { |
| regexp *regexp.Regexp |
| replacement string |
| }{ |
| // BSD-3-Clause |
| // MIT |
| { // remove optional header |
| regexp.MustCompile(`(?im)^\s*\(?(The )?MIT License( \((MIT|Expat)\))?\)?$`), |
| "", |
| }, |
| // ISC |
| { // remove optional header |
| regexp.MustCompile(`(?im)^\s*(The )?ISC License:?$`), |
| "", |
| }, |
| |
| // leading chars such as >, * just for pretty printing |
| { |
| regexp.MustCompile(`(?m)^[>*]\s+`), |
| " ", |
| }, |
| // Listing bullets such as a., b., 1., 2. |
| { |
| regexp.MustCompile(`(?m)^\s*[a-z0-9]\. `), |
| " ", |
| }, |
| // Listing bullets such as (a), (b), (1), (2) |
| { |
| regexp.MustCompile(`(?m)^\s*\([a-z0-9]\) `), |
| " ", |
| }, |
| // trailing chars such as >, * just for pretty printing |
| { |
| regexp.MustCompile(`(?m)\s+[*]$`), |
| " ", |
| }, |
| // Copyright (c) ..... |
| { |
| regexp.MustCompile(`(?m)^\s*Copyright (\([cC©]\))?.+$`), |
| "", |
| }, |
| |
| // This should be the last one processor |
| { |
| regexp.MustCompile("[\n\r]+"), |
| " ", |
| }, |
| } |
| ) |
| |
| // NormalizePattern applies a chain of Normalizers to the license pattern to make it cleaner for identification. |
| func NormalizePattern(pattern string) string { |
| for _, normalize := range normalizers { |
| pattern = normalize(pattern) |
| } |
| return pattern |
| } |
| |
| // NormalizeHeader applies a chain of Normalizers to the file header to make it cleaner for identification. |
| func NormalizeHeader(header string) string { |
| ns := append([]Normalizer{CommentIndicatorNormalizer}, normalizers...) |
| for _, normalize := range ns { |
| logger.Log.Debugf("After normalized by %+v:", runtime.FuncForPC(reflect.ValueOf(normalize).Pointer()).Name()) |
| header = normalize(header) |
| logger.Log.Debugln(header) |
| } |
| return header |
| } |
| |
| // Normalize applies a chain of Normalizers to the license text to make it cleaner for identification. |
| func Normalize(license string) string { |
| ns := append([]Normalizer{CommentIndicatorNormalizer}, normalizers...) |
| for _, normalize := range ns { |
| license = normalize(license) |
| } |
| return license |
| } |
| |
| // OneLineNormalizer normalizes the text line by line and finally merge them into one line. |
| func OneLineNormalizer(text string) string { |
| for _, s := range lineProcessors { |
| text = s.regexp.ReplaceAllString(text, s.replacement) |
| } |
| return text |
| } |
| |
| // SubstantiveTextsNormalizer normalizes the license text by substituting some words that |
| // doesn't change the meaning of the license. |
| func SubstantiveTextsNormalizer(text string) string { |
| for _, s := range substitutableTexts { |
| text = s.regex.ReplaceAllString(text, s.replacement) |
| } |
| return text |
| } |
| |
| // CommentIndicatorNormalizer trims the leading characters of comments, such as /*, <!--, --, (*, etc.. |
| func CommentIndicatorNormalizer(text string) string { |
| for _, leadingChars := range commentIndicators { |
| text = leadingChars.ReplaceAllString(text, "") |
| } |
| return text |
| } |
| |
| // FlattenSpaceNormalizer flattens continuous spaces into a single space. |
| func FlattenSpaceNormalizer(text string) string { |
| return flattenSpace.ReplaceAllString(text, " ") |
| } |