| // |
| // Licensed to Apache Software Foundation (ASF) under one or more contributor |
| // license agreements. See the NOTICE file distributed with |
| // this work for additional information regarding copyright |
| // ownership. Apache Software Foundation (ASF) licenses this file to you under |
| // the Apache License, Version 2.0 (the "License"); you may |
| // not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| package license |
| |
| import ( |
| "regexp" |
| "strings" |
| ) |
| |
| type Normalizer func(string) string |
| |
| var ( |
| // normalizers is a list of Normalizer that can be applied to the license text, yet doesn't change the license's |
| // meanings, according to the matching guide in https://spdx.dev/license-list/matching-guidelines. |
| // The order matters. |
| normalizers = []Normalizer{ |
| VariablesNormalizer, |
| OneLineNormalizer, |
| FlattenSpaceNormalizer, |
| SubstantiveTextsNormalizer, |
| FlattenSpaceNormalizer, |
| strings.ToLower, |
| strings.TrimSpace, |
| } |
| |
| // 6. Code Comment Indicators (https://spdx.dev/license-list/matching-guidelines.) |
| commentIndicators = []*regexp.Regexp{ |
| regexp.MustCompile(`(?m)^\s*#+`), // # |
| regexp.MustCompile(`(?m)^\s*//+`), // // |
| regexp.MustCompile(`(?m)^\s*"""+`), // """ |
| regexp.MustCompile(`(?m)^\s*\(\*+`), // (* |
| |
| regexp.MustCompile(`(?m)^\s*/\*+`), // /* |
| regexp.MustCompile(`(?m)^\s*\*+/`), // */ |
| regexp.MustCompile(`(?m)^\s*\*+`), // * |
| |
| regexp.MustCompile(`(?m)^\s*<!--+`), // <!-- |
| regexp.MustCompile(`(?m)^\s*--+>`), // --> |
| regexp.MustCompile(`(?m)^\s*--+`), // -- |
| regexp.MustCompile(`(?m)^\s*~+`), // ~ |
| |
| regexp.MustCompile(`(?m)^\s*{-+`), // {- |
| regexp.MustCompile(`(?m)^\s*-}+`), // -} |
| |
| regexp.MustCompile(`(?m)^\s*::`), // :: |
| regexp.MustCompile(`(?mi)^\s*@?REM`), // @REM |
| } |
| |
| flattenSpace = regexp.MustCompile(`\s+`) |
| |
| substitutableTexts = []struct { |
| regex *regexp.Regexp |
| replacement string |
| }{ |
| {regexp.MustCompile(`(?i)\backnowledgement\b`), "acknowledgment"}, |
| {regexp.MustCompile(`(?i)\banalog\b`), "analogue"}, |
| {regexp.MustCompile(`(?i)\banalyze\b`), "analyse"}, |
| {regexp.MustCompile(`(?i)\bartifact\b`), "artefact"}, |
| {regexp.MustCompile(`(?i)\bauthorization\b`), "authorisation"}, |
| {regexp.MustCompile(`(?i)\bauthorized\b`), "authorised"}, |
| {regexp.MustCompile(`(?i)\bcaliber\b`), "calibre"}, |
| {regexp.MustCompile(`(?i)\bcanceled\b`), "cancelled"}, |
| {regexp.MustCompile(`(?i)\bcapitalizations\b`), "capitalisations"}, |
| {regexp.MustCompile(`(?i)\bcatalog\b`), "catalogue"}, |
| {regexp.MustCompile(`(?i)\bcategorize\b`), "categorise"}, |
| {regexp.MustCompile(`(?i)\bcenter\b`), "centre"}, |
| {regexp.MustCompile(`(?i)\bcopyright holder\b`), "copyright owner"}, |
| {regexp.MustCompile(`(?i)\bemphasized\b`), "emphasised"}, |
| {regexp.MustCompile(`(?i)\bfavor\b`), "favour"}, |
| {regexp.MustCompile(`(?i)\bfavorite\b`), "favourite"}, |
| {regexp.MustCompile(`(?i)\bfulfill\b`), "fulfil"}, |
| {regexp.MustCompile(`(?i)\bfulfillment\b`), "fulfilment"}, |
| {regexp.MustCompile(`(?i)\binitialize\b`), "initialise"}, |
| {regexp.MustCompile(`(?i)\bjudgement\b`), "judgment"}, |
| {regexp.MustCompile(`(?i)\blabeling\b`), "labelling"}, |
| {regexp.MustCompile(`(?i)\blabor\b`), "labour"}, |
| {regexp.MustCompile(`(?i)\blicense\b`), "licence"}, |
| {regexp.MustCompile(`(?i)\bmaximize\b`), "maximise"}, |
| {regexp.MustCompile(`(?i)\bmodeled\b`), "modelled"}, |
| {regexp.MustCompile(`(?i)\bmodeling\b`), "modelling"}, |
| {regexp.MustCompile(`(?i)\bnoncommercial\b`), "non-commercial"}, |
| {regexp.MustCompile(`(?i)\boffense\b`), "offence"}, |
| {regexp.MustCompile(`(?i)\boptimize\b`), "optimise"}, |
| {regexp.MustCompile(`(?i)\borganization\b`), "organisation"}, |
| {regexp.MustCompile(`(?i)\borganize\b`), "organise"}, |
| {regexp.MustCompile(`(?i)\bpercent\b`), "per cent"}, |
| {regexp.MustCompile(`(?i)\bpractice\b`), "practise"}, |
| {regexp.MustCompile(`(?i)\bprogram\b`), "programme"}, |
| {regexp.MustCompile(`(?i)\brealize\b`), "realise"}, |
| {regexp.MustCompile(`(?i)\brecognize\b`), "recognise"}, |
| {regexp.MustCompile(`(?i)\bsignaling\b`), "signalling"}, |
| {regexp.MustCompile(`(?i)\bsublicense\b`), "sub-license"}, |
| {regexp.MustCompile(`(?i)\bsub-license\b`), "sub license"}, |
| {regexp.MustCompile(`(?i)\bsublicense\b`), "sub license"}, |
| {regexp.MustCompile(`(?i)\butilization\b`), "utilisation"}, |
| {regexp.MustCompile(`(?i)\bwhile\b`), "whilst"}, |
| {regexp.MustCompile(`(?i)\bwilfull\b`), "wilful"}, |
| |
| {regexp.MustCompile(`©`), "Copyright "}, |
| {regexp.MustCompile(`\(c\)`), "Copyright "}, |
| {regexp.MustCompile(`\bhttps://`), "http://"}, |
| |
| {regexp.MustCompile(`(?i)\b(the )?Apache Software Foundation( \(ASF\))?`), "the ASF"}, |
| } |
| |
| variables = []struct { |
| regexp *regexp.Regexp |
| replacement string |
| }{ |
| // BSD-3-Clause |
| { |
| regexp.MustCompile(`(?im)(^(Copyright \(c\)) (\d{4}) (.+?) (All rights reserved\.)?$\n?)+`), |
| "$2 [year] [owner]. $5", |
| }, |
| { |
| regexp.MustCompile(`(?i)(neither the name of) (.+?) (nor the names of)`), |
| "$1 the copyright holder $3", |
| }, |
| // MIT |
| { // remove optional header |
| regexp.MustCompile(`(?im)^The MIT License \(MIT\)$`), |
| "", |
| }, |
| { |
| regexp.MustCompile(`(?im)^(Copyright \(c\)) (\d{4}) (.+?)$`), |
| "$1 [year] [owner]", |
| }, |
| { |
| regexp.MustCompile(`(?im)\(including the next paragraph\)`), |
| "", |
| }, |
| } |
| ) |
| |
| // NormalizePattern applies a chain of Normalizers to the license pattern to make it cleaner for identification. |
| func NormalizePattern(pattern string) string { |
| for _, normalize := range normalizers { |
| pattern = normalize(pattern) |
| } |
| return pattern |
| } |
| |
| // NormalizeHeader applies a chain of Normalizers to the file header to make it cleaner for identification. |
| func NormalizeHeader(header string) string { |
| ns := append([]Normalizer{CommentIndicatorNormalizer}, normalizers...) |
| for _, normalize := range ns { |
| header = normalize(header) |
| } |
| return header |
| } |
| |
| // Normalize applies a chain of Normalizers to the license text to make it cleaner for identification. |
| func Normalize(license string) string { |
| for _, normalize := range normalizers { |
| license = normalize(license) |
| } |
| return license |
| } |
| |
// lineBreakRuns matches a run of one or more line-break characters (\n or \r).
// It is compiled once at package initialization instead of on every call.
var lineBreakRuns = regexp.MustCompile("[\n\r]+")

// OneLineNormalizer flattens the license text into one line by replacing every run of
// line breaks (\n and/or \r) with a single space.
func OneLineNormalizer(text string) string {
	return lineBreakRuns.ReplaceAllString(text, " ")
}
| |
| // SubstantiveTextsNormalizer normalizes the license text by substituting some words that |
| // doesn't change the meaning of the license. |
| func SubstantiveTextsNormalizer(text string) string { |
| for _, s := range substitutableTexts { |
| text = s.regex.ReplaceAllString(text, s.replacement) |
| } |
| return text |
| } |
| |
| // CommentIndicatorNormalizer trims the leading characters of comments, such as /*, <!--, --, (*, etc.. |
| func CommentIndicatorNormalizer(text string) string { |
| for _, leadingChars := range commentIndicators { |
| text = leadingChars.ReplaceAllString(text, "") |
| } |
| return text |
| } |
| |
| // FlattenSpaceNormalizer flattens continuous spaces into a single space. |
| func FlattenSpaceNormalizer(text string) string { |
| return flattenSpace.ReplaceAllString(text, " ") |
| } |
| |
| // VariablesNormalizer replace the variables actual value into variable name. |
| func VariablesNormalizer(text string) string { |
| for _, v := range variables { |
| text = v.regexp.ReplaceAllString(text, v.replacement) |
| } |
| |
| return text |
| } |