| // Copyright 2016 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package fields |
| |
| // This file was copied from https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/fold.go. |
| // Only the license and package were changed. |
| |
| import ( |
| "bytes" |
| "unicode/utf8" |
| ) |
| |
// Constants shared by the case-folding helpers in this file.
const (
	// caseMask has every bit set except 0x20. ANDing an ASCII letter with
	// it yields the upper-case form of that letter.
	caseMask byte = 0xFF &^ 0x20

	// kelvin is U+212A (Kelvin sign), which folds together with 'k' and 'K'.
	kelvin = '\u212A'

	// smallLongEss is U+017F (Latin small letter long s), which folds
	// together with 's' and 'S'.
	smallLongEss = '\u017F'
)
| |
| // foldFunc returns one of four different case folding equivalence |
| // functions, from most general (and slow) to fastest: |
| // |
| // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8 |
| // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S') |
| // 3) asciiEqualFold, no special, but includes non-letters (including _) |
| // 4) simpleLetterEqualFold, no specials, no non-letters. |
| // |
| // The letters S and K are special because they map to 3 runes, not just 2: |
| // * S maps to s and to U+017F 'ſ' Latin small letter long s |
| // * k maps to K and to U+212A 'K' Kelvin sign |
| // See https://play.golang.org/p/tTxjOc0OGo |
| // |
| // The returned function is specialized for matching against s and |
| // should only be given s. It's not curried for performance reasons. |
| func foldFunc(s []byte) func(s, t []byte) bool { |
| nonLetter := false |
| special := false // special letter |
| for _, b := range s { |
| if b >= utf8.RuneSelf { |
| return bytes.EqualFold |
| } |
| upper := b & caseMask |
| if upper < 'A' || upper > 'Z' { |
| nonLetter = true |
| } else if upper == 'K' || upper == 'S' { |
| // See above for why these letters are special. |
| special = true |
| } |
| } |
| if special { |
| return equalFoldRight |
| } |
| if nonLetter { |
| return asciiEqualFold |
| } |
| return simpleLetterEqualFold |
| } |
| |
| // equalFoldRight is a specialization of bytes.EqualFold when s is |
| // known to be all ASCII (including punctuation), but contains an 's', |
| // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t. |
| // See comments on foldFunc. |
| func equalFoldRight(s, t []byte) bool { |
| for _, sb := range s { |
| if len(t) == 0 { |
| return false |
| } |
| tb := t[0] |
| if tb < utf8.RuneSelf { |
| if sb != tb { |
| sbUpper := sb & caseMask |
| if 'A' <= sbUpper && sbUpper <= 'Z' { |
| if sbUpper != tb&caseMask { |
| return false |
| } |
| } else { |
| return false |
| } |
| } |
| t = t[1:] |
| continue |
| } |
| // sb is ASCII and t is not. t must be either kelvin |
| // sign or long s; sb must be s, S, k, or K. |
| tr, size := utf8.DecodeRune(t) |
| switch sb { |
| case 's', 'S': |
| if tr != smallLongEss { |
| return false |
| } |
| case 'k', 'K': |
| if tr != kelvin { |
| return false |
| } |
| default: |
| return false |
| } |
| t = t[size:] |
| |
| } |
| if len(t) > 0 { |
| return false |
| } |
| return true |
| } |
| |
| // asciiEqualFold is a specialization of bytes.EqualFold for use when |
| // s is all ASCII (but may contain non-letters) and contains no |
| // special-folding letters. |
| // See comments on foldFunc. |
| func asciiEqualFold(s, t []byte) bool { |
| if len(s) != len(t) { |
| return false |
| } |
| for i, sb := range s { |
| tb := t[i] |
| if sb == tb { |
| continue |
| } |
| if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') { |
| if sb&caseMask != tb&caseMask { |
| return false |
| } |
| } else { |
| return false |
| } |
| } |
| return true |
| } |
| |
| // simpleLetterEqualFold is a specialization of bytes.EqualFold for |
| // use when s is all ASCII letters (no underscores, etc) and also |
| // doesn't contain 'k', 'K', 's', or 'S'. |
| // See comments on foldFunc. |
| func simpleLetterEqualFold(s, t []byte) bool { |
| if len(s) != len(t) { |
| return false |
| } |
| for i, b := range s { |
| if b&caseMask != t[i]&caseMask { |
| return false |
| } |
| } |
| return true |
| } |