| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /** |
| * Refer to pleac.sourceforge.net if wanting accurate comparisons with PERL. |
| * Original author has included tweaked examples here solely for the purposes |
| * of exercising the Groovy compiler. |
| * In some instances, examples have been modified to avoid additional |
| * dependencies or for dependencies not in common repos. |
| */ |
| |
| import groovy.ant.AntBuilder |
| |
// @@PLEAC@@_5.0
//----------------------------------------------------------------------------------
// Keys in a map literal are read as strings, so quoting them is optional.
age = [Nat: 24, Jules: 25, Josh: 17]

// subscript lookup
assert 24 == age['Nat']
// property-style lookup with a quoted key
assert 25 == age."Jules"

foodColor = [Apple: 'red', Banana: 'yellow', Lemon: 'yellow', Carrot: 'orange']
assert 4 == foodColor.size()
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.1
//----------------------------------------------------------------------------------
// Storing under an existing key replaces the value; the map does not grow.
foodColor.put('Lemon', 'green')
assert 4 == foodColor.size()
assert 'green' == foodColor['Lemon']
// Storing under a new key adds an entry.
foodColor.put('Raspberry', 'pink')
assert 5 == foodColor.size()
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.2
//----------------------------------------------------------------------------------
// containsKey() tests for presence regardless of the stored value
assert ['food', 'drink'] == ['Banana', 'Martini'].collect { name ->
    foodColor.containsKey(name) ? 'food' : 'drink'
}

// presence, non-nullness and Groovy truth are three different questions
age = [Toddler: 3, Unborn: 0, Phantasm: null]
for (key in ['Toddler', 'Unborn', 'Phantasm', 'Relic']) {
    print "$key: "
    if (age.containsKey(key)) {
        print 'has key '
        def value = age[key]
        if (value != null) print 'non-null '
        if (value) print 'true '
    }
    println ''
}
// =>
// Toddler: has key non-null true
// Unborn: has key non-null
// Phantasm: has key
// Relic:
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.3
//----------------------------------------------------------------------------------
// remove() drops the entry stored under the given key
assert 5 == foodColor.size()
foodColor.remove('Banana')
assert 4 == foodColor.size()
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.4
//----------------------------------------------------------------------------------
hash = [:]

// A two-parameter closure receives the key and the value separately.
hash.each { key, value ->
    // do something with key and value
}

// A one-parameter closure receives the whole map entry.
hash.each { entry ->
    // do something with entry
}

// Walking only the keys.
for (key in hash.keySet()) {
    // do something with key
}
| |
// Groovy map literals are LinkedHashMaps, so each() visits the entries in
// insertion order. The expected text below therefore lists the foods in the
// order they were put into foodColor (with Banana already removed), not in
// an arbitrary hash order.
sb = new StringBuffer()
foodColor.each { food, color ->
    sb << "$food is $color\n"
}
assert '\n' + sb.toString() == '''
Apple is red
Lemon is green
Carrot is orange
Raspberry is pink
'''

// every remaining food name is longer than 4 characters, every color longer than 2
foodColor.each { entry ->
    assert entry.key.size() > 4 && entry.value.size() > 2
}

// colors listed in alphabetical order of the food they belong to
foodColorsSortedByFood = foodColor.keySet().sort().collect { food -> foodColor[food] }
assert foodColorsSortedByFood == ["red", "orange", "green", "pink"]
| |
fakedInput = '''
From: someone@somewhere.com
From: someone@spam.com
From: someone@somewhere.com
'''

// tally how many lines each sender accounts for
from = [:]
for (line in fakedInput.split('\n')) {
    matcher = (line =~ /^From:\s+([^\s>]*)/)
    // only lines that are entirely a From: header count
    if (!matcher.matches()) continue
    sender = matcher[0][1]
    from[sender] = from.containsKey(sender) ? from[sender] + 1 : 1
}

// Most frequent sender first
from.entrySet().sort { left, right -> right.value <=> left.value }.each { tally ->
    println "${tally.key}: ${tally.value}"
}
// =>
// someone@somewhere.com: 2
// someone@spam.com: 1
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.5
//----------------------------------------------------------------------------------
hash = [a: 1, b: 2, c: 3]
// Printing a map directly already gives readable output
// (entries appear in insertion order):
println hash
// => [a:1, b:2, c:3]

// Or format each entry yourself
for (entry in hash) {
    println "${entry.key} => ${entry.value}"
}
// =>
// a => 1
// b => 2
// c => 3
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.6
//----------------------------------------------------------------------------------
// java.util.LinkedHashMap "maintains a doubly-linked list running through all of its entries.
// This linked list defines the iteration ordering, which is normally the order in which keys
// were inserted into the map (insertion-order)".
foodColor = new LinkedHashMap()
foodColor.put('Banana', 'Yellow')
foodColor.put('Apple', 'Green')
foodColor.put('Lemon', 'Yellow')

// keys come back in the order they were inserted
for (key in foodColor.keySet()) {
    println key
}
// =>
// Banana
// Apple
// Lemon
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.7
//----------------------------------------------------------------------------------
// a hash whose values are lists gives several values per key
foodsOfColor = [Yellow: ['Banana', 'Lemon'], Green: ['Apple']]
// list + element yields a new list, which is stored back under the same key
foodsOfColor['Green'] = foodsOfColor['Green'] + 'Melon'
assert ["Green": ["Apple", "Melon"], "Yellow": ["Banana", "Lemon"]] == foodsOfColor
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.8
//----------------------------------------------------------------------------------
surname = [Mickey: 'Mantle', Babe: 'Ruth']
// reverse lookup: which first names carry the surname 'Mantle'?
assert ["Mickey"] == surname.findAll { first, last -> last == 'Mantle' }.keySet().toList()

// inverting the whole map: values become keys and keys become values
firstname = surname.collectEntries { first, last -> [(last): first] }
assert ["Ruth": "Babe", "Mantle": "Mickey"] == firstname
| |
// foodfindScript:
//#!/usr/bin/groovy
// usage: foodfind food_or_color
// Looks the single command-line argument up both as a food (key) and as a
// color (value) and reports whatever it finds.
color = [Apple:'red', Banana:'yellow', Lemon:'yellow', Carrot:'orange']
// args[0] on an empty argument list throws; fall back to null (which is
// neither a key nor a value of color) and show the usage line instead.
given = args ? args[0] : null
if (given == null)
    println 'usage: foodfind food_or_color'
if (color.containsKey(given))
    println "$given is a food with color ${color[given]}."
if (color.containsValue(given)) {
    // could use commify() here - see 4.2
    foods = color.findAll{it.value == given}.collect{it.key}
    join = foods.size() == 1 ? 'is a food' : 'are foods'
    println "${foods.join(', ')} $join with color ${given}."
}
// foodfind red
// => Apple is a food with color red.
// foodfind yellow
// => Banana, Lemon are foods with color yellow.
// foodfind Carrot
// => Carrot is a food with color orange.
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.9
//----------------------------------------------------------------------------------
foodColor = [Apple: 'red', Carrot: 'orange', Banana: 'yellow', Cherry: 'black']

// keys in natural order; the sorted list could then drive iteration over the hash
assert ["Apple", "Banana", "Carrot", "Cherry"] == foodColor.keySet().sort()
// values in natural order
assert ["black", "orange", "red", "yellow"] == foodColor.values().sort()
// values ordered by their length
assert ["red", "black", "orange", "yellow"] == foodColor.values().sort { colour -> colour.size() }
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.10
//----------------------------------------------------------------------------------
//merged = a.clone.update(b) # because Hash#update changes object in place

drinkColor = [Galliano: 'yellow', 'Mai Tai': 'blue']
// merge into a fresh map so neither source map is modified
ingestedColor = [:]
ingestedColor << drinkColor
// entries added later win for keys present in both maps
ingestedColor << foodColor

// distinct colors, sorted
totalColors = ingestedColor.values().toSet().sort()
assert ["black", "blue", "orange", "red", "yellow"] == totalColors
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.11
//----------------------------------------------------------------------------------
foodColor['Lemon']='yellow'
citrusColor = [Lemon:'yellow', Orange:'orange', Lime:'green']
println foodColor
println citrusColor

// keys present in both maps
common = foodColor.keySet().intersect(citrusColor.keySet())
// intersect() on a Set may hand back a Set, and a Set never equals a List,
// so compare as sets to stay independent of the returned collection type
assert (common as Set) == (['Lemon'] as Set)

// keys of foodColor that are not keys of citrusColor; list subtraction keeps
// the left operand's order, which for a map literal is insertion order
foodButNotCitrus = foodColor.keySet().toList() - citrusColor.keySet().toList()
assert foodButNotCitrus == ["Apple", "Carrot", "Banana", "Cherry"]
//----------------------------------------------------------------------------------
| |
| // @@PLEAC@@_5.12 |
| //---------------------------------------------------------------------------------- |
| // no problem here, Groovy handles any kind of object for key-ing |
| //---------------------------------------------------------------------------------- |
| |
// @@PLEAC@@_5.13
//----------------------------------------------------------------------------------
// Groovy stores hashes in the Java map implementations, which accept an
// initial capacity and a load factor (the fill ratio at which the table
// is resized).
// NOTE(review): capacity() is not part of HashMap's public API - confirm the
// two calls below still resolve on the JDK/Groovy combination in use.
hash = [:] // Groovy shorthand gets defaults
hash = new HashMap() // default capacity and load factor
println hash.capacity()
// => 16
for (letter in 'A'..'Z') {
    hash[letter] = letter
}
println hash.capacity()
// => 64
hash = new HashMap(100) // initial capacity of 100 and default load factor
hash = new HashMap(100, 0.8f) // initial capacity of 100 and 0.8 load factor
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.14
//----------------------------------------------------------------------------------
// count how often each letter occurs across all color names
count = [:]
letters = []
for (color in foodColor.values()) {
    // a String coerced to String[] yields its individual characters
    letters.addAll((color as String[]).toList())
}
for (letter in letters) {
    count[letter] = count.containsKey(letter) ? count[letter] + 1 : 1
}
assert count == ["o":3, "d":1, "k":1, "w":2, "r":2, "c":1, "l":5, "g":1, "b":1, "a":2, "y":2, "n":1, "e":4]
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.15
//----------------------------------------------------------------------------------
// child -> father; anyone who appears only as a value has no recorded father
father = [
    Cain: 'Adam', Abel: 'Adam', Seth: 'Adam',
    Enoch: 'Cain',
    Irad: 'Enoch',
    Mehujael: 'Irad',
    Methusael: 'Mehujael',
    Lamech: 'Methusael',
    Jabal: 'Lamech', Jubal: 'Lamech', Tubalcain: 'Lamech',
    Enos: 'Seth'
]
| |
// Prints person followed by each successive ancestor found in the father
// map, separated by spaces, ending the line at the first name that has no
// entry in father.
def upline(person) {
    def ancestor = person
    while (true) {
        if (!father.containsKey(ancestor)) {
            // top of the recorded lineage reached
            println ancestor
            return
        }
        print ancestor + ' '
        ancestor = father[ancestor]
    }
}
| |
upline('Irad')
// => Irad Enoch Cain Adam

// invert father into a map from each parent to the list of his children
children = [:]
father.each { child, dad ->
    def known = children.containsKey(dad) ? children[dad] : []
    children[dad] = known + child
}
// Prints the comma-separated names listed for person in the children map,
// or 'Nobody' when person has no entry there, followed by a blank line.
def downline(person) {
    def offspring = 'Nobody'
    if (children.containsKey(person)) {
        offspring = children[person].join(', ')
    }
    println "$person begat ${offspring}.\n"
}
downline('Tubalcain')
// => Tubalcain begat Nobody.
downline('Adam')
// => Adam begat Cain, Abel, Seth.

// This one doesn't recurse through subdirectories (as a simplification)
// scriptToFindIncludeFilesWhichContainNoIncludesScript:
dir = '<path_to_usr/include>'
// file name -> names of the headers it #includes
includes = [:]
def includeDir = new File(dir)
// dir is a placeholder: skip the scan rather than fail when it does not
// name a real directory
if (includeDir.directory) {
    includeDir.eachFile { file ->
        if (file.directory) return
        file.eachLine { line ->
            matcher = (line =~ '^\\s*#\\s*include\\s*<([^>]+)>')
            // find(), not matches(): text after the closing '>' (comments,
            // trailing blanks) must not hide the include
            if (matcher.find()) {
                if (!includes.containsKey(file.name)) includes[file.name] = []
                includes[file.name] += matcher.group(1)
            }
        }
    }
}
// find referenced files which have no includes; assumes all files
// were processed and none are missing
println includes.values().flatten().unique().sort() - includes.keySet()
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_5.16
//----------------------------------------------------------------------------------
// dutree - print a sorted, indented rendition of du output
// How the input is obtained is not shown (it resembles other examples);
// on some unix systems it would be: duProcessFakedInput = "du options".process().text
// Each record is "<size> <path>", and the last record is the top directory.
duProcessFakedInput = """
11732 groovysoap/lib
68 groovysoap/src/main/groovy/net/soap
71 groovysoap/src/main/groovy/net
74 groovysoap/src/main/groovy
77 groovysoap/src/main
9 groovysoap/src/examples
8 groovysoap/src/examples/groovy
102 groovysoap/src/test
202 groovysoap/src
11966 groovysoap
"""
| |
// DuNode holds what is known about one directory:
// its path (name), its size and its child nodes (kids).
class DuNode {
    def name
    def size
    def kids = []

    // support for sorting nodes by size
    def compareTo(node2) {
        return this.size <=> node2.size
    }

    // the name with everything up to and including the last "/" removed
    def getBasename() {
        return (name =~ /.*\//).replaceAll('')
    }

    // the name without its trailing "/segment", or null when there is
    // no such segment to remove
    def getParent() {
        def withoutLeaf = (name =~ /\/[^\/]+$/).replaceAll('')
        if (withoutLeaf == name) {
            return null
        }
        return withoutLeaf
    }
}
| |
// The DuTree does the actual work of
// getting the input, parsing it, building up a tree
// and formatting it for output
class DuTree {
    // raw du output: one "<size> <path>" record per line
    def input
    // node of the last record read by processInput()
    def topdir
    // every DuNode created so far, keyed by its path
    def nodes = [:]
    // dirsizes and kids are not used by any method of this class; they are
    // kept so that the set of properties stays unchanged
    def dirsizes = [:]
    def kids = [:]

    // get a node by name, create (and register) it if it does not exist yet
    def getOrCreateNode(name) {
        if (!nodes.containsKey(name))
            nodes[name] = new DuNode(name: name)
        return nodes[name]
    }

    // figure out how much is taken in each directory
    // that isn't stored in the subdirectories. Add a new
    // fake kid called "." containing that much.
    // Recurses into every kid; a node whose kids account for
    // nothing gets no "." kid.
    def getDots(node) {
        def cursize = node.size
        for (kid in node.kids) {
            cursize -= kid.size
            getDots(kid)
        }
        if (node.size != cursize) {
            def newnode = getOrCreateNode(node.name + "/.")
            newnode.size = cursize
            node.kids += newnode
        }
    }

    // Parses every non-blank line of input as "<size> <path>", records the
    // size on the node for that path and attaches the node to its parent's
    // kids. The node of the last line read becomes topdir.
    def processInput() {
        def name = ''
        input.split('\n').findAll { it.trim() }.each { line ->
            def tokens = line.tokenize()
            name = tokens[1]
            // getOrCreateNode already registers the node in nodes
            def node = getOrCreateNode(name)
            node.size = tokens[0].toInteger()
            def parent = node.parent
            if (parent)
                getOrCreateNode(parent).kids << node
        }
        topdir = nodes[name]
    }

    // recursively output everything
    // passing padding and number width as well
    // on recursive calls
    def output(node, prefix='', width=0) {
        def line = node.size.toString().padRight(width) + ' ' + node.basename
        println(prefix + line)
        // turn the printed line into the indent for the next level: the last
        // digit of the size becomes a '|', everything else becomes blank
        prefix += line.replaceAll(/\d /, '| ')
        prefix = prefix.replaceAll(/[^|]/, ' ')
        if (node.kids.size() > 0) { // not a bachelor node
            // a local variable: assigning to the bare name 'kids' here would
            // overwrite this object's kids property on every recursive call
            def sortedKids = node.kids
            // largest first; sorts the node's own list in place
            sortedKids.sort { a, b -> b.compareTo(a) }
            width = sortedKids[0].size.toString().size()
            for (kid in sortedKids) output(kid, prefix, width)
        }
    }
}
| |
// parse the faked du output, add the "." remainder nodes, then print the tree
tree = new DuTree(input: duProcessFakedInput)
tree.with {
    processInput()
    getDots(topdir)
    output(topdir)
}
// =>
// 11966 groovysoap
// | 11732 lib
// | 202 src
// | | 102 test
// | | 77 main
// | | | 74 groovy
// | | | | 71 net
// | | | | | 68 soap
// | | | | | 3 .
// | | | | 3 .
// | | | 3 .
// | | 14 .
// | | 9 examples
// | | | 8 groovy
// | | | 1 .
// | 32 .
//----------------------------------------------------------------------------------
| |
| |
// @@PLEAC@@_6.0
//----------------------------------------------------------------------------------
// Regular expressions are part of the Groovy language itself:
// * Strings quoted with '/' characters have special escaping
//   rules for backslashes and the like.
// * ~string (regex pattern operator)
// * m =~ /pattern/ (regex find operator)
// * m ==~/pattern/ (regex match operator)
// * patterns can be used in case expressions in a switch statement
// * string.replaceAll can take a closure expression as the second argument
// In addition, Groovy can make use of Java's Pattern, Matcher and Scanner classes
// directly. (The sugar coating mentioned above sits on top of these anyway).
// There are also additional open source Java regex libraries which can be used.

meadow1 = 'cow grass butterflies Ovine'
meadow2 = 'goat sheep flowers dog'
// 'slashy' quoting keeps pattern strings readable
partial = /sheep/
full = /.*sheep.*/

// find operator: true when the pattern occurs anywhere in the string
assert !(meadow1 =~ partial)
assert meadow2 =~ partial
finder = (meadow2 =~ partial)
// what it actually produces is a plain Java Matcher
assert finder instanceof java.util.regex.Matcher

// match operator: the whole string has to match
assert !(meadow1 ==~ full)
assert meadow2 ==~ full
matcher = (meadow2 ==~ full)
// this one produces nothing more than a boolean
assert matcher instanceof Boolean

assert meadow1 =~ /(?i)\bovines?\b/ // (?i) == case flag

string = 'good food'
println string.replaceFirst(/o*/, 'e')
// => egood food
println string.replaceAll(/o*/, 'e')
// => egeede efeede (global)
// careful: replace() substitutes literal text, it knows nothing about regexes
println string.replace(/o*/, 'e')
// => good food
println 'o*o*'.replace(/o*/, 'e')
// => ee

// groovy -e "m = args[0] =~ /(a|ba|b)+(a|ac)+/; if (m.matches()) println m[0][0]" ababacaca
// => ababa

digits = "123456789"
nonlap = digits =~ /\d\d\d/
assert nonlap.count == 3
print 'Non-overlapping: '
for (i in 0..<nonlap.count) {
    print nonlap[i] + ' '
}
print '\n'
print 'Overlapping: '
// a lookahead consumes nothing, so successive matches may overlap
yeslap = (digits =~ /(?=(\d\d\d))/)
assert yeslap.count == 7
for (i in 0..<yeslap.count) {
    print yeslap[i][1] + ' '
}
print '\n'
// Non-overlapping: 123 456 789
// Overlapping: 123 234 345 456 567 678 789

string = 'And little lambs eat ivy'
// Greedy version
parts = string =~ /(.*)(l[^s]*s)(.*)/
for (group in 1..parts.groupCount()) {
    print "(${parts[0][group]}) "
}
print '\n'
// (And little ) (lambs) ( eat ivy)

// Reluctant version
parts = string =~ /(.*?)(l[^s]*s)(.*)/
for (group in 1..parts.groupCount()) {
    print "(${parts[0][group]}) "
}
print '\n'
// (And ) (little lambs) ( eat ivy)
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.1
//----------------------------------------------------------------------------------
// Strings are immutable: a replacement always yields a new string, so the
// copy-then-substitute problem does not arise
src = 'Go this way'
dst = src.replaceFirst('this', 'that')
assert 'Go that way' == dst

// extract basename
src = 'c:/some/path/file.ext'
dst = src.replaceFirst('^.*/', '')
assert 'file.ext' == dst

// Make All Words Title-Cased (not that you would do it this way)
// The preprocessing operations \X where X is one of l, u, L, and U are not supported
// in the sun regex library but other Java regex libraries may support this. Instead:
src = 'make all words title-cased'
dst = src
for (letter in 'a'..'z') {
    // the letter either follows a non-letter (kept via $1) or starts the string
    def wordStart = /([^a-zA-Z])/ + letter + /|\A/ + letter
    def capitalised = /$1/ + letter.toUpperCase()
    dst = dst.replaceAll(wordStart, capitalised)
}
assert 'Make All Words Title-Cased' == dst

// rename list of dirs
bindirs = '/usr/bin /bin /usr/local/bin'.tokenize(' ')
expected = '/usr/lib /lib /usr/local/lib'.tokenize(' ')
libdirs = bindirs*.replaceFirst('bin', 'lib')
assert expected == libdirs
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.2
//----------------------------------------------------------------------------------
// Groovy uses Java regex (other Java regex packages would also be possible)
// It doesn't support Locale-based settings but you can roll your own to some
// extent, you can use any Unicode characters as per below and you can use
// \p{Punct} Punctuation: One of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
// or the other special character classes
//
// The accented letters are written as Unicode escapes so that the example
// no longer depends on the encoding the source file is stored or read in
// (they had been turned into U+FFFD replacement characters).
// The words are: silly, facade (c-cedilla), cooperate (o-diaeresis),
// nino (n-tilde), Renee (e-acute), Moliere (e-grave), haemoglobin (ae ligature),
// naive (i-diaeresis), tschuess (u-diaeresis, sharp s), and one with punctuation.
words = '''
silly
fa\u00e7ade
co\u00f6perate
ni\u00f1o
Ren\u00e9e
Moli\u00e8re
h\u00e6moglobin
na\u00efve
tsch\u00fc\u00df
random!stuff#here\u0948
'''
results = ''
greekAlpha = '\u0391'
// letters beyond \w that should still count as alphabetic
special = '\u00e7\u00f6\u00f1\u00e9\u00e8\u00e6\u00ef\u00fc\u00df?' + greekAlpha
// flag as either Y (alphabetic) or N (not)
words.split('\n').findAll{it.trim()}.each{ results += it ==~ /^[\w/+special+/]+$/ ?'Y':'N' }
assert results == 'YYYYYYYYYN'
// same verdicts when testing for "contains no punctuation" instead
results = ''
words.split('\n').findAll{it.trim()}.each{ results += it ==~ /^[^\p{Punct}]+$/ ?'Y':'N' }
assert results == 'YYYYYYYYYN'
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.3
//----------------------------------------------------------------------------------
// the longest possible run of non-whitespace between an 'a' and a 'z'
finder = ('abczqz z' =~ /a\S+z/)
assert 'abczqz' == finder[0]

// the longest possible run of letters, apostrophes and hyphens
finder = ("aAzZ'z-z0z" =~ /a[A-Za-z'-]+z/)
assert "aAzZ'z-z" == finder[0]

// selecting words
finder = ('23rd Psalm' =~ /\b([A-Za-z]+)\b/) // usually best
println finder[0][0]
// => Psalm (23rd is not matched)
finder = ('23rd Psalm' =~ /\s([A-Za-z]+)\s/) // fails at ends or w/ punctuation
println finder.matches()
// => false (no whitespaces at ends)
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.4
//----------------------------------------------------------------------------------
// Annotates every hostname found in str with its resolved IP address.
// NOTE: InetAddress.getByName performs a real name lookup.
str = 'groovy-lang.org and www.aboutgroovy.com'
re = '''(?x) # to enable whitespace and comments
( # capture the hostname in $1
(?: # these parens for grouping only
(?! [-_] ) # lookahead for neither underscore nor dash
[\\w-] + # hostname component
\\. # and the domain dot
) + # now repeat that whole thing a bunch of times
[A-Za-z] # next must be a letter
[\\w-] + # now trailing domain part
) # end of $1 capture
'''

finder = str =~ re
out = str
(0..<finder.count).each{
    adr = finder[it][0]
    // literal replace(): the hostname contains dots, which replaceAll()
    // would interpret as "any character"
    out = out.replace(adr, "$adr [${InetAddress.getByName(adr).hostAddress}]")
}
println out
// => groovy-lang.org [63.246.7.187] and www.aboutgroovy.com [63.246.7.76]
| |
// to match whitespace or #-characters in an extended re you need to escape them.
// Replaces "#name#" in str with the value of the script variable called name.
foo = 42
str = 'blah #foo# blah'
re = '''(?x) # to enable whitespace and comments
\\# # a pound sign
(\\w+) # the variable name
\\# # another pound sign
'''
finder = str =~ re
found = finder[0]
// found[0] is the matched text and found[1] the variable name; use literal
// replace() so neither the matched text nor the evaluated value is
// interpreted as regex syntax
out = str.replace(found[0], evaluate(found[1]).toString())
assert out == 'blah 42 blah'
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.5
//----------------------------------------------------------------------------------
fish = 'One fish two fish red fish blue fish'
expected = 'The third fish is a red one.'
// skip two "<word> fish" pairs, capture the word of the third
thirdFish = /(?:\w+\s+fish\s+){2}(\w+)\s+fish.*/
assert (fish.replaceAll(thirdFish, 'The third fish is a $1 one.')) == expected

anyFish = /(\w+)\s+fish\b/
finder = (fish =~ anyFish)
// indexing the matcher: first index = which match (0-based, so 2 is the
// third), second index = which group within that match
out = "The third fish is a ${finder[2][1]} one."
assert expected == out

// matches at odd 0-based positions are the even-numbered fish
evens = (0..<finder.count).findAll { idx -> idx % 2 == 1 }.collect { idx -> finder[idx][1] }
println "Even numbered fish are ${evens.join(' ')}."
// => Even numbered fish are two blue.

// one of several ways to do this
pond = fish + ' in the pond'
// four (word)(separator) pairs followed by the rest: groups 1..9
fishInPond = (/(\w+)(\s+fish\b\s*)/) * 4 + /(.*)/
found = (pond =~ fishInPond)[0]
// group 7 is the fourth fish's word; swap it for 'sushi'
println((found[1..6] + 'sushi' + found[8..9]).join())
// => One fish two fish red fish sushi fish in the pond

// find last fish
expected = 'Last fish is blue'
pond = 'One fish two fish red fish blue fish swim here.'
finder = (pond =~ anyFish)
assert "Last fish is ${finder[finder.count-1][1]}" == expected
// => Last fish is blue

// greedy match version of above
finder = (pond =~ /.*\b/ + anyFish)
assert "Last fish is ${finder[0][1]}" == expected

// last fish match version of above
finder = (pond =~ /\b(\w+)\s+fish\b(?!.*\bfish\b)/)
assert "Last fish is ${finder[0][1]}" == expected
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.6
//----------------------------------------------------------------------------------
// Html Stripper
// with a real file this would be: fakedfile = new File('path_to_file.htm').text
fakedFile = '''
<html>
<head><title>Chapter 1 Title</title></head>
<body>
<h1>Chapter 1: Some Heading</h1>
A paragraph.
</body>
</html>
'''

stripExpectations = '''
Chapter 1 Title

Chapter 1: Some Heading
A paragraph.
'''.trim()

// remove every tag (reluctant match, so each tag is removed on its own)
stripped = fakedFile.replaceAll(/(?m)<.*?>/, '').trim()
assert stripped == stripExpectations

pattern = '''(?x)
( # capture in $1
Chapter # text string
\\s+ # mandatory whitespace
\\d+ # decimal number
\\s* # optional whitespace
: # a real colon
. * # anything not a newline till end of line
)
'''

headerfyExpectations = '''
Chapter 1 Title

<H1>Chapter 1: Some Heading</H1>
A paragraph.
'''.trim()

// wrap every "Chapter <n>: ..." line in an H1 element
headerfied = stripped.replaceAll(pattern, '<H1>$1</H1>')
assert headerfied == headerfyExpectations

// one liner equivalent which prints to stdout
//% groovy -p -e "line.replaceAll(/^(Chapter\s+\d+\s*:.*)/,'<H1>$1</H1>')"

// one liner equivalent which modifies file in place and creates *.bak original file
//% groovy -pi .bak -e "line.replaceAll(/^(Chapter\s+\d+\s*:.*)/,'<H1>$1</H1>')"
| |
// with a real file this would be: realFileInput = new File(path_to_file).text
fakeFileInput = '''
0
START
1
2
END
3
4
5
START
6
END
'''

// everything between a line starting with START and the next line starting with END
chunkyPattern = /(?ms)^START(.*?)^END/
finder = (fakeFileInput =~ chunkyPattern)
for (idx in 0..<finder.count) {
    def chunkBody = finder[idx][1]
    def lineCount = new StringTokenizer(chunkBody, '\n').countTokens()
    println "Chunk #$idx contains ${lineCount} lines."
}
// =>
// Chunk #0 contains 2 lines.
// Chunk #1 contains 1 lines.
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.7
//----------------------------------------------------------------------------------
// the general recipe is:
//file = new File("datafile").text.split(/pattern/)
// here lines consisting of .Ch, .Se or .Ss separate the chunks of text
fakedFiletext = '''
.Ch
abc
.Se
def
.Ss
ghi
.Se
jkl
.Se
mno
.Ss
pqr
.Ch
stu
.Ch
vwx
.Se
yz!
'''
// (?m) lets ^ and $ anchor at every line, so only whole separator lines split
separator = /(?m)^\.(Ch|Se|Ss)$/
chunks = fakedFiletext.split(separator)
println "I read ${chunks.size()} chunks."
// => I read 10 chunks.
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.8
//----------------------------------------------------------------------------------
// Groovy has no ~/BEGIN/ .. ~/END/ range notation;
// it has to be emulated as in the example below.
// Selecting by line number is available on the command line
// but not inside a script, e.g. to print lines 15 through 17 inclusive:
// > groovy -p -e "if (count in 15..17) return line" datafile
// Inside a script the line count has to be tracked explicitly (see below).

htmlContent = '''
<h1>A Heading</h1>
Here is <XMP>inline AAA</XMP>.
And the bigger Example 2:
<XMP>
line BBB
line CCC
</XMP>
Done.
'''.trim()

// text between an opening and the nearest closing XMP tag, across lines
examplePattern = /(?ms)<XMP>(.*?)<\/XMP>/
finder = (htmlContent =~ examplePattern)
for (idx in 0..<finder.count) {
    println "Example ${idx+1}:"
    println finder[idx][1]
}
// =>
// Example 1:
// inline AAA
// Example 2:
//
// line BBB
// line CCC
//

// selecting by (0-based) line index
htmlContent.split('\n').eachWithIndex { text, lineIndex ->
    if (lineIndex in 4..5) println text
}
// =>
// line BBB
// line CCC
// In real code a mail API would be the better tool for this
fakedMailInput = '''
From: A Person <someone@somewhere.com>
To: <pleac-discuss@lists.sourceforge.net>
Date: Sun, 31 Dec 2006 02:14:57 +1100

From: noone@nowhere.com
To: <pleac-discuss@lists.sourceforge.net>
Date: Sun, 31 Dec 2006 02:14:58 +1100

From: someone@somewhere.com
To: <pleac-discuss@lists.sourceforge.net>
Date: Sun, 31 Dec 2006 02:14:59 +1100
'''.trim()+'\n'

// count how many From lines mention each address
seen = [:]
for (line in fakedMailInput.split('\n')) {
    m = (line =~ /^From:?\s(.*)/)
    if (!m) continue
    // pull the bare address out of the rest of the header line
    addr = m[0][1] =~ /([^<>(),;\s]+\@[^<>(),;\s]+)/
    x = addr[0][1]
    seen[x] = seen.containsKey(x) ? seen[x] + 1 : 1
}
seen.each { address, times ->
    println "Address $address seen $times time${times==1?'':'s'}."
}
// =>
// Address someone@somewhere.com seen 2 times.
// Address noone@nowhere.com seen 1 time.
//----------------------------------------------------------------------------------
| |
| // @@PLEAC@@_6.9 |
| //---------------------------------------------------------------------------------- |
| import java.util.regex.Pattern |
| |
// one candidate file name per line; the glob patterns below are matched against this text
names = ['', 'myFile.txt', 'oldFile.tex', 'myPicture.jpg', ''].join('\n')
| |
// Translates a shell-style glob into a regular expression string.
//   globstr - the glob; '*' becomes '.*', '?' becomes '.', '[' and ']' are
//             passed through unchanged, every other character is quoted so
//             that it only matches itself
// Returns the expression wrapped in '(?m)^' ... '$', i.e. it has to match a
// complete line of multi-line text.
def glob2pat(globstr) {
    def patmap = ['*': '.*', '?': '.', '[': '[', ']': ']']
    // translate character by character and join the pieces; no character
    // (line terminators included) is silently dropped
    def translated = globstr.toList().collect { c ->
        patmap.containsKey(c) ? patmap[c] : Pattern.quote(c)
    }.join()
    '(?m)^' + translated + '$'
}
| |
// Asserts that exactly count lines of names match the glob pat.
def checkNumMatches(pat, count) {
    def found = (names =~ glob2pat(pat))
    assert found.count == count
}

[
    ['*.*', 3],
    ['my*.*', 2],
    ['*.t*', 2],
    ['*File.*', 2],
    ['*Rabbit*.*', 0]
].each { glob, hits -> checkNumMatches(glob, hits) }
//----------------------------------------------------------------------------------
| |
// @@PLEAC@@_6.10
//----------------------------------------------------------------------------------
// version 1: the simple, obvious way
states = 'CO ON MI WI MN'.tokenize(' ')

// Prints every line of file that contains one of the states as a whole word.
def popgrep1(file) {
    file.eachLine { line ->
        def mentioned = states.any { state -> (line =~ /\b$state\b/).find() }
        if (mentioned) println line
    }
}
// popgrep1(new File('path_to_file'))
| |
// version 2: eval strings (fast in Perl but hard to quote; SLOW here)
// Generates one "if" test per state as source text and evaluates that
// text anew for every line of file.
def popgrep2(file) {
    def checks = states.collect { "if (!found && line =~ /\\b$it\\b/) found = true\n" }.join()
    def code = 'def found = false\n' + checks + "if (found) println line\n"
    file.eachLine { text ->
        // the generated code reads the current line from the script variable 'line'
        line = text
        evaluate(code)
    }
}
// popgrep2(new File('path_to_file'))
| |
// version 2b: eval using switch/case (not in Perl cookbook) (SLOW)
// Generates a switch with one pattern case per state as source text and
// evaluates that text anew for every line of file.
def popgrep2b(file) {
    def cases = states.collect { "case ~/.*\\b$it\\b.*/:\nprintln line;break\n" }.join()
    def code = 'switch(line) {\n' + cases + "default:break\n}\n"
    file.eachLine { text ->
        // the generated code reads the current line from the script variable 'line'
        line = text
        evaluate(code)
    }
}
// popgrep2b(new File('path_to_file'))
| |
| // version3: build a match_any function as a GString |
| // Builds a single boolean expression ("line =~ /../||line =~ /../...") from |
| // `states`, evaluates it for every line of `file` and prints the line when it is true. |
| def popgrep3(file) { |
| def code = states.collect{ "line =~ /\\b$it\\b/" }.join('||') |
| file.eachLine{ line = it; if (evaluate(code)) println line } |
| } |
| // popgrep3(new File('path_to_file')) |
| |
| // version4: pretty fast, but simple: compile all re's first: |
| // one precompiled whole-word Pattern per state abbreviation |
| patterns = states.collect { abbrev -> ~/\b$abbrev\b/ } |
| // Prints every line of `file` for which at least one precompiled pattern finds a match. |
| def popgrep4(file) { |
|     file.eachLine { text -> |
|         def hit = patterns.any { compiled -> compiled.matcher(text) } |
|         if (hit) println text |
|     } |
| } |
| // popgrep4(new File('path_to_file')) |
| |
| // version5: faster |
| // a single alternation covering every state abbreviation as a whole word |
| str = states.collect { abbrev -> /\b$abbrev\b/ }.join('|') |
| // Prints every line of `file` in which the combined alternation finds a match. |
| def popgrep5(file) { |
|     file.eachLine { text -> |
|         def found = text =~ str |
|         if (found) println text |
|     } |
| } |
| // popgrep5(new File('path_to_file')) |
| |
| // version5b: faster (like 5 but compiled outside loop) |
| // same alternation as version 5, compiled to a Pattern once, outside the loop |
| pattern = ~states.collect { abbrev -> /\b$abbrev\b/ }.join('|') |
| // Prints every line of `file` in which the precompiled alternation finds a match. |
| def popgrep5b(file) { |
|     file.eachLine { text -> |
|         def found = pattern.matcher(text) |
|         if (found) println text |
|     } |
| } |
| // popgrep5b(new File('path_to_file')) |
| |
| // speeds trials ON the current source file (~1200 lines) |
| // popgrep1 => 0.39s |
| // popgrep2 => 25.08s |
| // popgrep2b => 23.86s |
| // popgrep3 => 22.42s |
| // popgrep4 => 0.12s |
| // popgrep5 => 0.05s |
| // popgrep5b => 0.05s |
| // Groovy's built-in support is the way to go in terms of |
| // both speed and simplicity of understanding. Avoid using |
| // evaluate() unless you absolutely need it |
| |
| // generic matching functions |
| // sample lines for the generic matchers; lines that are blank after trim() are dropped |
| input = ''' |
| both cat and dog |
| neither |
| just a cat |
| just a dog |
| '''.split('\n').findAll{it.trim()} |
| |
| // True when at least one of `patterns` is found somewhere in `line`. |
| def matchAny(line, patterns) { |
|     for (candidate in patterns) { |
|         if (line =~ candidate) return true |
|     } |
|     return false |
| } |
| // True when every one of `patterns` is found somewhere in `line`. |
| def matchAll(line, patterns) { |
|     for (candidate in patterns) { |
|         if (!(line =~ candidate)) return false |
|     } |
|     return true |
| } |
| |
| // number of sample lines accepted by the given matcher for a list of patterns |
| def tally = { matcher, pats -> input.findAll{ matcher(it, pats) }.size() } |
| assert tally(this.&matchAny, ['cat','dog']) == 3 |
| assert tally(this.&matchAny, ['cat$','^n.*']) == 2 |
| assert tally(this.&matchAll, ['cat','dog']) == 1 |
| assert tally(this.&matchAll, ['cat$','^n.*']) == 0 |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.11 |
| //---------------------------------------------------------------------------------- |
| // patternCheckingScript: |
| prompt = '\n> ' |
| print 'Enter patterns to check:' + prompt |
| // read candidate regexes from stdin until EOF and report whether each one compiles |
| def patternSource = new BufferedReader(new InputStreamReader(System.in)) |
| patternSource.eachLine { candidate -> |
|     def verdict |
|     try { |
|         Pattern.compile(candidate) |
|         verdict = 'Valid' |
|     } catch (java.util.regex.PatternSyntaxException ex) { |
|         verdict = 'Invalid pattern: ' + ex.message |
|     } |
|     print verdict + prompt |
| } |
| // => |
| // Enter patterns to check: |
| // > ab*.c |
| // Valid |
| // > ^\s+[^a-z]*$ |
| // Valid |
| // > ** |
| // Invalid pattern: Dangling meta character '*' near index 0 |
| // ** |
| // ^ |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.12 |
| //---------------------------------------------------------------------------------- |
| // The non-ASCII letter (o with diaeresis) is written as a Unicode escape so |
| // that the example does not depend on the encoding the file is saved/read with. |
| src = 'dierk k\u00F6nig' |
| // simplistic with locale issue: any letter that follows a non-[a-zA-Z] character is |
| // upper-cased, so the letter after the accented character is capitalized as well |
| dst = src |
| ('a'..'z').each{ dst = dst.replaceAll(/(?<=[^a-zA-Z])/+it+/|\A/+it, it.toUpperCase()) } |
| println dst |
| // => Dierk KöNig |
| // locale avoidance: rely on the regex engine's word boundary instead of an ASCII class |
| dst = src |
| ('a'..'z').each{ dst = dst.replaceAll(/(?<=\A|\b)/+it, it.toUpperCase()) } |
| println dst |
| // => Dierk König |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.13 |
| //---------------------------------------------------------------------------------- |
| // Several libraries exist, e.g. |
| // http://secondstring.sourceforge.net/ |
| // http://sourceforge.net/projects/simmetrics/ |
| // both support numerous algorithms. Using the second as an example: |
| // Empty stand-ins named after the similarity metric classes of the library whose |
| // import is commented out below. They declare no members themselves. |
| class Levenshtein{} |
| class MongeElkan{} |
| class JaroWinkler{} |
| class Soundex{} |
| //import uk.ac.shef.wit.simmetrics.similaritymetrics.* |
| // word to compare against, the candidate words (one per line, blank lines dropped) |
| // and one instance of each metric class |
| target = 'balast' |
| candidates = ''' |
| quick |
| brown |
| fox |
| jumped |
| over |
| the |
| lazy |
| dog |
| ballast |
| ballasts |
| balustrade |
| balustrades |
| blast |
| blasted |
| blaster |
| blasters |
| blasting |
| blasts |
| '''.split('\n').findAll{it.trim()} |
| metrics = [new Levenshtein(), new MongeElkan(), new JaroWinkler(), new Soundex()] |
| // Prints one report row: `name` right-aligned in 14 columns, a separator, then each |
| // entry of `results` left-aligned in 16 columns, followed by a newline. |
| def out(name, results) { |
|     print name.padLeft(14) + ' ' |
|     for (cell in results) { |
|         print(cell.padRight(16)) |
|     } |
|     println() |
| } |
| // Prints a report row of numeric `results`, each truncated to two decimal places. |
| def outr(name, results) { |
|     def formatted = results.collect { score -> |
|         def truncated = ((int)(score*100))/100 |
|         '' + truncated |
|     } |
|     out(name, formatted) |
| } |
| // header row from each metric's shortDescriptionString, then one row of similarity |
| // scores (target vs. candidate) per candidate word |
| // NOTE(review): the stand-in classes above declare neither shortDescriptionString nor |
| // getSimilarity(); presumably the real library classes must be imported for this to run - confirm |
| out ('Word/Metric', metrics.collect{it.shortDescriptionString} ) |
| candidates.each{ w -> outr(w, metrics.collect{ m -> m.getSimilarity(target, w)} )} |
| // => |
| // Word/Metric Levenshtein MongeElkan JaroWinkler Soundex |
| // quick 0 0.11 0 0.66 |
| // brown 0.16 0.23 0.5 0.73 |
| // fox 0 0.2 0 0.66 |
| // jumped 0 0.2 0 0.66 |
| // over 0 0.44 0 0.55 |
| // the 0 0.33 0 0.55 |
| // lazy 0.33 0.5 0.44 0.66 |
| // dog 0 0.2 0 0.66 |
| // ballast 0.85 0.83 0.96 1 |
| // ballasts 0.75 0.83 0.94 0.94 |
| // balustrade 0.5 0.93 0.3 0.94 |
| // balustrades 0.45 0.93 0.3 0.94 |
| // blast 0.83 0.8 0.88 1 |
| // blasted 0.57 0.66 0.8 0.94 |
| // blaster 0.57 0.66 0.8 0.94 |
| // blasters 0.5 0.66 0.77 0.94 |
| // blasting 0.5 0.66 0.77 0.94 |
| // blasts 0.66 0.66 0.84 0.94 |
| // to implement the example, iterate through /usr/dict/words selecting words |
| // where one or a combination of metrics are greater than some threshold |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.14 |
| //---------------------------------------------------------------------------------- |
| // \G anchors each match at the position where the previous match ended |
| n = " 49 here" |
| println n.replaceAll(/\G /,'0') |
| // => 00049 here |
| |
| str = "3,4,5,9,120" |
| print 'Found numbers:' |
| str.eachMatch(/\G,?(\d+)/){ print ' ' + it[1] } |
| println() |
| // => Found numbers: 3 4 5 9 120 |
| |
| // Groovy doesn't have the String.pos or a /c re modifier like Perl |
| // But it does have similar functionality. Matcher has start() and |
| // end() for finding the position and Matcher's usePattern() allows |
| // you to swap patterns without changing the buffer position |
| text = 'the year 1752 lost 10 days on the 3rd of September' |
| p = ~/(?<=\D)(\d+)/ |
| m = p.matcher(text) |
| while (m.find()) { |
| println 'Found ' + m.group() + ' starting at pos ' + m.start() + |
| ' and ending at pos ' + m.end() |
| } |
| // now reset pos back to between 1st and 2nd numbers |
| if (m.find(16)) { println 'Found ' + m.group() } |
| // => |
| // Found 1752 starting at pos 9 and ending at pos 13 |
| // Found 10 starting at pos 19 and ending at pos 21 |
| // Found 3 starting at pos 34 and ending at pos 35 |
| // Found 10 |
| |
| // Alternatively you can use Scanner in Java 5-7+: |
| p1 = ~/(?<=\D)(\d+)/ |
| p2 = ~/\S+/ |
| s = new Scanner(text) |
| // findInLine() returns null when nothing matches, which ends the loop |
| while ((f = s.findInLine(p1))) { println 'Found: ' + f } |
| if ((f = s.findInLine(p2))) { println "Found $f after the last number." } |
| // => |
| // Found: 1752 |
| // Found: 10 |
| // Found: 3 |
| // Found rd after the last number. |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.15 |
| //---------------------------------------------------------------------------------- |
| // sample markup followed by a series of patterns for locating <b><i>...</i></b> |
| // spans; the patterns are only defined here, none of them is applied in this section |
| html = '<b><i>this</i> and <i>that</i> are important</b> Oh, <b><i>me too!</i></b>' |
| |
| greedyHtmlStripPattern = ~/(?m)<.*>/ // not good |
| nonGreedyHtmlStripPattern = ~/(?m)<.*?>/ // not great |
| simpleNested = ~/(?mx)<b><i>(.*?)<\/i><\/b>/ |
| // match BEGIN, then not BEGIN, then END |
| generalPattern = ~/BEGIN((?:(?!BEGIN).)*)END/ |
| betterButInefficient1 = ~/(?mx)<b><i>( (?: (?!<\/b>|<\/i>). )* ) <\/i><\/b>/ |
| betterButInefficient2 = ~/(?mx)<b><i>( (?: (?!<\/[ib]>). )* ) <\/i><\/b>/ |
| |
| // kept as a plain String (not compiled); the (?x) flag allows the embedded # comments |
| efficientPattern = '''(?mx) |
| <b><i> |
| [^<]* # stuff not possibly bad, and not possibly the end. |
| (?: |
| # at this point, we can have '<' if not part of something bad |
| (?! </?[ib]> ) # what we can't have |
| < # okay, so match the '<' |
| [^<]* # and continue with more safe stuff |
| ) * |
| </i></b> |
| ''' //' |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.16 |
| //---------------------------------------------------------------------------------- |
| // duplicate word finder: \1 refers back to the chunk captured by the first group |
| input = 'This is a test\nTest of the duplicate word finder.\n' |
| dupWordPattern = '''(?ix) |
| \\b # start at word boundary |
| (\\S+) # find chunk of non-whitespace |
| \\b # until a word boundary |
| ( |
| \\s+ # followed by whitespace |
| \\1 # and that same chunk again |
| \\b # and a word boundary |
| ) + # one or more times |
| ''' |
| finder = input =~ dupWordPattern |
| println 'Found duplicate word: ' + finder[0][1] |
| // => Found duplicate word: test |
| |
| // overlap finder: group 2 must end the first word and (via \2) start the second one |
| astr = 'nobody' |
| bstr = 'bodysnatcher' |
| m = "$astr $bstr" =~ /^(\w+)(\w+) \2(\w+)$/ |
| actual = "${m[0][2]} overlaps in ${m[0][1]}-${m[0][2]}-${m[0][3]}" |
| assert actual == 'body overlaps in no-body-snatcher' |
| |
| // factorization with a regex: while the string of o's can be split into equal |
| // chunks of two or more, print the smallest chunk size and shrink each chunk to one 'o' |
| cap = 'o' * 180 |
| while (m = (cap =~ /^(oo+?)\1+$/)) { |
| p1 = m[0][1] |
| print p1.size() + ' ' |
| cap = cap.replaceAll(p1,'o') |
| } |
| println cap.size() |
| // => 2 2 3 3 5 |
| |
| // diophantine |
| // solve for 12x + 15y + 16z = 281, maximizing x |
| if ((m = ('o' * 281) =~ /^(o*)\1{11}(o*)\2{14}(o*)\3{15}$/)) { |
| x=m[0][1].size(); y=m[0][2].size(); z=m[0][3].size() |
| println "One solution is: x=$x; y=$y; z=$z" |
| } else println "No solution." |
| // => One solution is: x=17; y=3; z=2 |
| |
| // using different quantifiers: |
| // /^(o+)\1{11}(o+)\2{14}(o+)\3{15}$/ |
| // => One solution is: x=17; y=3; z=2 |
| |
| // /^(o*?)\1{11}(o*)\2{14}(o*)\3{15}$/ |
| // => One solution is: x=0; y=7; z=11 |
| |
| // /^(o+?)\1{11}(o*)\2{14}(o*)\3{15}$/ |
| // => One solution is: x=1; y=3; z=14 |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.17 |
| //---------------------------------------------------------------------------------- |
| // Groovy doesn't currently support x!~y so you must use the !(x=~y) style |
| |
| // alpha OR beta |
| // ==~ requires the whole string to match, =~ only needs a match somewhere |
| assert 'alpha' ==~ /alpha|beta/ |
| assert 'beta' ==~ /alpha|beta/ |
| assert 'betalpha' =~ /alpha/ || 'betalpha' =~ /beta/ |
| |
| // alpha AND beta |
| // both lookaheads are tested from the same position, so the order of the words is irrelevant |
| assert !('alpha' =~ /(?=.*alpha)(?=.*beta)/) |
| assert 'alphabeta' =~ /(?=.*alpha)(?=.*beta)/ |
| assert 'betalpha' =~ /(?=.*alpha)(?=.*beta)/ |
| assert 'betalpha' =~ /alpha/ && 'betalpha' =~ /beta/ |
| |
| // alpha AND beta, no overlap |
| assert 'alphabeta' =~ /alpha.*beta|beta.*alpha/ |
| assert !('betalpha' =~ /alpha.*beta|beta.*alpha/) |
| |
| // NOT beta |
| assert 'alpha gamma' =~ /^(?:(?!beta).)*$/ |
| assert !('alpha beta gamma' =~ /^(?:(?!beta).)*$/) |
| |
| // NOT bad BUT good |
| assert !('GOOD and BAD' =~ /(?=(?:(?!BAD).)*$)GOOD/) |
| assert !('BAD' =~ /(?=(?:(?!BAD).)*$)GOOD/) |
| assert !('WORSE' =~ /(?=(?:(?!BAD).)*$)GOOD/) |
| assert 'GOOD' =~ /(?=(?:(?!BAD).)*$)GOOD/ |
| |
| // minigrep could be done as a one-liner as follows |
| // groovy -p -e "if (line =~ /pat/) return line" datafile |
| |
| // 'labelled' contains both 'lab' and 'bell', but the two overlap |
| string = 'labelled' |
| assert string =~ /^(?=.*bell)(?=.*lab)/ |
| assert string =~ /bell/ && string =~ 'lab' |
| fakeAddress = "blah bell blah " |
| // commented (?x) form of the two-lookahead pattern asserted above |
| murrayHillRegex = '''(?x) |
| ^ # start of string |
| (?= # zero-width lookahead |
| .* # any amount of intervening stuff |
| bell # the desired bell string |
| ) # rewind, since we were only looking |
| (?= # and do the same thing |
| .* # any amount of intervening stuff |
| lab # and the lab part |
| ) |
| ''' |
| assert string =~ murrayHillRegex |
| assert !(fakeAddress =~ murrayHillRegex) |
| |
| // eliminate overlapping |
| assert !(string =~ /(?:^.*bell.*lab)|(?:^.*lab.*bell)/) |
| |
| // commented (?x) form of the non-overlapping pattern asserted above |
| brandRegex = '''(?x) |
| (?: # non-capturing grouper |
| ^ .*? # any amount of stuff at the front |
| bell # look for a bell |
| .*? # followed by any amount of anything |
| lab # look for a lab |
| ) # end grouper |
| | # otherwise, try the other direction |
| (?: # non-capturing grouper |
| ^ .*? # any amount of stuff at the front |
| lab # look for a lab |
| .*? # followed by any amount of anything |
| bell # followed by a bell |
| ) # end grouper |
| ''' |
| assert !(string =~ brandRegex) |
| |
| // "does not contain waldo": every character must be one at which 'waldo' does not start |
| map = 'the great baldo' |
| |
| assert map =~ /^(?:(?!waldo).)*$/ |
| noWaldoRegex = '''(?x) |
| ^ # start of string |
| (?: # non-capturing grouper |
| (?! # look ahead negation |
| waldo # is he ahead of us now? |
| ) # is so, the negation failed |
| . # any character (cuzza /s) |
| ) * # repeat that grouping 0 or more |
| $ # through the end of the string |
| ''' |
| assert map =~ noWaldoRegex |
| |
| // on unix systems use: realFakedInput = 'w'.process().text |
| // canned text standing in for the output of the unix `w` command; trimmed and |
| // terminated with exactly one newline |
| fakedInput = ''' |
| 7:15am up 206 days, 13:30, 4 users, load average: 1.04, 1.07, 1.04 |
| USER TTY FROM LOGIN@ IDLE JCPU PCPU WHAT |
| tchrist tty1 5:16pm 36days 24:43 0.03s xinit |
| tchrist tty2 5:19pm 6days 0.43s 0.43s -tcsh |
| tchrist ttyp0 chthon 7:58am 3days 23.44s 0.44s -tcsh |
| gnat ttyS4 coprolith 2:01pm 13:36m 0.30s 0.30s -tcsh |
| '''.trim() + '\n' |
| |
| // Returns the lines of `input` that mention 'tchrist' but contain no 'ttyp'. |
| def miniGrepMethod(input) { |
|     def wanted = '^(?!.*ttyp).*tchrist' |
|     def rows = input.split('\n') |
|     rows.findAll { row -> row =~ wanted } |
| } |
| assert miniGrepMethod(fakedInput).size() == 2 |
| |
| // same filter as miniGrepMethod, applied to the whole text at once: the (?m) |
| // flag lets ^ match at the start of every line |
| findUserRegex = '''(?xm) |
| ^ # anchored to the start |
| (?! # zero-width look-ahead assertion |
| .* # any amount of anything (faster than .*?) |
| ttyp # the string you don't want to find |
| ) # end look-ahead negation; rewind to start |
| .* # any amount of anything (faster than .*?) |
| tchrist # now try to find Tom |
| ''' |
| assert (fakedInput =~ findUserRegex).count == 2 |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.18 |
| //---------------------------------------------------------------------------------- |
| // Groovy uses Unicode character encoding |
| // special care needs to be taken when using unicode because the same visible |
| // character can have different lengths, e.g. à can be encoded as the two |
| // characters \u0061\u0300 and is also supported in legacy character sets by the |
| // single character \u00E0. To match this character reliably, you can't use any |
| // of /./, /../, /a/, /\u00E0/, /\u0061\u0300/ or /\pL/. The correct way is to |
| // use /\X/ (not currently supported) or one of /\pL\pM*/ to ensure that it is |
| // a letter or /\PM\pM*/ when you just want to combine multicharacter sequences |
| // and don't care whether it is a letter |
| // Prints the size of `s` and, for a fixed list of patterns, whether `s` matches each one exactly. |
| def checkUnicode(s) { |
|     println s + ' is of size ' + s.size() |
|     def probes = [ |
|         ['Exactly matches /./ ', /./], |
|         ['Exactly matches /../ ', /../], |
|         ['Exactly matches /a/ ', /a/], |
|         ['Exactly matches /\\u00E0/ ', /\u00E0/], |
|         ['Exactly matches /\\u0061\\u0300/ ', /\u0061\u0300/], |
|         ['Exactly matches /\\pL/ ', /\pL/], |
|         ['Exactly matches /\\pL\\pM*/ ', /\pL\pM*/], |
|         ['Exactly matches /\\PM\\pM*/ ', /\PM\pM*/] |
|     ] |
|     for (probe in probes) { |
|         println probe[0] + (s ==~ probe[1]) |
|     } |
| } |
| // three spellings of the same visible character: the literal precomposed letter |
| // (restored here; it had been replaced by a mis-encoded character), the base letter |
| // plus combining accent, and the escaped precomposed letter |
| checkUnicode('à') |
| checkUnicode('\u0061\u0300') |
| checkUnicode('\u00E0') |
| // => |
| // à is of size 1 |
| // Exactly matches /./ true |
| // Exactly matches /../ false |
| // Exactly matches /a/ false |
| // Exactly matches /\u00E0/ true |
| // Exactly matches /\u0061\u0300/ false |
| // Exactly matches /\pL/ true |
| // Exactly matches /\pL\pM*/ true |
| // Exactly matches /\PM\pM*/ true |
| // a? is of size 2 |
| // Exactly matches /./ false |
| // Exactly matches /../ true |
| // Exactly matches /a/ false |
| // Exactly matches /\u00E0/ false |
| // Exactly matches /\u0061\u0300/ true |
| // Exactly matches /\pL/ false |
| // Exactly matches /\pL\pM*/ true |
| // Exactly matches /\PM\pM*/ true |
| // à is of size 1 |
| // Exactly matches /./ true |
| // Exactly matches /../ false |
| // Exactly matches /a/ false |
| // Exactly matches /\u00E0/ true |
| // Exactly matches /\u0061\u0300/ false |
| // Exactly matches /\pL/ true |
| // Exactly matches /\pL\pM*/ true |
| // Exactly matches /\PM\pM*/ true |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.19 |
| //---------------------------------------------------------------------------------- |
| // The Perl Cookbook categorizes this as a hard problem ... mostly for |
| // reasons not related to the actual regex - but with a 60-line regex |
| // perhaps there are some issues with that too. Further details: |
| // http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/ckaddr.gz |
| |
| // removes one level of parenthesised comment (no nested parentheses) |
| simpleCommentStripper = /\([^()]*\)/ |
| println 'Book Publishing <marketing@books.com> (We will spam you)'.replaceAll(simpleCommentStripper, '') |
| // => Book Publishing <marketing@books.com> |
| |
| // inspired by the fact that domain names can contain any foreign character these days |
| modern = /^.+@[^\.].*\.[a-z]{2,}>?$/ |
| |
| // .Net |
| lenient = /\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*/ |
| |
| // a little more checking |
| // (the local part also admits ' ' and '<' so that a "Name <address>" form is accepted) |
| strict = /^[_a-zA-Z0-9- <]+(\.[_a-zA-Z0-9- <]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\./ + |
| /(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))>?$/ |
| |
| addresses = ['someuser@somehost.com', |
| 'Book Publishing <marketing@books.com>'] |
| // every sample address must be accepted by all three patterns |
| addresses.each{ |
| assert it =~ lenient |
| assert it =~ strict |
| assert it =~ modern |
| } |
| |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.20 |
| //---------------------------------------------------------------------------------- |
| // Prints the action whose name starts (case-insensitively) with `ans`, or 'No Match'. |
| def findAction(ans) { |
|     def re = '(?i)^' + Pattern.quote(ans) |
|     // candidates are tried in this order, so an ambiguous prefix resolves to the first hit |
|     def replies = [ |
|         SEND: "Action is send", |
|         STOP: "Action is stop", |
|         ABORT: "Action is abort", |
|         EDIT: "Action is edit" |
|     ] |
|     def hit = replies.find { word, reply -> word =~ re } |
|     if (hit != null) println hit.value |
|     else println 'No Match' |
| } |
| // expected output, in order: |
| // => No Match |
| // => Action is edit |
| // => Action is send |
| // => Action is edit |
| ['edit something', 'edit', 'se', 'e'].each { reply -> findAction(reply) } |
| |
| // Maps every prefix that identifies exactly one of `words` to that word. |
| // Prefixes shared with another word are left out; the result is sorted by prefix. |
| def buildAbbrev(words) { |
|     def table = new TreeMap() |
|     for (w in words) { |
|         def rivals = words - w |
|         for (n in 0..<w.size()) { |
|             def prefix = w[0..n] |
|             def ambiguous = rivals.any { other -> other.startsWith(prefix) } |
|             if (!ambiguous) table[prefix] = w |
|         } |
|     } |
|     table |
| } |
| def abbrevWords = 'send stop abort edit'.split(' ').toList() |
| println buildAbbrev(abbrevWords) |
| // => ["a":"abort", "ab":"abort", "abo":"abort", "abor":"abort", "abort":"abort", |
| // "e":"edit", "ed":"edit", "edi":"edit", "edit":"edit", "se":"send", "sen":"send", |
| // "send":"send", "st":"stop", "sto":"stop", "stop":"stop"] |
| |
| // miniShellScript: |
| // dummy methods |
| // placeholder for the 'edit' command: only reports that it was called |
| def invokeEditor() { |
|     println 'invoking editor' |
| } |
| // placeholder for the 'send' command: reports the call together with the current date |
| def deliverMessage() { |
|     def now = new Date() |
|     println "delivering message at " + now |
| } |
| // command name -> code to run; 'unknown' is the fallback used by the loop below |
| actions = [ |
| edit: this.&invokeEditor, |
| send: this.&deliverMessage, |
| list: { println Runtime.runtime.freeMemory() }, |
| abort: { System.exit(0) }, |
| unknown: { println "Unknown Command"} |
| ] |
| |
| // every unambiguous prefix of a command name maps to the full command name |
| table = buildAbbrev(actions.keySet().toList()) |
| prompt = '\n> ' |
| print 'Enter Commands: edit send list abort' + prompt |
| // read commands from stdin until EOF (or until 'abort' exits the JVM) |
| new BufferedReader(new InputStreamReader(System.in)).eachLine{ line -> |
| def idx = (table.containsKey(line)) ? table[line] : 'unknown' |
| actions[idx]() |
| print prompt |
| } |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.21 |
| //---------------------------------------------------------------------------------- |
| //% gunzip -c ~/mail/archive.gz | urlify > archive.urlified |
| //% urlify ~/mail/*.inbox > ~/allmail.urlified |
| |
| // building blocks that are interpolated into the findUrls GString below |
| urls = '(https?|telnet|gopher|file|wais|ftp|mail)' |
| ltrs = /\w/ |
| gunk = /\#\/~:.?+=&%@!\-/ |
| punc = /.:?\-/ |
| // a literal '$' kept in a variable so that it is not taken as the start of an interpolation |
| doll = /$/ |
| all = /$ltrs$gunk$punc/ |
| |
| findUrls = """(?ix) |
| \\b # start at word boundary |
| ( # begin group 1 { |
| $urls : # need resource and a colon |
| [$all] +? # followed by on or more of any valid |
| # character, but be conservative and |
| # take only what you need to... |
| ) # end group 1 } |
| (?= # look-ahead non-consumptive assertion |
| [$punc]* # either 0 or more punctuation |
| [^$all] # followed by a non-url character |
| | # or else |
| $doll # then end of the string |
| ) |
| """ |
| |
| input = ''' |
| If you find a typo on http://groovy.codehaus.org please |
| send an email to mail:spelling.pedant@codehaus.org |
| ''' |
| |
| // wrap each URL found (group 1) in an HTML anchor |
| println input.replaceAll(findUrls,'<a href="$1">$1</a>') |
| // => |
| // If you find a typo on <a href="http://groovy.codehaus.org">http://groovy.codehaus.org</a> please |
| // send an email to <a href="mail:spelling.pedant@codehaus.org">mail:spelling.pedant@codehaus.org</a> |
| |
| // urlifyScript: |
| //#!/usr/bin/groovy |
| // urlify - wrap HTML links around URL-like constructs |
| // definitions from above |
| // echo every line of every file named on the command line with its URLs wrapped in anchors |
| for (fileName in args) { |
|     def source = new File(fileName) |
|     source.eachLine { text -> |
|         println text.replaceAll(findUrls,'<a href="$1">$1</a>') |
|     } |
| } |
| |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.22 |
| //---------------------------------------------------------------------------------- |
| // @@INCOMPLETE@@ |
| // @@INCOMPLETE@@ |
| |
| //---------------------------------------------------------------------------------- |
| |
| // @@PLEAC@@_6.23 |
| //---------------------------------------------------------------------------------- |
| // roman numerals: optional thousands, then hundreds, tens and units groups |
| romans = /(?i)^m*(d?c{0,3}|c[dm])(l?x{0,3}|x[lc])(v?i{0,3}|i[vx])$/ |
| assert 'cmxvi' =~ romans |
| // can't have tens before 1000s (M) or 100s (C) after 5s (V) |
| assert !('xmvci' =~ romans) |
| |
| // swap first two words |
| assert 'the words'.replaceAll(/(\S+)(\s+)(\S+)/, '$3$2$1') == 'words the' |
| |
| // extract keyword and value |
| m = 'k=v' =~ /(\w+)\s*=\s*(.*)\s*$/ |
| assert m.matches() |
| assert m[0][1] == 'k' |
| assert m[0][2] == 'v' |
| |
| // closure producing a pattern that needs at least n characters |
| hasAtLeastSize = { n -> /.{$n,}/ } |
| assert 'abcdefghijklmnopqrstuvwxyz' =~ hasAtLeastSize(20) |
| |
| // MM/DD/YY HH:MM:SS (lenient - doesn't check HH > 23 etc) |
| d = /\d+/ |
| datetime = "($d)/($d)/($d) ($d):($d):($d)" |
| assert '04/05/2006 10:26:59' =~ datetime |
| |
| // substitute one path prefix for another |
| orig = '/usr/bin/vi' |
| expected = '/usr/local/bin/vi' |
| // the comparison used to be evaluated and thrown away; assert it like the surrounding examples |
| assert orig.replaceAll('/usr/bin','/usr/local/bin') == expected |
| |
| // %XX escape sequences: group 1 captures the two hex digits |
| escapeSequenceRegex = /%([0-9A-Fa-f][0-9A-Fa-f])/ |
| // receives [wholeMatch, hexDigits] and returns the character with that code |
| convertEscapeToChar = { Object[] groups -> |
|     int code = Integer.parseInt(groups[1], 16) |
|     Character.valueOf((char) code) |
| } |
| def unescaped = 'abc%3cdef'.replaceAll(escapeSequenceRegex, convertEscapeToChar) |
| assert unescaped == 'abc<def' |
| |
| // Matches one /* ... */ comment, possibly spanning several lines ((?s) lets '.' match |
| // newlines). The body is matched reluctantly (.*?) so that a match stops at the first |
| // closing delimiter instead of running on to the last one in the text. |
| commentStripper = '''(?xms) |
| /\\* # Match the opening delimiter |
| .*? # Match a minimal number of characters */ |
| \\*/ # Match the closing delimiter |
| ''' |
| |
| // text containing one block comment, and the same text with the comment removed |
| input = ''' |
| a line |
| /* |
| some comment |
| */ |
| another line |
| ''' |
| expected = ''' |
| a line |
| |
| another line |
| ''' |
| |
| assert input.replaceAll(commentStripper,'') == expected |
| |
| // emulate s.trim() |
| // strip leading whitespace, then trailing whitespace |
| assert ' x y '.replaceAll(/^\s+/, '').replaceAll(/\s+$/, '') == 'x y' |
| |
| // convert \\n into \n |
| assert (/a\nb/.replaceAll(/\\n/,"\n") == 'a\nb') |
| |
| // remove package symbol (Groovy/Java doesn't use this as package symbol) |
| assert 'A::B'.replaceAll(/^.*::/, '') == 'B' |
| |
| // match IP Address (requires leading 0's) |
| ipregex = /^([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])\./ + |
| /([01]?\d\d|2[0-4]\d|25[0-5])\.([01]?\d\d|2[0-4]\d|25[0-5])$/ |
| assert !('123.456.789' =~ ipregex) |
| assert '192.168.000.001' =~ ipregex |
| |
| // extract basename |
| assert 'c:/usr/temp.txt'.replaceAll(/^.*\/{1}/, '') == 'temp.txt' |
| |
| // pull the column count out of a termcap-style entry |
| termcap = ':co#80:li#24:' |
| m = (termcap =~ /:co\#(\d+):/) |
| assert m.count == 1 |
| assert m[0][1] == '80' |
| |
| // drop the directory part of a command argument |
| assert 'cmd c:/tmp/junk.txt'.replaceAll(/ \S+\/{1}/, ' ') == 'cmd junk.txt' |
| |
| // classify the operating system by the start of the os.name system property |
| os = System.getProperty('os.name') |
| println 'Is Linux? ' + (os ==~ /(?i)linux.*/) |
| println 'Is Windows? ' + (os ==~ /(?i)windows.*/) |
| println 'Is Mac? ' + (os ==~ /(?i)mac.*/) |
| |
| // join multiline string |
| multi = ''' |
| This is |
| a test |
| '''.trim() |
| assert multi.replaceAll(/(?m)\n\s+/, ' ') == 'This is a test' |
| |
| // nums in string |
| string = 'The 5th test was won today by 10 wickets after 10.5 overs' |
| nums = string =~ /(\d+\.?\d*|\.\d+)/ |
| assert (0..<nums.count).collect{ nums[it][1] }.join(' ') == '5 10 10.5' |
| |
| // capitalize words |
| // (three selections: all-uppercase words, all-lowercase words, capitalized words) |
| words = 'the Capital words ARE hiding' |
| capwords = words =~ /(\b\p{Upper}+\b)/ |
| assert (0..<capwords.count).collect{ capwords[it][1] }.join(' ') == 'ARE' |
| |
| lowords = words =~ /(\b\p{Lower}+\b)/ |
| assert (0..<lowords.count).collect{ lowords[it][1] }.join(' ') == 'the words hiding' |
| |
| capWords = words =~ /(\b\p{Upper}\p{Lower}*\b)/ |
| assert (0..<capWords.count).collect{ capWords[it][1] }.join(' ') == 'Capital' |
| |
| // extract the href targets from anchors |
| input = ''' |
| If you find a typo on <a href="http://groovy.codehaus.org">http://groovy.codehaus.org</a> please |
| send an email to <a href="mail:spelling.pedant@codehaus.org">mail:spelling.pedant@codehaus.org</a> |
| ''' |
| |
| linkRegex = /(?im)<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)[ '"]?>/ //' |
| links = input =~ linkRegex |
| (0..<links.count).each{ println links[it][1] } |
| // => |
| // http://groovy.codehaus.org |
| // mail:spelling.pedant@codehaus.org |
| |
| // find middle initial if any |
| m = 'Lee Harvey Oswald' =~ /^\S+\s+(\S)\S*\s+\S/ |
| initial = m.count ? m[0][1] : "" |
| assert initial == 'H' |
| |
| // inch marks to quotes |
| println 'I said "Hello" to you.'.replaceAll(/"([^"]*)"/, /``$1''/) //" |
| // => I said ``Hello'' to you. |
| |
| // extract sentences (2 spaces or newline after punctuation) |
| input = ''' |
| Is this a sentence? |
| Yes! And so |
| is this. And the fourth. |
| ''' |
| sentences = [] |
| // normalise line breaks first, then pick out the sentences |
| strip = input.replaceAll(/(\p{Punct})\n/, '$1 ').replaceAll(/\n/, ' ').replaceAll(/ {3,}/,' ') |
| m = strip =~ /(\S.*?\p{Punct})(?= |\Z)/ |
| (0..<m.count).each{ sentences += m[it][1] } |
| assert sentences == ["Is this a sentence?", "Yes!", "And so is this.", "And the fourth."] |
| |
| // YYYY-MM-DD |
| m = '2007-2-28' =~ /(\d{4})-(\d\d?)-(\d\d?)/ |
| assert m.matches() |
| assert ['2007', '2', '28'] == [m[0][1], m[0][2], m[0][3]] |
| |
| // US phone numbers: optional leading 0/1, area code with or without parentheses |
| usPhoneRegex = /^[01]?[- .]?(\([2-9]\d{2}\)|[2-9]\d{2})[- .]?\d{3}[- .]?\d{4}$/ |
| numbers = ''' |
| (425) 555-0123 |
| 425-555-0123 |
| 425 555 0123 |
| 1-425-555-0123 |
| '''.trim().split('\n').toList() |
| assert numbers.every{ it ==~ usPhoneRegex } |
| |
| // exclamations of the "oh my ..." kind |
| exclaimRegex = /(?i)\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b/ |
| assert 'Oh my Goodness!' =~ exclaimRegex |
| assert !('Golly gosh' =~ exclaimRegex) |
| |
| // lines terminated by any of \r, \n, \r\n or \n\r (written as octal escapes) |
| input = 'line 1\rline 2\nline\r\nline 3\n\rline 4' |
| m = input =~ /(?m)^([^\012\015]*)(\012\015?|\015\012?)/ |
| assert m.count == 4 |
| |
| |
| // @@PLEAC@@_6.22 |
| // not an exact equivalent to original cookbook but has |
| // a reasonable subset of mostly similar functionality |
| // instead of -r recursion option, use Ant fileset wildcards |
| // e.g. **/*.c. You can also specify an excludes pattern |
| // e.g. **/*.* -X **/*.h will process all but header files |
| // (currently not optimised and with minimal error checking) |
| // uses jopt-simple (jopt-simple.sf.net) |
| |
| // Declare the command line options. Each option letter is also kept in a |
| // variable so that the code further down can query the parsed options by name. |
| op = new joptsimple.OptionParser() |
| NOCASE = 'i'; op.accepts( NOCASE, "case insensitive" ) |
| WITHN = 'n'; op.accepts( WITHN, "display line/para with line/para number" ) |
| WITHF = 'H'; op.accepts( WITHF, "display line/para with filename" ) |
| NONAME = 'h'; op.accepts( NONAME, "hide filenames" ) |
| COUNT = 'c'; op.accepts( COUNT, "give count of lines/paras matching" ) |
| TCOUNT = 'C'; op.accepts( TCOUNT, "give count of total matches (multiple per line/para)" ) |
| WORD = 'w'; op.accepts( WORD, "word boundaries only" ) |
| EXACT = 'x'; op.accepts( EXACT, "exact matches only" ) |
| INVERT = 'v'; op.accepts( INVERT, "invert search sense (lines that DON'T match)" ) |
| EXCLUDE = 'X'; op.accepts( EXCLUDE, "exclude files matching pattern [default is '**/*.bak']" ). |
| withRequiredArg().describedAs('path_pattern') |
| MATCH = 'l'; op.accepts( MATCH, "list names of files with matches" ) |
| NOMATCH = 'L'; op.accepts( NOMATCH, "list names of files with no match" ) |
| PARA = 'p'; op.accepts( PARA, "para mode (.* matches newlines)" ). |
| withOptionalArg().describedAs('para_pattern') |
| EXPR = 'e'; op.accepts( EXPR, "expression (when pattern begins with '-')" ). |
| withRequiredArg().describedAs('pattern') |
| FILE = 'f'; op.accepts( FILE, "file containing pattern" ). |
| withRequiredArg().describedAs('filename') |
| HELP = 'help'; op.accepts( HELP, "display this message" ) |
| |
| // Driver: parse the command line, derive the effective regex and its modifiers, |
| // then grep either stdin or every file selected by the Ant fileset patterns. |
| options = op.parse(args) |
| params = options.nonOptionArguments() |
| // -e and -f supply the pattern themselves, so no positional argument is required then |
| def patternFromOption = options.wasDetected( EXPR ) || options.wasDetected( FILE ) |
| if (options.wasDetected( HELP )) { |
|     op.printHelpOn( System.out ) |
| } else if (params.size() == 0 && !patternFromOption) { |
|     println "Usage: grep [OPTION]... PATTERN [FILE]...\nTry 'grep --$HELP' for more information." |
| } else { |
|     modifiers = [] |
|     paraPattern = '' |
|     o_withn = options.wasDetected( WITHN ) |
|     o_withf = options.wasDetected( WITHF ) |
|     o_noname = options.wasDetected( NONAME ) |
|     o_count = options.wasDetected( COUNT ) |
|     o_tcount = options.wasDetected( TCOUNT ) |
|     o_invert = options.wasDetected( INVERT ) |
|     o_match = options.wasDetected( MATCH ) |
|     o_nomatch = options.wasDetected( NOMATCH ) |
|     if (options.wasDetected( EXPR )) { |
|         pattern = options.valueOf( EXPR ) |
|     } else if (options.wasDetected( FILE )) { |
|         pattern = new File(options.valueOf( FILE )).text.trim() |
|     } else { |
|         // first positional argument is the pattern, the rest are file patterns; |
|         // drop(1) yields an empty list when only the pattern was given |
|         pattern = params[0] |
|         params = params.drop(1) |
|     } |
|     // excludes is handed to Ant as an attribute value, so it has to be a plain |
|     // pattern string (a List would be passed on in its "[...]" string form) |
|     if (options.wasDetected( EXCLUDE )) excludes = options.valueOf( EXCLUDE ) |
|     else excludes = '**/*.bak' |
|     if (options.wasDetected( EXACT )) pattern = '^' + pattern + '$' |
|     else if (options.wasDetected( WORD )) pattern = /\b$pattern\b/ |
|     if (options.wasDetected( NOCASE )) modifiers += 'i' |
|     if (options.wasDetected( PARA )) { |
|         // paragraph separator: the option's argument if present, otherwise an empty line |
|         if (options.hasArgument( PARA )) paraPattern = options.valueOf( PARA ) |
|         else paraPattern = '^$' |
|         paraPattern = '(?sm)' + paraPattern |
|         modifiers += 'sm' |
|     } |
|     // prepend the collected flags as an embedded flag group, e.g. (?ism) |
|     if (modifiers) pattern = "(?${modifiers.join()})" + pattern |
| |
|     if (params.size() == 0) grepStream(System.in, '<stdin>') |
|     else { |
|         scanner = new AntBuilder().fileScanner { |
|             fileset(dir:'.', includes:params.join(','), excludes:excludes) |
|         } |
|         for (f in scanner) { |
|             grepStream(new FileInputStream(f), f) |
|         } |
|     } |
| } |
| |
| /** |
|  * Searches one input for `pattern` and prints the result according to the o_* flags. |
|  * |
|  * The input is cut into paragraphs when `paraPattern` is set and into lines otherwise. |
|  * Without a file-level option (o_match, o_nomatch, o_count, o_tcount) each selected piece |
|  * is printed, optionally prefixed by `name` and its 1-based index; with one of them a |
|  * single summary line is printed for the whole input instead. |
|  * |
|  * @param s    the input to read; must support `text` and `readLines()` |
|  * @param name label printed for this input (file or '<stdin>') |
|  */ |
| def grepStream(s, name) { |
|     def count = 0      // pieces selected |
|     def tcount = 0     // individual matches within the selected pieces |
|     def pieces = paraPattern ? s.text.split(paraPattern) : s.readLines() |
|     def fileMode = o_match || o_nomatch || o_count || o_tcount |
|     pieces.eachWithIndex { line, index -> |
|         def m = line =~ pattern |
|         boolean found = m.count |
|         // a piece is selected when it matches, or when it does not match and -v was given |
|         if (found == o_invert) return |
|         count++ |
|         tcount += m.count |
|         if (fileMode) return |
|         linefields = [] |
|         if (o_withf) linefields += name |
|         if (o_withn) linefields += index + 1 |
|         linefields += line |
|         println linefields.join(':') |
|     } |
|     // -l hides inputs without a selected piece, -L hides inputs with one |
|     def suppressed = (o_match && count == 0) || (o_nomatch && count != 0) |
|     if (fileMode && !suppressed) { |
|         filefields = [] |
|         if (!o_noname) filefields += name |
|         if (o_tcount) filefields += tcount |
|         else if (o_count) filefields += count |
|         println filefields.join(':') |
|     } |
| } |
| //---------------------------------------------------------------------------------- |