| #!/usr/bin/env ruby |
| require 'json' |
| |
# Loud reminder, printed on standard output every time this file is loaded.
$stdout.puts 'DANGER, WILL ROBINSON! THIS IS NOT READY FOR PRODUCTION USE!'
| |
# Map usernames between different systems (id.a.o and JIRA|Confluence)
# Input data is:
#   committers = [{ID=>'curcuru', NAME=>'Shane Curcuru', MAIL=>['mail1', 'mail2',...]}, ...]
#   other = [{ID=>'curcuru', NAME=>'Shane Curcuru', MAIL=>'mailone'}, ...]
# Analyzes matching IDs and emails, and returns:
#   matches = hash by committer id of all committers, and if they matched
#     SAME:... committer ID and an email address match exactly
#       Most likely committer has same ID on both
#     DIFF:... committer ID matches, but none of the emails matched
#       Most likely the other account is a different person than committer
#     NONE:... there were no ID matches
#       Committer's ID is not found in other system
#   crossmatches = hash by email address of any other matches from
#     any of the other system's emails to any committer's emails
#     REVIEW:... email address(es) matched, but IDs did not
#       Note: emails may not be unique across accounts on either side
#       Need to manually investigate, since may involve multiple people/accounts
#     DIFF: an email address matched, but the IDs did not
#       Manually investigate to see why
# NOTE: You *must* manually evaluate the results!
# Cross-references committer accounts against the accounts of another system,
# matching first by account id and then by email address.
# All methods are callable directly on the module (NameMap.report, ...).
module NameMap
  extend self

  COMMITTER_JSON = 'https://whimsy.apache.org/roster/committer/index.json'.freeze
  # Keys shared by committer records and other-system records
  ID = 'id'.freeze
  MAIL = 'mail'.freeze
  NAME = 'name'.freeze

  TEST_COMMITTERS = [ # Drawn from Whimsy's committer data
    {
      'id' => 'curcuru',
      'name' => 'Shane Curcuru',
      'mail' => [
        'asf@shanecurcuru.org',
        'asfl@shanecurcuru.org',
        'curcuru@apache.org'
      ],
      'member' => true
    },
    {
      'id' => 'makemyday',
      'name' => 'Clint Eastwood',
      'mail' => [
        'clint@eastwood.com',
        'gun@smoke.org'
      ],
    },
    {
      'id' => 'robocop',
      'name' => 'Peter Weller',
      'mail' => [
        'peter@buckaroo.com',
        'both@lists.find',
      ],
    },
    {
      'id' => 'laurel',
      'name' => 'Yanni (musician)',
      'mail' => [
        'both@lists.find',
      ],
    },
    {
      'id' => 'emailUnMatch',
      'name' => 'Test Case1c',
      'mail' => [
        'email@example.com',
      ],
    },
  ].freeze

  TEST_OTHER = [ # Any other system must provide id,name,email for each user
    {
      'id' => 'curcuru',
      'name' => 'Shane Curcuru',
      'mail' => 'asf@shanecurcuru.org',
    },
    {
      'id' => 'bogie',
      'name' => 'Rick Blaine',
      'mail' => 'piano@sam.org',
    },
    {
      'id' => 'makemyday',
      'name' => 'Doris Day',
      'mail' => 'doris@day.movies',
    },
    {
      'id' => 'yanni',
      'name' => 'Laurel Hardy',
      'mail' => 'both@lists.find',
    },
    {
      'id' => 'yannidouble',
      'name' => 'Laurel and Hardy',
      'mail' => 'both@lists.find',
    },
    {
      'id' => 'emailNotMatch',
      'name' => 'Test Case1o',
      'mail' => 'email@example.com',
    },
  ].freeze

  # Read committer accounts
  # @param io JSON text, or any object responding to #read (File, StringIO, ...)
  #   that yields JSON text; nil/false selects the built-in TEST_COMMITTERS
  # @return parsed json data (or TEST_COMMITTERS)
  def get_committers(io)
    return TEST_COMMITTERS unless io

    # JSON.parse wants a String, so drain stream-like arguments first
    JSON.parse(io.respond_to?(:read) ? io.read : io)
  end

  # Read other system accounts
  # TODO Depends on file format of exported other system accounts
  # @param f filename to read from (currently ignored; see TODO)
  # @return TEST_OTHER sample data
  def get_other(f)
    TEST_OTHER
  end

  # Transform committer accounts into lookup hashes
  # @param committers array from COMMITTER_JSON
  # @return byid, bymail - hashes for lookups to committer accounts
  #   byid - hash by id of data
  #   bymail - hash by email of array of datum (in case non-unique emails)
  def hash_committers(committers)
    byid = {}
    bymail = {}
    committers.each do |hsh|
      byid[hsh[ID]] = hsh
      # Committers can have multiple emails; Array() also tolerates a record
      # whose mail is missing or is a single string
      Array(hsh[MAIL]).each { |addr| (bymail[addr] ||= []) << hsh }
    end
    [byid, bymail]
  end

  # Transform other system accounts into lookup hashes
  # @param other array of hashes including 'id', 'name', 'mail' keys
  # @return byid, bymail - hashes for lookups to other system accounts
  #   byid - hash by id of data
  #   bymail - hash by email of array of datum (in case non-unique emails)
  def hash_other(other)
    byid = {}
    bymail = {}
    other.each do |hsh|
      byid[hsh[ID]] = hsh
      (bymail[hsh[MAIL]] ||= []) << hsh # other systems supply one email per account
    end
    [byid, bymail]
  end

  # Compare committer ids to other system account ids
  # @param cids - hash by id of committer data
  # @param cmails - hash by email of [committer1, ...]
  # @param oids - hash by id of other system account data
  # @param omails - hash by email of [other1, ...]
  # @return matches, crossmatches - hash by committer id of SAME/DIFF/NONE notes;
  #   hash by email of DIFF/REVIEW notes for emails cross-matched
  def compare(cids, cmails, oids, omails)
    # For every committer, check for a matching account in other system
    matches = {}
    cids.each do |cid, committer|
      matches[cid] =
        if oids.key?(cid)
          id_match_note(committer, oids[cid])
        else
          'NONE:no id match found'
        end
    end

    # Also cross-check email addresses of other system to all committer emails
    crossmatches = {}
    omails.each do |omail, other_accounts|
      next unless cmails.key?(omail)

      note = crossmatch_note(omail, cmails[omail], other_accounts)
      crossmatches[omail] = note if note
    end
    [matches, crossmatches]
  end

  # Compare a committer list to another system's list
  # @param cio JSON text or stream to read committer accounts from (nil = test data)
  # @param ofile filename to read other system accounts from
  # @return matches, crossmatches - hash of committer ids matched or not; hash of emails cross-matched
  def report(cio = nil, ofile = nil)
    cids, cmails = hash_committers(get_committers(cio))
    oids, omails = hash_other(get_other(ofile))
    compare(cids, cmails, oids, omails)
  end

  # Check for email duplicates in committer roster
  # @param io JSON text or stream of committer accounts (nil = test data)
  # @return hash by email of the ids sharing that email (each id followed by a comma)
  # @return histogram of how many aliases committers list (count of emails => number of committers)
  def committer_dups(io)
    cids, cmails = hash_committers(get_committers(io))

    histogram = Hash.new(0)
    cids.each_value { |hsh| histogram[Array(hsh[MAIL]).length] += 1 }

    dups = {}
    cmails.each do |addr, ary|
      next unless ary.length > 1

      # Trailing comma after every id is kept so existing output stays unchanged
      dups[addr] = ary.map { |hsh| "#{hsh[ID]}," }.join
    end
    [dups, histogram]
  end

  private

  # Describe how a committer compares to the other-system account with the same id.
  def id_match_note(committer, other)
    mails = Array(committer[MAIL])
    omail = other[MAIL]
    # If any of our mails matches exactly, log a likely match
    if mails.any? { |caddr| caddr.eql?(omail) }
      return "SAME:email match:(#{committer[NAME]},#{omail}):(#{other[NAME]},#{omail})"
    end

    # None of our emails matched: show the lone address, or just how many there were
    shown = mails.length == 1 ? mails.first : "#{mails.length} addresses"
    "DIFF:email no match:(#{committer[NAME]},#{shown}):(#{other[NAME]},#{omail})"
  end

  # Describe accounts on both sides sharing one email; nil when there is nothing to report.
  def crossmatch_note(mail, committer_accounts, other_accounts)
    if committer_accounts.length == 1 && other_accounts.length == 1
      committer = committer_accounts.first
      other = other_accounts.first
      # Same id on both sides was already reported by the id comparison
      return nil if committer[ID].eql?(other[ID])

      "DIFF:id no match:(#{committer[ID]},#{committer[NAME]}):(#{other[ID]},#{other[NAME]})"
    else
      # Several accounts share this email on at least one side: list them all
      "REVIEW:#{mail}:#{account_list(committer_accounts)}:#{account_list(other_accounts)}"
    end
  end

  # Render accounts as consecutive "(id,name)" groups.
  def account_list(accounts)
    accounts.map { |itm| "(#{itm[ID]},#{itm[NAME]})" }.join
  end
end
| |
#### MAIN TESTING CODE
# Runs the comparison against the built-in sample data and prints both result
# hashes as JSON. Guarded so it only executes when this file is run as a
# script, not when it is loaded with require.
if __FILE__ == $PROGRAM_NAME
  matches, crossmatches = NameMap.report
  puts JSON.pretty_generate(matches)
  puts JSON.pretty_generate(crossmatches)
end
| |
| # dups, histogram = NameMap.committer_dups(File.read('committerlist-from-whimsy.json')) |
| # puts JSON.pretty_generate(dups) |
| # puts JSON.pretty_generate(histogram) |
| |