| #!/usr/bin/env ruby |
| |
# The page builds relative links, so a path (even a bare slash) must follow
# the script name; when it is missing or empty, redirect to the same script
# with a trailing slash and stop.
if ENV['PATH_INFO'].to_s.empty?
  print "Status: 301 Moved Permanently\r\n",
        "Location: #{ENV['SCRIPT_URL']}/\r\n",
        "\r\n"
  exit
end
| |
| $LOAD_PATH.unshift File.realpath(File.expand_path('../../lib', __FILE__)) |
| require 'json' |
| require 'net/http' |
| require 'time' # for httpdate |
| |
PAGETITLE = "Apache TLP Website Link Checks" # Wvisible:sites,brand

# Names of the checks, in the order the columns are displayed
cols = %w( uri events foundation image license sponsorship security thanks copyright trademarks ).freeze

# Pattern that the value found for each check is expected to match.
# A value that is present but does not match is reported as a warning.
# The dots in 'apache.org' are escaped so that only a literal '.' is accepted
# (an unescaped dot would also accept e.g. 'apacheXorg').
CHECKS = {
  'uri'         => %r{https?://[^.]+\.apache\.org},
  'copyright'   => %r{[Cc]opyright [^.]+ Apache Software Foundation}, # Do we need '[Tt]he ASF'?
  'foundation'  => %r{.}, # any non-empty value is accepted
  'image'       => %r{.}, # any non-empty value is accepted
  # TODO more checks needed here, e.g. ASF registered and 3rd party marks
  'trademarks'  => %r{trademarks of [Tt]he Apache Software Foundation},
  'events'      => %r{^https?://.*apache\.org/events/current-event},
  'license'     => %r{^https?://.*apache\.org/licenses/$}, # should link to parent license page only
  'sponsorship' => %r{^https?://.*apache\.org/foundation/sponsorship},
  'security'    => %r{^https?://.*apache\.org/[Ss]ecurity},
  'thanks'      => %r{^https?://.*apache\.org/foundation/thanks},
}.freeze

# Policy documentation for each check: [link to the policy, summary of the requirement]
DOCS = {
  'uri'         => ['https://www.apache.org/foundation/marks/pmcs#websites',
                    'The homepage for any ProjectName must be served from http://ProjectName.apache.org'],
  # 'copyright' => 'TBA',
  'foundation'  => ['https://www.apache.org/foundation/marks/pmcs#navigation',
                    'All projects must feature some prominent link back to the main ASF homepage at http://www.apache.org/'],
  'trademarks'  => ['https://www.apache.org/foundation/marks/pmcs#attributions',
                    'All project or product homepages must feature a prominent trademark attribution of all applicable Apache trademarks'],
  # 'events' => 'TBA',
  'license'     => ['https://www.apache.org/foundation/marks/pmcs#navigation',
                    '"License" should link to: http://www.apache.org/licenses/'],
  'sponsorship' => ['https://www.apache.org/foundation/marks/pmcs#navigation',
                    '"Sponsorship" or "Donate" should link to: http://www.apache.org/foundation/sponsorship.html'],
  'security'    => ['https://www.apache.org/foundation/marks/pmcs#navigation',
                    '"Security" should link to either to a project-specific page [...], or to the main http://www.apache.org/security/ page'],
  'thanks'      => ['https://www.apache.org/foundation/marks/pmcs#navigation',
                    '"Thanks" should link to: http://www.apache.org/foundation/thanks.html'],
}.freeze

# Published copy of the scan results; fetched when no local copy is available
DATAURI = 'https://whimsy.apache.org/public/site-scan.json'
| |
# Evaluate every entry of CHECKS against every scanned site.
#
# sites - hash of site id => hash of check name => value found for that check
#
# Returns a three element array:
#   [0] per check, the number of sites in each label class
#   [1] a description of what each label class means
#   [2] per check, the ids of the sites whose value matched the pattern
def analyze(sites)
  passing = Hash.new { |hash, key| hash[key] = Hash.new(&hash.default_proc) }
  tallies = Hash.new { |hash, key| hash[key] = Hash.new(&hash.default_proc) }

  CHECKS.each_pair do |check, pattern|
    matched = sites.each_with_object([]) do |(id, site), ids|
      ids << id if site[check] =~ pattern
    end
    missing = sites.count { |_id, site| site[check].nil? }

    passing[check] = matched

    # Keys are inserted as success, warning, danger: that is the order in
    # which the counts are later output.
    tally = tallies[check]
    tally['label-success'] = matched.count
    tally['label-warning'] = sites.size - matched.count - missing
    tally['label-danger'] = missing
  end

  legend = {
    'label-success' => '# Sites with links to primary ASF page',
    'label-warning' => '# Sites with link, but not an expected ASF one',
    'label-danger' => '# Sites with no link for this topic'
  }

  [tallies, legend, passing]
end
| |
# Load the site scan results.
#
# A copy of the scan file under 'public' (resolved relative to this script's
# path) is used when it exists; otherwise the data is fetched from DATAURI.
#
# Returns a two element array:
#   - the parsed JSON data
#   - the time of the crawl: the local file's modification time in HTTP-date
#     format, or the Last-Modified header of the HTTP response
def getsites
  # No untaint call here: String#untaint had no effect from Ruby 2.7 and was
  # removed in Ruby 3.2, where calling it raises NoMethodError.
  local_copy = File.expand_path('../public/site-scan.json', __FILE__)
  if File.exist? local_copy
    crawl_time = File.mtime(local_copy).httpdate # show time in same format as last-mod
    sites = JSON.parse(File.read(local_copy))
  else
    response = Net::HTTP.get_response(URI(DATAURI))
    crawl_time = response['last-modified']
    sites = JSON.parse(response.body)
  end
  [sites, crawl_time]
end
| |
# Load the scan results and evaluate all of the checks against them
sites, crawl_time = getsites
analysis = analyze(sites)

# SCRIPT_NAME will always be set for a CGI invocation; when it is absent the
# script is being run from the command line, so output the analysis as JSON
# and stop. This allows CLI testing, e.g.
#   PATH_INFO=/ ruby www/site.cgi >test.json
if ENV['SCRIPT_NAME'].nil?
  puts JSON.pretty_generate(analysis)
  exit
end
| |
| # Only required for CGI use |
| # if these are required earlier, the code creates an unnecessary 'assets' directory |
| |
| require 'whimsy/asf/themes' |
| require 'wunderbar' |
| require 'wunderbar/bootstrap' |
| require 'wunderbar/jquery/stupidtable' |
| |
# Select the label class (and hence the color) for one table cell.
#
# analysis - the result of analyze(); only the third element is used, which
#            maps each check to the list of projects that matched that check
# links    - the values found for the project in question
# col      - the column in question (i.e. the check being reported on)
# name     - the name of the project
#
# Returns 'label-danger' when the project has no value for the check,
# 'label-warning' when it has a value but is not listed among the projects
# that matched the check, and 'label-success' otherwise (which includes
# columns that have no entry in the analysis at all).
def label(analysis, links, col, name)
  return 'label-danger' unless links[col]

  matched = analysis[2]
  if matched.include?(col) && !matched[col].include?(name)
    'label-warning'
  else
    'label-success'
  end
end
| |
# Render the report page. The view is selected by the request path:
#   .../project/<name>  - the results of every check for a single project
#   .../check/<name>    - the results of a single check for every project
#   anything else       - overview table of all projects and all checks
_html do
  _head do
    _style %{
      .table td {font-size: smaller;}
    }
  end
  _body? do
    _whimsy_body(
      title: PAGETITLE,
      subtitle: 'Checking TLP Websites For Required Links',
      related: {
        "/committers/tools" => "Whimsy Tool Listing",
        "https://www.apache.org/foundation/marks/pmcs#navigation" => "Required PMC Links Policy",
        "https://github.com/apache/whimsy/" => "Read The Whimsy Code"
      },
      helpblock: -> {
        _p do
          _ 'This script periodically crawls all Apache project websites to check them for a few specific links or text blocks that all projects are expected to have.'
          _ 'The checks (currently in beta) include verifying that all '
          _a 'required links', href: 'https://www.apache.org/foundation/marks/pmcs#navigation'
          _ ' appear on a project homepage, along with checking if project logos appear in apache.org/img'
        end
        _p do
          _a 'View the crawler code', href: 'https://github.com/apache/whimsy/blob/master/tools/site-scan.rb'
          _ ', '
          _a 'website display code', href: 'https://github.com/apache/whimsy/blob/master/www/site.cgi'
          _ ', and '
          _a 'raw JSON data', href: DATAURI
          _ '.'
          _br
          _ "Last crawl time: #{crawl_time} over #{sites.size} websites."
        end
      }
    ) do

      if path_info =~ %r{/project/(.+)}
        # details for an individual project
        project = $1
        links = sites[project]
        if links.nil?
          # The project name comes from the URL and may not be in the scan
          # data; report that rather than failing on a nil lookup.
          _div.alert.alert_warning do
            _ "No site check results are available for project: #{project}. "
            _a 'Return to the site checker overview', href: '../'
          end
        else
          _whimsy_panel_table(
            title: "Site Check For Project - #{links['display_name']}",
            helpblock: -> {
              _a href: '../', aria_label: 'Home to site checker' do
                _span.glyphicon.glyphicon_home :aria_hidden
              end
              _span.glyphicon.glyphicon_menu_right
              _ ' Results for project: '
              _a links['display_name'], href: links['uri']
              _ ' Check Results column is the actual text found on the project homepage for this check.'
            }
          ) do
            _table.table.table_striped do
              # the header is a sibling of the body (not nested inside it),
              # as in the other tables on this page
              _thead do
                _tr do
                  _th! 'Check Type'
                  _th! 'Check Results'
                  _th! 'Check Description'
                end
              end
              _tbody do
                cols.each do |col|
                  cls = label(analysis, links, col, project)
                  _tr do
                    _td do
                      _a col.capitalize, href: "../check/#{col}"
                    end

                    # values that are links are shown as links, anything else as text
                    if links[col] =~ /^https?:/
                      _td class: cls do
                        _a links[col], href: links[col]
                      end
                    else
                      _td links[col], class: cls
                    end

                    _td do
                      if cls == 'label-warning'
                        _ 'Expected to match the regular expression: '
                        _code CHECKS[col].source
                        _ ''
                      else
                        _ ''
                      end
                    end
                  end
                end
              end
            end
          end
        end
      elsif path_info =~ %r{/check/(.+)}
        # details for a single check
        col = $1
        _whimsy_panel_table(
          title: "Site Check Of Type - #{col.capitalize}",
          helpblock: -> {
            _a href: '../', aria_label: 'Home to site checker' do
              _span.glyphicon.glyphicon_home :aria_hidden
            end
            _span.glyphicon.glyphicon_menu_right
            if CHECKS.include? col
              _ ' Check Results are expected to match the regular expression: '
              _code CHECKS[col].source
              if DOCS.include? col
                _ ' '
                _a DOCS[col][1], href: DOCS[col][0]
              end
            end
          }
        ) do
          _table.table.table_condensed.table_striped do
            _thead do
              _tr do
                _th! 'Project'
                _th! 'Check Results'
              end
            end
            _tbody do
              sites.each do |n, links|
                cls = label(analysis, links, col, n)
                _tr do
                  _td do
                    _a links['display_name'], href: "../project/#{n}"
                  end

                  # values that are links are shown as links, anything else as text
                  if links[col] =~ /^https?:/
                    _td class: cls do
                      _a links[col], href: links[col]
                    end
                  else
                    _td links[col], class: cls
                  end
                end
              end
            end
          end
        end
      else
        # overview
        _whimsy_panel_table(
          title: "Site Check - All Projects Results",
          helpblock: -> {
            _ul.list_inline do
              _li.small "Data key: "
              analysis[1].each do |cls, desc|
                _li.label desc, class: cls
              end
              _li.small " Click column badges to sort"
            end
          }
        ) do
          _table.table.table_condensed.table_striped do
            _thead do
              _tr do
                _th! 'Project', data_sort: 'string-ins'
                cols.each do |col|
                  _th! data_sort: 'string' do
                    _a col.capitalize, href: "check/#{col}"
                    _br
                    # one badge per label class, showing the number of sites
                    analysis[0][col].each do |cls, val|
                      _ ' '
                      _span.label val, class: cls
                    end
                  end
                end
              end
            end

            # the cells are empty, so each carries a value to sort on instead
            sort_order = {
              'label-success' => 1,
              'label-warning' => 2,
              'label-danger' => 3
            }

            _tbody do
              sites.each do |n, links|
                _tr do
                  _td do
                    _a "#{links['display_name']}", href: "project/#{n}"
                  end
                  cols.each do |c|
                    cls = label(analysis, links, c, n)
                    _td '', class: cls, data_sort_value: sort_order[cls]
                  end
                end
              end
            end
          end
        end # of _whimsy_panel_table
      end
    end

    # make the tables sortable, and show an arrow on the sorted column
    _script %{
      var table = $(".table").stupidtable();
      table.on("aftertablesort", function (event, data) {
        var th = $(this).find("th");
        th.find(".arrow").remove();
        var dir = $.fn.stupidtable.dir;
        var arrow = data.direction === dir.ASC ? "↑" : "↓";
        th.eq(data.column).append('<span class="arrow">' + arrow +'</span>');
      });
    }
  end
end