blob: 0df899c81ae871c7929a00277a42060a21406fa8 [file] [log] [blame]
#!/usr/bin/env ruby
# Human-readable title for this tool. It is not referenced in the visible part of
# this script (presumably picked up by external tooling -- TODO confirm).
PAGETITLE = "ASF Page Asset Checker - ALPHA"
# very rudimentary page asset checker - shows references to non-ASF assets
require 'open3'
require_relative '../../tools/asf-site-check'
# usage: whimsy.apache.org/members/page-scanner?url=http://apache.org/[&showurl]
# CGI entry point: reads the target page URL from QUERY_STRING, runs the node
# page scanner against it, and prints a plain-text report in which every
# reference returned by the scanner is prefixed with 'OK ' or 'NO ' depending
# on ASFDOMAIN.asfurl?.
#
# Query string format: url=<http(s) URL>[&showurl]
print "Content-type: text/plain; charset=UTF-8\r\n\r\n"
# puts ENV['REQUEST_URI']
query = ENV.fetch('QUERY_STRING', '')
# We only want full URLs, so the scheme is mandatory.
# \A (rather than ^) anchors at the very start of the string: ^ would also
# match after an embedded newline and let a malformed query string through.
request = query.match(%r{\Aurl=(https?://[^&]+)(?:&(.+))?})
if request
  url = request[1]
  # Anything other than exactly 'showurl' (including no option) means 'all'
  option = request[2] == 'showurl' ? 'showurl' : 'all'
  print "Checking the page #{url}\n\n"
  puts "The following references were found to hosts other than apache.org and apachecon.com"
  puts "The first column shows if the host is recognised as being under ASF control according to"
  puts "https://privacy.apache.org/policies/asf-domains"
  print "=====\n"
  # Array form: arguments are passed directly to node, no shell is involved
  cmd = ['node', '/srv/whimsy/tools/scan-page.js', url, option]
  out, err, status = Open3.capture3(*cmd)
  if status.success?
    # The scanner output is treated as one reference per line
    out.split("\n").each do |ref|
      print ASFDOMAIN.asfurl?(ref) ? 'OK ' : 'NO '
      puts ref
    end
  else
    puts err.scan(/^Error:.+/).first || err # Show only the Error line if present
  end
  print "=====\n"
else
  print "Expecting: ?url=http://.../[&showurl]\n"
end