blob: 093377f31005993e1977f21dbbedbf6592763892 [file] [log] [blame]
--[[
This is a LUA CGI script that uses LibEZT to produce templated mirror content
It uses the output from the Apache GeoIP module to choose the appropriate mirror(s)
The script supports the following optional URL parameters:
cca2 :
override the country code
preferred/Preferred :
sets the preferred server if available, otherwise it is chosen at random
as_json/asjson :
don't process the template, but return the mirror data as JSON
action=download together with filename :
generate a redirect to the file on the preferred mirror
archive_aware :
sets ARCHIVE_AWARE = true ; default : false
This script is dist, attic and archive aware.
If the target's first path-component (TLP) has a corresponding
file /x1/www/attic.apache.org/projects/TLP.html,
then the script redirects to that page.
If the final target is not in dist, it uses template archive.html ;
if ARCHIVE_AWARE, it looks up the target on archive.apache.org with
a HEAD request. The lookups are cached ; forever for positive results.
]]
-- Revision of this file. The "$Revision$" keyword is maintained by SVN;
-- only the digits are kept.
local VERSION = string.match("$Revision: 1820301 $", "%d+")

-- Return the revision of this script as a string of digits.
function version()
  return VERSION
end
-- ---------------------------------------------------------------------------
-- Tunables
-- ---------------------------------------------------------------------------
local ARCHIVE_AWARE = false -- do archive.a.o lookup with HEAD requests
-- Lifetime of cached entries; compared against os.time() differences, so the unit is seconds.
local CACHE_TIMEOUT = 1800 -- should be 0 in test ; 1800 in production
-- Numeric on/off flag. NOTE: 0 is truthy in Lua, so code consuming this flag
-- must compare against 0 explicitly rather than rely on plain truthiness.
local LOG_LOOKUPS = 0 -- should be 1 in test ; 0 in production
-- ---------------------------------------------------------------------------
-- Dependencies
-- ---------------------------------------------------------------------------
local JSON = require 'JSON'
local ezt = require 'libezt'
-- Temporary hack: allow for missing module
-- HTTP starts out as a stub whose request() returns nothing; it is replaced by
-- socket.http when that module can be loaded. reqHTTP records whether the
-- require succeeded (it is the status returned by pcall).
local HTTP = {request=function() end}
local reqHTTP, _ =
pcall(function() HTTP = require 'socket.http'; HTTP.TIMEOUT = 1; end)
-- Same approach for the socket module: the stub gettime() always returns 0,
-- and reqSOCK records whether the real module was loaded.
local SOCK = {gettime=function() return 0 end}
local reqSOCK, _ = pcall(function() SOCK = require 'socket' ; end)
local posix = require 'posix'
-- ---------------------------------------------------------------------------
-- Locations (files, directories and base URIs)
-- ---------------------------------------------------------------------------
local mirror_file = "/x1/www/www.apache.org/mirrors/mirrors.list"
local MAXAGE = 24*3600 -- max mirror age (seconds)
local ATTIC_URI = 'http://attic.apache.org/projects/'
local ATTIC_DIR = '/var/www/attic.apache.org/projects/'
local DIST_DIR = '/var/www/www.apache.org/content/dist/'
local ARCH_URI = 'http://archive.apache.org/dist/'
local DYN_DIR = '/var/www/www.apache.org/dyn/'
local CLOSER_PG = DYN_DIR .. 'closer.html' -- default template
local ARCHIVE_PG = DYN_DIR .. 'archive.html' -- template used when the target is not in dist
local STATS_DIR = DYN_DIR .. 'stats/' -- per-project request logs are appended here
local LOOKUP_LOG = STATS_DIR .. 'AAAA' -- log of archive lookups
-- ---------------------------------------------------------------------------
-- Module-level state (persists for as long as this Lua state is kept alive)
-- ---------------------------------------------------------------------------
local cache_hit = nil -- outcome of the most recent is_in_arch() cache check
local cache_in_arch = {} -- cache of archive lookups
local mirror_stamp = 0 -- when mirror_file was last processed
local mirror_map = {} -- map of all recent mirrors. [ftp|http|rsync][cc|backup]=url
local mirror_templates = {} -- cache of unprocessed mirror templates
local mirror_templates_generated = {} -- cache of generated templates
local mymap -- map of mirrors for the current request (based on the country code)
-- (Re)load the mirror list from mirror_file into the module-level mirror_map.
--
-- The resulting map is indexed as mirror_map[country_or_backup][type] = { url, ... },
-- with the country code lower-cased. Mirrors whose timestamp is older than
-- MAXAGE are dropped, except 'backup' mirrors which are always kept.
--
-- Returns the (possibly unchanged) mirror_map. If the list cannot be opened,
-- the previously loaded map is kept and mirror_stamp is left untouched so the
-- next request retries.
function get_mirrors()
  local now = os.time()
  local atleast = now - MAXAGE

  local f = io.open(mirror_file, "r")
  if not f then
    -- Unreadable list: keep serving whatever was loaded before rather than crash.
    return mirror_map
  end
  local mirrord = f:read("*a") or ""
  f:close()

  -- Check the age of the mirrors relative to the mirror list, rather than now. (As was done by mirrors.cgi)
  -- This allows the system to still work even if the list is a bit stale
  -- Lua does not have a standard API to get a file date
  -- However, the timestamp when the information was collected is more useful anyway
  -- Parse the file header: # date : Wed Sep 2 09:49:53 2015 [UTC]
  local MON = {Jan=1,Feb=2,Mar=3,Apr=4,May=5,Jun=6,Jul=7,Aug=8,Sep=9,Oct=10,Nov=11,Dec=12}
  local mon, day, hh, mm, ss, yy = mirrord:match("# date : %w+ (%w+) +(%d+) (%d%d):(%d%d):(%d%d) (%d%d%d%d) %[UTC%]")
  if mon and MON[mon] then
    -- isdst = false only disables the DST adjustment.
    -- NOTE(review): os.time() interprets the table as local time, so this
    -- presumably relies on the server running in UTC - confirm.
    local filetime = os.time({
      year = tonumber(yy), month = MON[mon], day = tonumber(day),
      hour = tonumber(hh), min = tonumber(mm), sec = tonumber(ss),
      isdst = false,
    })
    if filetime then
      atleast = filetime - MAXAGE
    end
  end

  -- Build the new map on the side and publish it once it is complete.
  local fresh = {}
  for t, c, url, timestamp in mirrord:gmatch("([a-zA-Z]+)%s+([a-zA-Z]+)%s+(%S+)%s+(%d+)\r?\n") do
    c = c:lower()
    -- Don't check the timestamp for backup mirrors
    if c == 'backup' or tonumber(timestamp) >= atleast then
      local by_type = fresh[c] or {}
      fresh[c] = by_type
      by_type[t] = by_type[t] or {}
      table.insert(by_type[t], url)
    end
  end

  mirror_map = fresh
  mirror_stamp = now
  return mirror_map
end
-- Append one line describing an archive lookup to LOOKUP_LOG.
--   inarch : result of the lookup (true / false / nil)
--   path   : the path that was looked up
-- The line also records the process id and the module-level cache_hit flag.
-- Nothing happens when the log file cannot be opened for appending.
function log_lookup(inarch,path)
  local log = io.open(LOOKUP_LOG,'a')
  if not log then
    return
  end
  local stamp = os.date('%Y-%m-%d/%H:%M:%S')
  local pid = posix.getpid().pid or 'pid'
  local entry = stamp
    .. " [" .. pid .. ']'
    .. " look=" .. tostring(inarch)
    .. " hit=" .. tostring(cache_hit)
    .. ' ' .. path
    .. "\n"
  log:write(entry)
  log:close()
end
-- Time elapsed since t (a SOCK.gettime() value), scaled by 1000.
function interval(t)
  local delta = SOCK.gettime() - t
  return 1000 * delta
end
-- Format interval(t) with three decimals. The trailing quote and newline
-- complete the "label : '" prefix that callers print before this value.
function elapsed(t)
  local ms = interval(t)
  return ("%.3f ms'\n"):format(ms)
end
-- true when posix.stat() returns something for the given file, false otherwise.
function file_exists(file)
  local info = posix.stat(file)
  if info == nil then
    return false
  end
  return true
end
-- true when ATTIC_DIR contains a page named <proj>.html.
function is_in_attic(proj)
  local attic_page = ATTIC_DIR .. proj .. '.html'
  return file_exists(attic_page)
end
-- Map a dist-relative path to its location below DIST_DIR.
function dist_path(path)
  local full = DIST_DIR .. path
  return full
end
-- true when the dist-relative path exists below DIST_DIR.
function is_in_dist(path)
  local target = dist_path(path)
  return file_exists(target)
end
-- Map a dist-relative path to its URI below ARCH_URI.
function arch_uri(path)
  local uri = ARCH_URI .. path
  return uri
end
-- Build an HTML anchor (rel="nofollow") pointing at the archive URI for path;
-- the URI is used both as the link target and as the link text.
function archive_url(path)
  local target = arch_uri(path)
  return string.format('<a href="%s" rel="nofollow">%s</a>', target, target)
end
-- Check (with a HEAD request) whether path exists below ARCH_URI.
--
-- Results are kept in cache_in_arch: positive results never expire, other
-- results are reused for CACHE_TIMEOUT seconds. The module-level cache_hit
-- flag is set to tell the caller whether the cache answered the question.
--
-- Returns true when the request yielded status 200, false for any other
-- status, and nil when the request produced no result at all.
function is_in_arch(path)
  local entry = cache_in_arch[path]

  if not entry then
    cache_hit = false
  elseif entry.result then
    -- positive results are cached forever
    cache_hit = true
  else
    local age = os.time() - entry.timestamp
    cache_hit = age < CACHE_TIMEOUT
  end

  if cache_hit then
    return entry.result
  end

  local body, code = HTTP.request { method = "HEAD", url = arch_uri(path) }
  entry = { timestamp = os.time(), result = ( body and code == 200 ) }
  cache_in_arch[path] = entry
  return entry.result
end
-- Return the cached raw template for the file named by url, (re)reading it
-- from disk when it is not cached yet or the cached copy is older than
-- 2*CACHE_TIMEOUT seconds.
--
-- The result is a table { data = <file contents>, timestamp = <os.time()> };
-- data is the literal text "No such page" when the file cannot be opened.
function get_page(url)
  local cached = mirror_templates[url]
  if cached and cached.timestamp >= (os.time() - 2*CACHE_TIMEOUT) then
    return cached
  end

  local contents = "No such page"
  local fh = io.open(url, "r")
  if fh then
    contents = fh:read("*a") or "No such page"
  end
  local entry = { data = contents, timestamp = os.time() }
  if fh then
    fh:close()
  end

  mirror_templates[url] = entry
  return entry
end
-- Render the template `page` with the given definitions, reusing a previously
-- rendered copy for up to CACHE_TIMEOUT seconds.
--   page     : path of the template file
--   defs     : string definitions (preferred, path_info, cca2, country, ...)
--   r        : the mod_lua request object (handed on to recurse)
--   ezt_defs : the definitions passed to the EZT renderer
-- Returns a table { data = <rendered output>, timestamp = <os.time()> }.
--
-- The cache key covers every request-dependent string that can influence the
-- rendered page. The mirror arrays are selected by country code, so cca2 (and
-- the country name) must be part of the key; otherwise a page rendered for one
-- country could be served to a visitor from another one that happens to share
-- the same preferred mirror and path.
function get_output_cached(page, defs, r, ezt_defs)
  -- "\0" cannot occur in the fields, unlike ":" which is part of every URL.
  local cacheKey = table.concat({
    page,
    defs.preferred or "",
    defs.path_info or "",
    defs.cca2 or "",
    defs.country or "",
    defs.lookup or "",
    tostring(defs.debug),
  }, "\0")

  local cached = mirror_templates_generated[cacheKey]
  if not cached or cached.timestamp < (os.time() - CACHE_TIMEOUT) then
    local template = get_page(page)
    local tdata = recurse(defs, template.data, r, ezt_defs)
    cached = {
      data = tdata,
      timestamp = os.time()
    }
    mirror_templates_generated[cacheKey] = cached
  end
  return cached
end
-- Expand a template: first emulate SSI includes, then run the EZT engine.
--   defs     : string definitions; defs.filepath is the directory that
--              `#include virtual` paths are resolved against
--   tdata    : the raw template text
--   r        : the mod_lua request object (used for r:stat)
--   ezt_defs : definitions handed to ezt:construct
-- Returns the rendered output, or the second value returned by ezt:import
-- when the template cannot be parsed.
function recurse(defs, tdata, r, ezt_defs)
  -- SSI emulation: every <!--#include virtual="..." --> is replaced by the
  -- contents of the named file, or by nothing when the file is unavailable.
  tdata = tdata:gsub("<!%-%-%s*#include virtual=\"(.-)\"%s*%-%->",
    function(inc)
      local filepath = (defs.filepath .. inc):gsub("[/]+", "/")
      if not r:stat(filepath) then
        return ""
      end
      -- The file may exist yet be unreadable (permissions, race with removal).
      local f = io.open(filepath, "r")
      if not f then
        return ""
      end
      local d = f:read("*a")
      f:close()
      return d or ""
    end
  )
  -- Parse EZT
  local structure, err = ezt:import("[ezt]"..tdata.."[end]")
  -- Render output
  if structure then
    return ezt:construct(structure, ezt_defs)
  end
  return err
end
-- true if the string (s) ends with (e)
-- An empty suffix matches every string.
function endsWith(s, e)
  -- s:sub(-0) is s:sub(0), i.e. the whole string, so the empty suffix
  -- has to be handled separately.
  if e == "" then
    return true
  end
  return e == s:sub(-#e)
end
-- true if the string (s) begins with (b)
function beginsWith(s, b)
  local head = s:sub(1, #b)
  return head == b
end
-- return nil if the string is empty (or nil), otherwise return the string itself
-- (handy in `a or b` chains, where an empty string would otherwise win)
function nonEmpty(s)
  if s == nil or s == '' then
    return nil
  end
  return s
end
-- Temporary fix to extract the missing path_info for dyn/closer.cgi redirects only
-- Returns the part of s that follows the script name, including the "/" that
-- ends the script prefix, or nil when s does not start with the script name.
function get_path_info(s)
  local prefix = "/dyn/closer.cgi/" -- original CGI script name
  if not beginsWith(s, prefix) then
    return nil
  end
  -- start at the last character of the prefix so the suffix keeps its leading "/"
  return s:sub(#prefix)
end
-- The request parameter has the data structures and functions as described here:
-- http://httpd.apache.org/docs/trunk/mod/mod_lua.html#datastructures
-- http://httpd.apache.org/docs/trunk/mod/mod_lua.html#functions
--
-- mod_lua entry point. Depending on the query arguments it
--   * prints the preferred mirror as plain text   (preferred=true)
--   * redirects to a file on the preferred mirror (action=download&filename=...)
--   * prints diagnostics                          (action=info / action=catlog)
--   * returns the mirror data as JSON             (as_json / asjson)
--   * redirects to the attic page of a retired project
--   * otherwise renders the selected template (archive.html when the target
--     is not in dist) and appends a line to the per-project stats log.
-- Always returns apache2.OK; redirects are signalled through r.status.
function handle(r)
  local get = r:parseargs()

  -- Archive awareness is decided per request. Writing to the module-level
  -- ARCHIVE_AWARE would leave it switched on for every later request served
  -- by the same Lua state.
  local archive_aware = ARCHIVE_AWARE
  if get.archive_aware and get.archive_aware ~= '0' then
    archive_aware = true
  end

  -- Reload the mirror list when the loaded copy is more than an hour old
  local now = os.time()
  if mirror_stamp < (now - 3600) then
    get_mirrors()
  end

  local country = r.notes['GEOIP_COUNTRY_NAME'] or r.subprocess_env['GEOIP_COUNTRY_NAME'] or "Unknown"
  local cca2 = (get.cca2 or r.notes['GEOIP_COUNTRY_CODE'] or r.subprocess_env['GEOIP_COUNTRY_CODE'] or 'Backup'):lower()
  if cca2 == 'gb' then
    cca2 = 'uk'
  end
  local occa2 = cca2 -- the country code before any fallback to 'backup'
  if not mirror_map[cca2] then
    cca2 = 'backup'
  end

  -- Tolerate a mirror list without backup entries (or no list at all)
  local bmap = mirror_map['backup'] or {}
  mymap = mirror_map[cca2] or bmap
  mymap['backup'] = bmap['http']

  -- Pick one random mirror per type, falling back to the backup mirrors
  local URL = {}
  for _, t in ipairs({'http','ftp'}) do
    URL[t] = (mymap[t] and mymap[t][math.random(1, #mymap[t])]) or (bmap[t] and bmap[t][math.random(1, #bmap[t])])
  end

  -- Work out which template to render
  local page = r.filename
  local got_f = get.f -- work on a copy of the parameter
  if got_f then
    local hname = (r.hostname or ""):gsub("www%.", "")
    -- The host name comes from the client; escape it so that "." and "-"
    -- are matched literally instead of being interpreted as pattern magic.
    local hname_pat = hname:gsub("%p", "%%%0")
    got_f = got_f:gsub("^/var/www/html/", "/var/www/")
    got_f = got_f:gsub(hname_pat, ""):gsub("/var/www//var/www/", "/var/www/")
    -- Keep only the string: gsub also returns a match count, which must not
    -- reach r:stat as its optional second argument.
    local html_f = got_f:gsub("%.cgi", ".html")
    if r:stat(got_f) or r:stat(html_f) then
      page = got_f
    else
      page = got_f:gsub("/www/", "/www/" .. hname:gsub("%.[a-z][a-z]%.", ".") .. "/"):gsub("[/]+", "/")
    end
  end
  page = page:gsub("%.cgi", ".html"):gsub("%.lua", ".html")
  -- Fall back to the default template unless the page exists below one of the
  -- web roots. ".." segments are refused because they would let a request
  -- escape the web root while still passing the prefix check.
  if not r:stat(page)
    or not (page:match("^/var/www/") or page:match("^/x1/www/"))
    or page:find("/../", 1, true)
    or page:match("/%.%.$")
  then
    page = CLOSER_PG
  end

  local defs = {}
  local ezt_defs = {
    strings = {},
    arrays = {}
  }
  defs.filepath = page:gsub("[^/]+$", "")
  defs.debug = get.debug and true or false
  defs.preferred = r:escape_html(get.preferred or get.Preferred or URL['http'] or "")
  defs.path_info = r:escape_html(get.path -- command-line override
      or nonEmpty(r.path_info) -- if path provided by server
      or get_path_info(r.uri) -- temporary fix to extract it from r.uri for dyn/closer.cgi calls
      -- Disable for now; it was previously effectively disabled because r.path_info was never false
      -- or r.unparsed_uri:gsub("^.+%.cgi/*", ""):gsub("^.+%.lua/*", "") -- not sure what this is trying to do
      -- TODO in any case seems wrong to use the unparsed URI as that will include the query string
      or "/") -- default
    :gsub("^/","",1) -- trim leading "/" as per Python version
  defs.country = country
  defs.cca2 = cca2

  -- proj is the first path component of defs.path_info
  local proj = defs.path_info
  local slash = proj and proj:find('/', 1, true)
  if slash then
    proj = proj:sub(1, slash - 1)
  end
  defs.project = proj

  ezt_defs.strings = defs
  ezt_defs.arrays = {
    http = mymap['http'] or bmap['http'],
    ftp = mymap['ftp'] or bmap['ftp'],
    backup = bmap['http'],
  }

  -- Check that preferred http/ftp exists, otherwise default to none
  local prefIsOkay = false
  for _, b in ipairs({'http', 'ftp', 'backup'}) do
    for _, v in pairs(ezt_defs.arrays[b] or {}) do -- arrays[b] may not exist
      if r:escape_html(v) == defs.preferred then
        prefIsOkay = true
        break
      end
    end
    if prefIsOkay then
      break
    end
  end
  if not prefIsOkay then
    ezt_defs.preferred = ""
    -- there may be no http mirror at all; keep this a string
    defs.preferred = URL['http'] or ""
  end

  -- string only repr of preferred URL
  if get.preferred and get.preferred == "true" then
    r.content_type = "text/plain"
    r:puts(defs.preferred)
    return apache2.OK
  end

  local do_json = false
  if (get.as_json and not (get.as_json == "0")) or (get.asjson and not (get.asjson == "0")) then
    do_json = true
  end

  if get.action then
    if get.action == 'download' and get.filename then
      r.headers_out['Location'] = defs.preferred .. get.filename
      r.status = 302
      return apache2.OK
    elseif get.action == 'info' then
      r.content_type = "text/plain"
      r:puts(string.format("%s\ncloser revision: %s\nlibezt revision: %s\n",
        _VERSION, -- LUA
        version(), -- closer
        ezt:version())) -- libezt
      -- Show any arguments
      for k, v in pairs( get ) do
        r:puts( string.format("arg %s: %s\n", k, v) )
      end
      local t0 = SOCK.gettime()
      local URI = r.subprocess_env['SCRIPT_URI'] or "nil"
      -- Request parameters
      r:puts("r.hostname: '",r.hostname or "nil", "'\n")
      r:puts("r.document_root:'",r.document_root or "nil", "'\n")
      r:puts("r.uri: '",r.uri or "nil", "'\n")
      -- r:puts("r.the_request: '",r.the_request or "nil", "'\n")
      -- r:puts("r.unparsed_uri: '",r.unparsed_uri or "nil", "'\n")
      r:puts("r.path_info: '",r.path_info or "nil","'\n")
      r:puts("env[SCRIPT_URI] '",URI,"'\n")
      r:puts("require HTTP : '",tostring(reqHTTP),"'\n")
      r:puts("require SOCK : '",tostring(reqSOCK),"'\n")
      -- calculated values
      r:puts("defs.path_info: '",defs.path_info or "nil","'\n")
      r:puts("defs.filepath: '",defs.filepath or "nil","'\n")
      r:puts("proj : '",proj,"'\n")
      r:puts("proj in attic: '",tostring(is_in_attic(proj)),"'\n")
      local in_dist = tostring(is_in_dist(defs.path_info))
      r:puts("dist/path : '",dist_path(defs.path_info),"'\n")
      r:puts("path in dist?: '",in_dist,"'\n")
      r:puts("elapsed : '",elapsed(t0))
      r:puts("archive aware: '",tostring(archive_aware),"'\n")
      if in_dist == 'false' then
        r:puts("archive uri : '",arch_uri(defs.path_info),"'\n")
      end
      if archive_aware then
        r:puts("... archive lookup ...\n")
        r:puts("process PID : '",tostring(posix.getpid().pid),"'\n")
        local in_arch = tostring(is_in_arch(defs.path_info))
        r:puts("archive uri : '",arch_uri(defs.path_info),"'\n")
        r:puts("path in arch?: '",in_arch,"'\n")
        r:puts("arch cachehit: '",tostring(cache_hit),"'\n")
        r:puts("elapsed : '",elapsed(t0))
      end
      return apache2.OK
    elseif get.action == 'catlog' then
      r.content_type = "text/plain"
      local f = io.open(LOOKUP_LOG)
      if f then
        for line in f:lines() do
          r:puts(line,"\n")
        end
        f:close()
      else
        r:puts("can't open " .. LOOKUP_LOG .. "\n")
      end
      return apache2.OK
    else
      r.content_type = "text/plain"
      r:puts("unknow action [" .. get.action .. "]\n")
      return apache2.OK
    end
  end

  if do_json then
    r.content_type = "application/json"
    r:puts(JSON:encode_pretty({
      path_info = defs.path_info,
      preferred = defs.preferred,
      http = mymap['http'] or bmap['http'],
      ftp = mymap['ftp'] or bmap['ftp'],
      backup = bmap['http'],
      in_dist = is_in_dist(defs.path_info),
      in_attic = is_in_attic(proj),
      cca2 = occa2
    }))
    return apache2.OK
  end

  -- Retired projects: send the visitor to the attic page
  if is_in_attic(proj) then
    r.headers_out['Location'] = ATTIC_URI .. proj .. ".html"
    r.status = 302
    return apache2.OK
  end

  -- Target not in dist: render the archive template instead
  if not is_in_dist(defs.path_info) then
    local arch_home = archive_url('')
    local arch_path = archive_url(defs.path_info)
    local lookup = ''
    if archive_aware then
      local inarch = is_in_arch(defs.path_info)
      if inarch == nil then
        -- nil means the lookup itself produced no answer
        if reqHTTP then
          lookup = 'A lookup on ' .. arch_home .. ' failed.'
        else
          lookup = "Can't do lookups on " .. arch_home
        end
        lookup = lookup .. "<br>Try " .. arch_path
      elseif inarch then
        lookup = 'The object is in our archive : ' .. arch_path
      else
        lookup = 'The object is not in our archive ' .. arch_home
      end
      -- LOG_LOOKUPS is a 0/1 flag and 0 is truthy in Lua: compare explicitly
      if LOG_LOOKUPS and LOG_LOOKUPS ~= 0 then
        log_lookup(inarch,defs.path_info)
      end
    else -- not archive aware
      lookup = "It may be in our archive : " .. arch_path
    end
    defs.lookup = lookup
    page = ARCHIVE_PG
  end

  -- Per-project request statistics; incubator podlings are logged under
  -- their own name rather than under "incubator".
  local rootpath = defs.path_info:match("^([-a-z0-9]+)/")
  if rootpath and rootpath == "incubator" then
    rootpath = defs.path_info:match("^incubator/([-a-z0-9]+)/")
  end
  if rootpath then
    local f = io.open(STATS_DIR .. rootpath .. ".log", "a")
    if f then
      -- get a bit of the IP to identify multiple unique request with same TS/CCA2
      local ipbit = r.useragent_ip:match("([a-f0-9]+):?:?$") or r.useragent_ip:match("^([a-f0-9]+)") or "000"
      f:write(os.time() .. " " .. ipbit .. " " .. occa2 .. " " .. defs.path_info .. "\n")
      f:close()
    end
  end

  local tdata = get_output_cached(page, defs, r, ezt_defs)
  -- check for special content-type based on file name
  if endsWith(page,"--xml.html") then
    r.content_type = "text/xml"
  else
    r.content_type = "text/html"
  end
  r:puts(tdata.data)
  if r.hostname == 'www.apache.org' then
    r:puts("<!-- " .. occa2 .. " -->")
  end
  return apache2.OK
end