blob: 5a96223ed68a4002df500f61ef8976df4580b797 [file] [log] [blame]
module Jekyll
  # Liquid filter module providing regular-expression based substitution.
  module RegexFilter
    # Replaces every match of a regular expression within input.
    #
    # input    - the string to search; nil is passed through as nil instead of
    #            raising, matching how refactorURL treats a nil body
    # reg_str  - regex source; compiled with Regexp::MULTILINE so that `.` also
    #            matches newlines
    # repl_str - replacement text; String#gsub back-references (e.g. \1) apply
    #
    # Returns a new string with the substitutions applied, or nil when input is nil.
    def replace_regex(input, reg_str, repl_str)
      return nil if input.nil?

      input.gsub(Regexp.new(reg_str, Regexp::MULTILINE), repl_str)
    end
  end
end
# Register the module with Liquid so replace_regex can be used as a template filter.
Liquid::Template.register_filter(Jekyll::RegexFilter)
#######
# This function rewrites a link in the following manner
#
# 1) If the link is fully external leave it as an unaltered link
# 2) If the link is an anchor, convert to the anchor scheme used in PDF generation
# 3) If the link target is in the PDF, change the link to point at the anchor in the PDF
# 4) If the link points somewhere on the Brooklyn site that is not included in this PDF, point to the website at a specific version, e.g. https://brooklyn.apache.org/v/0.9.0-SNAPSHOT/start/concept-quickstart.html
#
# * input - the document body, reg_str - the regex that finds links (its first capture group is the URL), site - the jekyll site object, pages - all pages, availablePages - the ones included in this merge, mergePage - the root merge page, currentPage - the current page being merged
module RefactorURL
# Rewrites the links in a page body so that they work inside the merged PDF.
#
# Every `id="` is namespaced with an id derived from currentPage's title, any
# ` target="_blank"` attribute is dropped, and each match of reg_str is replaced
# (in full) by an `href="..."` attribute whose target is derived from the
# regex's first capture group:
#   * starts with 'http' - kept unchanged
#   * starts with '#'    - becomes "#internalLink_<page id>_<anchor>"
#   * anything else      - any "#fragment" is dropped and the path is resolved
#                          against currentPage['dir']; if an entry of
#                          availablePages has that url the link becomes
#                          "#contentsLink-<id of that page>", otherwise an
#                          absolute URL is built from the site's pdf-* settings
#
# The caller's string is never modified and no global variables are used, so
# frozen input is accepted and repeated calls cannot affect each other.
#
# input          - the document body; nil is passed through as nil
# reg_str        - regex source matching a whole link attribute, with the URL
#                  as its first capture group
# site           - read via [] for 'pdf-rewrite-prefixes' (pairs of
#                  [site path prefix, website path]), 'pdf-default-base-url'
#                  and 'pdf-default-versioned-url-subpath'
# pages          - not used by this method
# availablePages - entries read via ['url'] and ['title']
# mergePage      - not used by this method
# currentPage    - read via ['title'], ['dir'] and ['path']
#
# Returns the rewritten body as a new string, or nil when input is nil.
def refactorURL(input, reg_str, site, pages, availablePages, mergePage, currentPage)
  return nil if input.nil?

  # generate the document id, this is used to namespace the anchors
  title = currentPage['title']
  pid = title.nil? ? "id-undefined" : title.downcase.delete('/').gsub(/\s+/, '-')

  # re-write any ids to our internal references and get rid of any opening in
  # new tabs (they'd break our anchors); non-bang gsub keeps input untouched
  body = input.gsub("id=\"", "id=\"internalLink_" + pid + "_").gsub(" target=\"_blank\"", "")

  # multi-line regex for finding URLs within the document body
  re = Regexp.new(reg_str, Regexp::MULTILINE)

  body.gsub(re) do
    # there should only be one capturing group (the URL), so use the first;
    # read it before any other regex operation replaces the last match
    link = Regexp.last_match(1)
    new_link = "#"

    if link.start_with?('http')
      # 1) it's an external link, leave it as it is
      new_link = link
    elsif link.start_with?('#')
      # 2) it's an anchor in the local document, prefix it with the document id
      new_link = "#internalLink_" + pid + "_" + link.delete('#')
    else
      # 3/4) it's a link to a page within the site scope
      # -- firstly clean up the URL: drop the anchor (anything after the #)
      target = link.include?('#') ? link[/[^#]+/] : link
      if target.start_with?('./')
        # swap ./ for the absolute path
        target = currentPage['dir'] + "/" + target[2, target.length]
      elsif !target.start_with?('/')
        # no leading / means it is relative to the current page's directory
        target = currentPage['dir'] + "/" + target
      end
      # add index.html to the end if it's just a folder
      target += "index.html" if target.end_with?('/')

      # -- now work out if the linked-to page is within the page scope
      in_scope = false
      availablePages.each do |page|
        next unless page['url'] == target

        # 3) the page is part of this document, swap the link for an anchor
        #    (if several entries share the url the last one wins)
        in_scope = true
        new_link = "#contentsLink-" + page['title'].downcase.delete('/').gsub(/\s+/, '-')
      end

      # 4) the page is out of scope of the document, use an absolute URL
      unless in_scope
        prefix_found = false
        # swap known site path prefixes for their website paths; a missing
        # setting is treated as "no prefixes" so the default below applies
        (site['pdf-rewrite-prefixes'] || []).each do |prefix|
          next unless target.start_with?(prefix[0])

          prefix_found = true
          new_link = site['pdf-default-base-url'] + prefix[1] + target[prefix[0].length, target.length]
        end
        unless prefix_found
          new_link = site['pdf-default-base-url'] + site['pdf-default-versioned-url-subpath'] + target
          puts "PDF link to " + target + " in " + currentPage['path'] + " has unknown prefix, routing to " + new_link
        end
      end
    end

    # return the re-written link wrapped in the property
    "href=\"" + new_link + "\""
  end
end
# Register this module with Liquid so refactorURL can be used as a template filter.
Liquid::Template.register_filter self
end