# _plugins/regex_filter.rb - brooklyn-docs - Git at Google

 module Jekyll
   # Liquid filter providing regular-expression based replacement.
   module RegexFilter
     # Replaces every match of a regular expression in +input+.
     #
     # input    - the text to search; nil is passed through as nil
     # reg_str  - the regular expression source, compiled with
     #            Regexp::MULTILINE so that "." also matches newlines
     # repl_str - the replacement; String#gsub back-references such as
     #            "\\1" are honoured
     #
     # Returns a new string with the replacements applied, or nil when
     # +input+ is nil.
     def replace_regex(input, reg_str, repl_str)
       # Templates routinely hand filters a nil (e.g. a missing variable);
       # pass it through instead of raising NoMethodError.
       return nil if input.nil?

       pattern = Regexp.new(reg_str, Regexp::MULTILINE)
       input.to_s.gsub(pattern, repl_str.to_s)
     end
   end
 end

 # Register the replace_regex filter module with Liquid.
 Liquid::Template.register_filter(Jekyll::RegexFilter)


 #######
 # Liquid filter that rewrites the links of a page body for PDF generation.
 #
 # Every URL captured by the regular expression is rewritten as follows:
 #
 # 1) If the link is fully external (starts with "http") it is left unaltered
 # 2) If the link is an anchor (starts with "#") it is converted to the anchor scheme used in PDF generation
 # 3) If the link target is one of the pages in the PDF, the link is pointed at that page's anchor in the PDF
 # 4) If the link points somewhere on the site which is not included in this PDF, it is pointed at the website
 #    with a specific version, so https://brooklyn.apache.org/v/0.9.0-SNAPSHOT/start/concept-quickstart.html for instance
 #
 # Only refactorURL is public; the helpers are private so that Liquid does not
 # expose them as filters.
 module RefactorURL
   # Rewrites ids, "target" attributes and links in a document body.
   #
   # input          - the document body; nil is passed through as nil. The
   #                  string itself is never modified.
   # reg_str        - regular expression source matching a whole href attribute,
   #                  with the URL as its first capturing group
   # site           - the jekyll site object; reads 'pdf-rewrite-prefixes',
   #                  'pdf-default-base-url' and 'pdf-default-versioned-url-subpath'
   # pages          - all pages (unused, kept for interface compatibility)
   # availablePages - the pages included in this merge
   # mergePage      - the root merge page (unused, kept for interface compatibility)
   # currentPage    - the page being merged; reads 'title', 'dir' and 'path'
   #
   # Returns the rewritten body as a new string.
   def refactorURL(input, reg_str, site, pages, availablePages, mergePage, currentPage)
     return nil if input.nil?

     # document id, used as the prefix of every anchor of this page
     pid = refactor_url_page_id(currentPage['title'])

     # re-write any ids to our internal references and drop "open in new tab"
     # attributes (they break the anchors); work on copies so the caller's
     # string is left alone and frozen strings are accepted
     body = input.gsub('id="', "id=\"internalLink_#{pid}_")
     body = body.gsub(' target="_blank"', '')

     # multi-line regex for finding URLs within the document body
     link_re = Regexp.new(reg_str, Regexp::MULTILINE)

     body.gsub(link_re) do |whole|
       # there should only be one capturing group (the URL), so use the first
       url = Regexp.last_match(1)
       # nothing was captured: leave the matched text exactly as it was
       next whole if url.nil?

       "href=\"#{refactor_url_rewrite(url, pid, site, availablePages, currentPage)}\""
     end
   end

   private

   # Builds the anchor id of a page from its title: lower-cased, slashes
   # removed and whitespace runs turned into "-". Pages without a title get
   # "id-undefined".
   def refactor_url_page_id(title)
     return 'id-undefined' if title.nil?

     title.downcase.delete('/').gsub(/\s+/, '-')
   end

   # Applies rules 1-4 to a single URL and returns the new link target.
   def refactor_url_rewrite(url, pid, site, available_pages, current_page)
     # 1) external link, leave it as it is
     return url if url.start_with?('http')
     # 2) anchor in the local document, prefix it with the local document id
     return "#internalLink_#{pid}_#{url.delete('#')}" if url.start_with?('#')

     path = refactor_url_site_path(url, current_page)

     # 3) the page is part of the document: link to its anchor. When several
     #    pages share the URL the last one wins, as it always did.
     in_pdf = Array(available_pages).reverse.find { |page| page['url'] == path }
     return "#contentsLink-#{refactor_url_page_id(in_pdf['title'])}" if in_pdf

     # 4) the page is not part of the document: absolute website URL
     refactor_url_website_link(path, site, current_page)
   end

   # Normalises a site-relative URL: drops the anchor, prefixes relative paths
   # with the current page's directory and appends index.html to folders.
   def refactor_url_site_path(url, current_page)
     path = url[/\A[^#]*/]
     if path.start_with?('./')
       path = current_page['dir'] + '/' + path[2..-1]
     elsif !path.start_with?('/')
       path = current_page['dir'] + '/' + path
     end
     path.end_with?('/') ? "#{path}index.html" : path
   end

   # Builds the absolute website URL for a page outside the document, swapping
   # a configured path prefix for its website path (the last matching prefix
   # wins). Paths with no configured prefix are routed to the default
   # versioned sub-path and reported on stdout.
   def refactor_url_website_link(path, site, current_page)
     base = site['pdf-default-base-url']
     # Array() copes with a hash, a list of pairs, or a missing setting
     from, to = Array(site['pdf-rewrite-prefixes']).reverse.find { |prefix| path.start_with?(prefix[0]) }
     return base + to + path[from.length..-1] if from

     link = base + site['pdf-default-versioned-url-subpath'] + path
     puts "PDF link to #{path} in #{current_page['path']} has unknown prefix, routing to #{link}"
     link
   end

   # Skipped when Liquid is not loaded, so the module can be required on its own.
   Liquid::Template.register_filter(self) if defined?(Liquid::Template)
 end
	module Jekyll
	  # Liquid filter providing regular-expression based replacement.
	  module RegexFilter
	    # Replaces every match of a regular expression in +input+.
	    #
	    # input    - the text to search; nil is passed through as nil
	    # reg_str  - the regular expression source, compiled with
	    #            Regexp::MULTILINE so that "." also matches newlines
	    # repl_str - the replacement; String#gsub back-references such as
	    #            "\\1" are honoured
	    #
	    # Returns a new string with the replacements applied, or nil when
	    # +input+ is nil.
	    def replace_regex(input, reg_str, repl_str)
	      # Templates routinely hand filters a nil (e.g. a missing variable);
	      # pass it through instead of raising NoMethodError.
	      return nil if input.nil?

	      pattern = Regexp.new(reg_str, Regexp::MULTILINE)
	      input.to_s.gsub(pattern, repl_str.to_s)
	    end
	  end
	end

	# Register the replace_regex filter module with Liquid.
	Liquid::Template.register_filter(Jekyll::RegexFilter)


	#######
	# Liquid filter that rewrites the links of a page body for PDF generation.
	#
	# Every URL captured by the regular expression is rewritten as follows:
	#
	# 1) If the link is fully external (starts with "http") it is left unaltered
	# 2) If the link is an anchor (starts with "#") it is converted to the anchor scheme used in PDF generation
	# 3) If the link target is one of the pages in the PDF, the link is pointed at that page's anchor in the PDF
	# 4) If the link points somewhere on the site which is not included in this PDF, it is pointed at the website
	#    with a specific version, so https://brooklyn.apache.org/v/0.9.0-SNAPSHOT/start/concept-quickstart.html for instance
	#
	# Only refactorURL is public; the helpers are private so that Liquid does not
	# expose them as filters.
	module RefactorURL
	  # Rewrites ids, "target" attributes and links in a document body.
	  #
	  # input          - the document body; nil is passed through as nil. The
	  #                  string itself is never modified.
	  # reg_str        - regular expression source matching a whole href attribute,
	  #                  with the URL as its first capturing group
	  # site           - the jekyll site object; reads 'pdf-rewrite-prefixes',
	  #                  'pdf-default-base-url' and 'pdf-default-versioned-url-subpath'
	  # pages          - all pages (unused, kept for interface compatibility)
	  # availablePages - the pages included in this merge
	  # mergePage      - the root merge page (unused, kept for interface compatibility)
	  # currentPage    - the page being merged; reads 'title', 'dir' and 'path'
	  #
	  # Returns the rewritten body as a new string.
	  def refactorURL(input, reg_str, site, pages, availablePages, mergePage, currentPage)
	    return nil if input.nil?

	    # document id, used as the prefix of every anchor of this page
	    pid = refactor_url_page_id(currentPage['title'])

	    # re-write any ids to our internal references and drop "open in new tab"
	    # attributes (they break the anchors); work on copies so the caller's
	    # string is left alone and frozen strings are accepted
	    body = input.gsub('id="', "id=\"internalLink_#{pid}_")
	    body = body.gsub(' target="_blank"', '')

	    # multi-line regex for finding URLs within the document body
	    link_re = Regexp.new(reg_str, Regexp::MULTILINE)

	    body.gsub(link_re) do |whole|
	      # there should only be one capturing group (the URL), so use the first
	      url = Regexp.last_match(1)
	      # nothing was captured: leave the matched text exactly as it was
	      next whole if url.nil?

	      "href=\"#{refactor_url_rewrite(url, pid, site, availablePages, currentPage)}\""
	    end
	  end

	  private

	  # Builds the anchor id of a page from its title: lower-cased, slashes
	  # removed and whitespace runs turned into "-". Pages without a title get
	  # "id-undefined".
	  def refactor_url_page_id(title)
	    return 'id-undefined' if title.nil?

	    title.downcase.delete('/').gsub(/\s+/, '-')
	  end

	  # Applies rules 1-4 to a single URL and returns the new link target.
	  def refactor_url_rewrite(url, pid, site, available_pages, current_page)
	    # 1) external link, leave it as it is
	    return url if url.start_with?('http')
	    # 2) anchor in the local document, prefix it with the local document id
	    return "#internalLink_#{pid}_#{url.delete('#')}" if url.start_with?('#')

	    path = refactor_url_site_path(url, current_page)

	    # 3) the page is part of the document: link to its anchor. When several
	    #    pages share the URL the last one wins, as it always did.
	    in_pdf = Array(available_pages).reverse.find { |page| page['url'] == path }
	    return "#contentsLink-#{refactor_url_page_id(in_pdf['title'])}" if in_pdf

	    # 4) the page is not part of the document: absolute website URL
	    refactor_url_website_link(path, site, current_page)
	  end

	  # Normalises a site-relative URL: drops the anchor, prefixes relative paths
	  # with the current page's directory and appends index.html to folders.
	  def refactor_url_site_path(url, current_page)
	    path = url[/\A[^#]*/]
	    if path.start_with?('./')
	      path = current_page['dir'] + '/' + path[2..-1]
	    elsif !path.start_with?('/')
	      path = current_page['dir'] + '/' + path
	    end
	    path.end_with?('/') ? "#{path}index.html" : path
	  end

	  # Builds the absolute website URL for a page outside the document, swapping
	  # a configured path prefix for its website path (the last matching prefix
	  # wins). Paths with no configured prefix are routed to the default
	  # versioned sub-path and reported on stdout.
	  def refactor_url_website_link(path, site, current_page)
	    base = site['pdf-default-base-url']
	    # Array() copes with a hash, a list of pairs, or a missing setting
	    from, to = Array(site['pdf-rewrite-prefixes']).reverse.find { |prefix| path.start_with?(prefix[0]) }
	    return base + to + path[from.length..-1] if from

	    link = base + site['pdf-default-versioned-url-subpath'] + path
	    puts "PDF link to #{path} in #{current_page['path']} has unknown prefix, routing to #{link}"
	    link
	  end

	  # Skipped when Liquid is not loaded, so the module can be required on its own.
	  Liquid::Template.register_filter(self) if defined?(Liquid::Template)
	end