blob: e75372c8f30405f1aa60342ef20a62052ecb4b62 [file] [log] [blame]
#!/usr/bin/env node
// @(#) extract non-ASF links when loading a page
module.paths.push('/usr/lib/node_modules')
const puppeteer = require('puppeteer');
const target = process.argv[2] || 'http://apache.org/';
const inithost = new URL(target).host;
const option = process.argv[3] || '';
function isASFhost(host) {
return host == '' || host == 'apache.org' || host.endsWith('.apache.org') || host.endsWith('.apachecon.com');
}
if (!isASFhost(inithost)) {
throw new Error("Only ASF hosts are supported - saw " + inithost);
}
(async () => {
// new fails with:
// Error: Failed to launch the browser process!
// chrome_crashpad_handler: --database is required
const browser = await puppeteer.launch({headless: "old"});
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (interceptedRequest) => {
// already handled?
if (interceptedRequest.isInterceptResolutionHandled()) return;
const url = interceptedRequest.url();
if (url == target) {
// must allow this through
interceptedRequest.continue();
} else {
let host = new URL(url).host
if (!isASFhost(host)) {
// don't visit non-ASF hosts unless requested
if (option == 'all') {
console.log(url);
interceptedRequest.continue();
} else {
if (option == 'showurl') {
console.log(url);
} else {
console.log(host);
}
interceptedRequest.abort();
}
} else {
// Need to visit at least an initial redirect
interceptedRequest.continue();
}
}
});
let result = await page.goto(target);
let status = result._status; // now seems to be null if it completed OK?
if (status && status != 200) {
let url = result._url;
let error = `Status ${status} for ${url}`;
throw new Error(error);
}
await browser.close();
})();