solr/core/src/test/org/apache/solr/cli/PostToolTest.java - solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.solr.cli;

 import static org.apache.solr.cli.SolrCLI.findTool;
 import static org.apache.solr.cli.SolrCLI.parseCmdLine;

 import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 import org.apache.commons.cli.CommandLine;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrResponse;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.util.EnvUtils;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.util.SecurityJson;
 import org.junit.BeforeClass;
 import org.junit.Test;

 /**
  * NOTE: do *not* use real hostnames, not even "example.com", in the webcrawler tests.
  *
  * <p>A MockPageFetcher is used to prevent real HTTP requests from being executed.
  */
 @SolrTestCaseJ4.SuppressSSL
 public class PostToolTest extends SolrCloudTestCase {

   /**
    * Configures the shared test cluster via {@code configureCluster(2)}, registering the
    * "cloud-minimal" configset under the name "conf1" and applying {@link SecurityJson#SIMPLE}.
    */
   @BeforeClass
   public static void setupClusterWithSecurityEnabled() throws Exception {
     final int nodeCount = 2;
     final String configName = "conf1";
     configureCluster(nodeCount)
         .addConfig(configName, configset("cloud-minimal"))
         .withSecurityJson(SecurityJson.SIMPLE)
         .configure();
   }

   /**
    * Sets the {@link SecurityJson#USER} / {@link SecurityJson#PASS} basic-auth credentials on the
    * given request.
    *
    * @param req request to decorate
    * @return the same {@code req} instance, so the call can be chained
    */
   private <T extends SolrRequest<? extends SolrResponse>> T withBasicAuth(T req) {
     final String user = SecurityJson.USER;
     final String password = SecurityJson.PASS;
     req.setBasicAuthCredentials(user, password);
     return req;
   }

   /**
    * Posts one JSON document with the {@code post} tool, addressing Solr through an explicit
    * {@code --solr-update-url}, and checks that exactly one document becomes searchable.
    */
   @Test
   public void testBasicRun() throws Exception {
     final String collection = "testBasicRun";

     withBasicAuth(CollectionAdminRequest.createCollection(collection, "conf1", 1, 1, 0, 0))
         .processAndWait(cluster.getSolrClient(), 10);

     File jsonDoc = File.createTempFile("temp", ".json");

     // try-with-resources guarantees the writer is flushed and closed (even if writeJson throws)
     // before the tool reads the file.
     try (FileWriter fw = new FileWriter(jsonDoc, StandardCharsets.UTF_8)) {
       Utils.writeJson(Map.of("id", "1", "title_s", "mytitle"), fw, true);
     }

     String[] args = {
       "post",
       "--solr-update-url",
       cluster.getJettySolrRunner(0).getBaseUrl() + "/" + collection + "/update",
       "--credentials",
       SecurityJson.USER_PASS,
       jsonDoc.getAbsolutePath()
     };
     assertEquals(0, runTool(args));

     int numFound = 0;
     int expectedDocCount = 1;

     // Retry the query up to 100 times, 100 ms apart, until the expected count is seen.
     for (int idx = 0; idx < 100; ++idx) {
       QueryRequest req = withBasicAuth(new QueryRequest(params("q", "*:*")));
       QueryResponse rsp = req.process(cluster.getSolrClient(), collection);

       numFound = (int) rsp.getResults().getNumFound();
       if (numFound == expectedDocCount) {
         break;
       }
       Thread.sleep(100);
     }
     assertEquals("*:* found unexpected number of documents", expectedDocCount, numFound);
   }

   /**
    * Posts one JSON document with the {@code post} tool, identifying the target only by
    * collection name ({@code -c}); the Solr port is supplied through the {@code SOLR_PORT}
    * setting. Checks that exactly one document becomes searchable.
    */
   @Test
   public void testRunWithCollectionParam() throws Exception {
     final String collection = "testRunWithCollectionParam";

     // Provide the port as an environment variable for the PostTool to look up.
     EnvUtils.setEnv("SOLR_PORT", String.valueOf(cluster.getJettySolrRunner(0).getLocalPort()));

     withBasicAuth(CollectionAdminRequest.createCollection(collection, "conf1", 1, 1, 0, 0))
         .processAndWait(cluster.getSolrClient(), 10);

     File jsonDoc = File.createTempFile("temp", ".json");

     // try-with-resources guarantees the writer is flushed and closed (even if writeJson throws)
     // before the tool reads the file.
     try (FileWriter fw = new FileWriter(jsonDoc, StandardCharsets.UTF_8)) {
       Utils.writeJson(Map.of("id", "1", "title_s", "mytitle"), fw, true);
     }

     String[] args = {
       "post", "-c", collection, "-credentials", SecurityJson.USER_PASS, jsonDoc.getAbsolutePath()
     };
     assertEquals(0, runTool(args));

     int numFound = 0;
     int expectedDocCount = 1;

     // Retry the query up to 100 times, 100 ms apart, until the expected count is seen.
     for (int idx = 0; idx < 100; ++idx) {
       QueryRequest req = withBasicAuth(new QueryRequest(params("q", "*:*")));
       QueryResponse rsp = req.process(cluster.getSolrClient(), collection);

       numFound = (int) rsp.getResults().getNumFound();
       if (numFound == expectedDocCount) {
         break;
       }
       Thread.sleep(100);
     }
     assertEquals("*:* found unexpected number of documents", expectedDocCount, numFound);
   }

   /**
    * Looks up the tool for {@code args}, asserts that it is a {@link PostTool}, parses the
    * arguments against the tool's options and runs the tool.
    *
    * @param args command-line arguments handed to {@code findTool} and {@code parseCmdLine}
    * @return the value returned by the tool's {@code runTool}
    */
   private int runTool(String[] args) throws Exception {
     final Tool resolved = findTool(args);
     assertTrue(resolved instanceof PostTool);
     final CommandLine parsed = parseCmdLine(resolved.getName(), args, resolved.getOptions());
     return resolved.runTool(parsed);
   }

   /**
    * Checks that {@code normalizeUrlEnding} drops a trailing slash and anything from {@code #}
    * onwards for the sample URLs below.
    */
   @Test
   public void testNormalizeUrlEnding() {
     final String host = "http://[ff01::114]";
     final String indexPage = host + "/index.html";

     assertEquals(host, PostTool.normalizeUrlEnding(host + "/"));
     assertEquals(host, PostTool.normalizeUrlEnding(host + "/#foo?bar=baz"));
     assertEquals(indexPage, PostTool.normalizeUrlEnding(indexPage + "#hello"));
   }

   /**
    * Checks {@code computeFullUrl} for a handful of base-URL / link pairs: the {@code .html} links
    * resolve to full URLs, while the {@code .jpg}, {@code mailto:} and {@code ftp:} links below
    * yield {@code null}.
    */
   @Test
   public void testComputeFullUrl() throws IOException {
     final PostTool tool = new PostTool();
     final URL root = new URL("http://[ff01::114]/");
     final String indexPage = "http://[ff01::114]/index.html";

     assertEquals(indexPage, tool.computeFullUrl(root, "/index.html"));
     assertEquals(
         indexPage, tool.computeFullUrl(new URL("http://[ff01::114]/foo/bar/"), "/index.html"));
     assertEquals(
         "http://[ff01::114]/fil.html",
         tool.computeFullUrl(new URL("http://[ff01::114]/foo.htm?baz#hello"), "fil.html"));
     //    TODO: How to know what is the base if URL path ends with "foo"??
     //    assertEquals("http://[ff01::114]/fil.html", tool.computeFullUrl(new
     // URL("http://[ff01::114]/foo?baz#hello"), "fil.html"));

     // Links that must be rejected.
     assertNull(tool.computeFullUrl(root, "fil.jpg"));
     assertNull(tool.computeFullUrl(root, "mailto:hello@foo.bar"));
     assertNull(tool.computeFullUrl(root, "ftp://server/file"));
   }

   /**
    * Checks {@code typeSupported} first on a fresh {@link PostTool} and then again after the file
    * filter has been rebuilt from the file types "doc,xls,ppt".
    */
   @Test
   public void testTypeSupported() {
     final PostTool tool = new PostTool();

     // Freshly constructed tool.
     assertTrue(tool.typeSupported("application/pdf"));
     assertTrue(tool.typeSupported("application/xml"));
     assertFalse(tool.typeSupported("text/foo"));

     // After restricting the file types.
     tool.fileTypes = "doc,xls,ppt";
     tool.fileFilter = tool.getFileFilterFromFileTypes(tool.fileTypes);
     assertFalse(tool.typeSupported("application/pdf"));
     assertTrue(tool.typeSupported("application/msword"));
   }

   /**
    * Checks that {@code appendParam} joins the parameter with {@code ?} when the URL has no query
    * string and with {@code &} when it already has one.
    */
   @Test
   public void testAppendParam() {
     final String param = "foo=bar";

     final String withoutQuery = "http://[ff01::114]";
     assertEquals(withoutQuery + "?foo=bar", PostTool.appendParam(withoutQuery, param));

     final String withQuery = "http://[ff01::114]/?a=b";
     assertEquals(withQuery + "&foo=bar", PostTool.appendParam(withQuery, param));
   }

   /**
    * Checks that {@code appendUrlPath} inserts the extra path in front of the existing query
    * string.
    */
   @Test
   public void testAppendUrlPath() throws MalformedURLException {
     final URL expected = new URL("http://[ff01::114]/a?foo=bar");
     final URL base = new URL("http://[ff01::114]?foo=bar");
     assertEquals(expected, PostTool.appendUrlPath(base, "/a"));
   }

   /**
    * Checks the content type {@code guessType} reports for a {@code .doc} file, a file without an
    * extension and a {@code .json} file.
    */
   @Test
   public void testGuessType() {
     assertEquals("application/msword", PostTool.guessType(new File("foo.doc")));
     assertEquals("application/octet-stream", PostTool.guessType(new File("foobar")));
     assertEquals("application/json", PostTool.guessType(new File("foo.json")));
   }

   /**
    * Runs {@code postFiles} in dry-run mode with {@code recursive = 0} over the "exampledocs"
    * directory and expects a count of 2.
    */
   @Test
   public void testDoFilesMode() throws MalformedURLException {
     final PostTool tool = new PostTool();
     tool.recursive = 0;
     tool.dryRun = true;
     tool.solrUpdateUrl = new URL("http://localhost:8983/solr/fake/update");

     final File exampleDocs = getFile("exampledocs");
     final String[] targets = {exampleDocs.toString()};
     final int posted = tool.postFiles(targets, 0, null, null);
     assertEquals(2, posted);
   }

   /**
    * Checks {@code recursionPossible} for a directory argument, a plain-file argument and a mix of
    * both: it is expected to be true whenever a directory is among the arguments.
    */
   @Test
   public void testDetectingIfRecursionPossibleInFilesMode() throws IOException {
     final PostTool tool = new PostTool();
     tool.recursive = 1; // This is the default

     final String dirArg = getFile("exampledocs").toString();
     final String fileArg = File.createTempFile("temp", ".json").toString();

     assertTrue(tool.recursionPossible(new String[] {dirArg}));
     assertFalse(tool.recursionPossible(new String[] {fileArg}));
     assertTrue(tool.recursionPossible(new String[] {fileArg, dirArg}));
   }

   /**
    * Runs {@code postFiles} in dry-run mode with {@code recursive = 1} over the "exampledocs"
    * directory and expects a count of 2.
    */
   @Test
   public void testRecursionAppliesToFilesMode() throws MalformedURLException {
     final PostTool tool = new PostTool();
     tool.recursive = 1; // This is the default
     tool.dryRun = true;
     tool.solrUpdateUrl = new URL("http://localhost:8983/solr/fake/update");

     final File exampleDocs = getFile("exampledocs");
     final String[] targets = {exampleDocs.toString()};
     final int posted = tool.postFiles(targets, 0, null, null);
     assertEquals(2, posted);
   }

   /**
    * Crawls the canned site served by {@link MockPageFetcher} in dry-run mode and checks the page
    * count returned by {@code postWebPages} for different {@code recursive} settings, first with
    * the mock's robots rules and then with an empty rule list for the host.
    */
   @Test
   public void testDoWebMode() throws IOException, URISyntaxException {
     final String rootWithFragment = "http://[ff01::114]/#removeme";
     final String root = "http://[ff01::114]/";

     final PostTool tool = new PostTool();
     tool.pageFetcher = new MockPageFetcher();
     tool.dryRun = true;
     tool.solrUpdateUrl = new URL("http://user:password@localhost:5150/solr/fake/update");

     // Uses mock pageFetcher
     tool.delay = 0;
     tool.recursive = 5;
     final int deepCrawl = tool.postWebPages(new String[] {rootWithFragment}, 0, null);
     assertEquals(5, deepCrawl);

     tool.recursive = 1;
     final int shallowCrawl = tool.postWebPages(new String[] {root}, 0, null);
     assertEquals(3, shallowCrawl);

     // Without respecting robots.txt
     tool.pageFetcher.robotsCache.put("[ff01::114]", Collections.emptyList());
     tool.recursive = 5;
     final int unrestrictedCrawl = tool.postWebPages(new String[] {rootWithFragment}, 0, null);
     assertEquals(6, unrestrictedCrawl);
   }

   /**
    * Checks that the robots rules loaded by {@link MockPageFetcher} allow the root page, disallow
    * {@code /disallowed}, and contain two entries for the host.
    */
   @Test
   public void testRobotsExclusion() throws IOException, URISyntaxException {
     final PostTool tool = new PostTool();
     tool.pageFetcher = new MockPageFetcher();
     tool.dryRun = true;

     final URL allowedUrl = new URL("http://[ff01::114]/");
     assertFalse(tool.pageFetcher.isDisallowedByRobots(allowedUrl));

     final URL disallowedUrl = new URL("http://[ff01::114]/disallowed");
     assertTrue(tool.pageFetcher.isDisallowedByRobots(disallowedUrl));

     assertEquals(
         "There should be two entries parsed from robots.txt",
         2,
         tool.pageFetcher.robotsCache.get("[ff01::114]").size());
   }

   /**
    * In-memory stand-in for {@link PostTool.PageFetcher}: page content and outgoing links are
    * looked up in fixed maps, and the robots rules are parsed from an inline robots.txt.
    */
   static class MockPageFetcher extends PostTool.PageFetcher {
     /** Page HTML keyed by URL string. */
     HashMap<String, String> htmlMap = new HashMap<>();

     /** Links reported for a page, keyed by the page's normalized URL string. */
     HashMap<String, Set<URI>> linkMap = new HashMap<>();

     /** Populates the canned pages, their link sets and the robots rules for host [ff01::114]. */
     public MockPageFetcher() throws IOException, URISyntaxException {
       (new PostTool()).super();

       final String base = "http://[ff01::114]";

       // Canned page bodies; the root and index.html share the same content.
       final String rootHtml =
           "<html><body><a href=\"http://[ff01::114]/page1\">page1</a><a href=\"http://[ff01::114]/page2\">page2</a></body></html>";
       htmlMap.put(base, rootHtml);
       htmlMap.put(base + "/index.html", rootHtml);
       htmlMap.put(
           base + "/page1", "<html><body><a href=\"http://[ff01::114]/page1/foo\"></body></html>");
       htmlMap.put(
           base + "/page1/foo",
           "<html><body><a href=\"http://[ff01::114]/page1/foo/bar\"></body></html>");
       htmlMap.put(
           base + "/page1/foo/bar",
           "<html><body><a href=\"http://[ff01::114]/page1\"></body></html>");
       htmlMap.put(
           base + "/page2",
           "<html><body><a href=\"http://[ff01::114]/\"><a href=\"http://[ff01::114]/disallowed\"/></body></html>");
       htmlMap.put(
           base + "/disallowed", "<html><body><a href=\"http://[ff01::114]/\"></body></html>");

       // Outgoing links per page; the root and index.html share one set instance.
       final Set<URI> rootLinks = new HashSet<>();
       rootLinks.add(new URI(base + "/page1"));
       rootLinks.add(new URI(base + "/page2"));
       linkMap.put(base, rootLinks);
       linkMap.put(base + "/index.html", rootLinks);

       final Set<URI> page1Links = new HashSet<>();
       page1Links.add(new URI(base + "/page1/foo"));
       linkMap.put(base + "/page1", page1Links);

       final Set<URI> fooLinks = new HashSet<>();
       fooLinks.add(new URI(base + "/page1/foo/bar"));
       linkMap.put(base + "/page1/foo", fooLinks);

       final Set<URI> page2Links = new HashSet<>();
       page2Links.add(new URI(base + "/disallowed"));
       linkMap.put(base + "/page2", page2Links);

       // Simulate a robots.txt file with comments and a few disallows
       final String robotsTxt =
           "# Comments appear after the \"#\" symbol at the start of a line, or after a directive\n"
               + "User-agent: * # match all bots\n"
               + "Disallow:  # This is void\n"
               + "Disallow: /disallow # Disallow this path\n"
               + "Disallow: /nonexistentpath # Disallow this path\n";
       final ByteArrayInputStream robotsStream =
           new ByteArrayInputStream(robotsTxt.getBytes(StandardCharsets.UTF_8));
       this.robotsCache.put("[ff01::114]", super.parseRobotsTxt(robotsStream));
     }

     /**
      * Returns a 403 result for URLs disallowed by the robots rules; otherwise a 200 "text/html"
      * result whose content is the canned HTML stored for {@code u}.
      */
     @Override
     public PostTool.PageFetcherResult readPageFromUrl(URL u) {
       final PostTool.PageFetcherResult result = new PostTool.PageFetcherResult();
       if (!isDisallowedByRobots(u)) {
         result.httpStatus = 200;
         result.contentType = "text/html";
         final String html = htmlMap.get(u.toString());
         result.content = ByteBuffer.wrap(html.getBytes(StandardCharsets.UTF_8));
       } else {
         result.httpStatus = 403;
       }
       return result;
     }

     /**
      * Returns the canned link set registered for the normalized form of {@code url}, or a new
      * empty set when none is registered. The stream, type and post URL are ignored.
      */
     @Override
     public Set<URI> getLinksFromWebPage(URL url, InputStream is, String type, URL postUrl) {
       final String key = PostTool.normalizeUrlEnding(url.toString());
       final Set<URI> links = linkMap.get(key);
       return links != null ? links : new HashSet<>();
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.solr.cli;

	import static org.apache.solr.cli.SolrCLI.findTool;
	import static org.apache.solr.cli.SolrCLI.parseCmdLine;

	import java.io.ByteArrayInputStream;
	import java.io.File;
	import java.io.FileWriter;
	import java.io.IOException;
	import java.io.InputStream;
	import java.net.MalformedURLException;
	import java.net.URI;
	import java.net.URISyntaxException;
	import java.net.URL;
	import java.nio.ByteBuffer;
	import java.nio.charset.StandardCharsets;
	import java.util.Collections;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Map;
	import java.util.Set;
	import org.apache.commons.cli.CommandLine;
	import org.apache.solr.SolrTestCaseJ4;
	import org.apache.solr.client.solrj.SolrRequest;
	import org.apache.solr.client.solrj.SolrResponse;
	import org.apache.solr.client.solrj.request.CollectionAdminRequest;
	import org.apache.solr.client.solrj.request.QueryRequest;
	import org.apache.solr.client.solrj.response.QueryResponse;
	import org.apache.solr.cloud.SolrCloudTestCase;
	import org.apache.solr.common.util.EnvUtils;
	import org.apache.solr.common.util.Utils;
	import org.apache.solr.util.SecurityJson;
	import org.junit.BeforeClass;
	import org.junit.Test;

	/**
	* NOTE: do not use real hostnames, not even "example.com", in the webcrawler tests.
	*
	* <p>A MockPageFetcher is used to prevent real HTTP requests from being executed.
	*/
	@SolrTestCaseJ4.SuppressSSL
	public class PostToolTest extends SolrCloudTestCase {

	/**
	 * Configures the shared test cluster via {@code configureCluster(2)}, registering the
	 * "cloud-minimal" configset under the name "conf1" and applying {@link SecurityJson#SIMPLE}.
	 */
	@BeforeClass
	public static void setupClusterWithSecurityEnabled() throws Exception {
	  final int nodeCount = 2;
	  final String configName = "conf1";
	  configureCluster(nodeCount)
	      .addConfig(configName, configset("cloud-minimal"))
	      .withSecurityJson(SecurityJson.SIMPLE)
	      .configure();
	}

	/**
	 * Sets the {@link SecurityJson#USER} / {@link SecurityJson#PASS} basic-auth credentials on the
	 * given request.
	 *
	 * @param req request to decorate
	 * @return the same {@code req} instance, so the call can be chained
	 */
	private <T extends SolrRequest<? extends SolrResponse>> T withBasicAuth(T req) {
	  final String user = SecurityJson.USER;
	  final String password = SecurityJson.PASS;
	  req.setBasicAuthCredentials(user, password);
	  return req;
	}

	/**
	 * Posts one JSON document with the {@code post} tool, addressing Solr through an explicit
	 * {@code --solr-update-url}, and checks that exactly one document becomes searchable.
	 */
	@Test
	public void testBasicRun() throws Exception {
	  final String collection = "testBasicRun";

	  withBasicAuth(CollectionAdminRequest.createCollection(collection, "conf1", 1, 1, 0, 0))
	      .processAndWait(cluster.getSolrClient(), 10);

	  File jsonDoc = File.createTempFile("temp", ".json");

	  // try-with-resources guarantees the writer is flushed and closed (even if writeJson throws)
	  // before the tool reads the file.
	  try (FileWriter fw = new FileWriter(jsonDoc, StandardCharsets.UTF_8)) {
	    Utils.writeJson(Map.of("id", "1", "title_s", "mytitle"), fw, true);
	  }

	  String[] args = {
	    "post",
	    "--solr-update-url",
	    cluster.getJettySolrRunner(0).getBaseUrl() + "/" + collection + "/update",
	    "--credentials",
	    SecurityJson.USER_PASS,
	    jsonDoc.getAbsolutePath()
	  };
	  assertEquals(0, runTool(args));

	  int numFound = 0;
	  int expectedDocCount = 1;

	  // Retry the match-all query up to 100 times, 100 ms apart, until the expected count is seen.
	  for (int idx = 0; idx < 100; ++idx) {
	    // "*:*" is the match-all query; a bare ":" is not a valid query string.
	    QueryRequest req = withBasicAuth(new QueryRequest(params("q", "*:*")));
	    QueryResponse rsp = req.process(cluster.getSolrClient(), collection);

	    numFound = (int) rsp.getResults().getNumFound();
	    if (numFound == expectedDocCount) {
	      break;
	    }
	    Thread.sleep(100);
	  }
	  assertEquals("*:* found unexpected number of documents", expectedDocCount, numFound);
	}

	/**
	 * Posts one JSON document with the {@code post} tool, identifying the target only by
	 * collection name ({@code -c}); the Solr port is supplied through the {@code SOLR_PORT}
	 * setting. Checks that exactly one document becomes searchable.
	 */
	@Test
	public void testRunWithCollectionParam() throws Exception {
	  final String collection = "testRunWithCollectionParam";

	  // Provide the port as an environment variable for the PostTool to look up.
	  EnvUtils.setEnv("SOLR_PORT", String.valueOf(cluster.getJettySolrRunner(0).getLocalPort()));

	  withBasicAuth(CollectionAdminRequest.createCollection(collection, "conf1", 1, 1, 0, 0))
	      .processAndWait(cluster.getSolrClient(), 10);

	  File jsonDoc = File.createTempFile("temp", ".json");

	  // try-with-resources guarantees the writer is flushed and closed (even if writeJson throws)
	  // before the tool reads the file.
	  try (FileWriter fw = new FileWriter(jsonDoc, StandardCharsets.UTF_8)) {
	    Utils.writeJson(Map.of("id", "1", "title_s", "mytitle"), fw, true);
	  }

	  String[] args = {
	    "post", "-c", collection, "-credentials", SecurityJson.USER_PASS, jsonDoc.getAbsolutePath()
	  };
	  assertEquals(0, runTool(args));

	  int numFound = 0;
	  int expectedDocCount = 1;

	  // Retry the match-all query up to 100 times, 100 ms apart, until the expected count is seen.
	  for (int idx = 0; idx < 100; ++idx) {
	    // "*:*" is the match-all query; a bare ":" is not a valid query string.
	    QueryRequest req = withBasicAuth(new QueryRequest(params("q", "*:*")));
	    QueryResponse rsp = req.process(cluster.getSolrClient(), collection);

	    numFound = (int) rsp.getResults().getNumFound();
	    if (numFound == expectedDocCount) {
	      break;
	    }
	    Thread.sleep(100);
	  }
	  assertEquals("*:* found unexpected number of documents", expectedDocCount, numFound);
	}

	/**
	 * Looks up the tool for {@code args}, asserts that it is a {@link PostTool}, parses the
	 * arguments against the tool's options and runs the tool.
	 *
	 * @param args command-line arguments handed to {@code findTool} and {@code parseCmdLine}
	 * @return the value returned by the tool's {@code runTool}
	 */
	private int runTool(String[] args) throws Exception {
	  final Tool resolved = findTool(args);
	  assertTrue(resolved instanceof PostTool);
	  final CommandLine parsed = parseCmdLine(resolved.getName(), args, resolved.getOptions());
	  return resolved.runTool(parsed);
	}

	/**
	 * Checks that {@code normalizeUrlEnding} drops a trailing slash and anything from {@code #}
	 * onwards for the sample URLs below.
	 */
	@Test
	public void testNormalizeUrlEnding() {
	  final String host = "http://[ff01::114]";
	  final String indexPage = host + "/index.html";

	  assertEquals(host, PostTool.normalizeUrlEnding(host + "/"));
	  assertEquals(host, PostTool.normalizeUrlEnding(host + "/#foo?bar=baz"));
	  assertEquals(indexPage, PostTool.normalizeUrlEnding(indexPage + "#hello"));
	}

	/**
	 * Checks {@code computeFullUrl} for a handful of base-URL / link pairs: the {@code .html} links
	 * resolve to full URLs, while the {@code .jpg}, {@code mailto:} and {@code ftp:} links below
	 * yield {@code null}.
	 */
	@Test
	public void testComputeFullUrl() throws IOException {
	  final PostTool tool = new PostTool();
	  final URL root = new URL("http://[ff01::114]/");
	  final String indexPage = "http://[ff01::114]/index.html";

	  assertEquals(indexPage, tool.computeFullUrl(root, "/index.html"));
	  assertEquals(
	      indexPage, tool.computeFullUrl(new URL("http://[ff01::114]/foo/bar/"), "/index.html"));
	  assertEquals(
	      "http://[ff01::114]/fil.html",
	      tool.computeFullUrl(new URL("http://[ff01::114]/foo.htm?baz#hello"), "fil.html"));
	  // TODO: How to know what is the base if URL path ends with "foo"??
	  // assertEquals("http://[ff01::114]/fil.html", tool.computeFullUrl(new
	  // URL("http://[ff01::114]/foo?baz#hello"), "fil.html"));

	  // Links that must be rejected.
	  assertNull(tool.computeFullUrl(root, "fil.jpg"));
	  assertNull(tool.computeFullUrl(root, "mailto:hello@foo.bar"));
	  assertNull(tool.computeFullUrl(root, "ftp://server/file"));
	}

	/**
	 * Checks {@code typeSupported} first on a fresh {@link PostTool} and then again after the file
	 * filter has been rebuilt from the file types "doc,xls,ppt".
	 */
	@Test
	public void testTypeSupported() {
	  final PostTool tool = new PostTool();

	  // Freshly constructed tool.
	  assertTrue(tool.typeSupported("application/pdf"));
	  assertTrue(tool.typeSupported("application/xml"));
	  assertFalse(tool.typeSupported("text/foo"));

	  // After restricting the file types.
	  tool.fileTypes = "doc,xls,ppt";
	  tool.fileFilter = tool.getFileFilterFromFileTypes(tool.fileTypes);
	  assertFalse(tool.typeSupported("application/pdf"));
	  assertTrue(tool.typeSupported("application/msword"));
	}

	/**
	 * Checks that {@code appendParam} joins the parameter with {@code ?} when the URL has no query
	 * string and with {@code &} when it already has one.
	 */
	@Test
	public void testAppendParam() {
	  final String param = "foo=bar";

	  final String withoutQuery = "http://[ff01::114]";
	  assertEquals(withoutQuery + "?foo=bar", PostTool.appendParam(withoutQuery, param));

	  final String withQuery = "http://[ff01::114]/?a=b";
	  assertEquals(withQuery + "&foo=bar", PostTool.appendParam(withQuery, param));
	}

	/**
	 * Checks that {@code appendUrlPath} inserts the extra path in front of the existing query
	 * string.
	 */
	@Test
	public void testAppendUrlPath() throws MalformedURLException {
	  final URL expected = new URL("http://[ff01::114]/a?foo=bar");
	  final URL base = new URL("http://[ff01::114]?foo=bar");
	  assertEquals(expected, PostTool.appendUrlPath(base, "/a"));
	}

	/**
	 * Checks the content type {@code guessType} reports for a {@code .doc} file, a file without an
	 * extension and a {@code .json} file.
	 */
	@Test
	public void testGuessType() {
	  assertEquals("application/msword", PostTool.guessType(new File("foo.doc")));
	  assertEquals("application/octet-stream", PostTool.guessType(new File("foobar")));
	  assertEquals("application/json", PostTool.guessType(new File("foo.json")));
	}

	/**
	 * Runs {@code postFiles} in dry-run mode with {@code recursive = 0} over the "exampledocs"
	 * directory and expects a count of 2.
	 */
	@Test
	public void testDoFilesMode() throws MalformedURLException {
	  final PostTool tool = new PostTool();
	  tool.recursive = 0;
	  tool.dryRun = true;
	  tool.solrUpdateUrl = new URL("http://localhost:8983/solr/fake/update");

	  final File exampleDocs = getFile("exampledocs");
	  final String[] targets = {exampleDocs.toString()};
	  final int posted = tool.postFiles(targets, 0, null, null);
	  assertEquals(2, posted);
	}

	/**
	 * Checks {@code recursionPossible} for a directory argument, a plain-file argument and a mix of
	 * both: it is expected to be true whenever a directory is among the arguments.
	 */
	@Test
	public void testDetectingIfRecursionPossibleInFilesMode() throws IOException {
	  final PostTool tool = new PostTool();
	  tool.recursive = 1; // This is the default

	  final String dirArg = getFile("exampledocs").toString();
	  final String fileArg = File.createTempFile("temp", ".json").toString();

	  assertTrue(tool.recursionPossible(new String[] {dirArg}));
	  assertFalse(tool.recursionPossible(new String[] {fileArg}));
	  assertTrue(tool.recursionPossible(new String[] {fileArg, dirArg}));
	}

	/**
	 * Runs {@code postFiles} in dry-run mode with {@code recursive = 1} over the "exampledocs"
	 * directory and expects a count of 2.
	 */
	@Test
	public void testRecursionAppliesToFilesMode() throws MalformedURLException {
	  final PostTool tool = new PostTool();
	  tool.recursive = 1; // This is the default
	  tool.dryRun = true;
	  tool.solrUpdateUrl = new URL("http://localhost:8983/solr/fake/update");

	  final File exampleDocs = getFile("exampledocs");
	  final String[] targets = {exampleDocs.toString()};
	  final int posted = tool.postFiles(targets, 0, null, null);
	  assertEquals(2, posted);
	}

	/**
	 * Crawls the canned site served by {@link MockPageFetcher} in dry-run mode and checks the page
	 * count returned by {@code postWebPages} for different {@code recursive} settings, first with
	 * the mock's robots rules and then with an empty rule list for the host.
	 */
	@Test
	public void testDoWebMode() throws IOException, URISyntaxException {
	  final String rootWithFragment = "http://[ff01::114]/#removeme";
	  final String root = "http://[ff01::114]/";

	  final PostTool tool = new PostTool();
	  tool.pageFetcher = new MockPageFetcher();
	  tool.dryRun = true;
	  tool.solrUpdateUrl = new URL("http://user:password@localhost:5150/solr/fake/update");

	  // Uses mock pageFetcher
	  tool.delay = 0;
	  tool.recursive = 5;
	  final int deepCrawl = tool.postWebPages(new String[] {rootWithFragment}, 0, null);
	  assertEquals(5, deepCrawl);

	  tool.recursive = 1;
	  final int shallowCrawl = tool.postWebPages(new String[] {root}, 0, null);
	  assertEquals(3, shallowCrawl);

	  // Without respecting robots.txt
	  tool.pageFetcher.robotsCache.put("[ff01::114]", Collections.emptyList());
	  tool.recursive = 5;
	  final int unrestrictedCrawl = tool.postWebPages(new String[] {rootWithFragment}, 0, null);
	  assertEquals(6, unrestrictedCrawl);
	}

	/**
	 * Checks that the robots rules loaded by {@link MockPageFetcher} allow the root page, disallow
	 * {@code /disallowed}, and contain two entries for the host.
	 */
	@Test
	public void testRobotsExclusion() throws IOException, URISyntaxException {
	  final PostTool tool = new PostTool();
	  tool.pageFetcher = new MockPageFetcher();
	  tool.dryRun = true;

	  final URL allowedUrl = new URL("http://[ff01::114]/");
	  assertFalse(tool.pageFetcher.isDisallowedByRobots(allowedUrl));

	  final URL disallowedUrl = new URL("http://[ff01::114]/disallowed");
	  assertTrue(tool.pageFetcher.isDisallowedByRobots(disallowedUrl));

	  assertEquals(
	      "There should be two entries parsed from robots.txt",
	      2,
	      tool.pageFetcher.robotsCache.get("[ff01::114]").size());
	}

	/**
	 * In-memory stand-in for {@link PostTool.PageFetcher}: page content and outgoing links are
	 * looked up in fixed maps, and the robots rules are parsed from an inline robots.txt.
	 */
	static class MockPageFetcher extends PostTool.PageFetcher {
	  /** Page HTML keyed by URL string. */
	  HashMap<String, String> htmlMap = new HashMap<>();

	  /** Links reported for a page, keyed by the page's normalized URL string. */
	  HashMap<String, Set<URI>> linkMap = new HashMap<>();

	  /** Populates the canned pages, their link sets and the robots rules for host [ff01::114]. */
	  public MockPageFetcher() throws IOException, URISyntaxException {
	    (new PostTool()).super();

	    final String base = "http://[ff01::114]";

	    // Canned page bodies; the root and index.html share the same content.
	    final String rootHtml =
	        "<html><body><a href=\"http://[ff01::114]/page1\">page1</a><a href=\"http://[ff01::114]/page2\">page2</a></body></html>";
	    htmlMap.put(base, rootHtml);
	    htmlMap.put(base + "/index.html", rootHtml);
	    htmlMap.put(
	        base + "/page1", "<html><body><a href=\"http://[ff01::114]/page1/foo\"></body></html>");
	    htmlMap.put(
	        base + "/page1/foo",
	        "<html><body><a href=\"http://[ff01::114]/page1/foo/bar\"></body></html>");
	    htmlMap.put(
	        base + "/page1/foo/bar",
	        "<html><body><a href=\"http://[ff01::114]/page1\"></body></html>");
	    htmlMap.put(
	        base + "/page2",
	        "<html><body><a href=\"http://[ff01::114]/\"><a href=\"http://[ff01::114]/disallowed\"/></body></html>");
	    htmlMap.put(
	        base + "/disallowed", "<html><body><a href=\"http://[ff01::114]/\"></body></html>");

	    // Outgoing links per page; the root and index.html share one set instance.
	    final Set<URI> rootLinks = new HashSet<>();
	    rootLinks.add(new URI(base + "/page1"));
	    rootLinks.add(new URI(base + "/page2"));
	    linkMap.put(base, rootLinks);
	    linkMap.put(base + "/index.html", rootLinks);

	    final Set<URI> page1Links = new HashSet<>();
	    page1Links.add(new URI(base + "/page1/foo"));
	    linkMap.put(base + "/page1", page1Links);

	    final Set<URI> fooLinks = new HashSet<>();
	    fooLinks.add(new URI(base + "/page1/foo/bar"));
	    linkMap.put(base + "/page1/foo", fooLinks);

	    final Set<URI> page2Links = new HashSet<>();
	    page2Links.add(new URI(base + "/disallowed"));
	    linkMap.put(base + "/page2", page2Links);

	    // Simulate a robots.txt file with comments and a few disallows
	    final String robotsTxt =
	        "# Comments appear after the \"#\" symbol at the start of a line, or after a directive\n"
	            + "User-agent: * # match all bots\n"
	            + "Disallow: # This is void\n"
	            + "Disallow: /disallow # Disallow this path\n"
	            + "Disallow: /nonexistentpath # Disallow this path\n";
	    final ByteArrayInputStream robotsStream =
	        new ByteArrayInputStream(robotsTxt.getBytes(StandardCharsets.UTF_8));
	    this.robotsCache.put("[ff01::114]", super.parseRobotsTxt(robotsStream));
	  }

	  /**
	   * Returns a 403 result for URLs disallowed by the robots rules; otherwise a 200 "text/html"
	   * result whose content is the canned HTML stored for {@code u}.
	   */
	  @Override
	  public PostTool.PageFetcherResult readPageFromUrl(URL u) {
	    final PostTool.PageFetcherResult result = new PostTool.PageFetcherResult();
	    if (!isDisallowedByRobots(u)) {
	      result.httpStatus = 200;
	      result.contentType = "text/html";
	      final String html = htmlMap.get(u.toString());
	      result.content = ByteBuffer.wrap(html.getBytes(StandardCharsets.UTF_8));
	    } else {
	      result.httpStatus = 403;
	    }
	    return result;
	  }

	  /**
	   * Returns the canned link set registered for the normalized form of {@code url}, or a new
	   * empty set when none is registered. The stream, type and post URL are ignored.
	   */
	  @Override
	  public Set<URI> getLinksFromWebPage(URL url, InputStream is, String type, URL postUrl) {
	    final String key = PostTool.normalizeUrlEnding(url.toString());
	    final Set<URI> links = linkMap.get(key);
	    return links != null ? links : new HashSet<>();
	  }
	}
	}