// blob: e6a2a0e85e6609526595e63bd66fa6b507b8f7f4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.protocol;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.metadata.SpellCheckedMetadata;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.WritableTestUtils;
import org.apache.tika.mime.MimeTypes;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link Content}.
 *
 * <p>
 * Covers two things: metadata lookups on a {@code Content} that has been
 * passed through {@link WritableTestUtils#testWritable}, and the value
 * reported by {@link Content#getContentType()} for different combinations of
 * URL, payload and declared content type.
 */
public class TestContent {

  /** Configuration shared by every {@code Content} built in this class. */
  private static final Configuration conf = NutchConfiguration.create();

  /** Base URL passed to every {@code Content} built by {@link #newContent}. */
  private static final String BASE_URL = "http://www.foo.com/";

  /**
   * Builds a {@code Content} whose payload is the UTF-8 encoding of
   * {@code body}, using {@link #BASE_URL} as base and the shared
   * configuration.
   *
   * @param url
   *          URL of the content
   * @param body
   *          payload text; encoded with UTF-8
   * @param declaredType
   *          content type handed to the constructor; may be empty or
   *          {@code null}
   * @param metadata
   *          metadata handed to the constructor
   * @return the newly constructed content
   */
  private static Content newContent(String url, String body,
      String declaredType, Metadata metadata) {
    // StandardCharsets avoids the by-name charset lookup (and its checked
    // UnsupportedEncodingException) that getBytes("UTF8") relies on.
    return new Content(url, BASE_URL, body.getBytes(StandardCharsets.UTF_8),
        declaredType, metadata, conf);
  }

  /**
   * Checks that the {@code Content-Type} metadata value can be read back
   * under its exact name, a lower-cased name and a misspelled name.
   *
   * @throws Exception
   *           if the writable check fails
   */
  @Test
  public void testContent() throws Exception {
    String page = "<HTML><BODY><H1>Hello World</H1><P>The Quick Brown Fox Jumped Over the Lazy Fox.</BODY></HTML>";
    String url = "http://www.foo.com/";

    SpellCheckedMetadata metaData = new SpellCheckedMetadata();
    metaData.add("Host", "www.foo.com");
    metaData.add("Content-Type", "text/html");

    Content r = new Content(url, url, page.getBytes(StandardCharsets.UTF_8),
        "text/html", metaData, conf);

    // NOTE(review): presumably a serialise/deserialise round-trip check --
    // confirm against WritableTestUtils.
    WritableTestUtils.testWritable(r);

    // The three lookups differ only in the spelling of the key; all are
    // expected to resolve to the same stored value.
    Assert.assertEquals("exact header name", "text/html",
        r.getMetadata().get("Content-Type"));
    Assert.assertEquals("lower-case header name", "text/html",
        r.getMetadata().get("content-type"));
    Assert.assertEquals("misspelled header name", "text/html",
        r.getMetadata().get("CONTENTYPE"));
  }

  /**
   * Unit tests for {@link Content#getContentType()}: the reported type for
   * various combinations of URL extension, payload and declared type.
   *
   * @throws Exception
   *           if constructing a {@code Content} fails
   */
  @Test
  public void testGetContentType() throws Exception {
    // One Metadata instance is deliberately reused for every Content, as in
    // the original test.
    Metadata metadata = new Metadata();
    Content c;

    c = newContent("http://www.foo.com/", "", "text/html; charset=UTF-8",
        metadata);
    Assert.assertEquals("declared type with charset parameter", "text/html",
        c.getContentType());

    c = newContent("http://www.foo.com/foo.html", "", "", metadata);
    Assert.assertEquals(".html URL, empty declared type", "text/html",
        c.getContentType());

    c = newContent("http://www.foo.com/foo.html", "", null, metadata);
    Assert.assertEquals(".html URL, null declared type", "text/html",
        c.getContentType());

    c = newContent("http://www.foo.com/", "<html></html>", "", metadata);
    Assert.assertEquals("HTML payload, empty declared type", "text/html",
        c.getContentType());

    c = newContent("http://www.foo.com/foo.html", "<html></html>",
        "text/plain", metadata);
    Assert.assertEquals("HTML payload and .html URL, declared text/plain",
        "text/html", c.getContentType());

    c = newContent("http://www.foo.com/foo.png", "<html></html>",
        "text/plain", metadata);
    Assert.assertEquals("HTML payload and .png URL, declared text/plain",
        "text/html", c.getContentType());

    c = newContent("http://www.foo.com/", "", "", metadata);
    Assert.assertEquals("no payload, no extension, empty declared type",
        MimeTypes.OCTET_STREAM, c.getContentType());

    // With a null declared type the test only requires that some type is
    // reported, not which one.
    c = newContent("http://www.foo.com/", "", null, metadata);
    Assert.assertNotNull("no payload, no extension, null declared type",
        c.getContentType());
  }
}