| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.any23.plugin.htmlscraper; |
| |
| import org.apache.any23.extractor.ExtractionContext; |
| import org.apache.any23.extractor.ExtractionException; |
| import org.apache.any23.extractor.ExtractionParameters; |
| import org.apache.any23.extractor.ExtractionResult; |
| import org.junit.After; |
| import org.junit.Assert; |
| import org.junit.Before; |
| import org.junit.Test; |
| import org.eclipse.rdf4j.model.IRI; |
| import org.eclipse.rdf4j.model.impl.SimpleValueFactory; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.Arrays; |
| import java.util.HashSet; |
| |
| import static org.mockito.Mockito.any; |
| import static org.mockito.Mockito.eq; |
| import static org.mockito.Mockito.mock; |
| import static org.mockito.Mockito.verify; |
| |
| /** |
| * Test case for {@link HTMLScraperExtractor}. |
| * |
| * @author Michele Mostarda (mostarda@fbk.eu) |
| */ |
| public class HTMLScraperExtractorTest { |
| |
| private HTMLScraperExtractor extractor; |
| |
| @Before |
| public void setUp() { |
| extractor = new HTMLScraperExtractorFactory().createExtractor(); |
| } |
| |
| @After |
| public void tearDown() { |
| extractor = null; |
| } |
| |
| @Test |
| public void testGetExtractors() { |
| final String[] extractors = extractor.getTextExtractors(); |
| Assert.assertEquals( new HashSet<>(Arrays.asList(extractors)).size(), 4 ); |
| } |
| |
| @Test |
| public void testRun() throws IOException, ExtractionException { |
| final InputStream is = this.getClass().getResourceAsStream("html-scraper-extractor-test.html"); |
| final ExtractionResult extractionResult = mock(ExtractionResult.class); |
| final IRI pageIRI = SimpleValueFactory.getInstance().createIRI("http://fake/test/page/testrun"); |
| final ExtractionContext extractionContext = new ExtractionContext( |
| extractor.getDescription().getExtractorName(), |
| pageIRI |
| ); |
| extractor.run(ExtractionParameters.newDefault(), extractionContext, is, extractionResult); |
| |
| verify(extractionResult).writeTriple( |
| eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_DE_PROPERTY), any()); |
| verify(extractionResult).writeTriple( |
| eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_AE_PROPERTY), any()); |
| verify(extractionResult).writeTriple( |
| eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_LCE_PROPERTY), any()); |
| verify(extractionResult).writeTriple( |
| eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_CE_PROPERTY), any()); |
| } |
| |
| } |