TIKA-3306 add a parser-override content type
diff --git a/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java b/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
index 2f6b6d8..fbdedf9 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
@@ -78,7 +78,9 @@
//short circuit via OverrideDetector
//can't rely on ordering because subsequent detector may
//change Override's to a specialization of Override's
- if (detector instanceof OverrideDetector && metadata.get(TikaCoreProperties.CONTENT_TYPE_OVERRIDE) != null) {
+ if (detector instanceof OverrideDetector
+ && (metadata.get(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE) != null ||
+ metadata.get(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE) != null)) {
return detector.detect(input, metadata);
}
MediaType detected = detector.detect(input, metadata);
diff --git a/tika-core/src/main/java/org/apache/tika/detect/OverrideDetector.java b/tika-core/src/main/java/org/apache/tika/detect/OverrideDetector.java
index 318ede8..818e228 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/OverrideDetector.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/OverrideDetector.java
@@ -25,17 +25,25 @@
/**
* Use this to force a content type detection via the
- * {@link TikaCoreProperties#CONTENT_TYPE_OVERRIDE} key in the metadata object.
+ * {@link TikaCoreProperties#CONTENT_TYPE_USER_OVERRIDE} key in the metadata object.
+ * <p>
+ * Parsers may also force a type via
+ * {@link TikaCoreProperties#CONTENT_TYPE_PARSER_OVERRIDE}; that key is checked first.
+ * If neither key is set, {@link MediaType#OCTET_STREAM} is returned.
*/
public class OverrideDetector implements Detector {
@Override
public MediaType detect(InputStream input, Metadata metadata) throws IOException {
- String type = metadata.get(TikaCoreProperties.CONTENT_TYPE_OVERRIDE);
- if (type == null) {
- return MediaType.OCTET_STREAM;
- } else {
+ String type = metadata.get(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE);
+ if (type != null) {
return MediaType.parse(type);
}
+ type = metadata.get(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE);
+ if (type != null) {
+ return MediaType.parse(type);
+ } else {
+ return MediaType.OCTET_STREAM;
+ }
}
}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index 37a36da..3b9c777 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -170,9 +170,20 @@
Property CONTENT_TYPE_HINT =
Property.internalText(HttpHeaders.CONTENT_TYPE+"-Hint");
- Property CONTENT_TYPE_OVERRIDE =
+ /**
+ * Set by users to force the type returned by {@link org.apache.tika.detect.OverrideDetector}.
+ */
+ Property CONTENT_TYPE_USER_OVERRIDE =
Property.internalText(HttpHeaders.CONTENT_TYPE+"-Override");
+
+ /**
+ * Set by parsers to force the detected type of embedded resources via the
+ * override detector; it is checked before {@link #CONTENT_TYPE_USER_OVERRIDE}.
+ */
+ Property CONTENT_TYPE_PARSER_OVERRIDE =
+ Property.internalText(HttpHeaders.CONTENT_TYPE+"-Parser-Override");
+
/**
* @see DublinCore#FORMAT
*/
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index a42c7ca..91ee6a8 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -29,6 +29,7 @@
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.sax.SecureContentHandler;
@@ -114,7 +115,11 @@
// Automatically detect the MIME type of the document
MediaType type = detector.detect(tis, metadata);
- metadata.set(Metadata.CONTENT_TYPE, type.toString());
+ //update CONTENT_TYPE unless the detected type is just the parser override
+ if (metadata.get(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE) == null
+ || ! metadata.get(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE).equals(type.toString())) {
+ metadata.set(Metadata.CONTENT_TYPE, type.toString());
+ }
//check for zero-byte inputstream
if (tis.getOpenContainer() == null) {
tis.mark(1);
diff --git a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
index bf10736..e7329d2 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
@@ -19,7 +19,9 @@
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.sax.TaggedContentHandler;
@@ -232,7 +234,12 @@
protected Parser getParser(Metadata metadata, ParseContext context) {
Map<MediaType, Parser> map = getParsers(context);
- MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
+ //check for parser override first
+ String contentTypeString = metadata.get(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE);
+ if (contentTypeString == null) {
+ contentTypeString = metadata.get(Metadata.CONTENT_TYPE);
+ }
+ MediaType type = MediaType.parse(contentTypeString);
if (type != null) {
// We always work on the normalised, canonical form
type = registry.normalize(type);
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
index eae94dd..d805963 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
@@ -24,7 +24,6 @@
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
@@ -84,21 +83,13 @@
metadataException = e;
}
- String mime = metadata.get(Metadata.CONTENT_TYPE);
- String override = metadata.get(TikaCoreProperties.CONTENT_TYPE_OVERRIDE);
try (InputStream pathStream = Files.newInputStream(path)) {
//specify ocr content type
- metadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, ocrMediaType.toString());
+ metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE, ocrMediaType.toString());
//need to use bodycontenthandler to filter out re-dumping of metadata
//in xhtmlhandler
ocrParser.parse(pathStream, new EmbeddedContentHandler(
new BodyContentHandler(xhtml)), metadata, context);
- } finally {
- //reset actual mime because AutoDetectParser will set mime to detected
- //which is the override.
- metadata.set(Metadata.CONTENT_TYPE, mime);
- //reset override too
- metadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, override);
}
xhtml.endDocument();
} finally {
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
index da3e0d9..6f67e68 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
@@ -250,7 +250,7 @@
MediaType mediaType = detector.detect(tis, submd);
if (mediaType != null) {
//detect only once
- submd.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, mediaType.toString());
+ submd.set(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE, mediaType.toString());
if (mediaType.toString().startsWith("text")) {
return true;
}
@@ -602,7 +602,7 @@
try {
Metadata inlineMetadata = new Metadata();
if (inlineText) {
- inlineMetadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, MediaType.TEXT_PLAIN.toString());
+ inlineMetadata.set(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE, MediaType.TEXT_PLAIN.toString());
}
parser.parse(
new ByteArrayInputStream(part.bytes),
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
index bc87521..64e6f60 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
@@ -100,7 +100,7 @@
Queue<String> multiline = new LinkedList<String>();
mailMetadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
mailMetadata.set(Metadata.CONTENT_TYPE, "message/rfc822");
- mailMetadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, "message/rfc822");
+ mailMetadata.set(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE, "message/rfc822");
curLine = reader.readLine();
if (curLine == null) {
break;
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index a170b1f..de3870a 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -377,7 +377,7 @@
byte[] data = getValue(textChunk);
if (data != null) {
Metadata chunkMetadata = new Metadata();
- chunkMetadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE,
+ chunkMetadata.set(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE,
MediaType.TEXT_PLAIN.toString());
handleEmbeddedResource(
TikaInputStream.get(data), chunkMetadata, null,
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParser.java
index df5d90a..906b33b 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParser.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParser.java
@@ -221,7 +221,7 @@
//the underlying bytes from the pstMail object...
byte[] mailContent = pstMail.getBody().getBytes(UTF_8);
- mailMetadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE,
+ mailMetadata.set(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE,
MediaType.TEXT_PLAIN.toString());
embeddedExtractor.parseEmbedded(new ByteArrayInputStream(mailContent),
handler, mailMetadata, true);
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index 6372964..24288af 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -17,7 +17,6 @@
package org.apache.tika.parser.ocr;
import org.apache.tika.TikaTest;
-import org.apache.tika.config.InitializableProblemHandler;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
@@ -40,7 +39,6 @@
import java.io.File;
import java.io.InputStream;
-import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
@@ -90,7 +88,7 @@
Metadata metadata = new Metadata();
MediaType ocrMediaType = new MediaType(mediaType.getType(),
"OCR-" + mediaType.getSubtype());
- metadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE,
+ metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE,
ocrMediaType.toString());
return metadata;
}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 347b86b..53b7ee4 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -455,16 +455,11 @@
ImageIOUtil.writeImage(image, config.getOcrImageFormatName(),
os, dpi, config.getOcrImageQuality());
}
- String mime = metadata.get(Metadata.CONTENT_TYPE);
- String overrideMime = metadata.get(TikaCoreProperties.CONTENT_TYPE_OVERRIDE);
try (InputStream is = TikaInputStream.get(tmpFile)) {
- metadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, ocrImageMediaType.toString());
+ metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE, ocrImageMediaType.toString());
ocrParser.parse(is,
new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
metadata, context);
- } finally {
- metadata.set(Metadata.CONTENT_TYPE, mime);
- metadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, overrideMime);
}
} catch (IOException e) {
handleCatchableIOE(e);
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-text-module/src/main/java/org/apache/tika/parser/csv/TextAndCSVParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-text-module/src/main/java/org/apache/tika/parser/csv/TextAndCSVParser.java
index 36ed122..8e92e08 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-text-module/src/main/java/org/apache/tika/parser/csv/TextAndCSVParser.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-text-module/src/main/java/org/apache/tika/parser/csv/TextAndCSVParser.java
@@ -48,7 +48,7 @@
import org.xml.sax.SAXException;
/**
- * Unless the {@link TikaCoreProperties#CONTENT_TYPE_OVERRIDE} is set,
+ * Unless the {@link TikaCoreProperties#CONTENT_TYPE_USER_OVERRIDE} is set,
* this parser tries to assess whether the file is a text file, csv or tsv.
* If the detector detects regularity in column numbers and/or encapsulated cells,
* this parser will apply the {@link org.apache.commons.csv.CSVParser};
@@ -283,7 +283,7 @@
}
private CSVParams getOverride(Metadata metadata) {
- String override = metadata.get(TikaCoreProperties.CONTENT_TYPE_OVERRIDE);
+ String override = metadata.get(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE);
if (override == null) {
return new CSVParams();
}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/csv/TextAndCSVParserTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/csv/TextAndCSVParserTest.java
index 809f881..192681d 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/csv/TextAndCSVParserTest.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/csv/TextAndCSVParserTest.java
@@ -29,7 +29,6 @@
import org.apache.commons.io.ByteOrderMark;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
@@ -94,7 +93,7 @@
@Test
public void testCSV_UTF8_TypeOverride() throws Exception {
Metadata metadata = new Metadata();
- metadata.set(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, "text/csv; charset=UTF-8");
+ metadata.set(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE, "text/csv; charset=UTF-8");
XMLResult xmlResult = getXML(new ByteArrayInputStream(CSV_UTF8), PARSER, metadata);
assertEquals("comma", xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
assertMediaTypeEquals("csv", "UTF-8","comma",
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
index c474240..a439b05 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
@@ -110,8 +110,8 @@
assertEquals("false", meta_jpg.get(RTFMetadata.THUMBNAIL));
assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
- assertEquals(50, meta_jpg.names().length);
- assertEquals(109, meta_jpg_exif.names().length);
+ assertEquals(51, meta_jpg.names().length);
+ assertEquals(110, meta_jpg_exif.names().length);
}
private static class Pair {
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index e719e7a..f36106b 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -113,6 +113,16 @@
}
+ @Test
+ public void testParserContentTypeOverride() throws Exception {
+ Assume.assumeTrue("can run OCR", canRun());
+ //the container's Content-Type must remain application/pdf, not the OCR parser-override type
+ List<Metadata> metadata = getRecursiveMetadata("testOCR.pdf", AUTO_DETECT_PARSER,
+ BasicContentHandlerFactory.HANDLER_TYPE.XML);
+ assertContains("<meta name=\"Content-Type\" content=\"application/pdf\" />",
+ metadata.get(0).get(TikaCoreProperties.TIKA_CONTENT));
+ }
+
private void testBasicOCR(String resource, String[] nonOCRContains, int numMetadatas) throws Exception{
Assume.assumeTrue("can run OCR", canRun());
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 2d560a5..26c6827 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -17,22 +17,18 @@
package org.apache.tika.server.core.resource;
-import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.apache.cxf.attachment.ContentDisposition;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
-import org.apache.tika.detect.Detector;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
import org.apache.tika.sax.RichTextContentHandler;
@@ -294,7 +290,7 @@
if (mediaType != null) {
metadata.add(Metadata.CONTENT_TYPE, mediaType.toString());
- metadata.add(TikaCoreProperties.CONTENT_TYPE_OVERRIDE, mediaType.toString());
+ metadata.add(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE, mediaType.toString());
}
for (Map.Entry<String, List<String>> e : httpHeaders.entrySet()) {