blob: 5d0b05e9ce50c54b2436365162b1aacfc7d119d1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.crypto;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.security.NoSuchProviderException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import org.bouncycastle.asn1.cryptopro.CryptoProObjectIdentifiers;
import org.bouncycastle.asn1.nist.NISTObjectIdentifiers;
import org.bouncycastle.asn1.oiw.OIWObjectIdentifiers;
import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers;
import org.bouncycastle.asn1.teletrust.TeleTrusTObjectIdentifiers;
import org.bouncycastle.asn1.x509.GeneralName;
import org.bouncycastle.asn1.x509.X509ObjectIdentifiers;
import org.bouncycastle.asn1.x9.X9ObjectIdentifiers;
import org.bouncycastle.cms.CMSSignedDataGenerator;
import org.bouncycastle.tsp.TimeStampToken;
import org.bouncycastle.tsp.cms.CMSTimeStampedData;
import org.bouncycastle.tsp.cms.CMSTimeStampedDataParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.WriteLimitReachedException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.RereadableInputStream;
/**
* Tika parser for Time Stamped Data Envelope (application/timestamped-data)
*/
public class TSDParser extends AbstractParser {
/** MIME type string of a Time Stamped Data envelope; also written as each token's content type. */
public static final String TSD_MIME_TYPE = "application/timestamped-data";
private static final long serialVersionUID = 3268158344501763323L;
private static final Logger LOG = LoggerFactory.getLogger(TSDParser.class);
// Prefix of every per-token metadata key; buildMetas appends the 1-based token
// index and " - " to it, producing keys such as "Time-Stamp-n.1 - Policy-Id".
private static final String TSD_LOOP_LABEL = "Time-Stamp-n.";
// Key suffixes (and one fixed value) for the per-token metadata entries.
private static final String TSD_DESCRIPTION_LABEL = "Description";
private static final String TSD_DESCRIPTION_VALUE = "Time Stamped Data Envelope";
private static final String TSD_PARSED_LABEL = "File-Parsed";
private static final String TSD_PARSED_DATE = "File-Parsed-DateTime";
private static final String TSD_DATE = "Time-Stamp-DateTime";
// Despite the name this is not a format pattern: it is the literal "UTC" suffix
// appended after the formatted date values in buildMetas.
private static final String TSD_DATE_FORMAT = "UTC";
private static final String TSD_POLICY_ID = "Policy-Id";
private static final String TSD_SERIAL_NUMBER = "Serial-Number";
private static final String TSD_TSA = "TSA";
private static final String TSD_ALGORITHM = "Algorithm";
// The single media type this parser reports from getSupportedTypes.
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.application("timestamped-data"));
/**
 * Reports the media types this parser handles.
 *
 * @param context parse context; not consulted by this implementation
 * @return the constant single-element set built from
 *         {@code MediaType.application("timestamped-data")}
 */
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
    return TSDParser.SUPPORTED_TYPES;
}
/**
 * Parses a Time Stamped Data envelope in two passes over the same stream:
 * first the time-stamp token fields are extracted and turned into metadata,
 * then the stream is rewound and the enveloped content is handed to the
 * embedded document extractor.
 *
 * @param stream   the envelope bytes; wrapped in a {@link RereadableInputStream}
 *                 so it can be rewound between the two passes
 * @param handler  SAX handler receiving the XHTML events
 * @param metadata metadata of the document being parsed
 * @param context  parse context used to look up the embedded document extractor
 * @throws IOException   if reading or rewinding the stream fails
 * @throws SAXException  if the handler fails
 * @throws TikaException declared by the parser contract
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
        ParseContext context) throws IOException, SAXException, TikaException {
    try (RereadableInputStream rereadable = new RereadableInputStream(stream, 2048, true)) {
        Metadata embeddedMetadata = new Metadata();

        // First pass: read the time-stamp tokens out of the envelope.
        List<TSDMetas> tokenMetas = this.extractMetas(rereadable);

        // When the caller's metadata already has entries, the time-stamp fields are
        // written to the separate Metadata object passed on to the embedded document;
        // otherwise they are written straight onto the caller's metadata.
        Metadata target;
        if (metadata != null && metadata.size() > 0) {
            target = embeddedMetadata;
        } else {
            target = metadata;
        }
        this.buildMetas(tokenMetas, target);

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        // Second pass: rewind and parse the file embedded in the envelope.
        rereadable.rewind();
        this.parseTSDContent(rereadable, xhtml, embeddedMetadata, context);

        xhtml.endDocument();
    }
}
/**
 * Reads every time-stamp token from the envelope and captures its generation
 * time, policy id, serial number, TSA name and hash algorithm OID.
 *
 * <p>Extraction is best-effort: any failure other than a
 * {@link SecurityException} or a write-limit condition is logged and an empty
 * list is returned, discarding tokens collected before the failure.
 *
 * @param stream stream positioned at the start of the envelope
 * @return one {@code TSDMetas} per token, or an empty list if extraction failed
 * @throws SAXException declared because
 *         {@code WriteLimitReachedException.throwIfWriteLimitReached} may rethrow
 */
private List<TSDMetas> extractMetas(InputStream stream) throws SAXException {
    List<TSDMetas> tsdMetasList = new ArrayList<>();
    try {
        CMSTimeStampedData cmsTimeStampedData = new CMSTimeStampedData(stream);
        TimeStampToken[] tokens = cmsTimeStampedData.getTimeStampTokens();
        for (TimeStampToken token : tokens) {
            TSDMetas tsdMetas = new TSDMetas(true, token.getTimeStampInfo().getGenTime(),
                    token.getTimeStampInfo().getPolicy().getId(),
                    token.getTimeStampInfo().getSerialNumber(),
                    token.getTimeStampInfo().getTsa(),
                    token.getTimeStampInfo().getHashAlgorithm().getAlgorithm().getId());
            tsdMetasList.add(tsdMetas);
        }
    } catch (SecurityException e) {
        // Security failures must never be swallowed.
        throw e;
    } catch (Exception ex) {
        // Give the write-limit check a chance to rethrow before treating this as a
        // recoverable parse failure.
        WriteLimitReachedException.throwIfWriteLimitReached(ex);
        // The label names this method; it previously pointed at buildMetas.
        LOG.error("Error in TSDParser.extractMetas {}", ex.getMessage());
        tsdMetasList.clear();
    }
    return tsdMetasList;
}
/**
 * Writes one group of metadata entries per time-stamp token. Every key starts
 * with the loop label, the 1-based position of the token and {@code " - "}.
 *
 * @param tsdMetasList token data to publish, in output order
 * @param metadata     metadata object receiving the entries
 */
private void buildMetas(List<TSDMetas> tsdMetasList, Metadata metadata) {
    int position = 0;
    for (TSDMetas entry : tsdMetasList) {
        position++;
        // Shared key prefix for all entries of this token.
        final String prefix = TSD_LOOP_LABEL + position + " - ";

        metadata.set(prefix + Metadata.CONTENT_TYPE, TSD_MIME_TYPE);
        metadata.set(prefix + TSD_DESCRIPTION_LABEL, TSD_DESCRIPTION_VALUE);
        metadata.set(prefix + TSD_PARSED_LABEL, entry.getParseBuiltStr());
        metadata.set(prefix + TSD_PARSED_DATE, entry.getParsedDateStr() + " " + TSD_DATE_FORMAT);
        metadata.set(prefix + TSD_DATE, entry.getEmitDateStr() + " " + TSD_DATE_FORMAT);
        metadata.set(prefix + TSD_POLICY_ID, entry.getPolicyId());
        metadata.set(prefix + TSD_SERIAL_NUMBER, entry.getSerialNumberFormatted());
        metadata.set(prefix + TSD_TSA, entry.getTsaStr());
        metadata.set(prefix + TSD_ALGORITHM, entry.getAlgorithmName());
    }
}
/**
 * Hands the content enveloped in the time-stamped data to the embedded
 * document extractor, provided the extractor agrees to parse it.
 *
 * <p>Failures other than a {@link SecurityException} or a write-limit
 * condition are logged and otherwise ignored. The CMS parser is closed in
 * every case.
 *
 * @param stream   stream positioned at the start of the envelope
 * @param handler  handler receiving the embedded document's events
 * @param metadata metadata passed to the extractor for the embedded document
 * @param context  parse context used to look up the extractor
 * @throws SAXException declared because
 *         {@code WriteLimitReachedException.throwIfWriteLimitReached} may rethrow
 */
private void parseTSDContent(InputStream stream, ContentHandler handler, Metadata metadata,
        ParseContext context) throws SAXException {
    EmbeddedDocumentExtractor extractor =
            EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
    if (!extractor.shouldParseEmbedded(metadata)) {
        return;
    }

    CMSTimeStampedDataParser envelopeParser = null;
    try {
        envelopeParser = new CMSTimeStampedDataParser(stream);
        try (InputStream content = TikaInputStream.get(envelopeParser.getContent())) {
            extractor.parseEmbedded(content, handler, metadata, false);
        }
    } catch (SecurityException e) {
        throw e;
    } catch (Exception ex) {
        WriteLimitReachedException.throwIfWriteLimitReached(ex);
        LOG.error("Error in TSDParser.parseTSDContent {}", ex.getMessage());
    } finally {
        this.closeCMSParser(envelopeParser);
    }
}
/**
 * Closes the given CMS parser, logging rather than propagating any
 * {@link IOException} raised by the close.
 *
 * @param cmsTimeStampedDataParser parser to close; {@code null} is a no-op
 */
private void closeCMSParser(CMSTimeStampedDataParser cmsTimeStampedDataParser) {
    if (cmsTimeStampedDataParser == null) {
        return;
    }
    try {
        cmsTimeStampedDataParser.close();
    } catch (IOException ex) {
        LOG.error("Error in TSDParser.closeCMSParser {}", ex.getMessage());
    }
}
/**
 * Static lookup tables translating algorithm OID strings into short
 * human-readable names, plus a helper for obtaining a {@link MessageDigest}.
 */
private static class OIDNameMapper {
    private static final Map<String, String> encryptionAlgs = buildEncryptionAlgs();
    private static final Map<String, String> digestAlgs = buildDigestAlgs();

    /** Builds the table of encryption/signature algorithm OIDs to names. */
    private static Map<String, String> buildEncryptionAlgs() {
        Map<String, String> names = new HashMap<>();
        names.put(X9ObjectIdentifiers.id_dsa_with_sha1.getId(), "DSA");
        names.put(X9ObjectIdentifiers.id_dsa.getId(), "DSA");
        names.put(OIWObjectIdentifiers.dsaWithSHA1.getId(), "DSA");
        names.put(PKCSObjectIdentifiers.rsaEncryption.getId(), "RSA");
        names.put(PKCSObjectIdentifiers.sha1WithRSAEncryption.getId(), "RSA");
        names.put(TeleTrusTObjectIdentifiers.teleTrusTRSAsignatureAlgorithm.getId(), "RSA");
        names.put(X509ObjectIdentifiers.id_ea_rsa.getId(), "RSA");
        names.put(CMSSignedDataGenerator.ENCRYPTION_ECDSA, "ECDSA");
        names.put(X9ObjectIdentifiers.ecdsa_with_SHA2.getId(), "ECDSA");
        names.put(X9ObjectIdentifiers.ecdsa_with_SHA224.getId(), "ECDSA");
        names.put(X9ObjectIdentifiers.ecdsa_with_SHA256.getId(), "ECDSA");
        names.put(X9ObjectIdentifiers.ecdsa_with_SHA384.getId(), "ECDSA");
        names.put(X9ObjectIdentifiers.ecdsa_with_SHA512.getId(), "ECDSA");
        names.put(CMSSignedDataGenerator.ENCRYPTION_RSA_PSS, "RSAandMGF1");
        names.put(CryptoProObjectIdentifiers.gostR3410_94.getId(), "GOST3410");
        names.put(CryptoProObjectIdentifiers.gostR3410_2001.getId(), "ECGOST3410");
        names.put("1.3.6.1.4.1.5849.1.6.2", "ECGOST3410");
        names.put("1.3.6.1.4.1.5849.1.1.5", "GOST3410");
        return names;
    }

    /** Builds the table of digest algorithm OIDs to names. */
    private static Map<String, String> buildDigestAlgs() {
        Map<String, String> names = new HashMap<>();
        names.put(PKCSObjectIdentifiers.md5.getId(), "MD5");
        names.put(OIWObjectIdentifiers.idSHA1.getId(), "SHA1");
        names.put(NISTObjectIdentifiers.id_sha224.getId(), "SHA224");
        names.put(NISTObjectIdentifiers.id_sha256.getId(), "SHA256");
        names.put(NISTObjectIdentifiers.id_sha384.getId(), "SHA384");
        names.put(NISTObjectIdentifiers.id_sha512.getId(), "SHA512");
        names.put(PKCSObjectIdentifiers.sha1WithRSAEncryption.getId(), "SHA1");
        names.put(PKCSObjectIdentifiers.sha224WithRSAEncryption.getId(), "SHA224");
        names.put(PKCSObjectIdentifiers.sha256WithRSAEncryption.getId(), "SHA256");
        names.put(PKCSObjectIdentifiers.sha384WithRSAEncryption.getId(), "SHA384");
        names.put(PKCSObjectIdentifiers.sha512WithRSAEncryption.getId(), "SHA512");
        names.put(TeleTrusTObjectIdentifiers.ripemd128.getId(), "RIPEMD128");
        names.put(TeleTrusTObjectIdentifiers.ripemd160.getId(), "RIPEMD160");
        names.put(TeleTrusTObjectIdentifiers.ripemd256.getId(), "RIPEMD256");
        names.put(CryptoProObjectIdentifiers.gostR3411.getId(), "GOST3411");
        names.put("1.3.6.1.4.1.5849.1.2.1", "GOST3411");
        return names;
    }

    /**
     * Resolves a digest algorithm OID to its short name.
     *
     * @param digestAlgOID OID string to look up
     * @return the mapped name, or the OID itself when no mapping exists
     */
    public static String getDigestAlgName(String digestAlgOID) {
        // No null values are ever stored, so getOrDefault matches get-then-null-check.
        return digestAlgs.getOrDefault(digestAlgOID, digestAlgOID);
    }

    /**
     * Resolves an encryption algorithm OID to its short name.
     *
     * @param encryptionAlgOID OID string to look up
     * @return the mapped name, or the OID itself when no mapping exists
     */
    public static String getEncryptionAlgName(String encryptionAlgOID) {
        return encryptionAlgs.getOrDefault(encryptionAlgOID, encryptionAlgOID);
    }

    /**
     * Obtains a {@link MessageDigest}, preferring the named provider when one is given.
     *
     * @param algorithm digest algorithm name
     * @param provider  provider name, or {@code null} to use the default lookup
     * @return a digest instance for {@code algorithm}
     * @throws NoSuchProviderException  if the named provider is not registered
     * @throws NoSuchAlgorithmException if no provider offers the algorithm
     */
    public static MessageDigest getDigestInstance(String algorithm, String provider)
            throws NoSuchProviderException, NoSuchAlgorithmException {
        if (provider == null) {
            return MessageDigest.getInstance(algorithm);
        }
        try {
            return MessageDigest.getInstance(algorithm, provider);
        } catch (NoSuchAlgorithmException e) {
            // The named provider lacks the algorithm: fall back to the default lookup.
            return MessageDigest.getInstance(algorithm);
        }
    }
}
/**
 * Mutable holder for the fields read from a single time-stamp token, with
 * helpers that render each field as the string written to the metadata.
 */
private static class TSDMetas {
    /** Pattern shared by both date renderings; values are formatted in UTC. */
    private static final String DATE_FORMAT = "dd/MM/yyyy HH:mm:ss";

    private Boolean parseBuilt = false;
    private Date emitDate = new Date();
    private String policyId = "";
    private BigInteger serialNumber = null;
    private GeneralName tsa = null;
    private String algorithm = "";
    /** Set to the instant this holder was created unless overridden by the setter. */
    private Date parsedDate = new Date();

    public TSDMetas() {
        super();
    }

    /**
     * @param parseBuilt   whether the token was parsed successfully
     * @param emitDate     generation time of the time stamp
     * @param policyId     policy OID string of the time stamp
     * @param serialNumber serial number of the time stamp
     * @param tsa          name of the time-stamping authority; may be {@code null}
     * @param algorithm    OID string of the hash algorithm
     */
    public TSDMetas(Boolean parseBuilt, Date emitDate, String policyId, BigInteger serialNumber,
            GeneralName tsa, String algorithm) {
        super();
        this.parseBuilt = parseBuilt;
        this.emitDate = emitDate;
        this.policyId = policyId;
        this.serialNumber = serialNumber;
        this.tsa = tsa;
        this.algorithm = algorithm;
    }

    /** Formats {@code date} in UTC, substituting the current time for {@code null}. */
    private static String formatUtc(Date date) {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT, Locale.ROOT);
        sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
        return sdf.format(date != null ? date : new Date());
    }

    public Boolean isParseBuilt() {
        return parseBuilt;
    }

    /** @return {@code "true"} or {@code "false"}; a {@code null} flag renders as {@code "false"} */
    public String getParseBuiltStr() {
        return String.valueOf(parseBuilt != null && parseBuilt);
    }

    public void setParseBuilt(Boolean parseBuilt) {
        this.parseBuilt = parseBuilt;
    }

    public Date getEmitDate() {
        return emitDate;
    }

    public void setEmitDate(Date emitDate) {
        this.emitDate = emitDate;
    }

    /** @return the emit date formatted in UTC; the current time is used when it is {@code null} */
    public String getEmitDateStr() {
        return formatUtc(emitDate);
    }

    public String getPolicyId() {
        return policyId;
    }

    public void setPolicyId(String policyId) {
        this.policyId = policyId;
    }

    public BigInteger getSerialNumber() {
        return serialNumber;
    }

    public void setSerialNumber(BigInteger serialNumber) {
        this.serialNumber = serialNumber;
    }

    /**
     * @return the serial number as lowercase hexadecimal, or {@code "null"} when
     *         no serial number is set
     */
    public String getSerialNumberFormatted() {
        // %x renders a null argument as "null". The previous "%12x" padding was
        // trimmed away again immediately and String.format never returns null,
        // so this yields exactly the same strings without the dead branch.
        return String.format(Locale.ROOT, "%x", serialNumber);
    }

    public GeneralName getTsa() {
        return tsa;
    }

    /** @return the TSA's string form, or {@code "null"} when no TSA is set */
    public String getTsaStr() {
        return String.valueOf(tsa);
    }

    public void setTSA(GeneralName tsa) {
        this.tsa = tsa;
    }

    public String getAlgorithm() {
        return algorithm;
    }

    public void setAlgorithm(String algorithm) {
        this.algorithm = algorithm;
    }

    /** @return the digest name mapped from the algorithm OID, or the OID itself if unmapped */
    public String getAlgorithmName() {
        return OIDNameMapper.getDigestAlgName(getAlgorithm());
    }

    public Date getParsedDate() {
        return parsedDate;
    }

    public void setParsedDate(Date parsedDate) {
        this.parsedDate = parsedDate;
    }

    /** @return the parsed date formatted in UTC; the current time is used when it is {@code null} */
    public String getParsedDateStr() {
        return formatUtc(parsedDate);
    }

    @Override
    public String toString() {
        return "TSDMetas [parseBuilt=" + parseBuilt + ", emitDate=" + emitDate + ", policyId=" +
                policyId + ", serialNumber=" + serialNumber + ", tsa=" + tsa + ", algorithm=" +
                algorithm + ", parsedDate=" + parsedDate + "]";
    }
}
}