/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.oodt.cas.metadata.util;

//JDK imports
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;

//APACHE imports
import org.apache.tika.Tika;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MimeTypesFactory;

/**
 * @author mattmann
 * @author bfoster
 * 
 * <p>
 * This is a facade class to insulate CAS Metadata from its underlying Mime Type
 * substrate library, <a href="http://tika.apache.org/">Apache Tika</a>.
 * Any mime handling code should be placed in this utility class, and hidden
 * from the CAS Metadata classes that rely on it.
 * </p>
 */
public final class MimeTypeUtils {

    private static final String SEPARATOR = ";";

    /* our Tika mime type registry */
    private MimeTypes mimeTypes;

    private Tika tika;

    /* whether or not magic should be employed or not */
    private boolean mimeMagic;

    /* static resource path for the mimeTypesFile */
    public final static String MIME_FILE_RES_PATH = "tika-mimetypes.xml";

    /* our log stream */
    private static final Logger LOG = Logger.getLogger(MimeTypeUtils.class
            .getName());

    public MimeTypeUtils() {
        this(MimeTypeUtils.class.getResourceAsStream(MIME_FILE_RES_PATH), true);
    }

    public MimeTypeUtils(String filePath) throws FileNotFoundException {
        this(filePath, true);
    }

    public MimeTypeUtils(String filePath, boolean magic)
            throws FileNotFoundException {
        this(new FileInputStream(filePath), magic);
    }

    public MimeTypeUtils(InputStream mimeIs, boolean magic) {
    	try {
    		this.mimeTypes = MimeTypesFactory.create(mimeIs);
    		this.mimeMagic = magic;
    		this.tika = new Tika(new DefaultDetector(this.mimeTypes));
    	}catch (Exception e) {
    		LOG.log(Level.SEVERE, "Failed to load MimeType Registry : " + e.getMessage(), e);
    	}
    }

    /**
     * Cleans a {@link MimeType} name by removing out the actual
     * {@link MimeType}, from a string of the form:
     * 
     * <pre>
     *           &lt;primary type&gt;/&lt;sub type&gt; ; &lt; optional params
     * </pre>
     * 
     * @param origType
     *            The original mime type string to be cleaned.
     * @return The primary type, and subtype, concatenated, e.g., the actual
     *         mime type.
     */
    public static String cleanMimeType(String origType) {
        if (origType == null)
            return null;

        // take the origType and split it on ';'
        String[] tokenizedMimeType = origType.split(SEPARATOR);
        if (tokenizedMimeType.length > 1) {
            // there was a ';' in there, take the first value
            return tokenizedMimeType[0];
        } else {
            // there wasn't a ';', so just return the orig type
            return origType;
        }
    }

    /**
     * Same as {@link #autoResolveContentType(String, String, byte[])}, but
     * this method passes <code>null</code> as the initial type.
     * 
     * @param url
     *            The String URL to use to check glob patterns.
     * @param data
     *            The byte data to potentially use in magic detection.
     * @return The String {@link MimeType}.
     */
    public String autoResolveContentType(String url, byte[] data) {
        return autoResolveContentType(null, url, data);
    }

    /**
     * A facade interface to trying all the possible mime type resolution
     * strategies available within Tika. First, the mime type provided in
     * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
     * Then the cleaned mime type is looked up in the underlying Tika
     * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType}
     * is found, then that mime type is used, otherwise {@link URL} resolution
     * is used to try and determine the mime type. If that means is
     * unsuccessful, and if <code>mime.type.magic</code> is enabled in
     * {@link NutchConfiguration}, then mime type magic resolution is used to
     * try and obtain a better-than-the-default approximation of the
     * {@link MimeType}.
     * 
     * @param typeName
     *            The original mime type, returned from a {@link ProtocolOutput}.
     * @param url
     *            The given {@link URL}, that Nutch was trying to crawl.
     * @param data
     *            The byte data, returned from the crawl, if any.
     * @return The correctly, automatically guessed {@link MimeType} name.
     */
    public String autoResolveContentType(String typeName, String url,
            byte[] data) {
        MimeType type = null;
        String cleanedMimeType = null;

        try {
            cleanedMimeType = MimeTypeUtils.cleanMimeType(typeName) != null ? this.mimeTypes
                    .forName(MimeTypeUtils.cleanMimeType(typeName)).getName()
                    : null;
        } catch (MimeTypeException mte) {
            // Seems to be a malformed mime type name...
        }

        // first try to get the type from the cleaned type name
        try {
            type = cleanedMimeType != null ? this.mimeTypes
                    .forName(cleanedMimeType) : null;
        } catch (MimeTypeException e) {
            type = null;
        }

        // if returned null, or if it's the default type then try url resolution
        if (type == null
                || (type.getName().equals(MimeTypes.OCTET_STREAM))) {
            // If no mime-type header, or cannot find a corresponding registered
            // mime-type, then guess a mime-type from the url pattern
            try {
                type = mimeTypes.forName(tika.detect(url)) != null ? mimeTypes.forName(tika.detect(url)) : type;
            } catch (Exception e) {
                // MimeTypeException or IOException from tika.detect. Ignore.
            }
        }

        // if magic is enabled use mime magic to guess if the mime type returned
        // from the magic guess is different than the one that's already set so
        // far
        // if it is, and it's not the default mime type, then go with the mime
        // type
        // returned by the magic
        if (this.mimeMagic) {
            MimeType magicType;
            try {
                magicType =  mimeTypes.forName(tika.detect(data));
            } catch (Exception e) {
                magicType = null;
            }
            if (magicType != null
                    && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
                    && type != null
                    && !type.getName().equals(magicType.getName())) {
                // If magic enabled and the current mime type differs from that
                // of the
                // one returned from the magic, take the magic mimeType
                type = magicType;
            }

            // if type is STILL null after all the resolution strategies, go for
            // the
            // default type
            if (type == null) {
                try {
                    type = this.mimeTypes.forName(MimeTypes.OCTET_STREAM);
                } catch (Exception ignore) {
                }
            }
        }

        return type.getName();
    }

    /**
     * Facade interface to Tika's underlying
     * {@link tika.detect(String)} method.
     *
     * @param url
     *            A string representation of the document {@link URL} to sense
     *            the {@link MimeType} for.
     * @return An appropriate {@link MimeType}, identified from the given
     *         Document url in string form.
     */
    public String getMimeType(URL url) {
        try {
    	    return tika.detect(url);
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * A facade interface to Tika's underlying {@link org.apache.tika.tika.detect(String)}
     * method.
     *
     * @param name
     *            The name of a valid {@link MimeType} in the Tika mime
     *            registry.
     * @return The object representation of the {@link MimeType}, if it exists,
     *         or null otherwise.
     */
    public String getMimeType(String name) {
        try {
            return tika.detect(name);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Facade interface to Tika's underlying {@link org.apache.tika.Tika#detect(File)}
     * method.
     *
     * @param f
     *            The {@link File} to sense the {@link MimeType} for.
     * @return The {@link MimeType} of the given {@link File}, or null if it
     *         cannot be determined.
     */
    public String getMimeType(File f) {
        try {
            return tika.detect(f);
        } catch (Exception e) {
            System.err.println("\n\n\n");
            e.printStackTrace();
            System.err.println("\n\n\n");
            return null;
        }
    }

    /**
     * Utility method to act as a facade to
     * {@link MimeTypes#getMimeType(byte[])}.
     *
     * @param data
     *            The byte data to get the {@link MimeType} for.
     * @return The String representation of the resolved {@link MimeType}, or
     *         null if a suitable {@link MimeType} is not found.
     */
    public String getMimeTypeByMagic(byte[] data) {
        try {
            return tika.detect(data);
        } catch (Exception e) {
            return null;
        }
    }
    
    public String getDescriptionForMimeType(String mimeType) {
    	try {
    		return this.mimeTypes.forName(mimeType).getDescription();
    	}catch (Exception e) {
    		LOG.log(Level.WARNING, "Failed to get description for mimetype " 
    				+ mimeType + " : " + e.getMessage());
    		return null;
    	}
    }

    public String getSuperTypeForMimeType(String mimeType) {
    	try {
    		MediaType mediaType = this.mimeTypes.getMediaTypeRegistry().getSupertype(this.mimeTypes.forName(mimeType).getType());
    		if (mediaType != null)
    			return mediaType.getType() + "/" + mediaType.getSubtype();
    		else
    			return null;
    	}catch (Exception e) {
    		LOG.log(Level.WARNING, "Failed to get super-type for mimetype " 
    				+ mimeType + " : " + e.getMessage());
    		return null;
    	}
    }
    
    /**
     * @return the mimeMagic
     */
    public boolean isMimeMagic() {
        return mimeMagic;
    }

    /**
     * @param mimeMagic the mimeMagic to set
     */
    public void setMimeMagic(boolean mimeMagic) {
        this.mimeMagic = mimeMagic;
    }
    
    public static byte[] readMagicHeader(InputStream stream) throws IOException {
    	return readMagicHeader(stream, 1024);
    }
    
    public static byte[] readMagicHeader(InputStream stream, int headerByteSize) 
    		throws IOException {
        if (stream == null) {
            throw new IllegalArgumentException("InputStream is missing");
        }

        byte[] bytes = new byte[headerByteSize];
        int totalRead = 0;

        int lastRead = stream.read(bytes);
        while (lastRead != -1) {
            totalRead += lastRead;
            if (totalRead == bytes.length) {
                return bytes;
            }
            lastRead = stream.read(bytes, totalRead, bytes.length - totalRead);
        }

        byte[] shorter = new byte[totalRead];
        System.arraycopy(bytes, 0, shorter, 0, totalRead);
        return shorter;
    }

}
