blob: 6f2de7765ff00d99b3c1d6f5d3c77b697dee0813 [file] [log] [blame]
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/
package org.apache.james.mailbox.inmemory;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.james.mailbox.store.extractor.ParsedContent;
import org.apache.james.mailbox.store.extractor.TextExtractor;
import org.jsoup.Jsoup;
import com.google.common.collect.Maps;
/**
 * {@link TextExtractor} that only understands plain text and HTML.
 *
 * <ul>
 *   <li>{@code text/plain}: the stream content is returned unchanged.</li>
 *   <li>{@code text/html}: the stream content is parsed with Jsoup and the
 *       resulting document text is returned.</li>
 *   <li>anything else, including a {@code null} content type: the returned
 *       {@link ParsedContent} carries a {@code null} text.</li>
 * </ul>
 *
 * The metadata map of the returned {@link ParsedContent} is always empty.
 */
public class JsoupTextExtractor implements TextExtractor {

    private static final String TEXT_PLAIN = "text/plain";
    private static final String TEXT_HTML = "text/html";

    /**
     * Extracts the textual content of the given stream.
     *
     * <p>The content type is compared by exact string equality, so a value
     * carrying parameters (for example {@code "text/plain; charset=UTF-8"}) or
     * using different letter case is treated as unsupported.
     *
     * <p>The stream is always decoded as UTF-8, so the result does not depend
     * on the platform default charset.
     * NOTE(review): a charset declared elsewhere (e.g. in the MIME headers) is
     * not visible to this method and is therefore not honoured.
     *
     * @param inputStream stream holding the content to extract; it is read
     *                    only for the two supported content types and is not
     *                    closed by this method
     * @param contentType media type of the content, may be {@code null}
     * @param fileName    unused by this implementation
     * @return the extracted text with empty metadata; the text is {@code null}
     *         when the content type is not supported
     * @throws Exception if reading the stream fails
     */
    @Override
    public ParsedContent extractContent(InputStream inputStream, String contentType, String fileName) throws Exception {
        // A fresh mutable map per call: the instance is handed over to the
        // returned ParsedContent and must not be shared between results.
        Map<String, List<String>> emptyMetadata = Maps.newHashMap();

        // Constant-first equals() is null-safe, so no separate null check on
        // contentType is needed.
        if (TEXT_PLAIN.equals(contentType)) {
            String text = IOUtils.toString(inputStream, StandardCharsets.UTF_8);
            return new ParsedContent(text, emptyMetadata);
        }
        if (TEXT_HTML.equals(contentType)) {
            String html = IOUtils.toString(inputStream, StandardCharsets.UTF_8);
            return new ParsedContent(Jsoup.parse(html).text(), emptyMetadata);
        }

        // Unsupported or missing content type: no text could be extracted.
        return new ParsedContent(null, emptyMetadata);
    }
}