mailbox/memory/src/main/java/org/apache/james/mailbox/inmemory/JsoupTextExtractor.java - james-project - Git at Google

 /****************************************************************
  * Licensed to the Apache Software Foundation (ASF) under one   *
  * or more contributor license agreements.  See the NOTICE file *
  * distributed with this work for additional information        *
  * regarding copyright ownership.  The ASF licenses this file   *
  * to you under the Apache License, Version 2.0 (the            *
  * "License"); you may not use this file except in compliance   *
  * with the License.  You may obtain a copy of the License at   *
  *                                                              *
  *   http://www.apache.org/licenses/LICENSE-2.0                 *
  *                                                              *
  * Unless required by applicable law or agreed to in writing,   *
  * software distributed under the License is distributed on an  *
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
  * KIND, either express or implied.  See the License for the    *
  * specific language governing permissions and limitations      *
  * under the License.                                           *
  ****************************************************************/

 package org.apache.james.mailbox.inmemory;

 import java.io.InputStream;
 import java.util.List;
 import java.util.Map;

 import org.apache.commons.io.IOUtils;
 import org.apache.james.mailbox.store.extractor.ParsedContent;
 import org.apache.james.mailbox.store.extractor.TextExtractor;
 import org.jsoup.Jsoup;

 import com.google.common.collect.Maps;


 /**
  * {@link TextExtractor} that handles only {@code text/plain} and {@code text/html} content.
  *
  * <p>Plain text is returned as read from the stream; HTML is parsed with Jsoup and reduced
  * to the text of the parsed document. Any other (or missing) content type yields a
  * {@link ParsedContent} whose text is {@code null}. The metadata map is always empty.
  */
 public class JsoupTextExtractor implements TextExtractor {

     /**
      * Charset used to decode the incoming bytes. It is stated explicitly so the extracted text
      * does not depend on the JVM's platform default charset. The content type is matched
      * exactly (no parameters), so no declared charset is available here to honour.
      */
     private static final java.nio.charset.Charset DEFAULT_CHARSET = java.nio.charset.StandardCharsets.UTF_8;

     private static final String TEXT_PLAIN = "text/plain";
     private static final String TEXT_HTML = "text/html";

     /**
      * Extracts the textual content of the given stream according to its content type.
      *
      * @param inputStream stream holding the raw content; it is read but not closed here
      * @param contentType content type of the stream, may be {@code null}; only the exact
      *                    values {@code "text/plain"} and {@code "text/html"} are handled
      * @param fileName    not used by this implementation
      * @return the extracted text with an empty metadata map; the text is {@code null} when
      *         the content type is {@code null} or is not one of the handled values
      * @throws Exception if reading the stream or parsing the HTML fails
      */
     @Override
     public ParsedContent extractContent(InputStream inputStream, String contentType, String fileName) throws Exception {
         // A fresh mutable map per call, as before, so callers never share state.
         Map<String, List<String>> emptyMetadata = Maps.newHashMap();
         if (contentType != null) {
             if (contentType.equals(TEXT_PLAIN)) {
                 return new ParsedContent(IOUtils.toString(inputStream, DEFAULT_CHARSET), emptyMetadata);
             }
             if (contentType.equals(TEXT_HTML)) {
                 String html = IOUtils.toString(inputStream, DEFAULT_CHARSET);
                 String text = Jsoup.parse(html).text();
                 return new ParsedContent(text, emptyMetadata);
             }
         }
         // Unsupported or unknown content type: no text could be extracted.
         return new ParsedContent(null, emptyMetadata);
     }
 }
	/****************************************************************
	* Licensed to the Apache Software Foundation (ASF) under one *
	* or more contributor license agreements. See the NOTICE file *
	* distributed with this work for additional information *
	* regarding copyright ownership. The ASF licenses this file *
	* to you under the Apache License, Version 2.0 (the *
	* "License"); you may not use this file except in compliance *
	* with the License. You may obtain a copy of the License at *
	* *
	* http://www.apache.org/licenses/LICENSE-2.0 *
	* *
	* Unless required by applicable law or agreed to in writing, *
	* software distributed under the License is distributed on an *
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
	* KIND, either express or implied. See the License for the *
	* specific language governing permissions and limitations *
	* under the License. *
	****************************************************************/

	package org.apache.james.mailbox.inmemory;

	import java.io.InputStream;
	import java.util.List;
	import java.util.Map;

	import org.apache.commons.io.IOUtils;
	import org.apache.james.mailbox.store.extractor.ParsedContent;
	import org.apache.james.mailbox.store.extractor.TextExtractor;
	import org.jsoup.Jsoup;

	import com.google.common.collect.Maps;


	/**
	 * {@link TextExtractor} that handles only {@code text/plain} and {@code text/html} content.
	 *
	 * <p>Plain text is returned as read from the stream; HTML is parsed with Jsoup and reduced
	 * to the text of the parsed document. Any other (or missing) content type yields a
	 * {@link ParsedContent} whose text is {@code null}. The metadata map is always empty.
	 */
	public class JsoupTextExtractor implements TextExtractor {

	    /**
	     * Charset used to decode the incoming bytes. It is stated explicitly so the extracted text
	     * does not depend on the JVM's platform default charset. The content type is matched
	     * exactly (no parameters), so no declared charset is available here to honour.
	     */
	    private static final java.nio.charset.Charset DEFAULT_CHARSET = java.nio.charset.StandardCharsets.UTF_8;

	    /**
	     * Extracts the textual content of the given stream according to its content type.
	     *
	     * @param inputStream stream holding the raw content; it is read but not closed here
	     * @param contentType content type of the stream, may be {@code null}; only the exact
	     *                    values {@code "text/plain"} and {@code "text/html"} are handled
	     * @param fileName    not used by this implementation
	     * @return the extracted text with an empty metadata map; the text is {@code null} when
	     *         the content type is {@code null} or is not one of the handled values
	     * @throws Exception if reading the stream or parsing the HTML fails
	     */
	    @Override
	    public ParsedContent extractContent(InputStream inputStream, String contentType, String fileName) throws Exception {
	        // A fresh mutable map per call, as before, so callers never share state.
	        Map<String, List<String>> emptyMetadata = Maps.newHashMap();
	        if (contentType != null) {
	            if (contentType.equals("text/plain")) {
	                return new ParsedContent(IOUtils.toString(inputStream, DEFAULT_CHARSET), emptyMetadata);
	            }
	            if (contentType.equals("text/html")) {
	                String html = IOUtils.toString(inputStream, DEFAULT_CHARSET);
	                String text = Jsoup.parse(html).text();
	                return new ParsedContent(text, emptyMetadata);
	            }
	        }
	        // Unsupported or unknown content type: no text could be extracted.
	        return new ParsedContent(null, emptyMetadata);
	    }
	}