blob: 24c3c257501dba3119fb8855ebb7914b8e4f0fe9 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.metron.tldextractor;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the registrable part and the suffix of a host name using a single
 * regular expression built from a list of suffix rules.
 *
 * <p>The rules are read from a classpath resource (by default
 * {@code effective_tld_names.dat}, presumably in Public Suffix List format -
 * confirm against the bundled file). Blank lines, {@code //} comment lines and
 * {@code !} exception rules are skipped; a {@code *} in a rule matches exactly
 * one host label.
 *
 * <p>Matching is case-sensitive and exception rules are not honoured.
 */
public class BasicTldExtractor implements Serializable {

    private static final long serialVersionUID = -7440226111118873815L;

    /** Regex fragment that matches exactly one host label; substituted for {@code *} in a rule. */
    private static final String ANY_LABEL = "[^.]+";

    /**
     * Accumulates the regex alternation while rules are being added. Set to
     * {@code null} by {@link #compile()} once the pattern has been built.
     */
    private StringBuilder sb = new StringBuilder();

    /** The compiled matcher for all loaded rules; assigned by {@link #compile()}. */
    private Pattern pattern;

    /**
     * Name of the classpath resource the rules are loaded from.
     */
    private String inputFile = "effective_tld_names.dat";

    /**
     * Creates an extractor whose rules are loaded from the given classpath resource.
     *
     * @param filePath name of the rule resource, resolved through this class's class loader
     * @throws IllegalStateException if the resource cannot be found or read
     */
    public BasicTldExtractor(String filePath) {
        this.inputFile = filePath;
        this.init();
    }

    /**
     * Creates an extractor whose rules are loaded from the default resource
     * {@code effective_tld_names.dat}.
     *
     * @throws IllegalStateException if the resource cannot be found or read
     */
    public BasicTldExtractor() {
        this.init();
    }

    /**
     * Loads the rule resource, registers every rule (longest first) and compiles the pattern.
     *
     * @throws IllegalStateException if the resource is missing or cannot be read
     */
    private void init() {
        InputStream in = getClass().getClassLoader().getResourceAsStream(inputFile);
        if (in == null) {
            // getResourceAsStream signals "not found" with null; fail with a message
            // that names the resource rather than with a bare NullPointerException.
            throw new IllegalStateException("TLD rule resource not found on classpath: " + inputFile);
        }

        List<String> terms = new ArrayList<String>();
        // try-with-resources closes the reader (and the underlying stream) even when reading fails.
        // The rule list contains non-ASCII entries, so decode explicitly as UTF-8
        // instead of relying on the platform default charset.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                // Skip blank lines, "//" comments and "!" exception rules.
                if (line.length() == 0 || line.startsWith("//") || line.startsWith("!")) {
                    continue;
                }
                terms.add(line);
            }
        } catch (IOException e) {
            throw new IllegalStateException("Failed to read TLD rules from " + inputFile, e);
        }

        // Longest rules first, so they appear earliest in the alternation.
        Collections.sort(terms, new StringLengthComparator());
        for (String term : terms) {
            add(term);
        }
        compile();
    }

    /**
     * Appends one suffix rule to the alternation being built.
     *
     * <p>Dots in the rule are escaped so they match literally, a {@code *} is
     * translated to "exactly one label", and the rule is prefixed with an escaped
     * dot so that it only matches on a label boundary.
     *
     * @param s the rule, for example {@code co.uk} or {@code *.ck}
     * @throws IllegalStateException if called after {@link #compile()} without a new builder
     *         having been supplied through {@link #setSb(StringBuilder)}
     */
    protected void add(String s) {
        if (sb == null) {
            throw new IllegalStateException("Rules cannot be added after compile()");
        }
        // Escape the dots first: the wildcard replacement itself contains a '.'
        // (inside a character class) that must not be escaped afterwards.
        String rule = s.replace(".", "\\.").replace("*", ANY_LABEL);
        sb.append("\\.").append(rule).append('|');
    }

    /**
     * Builds the final pattern from the accumulated rules and releases the builder.
     *
     * <p>The resulting expression is {@code [^.]+?(rule1|rule2|...)$}: one label
     * followed by one of the rules, anchored at the end of the input. Group 0 is
     * therefore the label plus suffix and group 1 the suffix alone.
     *
     * @throws IllegalStateException if there is no builder to compile, for example
     *         because the pattern has already been compiled
     */
    public void compile() {
        if (sb == null) {
            throw new IllegalStateException("Nothing to compile: the rule buffer has already been consumed");
        }
        // Drop the trailing separator left behind by the last add().
        if (sb.length() > 0 && sb.charAt(sb.length() - 1) == '|') {
            sb.deleteCharAt(sb.length() - 1);
        }
        sb.insert(0, "[^.]+?(");
        sb.append(")$");
        pattern = Pattern.compile(sb.toString());
        // The buffer is no longer needed once the pattern exists.
        sb = null;
    }

    /**
     * Returns the label immediately left of the matched suffix together with that
     * suffix, for example {@code example.co.uk} for {@code www.example.co.uk}.
     *
     * @param host the host name to inspect; may be {@code null}
     * @return the matched label plus suffix, or {@code null} if {@code host} is
     *         {@code null} or no rule matches
     */
    public String extract2LD(String host) {
        if (host == null) {
            return null;
        }
        Matcher m = pattern.matcher(host);
        if (m.find()) {
            return m.group(0);
        }
        return null;
    }

    /**
     * Returns the suffix matched at the end of the host, including its leading dot,
     * for example {@code .co.uk} for {@code www.example.co.uk}.
     *
     * @param host the host name to inspect; may be {@code null}
     * @return the matched suffix with leading dot, or {@code null} if {@code host}
     *         is {@code null} or no rule matches
     */
    public String extractTLD(String host) {
        if (host == null) {
            return null;
        }
        Matcher m = pattern.matcher(host);
        if (m.find()) {
            return m.group(1);
        }
        return null;
    }

    /**
     * Orders strings by descending length; strings of equal length compare as equal.
     */
    public static class StringLengthComparator implements Comparator<String> {
        /**
         * Compares two strings by length, longest first.
         *
         * @param s1 the first string
         * @param s2 the second string
         * @return -1 if {@code s1} is longer, 1 if it is shorter, 0 if the lengths are equal
         */
        @Override
        public int compare(String s1, String s2) {
            return Integer.compare(s2.length(), s1.length());
        }
    }

    /**
     * Returns the rule buffer.
     *
     * @return the buffer rules are accumulated in, or {@code null} once
     *         {@link #compile()} has consumed it
     */
    public StringBuilder getSb() {
        return sb;
    }

    /**
     * Replaces the rule buffer.
     *
     * @param sb the buffer that subsequent {@link #add(String)} and
     *        {@link #compile()} calls operate on
     */
    public void setSb(StringBuilder sb) {
        this.sb = sb;
    }

    /**
     * Returns the name of the rule resource.
     *
     * @return the resource name
     */
    public String getInputFile() {
        return inputFile;
    }

    /**
     * Sets the name of the rule resource. The rules are loaded in the constructor;
     * this setter only updates the stored name and does not reload them.
     *
     * @param inputFile the resource name
     */
    public void setInputFile(String inputFile) {
        this.inputFile = inputFile;
    }
}