| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nutch.indexer.geoip; |
| |
| import java.io.IOException; |
| import java.net.InetAddress; |
| import java.net.UnknownHostException; |
| |
| import org.apache.nutch.indexer.NutchDocument; |
| |
| import com.maxmind.geoip2.DatabaseReader; |
| import com.maxmind.geoip2.WebServiceClient; |
| import com.maxmind.geoip2.exception.GeoIp2Exception; |
| import com.maxmind.geoip2.model.InsightsResponse; |
| import com.maxmind.geoip2.model.CityResponse; |
| import com.maxmind.geoip2.model.ConnectionTypeResponse; |
| import com.maxmind.geoip2.model.CountryResponse; |
| import com.maxmind.geoip2.model.DomainResponse; |
| import com.maxmind.geoip2.model.IspResponse; |
| import com.maxmind.geoip2.record.City; |
| import com.maxmind.geoip2.record.Continent; |
| import com.maxmind.geoip2.record.Country; |
| import com.maxmind.geoip2.record.Location; |
| import com.maxmind.geoip2.record.Postal; |
| import com.maxmind.geoip2.record.RepresentedCountry; |
| import com.maxmind.geoip2.record.Subdivision; |
| import com.maxmind.geoip2.record.Traits; |
| |
| /** |
| * <p> |
| * Simple utility class which enables efficient, structured |
| * {@link org.apache.nutch.indexer.NutchDocument} building based on input from |
| * {@link GeoIPIndexingFilter}, where configuration is also read. |
| * </p> |
| * <p> |
| * Based on the nature of the input, this class wraps factory type |
| * implementations for populating {@link org.apache.nutch.indexer.NutchDocument} |
| * 's with the correct {@link org.apache.nutch.indexer.NutchField} information. |
| * |
| */ |
| public class GeoIPDocumentCreator { |
| |
| /** |
| * Default constructor. |
| */ |
| public GeoIPDocumentCreator() { |
| } |
| |
| public static NutchDocument createDocFromInsightsService(String serverIp, |
| NutchDocument doc, WebServiceClient client) throws UnknownHostException, |
| IOException, GeoIp2Exception { |
| doc.add("ip", serverIp); |
| InsightsResponse response = client |
| .insights(InetAddress.getByName(serverIp)); |
| // CityResponse response = client.city(InetAddress.getByName(serverIp)); |
| |
| City city = response.getCity(); |
| doc.add("cityName", city.getName()); // 'Minneapolis' |
| doc.add("cityConfidence", city.getConfidence()); // 50 |
| doc.add("cityGeoNameId", city.getGeoNameId()); |
| |
| Continent continent = response.getContinent(); |
| doc.add("continentCode", continent.getCode()); |
| doc.add("continentGeoNameId", continent.getGeoNameId()); |
| doc.add("continentName", continent.getName()); |
| |
| Country country = response.getCountry(); |
| doc.add("countryIsoCode", country.getIsoCode()); // 'US' |
| doc.add("countryName", country.getName()); // 'United States' |
| doc.add("countryConfidence", country.getConfidence()); // 99 |
| doc.add("countryGeoName", country.getGeoNameId()); |
| |
| Location location = response.getLocation(); |
| doc.add("latLon", location.getLatitude() + "," + location.getLongitude()); // 44.9733, |
| // -93.2323 |
| doc.add("accRadius", location.getAccuracyRadius()); // 3 |
| doc.add("timeZone", location.getTimeZone()); // 'America/Chicago' |
| doc.add("metroCode", location.getMetroCode()); |
| |
| Postal postal = response.getPostal(); |
| doc.add("postalCode", postal.getCode()); // '55455' |
| doc.add("postalConfidence", postal.getConfidence()); // 40 |
| |
| RepresentedCountry rCountry = response.getRepresentedCountry(); |
| doc.add("countryType", rCountry.getType()); |
| |
| Subdivision subdivision = response.getMostSpecificSubdivision(); |
| doc.add("subDivName", subdivision.getName()); // 'Minnesota' |
| doc.add("subDivIdoCode", subdivision.getIsoCode()); // 'MN' |
| doc.add("subDivConfidence", subdivision.getConfidence()); // 90 |
| doc.add("subDivGeoNameId", subdivision.getGeoNameId()); |
| |
| Traits traits = response.getTraits(); |
| doc.add("autonSystemNum", traits.getAutonomousSystemNumber()); |
| doc.add("autonSystemOrg", traits.getAutonomousSystemOrganization()); |
| doc.add("domain", traits.getDomain()); |
| doc.add("isp", traits.getIsp()); |
| doc.add("org", traits.getOrganization()); |
| doc.add("userType", traits.getUserType()); |
| //for better results, users should upgrade to |
| //https://www.maxmind.com/en/solutions/geoip2-enterprise-product-suite/anonymous-ip-database |
| doc.add("isAnonProxy", traits.isAnonymousProxy()); |
| return doc; |
| } |
| |
| @SuppressWarnings("unused") |
| public static NutchDocument createDocFromCityService(String serverIp, |
| NutchDocument doc, WebServiceClient client) throws UnknownHostException, |
| IOException, GeoIp2Exception { |
| CityResponse response = client.city(InetAddress.getByName(serverIp)); |
| return doc; |
| } |
| |
| @SuppressWarnings("unused") |
| public static NutchDocument createDocFromCountryService(String serverIp, |
| NutchDocument doc, WebServiceClient client) throws UnknownHostException, |
| IOException, GeoIp2Exception { |
| CountryResponse response = client.country(InetAddress.getByName(serverIp)); |
| return doc; |
| } |
| |
| public static NutchDocument createDocFromIspDb(String serverIp, |
| NutchDocument doc, DatabaseReader reader) throws UnknownHostException, |
| IOException, GeoIp2Exception { |
| IspResponse response = reader.isp(InetAddress.getByName(serverIp)); |
| doc.add("ip", serverIp); |
| doc.add("autonSystemNum", response.getAutonomousSystemNumber()); |
| doc.add("autonSystemOrg", response.getAutonomousSystemOrganization()); |
| doc.add("isp", response.getIsp()); |
| doc.add("org", response.getOrganization()); |
| return doc; |
| } |
| |
| public static NutchDocument createDocFromDomainDb(String serverIp, |
| NutchDocument doc, DatabaseReader reader) throws UnknownHostException, |
| IOException, GeoIp2Exception { |
| DomainResponse response = reader.domain(InetAddress.getByName(serverIp)); |
| doc.add("ip", serverIp); |
| doc.add("domain", response.getDomain()); |
| return doc; |
| } |
| |
| public static NutchDocument createDocFromConnectionDb(String serverIp, |
| NutchDocument doc, DatabaseReader reader) throws UnknownHostException, |
| IOException, GeoIp2Exception { |
| ConnectionTypeResponse response = reader.connectionType(InetAddress |
| .getByName(serverIp)); |
| doc.add("ip", serverIp); |
| doc.add("connType", response.getConnectionType().toString()); |
| return doc; |
| } |
| |
| public static NutchDocument createDocFromCityDb(String serverIp, |
| NutchDocument doc, DatabaseReader reader) throws UnknownHostException, |
| IOException, GeoIp2Exception { |
| doc.add("ip", serverIp); |
| CityResponse response = reader.city(InetAddress.getByName(serverIp)); |
| |
| City city = response.getCity(); |
| doc.add("cityName", city.getName()); // 'Minneapolis' |
| doc.add("cityConfidence", city.getConfidence()); // 50 |
| doc.add("cityGeoNameId", city.getGeoNameId()); |
| |
| Continent continent = response.getContinent(); |
| doc.add("continentCode", continent.getCode()); |
| doc.add("continentGeoNameId", continent.getGeoNameId()); |
| doc.add("continentName", continent.getName()); |
| |
| Country country = response.getCountry(); |
| doc.add("countryIsoCode", country.getIsoCode()); // 'US' |
| doc.add("countryName", country.getName()); // 'United States' |
| doc.add("countryConfidence", country.getConfidence()); // 99 |
| doc.add("countryGeoName", country.getGeoNameId()); |
| |
| Location location = response.getLocation(); |
| doc.add("latLon", location.getLatitude() + "," + location.getLongitude()); // 44.9733, |
| // -93.2323 |
| doc.add("accRadius", location.getAccuracyRadius()); // 3 |
| doc.add("timeZone", location.getTimeZone()); // 'America/Chicago' |
| doc.add("metroCode", location.getMetroCode()); |
| |
| Postal postal = response.getPostal(); |
| doc.add("postalCode", postal.getCode()); // '55455' |
| doc.add("postalConfidence", postal.getConfidence()); // 40 |
| |
| RepresentedCountry rCountry = response.getRepresentedCountry(); |
| doc.add("countryType", rCountry.getType()); |
| |
| Subdivision subdivision = response.getMostSpecificSubdivision(); |
| doc.add("subDivName", subdivision.getName()); // 'Minnesota' |
| doc.add("subDivIdoCode", subdivision.getIsoCode()); // 'MN' |
| doc.add("subDivConfidence", subdivision.getConfidence()); // 90 |
| doc.add("subDivGeoNameId", subdivision.getGeoNameId()); |
| return doc; |
| } |
| |
| } |