| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nutch.protocol.httpclient; |
| |
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.lang.invoke.MethodHandles;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.reflect.FieldUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
| |
| public class HttpFormAuthentication { |
| private static final Logger LOG = LoggerFactory |
| .getLogger(MethodHandles.lookup().lookupClass()); |
| private static Map<String, String> defaultLoginHeaders = new HashMap<String, String>(); |
| |
| static { |
| defaultLoginHeaders.put("User-Agent", "Mozilla/5.0"); |
| defaultLoginHeaders.put("Accept", |
| "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); |
| defaultLoginHeaders.put("Accept-Language", "en-US,en;q=0.5"); |
| defaultLoginHeaders.put("Connection", "keep-alive"); |
| defaultLoginHeaders.put("Content-Type", |
| "application/x-www-form-urlencoded"); |
| } |
| |
| private HttpClient client; |
| private HttpFormAuthConfigurer authConfigurer = new HttpFormAuthConfigurer(); |
| private String cookies; |
| |
| public HttpFormAuthentication(HttpFormAuthConfigurer authConfigurer, |
| HttpClient client, Http http) { |
| this.authConfigurer = authConfigurer; |
| this.client = client; |
| defaultLoginHeaders.put("Accept", http.getAccept()); |
| defaultLoginHeaders.put("Accept-Language", http.getAcceptLanguage()); |
| defaultLoginHeaders.put("User-Agent", http.getUserAgent()); |
| } |
| |
| public HttpFormAuthentication(String loginUrl, String loginForm, |
| Map<String, String> loginPostData, |
| Map<String, String> additionalPostHeaders, |
| Set<String> removedFormFields) { |
| this.authConfigurer.setLoginUrl(loginUrl); |
| this.authConfigurer.setLoginFormId(loginForm); |
| this.authConfigurer.setLoginPostData( |
| loginPostData == null ? new HashMap<String, String>() : loginPostData); |
| this.authConfigurer.setAdditionalPostHeaders(additionalPostHeaders == null |
| ? new HashMap<String, String>() : additionalPostHeaders); |
| this.authConfigurer.setRemovedFormFields( |
| removedFormFields == null ? new HashSet<String>() : removedFormFields); |
| this.client = new HttpClient(); |
| } |
| |
| public void login() throws Exception { |
| // make sure cookies are turned on |
| CookieHandler.setDefault(new CookieManager()); |
| String pageContent = httpGetPageContent(authConfigurer.getLoginUrl()); |
| List<NameValuePair> params = getLoginFormParams(pageContent); |
| sendPost(authConfigurer.getLoginUrl(), params); |
| } |
| |
| private void sendPost(String url, List<NameValuePair> params) |
| throws Exception { |
| PostMethod post = null; |
| try { |
| if (authConfigurer.isLoginRedirect()) { |
| post = new PostMethod(url) { |
| @Override |
| public boolean getFollowRedirects() { |
| return true; |
| } |
| }; |
| } else { |
| post = new PostMethod(url); |
| } |
| // we can't use post.setFollowRedirects(true) as it will throw |
| // IllegalArgumentException: |
| // Entity enclosing requests cannot be redirected without user |
| // intervention |
| setLoginHeader(post); |
| |
| // NUTCH-2280 |
| LOG.debug("FormAuth: set cookie policy"); |
| this.setCookieParams(authConfigurer, post.getParams()); |
| |
| post.addParameters(params.toArray(new NameValuePair[0])); |
| int rspCode = client.executeMethod(post); |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("rspCode: " + rspCode); |
| LOG.debug("\nSending 'POST' request to URL : " + url); |
| |
| LOG.debug("Post parameters : " + params); |
| LOG.debug("Response Code : " + rspCode); |
| for (Header header : post.getRequestHeaders()) { |
| LOG.debug("Response headers : " + header); |
| } |
| } |
| String rst = IOUtils.toString(post.getResponseBodyAsStream()); |
| LOG.debug("login post result: " + rst); |
| } finally { |
| if (post != null) { |
| post.releaseConnection(); |
| } |
| } |
| } |
| |
| /** |
| * NUTCH-2280 Set the cookie policy value from httpclient-auth.xml for the |
| * Post httpClient action. |
| * |
| * @param fromConfigurer |
| * - the httpclient-auth.xml values |
| * |
| * @param params |
| * - the HttpMethodParams from the current httpclient instance |
| * |
| * @throws NoSuchFieldException |
| * @throws SecurityException |
| * @throws IllegalArgumentException |
| * @throws IllegalAccessException |
| */ |
| private void setCookieParams(HttpFormAuthConfigurer formConfigurer, |
| HttpMethodParams params) throws NoSuchFieldException, SecurityException, |
| IllegalArgumentException, IllegalAccessException { |
| // NUTCH-2280 - set the HttpClient cookie policy |
| if (formConfigurer.getCookiePolicy() != null) { |
| String policy = formConfigurer.getCookiePolicy(); |
| Object p = FieldUtils.readDeclaredStaticField(CookiePolicy.class, policy); |
| if (null != p) { |
| LOG.debug("reflection of cookie value: " + p.toString()); |
| params.setParameter(HttpMethodParams.COOKIE_POLICY, p); |
| } |
| } |
| } |
| |
| private void setLoginHeader(PostMethod post) { |
| Map<String, String> headers = new HashMap<String, String>(); |
| headers.putAll(defaultLoginHeaders); |
| // additionalPostHeaders can overwrite value in defaultLoginHeaders |
| headers.putAll(authConfigurer.getAdditionalPostHeaders()); |
| for (Entry<String, String> entry : headers.entrySet()) { |
| post.addRequestHeader(entry.getKey(), entry.getValue()); |
| } |
| post.addRequestHeader("Cookie", getCookies()); |
| } |
| |
| private String httpGetPageContent(String url) throws IOException { |
| |
| GetMethod get = new GetMethod(url); |
| try { |
| for (Entry<String, String> entry : authConfigurer |
| .getAdditionalPostHeaders().entrySet()) { |
| get.addRequestHeader(entry.getKey(), entry.getValue()); |
| } |
| client.executeMethod(get); |
| Header cookieHeader = get.getResponseHeader("Set-Cookie"); |
| if (cookieHeader != null) { |
| setCookies(cookieHeader.getValue()); |
| } |
| String rst = IOUtils.toString(get.getResponseBodyAsStream()); |
| return rst; |
| } finally { |
| get.releaseConnection(); |
| } |
| |
| } |
| |
| private List<NameValuePair> getLoginFormParams(String pageContent) |
| throws UnsupportedEncodingException { |
| List<NameValuePair> params = new ArrayList<NameValuePair>(); |
| Document doc = Jsoup.parse(pageContent); |
| Element loginform = doc.getElementById(authConfigurer.getLoginFormId()); |
| if (loginform == null) { |
| LOG.debug("No form element found with 'id' = {}, trying 'name'.", |
| authConfigurer.getLoginFormId()); |
| loginform = doc |
| .select("form[name=" + authConfigurer.getLoginFormId() + "]").first(); |
| if (loginform == null) { |
| LOG.debug("No form element found with 'name' = {}", |
| authConfigurer.getLoginFormId()); |
| throw new IllegalArgumentException( |
| "No form exists: " + authConfigurer.getLoginFormId()); |
| } |
| } |
| Elements inputElements = loginform.getElementsByTag("input"); |
| // skip fields in removedFormFields or loginPostData |
| for (Element inputElement : inputElements) { |
| String key = inputElement.attr("name"); |
| String value = inputElement.attr("value"); |
| if (authConfigurer.getLoginPostData().containsKey(key) |
| || authConfigurer.getRemovedFormFields().contains(key)) { |
| // value = loginPostData.get(key); |
| continue; |
| } |
| params.add(new NameValuePair(key, value)); |
| } |
| // add key and value in loginPostData |
| for (Entry<String, String> entry : authConfigurer.getLoginPostData() |
| .entrySet()) { |
| params.add(new NameValuePair(entry.getKey(), entry.getValue())); |
| } |
| return params; |
| } |
| |
| public String getCookies() { |
| return cookies; |
| } |
| |
| public void setCookies(String cookies) { |
| this.cookies = cookies; |
| } |
| |
| public boolean isRedirect() { |
| return authConfigurer.isLoginRedirect(); |
| } |
| |
| public void setRedirect(boolean redirect) { |
| this.authConfigurer.setLoginRedirect(redirect); |
| } |
| |
| } |