blob: 951a341f3529a9a7488a659d0841fcf83cff4612 [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
"""
OCW module to download a file from ESGF.
"""
from __future__ import print_function
import sys
from os.path import expanduser, join
from ocw.esgf.constants import ESGF_CREDENTIALS
if sys.version_info[0] >= 3:
from http.client import HTTPSConnection
from urllib.request import build_opener
from urllib.request import HTTPCookieProcessor
from urllib.request import HTTPSHandler
else:
# Major version below 3 - in practice this means Python 2.
# Note that the Python 3 branch above also catches any later major
# version, so it may need revisiting if these module paths ever change.
from httplib import HTTPSConnection
from urllib2 import build_opener
from urllib2 import HTTPCookieProcessor
from urllib2 import HTTPSHandler
class HTTPSClientAuthHandler(HTTPSHandler):
    """
    HTTPS handler that transmits an X509 client certificate as part of the
    request.

    :param key: Path of the file holding the private key.
    :param cert: Path of the file holding the certificate.
    """

    def __init__(self, key, cert):
        HTTPSHandler.__init__(self)
        self.key = key
        self.cert = cert

    def https_open(self, req):
        """
        Opens the https connection.

        :param req: The https request object.
        :return: The response object produced by ``do_open`` for the request.
        """
        # Hand do_open our own connection factory so that every connection
        # carries the client certificate.
        return self.do_open(self.getConnection, req)

    def getConnection(self, host, timeout=300):
        """
        Create an HTTPSConnection object that presents the client certificate.

        :param host: The ESGF server to connect to.
        :param timeout: Connection timeout in seconds.
        :return: An ``HTTPSConnection`` configured with ``self.key`` and
            ``self.cert``.
        """
        import ssl

        if not hasattr(ssl, 'create_default_context'):
            # Interpreters without SSLContext helpers (before 2.7.9) only
            # understand the legacy keyword arguments.
            return HTTPSConnection(host, key_file=self.key,
                                   cert_file=self.cert, timeout=timeout)

        # The key_file/cert_file keywords were removed from HTTPSConnection
        # in Python 3.12; loading the pair into an SSL context is the
        # supported equivalent and is what those keywords did internally.
        context = ssl.create_default_context()
        if self.key or self.cert:
            context.load_cert_chain(self.cert, self.key)
        return HTTPSConnection(host, context=context, timeout=timeout)
def download(url, toDirectory="/tmp"):
    """
    Function to download a single file from ESGF.

    The file is stored in ``toDirectory`` under the last path component of
    ``url``. The ESGF credentials file is used both as client key and as
    client certificate.

    :param url: the URL of the file to download
    :param toDirectory: target directory where the file will be written
    """
    import shutil
    from contextlib import closing

    # setup HTTP handler
    cert_file = expanduser(ESGF_CREDENTIALS)
    opener = build_opener(HTTPSClientAuthHandler(cert_file, cert_file))
    opener.add_handler(HTTPCookieProcessor())

    # download file
    local_file_path = join(toDirectory, url.split('/')[-1])
    print("\nDownloading url: %s to local path: %s ..." % (url, local_file_path))
    try:
        # Open the remote side first so a failed request does not leave an
        # empty (or truncated) local file behind.
        # closing() is used because Python 2 responses are not context
        # managers.
        with closing(opener.open(url)) as web_file:
            # Binary mode: the response body is bytes, and text mode would
            # either fail (Python 3) or mangle the data (Windows).
            with open(local_file_path, 'wb') as local_file:
                # Stream in chunks instead of holding the whole file in
                # memory.
                shutil.copyfileobj(web_file, local_file)
    finally:
        # cleanup
        opener.close()
    print("... done")