| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """ |
| Provides base classes for working with storage |
| """ |
| |
| # Backward compatibility for Python 2.5 |
| from __future__ import with_statement |
| |
| import os.path # pylint: disable-msg=W0404 |
| import hashlib |
| from os.path import join as pjoin |
| |
| from libcloud.utils.py3 import httplib |
| from libcloud.utils.py3 import b |
| |
| import libcloud.utils.files |
| from libcloud.common.types import LibcloudError |
| from libcloud.common.base import ConnectionUserAndKey, BaseDriver |
| from libcloud.storage.types import ObjectDoesNotExistError |
| |
# Public names of this module.
__all__ = [
    'Object',
    'Container',
    'StorageDriver',

    'CHUNK_SIZE',
    'DEFAULT_CONTENT_TYPE',
]

# Chunk size (in bytes) used when a caller does not supply one.
# NOTE(review): docstrings in this module describe the value as "8kb", but
# 8096 is not 8 KiB (8192). The value is kept as-is because other code may
# depend on it - confirm before changing.
CHUNK_SIZE = 8096

# Content-Type used for an upload when none is supplied, none can be
# detected, and the driver is not running in strict mode.
DEFAULT_CONTENT_TYPE = 'application/octet-stream'
| |
| |
class Object(object):
    """
    A single stored object (BLOB) which belongs to a container.

    All operations are forwarded to the driver the object was created with.
    """

    def __init__(self, name, size, hash, extra, meta_data, container,
                 driver):
        """
        :param name: Object name (must be unique per container).
        :type name: ``str``

        :param size: Object size in bytes.
        :type size: ``int``

        :param hash: Object hash.
        :type hash: ``str``

        :param extra: Extra attributes. A falsy value is stored as an empty
                      dictionary.
        :type extra: ``dict``

        :param meta_data: Optional object meta data. A falsy value is stored
                          as an empty dictionary.
        :type meta_data: ``dict``

        :param container: Object container.
        :type container: :class:`Container`

        :param driver: StorageDriver instance.
        :type driver: :class:`StorageDriver`
        """
        self.driver = driver
        self.container = container

        self.name = name
        self.size = size
        self.hash = hash

        # Never keep ``None`` around; readers can always treat these as dicts.
        self.extra = extra if extra else {}
        self.meta_data = meta_data if meta_data else {}

    def get_cdn_url(self):
        """
        Return the result of the driver's ``get_object_cdn_url`` for this
        object.
        """
        driver = self.driver
        return driver.get_object_cdn_url(obj=self)

    def enable_cdn(self, **kwargs):
        """
        Call the driver's ``enable_object_cdn`` for this object, passing any
        keyword arguments through unchanged.
        """
        driver = self.driver
        return driver.enable_object_cdn(obj=self, **kwargs)

    def download(self, destination_path, overwrite_existing=False,
                 delete_on_failure=True):
        """
        Download this object through the driver's ``download_object``.

        :param destination_path: Path handed to the driver.
        :type destination_path: ``str``

        :param overwrite_existing: Forwarded to the driver.
        :type overwrite_existing: ``bool``

        :param delete_on_failure: Forwarded to the driver.
        :type delete_on_failure: ``bool``
        """
        driver = self.driver
        return driver.download_object(self, destination_path,
                                      overwrite_existing, delete_on_failure)

    def as_stream(self, chunk_size=None):
        """
        Return the result of the driver's ``download_object_as_stream`` for
        this object.

        :param chunk_size: Optional chunk size, forwarded to the driver.
        :type chunk_size: ``int``
        """
        driver = self.driver
        return driver.download_object_as_stream(self, chunk_size)

    def delete(self):
        """
        Delete this object through the driver's ``delete_object``.
        """
        driver = self.driver
        return driver.delete_object(self)

    def __repr__(self):
        fields = (self.name, self.size, self.hash, self.driver.name)
        return '<Object: name=%s, size=%s, hash=%s, provider=%s ...>' % fields
| |
| |
class Container(object):
    """
    A container (bucket) which can hold multiple objects.

    All operations are forwarded to the driver the container was created
    with.
    """

    def __init__(self, name, extra, driver):
        """
        :param name: Container name (must be unique).
        :type name: ``str``

        :param extra: Extra attributes. A falsy value is stored as an empty
                      dictionary.
        :type extra: ``dict``

        :param driver: StorageDriver instance.
        :type driver: :class:`StorageDriver`
        """
        self.driver = driver
        self.name = name
        # Never keep ``None`` around; readers can always treat this as a dict.
        self.extra = extra if extra else {}

    def iterate_objects(self):
        """
        Return the result of the driver's ``iterate_container_objects`` for
        this container.
        """
        driver = self.driver
        return driver.iterate_container_objects(container=self)

    def list_objects(self):
        """
        Return the result of the driver's ``list_container_objects`` for this
        container.
        """
        driver = self.driver
        return driver.list_container_objects(container=self)

    def get_cdn_url(self):
        """
        Return the result of the driver's ``get_container_cdn_url`` for this
        container.
        """
        driver = self.driver
        return driver.get_container_cdn_url(container=self)

    def enable_cdn(self, **kwargs):
        """
        Call the driver's ``enable_container_cdn`` for this container,
        passing any keyword arguments through unchanged.
        """
        driver = self.driver
        return driver.enable_container_cdn(container=self, **kwargs)

    def get_object(self, object_name):
        """
        Look up an object by name through the driver's ``get_object``, using
        this container's name.

        :param object_name: Object name.
        :type object_name: ``str``
        """
        driver = self.driver
        return driver.get_object(container_name=self.name,
                                 object_name=object_name)

    def upload_object(self, file_path, object_name, extra=None, **kwargs):
        """
        Upload a file into this container through the driver's
        ``upload_object``. Additional keyword arguments are passed through.

        :param file_path: Path to the file, forwarded to the driver.
        :type file_path: ``str``

        :param object_name: Object name.
        :type object_name: ``str``

        :param extra: Extra attributes, forwarded to the driver.
        :type extra: ``dict``
        """
        driver = self.driver
        return driver.upload_object(file_path, self, object_name,
                                    extra=extra, **kwargs)

    def upload_object_via_stream(self, iterator, object_name, extra=None,
                                 **kwargs):
        """
        Upload data from an iterator into this container through the
        driver's ``upload_object_via_stream``. Additional keyword arguments
        are passed through.

        :param iterator: Data source, forwarded to the driver.
        :type iterator: :class:`object`

        :param object_name: Object name.
        :type object_name: ``str``

        :param extra: Extra attributes, forwarded to the driver.
        :type extra: ``dict``
        """
        driver = self.driver
        return driver.upload_object_via_stream(iterator, self, object_name,
                                               extra=extra, **kwargs)

    def download_object(self, obj, destination_path, overwrite_existing=False,
                        delete_on_failure=True):
        """
        Download ``obj`` through the driver's ``download_object``.

        :param obj: Object instance.
        :type obj: :class:`Object`

        :param destination_path: Path handed to the driver.
        :type destination_path: ``str``

        :param overwrite_existing: Forwarded to the driver.
        :type overwrite_existing: ``bool``

        :param delete_on_failure: Forwarded to the driver.
        :type delete_on_failure: ``bool``
        """
        driver = self.driver
        return driver.download_object(obj, destination_path,
                                      overwrite_existing=overwrite_existing,
                                      delete_on_failure=delete_on_failure)

    def download_object_as_stream(self, obj, chunk_size=None):
        """
        Return the result of the driver's ``download_object_as_stream`` for
        ``obj``.

        :param obj: Object instance.
        :type obj: :class:`Object`

        :param chunk_size: Optional chunk size, forwarded to the driver.
        :type chunk_size: ``int``
        """
        driver = self.driver
        return driver.download_object_as_stream(obj, chunk_size)

    def delete_object(self, obj):
        """
        Delete ``obj`` through the driver's ``delete_object``.

        :param obj: Object instance.
        :type obj: :class:`Object`
        """
        driver = self.driver
        return driver.delete_object(obj)

    def delete(self):
        """
        Delete this container through the driver's ``delete_container``.
        """
        driver = self.driver
        return driver.delete_container(self)

    def __repr__(self):
        fields = (self.name, self.driver.name)
        return '<Container: name=%s, provider=%s>' % fields
| |
| |
class StorageDriver(BaseDriver):
    """
    A base StorageDriver to derive from.

    The public methods either raise ``NotImplementedError`` or are built on
    top of such methods, so concrete drivers are expected to override them.
    The underscore-prefixed methods are shared helpers for downloading,
    uploading and hashing.
    """

    connectionCls = ConnectionUserAndKey
    name = None
    # Name of the hashlib constructor used by _get_hash_function()
    hash_type = 'md5'
    supports_chunked_encoding = False

    # When strict mode is used, exception will be thrown if no content type is
    # provided and none can be detected when uploading an object
    strict_mode = False

    def iterate_containers(self):
        """
        Return a generator of containers for the given account

        :return: A generator of Container instances.
        :rtype: ``generator`` of :class:`Container`
        """
        raise NotImplementedError(
            'iterate_containers not implemented for this driver')

    def list_containers(self):
        """
        Return a list of containers.

        The list is built by exhausting :meth:`iterate_containers`.

        :return: A list of Container instances.
        :rtype: ``list`` of :class:`Container`
        """
        return list(self.iterate_containers())

    def iterate_container_objects(self, container):
        """
        Return a generator of objects for the given container.

        :param container: Container instance
        :type container: :class:`Container`

        :return: A generator of Object instances.
        :rtype: ``generator`` of :class:`Object`
        """
        raise NotImplementedError(
            'iterate_container_objects not implemented for this driver')

    def list_container_objects(self, container):
        """
        Return a list of objects for the given container.

        The list is built by exhausting :meth:`iterate_container_objects`.

        :param container: Container instance.
        :type container: :class:`Container`

        :return: A list of Object instances.
        :rtype: ``list`` of :class:`Object`
        """
        return list(self.iterate_container_objects(container))

    def get_container(self, container_name):
        """
        Return a container instance.

        :param container_name: Container name.
        :type container_name: ``str``

        :return: :class:`Container` instance.
        :rtype: :class:`Container`
        """
        raise NotImplementedError(
            'get_container not implemented for this driver')

    def get_container_cdn_url(self, container):
        """
        Return a container CDN URL.

        :param container: Container instance
        :type container: :class:`Container`

        :return: A CDN URL for this container.
        :rtype: ``str``
        """
        raise NotImplementedError(
            'get_container_cdn_url not implemented for this driver')

    def get_object(self, container_name, object_name):
        """
        Return an object instance.

        :param container_name: Container name.
        :type container_name: ``str``

        :param object_name: Object name.
        :type object_name: ``str``

        :return: :class:`Object` instance.
        :rtype: :class:`Object`
        """
        raise NotImplementedError(
            'get_object not implemented for this driver')

    def get_object_cdn_url(self, obj):
        """
        Return an object CDN URL.

        :param obj: Object instance
        :type obj: :class:`Object`

        :return: A CDN URL for this object.
        :rtype: ``str``
        """
        raise NotImplementedError(
            'get_object_cdn_url not implemented for this driver')

    def enable_container_cdn(self, container):
        """
        Enable container CDN.

        :param container: Container instance
        :type container: :class:`Container`

        :rtype: ``bool``
        """
        raise NotImplementedError(
            'enable_container_cdn not implemented for this driver')

    def enable_object_cdn(self, obj):
        """
        Enable object CDN.

        :param obj: Object instance
        :type obj: :class:`Object`

        :rtype: ``bool``
        """
        raise NotImplementedError(
            'enable_object_cdn not implemented for this driver')

    def download_object(self, obj, destination_path, overwrite_existing=False,
                        delete_on_failure=True):
        """
        Download an object to the specified destination path.

        :param obj: Object instance.
        :type obj: :class:`Object`

        :param destination_path: Full path to a file or a directory where the
                                 incoming file will be saved.
        :type destination_path: ``str``

        :param overwrite_existing: True to overwrite an existing file,
                                   defaults to False.
        :type overwrite_existing: ``bool``

        :param delete_on_failure: True to delete a partially downloaded file if
                                   the download was not successful (hash
                                   mismatch / file size).
        :type delete_on_failure: ``bool``

        :return: True if an object has been successfully downloaded, False
                 otherwise.
        :rtype: ``bool``
        """
        raise NotImplementedError(
            'download_object not implemented for this driver')

    def download_object_as_stream(self, obj, chunk_size=None):
        """
        Return a generator which yields object data.

        :param obj: Object instance
        :type obj: :class:`Object`

        :param chunk_size: Optional chunk size (in bytes).
        :type chunk_size: ``int``
        """
        raise NotImplementedError(
            'download_object_as_stream not implemented for this driver')

    def upload_object(self, file_path, container, object_name, extra=None,
                      verify_hash=True, headers=None):
        """
        Upload an object currently located on a disk.

        :param file_path: Path to the object on disk.
        :type file_path: ``str``

        :param container: Destination container.
        :type container: :class:`Container`

        :param object_name: Object name.
        :type object_name: ``str``

        :param verify_hash: Verify hash
        :type verify_hash: ``bool``

        :param extra: Extra attributes (driver specific). (optional)
        :type extra: ``dict``

        :param headers: (optional) Additional request headers,
            such as CORS headers. For example:
            headers = {'Access-Control-Allow-Origin': 'http://mozilla.com'}
        :type headers: ``dict``

        :rtype: :class:`Object`
        """
        raise NotImplementedError(
            'upload_object not implemented for this driver')

    def upload_object_via_stream(self, iterator, container,
                                 object_name,
                                 extra=None,
                                 headers=None):
        """
        Upload an object using an iterator.

        If a provider supports it, chunked transfer encoding is used and you
        don't need to know in advance the amount of data to be uploaded.

        Otherwise if a provider doesn't support it, iterator will be exhausted
        so a total size for data to be uploaded can be determined.

        Note: Exhausting the iterator means that the whole data must be
        buffered in memory which might result in memory exhaustion when
        uploading a very large object.

        If a file is located on a disk you are advised to use upload_object
        function which uses fs.stat function to determine the file size and it
        doesn't need to buffer whole object in the memory.

        :param iterator: An object which implements the iterator interface.
        :type iterator: :class:`object`

        :param container: Destination container.
        :type container: :class:`Container`

        :param object_name: Object name.
        :type object_name: ``str``

        :param extra: (optional) Extra attributes (driver specific). Note:
            This dictionary must contain a 'content_type' key which represents
            a content type of the stored object.
        :type extra: ``dict``

        :param headers: (optional) Additional request headers,
            such as CORS headers. For example:
            headers = {'Access-Control-Allow-Origin': 'http://mozilla.com'}
        :type headers: ``dict``

        :rtype: ``object``
        """
        raise NotImplementedError(
            'upload_object_via_stream not implemented for this driver')

    def delete_object(self, obj):
        """
        Delete an object.

        :param obj: Object instance.
        :type obj: :class:`Object`

        :return: ``bool`` True on success.
        :rtype: ``bool``
        """
        raise NotImplementedError(
            'delete_object not implemented for this driver')

    def create_container(self, container_name):
        """
        Create a new container.

        :param container_name: Container name.
        :type container_name: ``str``

        :return: Container instance on success.
        :rtype: :class:`Container`
        """
        raise NotImplementedError(
            'create_container not implemented for this driver')

    def delete_container(self, container):
        """
        Delete a container.

        :param container: Container instance
        :type container: :class:`Container`

        :return: ``True`` on success, ``False`` otherwise.
        :rtype: ``bool``
        """
        raise NotImplementedError(
            'delete_container not implemented for this driver')

    def _get_object(self, obj, callback, callback_kwargs, response,
                    success_status_code=None):
        """
        Call the passed callback and start the transfer of the object.

        :param obj: Object instance.
        :type obj: :class:`Object`

        :param callback: Function which is called with the passed
            callback_kwargs
        :type callback: :class:`function`

        :param callback_kwargs: Keyword arguments which are passed to the
            callback.
        :type callback_kwargs: ``dict``

        :param response: Response instance.
        :type response: :class:`Response`

        :param success_status_code: Status code which represents a successful
                                    transfer (defaults to httplib.OK)
        :type success_status_code: ``int``

        :return: Whatever the callback returns.
        :rtype: ``bool``

        :raises ObjectDoesNotExistError: If the response status is
            ``httplib.NOT_FOUND``.
        :raises LibcloudError: For any other status that is not the success
            status.
        """
        success_status_code = success_status_code or httplib.OK

        if response.status == success_status_code:
            return callback(**callback_kwargs)
        elif response.status == httplib.NOT_FOUND:
            raise ObjectDoesNotExistError(object_name=obj.name,
                                          value='', driver=self)

        raise LibcloudError(value='Unexpected status code: %s' %
                                  (response.status),
                            driver=self)

    def _save_object(self, response, obj, destination_path,
                     overwrite_existing=False, delete_on_failure=True,
                     chunk_size=None):
        """
        Save object to the provided path.

        :param response: RawResponse instance.
        :type response: :class:`RawResponse`

        :param obj: Object instance.
        :type obj: :class:`Object`

        :param destination_path: Destination file or directory. When the
                                 path has an empty base name (for example it
                                 ends with a path separator) it is treated as
                                 a directory and the file is stored inside it
                                 under ``obj.name``.
        :type destination_path: ``str``

        :param delete_on_failure: True to delete partially downloaded object if
                                  the number of bytes written does not match
                                  ``obj.size``.
        :type delete_on_failure: ``bool``

        :param overwrite_existing: True to overwrite a local path if it already
                                   exists.
        :type overwrite_existing: ``bool``

        :param chunk_size: Optional chunk size
            (defaults to ``libcloud.storage.base.CHUNK_SIZE``)
        :type chunk_size: ``int``

        :return: ``True`` on success, ``False`` otherwise.
        :rtype: ``bool``

        :raises LibcloudError: If the destination directory does not exist,
            or the target file exists and ``overwrite_existing`` is False.
        """

        chunk_size = chunk_size or CHUNK_SIZE

        base_name = os.path.basename(destination_path)

        # An empty base name means a directory was given; it must exist.
        if not base_name and not os.path.exists(destination_path):
            raise LibcloudError(
                value='Path %s does not exist' % (destination_path),
                driver=self)

        if not base_name:
            file_path = pjoin(destination_path, obj.name)
        else:
            file_path = destination_path

        if os.path.exists(file_path) and not overwrite_existing:
            raise LibcloudError(
                value='File %s already exists, but ' % (file_path) +
                'overwrite_existing=False',
                driver=self)

        bytes_transferred = 0

        with open(file_path, 'wb') as file_handle:
            for chunk in response._response.iter_content(chunk_size):
                file_handle.write(b(chunk))
                bytes_transferred += len(chunk)

        if int(obj.size) != int(bytes_transferred):
            # Transfer failed, support retry?
            if delete_on_failure:
                try:
                    os.unlink(file_path)
                except Exception:
                    # Deliberately best-effort: a failed clean-up must not
                    # mask the fact that the download itself failed.
                    pass

            return False

        return True

    def _upload_object(self, object_name, content_type, request_path,
                       request_method='PUT',
                       headers=None, file_path=None, stream=None,
                       upload_func=None, upload_func_kwargs=None,
                       chunked=False, multipart=False):
        """
        Helper function for setting common request headers and calling the
        passed in callback which uploads an object.

        :param object_name: Object name. Used to guess the content type when
                            no ``file_path`` is given.
        :type object_name: ``str``

        :param content_type: Content type of the object. When falsy it is
                             guessed from ``file_path`` or ``object_name``.
        :type content_type: ``str``

        :param request_path: Path the upload request is sent to.
        :type request_path: ``str``

        :param request_method: HTTP method used for the upload request.
        :type request_method: ``str``

        :param headers: (optional) Additional request headers. The passed
                        dictionary is copied and never modified.
        :type headers: ``dict``

        :param file_path: (optional) Path to a local file to upload.
        :type file_path: ``str``

        :param stream: (optional) Iterator which yields the data to upload.
                       When truthy it takes precedence over ``file_path``.
        :type stream: :class:`object`

        :param upload_func: (optional) Callback invoked after a successful
                            request.
        :type upload_func: :class:`function`

        :param upload_func_kwargs: (optional) Keyword arguments for
                                   ``upload_func``.
        :type upload_func_kwargs: ``dict``

        :param chunked: Not used by this implementation.
        :type chunked: ``bool``

        :param multipart: Not used by this implementation.
        :type multipart: ``bool``

        :return: Dictionary with the keys ``response``,
                 ``bytes_transferred`` and ``data_hash``.
        :rtype: ``dict``

        :raises OSError: If ``file_path`` is given but does not exist.
        :raises AttributeError: If ``stream`` is not an iterator, or if the
            content type can't be determined while in strict mode.
        """
        # Work on a copy so the caller's dictionary is never modified when
        # the Content-Type header is added below.
        headers = dict(headers) if headers else {}

        if file_path and not os.path.exists(file_path):
            raise OSError('File %s does not exist' % (file_path))

        if stream is not None and not hasattr(stream, 'next') and not \
                hasattr(stream, '__next__'):
            raise AttributeError('iterator object must implement next() ' +
                                 'method.')

        if not content_type:
            if file_path:
                name = file_path
            else:
                name = object_name
            content_type, _ = libcloud.utils.files.guess_file_mime_type(name)

            if not content_type:
                if self.strict_mode:
                    raise AttributeError('File content-type could not be '
                                         'guessed and no content_type value '
                                         'is provided')
                else:
                    # Fallback to a content-type
                    content_type = DEFAULT_CONTENT_TYPE

        headers['Content-Type'] = content_type
        if stream:
            response = self.connection.request(
                request_path,
                method=request_method, data=stream,
                headers=headers, raw=True)
            # NOTE(review): the same iterator was just handed to the
            # request; if the request consumed it, the hash and length
            # computed here cover no data - confirm against the connection
            # implementation.
            stream_hash, stream_length = self._hash_buffered_stream(
                stream,
                self._get_hash_function())
        else:
            with open(file_path, 'rb') as file_stream:
                response = self.connection.request(
                    request_path,
                    method=request_method, data=file_stream,
                    headers=headers, raw=True)
            # Re-open the file so hashing starts from the first byte again.
            with open(file_path, 'rb') as file_stream:
                stream_hash, stream_length = self._hash_buffered_stream(
                    file_stream,
                    self._get_hash_function())

        if not response.success():
            response.parse_error()

        if upload_func:
            # upload_func_kwargs defaults to None, which can't be unpacked
            upload_func(**(upload_func_kwargs or {}))

        return {'response': response,
                'bytes_transferred': stream_length,
                'data_hash': stream_hash}

    def _hash_buffered_stream(self, stream, hasher, blocksize=65536):
        """
        Feed the whole stream into the hasher and count its length.

        Three kinds of input are handled, checked in this order: iterators
        (objects with ``next`` / ``__next__``), plain iterables without
        context manager support, and file-like objects which are read in
        blocks inside a ``with`` statement (so they are closed afterwards).

        :param stream: Data source.
        :type stream: :class:`object`

        :param hasher: Hash object with ``update`` and ``hexdigest`` methods.
        :type hasher: :class:`object`

        :param blocksize: Number of bytes per ``read`` call for file-like
                          objects.
        :type blocksize: ``int``

        :return: Tuple of (hex digest, total length).
        :rtype: ``tuple``
        """
        total_len = 0

        if hasattr(stream, '__next__') or hasattr(stream, 'next'):
            for chunk in libcloud.utils.files.read_in_chunks(iterator=stream):
                hasher.update(b(chunk))
                total_len += len(chunk)

            return (hasher.hexdigest(), total_len)

        if not hasattr(stream, '__exit__'):
            for s in stream:
                hasher.update(s)
                total_len = total_len + len(s)
            return (hasher.hexdigest(), total_len)

        with stream:
            buf = stream.read(blocksize)
            while len(buf) > 0:
                total_len = total_len + len(buf)
                hasher.update(buf)
                buf = stream.read(blocksize)

        return (hasher.hexdigest(), total_len)

    def _get_hash_function(self):
        """
        Return instantiated hash function for the hash type supported by
        the provider.

        :raises RuntimeError: If ``hashlib`` has no attribute named after
            ``self.hash_type``.
        """
        try:
            func = getattr(hashlib, self.hash_type)()
        except AttributeError:
            raise RuntimeError('Invalid or unsupported hash type: %s' %
                               (self.hash_type))

        return func