Merge pull request #1547 from Kami/google_storage_object_size_fix

Fix a bug with edge-case in get_object() method in Google Storage Driver
diff --git a/CHANGES.rst b/CHANGES.rst
index 8121e53..0a66e17 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -31,6 +31,21 @@
   driver constructor.
   (GITHUB-1546)
 
+Storage
+~~~~~~~
+
+- [Google Cloud Storage] Fix a bug and make sure we also correctly handle
+  scenario in ``get_object()`` method when the object size is returned in
+  ``x-goog-stored-content-length`` and not ``content-length`` header.
+
+  Reported by Veith Röthlingshöfer - @RunOrVeith.
+  (GITHUB-1544, GITHUB-1547)
+
+- [Google Cloud Storage] Update ``get_object()`` method and ensure
+  ``object.size`` attribute is an integer and not a string. This way it's
+  consistent with ``list_objects()`` method.
+  (GITHUB-1547)
+
 Changes in Apache Libcloud 3.3.0
 --------------------------------
 
diff --git a/libcloud/storage/drivers/google_storage.py b/libcloud/storage/drivers/google_storage.py
index a6bb03f..c16e4de 100644
--- a/libcloud/storage/drivers/google_storage.py
+++ b/libcloud/storage/drivers/google_storage.py
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Dict
+from typing import Optional
+
 import copy
 import json
 
@@ -406,3 +409,14 @@
         self.json_connection.request(
             url, method='POST',
             data=json.dumps({'role': role, 'entity': entity}))
+
+    def _get_content_length_from_headers(self,
+                                         headers: Dict[str, str]
+                                         ) -> Optional[int]:
+        # We need to override this since Google storage doesn't always return
+        # Content-Length header.
+        # See https://github.com/apache/libcloud/issues/1544 for details.
+        x_goog_content_length = headers.get('x-goog-stored-content-length',
+                                            None)
+        content_length = headers.get('content-length', x_goog_content_length)
+        return content_length
diff --git a/libcloud/storage/drivers/s3.py b/libcloud/storage/drivers/s3.py
index 86b4c45..a19d5c6 100644
--- a/libcloud/storage/drivers/s3.py
+++ b/libcloud/storage/drivers/s3.py
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Dict
+from typing import Optional
+
 import base64
 import hmac
 import time
@@ -1047,6 +1050,15 @@
 
         return container
 
+    def _get_content_length_from_headers(self,
+                                         headers: Dict[str, str]
+                                         ) -> Optional[int]:
+        """
+        Prase object size from the provided response headers.
+        """
+        content_length = headers.get("content-length", None)
+        return content_length
+
     def _headers_to_object(self, object_name, container, headers):
         hash = headers['etag'].replace('"', '')
         extra = {'content_type': headers['content-type'],
@@ -1063,7 +1075,13 @@
             key = key.replace(self.http_vendor_prefix + '-meta-', '')
             meta_data[key] = value
 
-        obj = Object(name=object_name, size=headers['content-length'],
+        content_length = self._get_content_length_from_headers(headers=headers)
+
+        if content_length is None:
+            raise KeyError("Can not deduce object size from headers for "
+                           "object %s" % (object_name))
+
+        obj = Object(name=object_name, size=int(content_length),
                      hash=hash, extra=extra,
                      meta_data=meta_data,
                      container=container,
diff --git a/libcloud/test/storage/test_google_storage.py b/libcloud/test/storage/test_google_storage.py
index ba3a8ba..58984bd 100644
--- a/libcloud/test/storage/test_google_storage.py
+++ b/libcloud/test/storage/test_google_storage.py
@@ -79,6 +79,21 @@
 
         return httplib.OK, body, headers, httplib.responses[httplib.OK]
 
+    def _test2_test_cont_length_get_object(self, method, url, body, headers):
+        # test_get_object_object_size_not_in_content_length_header
+        # Google uses a different HTTP header prefix for meta data
+        body = self.fixtures.load('list_containers.xml')
+        headers = {
+            'content-type': 'application/zip',
+            'etag': '"e31208wqsdoj329jd"',
+            'x-goog-meta-rabbits': 'monkeys',
+            'x-goog-stored-content-length': '9587',
+            'last-modified': 'Thu, 13 Sep 2012 07:13:22 GMT'
+        }
+
+        return httplib.OK, body, headers, httplib.responses[httplib.OK]
+
+
     def _container_path_UNAUTHORIZED(self, method, url, body, headers):
         return (httplib.UNAUTHORIZED,
                 '',
@@ -332,6 +347,18 @@
         # Not supported on Google Storage
         pass
 
+    def test_get_object_object_size_in_content_length(self):
+        self.mock_response_klass.type = 'get_object'
+        obj = self.driver.get_object(container_name='test2',
+                                     object_name='test')
+        self.assertEqual(obj.size, 12345)
+
+    def test_get_object_object_size_not_in_content_length_header(self):
+        self.mock_response_klass.type = 'get_object'
+        obj = self.driver.get_object(container_name='test2',
+                                     object_name='test_cont_length')
+        self.assertEqual(obj.size, 9587)
+
     def test_delete_permissions(self):
         mock_request = mock.Mock()
         self.driver.json_connection.request = mock_request
diff --git a/libcloud/test/storage/test_s3.py b/libcloud/test/storage/test_s3.py
index 868ef80..ab6b6c7 100644
--- a/libcloud/test/storage/test_s3.py
+++ b/libcloud/test/storage/test_s3.py
@@ -130,7 +130,7 @@
                 httplib.responses[httplib.OK])
 
     def _test2_test_get_object(self, method, url, body, headers):
-        # test_get_object
+        # test_get_object_success
         body = self.fixtures.load('list_containers.xml')
         headers = {'content-type': 'application/zip',
                    'etag': '"e31208wqsdoj329jd"',
@@ -144,6 +144,34 @@
                 headers,
                 httplib.responses[httplib.OK])
 
+    def _test2_get_object_no_content_length(self, method, url, body, headers):
+        # test_get_object_unable_to_determine_object_size
+        body = self.fixtures.load('list_containers.xml')
+        headers = {'content-type': 'application/zip',
+                   'etag': '"e31208wqsdoj329jd"',
+                   'x-amz-meta-rabbits': 'monkeys',
+                   'last-modified': 'Thu, 13 Sep 2012 07:13:22 GMT'
+                   }
+
+        return (httplib.OK,
+                body,
+                headers,
+                httplib.responses[httplib.OK])
+
+    def _test2_test_get_object_no_content_length(self, method, url, body, headers):
+        # test_get_object_unable_to_determine_object_size
+        body = self.fixtures.load('list_containers.xml')
+        headers = {'content-type': 'application/zip',
+                   'etag': '"e31208wqsdoj329jd"',
+                   'x-amz-meta-rabbits': 'monkeys',
+                   'last-modified': 'Thu, 13 Sep 2012 07:13:22 GMT'
+                   }
+
+        return (httplib.OK,
+                body,
+                headers,
+                httplib.responses[httplib.OK])
+
     def _new_container_INVALID_NAME(self, method, url, body, headers):
         # test_create_container
         return (httplib.BAD_REQUEST,
@@ -579,13 +607,22 @@
 
         self.assertEqual(obj.name, 'test')
         self.assertEqual(obj.container.name, 'test2')
-        self.assertEqual(obj.size, '12345')
+        self.assertEqual(obj.size, 12345)
         self.assertEqual(obj.hash, 'e31208wqsdoj329jd')
         self.assertEqual(obj.extra['last_modified'],
                          'Thu, 13 Sep 2012 07:13:22 GMT')
         self.assertEqual(obj.extra['content_type'], 'application/zip')
         self.assertEqual(obj.meta_data['rabbits'], 'monkeys')
 
+    def test_get_object_unable_to_determine_object_size(self):
+        self.mock_response_klass.type = 'get_object_no_content_length'
+
+        expected_msg = "Can not deduce object size from headers"
+        self.assertRaisesRegex(KeyError, expected_msg,
+                               self.driver.get_object,
+                               container_name='test2',
+                               object_name='test')
+
     def test_create_container_bad_request(self):
         # invalid container name, returns a 400 bad request
         self.mock_response_klass.type = 'INVALID_NAME'