Update the scrape-ec2-sizes script to use a larger chunk size, since the
file we are downloading is massive (multiple hundreds of MBs).

Also update the script to make sure we clean up the locally cached file in
case it is corrupted or incomplete.
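
For reference, a minimal standalone sketch of the download/recovery flow this
patch implements (URL and FILEPATH below are placeholder values, not the
module's real constants):

    import os
    import shutil

    import requests
    import ijson

    URL = "https://example.com/offers/AmazonEC2/current/index.json"  # placeholder
    FILEPATH = "/tmp/ec2-offers.json"  # placeholder

    def download_json():
        if os.path.isfile(FILEPATH):
            return open(FILEPATH, "r")

        # Stream straight to disk with a 10 MiB buffer - copyfileobj avoids the
        # per-chunk overhead of response.iter_content on a very large file
        with requests.get(URL, stream=True) as response:
            with open(FILEPATH, "wb") as fp:
                shutil.copyfileobj(response.raw, fp, 10 * 1024 * 1024)

        return open(FILEPATH, "r")

    def get_json():
        # Second value tells the caller whether the data came from the local cache
        if not os.path.isfile(FILEPATH):
            return download_json(), False

        return open(FILEPATH, "r"), True

    def parse_products():
        json_file, from_file = get_json()

        try:
            return next(ijson.items(json_file, "products"))
        except ijson.common.IncompleteJSONError:
            # A truncated cached file raises IncompleteJSONError - drop the cache
            # and retry once with a fresh download before giving up
            if not from_file:
                raise

            os.remove(FILEPATH)
            json_file, _ = get_json()
            return next(ijson.items(json_file, "products"))
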
diff --git a/contrib/scrape-ec2-sizes.py b/contrib/scrape-ec2-sizes.py
index 7872ab8..c202903 100755
--- a/contrib/scrape-ec2-sizes.py
+++ b/contrib/scrape-ec2-sizes.py
@@ -28,6 +28,8 @@
 import re
 import os
 import json
+import shutil
+import atexit
 
 import requests
 import ijson  # pylint: disable=import-error
@@ -195,22 +197,25 @@
 
 
 def download_json():
-    response = requests.get(URL, stream=True)
-    try:
+    if os.path.isfile(FILEPATH):
         return open(FILEPATH, 'r')
-    except IOError:
-        with open(FILEPATH, 'wb') as fo:
-            for chunk in response.iter_content(chunk_size=2**20):
-                if chunk:
-                    fo.write(chunk)
+
+    # File not cached locally, download data and cache it
+    with requests.get(URL, stream=True) as response:
+        with open(FILEPATH, 'wb') as fp:
+            # NOTE: We use shutil.copyfileobj with a large chunk size instead
+            # of response.iter_content since the data we download is massive
+            # and copyfileobj is more efficient.
+            shutil.copyfileobj(response.raw, fp, 10 * 1024 * 1024)
+
     return open(FILEPATH, 'r')
 
 
 def get_json():
-    try:
-        return open(FILEPATH, 'r')
-    except IOError:
-        return download_json()
+    if not os.path.isfile(FILEPATH):
+        return download_json(), False
+
+    return open(FILEPATH, 'r'), True
 
 
 def filter_extras(extras):
@@ -230,9 +235,22 @@
     for region_id in regions:
         regions[region_id]['instance_types'] = []
     # Parse
-    json_file = get_json()
+    json_file, from_file = get_json()
     products_data = ijson.items(json_file, 'products')
-    products_data = next(products_data)
+
+    try:
+        products_data = next(products_data)
+    except ijson.common.IncompleteJSONError as e:
+        # This likely indicates that the cached file is incomplete or corrupt,
+        # so we delete it and re-download the data
+        if from_file:
+            os.remove(FILEPATH)
+            json_file, from_file = get_json()
+            products_data = ijson.items(json_file, 'products')
+            products_data = next(products_data)
+        else:
+            raise e
+
     for sku in products_data:
         if products_data[sku]['productFamily'] != "Compute Instance":
             continue
diff --git a/tox.ini b/tox.ini
index 7f5caf9..3a6d498 100644
--- a/tox.ini
+++ b/tox.ini
@@ -152,7 +152,9 @@
 basepython: python3.7
 deps = requests
        ijson
-commands = bash -c 'python contrib/scrape-ec2-sizes.py > libcloud/compute/constants.py'
+commands = 
+    bash -c 'echo "Scrapping EC2 sizes, this may take up to 5 minutes more since the actual JSON data we download and scrape is very large"'
+    bash -c 'python contrib/scrape-ec2-sizes.py > libcloud/compute/constants.py'
 
 [testenv:pylint]
 deps = -r{toxinidir}/requirements-tests.txt