#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
This script downloads and parses AWS EC2 from
https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json.
It writes a Python module with constants about EC2's sizes and regions.
Use it as following (run it in the root of the repo directory):
$ python contrib/scrape-ec2-sizes.py
"""
import re
import os
import json
import atexit

import requests
import tqdm  # pylint: disable=import-error
import ijson  # pylint: disable=import-error

FILEPATH = os.environ.get("TMP_JSON", "/tmp/ec.json")
URL = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json"
IGNORED_FIELDS = ["locationType", "operatingSystem"]
REG1_STORAGE = re.compile(r"(\d+) x ([0-9,]+)")
REG2_STORAGE = re.compile(r"(\d+) GB.*?")
REG_BANDWIDTH = re.compile(r"\D*(\d+)\D*")
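# Illustrative matches for the regexes above, based on the string formats the
# parser below expects from the pricing feed (example values assumed):
#   REG1_STORAGE:  "2 x 1,920 SSD"    -> groups ("2", "1,920")  (count x size)
#   REG2_STORAGE:  "900 GB NVMe SSD"  -> groups ("900",)        (single disk)
#   REG_BANDWIDTH: "Up to 10 Gigabit" -> groups ("10",)         (Gbit/s)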
# From <https://aws.amazon.com/marketplace/help/200777880>
REGION_DETAILS = {
# America
"US East (N. Virginia)": {
"id": "us-east-1",
"endpoint": "ec2.us-east-1.amazonaws.com",
"api_name": "ec2_us_east",
"country": "USA",
"signature_version": "2",
},
"US East (Ohio)": {
"id": "us-east-2",
"endpoint": "ec2.us-east-2.amazonaws.com",
"api_name": "ec2_us_east_ohio",
"country": "USA",
"signature_version": "4",
},
"US West (N. California)": {
"id": "us-west-1",
"endpoint": "ec2.us-west-1.amazonaws.com",
"api_name": "ec2_us_west",
"country": "USA",
"signature_version": "2",
},
"US West (Oregon)": {
"id": "us-west-2",
"endpoint": "ec2.us-west-2.amazonaws.com",
"api_name": "ec2_us_west_oregon",
"country": "US",
"signature_version": "2",
},
"Canada (Central)": {
"id": "ca-central-1",
"endpoint": "ec2.ca-central-1.amazonaws.com",
"api_name": "ec2_ca_central_1",
"country": "Canada",
"signature_version": "4",
},
"South America (Sao Paulo)": {
"id": "sa-east-1",
"endpoint": "ec2.sa-east-1.amazonaws.com",
"api_name": "ec2_sa_east",
"country": "Brazil",
"signature_version": "2",
},
"AWS GovCloud (US)": {
"id": "us-gov-west-1",
"endpoint": "ec2.us-gov-west-1.amazonaws.com",
"api_name": "ec2_us_govwest",
"country": "US",
"signature_version": "2",
},
# EU
"eu-west-1": {
"id": "eu-west-1",
"endpoint": "ec2.eu-west-1.amazonaws.com",
"api_name": "ec2_eu_west",
"country": "Ireland",
"signature_version": "2",
},
"EU (Ireland)": { # Duplicate from AWS' JSON
"id": "eu-west-1",
"endpoint": "ec2.eu-west-1.amazonaws.com",
"api_name": "ec2_eu_west",
"country": "Ireland",
"signature_version": "2",
},
"EU (London)": {
"id": "eu-west-2",
"endpoint": "ec2.eu-west-2.amazonaws.com",
"api_name": "ec2_eu_west_london",
"country": "United Kingdom",
"signature_version": "4",
},
"EU (Milan)": {
"id": "eu-south-1",
"endpoint": "ec2.eu-south-1.amazonaws.com",
"api_name": "ec2_eu_south",
"country": "Italy",
"signature_version": "4",
},
"EU (Paris)": {
"id": "eu-west-3",
"endpoint": "ec2.eu-west-3.amazonaws.com",
"api_name": "ec2_eu_west_paris",
"country": "France",
"signature_version": "4",
},
"EU (Frankfurt)": {
"id": "eu-central-1",
"endpoint": "ec2.eu-central-1.amazonaws.com",
"api_name": "ec2_eu_central",
"country": "Frankfurt",
"signature_version": "4",
},
"EU (Stockholm)": {
"id": "eu-north-1",
"endpoint": "ec2.eu-north-1.amazonaws.com",
"api_name": "ec2_eu_north_stockholm",
"country": "Stockholm",
"signature_version": "4",
},
# Asia
"Asia Pacific (Mumbai)": {
"id": "ap-south-1",
"endpoint": "ec2.ap-south-1.amazonaws.com",
"api_name": "ec2_ap_south_1",
"country": "India",
"signature_version": "4",
},
"Asia Pacific (Singapore)": {
"id": "ap-southeast-1",
"endpoint": "ec2.ap-southeast-1.amazonaws.com",
"api_name": "ec2_ap_southeast",
"country": "Singapore",
"signature_version": "2",
},
"Asia Pacific (Sydney)": {
"id": "ap-southeast-2",
"endpoint": "ec2.ap-southeast-2.amazonaws.com",
"api_name": "ec2_ap_southeast_2",
"country": "Australia",
"signature_version": "2",
},
"Asia Pacific (Tokyo)": {
"id": "ap-northeast-1",
"endpoint": "ec2.ap-northeast-1.amazonaws.com",
"api_name": "ec2_ap_northeast",
"country": "Japan",
"signature_version": "2",
},
"Asia Pacific (Seoul)": {
"id": "ap-northeast-2",
"endpoint": "ec2.ap-northeast-2.amazonaws.com",
"api_name": "ec2_ap_northeast",
"country": "South Korea",
"signature_version": "4",
},
"Asia Pacific (Osaka-Local)": {
"id": "ap-northeast-3",
"endpoint": "ec2.ap-northeast-3.amazonaws.com",
"api_name": "ec2_ap_northeast",
"country": "Japan",
"signature_version": "4",
},
"Asia Pacific (Hong Kong)": {
"id": "ap-east-1",
"endpoint": "ec2.ap-east-1.amazonaws.com",
"api_name": "ec2_ap_east",
"country": "Hong Kong",
"signature_version": "2",
},
# Not in JSON
"China (Beijing)": {
"id": "cn-north-1",
"endpoint": "ec2.cn-north-1.amazonaws.com.cn",
"api_name": "ec2_cn_north",
"country": "China",
"signature_version": "4",
},
"China (Ningxia)": {
"id": "cn-northwest-1",
"endpoint": "ec2.cn-northwest-1.amazonaws.com.cn",
"api_name": "ec2_cn_northwest",
"country": "China",
"signature_version": "4",
},
}
FILE_HEADER = """
# File generated by contrib/scrape-ec2-sizes.py script - DO NOT EDIT manually
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""".strip()
def download_json():
if os.path.isfile(FILEPATH):
print("Using data from existing cached file %s" % (FILEPATH))
return open(FILEPATH, "r")
def remove_partial_cached_file():
if os.path.isfile(FILEPATH):
os.remove(FILEPATH)
# File not cached locally, download data and cache it
with requests.get(URL, stream=True) as response:
atexit.register(remove_partial_cached_file)
total_size_in_bytes = int(response.headers.get("content-length", 0))
progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True)
chunk_size = 10 * 1024 * 1024
with open(FILEPATH, "wb") as fp:
            # NOTE: We use response.iter_content() with a large chunk size
            # rather than shutil.copyfileobj() so we can update the progress
            # bar as the (very large) file downloads. The copyfileobj
            # equivalent, without progress reporting, would be:
            #   shutil.copyfileobj(response.raw, fp, chunk_size)
            for chunk_data in response.iter_content(chunk_size):
                progress_bar.update(len(chunk_data))
                fp.write(chunk_data)
progress_bar.close()
atexit.unregister(remove_partial_cached_file)
    return open(FILEPATH, "r")


def get_json():
if not os.path.isfile(FILEPATH):
return download_json(), False
print("Using data from existing cached file %s" % (FILEPATH))
    return open(FILEPATH, "r"), True


def filter_extras(extras):
return {
key: extras[key]
for key in extras
if key
not in [
"capacitystatus",
"ebsOptimized",
"operation",
"licenseModel",
"preInstalledSw",
"tenancy",
"usagetype",
]
}
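# Illustrative call (attribute names as they appear in the feed are assumed):
#   filter_extras({"vcpu": "4", "ecu": "8", "tenancy": "Shared"})
#   -> {"vcpu": "4", "ecu": "8"}  ("tenancy" is in the exclusion list above)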


def parse():
# Set vars
sizes = {}
regions = {r["id"]: r for r in REGION_DETAILS.values()}
for region_id in regions:
regions[region_id]["instance_types"] = []
# Parse
json_file, from_file = get_json()
products_data = ijson.items(json_file, "products")
try:
products_data = next(products_data)
except ijson.common.IncompleteJSONError as e:
        # This likely indicates that the cached file is incomplete or
        # corrupt, so we delete it and re-download the data
if from_file:
os.remove(FILEPATH)
json_file, from_file = get_json()
products_data = ijson.items(json_file, "products")
products_data = next(products_data)
else:
raise e
for sku in products_data:
if products_data[sku].get("productFamily", "unknown") != "Compute Instance":
continue
location = products_data[sku]["attributes"].pop("location")
if location not in REGION_DETAILS:
continue
# Get region & size ID
region_id = REGION_DETAILS[location]["id"]
instance_type = products_data[sku]["attributes"]["instanceType"]
# Add size to region
if instance_type not in regions[region_id]["instance_types"]:
regions[region_id]["instance_types"].append(instance_type)
# Parse sizes
if instance_type not in sizes:
for field in IGNORED_FIELDS:
products_data[sku]["attributes"].pop(field, None)
# Compute RAM
ram = int(
float(
products_data[sku]["attributes"]["memory"]
.split()[0]
.replace(",", "")
)
* 1024
)
            # Compute bandwidth
bw_match = REG_BANDWIDTH.match(
products_data[sku]["attributes"]["networkPerformance"]
)
if bw_match is not None:
bandwidth = int(bw_match.groups()[0])
else:
bandwidth = None
sizes[instance_type] = {
"id": instance_type,
"name": instance_type,
"ram": ram,
"bandwidth": bandwidth,
"extra": filter_extras(products_data[sku]["attributes"]),
}
if products_data[sku]["attributes"].get("storage") != "EBS only":
match = REG1_STORAGE.match(products_data[sku]["attributes"]["storage"])
if match:
disk_number, disk_size = match.groups()
else:
match = REG2_STORAGE.match(
products_data[sku]["attributes"]["storage"]
)
if match:
disk_number, disk_size = 1, match.groups()[0]
else:
disk_number, disk_size = 0, "0"
disk_number, disk_size = (
int(disk_number),
int(disk_size.replace(",", "")),
)
sizes[instance_type]["disk"] = disk_number * disk_size
else:
sizes[instance_type]["disk"] = 0
products_data[sku]["attributes"]
# Sort
for region in regions:
regions[region]["instance_types"] = sorted(regions[region]["instance_types"])
return sizes, regions
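# parse() returns (sizes, regions): sizes maps instance type -> size dict,
# regions maps region id -> region details plus a sorted "instance_types"
# list, e.g. (illustrative): regions["us-east-1"]["instance_types"][0]
# might be "a1.2xlarge".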


def dump():
    print("Scraping size data, this may take 10-15 minutes...")
sizes, regions = parse()
separators = (",", ": ")
# 1. Write file with instance types constants
file_path = "libcloud/compute/constants/ec2_instance_types.py"
with open(file_path, "w") as fp:
fp.write(FILE_HEADER + "\n")
fp.write("\n")
fp.write(
"INSTANCE_TYPES = "
+ json.dumps(
sizes, indent=4, sort_keys=True, separators=separators
).replace("null", "None")
)
print("")
print("Data written to %s" % (file_path))
print("")
# 2. Write file with full details for each region
file_path = "libcloud/compute/constants/ec2_region_details_complete.py"
with open(file_path, "w") as fp:
fp.write(FILE_HEADER + "\n")
fp.write("\n")
fp.write(
"REGION_DETAILS = "
+ json.dumps(
regions, indent=4, sort_keys=True, separators=separators
).replace("null", "None")
)
print("Data written to %s" % (file_path))
print("")
# 3. Write file with partial region details (everything except instance_types attribute)
regions_partial = {}
keys_to_keep = ["api_name", "country", "id", "endpoint", "signature_version"]
for region_name, region_details in regions.items():
regions_partial[region_name] = {}
for key, value in region_details.items():
if key not in keys_to_keep:
continue
regions_partial[region_name][key] = value
file_path = "libcloud/compute/constants/ec2_region_details_partial.py"
with open(file_path, "w") as fp:
fp.write(FILE_HEADER + "\n")
fp.write("\n")
fp.write(
"REGION_DETAILS = "
+ json.dumps(
regions_partial, indent=4, sort_keys=True, separators=separators
).replace("null", "None")
)
print("Data written to %s" % (file_path))
print("")
if __name__ == "__main__":
dump()