#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
This script downloads and parses the AWS EC2 pricing data from
https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json
and writes Python modules with constants describing EC2's instance sizes and regions.
Use it as follows (run it from the root of the repository):
$ python contrib/scrape-ec2-sizes.py
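
It writes the following modules (paths relative to the repository root):
- libcloud/compute/constants/ec2_instance_types.py
- libcloud/compute/constants/ec2_region_details_complete.py
- libcloud/compute/constants/ec2_region_details_partial.py

The generated files are plain Python constant modules. As an illustrative
example (after the script has been run; ``m5.large`` is just one instance type):

>>> from libcloud.compute.constants.ec2_instance_types import INSTANCE_TYPES
>>> INSTANCE_TYPES['m5.large']['ram']  # RAM in MB
8192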
"""
import re
import os
import json
import atexit

import requests
import tqdm  # pylint: disable=import-error
import ijson  # pylint: disable=import-error

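# Local cache path for the downloaded pricing JSON; override it with the
# TMP_JSON environment variable.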
FILEPATH = os.environ.get('TMP_JSON', '/tmp/ec.json')
URL = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json"
IGNORED_FIELDS = ['locationType', 'operatingSystem']
REG1_STORAGE = re.compile(r'(\d+) x ([0-9,]+)')
REG2_STORAGE = re.compile(r'(\d+) GB.*?')
REG_BANDWIDTH = re.compile(r'\D*(\d+)\D*')
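# Illustrative attribute values these patterns are meant to match:
#   REG1_STORAGE:  "2 x 1,900 NVMe SSD"  -> disk count and per-disk size in GB
#   REG2_STORAGE:  "900 GB NVMe SSD"     -> single disk size in GB
#   REG_BANDWIDTH: "Up to 10 Gigabit"    -> nominal bandwidth figure
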
# From <https://aws.amazon.com/marketplace/help/200777880>
REGION_DETAILS = {
# America
'US East (N. Virginia)': {
'id': 'us-east-1',
'endpoint': 'ec2.us-east-1.amazonaws.com',
'api_name': 'ec2_us_east',
'country': 'USA',
'signature_version': '2',
},
'US East (Ohio)': {
'id': 'us-east-2',
'endpoint': 'ec2.us-east-2.amazonaws.com',
'api_name': 'ec2_us_east_ohio',
'country': 'USA',
'signature_version': '4',
},
'US West (N. California)': {
'id': 'us-west-1',
'endpoint': 'ec2.us-west-1.amazonaws.com',
'api_name': 'ec2_us_west',
'country': 'USA',
'signature_version': '2',
},
'US West (Oregon)': {
'id': 'us-west-2',
'endpoint': 'ec2.us-west-2.amazonaws.com',
'api_name': 'ec2_us_west_oregon',
        'country': 'USA',
'signature_version': '2',
},
'Canada (Central)': {
'id': 'ca-central-1',
'endpoint': 'ec2.ca-central-1.amazonaws.com',
'api_name': 'ec2_ca_central_1',
'country': 'Canada',
'signature_version': '4',
},
'South America (Sao Paulo)': {
'id': 'sa-east-1',
'endpoint': 'ec2.sa-east-1.amazonaws.com',
'api_name': 'ec2_sa_east',
'country': 'Brazil',
'signature_version': '2',
},
'AWS GovCloud (US)': {
'id': 'us-gov-west-1',
'endpoint': 'ec2.us-gov-west-1.amazonaws.com',
'api_name': 'ec2_us_govwest',
        'country': 'USA',
'signature_version': '2',
},
# EU
'eu-west-1': {
'id': 'eu-west-1',
'endpoint': 'ec2.eu-west-1.amazonaws.com',
'api_name': 'ec2_eu_west',
'country': 'Ireland',
'signature_version': '2',
},
'EU (Ireland)': { # Duplicate from AWS' JSON
'id': 'eu-west-1',
'endpoint': 'ec2.eu-west-1.amazonaws.com',
'api_name': 'ec2_eu_west',
'country': 'Ireland',
'signature_version': '2',
},
'EU (London)': {
'id': 'eu-west-2',
'endpoint': 'ec2.eu-west-2.amazonaws.com',
'api_name': 'ec2_eu_west_london',
'country': 'United Kingdom',
'signature_version': '4',
},
'EU (Paris)': {
'id': 'eu-west-3',
'endpoint': 'ec2.eu-west-3.amazonaws.com',
'api_name': 'ec2_eu_west_paris',
'country': 'France',
'signature_version': '4',
},
'EU (Frankfurt)': {
'id': 'eu-central-1',
'endpoint': 'ec2.eu-central-1.amazonaws.com',
'api_name': 'ec2_eu_central',
        'country': 'Germany',
'signature_version': '4',
},
'EU (Stockholm)': {
'id': 'eu-north-1',
'endpoint': 'ec2.eu-north-1.amazonaws.com',
'api_name': 'ec2_eu_north_stockholm',
        'country': 'Sweden',
'signature_version': '4',
},
# Asia
'Asia Pacific (Mumbai)': {
'id': 'ap-south-1',
'endpoint': 'ec2.ap-south-1.amazonaws.com',
'api_name': 'ec2_ap_south_1',
'country': 'India',
'signature_version': '4',
},
'Asia Pacific (Singapore)': {
'id': 'ap-southeast-1',
'endpoint': 'ec2.ap-southeast-1.amazonaws.com',
'api_name': 'ec2_ap_southeast',
'country': 'Singapore',
'signature_version': '2',
},
'Asia Pacific (Sydney)': {
'id': 'ap-southeast-2',
'endpoint': 'ec2.ap-southeast-2.amazonaws.com',
'api_name': 'ec2_ap_southeast_2',
'country': 'Australia',
'signature_version': '2',
},
'Asia Pacific (Tokyo)': {
'id': 'ap-northeast-1',
'endpoint': 'ec2.ap-northeast-1.amazonaws.com',
'api_name': 'ec2_ap_northeast',
'country': 'Japan',
'signature_version': '2',
},
'Asia Pacific (Seoul)': {
'id': 'ap-northeast-2',
'endpoint': 'ec2.ap-northeast-2.amazonaws.com',
'api_name': 'ec2_ap_northeast',
'country': 'South Korea',
'signature_version': '4',
},
'Asia Pacific (Osaka-Local)': {
'id': 'ap-northeast-3',
'endpoint': 'ec2.ap-northeast-3.amazonaws.com',
'api_name': 'ec2_ap_northeast',
'country': 'Japan',
'signature_version': '4',
},
# Not in JSON
'China (Beijing)': {
'id': 'cn-north-1',
'endpoint': 'ec2.cn-north-1.amazonaws.com.cn',
'api_name': 'ec2_cn_north',
'country': 'China',
'signature_version': '4',
},
'China (Ningxia)': {
'id': 'cn-northwest-1',
'endpoint': 'ec2.cn-northwest-1.amazonaws.com.cn',
'api_name': 'ec2_cn_northwest',
'country': 'China',
'signature_version': '4',
},
}

FILE_HEADER = """
# File generated by contrib/scrape-ec2-sizes.py script - DO NOT EDIT manually
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""".strip()
def download_json():
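    """
    Return an open file object for the pricing JSON, downloading it to
    FILEPATH first unless a cached copy already exists. An atexit handler
    removes a partially downloaded file so an interrupted run does not
    leave a corrupt cache behind.
    """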
if os.path.isfile(FILEPATH):
print("Using data from existing cached file %s" % (FILEPATH))
return open(FILEPATH, 'r')
def remove_partial_cached_file():
if os.path.isfile(FILEPATH):
os.remove(FILEPATH)
# File not cached locally, download data and cache it
with requests.get(URL, stream=True) as response:
atexit.register(remove_partial_cached_file)
total_size_in_bytes = int(response.headers.get('content-length', 0))
progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
chunk_size = 10 * 1024 * 1024
with open(FILEPATH, 'wb') as fp:
            # NOTE: We could use shutil.copyfileobj(response.raw, fp, chunk_size)
            # here, but we read the (massive) response with iter_content() and a
            # large chunk size instead so the progress bar can be updated as the
            # data arrives.
for chunk_data in response.iter_content(chunk_size):
progress_bar.update(len(chunk_data))
fp.write(chunk_data)
progress_bar.close()
atexit.unregister(remove_partial_cached_file)
    return open(FILEPATH, 'r')


def get_json():
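    """
    Return a tuple of (open file object with the pricing JSON, boolean
    indicating whether the data came from an existing cached file).
    """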
if not os.path.isfile(FILEPATH):
return download_json(), False
print("Using data from existing cached file %s" % (FILEPATH))
    return open(FILEPATH, 'r'), True


def filter_extras(extras):
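    """
    Return a copy of a product's attributes without the per-offer keys
    (capacity status, tenancy, usage type, etc.) that are not meaningful
    as instance size metadata.
    """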
return {
key: extras[key] for key in extras
if key not in [
'capacitystatus', 'ebsOptimized', 'operation', 'licenseModel',
'preInstalledSw', 'tenancy', 'usagetype'
]
    }


def parse():
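    """
    Stream-parse the pricing JSON with ijson and return a tuple of
    (sizes, regions): instance size details keyed by instance type, and
    region details (including the instance types offered in each region)
    keyed by region id.
    """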
# Set vars
sizes = {}
regions = {r['id']: r for r in REGION_DETAILS.values()}
for region_id in regions:
regions[region_id]['instance_types'] = []
# Parse
json_file, from_file = get_json()
products_data = ijson.items(json_file, 'products')
try:
products_data = next(products_data)
except ijson.common.IncompleteJSONError as e:
        # This likely indicates that the cached file is incomplete or corrupt,
        # so we delete it and re-download the data.
if from_file:
os.remove(FILEPATH)
json_file, from_file = get_json()
products_data = ijson.items(json_file, 'products')
products_data = next(products_data)
else:
raise e
for sku in products_data:
if products_data[sku].get('productFamily', "unknown") != "Compute Instance":
continue
location = products_data[sku]['attributes'].pop('location')
if location not in REGION_DETAILS:
continue
# Get region & size ID
region_id = REGION_DETAILS[location]['id']
instance_type = products_data[sku]['attributes']['instanceType']
# Add size to region
if instance_type not in regions[region_id]['instance_types']:
regions[region_id]['instance_types'].append(instance_type)
# Parse sizes
if instance_type not in sizes:
for field in IGNORED_FIELDS:
products_data[sku]['attributes'].pop(field, None)
# Compute RAM
ram = int(float(products_data[sku]['attributes']['memory'].split()[0]
.replace(',', '')) * 1024)
            # Compute bandwidth
bw_match = REG_BANDWIDTH.match(products_data[sku]['attributes']['networkPerformance'])
if bw_match is not None:
bandwidth = int(bw_match.groups()[0])
else:
bandwidth = None
sizes[instance_type] = {
'id': instance_type,
'name': instance_type,
'ram': ram,
'bandwidth': bandwidth,
'extra': filter_extras(products_data[sku]['attributes']),
}
if products_data[sku]['attributes'].get('storage') != "EBS only":
match = REG1_STORAGE.match(products_data[sku]['attributes']['storage'])
if match:
disk_number, disk_size = match.groups()
else:
match = REG2_STORAGE.match(products_data[sku]['attributes']['storage'])
if match:
disk_number, disk_size = 1, match.groups()[0]
else:
disk_number, disk_size = 0, '0'
disk_number, disk_size = int(disk_number), int(disk_size.replace(',', ''))
sizes[instance_type]['disk'] = disk_number * disk_size
else:
sizes[instance_type]['disk'] = 0
# Sort
for region in regions:
regions[region]['instance_types'] = sorted(regions[region]['instance_types'])
    return sizes, regions


def dump():
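    """
    Scrape the data and write the generated constants modules
    (ec2_instance_types.py, ec2_region_details_complete.py and
    ec2_region_details_partial.py) under libcloud/compute/constants/.
    """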
print("Scraping size data, this may take up to 10-15 minutes...")
sizes, regions = parse()
separators = (',', ': ')
# 1. Write file with instance types constants
file_path = "libcloud/compute/constants/ec2_instance_types.py"
with open(file_path, "w") as fp:
fp.write(FILE_HEADER + "\n")
fp.write("\n")
fp.write("INSTANCE_TYPES = " + json.dumps(sizes, indent=4, sort_keys=True,
separators=separators).replace('null', 'None'))
print("")
print("Data written to %s" % (file_path))
print("")
# 2. Write file with full details for each region
file_path = "libcloud/compute/constants/ec2_region_details_complete.py"
with open(file_path, "w") as fp:
fp.write(FILE_HEADER + "\n")
fp.write("\n")
fp.write("REGION_DETAILS = " + json.dumps(regions, indent=4, sort_keys=True,
separators=separators).replace('null', 'None'))
print("Data written to %s" % (file_path))
print("")
# 3. Write file with partial region details (everything except instance_types attribute)
regions_partial = {}
keys_to_keep = ["api_name", "country", "id", "endpoint", "signature_version"]
for region_name, region_details in regions.items():
regions_partial[region_name] = {}
for key, value in region_details.items():
if key not in keys_to_keep:
continue
regions_partial[region_name][key] = value
file_path = "libcloud/compute/constants/ec2_region_details_partial.py"
with open(file_path, "w") as fp:
fp.write(FILE_HEADER + "\n")
fp.write("\n")
fp.write("REGION_DETAILS = " + json.dumps(regions_partial, indent=4, sort_keys=True,
separators=separators).replace('null', 'None'))
print("Data written to %s" % (file_path))
print("")
if __name__ == '__main__':
dump()