| #!/usr/bin/env python |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import os |
| import re |
| import json |
| import copy |
| import time |
| from collections import defaultdict, OrderedDict |
| |
| import requests |
| import _jsonnet # pylint: disable=import-error |
| |
| LINUX_PRICING_URLS = [ |
| # Deprecated instances (JSON format) |
| "https://aws.amazon.com/ec2/pricing/json/linux-od.json", |
| # Previous generation instances (JavaScript file) |
| "https://a0.awsstatic.com/pricing/1/ec2/previous-generation/linux-od.min.js", |
| # New generation instances (JavaScript file) |
| # Using other endpoint atm |
| # 'https://a0.awsstatic.com/pricing/1/ec2/linux-od.min.js' |
| ] |
| |
| EC2_REGIONS = [ |
| "us-east-1", |
| "us-east-2", |
| "us-west-1", |
| "us-west-2", |
| "us-gov-west-1", |
| "eu-west-1", |
| "eu-west-2", |
| "eu-west-3", |
| "eu-north-1", |
| "eu-south-1", |
| "eu-central-1", |
| "ca-central-1", |
| "ap-southeast-1", |
| "ap-southeast-2", |
| "ap-northeast-1", |
| "ap-northeast-2", |
| "ap-south-1", |
| "sa-east-1", |
| "cn-north-1", |
| "ap-east-1", |
| ] |
| |
| EC2_INSTANCE_TYPES = [ |
| "t1.micro", |
| "m1.small", |
| "m1.medium", |
| "m1.large", |
| "m1.xlarge", |
| "m2.xlarge", |
| "m2.2xlarge", |
| "m2.4xlarge", |
| "m3.medium", |
| "m3.large", |
| "m3.xlarge", |
| "m3.2xlarge", |
| "c1.medium", |
| "c1.xlarge", |
| "cc1.4xlarge", |
| "cc2.8xlarge", |
| "c3.large", |
| "c3.xlarge", |
| "c3.2xlarge", |
| "c3.4xlarge", |
| "c3.8xlarge", |
| "d2.xlarge", |
| "d2.2xlarge", |
| "d2.4xlarge", |
| "d2.8xlarge", |
| "cg1.4xlarge", |
| "g2.2xlarge", |
| "g2.8xlarge", |
| "cr1.8xlarge", |
| "hs1.4xlarge", |
| "hs1.8xlarge", |
| "i2.xlarge", |
| "i2.2xlarge", |
| "i2.4xlarge", |
| "i2.8xlarge", |
| "i3.large", |
| "i3.xlarge", |
| "i3.2xlarge", |
| "i3.4xlarge", |
| "i3.8xlarge", |
| "i3.16large", |
| "r3.large", |
| "r3.xlarge", |
| "r3.2xlarge", |
| "r3.4xlarge", |
| "r3.8xlarge", |
| "r4.large", |
| "r4.xlarge", |
| "r4.2xlarge", |
| "r4.4xlarge", |
| "r4.8xlarge", |
| "r4.16xlarge", |
| "t2.micro", |
| "t2.small", |
| "t2.medium", |
| "t2.large", |
| "x1.32xlarge", |
| ] |
| |
| # Maps EC2 region name to region name used in the pricing file |
| REGION_NAME_MAP = { |
| "us-east": "ec2_us_east", |
| "us-east-1": "ec2_us_east", |
| "us-east-2": "ec2_us_east_ohio", |
| "us-west": "ec2_us_west", |
| "us-west-1": "ec2_us_west", |
| "us-west-2": "ec2_us_west_oregon", |
| "eu-west-1": "ec2_eu_west", |
| "eu-west-2": "ec2_eu_west_london", |
| "eu-west-3": "ec2_eu_west_3", |
| "eu-ireland": "ec2_eu_west", |
| "eu-south-1": "ec2_eu_south", |
| "eu-central-1": "ec2_eu_central", |
| "ca-central-1": "ec2_ca_central_1", |
| "apac-sin": "ec2_ap_southeast", |
| "ap-southeast-1": "ec2_ap_southeast", |
| "apac-syd": "ec2_ap_southeast_2", |
| "ap-southeast-2": "ec2_ap_southeast_2", |
| "apac-tokyo": "ec2_ap_northeast", |
| "ap-northeast-1": "ec2_ap_northeast", |
| "ap-northeast-2": "ec2_ap_northeast", |
| "ap-south-1": "ec2_ap_south_1", |
| "sa-east-1": "ec2_sa_east", |
| "us-gov-west-1": "ec2_us_govwest", |
| "cn-north-1": "ec2_cn_north", |
| "ap-east-1": "ec2_ap_east", |
| } |
| |
| INSTANCE_SIZES = [ |
| "micro", |
| "small", |
| "medium", |
| "large", |
| "xlarge", |
| "x-large", |
| "extra-large", |
| ] |
| |
| RE_NUMERIC_OTHER = re.compile(r"(?:([0-9]+)|([-A-Z_a-z]+)|([^-0-9A-Z_a-z]+))") |
| |
| BASE_PATH = os.path.dirname(os.path.abspath(__file__)) |
| PRICING_FILE_PATH = os.path.join(BASE_PATH, "../libcloud/data/pricing.json") |
| PRICING_FILE_PATH = os.path.abspath(PRICING_FILE_PATH) |
| |
| |
| def scrape_ec2_pricing(): |
| result = defaultdict(OrderedDict) |
| os_map = {"linux": "ec2_linux", "windows-std": "ec2_windows"} |
| for item in os_map.values(): |
| result[item] = {} |
| for url in LINUX_PRICING_URLS: |
| response = requests.get(url) |
| |
| if re.match(r".*?\.json$", url): |
| data = response.json() |
| print("Sample response: %s..." % (str(data)[:100])) |
| elif re.match(r".*?\.js$", url): |
| data = response.content.decode("utf-8") |
| print("Sample response: %s..." % (data[:100])) |
| match = re.match(r"^.*callback\((.*?)\);?$", data, re.MULTILINE | re.DOTALL) |
| data = match.group(1) |
| # NOTE: We used to use demjson, but it's not working under Python 3 and new version of |
| # setuptools anymore so we use jsonnet |
| # demjson supports non-strict mode and can parse unquoted objects |
| data = json.loads(_jsonnet.evaluate_snippet("snippet", data)) |
| regions = data["config"]["regions"] |
| |
| for region_data in regions: |
| region_name = region_data["region"] |
| instance_types = region_data["instanceTypes"] |
| |
| for instance_type in instance_types: |
| sizes = instance_type["sizes"] |
| for size in sizes: |
| if not result["ec2_linux"].get(size["size"], False): |
| result["ec2_linux"][size["size"]] = {} |
| price = size["valueColumns"][0]["prices"]["USD"] |
| if str(price).lower() == "n/a": |
| # Price not available |
| continue |
| |
| result["ec2_linux"][size["size"]][region_name] = float(price) |
| |
| res = defaultdict(OrderedDict) |
| url = "https://calculator.aws/pricing/1.0/" "ec2/region/{}/ondemand/{}/index.json" |
| instances = set() |
| for OS in ["linux", "windows-std"]: |
| res[os_map[OS]] = {} |
| for region in EC2_REGIONS: |
| res[os_map[OS]][region] = {} |
| full_url = url.format(region, OS) |
| response = requests.get(full_url) |
| if response.status_code != 200: |
| print( |
| "Skipping URL %s which returned non 200-status code (%s)" |
| % (full_url, response.status_code) |
| ) |
| continue |
| data = response.json() |
| |
| for entry in data["prices"]: |
| instance_type = entry["attributes"].get("aws:ec2:instanceType", "") |
| instances.add(instance_type) |
| price = entry["price"].get("USD", 0) |
| res[os_map[OS]][region][instance_type] = price |
| for item in os_map.values(): |
| for instance in instances: |
| if not result[item].get(instance, False): |
| result[item][instance] = {} |
| for region in EC2_REGIONS: |
| if res[item][region].get(instance, False): |
| result[item][instance][region] = float(res[item][region][instance]) |
| |
| return result |
| |
| |
| def update_pricing_file(pricing_file_path, pricing_data): |
| with open(pricing_file_path, "r") as fp: |
| content = fp.read() |
| |
| data = json.loads(content) |
| original_data = copy.deepcopy(data) |
| |
| data["compute"].update(pricing_data) |
| |
| if data == original_data: |
| # Nothing has changed, bail out early and don't update "updated" attribute |
| print("Nothing has changed, skipping update.") |
| return |
| |
| data["updated"] = int(time.time()) |
| |
| # Always sort the pricing info |
| data = sort_nested_dict(data) |
| |
| content = json.dumps(data, indent=4) |
| lines = content.splitlines() |
| lines = [line.rstrip() for line in lines] |
| content = "\n".join(lines) |
| |
| with open(pricing_file_path, "w") as fp: |
| fp.write(content) |
| |
| |
| def sort_nested_dict(value): |
| """ |
| Recursively sort a nested dict. |
| """ |
| result = OrderedDict() |
| |
| for key, value in sorted(value.items(), key=sort_key_by_numeric_other): |
| if isinstance(value, (dict, OrderedDict)): |
| result[key] = sort_nested_dict(value) |
| else: |
| result[key] = value |
| |
| return result |
| |
| |
| def sort_key_by_numeric_other(key_value): |
| """ |
| Split key into numeric, alpha and other part and sort accordingly. |
| """ |
| result = [] |
| |
| for (numeric, alpha, other) in RE_NUMERIC_OTHER.findall(key_value[0]): |
| numeric = int(numeric) if numeric else -1 |
| alpha = INSTANCE_SIZES.index(alpha) if alpha in INSTANCE_SIZES else alpha |
| alpha = str(alpha) |
| item = tuple([numeric, alpha, other]) |
| result.append(item) |
| |
| return tuple(result) |
| |
| |
| def main(): |
| print("Scraping EC2 pricing data (this may take up to 2 minutes)....") |
| |
| pricing_data = scrape_ec2_pricing() |
| update_pricing_file(pricing_file_path=PRICING_FILE_PATH, pricing_data=pricing_data) |
| |
| print("Pricing data updated") |
| |
| |
| if __name__ == "__main__": |
| main() |