| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| import argparse |
| import string |
| from netCDF4 import Dataset, num2date |
| import sys |
| import datetime |
| import csv |
| from collections import OrderedDict |
| import logging |
| |
| |
| #TODO: Get rid of numpy errors? |
| #TODO: Update big SDAP README |
| |
# Named logger used by the functions in this file for progress and error
# messages.
LOGGER = logging.getLogger("cdms_reader")
| |
def assemble_matches(filename):
    """
    Read a CDMS netCDF file and return a list of matches.

    Parameters
    ----------
    filename : str
        The CDMS netCDF file name.

    Returns
    -------
    matches : list
        List of matches. Each list element is an OrderedDict keyed by netCDF
        group name. For match m, netCDF group GROUP (PrimaryData or
        SecondaryData), and group variable VARIABLE:
        matches[m][GROUP]['matchID']: MatchedRecords dimension ID for the match
        matches[m][GROUP]['GROUPID']: GROUP dim dimension ID for the record
        matches[m][GROUP][VARIABLE]: variable value
        matches[m][GROUP]['datetime']: num2date() of the record's 'time'
            value, using the units attribute of the group's 'time' variable

    Raises
    ------
    AssertionError
        If the number of groups differs from the MatchedGroups dimension.
    OSError
        If the netCDF file cannot be read. The error is logged, then
        re-raised.
    """
    try:
        # Open the netCDF file
        with Dataset(filename, 'r') as cdms_nc:
            # Check that the number of groups is consistent w/ the
            # MatchedGroups dimension. Raised explicitly instead of through an
            # `assert` statement so the check still runs under `python -O`;
            # the exception type is the one the assert used to raise.
            if len(cdms_nc.groups) != cdms_nc.dimensions['MatchedGroups'].size:
                raise AssertionError(
                    "Number of groups isn't the same as MatchedGroups dimension.")

            matches = []
            matched_records = cdms_nc.dimensions['MatchedRecords'].size
            match_ids = cdms_nc.variables['matchIDs']

            # Loop through the match IDs to assemble matches
            for match in range(matched_records):
                match_dict = OrderedDict()
                # Read this match's row of record IDs once rather than once
                # per group.
                id_row = match_ids[match]

                # Grab the data from each platform (group) in the match
                for group_num, group in enumerate(cdms_nc.groups):
                    group_data = cdms_nc.groups[group]
                    record = OrderedDict()
                    match_dict[group] = record

                    record['matchID'] = match
                    ID = id_row[group_num]
                    record[group + 'ID'] = ID
                    for var in group_data.variables.keys():
                        record[var] = group_data[var][ID]

                    # Create a datetime field from the record's timestamp
                    record['datetime'] = num2date(record['time'],
                                                  group_data['time'].units)

                LOGGER.info(match_dict)
                matches.append(match_dict)

            return matches
    except OSError:
        # Log with the traceback, then let the caller see the original error.
        LOGGER.exception("Error reading netCDF file %s", filename)
        raise
| |
def assemble_matches_by_primary(filename):
    """
    Read a CDMS netCDF file and return a list of matches, in which secondary
    data points are grouped together by their primary data point match.

    This function returns matches in a different order than the
    'assemble_matches' function. Here the list is indexed by the primary
    record ID taken from the first column of 'matchIDs', so all secondary
    data for a primary record is found in a single list element.

    Parameters
    ----------
    filename : str
        The CDMS netCDF file name.

    Returns
    -------
    matches : list
        One OrderedDict per entry of the PrimaryData 'dim' dimension. An
        element stays an empty OrderedDict when no row of 'matchIDs' refers
        to that primary record. Only the first group (treated as primary) and
        the second group (treated as secondary) are populated.

        For element m, netCDF group GROUP, and group variable VARIABLE:

        Primary group:
        matches[m][GROUP]['matchID']: list of MatchedRecords dimension IDs
        matches[m][GROUP]['GROUPID']: GROUP dim dimension ID for the record
        matches[m][GROUP][VARIABLE]: variable value
        matches[m][GROUP]['datetime']: num2date() of the 'time' value

        Secondary group (every entry is a list with one item per matched
        secondary record, in 'matchIDs' row order):
        matches[m][GROUP]['matchID']: MatchedRecords dimension IDs
        matches[m][GROUP]['GROUPID']: GROUP dim dimension IDs
        matches[m][GROUP][VARIABLE]: variable values
        matches[m][GROUP]['datetime']: num2date() of each 'time' value

        ex. To access the first secondary time value available for a given
        match:
        matches[m]['SecondaryData']['time'][0]

    Raises
    ------
    AssertionError
        If the number of groups differs from the MatchedGroups dimension.
    OSError
        If the netCDF file cannot be read. The error is logged, then
        re-raised.
    """
    try:
        # Open the netCDF file
        with Dataset(filename, 'r') as cdms_nc:
            # Check that the number of groups is consistent w/ the
            # MatchedGroups dimension. Raised explicitly instead of through an
            # `assert` statement so the check still runs under `python -O`;
            # the exception type is the one the assert used to raise.
            if len(cdms_nc.groups) != cdms_nc.dimensions['MatchedGroups'].size:
                raise AssertionError(
                    "Number of groups isn't the same as MatchedGroups dimension.")

            matched_records = cdms_nc.dimensions['MatchedRecords'].size
            primary_matches = cdms_nc.groups['PrimaryData'].dimensions['dim'].size
            # One distinct dictionary per primary record. `[OrderedDict()] * n`
            # would place the same object in every slot.
            matches = [OrderedDict() for _ in range(primary_matches)]
            match_ids = cdms_nc.variables['matchIDs']

            for match in range(matched_records):
                # Read this match's row of record IDs once.
                id_row = match_ids[match]
                PID = int(id_row[0])
                entry = matches[PID]

                for group_num, group in enumerate(cdms_nc.groups):
                    group_data = cdms_nc.groups[group]

                    if group_num == 0:  # primary
                        if group not in entry:  # initialization
                            entry[group] = OrderedDict()
                            entry[group]['matchID'] = []
                        record = entry[group]

                        record['matchID'].append(match)
                        ID = id_row[group_num]
                        record[group + 'ID'] = ID

                        for var in group_data.variables.keys():
                            record[var] = group_data[var][ID]

                        record['datetime'] = num2date(record['time'],
                                                      group_data['time'].units)

                    elif group_num == 1:  # secondary
                        if group not in entry:  # initialization
                            entry[group] = OrderedDict()
                            entry[group]['matchID'] = []
                            entry[group][group + 'ID'] = []
                            entry[group]['datetime'] = []
                        record = entry[group]

                        record['matchID'].append(match)
                        ID = id_row[group_num]
                        record[group + 'ID'].append(ID)

                        for var in group_data.variables.keys():
                            if var not in record:
                                record[var] = []
                            record[var].append(group_data[var][ID])

                        # Convert only the time value appended for this
                        # match. Converting the whole accumulated list and
                        # taking element 0 would repeat the first secondary
                        # record's datetime for every later record.
                        dt = num2date(record['time'][-1:],
                                      group_data['time'].units)
                        record['datetime'].append(dt[0])

            return matches
    except OSError:
        # Log with the traceback, then let the caller see the original error.
        LOGGER.exception("Error reading netCDF file %s", filename)
        raise
| |
def matches_to_csv(matches, csvfile):
    """
    Write the CDMS matches to a CSV file. Include a header of column names
    which are based on the group and variable names from the netCDF file.

    Parameters
    ----------
    matches : list
        The list of dictionaries containing the CDMS matches as returned from
        assemble_matches. The header is built from the first element only.
        An empty list produces an empty output file.
    csvfile : str
        The name of the CSV output file.

    Raises
    ------
    OSError
        If the CSV file cannot be written. The error is logged, then
        re-raised.
    """
    # Create a header for the CSV. Column names are GROUP_VARIABLE or
    # GROUP_GROUPID.
    header = []
    if matches:
        for group_name, group_data in matches[0].items():
            header.extend(group_name + "_" + field for field in group_data)

    try:
        # newline='' lets the csv module control line endings itself, as its
        # documentation requires; otherwise platforms that translate '\n' on
        # write end every row with '\r\r\n'.
        with open(csvfile, 'w', newline='') as output_file:
            if not matches:
                # Nothing to describe or write: leave the file empty.
                return
            csv_writer = csv.writer(output_file)
            csv_writer.writerow(header)
            for match in matches:
                csv_writer.writerow(
                    [value
                     for group_data in match.values()
                     for value in group_data.values()])
    except OSError:
        # Log with the traceback, then let the caller see the original error.
        LOGGER.exception("Error writing CSV file %s", csvfile)
        raise
| |
def get_globals(filename):
    """
    Write the CDMS global attributes to a text file. Additionally,
    within the file there will be a description of where all the different
    outputs go and how to best utilize this program.

    The text file is written next to the input file, with the input's final
    extension replaced by '.txt'.

    Parameters
    ----------
    filename : str
        The name of the original '.nc' input file.

    Raises
    ------
    OSError
        If the netCDF file cannot be opened or the text file cannot be
        written. The error is logged, a short notice is printed, and the
        error is re-raised.
    """
    # Imported locally because the module-level imports do not include os.
    import os

    readme_text = "".join((
        "README / cdms_reader.py Program Use and Description:\n",
        "\nThe cdms_reader.py program reads a CDMS netCDF (a NETCDF file with a matchIDs variable)\n",
        "file into memory, assembles a list of matches of primary and secondary data\n",
        "and optionally\n",
        "output the matches to a CSV file. Each matched pair contains one primary\n",
        "data record and one secondary data record.\n",
        "\nBelow, this file wil list the global attributes of the .nc (NETCDF) file.\n",
        "If you wish to see a full dump of the data from the .nc file,\n",
        "please utilize the ncdump command from NETCDF (or look at the CSV file).\n",
    ))

    # Swap only the final extension. A plain str.replace(".nc", ".txt") also
    # rewrites ".nc" inside directory names, and returns the input path
    # unchanged when it contains no ".nc" -- which would make the open(...,
    # "w") below truncate the input data file.
    root, extension = os.path.splitext(filename)
    if extension == ".txt":
        # Input already ends in .txt: append instead so it is not overwritten.
        txtName = filename + ".txt"
    else:
        txtName = root + ".txt"

    try:
        with Dataset(filename, "r", format="NETCDF4") as ncFile:
            with open(txtName, "w") as txt:
                txt.write(readme_text)
                txt.write("\nGlobal Attributes:")
                for attr_name in ncFile.ncattrs():
                    txt.write(f'\t :{attr_name} = "{ncFile.getncattr(attr_name)}" ;\n')
    except OSError:
        # Log with the traceback, then let the caller see the original error.
        LOGGER.exception("Error reading netCDF file %s", filename)
        print("Error reading file!")
        raise
| |
def create_logs(user_option, logName):
    """
    Configure where the CDMS log information goes. The log can be written to
    a file, printed to stdout, or not configured at all.

    Parameters
    ----------
    user_option : str
        The result of the arg.log 's interpretation of
        what option the user selected:
        'N' : do not configure logging; print a note instead.
        '1' : send log records to stdout.
        'Y' : send log records to the file logName.
        Any other value is treated like 'Y', and a notice about the bad
        option is printed.
    logName : str
        The name of the log file we wish to write to,
        assuming the user did not use the -l option.
    """
    if user_option == 'N':
        print("** Note: No log was created **")
        return

    if user_option == '1':
        # prints the log contents to stdout
        handler = logging.StreamHandler(sys.stdout)
    else:
        # prints log to a .log file
        handler = logging.FileHandler(logName)

    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S',
                        handlers=[handler])

    # The option is a string, so it is compared against the strings '1' and
    # 'Y'; comparing against the integer 1 can never match a str option.
    if user_option not in ('1', 'Y'):
        print(f"** Bad usage of log option. Log will print to {logName} **")
| |
| |
| |
| |
if __name__ == '__main__':
    # Execution:
    #     python cdms_reader.py filename
    #     OR
    #     python3 cdms_reader.py filename
    #     OR
    #     python3 cdms_reader.py filename -c -g
    #     OR
    #     python3 cdms_reader.py filename --csv --meta
    #
    # Note (For Help Try):
    #     python3 cdms_reader.py -h
    #     OR
    #     python3 cdms_reader.py --help

    # Imported locally because the module-level imports do not include os.
    import os

    def _output_path(input_path, extension):
        """Return input_path with its final extension replaced by extension."""
        root, current_extension = os.path.splitext(input_path)
        if current_extension == extension:
            # Never hand back the input path itself: the log handler would
            # append to it and the CSV writer would overwrite it.
            return input_path + extension
        return root + extension

    u0 = '\n%(prog)s -h OR --help \n'
    u1 = '%(prog)s filename -c -g\n%(prog)s filename --csv --meta\n'
    u2 = 'Use -l OR -l1 to modify destination of logs'
    p = argparse.ArgumentParser(usage=u0 + u1 + u2)

    # below block is to customize user options
    p.add_argument('filename', help='CDMS netCDF file to read')
    p.add_argument('-c', '--csv', nargs='?', const='Y', default='N',
                   help='Use -c or --csv to retrieve CSV output')
    p.add_argument('-g', '--meta', nargs='?', const='Y', default='N',
                   help='Use -g or --meta to retrieve global attributes / metadata')
    p.add_argument('-l', '--log', nargs='?', const='N', default='Y',
                   help='Use -l or --log to AVOID creating log files, OR use -l1 to print to stdout/console')

    # arguments are processed by the next line
    args = p.parse_args()

    # Output names swap only the input's final extension. A plain
    # str.replace(".nc", ...) returns the input path unchanged when it has no
    # ".nc", which would point the log and the CSV at the data file itself.
    logName = _output_path(args.filename, ".log")
    create_logs(args.log, logName)

    cdms_matches = assemble_matches(args.filename)

    if args.csv == 'Y':
        matches_to_csv(cdms_matches, _output_path(args.filename, ".csv"))

    if args.meta == 'Y':
        get_globals(args.filename)
| |