examples/model_examples/time-series/utils.py - hamilton - Git at Google

 import logging

 import numpy as np
 import pandas as pd

 logger = logging.getLogger(__name__)


 def reduce_mem_usage(df: pd.DataFrame, name: str, verbose=True):
     """Downcast numeric columns of ``df`` to the narrowest dtype that holds their values.

     Integer columns are narrowed to the smallest of int8/int16/int32/int64 whose
     range contains the column's min and max. Float columns are narrowed to
     float16 or float32 when the min and max fit, otherwise stored as float64.
     Columns whose dtype is not listed in ``numerics`` are left untouched.

     Only the value range is checked for floats, so narrowing a float column can
     reduce its precision.

     :param df: the dataframe to shrink; its columns are replaced in place.
     :param name: label that prefixes the log message.
     :param verbose: when true, log the resulting memory usage at INFO level.
     :return: the same ``df`` object that was passed in.
     """
     numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
     start_mem = df.memory_usage().sum() / 1024**2
     for col in df.columns:
         col_type = df[col].dtypes
         if col_type not in numerics:
             continue

         c_min = df[col].min()
         c_max = df[col].max()
         if str(col_type).startswith("int"):
             # Candidates are ordered narrowest first; the first one that fits wins.
             # Bounds are inclusive because the iinfo limits are themselves representable.
             for candidate in (np.int8, np.int16, np.int32, np.int64):
                 limits = np.iinfo(candidate)
                 if limits.min <= c_min and c_max <= limits.max:
                     df[col] = df[col].astype(candidate)
                     break
         else:
             # Pick exactly one target dtype. Casting to float16 and then again to
             # float32 would lose precision without saving the memory.
             # A NaN min/max (e.g. an all-NaN column) fails every comparison and
             # therefore stays float64.
             target = np.float64
             for candidate in (np.float16, np.float32):
                 limits = np.finfo(candidate)
                 if limits.min <= c_min and c_max <= limits.max:
                     target = candidate
                     break
             df[col] = df[col].astype(target)
     end_mem = df.memory_usage().sum() / 1024**2
     if verbose:
         # Guard the ratio so a frame that reports zero bytes does not divide by zero.
         reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
         logger.info(
             "{}: Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)".format(
                 name, end_mem, reduction
             )
         )
     return df
	import logging

	import numpy as np
	import pandas as pd

	logger = logging.getLogger(__name__)


	def reduce_mem_usage(df: pd.DataFrame, name: str, verbose=True):
	    """Downcast numeric columns of ``df`` to the narrowest dtype that holds their values.

	    Integer columns are narrowed to the smallest of int8/int16/int32/int64 whose
	    range contains the column's min and max. Float columns are narrowed to
	    float16 or float32 when the min and max fit, otherwise stored as float64.
	    Columns whose dtype is not listed in ``numerics`` are left untouched.

	    Only the value range is checked for floats, so narrowing a float column can
	    reduce its precision.

	    :param df: the dataframe to shrink; its columns are replaced in place.
	    :param name: label that prefixes the log message.
	    :param verbose: when true, log the resulting memory usage at INFO level.
	    :return: the same ``df`` object that was passed in.
	    """
	    numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
	    start_mem = df.memory_usage().sum() / 1024**2
	    for col in df.columns:
	        col_type = df[col].dtypes
	        if col_type not in numerics:
	            continue

	        c_min = df[col].min()
	        c_max = df[col].max()
	        if str(col_type).startswith("int"):
	            # Candidates are ordered narrowest first; the first one that fits wins.
	            # Bounds are inclusive because the iinfo limits are themselves representable.
	            for candidate in (np.int8, np.int16, np.int32, np.int64):
	                limits = np.iinfo(candidate)
	                if limits.min <= c_min and c_max <= limits.max:
	                    df[col] = df[col].astype(candidate)
	                    break
	        else:
	            # Pick exactly one target dtype. Casting to float16 and then again to
	            # float32 would lose precision without saving the memory.
	            # A NaN min/max (e.g. an all-NaN column) fails every comparison and
	            # therefore stays float64.
	            target = np.float64
	            for candidate in (np.float16, np.float32):
	                limits = np.finfo(candidate)
	                if limits.min <= c_min and c_max <= limits.max:
	                    target = candidate
	                    break
	            df[col] = df[col].astype(target)
	    end_mem = df.memory_usage().sum() / 1024**2
	    if verbose:
	        # Guard the ratio so a frame that reports zero bytes does not divide by zero.
	        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
	        logger.info(
	            "{}: Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)".format(
	                name, end_mem, reduction
	            )
	        )
	    return df