blob: f95d5a0a093bb25b020574fb3d82c76d25a043d0 [file]
from typing import Any, List
import pandas as pd
def data_1() -> pd.DataFrame:
    """Build the first sample frame.

    Returns:
        A four-row DataFrame with an integer-like ``col_1`` and a string
        ``col_2``; the third row holds ``pd.NA`` in both columns.
    """
    columns = {
        "col_1": [3, 2, pd.NA, 0],
        "col_2": ["a", "b", pd.NA, "d"],
    }
    return pd.DataFrame.from_dict(columns)
def data_2() -> pd.DataFrame:
    """Build the second sample frame.

    Returns:
        A five-row DataFrame with a string ``col_1`` (third entry is
        ``pd.NA``) and an integer ``col_2``.
    """
    columns = {
        "col_1": ["a", "b", pd.NA, "d", "e"],
        "col_2": [150, 155, 145, 200, 5000],
    }
    return pd.DataFrame.from_dict(columns)
def data_3() -> pd.DataFrame:
    """Build the third sample frame.

    Returns:
        A five-row DataFrame with two integer columns, ``col_1`` and
        ``col_2``, and no missing values.
    """
    columns = {
        "col_1": [150, 155, 145, 200, 5000],
        "col_2": [10, 23, 32, 50, 0],
    }
    return pd.DataFrame.from_dict(columns)
# Used for data_1 and data_2.
def _filter(some_data: pd.DataFrame) -> pd.DataFrame:
    """Drop every row of ``some_data`` that contains a missing value.

    Args:
        some_data: Frame to clean.

    Returns:
        A new DataFrame holding only the fully populated rows; the original
        index labels of the surviving rows are kept.
    """
    # Spelled-out defaults: remove rows (not columns) with at least one NA.
    complete_rows = some_data.dropna(axis=0, how="any")
    return complete_rows
# Used for data_2.
# This is for value.
def _add_missing_value(some_data: pd.DataFrame, missing_row: List[Any]) -> pd.DataFrame:
    """Return ``some_data`` with ``missing_row`` stored under index label ``-1``.

    The row is written to a copy, so the frame passed in by the caller is
    left untouched (the previous implementation modified it in place).

    Args:
        some_data: Frame to extend.
        missing_row: Values for the new row, one per column of ``some_data``,
            in column order.

    Returns:
        A new DataFrame containing all rows of ``some_data`` plus
        ``missing_row`` at index label ``-1``. If label ``-1`` already
        exists, that row is overwritten instead of appended.
    """
    # Copy first so the assignment below cannot leak into the caller's frame.
    extended = some_data.copy()
    extended.loc[-1] = missing_row
    return extended
# Used for data_2.
# This is for source.
def _join(some_data: pd.DataFrame, other_data: pd.DataFrame) -> pd.DataFrame:
    """Left-join ``other_data`` onto ``some_data`` by matching key columns.

    ``some_data`` is keyed on its ``col_2`` column and ``other_data`` on its
    ``col_1`` column; rows are matched where those keys are equal.

    Args:
        some_data: Left-hand frame; must contain a ``col_2`` column.
        other_data: Right-hand frame; must contain a ``col_1`` column.

    Returns:
        A DataFrame indexed by ``some_data``'s ``col_2`` values, holding the
        remaining columns of both frames.
    """
    left_keyed = some_data.set_index("col_2")
    right_keyed = other_data.set_index("col_1")
    return left_keyed.join(right_keyed)
# Used for data_1 and data_2.
def _sort(some_data: pd.DataFrame) -> pd.DataFrame:
    """Sort ``some_data`` in ascending order of its first column.

    Args:
        some_data: Frame to sort; must have at least one column.

    Returns:
        A new DataFrame with the rows reordered; index labels travel with
        their rows.
    """
    first_column = some_data.columns[0]
    return some_data.sort_values(by=first_column)
if __name__ == "__main__":
    # Pipeline for data_1: drop incomplete rows, then sort by the first column.
    print("Final data 1")
    cleaned_first = _filter(data_1())
    print(_sort(cleaned_first))

    # Pipeline for data_2:
    #   1. drop incomplete rows,
    #   2. add the row ["c", 145] back in,
    #   3. join with data_3 (data_2.col_2 matched against data_3.col_1),
    #   4. sort the joined frame by its first column.
    print("Final data 2")
    cleaned_second = _filter(data_2())
    completed_second = _add_missing_value(cleaned_second, missing_row=["c", 145])
    joined = _join(completed_second, other_data=data_3())
    print(_sort(joined))