# blob: 449aba2aa295d8ee47c42d78877a27105db1c49a [file] [log] [blame]
# Stage-specific settings are nested under their stage so that names repeated
# across stages (for example output_table_name) stay distinct keys instead of
# colliding at the top level.
test:
  table_name: 'factdata_hq_09222020'
  timer: 60

pipeline:
  # This is done on whole bucketized data.
  time_series:
    factdata_table_name: 'unittest_factdata_10122020'
    conditions: []
    # Data is used for training from -<prepare_past_days> to -1 (yesterday).
    yesterday: '2020-03-22'
    prepare_past_days: 2
    # Maximum number of buckets to process, starting from 0.
    bucket_size: 10
    # Size of the bucket batch that is processed in one iteration.
    bucket_step: 1
    # Name of the hive table that keeps cleansed and normalized data before
    # writing it into tfrecords; over-writes the existing table.
    output_table_name: 'unittest_pipeline_ts_10122020'

  # This is done on whole data, not slicing on buckets.
  uckey_clustering:
    pre_cluster_table_name: 'unittest_pipeline_pre_cluster_10122020'
    create_pre_cluster_table: true
    output_table_name: 'unittest_pipeline_cluster_10122020'
    # NOTE(review): the seven keys below are assumed to be children of
    # cluster_size (it had no value of its own) - confirm against the consumer.
    cluster_size:
      number_of_virtual_clusters: 0
      datapoints_min_th: 0.15
      datapoints_th_uckeys: 0.5
      datapoints_th_clusters: 0.5
      popularity_norm: 0.01
      popularity_th: 5
      median_popularity_of_dense: 2

  # This is done on whole data, not slicing on buckets.
  normalization:
    output_table_name: 'unittest_trainready_10132020'
    # Each entry maps a column name to its list of string values; the order
    # of every list is kept exactly as it was.
    columns:
      'price_cat': ['1', '2', '3']
      'a': ['', '1', '2', '3', '4', '5', '6']
      'g': ['', 'g_f', 'g_m', 'g_x']
      't': ['UNKNOWN', '3G', '4G', 'WIFI', '2G']
      'r': [
        '', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
        '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24',
        '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36',
        '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48',
        '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60',
        '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72',
        '73', '74', '75', '76', '77', '78', '79', '80', '81', '82']
      'si': [
        'd4d7362e879511e5bdec00163e291137', 'b6le0s4qo8', 'd47737w664',
        'd971z9825e', '72bcd2720e5011e79bc8fa163e05184e', 'j1430itab9wj3b',
        'w3wx3nv9ow5i97', 'g9iv6p4sjy', '71bcd2720e5011e79bc8fa163e05184e',
        '7b0d7b55ab0c11e68b7900163e3e481d', 'm1040xexan', 'x2fpfbm8rt', '05',
        '66bcd2720e5011e79bc8fa163e05184e', 'g7m2zuits8', 'l2d4ec6csv',
        'a8syykhszz', 'w9fmyd5r0i', 'a47eavw7ex', 'p7gsrebd4m', 'q4jtehrqn2',
        '03', 'l03493p0r3', 's4z85pd1h8', 'f1iprgyl13',
        '17dd6d8098bf11e5bdec00163e291137', 'e351de37263311e6af7500163e291137',
        '68bcd2720e5011e79bc8fa163e05184e', '5cd1c663263511e6af7500163e291137',
        'k4werqx13k', 'x0ej5xhk60kjwq', '04',
        'a290af82884e11e5bdec00163e291137', '15e9ddce941b11e5bdec00163e291137',
        'z041bf6g4s', 'd9jucwkpr3', 'c4n08ku47t']
    # NOTE(review): holidays is assumed to belong to normalization - confirm
    # against the consumer. Dates stay quoted so they load as strings.
    holidays: [
      '2019-11-09', '2019-11-10', '2019-11-11', '2019-11-25', '2019-11-26',
      '2019-11-27', '2019-11-28', '2019-12-24', '2019-12-25', '2019-12-26',
      '2019-12-31', '2020-01-01', '2020-01-02', '2020-01-19', '2020-01-20',
      '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-25',
      '2020-02-08']