# blob: 58263da4ec84d68b7cc27ab31cde0c65c07ddd86 [file] [log] [blame]
# Testing datasets for the DL predictor.
# Column names; each fixture row below has one value per name, in this order.
factdata_columns = [
    'uckey',
    'count_array',
    'hour',
    'day',
    'bucket_id',
]


def _factdata_rows(uckeys):
    """Return one fact row per uckey: (uckey, count_array, hour, day, bucket_id).

    The non-uckey values are identical for the input and the expected fixture,
    so they are written once here.  They are rebuilt on every call so that the
    two fixtures never share a list object.
    """
    tails = (
        (['1:21'], 20, '2020-03-21', 149),
        (['1:2', '3:2'], 20, '2020-03-22', 149),
        (['2:23', '1:169'], 20, '2020-03-23', 149),
    )
    return [(uckey,) + tail for uckey, tail in zip(uckeys, tails)]


# Input rows: uckeys with eight comma-separated fields.
factdata_tested = _factdata_rows([
    'banner,b8nyhficur,4G,,,CPM,1,249',
    'banner,d3lxq4l6rn,WIFI,g_m,3,CPM,3,28',
    'banner,k5hz58vpst,4G,,,CPM,2,213',
])

# Expected rows: the same uckeys with their seventh comma-separated field
# removed (the "drop region" step, going by the fixture name).
factdata_expected_drop_region = _factdata_rows([
    'banner,b8nyhficur,4G,,,CPM,249',
    'banner,d3lxq4l6rn,WIFI,g_m,3,CPM,28',
    'banner,k5hz58vpst,4G,,,CPM,213',
])
# Column names of the region-mapping table, in their plain and 'ipl_'-prefixed forms.
region_mapping_columns = ['new', 'old']
region_mapping_columns_renamed = ['ipl_new', 'ipl_old']

# Mapping rows, one value per column of region_mapping_columns.
# NOTE(review): 249 and 213 sit under the column named 'new', yet the expected
# uckeys below end in the values from the 'old' column (80, 76) -- confirm the
# column order against the mapping code.
region_mapping_tested = [(249, 80), (28, 28), (213, 76)]

# Expected fact rows after mapping: the last uckey field holds the second
# element of the corresponding region_mapping_tested pair.
factdata_expected_region_mapped = [
    (
        'banner,b8nyhficur,4G,,,CPM,80',
        ['1:21'],
        20,
        '2020-03-21',
        149,
    ),
    (
        'banner,d3lxq4l6rn,WIFI,g_m,3,CPM,28',
        ['1:2', '3:2'],
        20,
        '2020-03-22',
        149,
    ),
    (
        'banner,k5hz58vpst,4G,,,CPM,76',
        ['2:23', '1:169'],
        20,
        '2020-03-23',
        149,
    ),
]
# Bucket count used by the re-bucketing fixtures; every expected bucket id
# below is smaller than this value.
new_bucket_size = 10


def _new_bucket_rows(bucket_ids):
    """Return the three re-bucketing rows, each ending with the given bucket id.

    The input and expected fixtures differ only in that last element.  The
    leading values are rebuilt on every call so no list object is shared.
    """
    heads = (
        ('banner,b8nyhficur,4G,,,CPM,80', ['1:21'], 20, '2020-03-21'),
        ('banner,d3lxq4l6rn,WIFI,g_m,3,CPM,28', ['1:2', '3:2'], 20, '2020-03-22'),
        ('banner,k5hz58vpst,4G,,,CPM,76', ['2:23', '1:169'], 20, '2020-03-23'),
    )
    return [head + (bucket_id,) for head, bucket_id in zip(heads, bucket_ids)]


factdata_new_bucket_tested = _new_bucket_rows([149, 250, 301])
factdata_new_bucket_expected = _new_bucket_rows([9, 3, 0])

# The time-series fixture takes the re-bucketed rows as its input; this is an
# alias of the very same list object, not a copy.
factdata_main_ts_tested = factdata_new_bucket_expected
factdata_main_ts_columns = [
    'uckey',
    'price_cat',
    'ts',
    'a',
    'g',
    't',
    'si',
    'r',
]
# One expected row per (uckey, price_cat) pair; 'ts' has two slots, with None
# in the slots that carry no value.
factdata_main_ts_expected = [
    ('banner,b8nyhficur,4G,,,CPM,80', '1',
     [21, None], '', '', '4G', 'b8nyhficur', ''),
    ('banner,d3lxq4l6rn,WIFI,g_m,3,CPM,28', '1',
     [None, 2], '3', 'g_m', 'WIFI', 'd3lxq4l6rn', ''),
    ('banner,d3lxq4l6rn,WIFI,g_m,3,CPM,28', '3',
     [None, 2], '3', 'g_m', 'WIFI', 'd3lxq4l6rn', ''),
    ('banner,k5hz58vpst,4G,,,CPM,76', '2',
     [None, None], '', '', '4G', 'k5hz58vpst', ''),
    ('banner,k5hz58vpst,4G,,,CPM,76', '1',
     [None, None], '', '', '4G', 'k5hz58vpst', ''),
]

# Thresholds for the weak-uckey removal fixture.
popularity_th = 5
datapoints_min_th = 0.15
# Time-series columns followed by 'imp' and 'p'.
factdata_remove_weak_uckeys_columns = factdata_main_ts_columns + ['imp', 'p']
# Only one row is expected to remain; its 'imp' is 21 and its 'p' is 10.5.
factdata_remove_weak_uckeys_expected = [
    ('banner,b8nyhficur,4G,,,CPM,80', '1',
     [21, None], '', '', '4G', 'b8nyhficur', '',
     21, 10.5),
]

# Parameters for the sparse-uckey flagging fixture.
datapoints_th_uckeys = 0.5
popularity_norm = 0.01
# Same columns as the weak-uckey fixture, plus the trailing 'sparse' flag.
factdata_is_spare_uckeys_columns = factdata_remove_weak_uckeys_columns + ['sparse']
factdata_is_spare_uckeys_expected = [
    ('banner,b8nyhficur,4G,,,CPM,80', '1',
     [21, None], '', '', '4G', 'b8nyhficur', '',
     21, 10.5, True),
]
median_popularity_of_dense = 2

# Columns of the clustering fixture before any one-hot columns are appended.
factdata_cluster_columns = [
    'uckey', 'price_cat',
    'a', 'g', 't', 'si', 'r',
    'ts', 'imp', 'p', 'p_n',
]


def _ohe_columns(prefix, feature_values):
    """Return the cluster columns followed by one prefixed column per feature value."""
    return factdata_cluster_columns + [prefix + value for value in feature_values]


# For each one-hot encoded feature: its value list, then the resulting column
# list (one '<feature>_<value>' column per value, in value-list order).
a_feature_value_list = ['', '1', '2', '3', '4', '5', '6']
factdata_cluster_columns_ohe_a = _ohe_columns('a_', a_feature_value_list)
g_feature_value_list = ['', 'g_f', 'g_m', 'g_x']
factdata_cluster_columns_ohe_g = _ohe_columns('g_', g_feature_value_list)
t_feature_value_list = ['UNKNOWN', '3G', '4G', 'WIFI', '2G']
factdata_cluster_columns_ohe_t = _ohe_columns('t_', t_feature_value_list)


def _cluster_row(*ohe_flags):
    """Return the single clustering fixture row, followed by ``ohe_flags``.

    All four fixtures below share the same leading eleven values and differ
    only in the one-hot flags appended at the end.  A fresh tuple, with fresh
    dict and list members, is built on every call so that the fixtures do not
    share mutable objects.
    """
    # 90 data points; they add up to the 'imp' value 299788 used below.
    ts = [
        2713, 3151, 1627, 2276, 2743, 2726, 2910, 3306, 3983, 3136,
        3127, 3551, 3664, 3895, 3601, 3702, 3857, 3791, 4033, 3741,
        3923, 3847, 3887, 3910, 3667, 3537, 3482, 3621, 4027, 3672,
        3858, 3745, 3885, 4100, 3914, 3888, 3991, 3864, 3814, 3794,
        3880, 3789, 2724, 3197, 2648, 2362, 2872, 2922, 2755, 2679,
        2751, 2579, 2571, 2708, 3026, 3107, 2169, 508, 3892, 3185,
        3336, 3424, 3662, 3781, 3883, 3609, 520, 3704, 3848, 3975,
        3837, 3850, 3705, 3888, 3865, 3955, 3591, 3340, 3103, 3706,
        3701, 3488, 3319, 3145, 3136, 2992, 3149, 2876, 2590, 2527,
    ]
    base = (
        'native,a47eavw7ex,WIFI,g_f,5,CPC,35',
        '1',
        {'5': 1.0},
        {'g_f': 1.0},
        {'WIFI': 1.0},
        {'a47eavw7ex': 1.0},
        {'': 1.0},
        ts,
        299788,
        3330.977783203125,
        -0.3134732246398926,
    )
    return base + ohe_flags


# Input row, without one-hot columns.
factdata_cluster_tested = [_cluster_row()]

# Expected rows: a 1.0 marks the position of the row's value inside the
# matching feature value list ('5', 'g_f' and 'WIFI' respectively).
factdata_cluster_expected_ohe_a = [_cluster_row(0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0)]
factdata_cluster_expected_ohe_g = [_cluster_row(0.0, 1.0, 0.0, 0.0)]
factdata_cluster_expected_ohe_t = [_cluster_row(0.0, 0.0, 0.0, 1.0, 0.0)]