| --------------------------------------------------------------------------- |
| -- Rules: |
| -- ------ |
| -- 1) Any DB objects should be created w/o schema prefix, |
| -- since this file is executed in a separate schema context. |
| -- 2) There should be no DROP statements in this script, since |
| -- all objects created in the default schema will be cleaned-up outside. |
| --------------------------------------------------------------------------- |
| m4_include(`SQLCommon.m4') |
| |
| --------------------------------------------------------------------------- |
| -- Build vocabulary: |
| --------------------------------------------------------------------------- |
| CREATE TABLE lda_vocab(wordid INT4, word TEXT); |
| |
| INSERT INTO lda_vocab VALUES |
| (0, 'code'), (1, 'data'), (2, 'graph'), (3, 'image'), (4, 'input'), (5, |
| 'layer'), (6, 'learner'), (7, 'loss'), (8, 'model'), (9, 'network'), (10, |
| 'neuron'), (11, 'object'), (12, 'output'), (13, 'rate'), (14, 'set'), (15, |
| 'signal'), (16, 'sparse'), (17, 'spatial'), (18, 'system'), (19, 'training'); |
| |
| --------------------------------------------------------------------------- |
| -- Build training dataset: |
| --------------------------------------------------------------------------- |
| CREATE TABLE lda_training |
| ( |
| docid INT4, |
| wordid INT4, |
| count INT4 |
| ); |
| |
| INSERT INTO lda_training VALUES |
| (0, 0, 2),(0, 3, 2),(0, 5, 1),(0, 7, 1),(0, 8, 1),(0, 9, 1),(0, 11, 1),(0, 13, |
| 1), (1, 0, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(1, 6, 1),(1, 7, 1),(1, 10, 1),(1, |
| 14, 1),(1, 17, 1),(1, 18, 1), (2, 4, 2),(2, 5, 1),(2, 6, 2),(2, 12, 1),(2, 13, |
| 1),(2, 15, 1),(2, 18, 2), (3, 0, 1),(3, 1, 2),(3, 12, 3),(3, 16, 1),(3, 17, |
| 2),(3, 19, 1), (4, 1, 1),(4, 2, 1),(4, 3, 1),(4, 5, 1),(4, 6, 1),(4, 10, 1),(4, |
| 11, 1),(4, 14, 1),(4, 18, 1),(4, 19, 1), (5, 0, 1),(5, 2, 1),(5, 5, 1),(5, 7, |
| 1),(5, 10, 1),(5, 12, 1),(5, 16, 1),(5, 18, 1),(5, 19, 2), (6, 1, 1),(6, 3, |
| 1),(6, 12, 2),(6, 13, 1),(6, 14, 2),(6, 15, 1),(6, 16, 1),(6, 17, 1), (7, 0, |
| 1),(7, 2, 1),(7, 4, 1),(7, 5, 1),(7, 7, 2),(7, 8, 1),(7, 11, 1),(7, 14, 1),(7, |
| 16, 1), (8, 2, 1),(8, 4, 4),(8, 6, 2),(8, 11, 1),(8, 15, 1),(8, 18, 1), |
| (9, 0, 1),(9, 1, 1),(9, 4, 1),(9, 9, 2),(9, 12, 2),(9, 15, 1),(9, 18, 1),(9, |
| 19, 1); |
| |
| CREATE TABLE lda_training_odd_voc_size |
| ( |
| docid INT4, |
| wordid INT4, |
| count INT4 |
| ); |
| |
| INSERT INTO lda_training_odd_voc_size VALUES |
| (0, 0, 2),(0, 3, 2),(0, 5, 1),(0, 7, 1),(0, 8, 1),(0, 9, 1),(0, 11, 1),(0, 13, |
| 1), (1, 0, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(1, 6, 1),(1, 7, 1),(1, 10, 1),(1, |
| 14, 1),(1, 17, 1),(1, 18, 1), (2, 4, 2),(2, 5, 1),(2, 6, 2),(2, 12, 1),(2, 13, |
| 1),(2, 15, 1),(2, 18, 2), (3, 0, 1),(3, 1, 2),(3, 12, 3),(3, 16, 1),(3, 17, |
| 2),(3, 19, 1), (4, 1, 1),(4, 2, 1),(4, 3, 1),(4, 5, 1),(4, 6, 1),(4, 10, 1),(4, |
| 11, 1),(4, 14, 1),(4, 18, 1),(4, 19, 1), (5, 0, 1),(5, 2, 1),(5, 5, 1),(5, 7, |
| 1),(5, 10, 1),(5, 12, 1),(5, 16, 1),(5, 18, 1),(5, 19, 2), (6, 1, 1),(6, 3, |
| 1),(6, 12, 2),(6, 13, 1),(6, 14, 2),(6, 15, 1),(6, 16, 1),(6, 17, 1), (7, 0, |
| 1),(7, 2, 1),(7, 4, 1),(7, 5, 1),(7, 7, 2),(7, 8, 1),(7, 11, 1),(7, 14, 1),(7, |
| 16, 1), (8, 2, 1),(8, 4, 4),(8, 6, 2),(8, 11, 1),(8, 15, 1),(8, 18, 1), |
| (9, 0, 1),(9, 1, 1),(9, 4, 1),(9, 9, 2),(9, 12, 2),(9, 15, 1),(9, 18, 1),(9, |
| 19, 1),(9, 20, 1); |
| |
| |
| CREATE TABLE lda_testing |
| ( |
| docid INT4, |
| wordid INT4, |
| count INT4 |
| ); |
| |
| INSERT INTO lda_testing VALUES |
| (0, 0, 2),(0, 8, 1),(0, 9, 1),(0, 10, 1),(0, 12, 1),(0, 15, 2),(0, 18, 1),(0, |
| 19, 1), (1, 0, 1),(1, 2, 1),(1, 5, 1),(1, 7, 1),(1, 12, 2),(1, 13, 1),(1, 16, |
| 1),(1, 17, 1),(1, 18, 1), (2, 0, 1),(2, 1, 1),(2, 2, 1),(2, 3, 1),(2, 4, 1),(2, |
| 5, 1),(2, 6, 1),(2, 12, 1),(2, 14, 1),(2, 18, 1), (3, 2, 2),(3, 6, 2),(3, 7, |
| 1),(3, 9, 1),(3, 11, 2),(3, 14, 1),(3, 15, 1), (4, 1, 1),(4, 2, 2),(4, 3, |
| 1),(4, 5, 2),(4, 6, 1),(4, 11, 1),(4, 18, 2); |
| |
| --------------------------------------------------------------------------- |
| -- Test |
| --------------------------------------------------------------------------- |
| SELECT lda_train( |
| 'lda_training', |
| 'lda_model', |
| 'lda_output_data', |
| 20, 5, 2, 10, 0.01); |
| |
| |
| SELECT lda_parse_model(model, voc_size, topic_num) AS parsed_model |
| FROM lda_model; |
| |
| |
| |
| SELECT |
| assert( |
| topic_num IS NOT NULL AND topic_num = 5, |
| 'topic_num should be 5!' |
| ), |
| assert( |
| sum_of_topic_counts IS NOT NULL AND sum_of_word_counts IS NOT NULL |
| AND sum_of_topic_counts = sum_of_word_counts, |
| 'sum_of_topic_counts (' || sum_of_topic_counts::text || ') != sum_of_word_counts (' || sum_of_word_counts::text || ')!' |
| ) |
| FROM |
| ( |
| SELECT |
| array_upper((parsed_model).total_topic_counts, 1) AS topic_num, |
| array_sum((parsed_model).total_topic_counts) AS sum_of_topic_counts |
| FROM |
| ( |
| SELECT lda_parse_model(model, voc_size, topic_num) AS parsed_model |
| FROM lda_model |
| ) subq_parsed_model |
| ) subq_topic_num, |
| ( |
| SELECT sum(count) AS sum_of_word_counts FROM lda_training |
| )subq_word_count; |
| |
| SELECT lda_predict( |
| 'lda_testing', |
| 'lda_model', |
| 'lda_pred'); |
| |
| SELECT lda_predict( |
| 'lda_testing', |
| 'lda_model', |
| 'lda_pred_2', |
| 5); |
| |
| SELECT lda_get_perplexity( |
| 'lda_model', |
| 'lda_pred'); |
| |
| SELECT __lda_util_index_sort(array[1, 4, 2, 3]); |
| SELECT __lda_util_transpose(array[[1, 2, 3],[4, 5, 6]]); |
| SELECT assert(count(*) = 2, 'Wrong answer: __lda_util_unnest_transpose()') |
| FROM |
| ( |
| SELECT __lda_util_unnest_transpose(array[1, 2, 0, 4, 5, 0]::bigint[], 3, 2) |
| ) subq; |
| SELECT assert(count(*) = 3, 'Wrong answer: __lda_util_unnest()') |
| FROM |
| ( |
| SELECT __lda_util_unnest(array[1, 2, 0, 4, 5, 0]::bigint[], 3, 2) |
| ) subq; |
| SELECT __lda_util_norm_with_smoothing(array[1, 4, 2, 3], 0.1); |
| |
| -- no words hit ceiling |
| SELECT |
| assert( |
| __lda_check_count_ceiling( |
| array[0, 0, 0, 0]::bigint[], |
| 2, |
| 2) |
| IS NULL, |
| '__lda_check_count_ceiling should return NULL for [0, 0, 0, 0]'); |
| -- length: 1 |
| SELECT |
| assert( |
| __lda_check_count_ceiling( |
| array[-1, -1, -1, -1]::bigint[], |
| 2, |
| 2) |
| IS NOT NULL, |
| '__lda_check_count_ceiling should not return NULL for [-1, -1, -1, -1]'); |
| |
| SELECT lda_get_topic_desc( |
| 'lda_model', |
| 'lda_vocab', |
| 'topic_word_desc', |
| 5); |
| |
| SELECT lda_get_topic_word_count( |
| 'lda_model', |
| 'topic_word_count'); |
| |
| SELECT lda_get_word_topic_count( |
| 'lda_model', |
| 'topic_word_count_2'); |
| |
| SELECT lda_get_word_topic_mapping( |
| 'lda_output_data', |
| 'word_topic_mapping'); |
| |
| SELECT * |
| FROM __lda_util_norm_vocab('lda_vocab', 'norm_lda_vocab'); |
| |
| SELECT * |
| FROM __lda_util_norm_dataset('lda_testing', 'norm_lda_vocab', |
| 'norm_lda_data'); |
| |
| SELECT * |
| FROM __lda_util_conorm_data('lda_testing', 'lda_vocab', |
| 'norm_lda_data_2', 'norm_lda_vocab_2'); |
| |
| -- both voc_size and topic_num are odd or even |
| SELECT lda_train( |
| 'lda_training_odd_voc_size', |
| 'lda_model_odd_voc_size_even_topic_num', |
| 'lda_output_odd_voc_size_even_topic_num', |
| 21, 6, 2, 3, 0.01); |
| |
| SELECT lda_train( |
| 'lda_training_odd_voc_size', |
| 'lda_model_odd_voc_size_odd_topic_num', |
| 'lda_output_odd_voc_size_odd_topic_num', |
| 21, 5, 2, 3, 0.01); |
| |
| SELECT lda_train( |
| 'lda_training', |
| 'lda_model_even_voc_size_even_topic_num', |
| 'lda_output_even_voc_size_even_topic_num', |
| 20, 6, 2, 3, 0.01); |
| |
| DROP TABLE IF EXISTS documents; |
| CREATE TABLE documents(docid INT4, contents TEXT); |
| INSERT INTO documents VALUES |
| (0, 'b a a c'), |
| (1, 'd e f f f'); |
| |
| ALTER TABLE documents ADD COLUMN words TEXT[]; |
| UPDATE documents SET words = regexp_split_to_array(lower(contents), E'[\\s+\\.\\,]'); |
| |
| DROP TABLE IF EXISTS my_training, my_training_vocabulary; |
| SELECT term_frequency('documents', 'docid', 'words', 'my_training', TRUE); |
| |
| |
| DROP TABLE IF EXISTS my_model, my_outdata; |
| SELECT lda_train( |
| 'my_training', |
| 'my_model', |
| 'my_outdata', |
| 6, 2, 2, 3, 0.01); |
| |
| DROP TABLE IF EXISTS word_topic_count; |
| SELECT lda_get_word_topic_count( 'my_model', 'word_topic_count'); |
| SELECT * FROM word_topic_count ORDER BY wordid; |
| |
| -- This function will validate that the output of lda_get_word_topic_count() is consistent with the output of lda_train |
| CREATE OR REPLACE FUNCTION validate_lda_output() RETURNS integer AS $$ |
| DECLARE |
| -- variables for looping through the word_topic_mapping table |
| word_topic_mapping_row RECORD; |
| word_topic_mapping_wordid int; |
| word_topic_mapping_topicid int; |
| |
| -- we use the following array from word_topic_mapping to compare with the output of lda_get_word_topic_count() |
| word_topic_mapping_array INT[6][2] := ARRAY[[0,0],[0,0],[0,0],[0,0],[0,0],[0,0]]; |
| word_topic_mapping_topic_count int[]; |
| |
| -- variables for looping through the output of lda_get_word_topic_count() |
| word_topic_count_row RECORD; |
| word_topic_count_topic_count int[]; |
| BEGIN |
| -- Create helper table from out_data |
| DROP TABLE IF EXISTS word_topic_mapping; |
| CREATE table word_topic_mapping as SELECT wordid, topicid FROM ( |
| SELECT unnest(svec_from_string('{' || array_to_string(counts, ',') || '}:{' || array_to_string(words, ',') || '}')::float[]) AS wordid, |
| unnest(topic_assignment) AS topicid FROM my_outdata) a |
| order by wordid ; |
| |
| -- Construct the mapping array |
| FOR word_topic_mapping_row IN SELECT * FROM word_topic_mapping ORDER BY wordid LOOP |
| -- we need to add 1 because postgres array starts from 1 and our wordid and topicid start from 0 |
| word_topic_mapping_wordid := word_topic_mapping_row.wordid+1; |
| word_topic_mapping_topicid := word_topic_mapping_row.topicid+1; |
| |
| word_topic_mapping_array[word_topic_mapping_wordid][word_topic_mapping_topicid] := word_topic_mapping_array[word_topic_mapping_wordid][word_topic_mapping_topicid] + 1; |
| |
| END LOOP; |
| |
| -- Loop through the output of lda_get_word_topic_count() and compare the results with the helper table word_topic_mapping |
| FOR word_topic_count_row IN select * from word_topic_count ORDER BY wordid LOOP |
| word_topic_count_topic_count := ARRAY[word_topic_count_row.topic_count]; |
| word_topic_mapping_topic_count := word_topic_mapping_array[word_topic_count_row.wordid+1:word_topic_count_row.wordid+1]; -- Here arrayX[i:i] means the ith 1d array of a 2d array |
| |
| IF (word_topic_mapping_topic_count != word_topic_count_topic_count) THEN |
| RAISE EXCEPTION 'Topic assignment for wordid % does not match. word_topic_mapping: % word_topic_count: %',word_topic_count_row.wordid, word_topic_mapping_topic_count, word_topic_count_topic_count; |
| END IF; |
| END LOOP; |
| |
| RETURN 1; |
| END; |
| $$ LANGUAGE plpgsql; |
| |
| select validate_lda_output(); |
| |
| |
| ---------- TEST CASES FOR PERPLEXITY ---------- |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 2, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| 2, -- evaluate_every |
| .2); -- perplexity_tol |
| |
| SELECT assert(perplexity_iters = '{2}', 'Number of Perplexity iterations are wrong') FROM lda_model; |
| SELECT assert(perplexity[1] > 0 , 'Perplexity value should be greate than 0') FROM lda_model ; |
| -- Commenting the below flaky test to re-visit later. |
| -- select assert(array_upper(ARRAY(Select distinct unnest(perplexity)),1)= array_upper(perplexity,1) , 'Perplexity values should be unique') from lda_model ; |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 3, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| 1, -- evaluate_every |
| .1 -- perplexity_tol |
| ); |
| |
| SELECT assert(array_upper(perplexity,1) = 3, 'Perplexity calculation is wrong') FROM lda_model; |
| SELECT assert(perplexity[1] > 0 , 'Perplexity value should be greate than 0') FROM lda_model ; |
| -- Commenting the below flaky test to re-visit later. |
| -- select assert(array_upper(ARRAY(Select distinct unnest(perplexity)),1)= array_upper(perplexity,1) , 'Perplexity values should be unique') from lda_model ; |
| |
| |
| -- Function to check if the perplexity value returned from the function |
| -- and calculated by the train funcion are same. |
| CREATE OR REPLACE FUNCTION validate_perplexity() RETURNS boolean AS $$ |
| |
| DECLARE |
| perplexity_from_func Double precision[]; |
| perplexity_lda_train Double precision[]; |
| |
| BEGIN |
| drop table if exists lda_model, lda_output_data; |
| PERFORM lda_train( |
| 'lda_training', |
| 'lda_model', |
| 'lda_output_data', |
| 20, 5, 2, 10, 0.01, 2, .2); |
| |
| SELECT array_agg(round(lda_get_perplexity::numeric,10)) INTO perplexity_from_func from lda_get_perplexity('lda_model','lda_output_data'); |
| |
| select perplexity INTO perplexity_lda_train from lda_model ; |
| |
| |
| if perplexity_lda_train != perplexity_from_func THEN |
| return FALSE; |
| ELSE |
| return TRUE; |
| END IF; |
| |
| END; |
| |
| $$ LANGUAGE plpgsql; |
| |
| SELECT assert(validate_perplexity() = TRUE, 'Perplexity calculation is wrong'); |
| SELECT assert(perplexity[1] > 0 , 'Perplexity value should be greate than 0') FROM lda_model ; |
| -- Commenting the below flaky test to re-visit later. |
| -- select assert(array_upper(ARRAY(Select distinct unnest(perplexity)),1)= array_upper(perplexity,1) , 'Perplexity values should be unique') from lda_model ; |
| |
| -- Test for evaluate_every = Number of iterations = 1. It should give exactly one perplexity value -- |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 1, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| 1, -- evaluate_every |
| .1 -- perplexity_tol |
| ); |
| |
| select assert(perplexity != '{}', 'Perplexity should be calculated') from lda_model; |
| select assert(array_upper(perplexity,1) = 1, 'Perplexity should not have more than 1 value') from lda_model; |
| |
| |
| -- Test for evaluate_every = 0 and -1 : In this do not calculate perplexity-- |
| |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 1, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| 0, -- evaluate_every |
| .1 -- perplexity_tol |
| ); |
| |
| select assert(perplexity = '{}', 'Perplexity should not be calculated') from lda_model; |
| select assert(perplexity_iters = '{}', 'Perplexity iterations should be null') from lda_model ; |
| |
| -- Test for evaluate_every = 0 and -1 : In this do not calculate perplexity-- |
| |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 1, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| -1, -- evaluate_every |
| .1 -- perplexity_tol |
| ); |
| |
| select assert(perplexity = '{}', 'Perplexity should not be calculated') from lda_model ; |
| select assert(perplexity_iters = '{}', 'Perplexity iterations should be null') from lda_model ; |
| |
| |
| -- Test to check if the perplexity_iters are matching the expected value -- |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 10, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| 2, -- evaluate_every |
| .1 -- perplexity_tol |
| ); |
| |
| SELECT assert(array_upper(perplexity_iters,1) <= 5, 'Perplexity iterations are different from expected') FROM lda_model ; |
| SELECT assert(perplexity[1] > 0 , 'Perplexity value should be greate than 0') FROM lda_model ; |
| -- Commenting the below flaky test to re-visit later. |
| -- select assert(array_upper(ARRAY(Select distinct unnest(perplexity)),1)= array_upper(perplexity,1) , 'Perplexity values should be unique') from lda_model ; |
| |
| |
| -- Test: If the difference between any two iterations is less than the perplexity_tol, we will stop the training. -- |
| -- In this case, it will iterate to two iterations only as the perplexity_tol is very large and the difference between the 2 and 4th iteration -- |
| -- will be less than 10, so Only 2 iterations will be recorded -- |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 10, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| 2, -- evaluate_every |
| 100 -- perplexity_tol |
| ); |
| |
| |
| SELECT assert(abs(perplexity[2] - perplexity[1]) <100, 'Perplexity tol is less than the perplexity difference') FROM lda_model ; |
| SELECT assert(array_upper(perplexity_iters,1) = 2, 'Perplexity iterations are different from expected') FROM lda_model ; |
| SELECT assert(perplexity[1] > 0 , 'Perplexity value should be greate than 0') FROM lda_model ; |
| -- Commenting the below flaky test to re-visit later. |
| -- select assert(array_upper(ARRAY(Select distinct unnest(perplexity)),1)= array_upper(perplexity,1) , 'Perplexity values should be unique') from lda_model ; |
| |
| |
| -- Test for evaluate_every = 1 and 0 : In this case the iterations should not stop early -- |
| |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 10, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| 1, -- evaluate_every |
| 0 -- perplexity_tol |
| ); |
| |
| select assert(num_iterations = 10, 'Perplexity should run for all the iterations') from lda_model ; |
| |
| |
| -- Test for evaluate_every = NULL and perplexity_tol = NULL. In this case it should not calculate perplexity -- |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 10, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| NULL, -- evaluate_every |
| NULL -- perplexity_tol |
| ); |
| |
| select assert(perplexity = '{}', 'Perplexity should not be calculated') from lda_model ; |
| select assert(perplexity_iters = '{}', 'Perplexity iterations should be null') from lda_model ; |
| |
| |
| -- Test for evaluate_every = 1 and perplexity_tol = NULL. -- |
| -- In this case it should calculate perplexity with perplexity_tol = 0.1 as default value -- |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 10, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| 1, -- evaluate_every |
| NULL -- perplexity_tol |
| ); |
| |
| select assert(array_upper(perplexity_iters,1) >= 1, 'Perplexity iterations are different from expected') from lda_model ; |
| select assert(perplexity != '{}', 'Perplexity should be calculated') from lda_model; |
| |
| |
| -- Test for evaluate_every = NULL and perplexity_tol != NULL -- |
| -- In this case it should not calculate perplexity -- |
| |
| |
| drop table if exists lda_model, lda_output_data; |
| SELECT lda_train( |
| 'lda_training', -- data_table |
| 'lda_model', -- model_table |
| 'lda_output_data', -- output_data_table |
| 20, -- voc_size |
| 5, -- topic_num |
| 10, -- iter_num |
| 10, -- alpha |
| 0.01, -- beta |
| NULL, -- evaluate_every |
| 1 -- perplexity_tol |
| ); |
| |
| select assert(perplexity = '{}', 'Perplexity should not be calculated') from lda_model ; |
| select assert(perplexity_iters = '{}', 'Perplexity iterations should be null') from lda_model ; |
| |