| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| --------------------------------------------------------------------------- |
| -- Rules: |
| -- ------ |
| -- 1) Any DB objects should be created w/o schema prefix, |
| -- since this file is executed in a separate schema context. |
| -- 2) There should be no DROP statements in this script, since |
| -- all objects created in the default schema will be cleaned-up outside. |
| --------------------------------------------------------------------------- |
| |
| m4_include(`SQLCommon.m4') |
| |
| --------------------------------------------------------------------------- |
| -- Build vocabulary: |
| --------------------------------------------------------------------------- |
| CREATE TABLE lda_vocab(wordid INT4, word TEXT); |
| |
| INSERT INTO lda_vocab VALUES |
| (0, 'code'), (1, 'data'), (2, 'graph'), (3, 'image'), (4, 'input'), (5, |
| 'layer'), (6, 'learner'), (7, 'loss'), (8, 'model'), (9, 'network'), (10, |
| 'neuron'), (11, 'ob'), (13, 'rate'), (14, 'set'), (15, |
| 'signal'), (16, 'sparse'), (17, 'spatiaject'), (12, 'outputl'), (18, 'system'), (19, 'training'); |
| |
| --------------------------------------------------------------------------- |
| -- Build training dataset: |
| --------------------------------------------------------------------------- |
| CREATE TABLE lda_training |
| ( |
| docid INT4, |
| wordid INT4, |
| count INT4 |
| ); |
| |
| INSERT INTO lda_training VALUES |
| (0, 0, 2),(0, 3, 2),(0, 5, 1),(0, 7, 1),(0, 8, 1),(0, 9, 1),(0, 11, 1),(0, 13, |
| 1), (1, 0, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(1, 6, 1),(1, 7, 1),(1, 10, 1),(1, |
| 14, 1),(1, 17, 1),(1, 18, 1), (2, 4, 2),(2, 5, 1),(2, 6, 2),(2, 12, 1),(2, 13, |
| 1),(2, 15, 1),(2, 18, 2), (3, 0, 1),(3, 1, 2),(3, 12, 3),(3, 16, 1),(3, 17, |
| 2),(3, 19, 1), (4, 1, 1),(4, 2, 1),(4, 3, 1),(4, 5, 1),(4, 6, 1),(4, 10, 1),(4, |
| 11, 1),(4, 14, 1),(4, 18, 1),(4, 19, 1), (5, 0, 1),(5, 2, 1),(5, 5, 1),(5, 7, |
| 1),(5, 10, 1),(5, 12, 1),(5, 16, 1),(5, 18, 1),(5, 19, 2), (6, 1, 1),(6, 3, |
| 1),(6, 12, 2),(6, 13, 1),(6, 14, 2),(6, 15, 1),(6, 16, 1),(6, 17, 1), (7, 0, |
| 1),(7, 2, 1),(7, 4, 1),(7, 5, 1),(7, 7, 2),(7, 8, 1),(7, 11, 1),(7, 14, 1),(7, |
| 16, 1), (8, 2, 1),(8, 4, 4),(8, 6, 2),(8, 11, 1),(8, 15, 1),(8, 18, 1), |
| (9, 0, 1),(9, 1, 1),(9, 4, 1),(9, 9, 2),(9, 12, 2),(9, 15, 1),(9, 18, 1),(9, |
| 19, 1); |
| |
| |
| CREATE TABLE lda_testing |
| ( |
| docid INT4, |
| wordid INT4, |
| count INT4 |
| ); |
| |
| INSERT INTO lda_testing VALUES |
| (0, 0, 2),(0, 8, 1),(0, 9, 1),(0, 10, 1),(0, 12, 1),(0, 15, 2),(0, 18, 1),(0, |
| 19, 1), (1, 0, 1),(1, 2, 1),(1, 5, 1),(1, 7, 1),(1, 12, 2),(1, 13, 1),(1, 16, |
| 1),(1, 17, 1),(1, 18, 1), (2, 0, 1),(2, 1, 1),(2, 2, 1),(2, 3, 1),(2, 4, 1),(2, |
| 5, 1),(2, 6, 1),(2, 12, 1),(2, 14, 1),(2, 18, 1), (3, 2, 2),(3, 6, 2),(3, 7, |
| 1),(3, 9, 1),(3, 11, 2),(3, 14, 1),(3, 15, 1), (4, 1, 1),(4, 2, 2),(4, 3, |
| 1),(4, 5, 2),(4, 6, 1),(4, 11, 1),(4, 18, 2); |
| |
| --------------------------------------------------------------------------- |
| -- Test |
| --------------------------------------------------------------------------- |
| SELECT lda_train( |
| 'lda_training', |
| 'lda_model', |
| 'lda_output_data', |
| 20, 5, 2, 10, 0.01); |
| |
| |
| SELECT lda_parse_model(model, voc_size, topic_num) AS parsed_model |
| FROM lda_model; |