| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| --------------------------------------------------------------------------- |
| -- Rules: |
| -- ------ |
| -- 1) Any DB objects should be created w/o schema prefix, |
| -- since this file is executed in a separate schema context. |
| -- 2) There should be no DROP statements in this script, since |
| -- all objects created in the default schema will be cleaned-up outside. |
| -- |
| -- This test is skipped on platforms that do not support |
| -- ORDERED AGGREGATES. |
| --------------------------------------------------------------------------- |
| |
| m4_include(`SQLCommon.m4') |
| m4_changequote(<!,!>) |
| |
| m4_ifdef(<!__HAS_ORDERED_AGGREGATES__!>,<! |
| |
| -- Regex table: pairs each regular expression (pattern) with the name of the |
| -- feature it stands for (name). Passed as the second argument to |
| -- crf_train_fgen further down in this script. |
| CREATE TABLE train_regex ( |
|     pattern text, |
|     name    text |
| ); |
| |
| -- The explicit column list keeps this load independent of the column order |
| -- in the table definition. |
| INSERT INTO train_regex (pattern, name) VALUES |
|     ('^[A-Z][a-z]+$', 'InitCapital'), |
|     ('^[A-Z]+$',      'isAllCapital'), |
|     ('^.*[0-9]+.*$',  'containsDigit'), |
|     ('^.+[.]$',       'endsWithDot'), |
|     ('^.+[,]$',       'endsWithComma'), |
|     ('^.+er$',        'endsWithER'), |
|     ('^.+est$',       'endsWithEst'), |
|     ('^.+ed$',        'endsWithED'), |
|     ('^.+s$',         'endsWithS'), |
|     ('^.+ing$',       'endsWithIng'), |
|     ('^.+ly$',        'endsWithly'), |
|     ('^.+-.+$',       'isDashSeparatedWords'), |
|     ('^.*@.*$',       'isEmailId'); |
| |
| -- Label table: one numeric id per tag. These ids are the values stored in |
| -- the label column of train_segmenttbl (for example 42 is the comma tag and |
| -- 43 the period tag). The tags look like Penn Treebank part-of-speech tags |
| -- (presumably; confirm against the tag set in use). |
| CREATE TABLE crf_label ( |
|     id    integer, |
|     label character varying |
| ); |
| |
| -- The explicit column list keeps this load independent of the column order |
| -- in the table definition. |
| INSERT INTO crf_label (id, label) VALUES |
|     -- ids 0 through 35: tags spelled with letters (two end in a dollar sign) |
|     (0, 'CC'),    (1, 'CD'),   (2, 'DT'),    (3, 'EX'),   (4, 'FW'),   (5, 'IN'), |
|     (6, 'JJ'),    (7, 'JJR'),  (8, 'JJS'),   (9, 'LS'),   (10, 'MD'),  (11, 'NN'), |
|     (12, 'NNS'),  (13, 'NNP'), (14, 'NNPS'), (15, 'PDT'), (16, 'POS'), (17, 'PRP'), |
|     (18, 'PRP$'), (19, 'RB'),  (20, 'RBR'),  (21, 'RBS'), (22, 'RP'),  (23, 'SYM'), |
|     (24, 'TO'),   (25, 'UH'),  (26, 'VB'),   (27, 'VBD'), (28, 'VBG'), (29, 'VBN'), |
|     (30, 'VBP'),  (31, 'VBZ'), (32, 'WDT'),  (33, 'WP'),  (34, 'WP$'), (35, 'WRB'), |
|     -- ids 36 through 44: symbol and punctuation tags; the label for id 38 is |
|     -- two single-quote characters, written here with SQL quote doubling |
|     (36, '$'), (37, '#'), (38, ''''''), (39, '``'), (40, '('), |
|     (41, ')'), (42, ','), (43, '.'), (44, ':'); |
| |
| -- Training segments: one row per token of a labelled document. |
| --   start_pos : position of the token inside its document, counted from 0 |
| --   doc_id    : document the token belongs to |
| --   seg_text  : the token text |
| --   label     : tag id of the token (matches the ids loaded into crf_label) |
| --   max_pos   : highest start_pos of the document, repeated on every row |
| CREATE TABLE train_segmenttbl ( |
|     start_pos integer, |
|     doc_id    integer, |
|     seg_text  text, |
|     label     integer, |
|     max_pos   integer |
| ); |
| |
| -- The explicit column list keeps this load independent of the column order |
| -- in the table definition. |
| INSERT INTO train_segmenttbl (start_pos, doc_id, seg_text, label, max_pos) VALUES |
|     -- Document 1: positions 0 through 36, so max_pos is 36 on every row. |
|     (0, 1, 'confidence', 11, 36), (1, 1, 'in', 5, 36), (2, 1, 'the', 2, 36), |
|     (3, 1, 'pound', 11, 36), (4, 1, 'is', 31, 36), (5, 1, 'widely', 19, 36), |
|     (6, 1, 'expected', 29, 36), (7, 1, 'to', 24, 36), (8, 1, 'take', 26, 36), |
|     (9, 1, 'another', 2, 36), (10, 1, 'sharp', 6, 36), (11, 1, 'dive', 11, 36), |
|     (12, 1, 'if', 5, 36), (13, 1, 'trade', 11, 36), (14, 1, 'figures', 12, 36), |
|     (15, 1, 'for', 5, 36), (16, 1, 'september', 13, 36), (17, 1, ',', 42, 36), |
|     (18, 1, 'due', 6, 36), (19, 1, 'for', 5, 36), (20, 1, 'release', 11, 36), |
|     (21, 1, 'tomorrow', 11, 36), (22, 1, ',', 42, 36), (23, 1, 'fail', 26, 36), |
|     (24, 1, 'to', 24, 36), (25, 1, 'show', 26, 36), (26, 1, 'a', 2, 36), |
|     (27, 1, 'substantial', 6, 36), (28, 1, 'improvement', 11, 36), (29, 1, 'from', 5, 36), |
|     (30, 1, 'july', 13, 36), (31, 1, 'and', 0, 36), (32, 1, 'august', 13, 36), |
|     (33, 1, '''s', 16, 36), (34, 1, 'near-record', 6, 36), (35, 1, 'deficits', 12, 36), |
|     (36, 1, '.', 43, 36), |
|     -- Document 2: positions 0 through 26, so max_pos is 26 on every row. |
|     (0, 2, 'chancellor', 13, 26), (1, 2, 'of', 5, 26), (2, 2, 'the', 2, 26), |
|     (3, 2, 'exchequer', 13, 26), (4, 2, 'nigel', 13, 26), (5, 2, 'lawson', 13, 26), |
|     (6, 2, '''s', 16, 26), (7, 2, 'restated', 29, 26), (8, 2, 'commitment', 11, 26), |
|     (9, 2, 'to', 24, 26), (10, 2, 'a', 2, 26), (11, 2, 'firm', 11, 26), |
|     (12, 2, 'monetary', 6, 26), (13, 2, 'policy', 11, 26), (14, 2, 'has', 31, 26), |
|     (15, 2, 'helped', 29, 26), (16, 2, 'to', 24, 26), (17, 2, 'prevent', 26, 26), |
|     (18, 2, 'a', 2, 26), (19, 2, 'freefall', 11, 26), (20, 2, 'in', 5, 26), |
|     (21, 2, 'sterling', 11, 26), (22, 2, 'over', 5, 26), (23, 2, 'the', 2, 26), |
|     (24, 2, 'past', 6, 26), (25, 2, 'week', 11, 26), (26, 2, '.', 43, 26); |
| |
| |
| -- Run feature generation on the training data. The first three arguments |
| -- name the segment, regex and label tables created earlier in this script. |
| -- The last three name tables this script never creates; presumably |
| -- crf_train_fgen produces them as output (confirm against its documentation). |
| SELECT crf_train_fgen( |
|     'train_segmenttbl', |
|     'train_regex', |
|     'crf_label', |
|     'train_dictionary', |
|     'train_featuretbl', |
|     'train_featureset' |
| ); |
| |
| |
| !>) |
| m4_changequote(<!`!>,<!'!>) |