| --------------------------------------------------------------------------- |
| -- Rules: |
| -- ------ |
| -- 1) Any DB objects should be created w/o schema prefix, |
| -- since this file is executed in a separate schema context. |
| -- 2) There should be no DROP statements in this script, since |
| -- all objects created in the default schema will be cleaned-up outside. |
| --------------------------------------------------------------------------- |
| m4_include(`SQLCommon.m4') |
| m4_changequote(<!,!>) |
| m4_ifdef(<!__HAS_ORDERED_AGGREGATES__!>,<! |
| |
| -- Training segment table: one row per token (start_pos, doc_id, seg_text, label, max_pos) |
| -- Tokenized training corpus consumed by the feature generator call later in this script. |
| CREATE TABLE train_new_segmenttbl ( |
|     start_pos integer,  -- position of the token inside its document, starting at 0 |
|     doc_id    integer,  -- document (sentence) the token belongs to |
|     seg_text  text,     -- the token text |
|     label     integer,  -- integer tag id assigned to the token |
|     max_pos   integer   -- in the rows inserted below: the last start_pos of the document |
| ); |
| -- Training corpus: seven tokenized sentences (doc_id 1 to 7), one row per token. |
| -- max_pos repeats the last start_pos of the sentence on every row. |
| -- Columns are listed explicitly so the load does not depend on column order. |
| -- NOTE(review): label values look shifted by +1 relative to the ids in crf_label_new |
| -- (for example the token "the" carries 3 here while DT has id 2 there) - confirm this is intended. |
| -- The expected weights further down are keyed on these same integers, so do not renumber. |
| INSERT INTO train_new_segmenttbl (start_pos, doc_id, seg_text, label, max_pos) VALUES |
| (0, 1, 'freight', 12, 27), |
| (1, 1, 'rates', 13, 27), |
| (2, 1, ',', 43, 27), |
| (3, 1, 'declining', 29, 27), |
| (4, 1, 'for', 6, 27), |
| (5, 1, 'most', 22, 27), |
| (6, 1, 'of', 6, 27), |
| (7, 1, 'the', 3, 27), |
| (8, 1, 'decade', 12, 27), |
| (9, 1, 'because', 6, 27), |
| (10, 1, 'of', 6, 27), |
| (11, 1, 'competition', 12, 27), |
| (12, 1, 'spurred', 30, 27), |
| (13, 1, 'by', 6, 27), |
| (14, 1, 'deregulation', 12, 27), |
| (15, 1, ',', 43, 27), |
| (16, 1, 'are', 31, 27), |
| (17, 1, 'bottoming', 29, 27), |
| (18, 1, 'out', 6, 27), |
| (19, 1, ',', 43, 27), |
| (20, 1, 'turning', 29, 27), |
| (21, 1, 'upward', 20, 27), |
| (22, 1, 'and', 1, 27), |
| (23, 1, 'threatening', 29, 27), |
| (24, 1, 'to', 25, 27), |
| (25, 1, 'fuel', 27, 27), |
| (26, 1, 'inflation', 12, 27), |
| (27, 1, '.', 44, 27), |
| (0, 2, 'trucking', 14, 29), |
| (1, 2, ',', 43, 29), |
| (2, 2, 'shipping', 29, 29), |
| (3, 2, 'and', 1, 29), |
| (4, 2, 'air-freight', 12, 29), |
| (5, 2, 'companies', 13, 29), |
| (6, 2, 'have', 31, 29), |
| (7, 2, 'announced', 30, 29), |
| (8, 2, 'rate', 12, 29), |
| (9, 2, 'increases', 13, 29), |
| (10, 2, ',', 43, 29), |
| (11, 2, 'scheduled', 30, 29), |
| (12, 2, 'for', 6, 29), |
| (13, 2, 'this', 3, 29), |
| (14, 2, 'fall', 12, 29), |
| (15, 2, 'or', 1, 29), |
| (16, 2, 'early', 7, 29), |
| (17, 2, 'next', 7, 29), |
| (18, 2, 'year', 12, 29), |
| (19, 2, ',', 43, 29), |
| (20, 2, 'reflecting', 29, 29), |
| (21, 2, 'higher', 8, 29), |
| (22, 2, 'costs', 13, 29), |
| (23, 2, 'and', 1, 29), |
| (24, 2, 'tightened', 28, 29), |
| (25, 2, 'demand', 12, 29), |
| (26, 2, 'for', 6, 29), |
| (27, 2, 'freight', 12, 29), |
| (28, 2, 'transport', 12, 29), |
| (29, 2, '.', 44, 29), |
| (0, 3, 'major', 7, 23), |
| (1, 3, 'shippers', 13, 23), |
| (2, 3, 'say', 31, 23), |
| (3, 3, 'they', 18, 23), |
| (4, 3, 'expect', 31, 23), |
| (5, 3, 'freight', 12, 23), |
| (6, 3, 'rates', 13, 23), |
| (7, 3, 'to', 25, 23), |
| (8, 3, 'rise', 27, 23), |
| (9, 3, 'at', 6, 23), |
| (10, 3, 'least', 9, 23), |
| (11, 3, 'as', 20, 23), |
| (12, 3, 'fast', 20, 23), |
| (13, 3, 'as', 6, 23), |
| (14, 3, 'inflation', 12, 23), |
| (15, 3, 'and', 1, 23), |
| (16, 3, 'maybe', 20, 23), |
| (17, 3, 'faster', 21, 23), |
| (18, 3, 'in', 6, 23), |
| (19, 3, 'the', 3, 23), |
| (20, 3, 'next', 7, 23), |
| (21, 3, 'few', 7, 23), |
| (22, 3, 'years', 13, 23), |
| (23, 3, '.', 44, 23), |
| (0, 4, 'that', 3, 30), |
| (1, 4, '''s', 32, 30), |
| (2, 4, 'a', 3, 30), |
| (3, 4, 'big', 7, 30), |
| (4, 4, 'change', 12, 30), |
| (5, 4, 'from', 6, 30), |
| (6, 4, 'recent', 7, 30), |
| (7, 4, 'years', 13, 30), |
| (8, 4, 'when', 36, 30), |
| (9, 4, 'freight', 12, 30), |
| (10, 4, 'haulage', 12, 30), |
| (11, 4, 'was', 28, 30), |
| (12, 4, 'a', 3, 30), |
| (13, 4, 'bright', 7, 30), |
| (14, 4, 'spot', 12, 30), |
| (15, 4, 'for', 6, 30), |
| (16, 4, 'u.s.', 14, 30), |
| (17, 4, 'productivity', 12, 30), |
| (18, 4, ',', 43, 30), |
| (19, 4, 'helping', 29, 30), |
| (20, 4, 'to', 25, 30), |
| (21, 4, 'restrain', 27, 30), |
| (22, 4, 'inflation', 12, 30), |
| (23, 4, 'and', 1, 30), |
| (24, 4, 'make', 27, 30), |
| (25, 4, 'u.s.', 14, 30), |
| (26, 4, 'industry', 12, 30), |
| (27, 4, 'more', 21, 30), |
| (28, 4, 'competitive', 7, 30), |
| (29, 4, 'abroad', 20, 30), |
| (30, 4, '.', 44, 30), |
| (0, 5, '``', 40, 49), |
| (1, 5, 'demand', 12, 49), |
| (2, 5, 'has', 32, 49), |
| (3, 5, 'caught', 30, 49), |
| (4, 5, 'up', 6, 49), |
| (5, 5, 'with', 6, 49), |
| (6, 5, 'the', 3, 49), |
| (7, 5, 'supply', 12, 49), |
| (8, 5, 'of', 6, 49), |
| (9, 5, 'certain', 7, 49), |
| (10, 5, 'types', 13, 49), |
| (11, 5, 'of', 6, 49), |
| (12, 5, 'freight', 12, 49), |
| (13, 5, 'transportation', 12, 49), |
| (14, 5, ',', 43, 49), |
| (15, 5, 'and', 1, 49), |
| (16, 5, 'rates', 13, 49), |
| (17, 5, 'are', 31, 49), |
| (18, 5, 'starting', 29, 49), |
| (19, 5, 'to', 25, 49), |
| (20, 5, 'move', 27, 49), |
| (21, 5, 'up', 6, 49), |
| (22, 5, '''''', 39, 49), |
| (23, 5, 'at', 6, 49), |
| (24, 5, 'a', 3, 49), |
| (25, 5, 'rate', 12, 49), |
| (26, 5, '``', 40, 49), |
| (27, 5, 'close', 20, 49), |
| (28, 5, 'to', 25, 49), |
| (29, 5, 'or', 1, 49), |
| (30, 5, 'slightly', 20, 49), |
| (31, 5, 'more', 8, 49), |
| (32, 5, 'than', 6, 49), |
| (33, 5, 'the', 3, 49), |
| (34, 5, 'inflation', 12, 49), |
| (35, 5, 'rate', 12, 49), |
| (36, 5, ',', 43, 49), |
| (37, 5, '''''', 39, 49), |
| (38, 5, 'said', 28, 49), |
| (39, 5, 'clifford', 14, 49), |
| (40, 5, 'sayre', 14, 49), |
| (41, 5, ',', 43, 49), |
| (42, 5, 'director', 12, 49), |
| (43, 5, 'of', 6, 49), |
| (44, 5, 'logistics', 13, 49), |
| (45, 5, 'at', 6, 49), |
| (46, 5, 'du', 14, 49), |
| (47, 5, 'pont', 14, 49), |
| (48, 5, 'co', 14, 49), |
| (49, 5, '.', 44, 49), |
| (0, 6, 'shippers', 13, 24), |
| (1, 6, 'surveyed', 30, 24), |
| (2, 6, 'recently', 20, 24), |
| (3, 6, 'by', 6, 24), |
| (4, 6, 'ohio', 14, 24), |
| (5, 6, 'state', 14, 24), |
| (6, 6, 'university', 14, 24), |
| (7, 6, 'said', 28, 24), |
| (8, 6, 'they', 18, 24), |
| (9, 6, 'expect', 31, 24), |
| (10, 6, 'their', 19, 24), |
| (11, 6, 'freight-transport', 7, 24), |
| (12, 6, ',', 43, 24), |
| (13, 6, 'storage', 12, 24), |
| (14, 6, 'and', 1, 24), |
| (15, 6, 'distribution', 12, 24), |
| (16, 6, 'costs', 13, 24), |
| (17, 6, 'to', 25, 24), |
| (18, 6, 'rise', 27, 24), |
| (19, 6, 'about', 6, 24), |
| (20, 6, '4', 2, 24), |
| (21, 6, '%', 12, 24), |
| (22, 6, 'this', 3, 24), |
| (23, 6, 'year', 12, 24), |
| (24, 6, '.', 44, 24), |
| (0, 7, 'only', 20, 31), |
| (1, 7, '10', 2, 31), |
| (2, 7, '%', 12, 31), |
| (3, 7, 'of', 6, 31), |
| (4, 7, 'the', 3, 31), |
| (5, 7, '250', 2, 31), |
| (6, 7, 'shippers', 13, 31), |
| (7, 7, 'polled', 30, 31), |
| (8, 7, 'expected', 30, 31), |
| (9, 7, 'their', 19, 31), |
| (10, 7, 'freight-transport', 7, 31), |
| (11, 7, 'costs', 13, 31), |
| (12, 7, 'to', 25, 31), |
| (13, 7, 'decrease', 27, 31), |
| (14, 7, ',', 43, 31), |
| (15, 7, 'compared', 30, 31), |
| (16, 7, 'with', 6, 31), |
| (17, 7, '30', 2, 31), |
| (18, 7, '%', 12, 31), |
| (19, 7, 'who', 34, 31), |
| (20, 7, 'had', 28, 31), |
| (21, 7, 'looked', 30, 31), |
| (22, 7, 'to', 25, 31), |
| (23, 7, 'freight', 27, 31), |
| (24, 7, 'transport', 12, 31), |
| (25, 7, 'to', 25, 31), |
| (26, 7, 'reduce', 27, 31), |
| (27, 7, 'costs', 13, 31), |
| (28, 7, 'in', 6, 31), |
| (29, 7, 'past', 7, 31), |
| (30, 7, 'years', 13, 31), |
| (31, 7, '.', 44, 31); |
| |
| -- Regex feature table: each row pairs a regular expression with the feature name it stands for. |
| -- It is passed as the second argument to the feature generator call later in this script. |
| CREATE TABLE train_new_regex ( |
|     pattern text,  -- regular expression |
|     name    text   -- feature name associated with the pattern |
| ); |
| -- Columns are listed explicitly so the load does not depend on column order. |
| INSERT INTO train_new_regex (pattern, name) VALUES |
| ('^[A-Z][a-z]+$','InitCapital'), |
| ('^[A-Z]+$','isAllCapital'), |
| ('^.*[0-9]+.*$','containsDigit'), |
| ('^.+[.]$','endsWithDot'), |
| ('^.+[,]$','endsWithComma'), |
| ('^.+er$','endsWithER'), |
| ('^.+est$','endsWithEst'), |
| ('^.+ed$','endsWithED'), |
| ('^.+s$','endsWithS'), |
| ('^.+ing$','endsWithIng'), |
| ('^.+ly$','endsWithly'), |
| ('^.+-.+$','isDashSeparatedWords'), |
| ('^.*@.*$','isEmailId'); |
| ANALYZE train_new_regex; |
| |
| -- Label dictionary: maps integer ids 0 to 44 to tag names. |
| -- The rows are loaded in three statements because the label made of two backticks |
| -- (id 39) is wrapped in the m4 quote delimiters that are active in this file, and the |
| -- quote-changing directive that follows must stay between the second and third statement. |
| -- NOTE(review): presumably this protects the backtick label from m4 quote handling - confirm |
| -- before merging the statements or moving the directive. |
| CREATE TABLE crf_label_new ( |
|     id    integer, |
|     label character varying |
| ); |
| -- Columns are listed explicitly so the loads do not depend on column order. |
| INSERT INTO crf_label_new (id, label) VALUES |
| (0,'CC'), (1,'CD'), (2,'DT'), (3,'EX'), (4,'FW'), (5,'IN'), (6,'JJ'), (7,'JJR'), (8,'JJS'), |
| (9,'LS'), (10,'MD'), (11,'NN'), (12,'NNS'), (13,'NNP'),(14,'NNPS'),(15,'PDT'),(16,'POS'),(17,'PRP'), |
| (18,'PRP$'),(19,'RB'), (20,'RBR'), (21,'RBS'), (22,'RP'), (23,'SYM'), (24,'TO'), (25,'UH'), (26,'VB'), |
| (27,'VBD'), (28,'VBG'),(29,'VBN'), (30,'VBP'), (31,'VBZ'),(32,'WDT'), (33,'WP'), (34,'WP$'),(35,'WRB'), |
| (36,'$'), (37,'#'), (38,''''''); |
| INSERT INTO crf_label_new (id, label) VALUES |
| (39,<!'``'!>); |
| m4_changequote(,) |
| INSERT INTO crf_label_new (id, label) VALUES |
| (40,'('), (41,')'), (42,','), (43,'.'), (44,':'); |
| ANALYZE crf_label_new; |
| |
| -- Run feature generation on the training corpus. |
| -- The first three arguments name the segment, regex and label tables created earlier in this script. |
| -- The last three names are not created in this script - presumably they are output tables; |
| -- confirm against the crf_train_fgen documentation. |
| SELECT crf_train_fgen( |
|     'train_new_segmenttbl', |
|     'train_new_regex', |
|     'crf_label_new', |
|     'train_new_dictionary', |
|     'train_new_featuretbl', |
|     'train_new_featureset' |
| ); |
| |
| -- Train the linear-chain CRF on the generated features. |
| -- train_new_crf_feature is the table the assertions at the end of this script read |
| -- (columns name, prev_label_id, label_id, weight). |
| -- NOTE(review): the trailing 30 is presumably an iteration limit - confirm against the lincrf_train documentation. |
| SELECT lincrf_train( |
|     'train_new_featuretbl', |
|     'train_new_featureset', |
|     'crf_label_new', |
|     'train_new_stats', |
|     'train_new_crf_feature', |
|     30 |
| ); |
| |
| -- Reference features and weights that the training output is compared against. |
| -- Per the original note, the result is produced from Dr. Sunita's CRF Java package with the same input. |
| CREATE TABLE expected_crf_feature_new ( |
|     id         integer,  -- running feature number |
|     name       text,     -- feature name |
|     prev_label integer,  -- previous label id; -1 on every row below whose name is not E. |
|     label      integer,  -- label id the feature is tied to |
|     weight     float     -- reference weight |
| ); |
| |
| -- Reference rows: 290 features (id 0 to 289) with their expected weights. |
| -- The label integers are the ones used in train_new_segmenttbl. |
| -- Columns are listed explicitly so the load does not depend on column order. |
| INSERT INTO expected_crf_feature_new (id, name, prev_label, label, weight) VALUES |
| (0, 'S.', -1, 12, 0.5516753522178934), |
| (1, 'W_freight', -1, 12, 5.959241076198326), |
| (2, 'E.', 12, 13, 2.0789747316372034), |
| (3, 'W_rates', -1, 13, 2.0837653985907174), |
| (4, 'R_endsWithS', -1, 13, 5.306222451221396), |
| (5, 'E.', 13, 43, 1.8107793215017256), |
| (6, 'W_,', -1, 43, 7.286509411020296), |
| (7, 'E.', 43, 29, 2.222715331802022), |
| (8, 'U', -1, 29, 0.9492501230997201), |
| (9, 'W_declining', -1, 29, 0.9644339685939746), |
| (10, 'R_endsWithIng', -1, 29, 5.3575087838268995), |
| (11, 'E.', 29, 6, 2.459070565360766), |
| (12, 'W_for', -1, 6, 4.538367636569374), |
| (13, 'E.', 6, 22, 2.633416169216998), |
| (14, 'U', -1, 22, 1.2724051325350452), |
| (15, 'W_most', -1, 22, 4.742804679577188), |
| (16, 'E.', 22, 6, 2.3563567868687803), |
| (17, 'W_of', -1, 6, 5.886369615822257), |
| (18, 'E.', 6, 3, 3.4072332563788974), |
| (19, 'W_the', -1, 3, 5.003139924048258), |
| (20, 'E.', 3, 12, 3.4951824947429704), |
| (21, 'U', -1, 12, 1.3530610141204105), |
| (22, 'W_decade', -1, 12, 2.5363441670581475), |
| (23, 'E.', 12, 6, 3.168556891460435), |
| (24, 'U', -1, 6, 0.8846547539288385), |
| (25, 'W_because', -1, 6, 4.371973715457267), |
| (26, 'E.', 6, 6, 2.3178076749274994), |
| (27, 'E.', 6, 12, 3.008379674551115), |
| (28, 'W_competition', -1, 12, 3.7657358279072044), |
| (29, 'E.', 12, 30, 1.4638353616301887), |
| (30, 'U', -1, 30, 1.661969150148885), |
| (31, 'W_spurred', -1, 30, 1.0199935936286502), |
| (32, 'R_endsWithED', -1, 30, 5.781210636435915), |
| (33, 'E.', 30, 6, 2.610516071895145), |
| (34, 'W_by', -1, 6, 4.016756180248158), |
| (35, 'W_deregulation', -1, 12, 3.107044113256852), |
| (36, 'E.', 12, 43, 3.0918965630757156), |
| (37, 'E.', 43, 31, 1.8076032969941829), |
| (38, 'W_are', -1, 31, 3.4834944891071182), |
| (39, 'E.', 31, 29, 3.0246636670581872), |
| (40, 'W_bottoming', -1, 29, 0.9246692126583386), |
| (41, 'W_out', -1, 6, 4.166232235849933), |
| (42, 'E.', 6, 43, 1.4287166848500423), |
| (43, 'W_turning', -1, 29, 0.9453908838237025), |
| (44, 'E.', 29, 20, 1.9755638903302428), |
| (45, 'U', -1, 20, 2.276171985449911), |
| (46, 'W_upward', -1, 20, 3.0573963140454667), |
| (47, 'E.', 20, 1, 1.7604125260111), |
| (48, 'W_and', -1, 1, 6.74422233788292), |
| (49, 'E.', 1, 29, 1.3137638161586382), |
| (50, 'W_threatening', -1, 29, 1.4170133688772073), |
| (51, 'E.', 29, 25, 1.6388299340148602), |
| (52, 'W_to', -1, 25, 5.749953289380646), |
| (53, 'E.', 25, 27, 6.296991117616081), |
| (54, 'U', -1, 27, 1.5167524281785714), |
| (55, 'W_fuel', -1, 27, 0.8669685656391124), |
| (56, 'E.', 27, 12, 2.7884267413237405), |
| (57, 'W_inflation', -1, 12, 4.497973086341019), |
| (58, 'E.', 12, 44, 2.1195755732689774), |
| (59, 'End.', -1, 44, 4.0877966130773755), |
| (60, 'W_.', -1, 44, 4.0877966130773755), |
| (61, 'S.', -1, 14, 1.3657688665015333), |
| (62, 'U', -1, 14, 2.0185522749436866), |
| (63, 'W_trucking', -1, 14, 3.1701428979681845), |
| (64, 'R_endsWithIng', -1, 14, 2.1776484880257367), |
| (65, 'E.', 14, 43, 1.497373571911848), |
| (66, 'W_shipping', -1, 29, 1.4634895303741247), |
| (67, 'E.', 29, 1, 1.3751429994639364), |
| (68, 'E.', 1, 12, 2.448895430660733), |
| (69, 'W_air-freight', -1, 12, 3.276197718236269), |
| (70, 'R_isDashSeparatedWords', -1, 12, 1.390362987986471), |
| (71, 'U', -1, 13, -0.039795570326036186), |
| (72, 'W_companies', -1, 13, 1.410903273102038), |
| (73, 'E.', 13, 31, 2.973781128828628), |
| (74, 'U', -1, 31, 1.3700945582140844), |
| (75, 'W_have', -1, 31, 3.0882453767746747), |
| (76, 'E.', 31, 30, 1.9098632637338704), |
| (77, 'W_announced', -1, 30, 1.4380819186872527), |
| (78, 'E.', 30, 12, 0.9060674369789037), |
| (79, 'W_rate', -1, 12, 5.064883717975049), |
| (80, 'W_increases', -1, 13, 2.319719757676896), |
| (81, 'E.', 43, 30, 1.299378678795197), |
| (82, 'W_scheduled', -1, 30, 0.9387965126997426), |
| (83, 'W_this', -1, 3, 4.370856586315017), |
| (84, 'R_endsWithS', -1, 3, 2.076343517584421), |
| (85, 'W_fall', -1, 12, 2.1032438732239624), |
| (86, 'E.', 12, 1, 3.030829781200263), |
| (87, 'W_or', -1, 1, 6.18424407183645), |
| (88, 'E.', 1, 7, 0.868722661655972), |
| (89, 'U', -1, 7, 2.23345149856934), |
| (90, 'W_early', -1, 7, 3.384977523038234), |
| (91, 'R_endsWithly', -1, 7, 1.5733441079056303), |
| (92, 'E.', 7, 7, 2.7161792323578373), |
| (93, 'W_next', -1, 7, 4.2480555895859), |
| (94, 'E.', 7, 12, 2.9916009076630092), |
| (95, 'W_year', -1, 12, 3.5174064665274014), |
| (96, 'W_reflecting', -1, 29, 0.6113402176204787), |
| (97, 'E.', 29, 8, 2.819730039901505), |
| (98, 'U', -1, 8, -0.18779864470197277), |
| (99, 'W_higher', -1, 8, 3.1797755805421333), |
| (100, 'R_endsWithER', -1, 8, 1.5728998100190603), |
| (101, 'E.', 8, 13, 2.318262709087868), |
| (102, 'W_costs', -1, 13, 2.381538004167296), |
| (103, 'E.', 13, 1, 0.9855356754199541), |
| (104, 'E.', 1, 28, 1.8595054995497817), |
| (105, 'U', -1, 28, 1.5446722882388022), |
| (106, 'W_tightened', -1, 28, 3.8726486164808493), |
| (107, 'R_endsWithED', -1, 28, 2.5351002298167784), |
| (108, 'E.', 28, 12, 2.274821834309125), |
| (109, 'W_demand', -1, 12, 4.064293186476195), |
| (110, 'E.', 12, 12, 2.2005287976885133), |
| (111, 'W_transport', -1, 12, 4.716356817414893), |
| (112, 'S.', -1, 7, 0.6581636527243294), |
| (113, 'W_major', -1, 7, 2.980552933449864), |
| (114, 'E.', 7, 13, 3.065321079522356), |
| (115, 'W_shippers', -1, 13, 2.542917930136408), |
| (116, 'W_say', -1, 31, 2.2479773095428093), |
| (117, 'E.', 31, 18, 2.874166869399026), |
| (118, 'W_they', -1, 18, 3.143575891184453), |
| (119, 'E.', 18, 31, 4.2749079217250445), |
| (120, 'W_expect', -1, 31, 3.595622140488586), |
| (121, 'E.', 31, 12, 1.6611422263789781), |
| (122, 'E.', 13, 25, 1.1213816226246236), |
| (123, 'W_rise', -1, 27, 1.5778724980775618), |
| (124, 'E.', 27, 6, 2.568809140561043), |
| (125, 'W_at', -1, 6, 3.9554245620023827), |
| (126, 'E.', 6, 9, 2.6559245091013293), |
| (127, 'U', -1, 9, 0.9950701292906889), |
| (128, 'W_least', -1, 9, 4.103392620219655), |
| (129, 'E.', 9, 20, 3.709110073868926), |
| (130, 'W_as', -1, 20, 3.1580690701037972), |
| (131, 'R_endsWithS', -1, 20, 1.0980845299769795), |
| (132, 'E.', 20, 20, 2.1155578516782), |
| (133, 'W_fast', -1, 20, 3.951081121254258), |
| (134, 'E.', 20, 6, 2.364546163338331), |
| (135, 'W_as', -1, 6, 4.459742427306834), |
| (136, 'R_endsWithS', -1, 6, 0.27528290655402593), |
| (137, 'E.', 1, 20, 2.002143530981041), |
| (138, 'W_maybe', -1, 20, 3.5081392526668402), |
| (139, 'E.', 20, 21, 2.2175425248487377), |
| (140, 'U', -1, 21, 0.5355418339873245), |
| (141, 'W_faster', -1, 21, 3.71210878977271), |
| (142, 'R_endsWithER', -1, 21, 3.472793820076381), |
| (143, 'E.', 21, 6, 1.3108157211407567), |
| (144, 'W_in', -1, 6, 3.840521231387154), |
| (145, 'E.', 3, 7, 2.8789088935047493), |
| (146, 'W_few', -1, 7, 2.4522548748428017), |
| (147, 'W_years', -1, 13, 1.7038384234850026), |
| (148, 'E.', 13, 44, 1.3730441332544099), |
| (149, 'S.', -1, 3, 1.926568025628669), |
| (150, 'U', -1, 3, 0.18622978543338722), |
| (151, 'W_that', -1, 3, 3.7701267071548883), |
| (152, 'E.', 3, 32, 3.0418354933422265), |
| (153, 'U', -1, 32, 1.1440561095338802), |
| (154, 'W_''s', -1, 32, 2.7291745322476184), |
| (155, 'R_endsWithS', -1, 32, 2.6658498367189947), |
| (156, 'E.', 32, 3, 1.956185562422949), |
| (157, 'W_a', -1, 3, 4.567599380671607), |
| (158, 'W_big', -1, 7, 2.9735195102454743), |
| (159, 'W_change', -1, 12, 3.3193727375317574), |
| (160, 'W_from', -1, 6, 3.8878088109703994), |
| (161, 'E.', 6, 7, 2.454738423661261), |
| (162, 'W_recent', -1, 7, 2.813702206270077), |
| (163, 'E.', 13, 36, 2.327670018390557), |
| (164, 'U', -1, 36, 1.4528234424876696), |
| (165, 'W_when', -1, 36, 4.011265086980163), |
| (166, 'E.', 36, 12, 2.857525452775004), |
| (167, 'W_haulage', -1, 12, 4.32506396474813), |
| (168, 'E.', 12, 28, 1.9580138925595794), |
| (169, 'W_was', -1, 28, 4.2660057629684), |
| (170, 'R_endsWithS', -1, 28, 1.7934223574225978), |
| (171, 'E.', 28, 3, 2.2891724016343495), |
| (172, 'W_bright', -1, 7, 2.992681149090579), |
| (173, 'W_spot', -1, 12, 3.220613491106308), |
| (174, 'E.', 6, 14, 2.6412138657174293), |
| (175, 'W_u.s.', -1, 14, 3.2079965049318577), |
| (176, 'R_endsWithDot', -1, 14, 3.2079965049318577), |
| (177, 'E.', 14, 12, 2.356503203677101), |
| (178, 'W_productivity', -1, 12, 3.822919829706685), |
| (179, 'W_helping', -1, 29, 0.694693493722968), |
| (180, 'W_restrain', -1, 27, 0.8600913604077386), |
| (181, 'E.', 1, 27, 2.411953958040776), |
| (182, 'W_make', -1, 27, 3.898590273777646), |
| (183, 'E.', 27, 14, 2.1195180614626326), |
| (184, 'W_industry', -1, 12, 3.6043926013837373), |
| (185, 'E.', 12, 21, 2.609806711358176), |
| (186, 'W_more', -1, 21, 3.552659383833094), |
| (187, 'E.', 21, 7, 3.1775432403400985), |
| (188, 'W_competitive', -1, 7, 2.976622053158026), |
| (189, 'E.', 7, 20, 1.771180632746488), |
| (190, 'W_abroad', -1, 20, 3.795196762237916), |
| (191, 'E.', 20, 44, 2.295613975954753), |
| (192, 'S.', -1, 40, 2.186455757483502), |
| (193, 'W_``', -1, 40, 5.004869688499841), |
| (194, 'E.', 40, 12, 2.5755893774727556), |
| (195, 'E.', 12, 32, 1.7721648420817446), |
| (196, 'W_has', -1, 32, 3.93625538731919), |
| (197, 'E.', 32, 30, 2.6386288731321472), |
| (198, 'W_caught', -1, 30, 3.129300187453606), |
| (199, 'W_up', -1, 6, 4.151560395841534), |
| (200, 'W_with', -1, 6, 3.675511355861305), |
| (201, 'W_supply', -1, 12, 2.715998611959373), |
| (202, 'R_endsWithly', -1, 12, 0.7673422652186473), |
| (203, 'W_certain', -1, 7, 2.851915076211084), |
| (204, 'W_types', -1, 13, 1.6258788404774098), |
| (205, 'E.', 13, 6, 2.6905432557328264), |
| (206, 'W_transportation', -1, 12, 3.191264085926264), |
| (207, 'E.', 43, 1, 1.2711473691909434), |
| (208, 'E.', 1, 13, 1.182207619933404), |
| (209, 'W_starting', -1, 29, 0.5515350033996826), |
| (210, 'W_move', -1, 27, 0.8120644141223547), |
| (211, 'E.', 6, 39, 2.7182674903817587), |
| (212, 'W_''''', -1, 39, 5.170862953158661), |
| (213, 'E.', 39, 6, 2.317192606205641), |
| (214, 'E.', 12, 40, 1.7538803171442363), |
| (215, 'E.', 40, 20, 3.0627961041121843), |
| (216, 'W_close', -1, 20, 2.495992180876776), |
| (217, 'E.', 20, 25, 1.5800294126144967), |
| (218, 'E.', 25, 1, 2.241039628902612), |
| (219, 'W_slightly', -1, 20, 1.3492186869598903), |
| (220, 'R_endsWithly', -1, 20, 2.4413654689445927), |
| (221, 'E.', 20, 8, 2.9386643499826097), |
| (222, 'W_more', -1, 8, 4.452742873438579), |
| (223, 'E.', 8, 6, 2.9936854236944725), |
| (224, 'W_than', -1, 6, 2.1644820524819646), |
| (225, 'E.', 43, 39, 1.4996394861187865), |
| (226, 'E.', 39, 28, 2.7463692256732095), |
| (227, 'W_said', -1, 28, 4.677340288404336), |
| (228, 'E.', 28, 14, 1.9336746850852031), |
| (229, 'W_clifford', -1, 14, 2.0747538313323317), |
| (230, 'E.', 14, 14, 3.9294546550138674), |
| (231, 'W_sayre', -1, 14, 3.3385592403455577), |
| (232, 'E.', 43, 12, 2.2914590080022816), |
| (233, 'W_director', -1, 12, 3.4531076078614316), |
| (234, 'E.', 6, 13, 1.054540995246233), |
| (235, 'W_logistics', -1, 13, 2.9422159343971326), |
| (236, 'W_du', -1, 14, 2.31602144186341), |
| (237, 'W_pont', -1, 14, 1.4463820426038703), |
| (238, 'W_co', -1, 14, 2.5456207401359405), |
| (239, 'E.', 14, 44, 1.7816633932596668), |
| (240, 'S.', -1, 13, 1.0387123582845164), |
| (241, 'E.', 13, 30, 1.1881628310493961), |
| (242, 'W_surveyed', -1, 30, 0.892458430315305), |
| (243, 'E.', 30, 20, 1.532676298227432), |
| (244, 'W_recently', -1, 20, 2.2798691676335316), |
| (245, 'W_ohio', -1, 14, 2.344559598905777), |
| (246, 'W_state', -1, 14, 1.3829936762017245), |
| (247, 'W_university', -1, 14, 2.237132870920883), |
| (248, 'E.', 14, 28, 1.9577360535758594), |
| (249, 'E.', 28, 18, 2.2087856989728887), |
| (250, 'E.', 31, 19, 2.079584829527366), |
| (251, 'W_their', -1, 19, 4.067554496475091), |
| (252, 'E.', 19, 7, 3.6290543548238685), |
| (253, 'W_freight-transport', -1, 7, 3.2568844715837297), |
| (254, 'R_isDashSeparatedWords', -1, 7, 0.7948432491344457), |
| (255, 'E.', 7, 43, 1.2615186281064843), |
| (256, 'W_storage', -1, 12, 2.632730740932247), |
| (257, 'W_distribution', -1, 12, 3.9157525580557246), |
| (258, 'W_about', -1, 6, 3.065121356369797), |
| (259, 'E.', 6, 2, 2.5380914123626104), |
| (260, 'W_DIGIT', -1, 2, 3.716164825033898), |
| (261, 'R_isAllCapital', -1, 2, 3.716164825033898), |
| (262, 'E.', 2, 12, 2.691091194255502), |
| (263, 'W_%', -1, 12, 4.045579535978065), |
| (264, 'E.', 12, 3, 1.5517451432551261), |
| (265, 'S.', -1, 20, 1.0762484384385997), |
| (266, 'W_only', -1, 20, 2.268536436241348), |
| (267, 'E.', 20, 2, 1.4108190607695246), |
| (268, 'E.', 3, 2, 1.6621246460306802), |
| (269, 'E.', 2, 13, 1.41212953803894), |
| (270, 'W_polled', -1, 30, 1.647493520976822), |
| (271, 'E.', 30, 30, 0.7767009450630513), |
| (272, 'W_expected', -1, 30, 0.665862099618371), |
| (273, 'E.', 30, 19, 1.8495571863795943), |
| (274, 'W_decrease', -1, 27, 1.2921324719942597), |
| (275, 'E.', 27, 43, 1.0963707625581782), |
| (276, 'W_compared', -1, 30, 0.9464663786685238), |
| (277, 'E.', 12, 34, 1.9870890250594957), |
| (278, 'U', -1, 34, 0.9707407033846202), |
| (279, 'W_who', -1, 34, 4.159314366798417), |
| (280, 'E.', 34, 28, 3.0290938616080796), |
| (281, 'W_had', -1, 28, 3.210438877671987), |
| (282, 'E.', 28, 30, 1.498126968305331), |
| (283, 'W_looked', -1, 30, 0.8477036764861315), |
| (284, 'E.', 30, 25, 1.058282178536846), |
| (285, 'W_freight', -1, 27, 2.6547922325266726), |
| (286, 'E.', 12, 25, 0.7587012089044496), |
| (287, 'W_reduce', -1, 27, 1.9870512796567945), |
| (288, 'E.', 27, 13, 0.6748848167259296), |
| (289, 'W_past', -1, 7, 2.852378831268221); |
| |
| -- Compare learned weights with the reference weights, feature by feature. |
| -- Rows are paired on (name, prev_label, label); the summed absolute weight |
| -- difference over all paired rows must stay below 0.1. |
| -- SUM over zero paired rows yields NULL, which would make the condition NULL instead of FALSE. |
| -- COALESCE turns that case into an explicit failure. |
| -- NOTE(review): presumably assert does not raise on a NULL condition - confirm against its definition. |
| SELECT assert( |
|     COALESCE(SUM(abs(c1.weight - c2.weight)) < 0.1, FALSE), |
|     'Total difference between extracted feature weights and expected feature weights is > 0.1.') |
| FROM expected_crf_feature_new AS c1 |
| INNER JOIN train_new_crf_feature AS c2 |
|     ON c1.name = c2.name |
|     AND c1.prev_label = c2.prev_label_id |
|     AND c1.label = c2.label_id; |
| |
| -- Check that the trained feature set and the reference feature set are the same multiset |
| -- of (name, prev_label, label) triples. |
| -- exp_only counts reference rows missing from the training output, got_only counts |
| -- training rows missing from the reference; the assertion fails unless both counts are zero. |
| SELECT assert( |
|     exp_only.n_rows + got_only.n_rows = 0, |
|     'Features extracted do not match expected features.') |
| FROM ( |
|     SELECT count(*) AS n_rows |
|     FROM ( |
|         SELECT name, prev_label, label |
|         FROM expected_crf_feature_new |
|         EXCEPT ALL |
|         SELECT name, prev_label_id, label_id |
|         FROM train_new_crf_feature |
|     ) AS reference_minus_trained |
| ) AS exp_only |
| CROSS JOIN ( |
|     SELECT count(*) AS n_rows |
|     FROM ( |
|         SELECT name, prev_label_id, label_id |
|         FROM train_new_crf_feature |
|         EXCEPT ALL |
|         SELECT name, prev_label, label |
|         FROM expected_crf_feature_new |
|     ) AS trained_minus_reference |
| ) AS got_only; |
| |
| !>) |
| m4_changequote(<!`!>,<!'!>) |