blob: b6d4e7c4413a0ba7888f20526359c17bd456e4be [file] [log] [blame]
---------------------------------------------------------------------------
-- Rules:
-- ------
-- 1) Any DB objects should be created w/o schema prefix,
-- since this file is executed in a separate schema context.
-- 2) There should be no DROP statements in this script, since
-- all objects created in the default schema will be cleaned-up outside.
---------------------------------------------------------------------------
m4_include(`SQLCommon.m4')
m4_changequote(<!,!>)
m4_ifdef(<!__HAS_ORDERED_AGGREGATES__!>,<!
-- Training segment table (one row per token of the training documents):
--   start_pos  0-based position of the token inside its document
--   doc_id     identifier of the document the token belongs to
--   seg_text   the token text
--   label      numeric label id attached to the token
--   max_pos    last start_pos of the document, repeated on every row
CREATE TABLE train_new_segmenttbl (
    start_pos integer,
    doc_id    integer,
    seg_text  text,
    label     integer,
    max_pos   integer
);
-- The column list is spelled out so the rows below do not silently depend on
-- the column order of the table definition.
INSERT INTO train_new_segmenttbl (start_pos, doc_id, seg_text, label, max_pos) VALUES
(0, 1, 'freight', 12, 27),
(1, 1, 'rates', 13, 27),
(2, 1, ',', 43, 27),
(3, 1, 'declining', 29, 27),
(4, 1, 'for', 6, 27),
(5, 1, 'most', 22, 27),
(6, 1, 'of', 6, 27),
(7, 1, 'the', 3, 27),
(8, 1, 'decade', 12, 27),
(9, 1, 'because', 6, 27),
(10, 1, 'of', 6, 27),
(11, 1, 'competition', 12, 27),
(12, 1, 'spurred', 30, 27),
(13, 1, 'by', 6, 27),
(14, 1, 'deregulation', 12, 27),
(15, 1, ',', 43, 27),
(16, 1, 'are', 31, 27),
(17, 1, 'bottoming', 29, 27),
(18, 1, 'out', 6, 27),
(19, 1, ',', 43, 27),
(20, 1, 'turning', 29, 27),
(21, 1, 'upward', 20, 27),
(22, 1, 'and', 1, 27),
(23, 1, 'threatening', 29, 27),
(24, 1, 'to', 25, 27),
(25, 1, 'fuel', 27, 27),
(26, 1, 'inflation', 12, 27),
(27, 1, '.', 44, 27),
(0, 2, 'trucking', 14, 29),
(1, 2, ',', 43, 29),
(2, 2, 'shipping', 29, 29),
(3, 2, 'and', 1, 29),
(4, 2, 'air-freight', 12, 29),
(5, 2, 'companies', 13, 29),
(6, 2, 'have', 31, 29),
(7, 2, 'announced', 30, 29),
(8, 2, 'rate', 12, 29),
(9, 2, 'increases', 13, 29),
(10, 2, ',', 43, 29),
(11, 2, 'scheduled', 30, 29),
(12, 2, 'for', 6, 29),
(13, 2, 'this', 3, 29),
(14, 2, 'fall', 12, 29),
(15, 2, 'or', 1, 29),
(16, 2, 'early', 7, 29),
(17, 2, 'next', 7, 29),
(18, 2, 'year', 12, 29),
(19, 2, ',', 43, 29),
(20, 2, 'reflecting', 29, 29),
(21, 2, 'higher', 8, 29),
(22, 2, 'costs', 13, 29),
(23, 2, 'and', 1, 29),
(24, 2, 'tightened', 28, 29),
(25, 2, 'demand', 12, 29),
(26, 2, 'for', 6, 29),
(27, 2, 'freight', 12, 29),
(28, 2, 'transport', 12, 29),
(29, 2, '.', 44, 29),
(0, 3, 'major', 7, 23),
(1, 3, 'shippers', 13, 23),
(2, 3, 'say', 31, 23),
(3, 3, 'they', 18, 23),
(4, 3, 'expect', 31, 23),
(5, 3, 'freight', 12, 23),
(6, 3, 'rates', 13, 23),
(7, 3, 'to', 25, 23),
(8, 3, 'rise', 27, 23),
(9, 3, 'at', 6, 23),
(10, 3, 'least', 9, 23),
(11, 3, 'as', 20, 23),
(12, 3, 'fast', 20, 23),
(13, 3, 'as', 6, 23),
(14, 3, 'inflation', 12, 23),
(15, 3, 'and', 1, 23),
(16, 3, 'maybe', 20, 23),
(17, 3, 'faster', 21, 23),
(18, 3, 'in', 6, 23),
(19, 3, 'the', 3, 23),
(20, 3, 'next', 7, 23),
(21, 3, 'few', 7, 23),
(22, 3, 'years', 13, 23),
(23, 3, '.', 44, 23),
(0, 4, 'that', 3, 30),
(1, 4, '''s', 32, 30),
(2, 4, 'a', 3, 30),
(3, 4, 'big', 7, 30),
(4, 4, 'change', 12, 30),
(5, 4, 'from', 6, 30),
(6, 4, 'recent', 7, 30),
(7, 4, 'years', 13, 30),
(8, 4, 'when', 36, 30),
(9, 4, 'freight', 12, 30),
(10, 4, 'haulage', 12, 30),
(11, 4, 'was', 28, 30),
(12, 4, 'a', 3, 30),
(13, 4, 'bright', 7, 30),
(14, 4, 'spot', 12, 30),
(15, 4, 'for', 6, 30),
(16, 4, 'u.s.', 14, 30),
(17, 4, 'productivity', 12, 30),
(18, 4, ',', 43, 30),
(19, 4, 'helping', 29, 30),
(20, 4, 'to', 25, 30),
(21, 4, 'restrain', 27, 30),
(22, 4, 'inflation', 12, 30),
(23, 4, 'and', 1, 30),
(24, 4, 'make', 27, 30),
(25, 4, 'u.s.', 14, 30),
(26, 4, 'industry', 12, 30),
(27, 4, 'more', 21, 30),
(28, 4, 'competitive', 7, 30),
(29, 4, 'abroad', 20, 30),
(30, 4, '.', 44, 30),
(0, 5, '``', 40, 49),
(1, 5, 'demand', 12, 49),
(2, 5, 'has', 32, 49),
(3, 5, 'caught', 30, 49),
(4, 5, 'up', 6, 49),
(5, 5, 'with', 6, 49),
(6, 5, 'the', 3, 49),
(7, 5, 'supply', 12, 49),
(8, 5, 'of', 6, 49),
(9, 5, 'certain', 7, 49),
(10, 5, 'types', 13, 49),
(11, 5, 'of', 6, 49),
(12, 5, 'freight', 12, 49),
(13, 5, 'transportation', 12, 49),
(14, 5, ',', 43, 49),
(15, 5, 'and', 1, 49),
(16, 5, 'rates', 13, 49),
(17, 5, 'are', 31, 49),
(18, 5, 'starting', 29, 49),
(19, 5, 'to', 25, 49),
(20, 5, 'move', 27, 49),
(21, 5, 'up', 6, 49),
(22, 5, '''''', 39, 49),
(23, 5, 'at', 6, 49),
(24, 5, 'a', 3, 49),
(25, 5, 'rate', 12, 49),
(26, 5, '``', 40, 49),
(27, 5, 'close', 20, 49),
(28, 5, 'to', 25, 49),
(29, 5, 'or', 1, 49),
(30, 5, 'slightly', 20, 49),
(31, 5, 'more', 8, 49),
(32, 5, 'than', 6, 49),
(33, 5, 'the', 3, 49),
(34, 5, 'inflation', 12, 49),
(35, 5, 'rate', 12, 49),
(36, 5, ',', 43, 49),
(37, 5, '''''', 39, 49),
(38, 5, 'said', 28, 49),
(39, 5, 'clifford', 14, 49),
(40, 5, 'sayre', 14, 49),
(41, 5, ',', 43, 49),
(42, 5, 'director', 12, 49),
(43, 5, 'of', 6, 49),
(44, 5, 'logistics', 13, 49),
(45, 5, 'at', 6, 49),
(46, 5, 'du', 14, 49),
(47, 5, 'pont', 14, 49),
(48, 5, 'co', 14, 49),
(49, 5, '.', 44, 49),
(0, 6, 'shippers', 13, 24),
(1, 6, 'surveyed', 30, 24),
(2, 6, 'recently', 20, 24),
(3, 6, 'by', 6, 24),
(4, 6, 'ohio', 14, 24),
(5, 6, 'state', 14, 24),
(6, 6, 'university', 14, 24),
(7, 6, 'said', 28, 24),
(8, 6, 'they', 18, 24),
(9, 6, 'expect', 31, 24),
(10, 6, 'their', 19, 24),
(11, 6, 'freight-transport', 7, 24),
(12, 6, ',', 43, 24),
(13, 6, 'storage', 12, 24),
(14, 6, 'and', 1, 24),
(15, 6, 'distribution', 12, 24),
(16, 6, 'costs', 13, 24),
(17, 6, 'to', 25, 24),
(18, 6, 'rise', 27, 24),
(19, 6, 'about', 6, 24),
(20, 6, '4', 2, 24),
(21, 6, '%', 12, 24),
(22, 6, 'this', 3, 24),
(23, 6, 'year', 12, 24),
(24, 6, '.', 44, 24),
(0, 7, 'only', 20, 31),
(1, 7, '10', 2, 31),
(2, 7, '%', 12, 31),
(3, 7, 'of', 6, 31),
(4, 7, 'the', 3, 31),
(5, 7, '250', 2, 31),
(6, 7, 'shippers', 13, 31),
(7, 7, 'polled', 30, 31),
(8, 7, 'expected', 30, 31),
(9, 7, 'their', 19, 31),
(10, 7, 'freight-transport', 7, 31),
(11, 7, 'costs', 13, 31),
(12, 7, 'to', 25, 31),
(13, 7, 'decrease', 27, 31),
(14, 7, ',', 43, 31),
(15, 7, 'compared', 30, 31),
(16, 7, 'with', 6, 31),
(17, 7, '30', 2, 31),
(18, 7, '%', 12, 31),
(19, 7, 'who', 34, 31),
(20, 7, 'had', 28, 31),
(21, 7, 'looked', 30, 31),
(22, 7, 'to', 25, 31),
(23, 7, 'freight', 27, 31),
(24, 7, 'transport', 12, 31),
(25, 7, 'to', 25, 31),
(26, 7, 'reduce', 27, 31),
(27, 7, 'costs', 13, 31),
(28, 7, 'in', 6, 31),
(29, 7, 'past', 7, 31),
(30, 7, 'years', 13, 31),
(31, 7, '.', 44, 31);
-- Regex table: each row pairs a regular expression with the name of the
-- feature that pattern stands for.
CREATE TABLE train_new_regex (
    pattern text,
    name    text
);
INSERT INTO train_new_regex (pattern, name) VALUES
    ('^[A-Z][a-z]+$', 'InitCapital'),
    ('^[A-Z]+$',      'isAllCapital'),
    ('^.*[0-9]+.*$',  'containsDigit'),
    ('^.+[.]$',       'endsWithDot'),
    ('^.+[,]$',       'endsWithComma'),
    ('^.+er$',        'endsWithER'),
    ('^.+est$',       'endsWithEst'),
    ('^.+ed$',        'endsWithED'),
    ('^.+s$',         'endsWithS'),
    ('^.+ing$',       'endsWithIng'),
    ('^.+ly$',        'endsWithly'),
    ('^.+-.+$',       'isDashSeparatedWords'),
    ('^.*@.*$',       'isEmailId');
-- Refresh planner statistics for the freshly loaded table.
ANALYZE train_new_regex;
-- Label table: maps each numeric label id (0 to 44) to its tag string.
-- The rows are loaded by three separate INSERT statements because the
-- double-backtick tag is wrapped in the preprocessor quote delimiters and the
-- preprocessor quoting is changed right after that row; keep the statements
-- and the preprocessor line between them in this exact order.
-- NOTE(review): the label ids used in train_new_segmenttbl and in
-- expected_crf_feature_new look one higher than the ids defined here (the
-- period token is labelled 44 there, while here 44 is the colon and 43 is the
-- period) - confirm whether this offset is intended.
CREATE TABLE crf_label_new (id integer,label character varying);
INSERT INTO crf_label_new VALUES
(0,'CC'), (1,'CD'), (2,'DT'), (3,'EX'), (4,'FW'), (5,'IN'), (6,'JJ'), (7,'JJR'), (8,'JJS'),
(9,'LS'), (10,'MD'), (11,'NN'), (12,'NNS'), (13,'NNP'),(14,'NNPS'),(15,'PDT'),(16,'POS'),(17,'PRP'),
(18,'PRP$'),(19,'RB'), (20,'RBR'), (21,'RBS'), (22,'RP'), (23,'SYM'), (24,'TO'), (25,'UH'), (26,'VB'),
(27,'VBD'), (28,'VBG'),(29,'VBN'), (30,'VBP'), (31,'VBZ'),(32,'WDT'), (33,'WP'), (34,'WP$'),(35,'WRB'),
(36,'$'), (37,'#'), (38,'''''');
INSERT INTO crf_label_new VALUES
(39,<!'``'!>);
m4_changequote(,)
INSERT INTO crf_label_new VALUES
(40,'('), (41,')'), (42,','), (43,'.'), (44,':');
analyze crf_label_new;
-- Step 1: feature generation. Reads the segment, regex and label tables built
-- above; the remaining three arguments name the dictionary, feature and
-- feature-set tables (presumably created by the call - confirm).
SELECT crf_train_fgen(
    'train_new_segmenttbl',
    'train_new_regex',
    'crf_label_new',
    'train_new_dictionary',
    'train_new_featuretbl',
    'train_new_featureset'
);
-- Step 2: training. The resulting train_new_crf_feature table is what the
-- assertions at the end of this script compare against the expected features.
-- The trailing 30 is presumably the iteration limit - confirm.
SELECT lincrf_train(
    'train_new_featuretbl',
    'train_new_featureset',
    'crf_label_new',
    'train_new_stats',
    'train_new_crf_feature',
    30
);
-- Expected feature table
-- The result is produced from Dr. Sunita's CRF java package with the same input
--   id          sequential feature number, starting at 0
--   name        feature name
--   prev_label  previous label id for the features named E., -1 for all others
--   label       label id the feature belongs to
--   weight      expected weight of the feature
CREATE TABLE expected_crf_feature_new (
    id         integer,
    name       text,
    prev_label integer,
    label      integer,
    weight     float
);
-- The column list is spelled out so the rows below do not silently depend on
-- the column order of the table definition.
INSERT INTO expected_crf_feature_new (id, name, prev_label, label, weight) VALUES
(0, 'S.', -1, 12, 0.5516753522178934),
(1, 'W_freight', -1, 12, 5.959241076198326),
(2, 'E.', 12, 13, 2.0789747316372034),
(3, 'W_rates', -1, 13, 2.0837653985907174),
(4, 'R_endsWithS', -1, 13, 5.306222451221396),
(5, 'E.', 13, 43, 1.8107793215017256),
(6, 'W_,', -1, 43, 7.286509411020296),
(7, 'E.', 43, 29, 2.222715331802022),
(8, 'U', -1, 29, 0.9492501230997201),
(9, 'W_declining', -1, 29, 0.9644339685939746),
(10, 'R_endsWithIng', -1, 29, 5.3575087838268995),
(11, 'E.', 29, 6, 2.459070565360766),
(12, 'W_for', -1, 6, 4.538367636569374),
(13, 'E.', 6, 22, 2.633416169216998),
(14, 'U', -1, 22, 1.2724051325350452),
(15, 'W_most', -1, 22, 4.742804679577188),
(16, 'E.', 22, 6, 2.3563567868687803),
(17, 'W_of', -1, 6, 5.886369615822257),
(18, 'E.', 6, 3, 3.4072332563788974),
(19, 'W_the', -1, 3, 5.003139924048258),
(20, 'E.', 3, 12, 3.4951824947429704),
(21, 'U', -1, 12, 1.3530610141204105),
(22, 'W_decade', -1, 12, 2.5363441670581475),
(23, 'E.', 12, 6, 3.168556891460435),
(24, 'U', -1, 6, 0.8846547539288385),
(25, 'W_because', -1, 6, 4.371973715457267),
(26, 'E.', 6, 6, 2.3178076749274994),
(27, 'E.', 6, 12, 3.008379674551115),
(28, 'W_competition', -1, 12, 3.7657358279072044),
(29, 'E.', 12, 30, 1.4638353616301887),
(30, 'U', -1, 30, 1.661969150148885),
(31, 'W_spurred', -1, 30, 1.0199935936286502),
(32, 'R_endsWithED', -1, 30, 5.781210636435915),
(33, 'E.', 30, 6, 2.610516071895145),
(34, 'W_by', -1, 6, 4.016756180248158),
(35, 'W_deregulation', -1, 12, 3.107044113256852),
(36, 'E.', 12, 43, 3.0918965630757156),
(37, 'E.', 43, 31, 1.8076032969941829),
(38, 'W_are', -1, 31, 3.4834944891071182),
(39, 'E.', 31, 29, 3.0246636670581872),
(40, 'W_bottoming', -1, 29, 0.9246692126583386),
(41, 'W_out', -1, 6, 4.166232235849933),
(42, 'E.', 6, 43, 1.4287166848500423),
(43, 'W_turning', -1, 29, 0.9453908838237025),
(44, 'E.', 29, 20, 1.9755638903302428),
(45, 'U', -1, 20, 2.276171985449911),
(46, 'W_upward', -1, 20, 3.0573963140454667),
(47, 'E.', 20, 1, 1.7604125260111),
(48, 'W_and', -1, 1, 6.74422233788292),
(49, 'E.', 1, 29, 1.3137638161586382),
(50, 'W_threatening', -1, 29, 1.4170133688772073),
(51, 'E.', 29, 25, 1.6388299340148602),
(52, 'W_to', -1, 25, 5.749953289380646),
(53, 'E.', 25, 27, 6.296991117616081),
(54, 'U', -1, 27, 1.5167524281785714),
(55, 'W_fuel', -1, 27, 0.8669685656391124),
(56, 'E.', 27, 12, 2.7884267413237405),
(57, 'W_inflation', -1, 12, 4.497973086341019),
(58, 'E.', 12, 44, 2.1195755732689774),
(59, 'End.', -1, 44, 4.0877966130773755),
(60, 'W_.', -1, 44, 4.0877966130773755),
(61, 'S.', -1, 14, 1.3657688665015333),
(62, 'U', -1, 14, 2.0185522749436866),
(63, 'W_trucking', -1, 14, 3.1701428979681845),
(64, 'R_endsWithIng', -1, 14, 2.1776484880257367),
(65, 'E.', 14, 43, 1.497373571911848),
(66, 'W_shipping', -1, 29, 1.4634895303741247),
(67, 'E.', 29, 1, 1.3751429994639364),
(68, 'E.', 1, 12, 2.448895430660733),
(69, 'W_air-freight', -1, 12, 3.276197718236269),
(70, 'R_isDashSeparatedWords', -1, 12, 1.390362987986471),
(71, 'U', -1, 13, -0.039795570326036186),
(72, 'W_companies', -1, 13, 1.410903273102038),
(73, 'E.', 13, 31, 2.973781128828628),
(74, 'U', -1, 31, 1.3700945582140844),
(75, 'W_have', -1, 31, 3.0882453767746747),
(76, 'E.', 31, 30, 1.9098632637338704),
(77, 'W_announced', -1, 30, 1.4380819186872527),
(78, 'E.', 30, 12, 0.9060674369789037),
(79, 'W_rate', -1, 12, 5.064883717975049),
(80, 'W_increases', -1, 13, 2.319719757676896),
(81, 'E.', 43, 30, 1.299378678795197),
(82, 'W_scheduled', -1, 30, 0.9387965126997426),
(83, 'W_this', -1, 3, 4.370856586315017),
(84, 'R_endsWithS', -1, 3, 2.076343517584421),
(85, 'W_fall', -1, 12, 2.1032438732239624),
(86, 'E.', 12, 1, 3.030829781200263),
(87, 'W_or', -1, 1, 6.18424407183645),
(88, 'E.', 1, 7, 0.868722661655972),
(89, 'U', -1, 7, 2.23345149856934),
(90, 'W_early', -1, 7, 3.384977523038234),
(91, 'R_endsWithly', -1, 7, 1.5733441079056303),
(92, 'E.', 7, 7, 2.7161792323578373),
(93, 'W_next', -1, 7, 4.2480555895859),
(94, 'E.', 7, 12, 2.9916009076630092),
(95, 'W_year', -1, 12, 3.5174064665274014),
(96, 'W_reflecting', -1, 29, 0.6113402176204787),
(97, 'E.', 29, 8, 2.819730039901505),
(98, 'U', -1, 8, -0.18779864470197277),
(99, 'W_higher', -1, 8, 3.1797755805421333),
(100, 'R_endsWithER', -1, 8, 1.5728998100190603),
(101, 'E.', 8, 13, 2.318262709087868),
(102, 'W_costs', -1, 13, 2.381538004167296),
(103, 'E.', 13, 1, 0.9855356754199541),
(104, 'E.', 1, 28, 1.8595054995497817),
(105, 'U', -1, 28, 1.5446722882388022),
(106, 'W_tightened', -1, 28, 3.8726486164808493),
(107, 'R_endsWithED', -1, 28, 2.5351002298167784),
(108, 'E.', 28, 12, 2.274821834309125),
(109, 'W_demand', -1, 12, 4.064293186476195),
(110, 'E.', 12, 12, 2.2005287976885133),
(111, 'W_transport', -1, 12, 4.716356817414893),
(112, 'S.', -1, 7, 0.6581636527243294),
(113, 'W_major', -1, 7, 2.980552933449864),
(114, 'E.', 7, 13, 3.065321079522356),
(115, 'W_shippers', -1, 13, 2.542917930136408),
(116, 'W_say', -1, 31, 2.2479773095428093),
(117, 'E.', 31, 18, 2.874166869399026),
(118, 'W_they', -1, 18, 3.143575891184453),
(119, 'E.', 18, 31, 4.2749079217250445),
(120, 'W_expect', -1, 31, 3.595622140488586),
(121, 'E.', 31, 12, 1.6611422263789781),
(122, 'E.', 13, 25, 1.1213816226246236),
(123, 'W_rise', -1, 27, 1.5778724980775618),
(124, 'E.', 27, 6, 2.568809140561043),
(125, 'W_at', -1, 6, 3.9554245620023827),
(126, 'E.', 6, 9, 2.6559245091013293),
(127, 'U', -1, 9, 0.9950701292906889),
(128, 'W_least', -1, 9, 4.103392620219655),
(129, 'E.', 9, 20, 3.709110073868926),
(130, 'W_as', -1, 20, 3.1580690701037972),
(131, 'R_endsWithS', -1, 20, 1.0980845299769795),
(132, 'E.', 20, 20, 2.1155578516782),
(133, 'W_fast', -1, 20, 3.951081121254258),
(134, 'E.', 20, 6, 2.364546163338331),
(135, 'W_as', -1, 6, 4.459742427306834),
(136, 'R_endsWithS', -1, 6, 0.27528290655402593),
(137, 'E.', 1, 20, 2.002143530981041),
(138, 'W_maybe', -1, 20, 3.5081392526668402),
(139, 'E.', 20, 21, 2.2175425248487377),
(140, 'U', -1, 21, 0.5355418339873245),
(141, 'W_faster', -1, 21, 3.71210878977271),
(142, 'R_endsWithER', -1, 21, 3.472793820076381),
(143, 'E.', 21, 6, 1.3108157211407567),
(144, 'W_in', -1, 6, 3.840521231387154),
(145, 'E.', 3, 7, 2.8789088935047493),
(146, 'W_few', -1, 7, 2.4522548748428017),
(147, 'W_years', -1, 13, 1.7038384234850026),
(148, 'E.', 13, 44, 1.3730441332544099),
(149, 'S.', -1, 3, 1.926568025628669),
(150, 'U', -1, 3, 0.18622978543338722),
(151, 'W_that', -1, 3, 3.7701267071548883),
(152, 'E.', 3, 32, 3.0418354933422265),
(153, 'U', -1, 32, 1.1440561095338802),
(154, 'W_''s', -1, 32, 2.7291745322476184),
(155, 'R_endsWithS', -1, 32, 2.6658498367189947),
(156, 'E.', 32, 3, 1.956185562422949),
(157, 'W_a', -1, 3, 4.567599380671607),
(158, 'W_big', -1, 7, 2.9735195102454743),
(159, 'W_change', -1, 12, 3.3193727375317574),
(160, 'W_from', -1, 6, 3.8878088109703994),
(161, 'E.', 6, 7, 2.454738423661261),
(162, 'W_recent', -1, 7, 2.813702206270077),
(163, 'E.', 13, 36, 2.327670018390557),
(164, 'U', -1, 36, 1.4528234424876696),
(165, 'W_when', -1, 36, 4.011265086980163),
(166, 'E.', 36, 12, 2.857525452775004),
(167, 'W_haulage', -1, 12, 4.32506396474813),
(168, 'E.', 12, 28, 1.9580138925595794),
(169, 'W_was', -1, 28, 4.2660057629684),
(170, 'R_endsWithS', -1, 28, 1.7934223574225978),
(171, 'E.', 28, 3, 2.2891724016343495),
(172, 'W_bright', -1, 7, 2.992681149090579),
(173, 'W_spot', -1, 12, 3.220613491106308),
(174, 'E.', 6, 14, 2.6412138657174293),
(175, 'W_u.s.', -1, 14, 3.2079965049318577),
(176, 'R_endsWithDot', -1, 14, 3.2079965049318577),
(177, 'E.', 14, 12, 2.356503203677101),
(178, 'W_productivity', -1, 12, 3.822919829706685),
(179, 'W_helping', -1, 29, 0.694693493722968),
(180, 'W_restrain', -1, 27, 0.8600913604077386),
(181, 'E.', 1, 27, 2.411953958040776),
(182, 'W_make', -1, 27, 3.898590273777646),
(183, 'E.', 27, 14, 2.1195180614626326),
(184, 'W_industry', -1, 12, 3.6043926013837373),
(185, 'E.', 12, 21, 2.609806711358176),
(186, 'W_more', -1, 21, 3.552659383833094),
(187, 'E.', 21, 7, 3.1775432403400985),
(188, 'W_competitive', -1, 7, 2.976622053158026),
(189, 'E.', 7, 20, 1.771180632746488),
(190, 'W_abroad', -1, 20, 3.795196762237916),
(191, 'E.', 20, 44, 2.295613975954753),
(192, 'S.', -1, 40, 2.186455757483502),
(193, 'W_``', -1, 40, 5.004869688499841),
(194, 'E.', 40, 12, 2.5755893774727556),
(195, 'E.', 12, 32, 1.7721648420817446),
(196, 'W_has', -1, 32, 3.93625538731919),
(197, 'E.', 32, 30, 2.6386288731321472),
(198, 'W_caught', -1, 30, 3.129300187453606),
(199, 'W_up', -1, 6, 4.151560395841534),
(200, 'W_with', -1, 6, 3.675511355861305),
(201, 'W_supply', -1, 12, 2.715998611959373),
(202, 'R_endsWithly', -1, 12, 0.7673422652186473),
(203, 'W_certain', -1, 7, 2.851915076211084),
(204, 'W_types', -1, 13, 1.6258788404774098),
(205, 'E.', 13, 6, 2.6905432557328264),
(206, 'W_transportation', -1, 12, 3.191264085926264),
(207, 'E.', 43, 1, 1.2711473691909434),
(208, 'E.', 1, 13, 1.182207619933404),
(209, 'W_starting', -1, 29, 0.5515350033996826),
(210, 'W_move', -1, 27, 0.8120644141223547),
(211, 'E.', 6, 39, 2.7182674903817587),
(212, 'W_''''', -1, 39, 5.170862953158661),
(213, 'E.', 39, 6, 2.317192606205641),
(214, 'E.', 12, 40, 1.7538803171442363),
(215, 'E.', 40, 20, 3.0627961041121843),
(216, 'W_close', -1, 20, 2.495992180876776),
(217, 'E.', 20, 25, 1.5800294126144967),
(218, 'E.', 25, 1, 2.241039628902612),
(219, 'W_slightly', -1, 20, 1.3492186869598903),
(220, 'R_endsWithly', -1, 20, 2.4413654689445927),
(221, 'E.', 20, 8, 2.9386643499826097),
(222, 'W_more', -1, 8, 4.452742873438579),
(223, 'E.', 8, 6, 2.9936854236944725),
(224, 'W_than', -1, 6, 2.1644820524819646),
(225, 'E.', 43, 39, 1.4996394861187865),
(226, 'E.', 39, 28, 2.7463692256732095),
(227, 'W_said', -1, 28, 4.677340288404336),
(228, 'E.', 28, 14, 1.9336746850852031),
(229, 'W_clifford', -1, 14, 2.0747538313323317),
(230, 'E.', 14, 14, 3.9294546550138674),
(231, 'W_sayre', -1, 14, 3.3385592403455577),
(232, 'E.', 43, 12, 2.2914590080022816),
(233, 'W_director', -1, 12, 3.4531076078614316),
(234, 'E.', 6, 13, 1.054540995246233),
(235, 'W_logistics', -1, 13, 2.9422159343971326),
(236, 'W_du', -1, 14, 2.31602144186341),
(237, 'W_pont', -1, 14, 1.4463820426038703),
(238, 'W_co', -1, 14, 2.5456207401359405),
(239, 'E.', 14, 44, 1.7816633932596668),
(240, 'S.', -1, 13, 1.0387123582845164),
(241, 'E.', 13, 30, 1.1881628310493961),
(242, 'W_surveyed', -1, 30, 0.892458430315305),
(243, 'E.', 30, 20, 1.532676298227432),
(244, 'W_recently', -1, 20, 2.2798691676335316),
(245, 'W_ohio', -1, 14, 2.344559598905777),
(246, 'W_state', -1, 14, 1.3829936762017245),
(247, 'W_university', -1, 14, 2.237132870920883),
(248, 'E.', 14, 28, 1.9577360535758594),
(249, 'E.', 28, 18, 2.2087856989728887),
(250, 'E.', 31, 19, 2.079584829527366),
(251, 'W_their', -1, 19, 4.067554496475091),
(252, 'E.', 19, 7, 3.6290543548238685),
(253, 'W_freight-transport', -1, 7, 3.2568844715837297),
(254, 'R_isDashSeparatedWords', -1, 7, 0.7948432491344457),
(255, 'E.', 7, 43, 1.2615186281064843),
(256, 'W_storage', -1, 12, 2.632730740932247),
(257, 'W_distribution', -1, 12, 3.9157525580557246),
(258, 'W_about', -1, 6, 3.065121356369797),
(259, 'E.', 6, 2, 2.5380914123626104),
(260, 'W_DIGIT', -1, 2, 3.716164825033898),
(261, 'R_isAllCapital', -1, 2, 3.716164825033898),
(262, 'E.', 2, 12, 2.691091194255502),
(263, 'W_%', -1, 12, 4.045579535978065),
(264, 'E.', 12, 3, 1.5517451432551261),
(265, 'S.', -1, 20, 1.0762484384385997),
(266, 'W_only', -1, 20, 2.268536436241348),
(267, 'E.', 20, 2, 1.4108190607695246),
(268, 'E.', 3, 2, 1.6621246460306802),
(269, 'E.', 2, 13, 1.41212953803894),
(270, 'W_polled', -1, 30, 1.647493520976822),
(271, 'E.', 30, 30, 0.7767009450630513),
(272, 'W_expected', -1, 30, 0.665862099618371),
(273, 'E.', 30, 19, 1.8495571863795943),
(274, 'W_decrease', -1, 27, 1.2921324719942597),
(275, 'E.', 27, 43, 1.0963707625581782),
(276, 'W_compared', -1, 30, 0.9464663786685238),
(277, 'E.', 12, 34, 1.9870890250594957),
(278, 'U', -1, 34, 0.9707407033846202),
(279, 'W_who', -1, 34, 4.159314366798417),
(280, 'E.', 34, 28, 3.0290938616080796),
(281, 'W_had', -1, 28, 3.210438877671987),
(282, 'E.', 28, 30, 1.498126968305331),
(283, 'W_looked', -1, 30, 0.8477036764861315),
(284, 'E.', 30, 25, 1.058282178536846),
(285, 'W_freight', -1, 27, 2.6547922325266726),
(286, 'E.', 12, 25, 0.7587012089044496),
(287, 'W_reduce', -1, 27, 1.9870512796567945),
(288, 'E.', 27, 13, 0.6748848167259296),
(289, 'W_past', -1, 7, 2.852378831268221);
-- Compare the trained weights with the expected weights of every feature that
-- appears in both tables; the summed absolute difference must stay below 0.1.
-- SUM over an empty join is NULL, which makes the comparison NULL instead of
-- FALSE; COALESCE turns that case into an explicit failure so the check can
-- never pass without having compared at least one weight.
SELECT assert(
    COALESCE(SUM(abs(c1.weight - c2.weight)) < 0.1, FALSE),
    'Total difference between extracted feature weights and expected feature weights is > 0.1.')
FROM expected_crf_feature_new AS c1
INNER JOIN train_new_crf_feature AS c2
    ON c1.name = c2.name
    AND c1.prev_label = c2.prev_label_id
    AND c1.label = c2.label_id;
-- Check that the extracted feature set is identical to the expected one.
-- The (name, previous label, label) triples are diffed in both directions with
-- EXCEPT ALL; the assertion fails as soon as either direction leaves a row.
SELECT assert(
    missing.n_rows + extra.n_rows = 0,
    'Features extracted do not match expected features.')
FROM (
    -- expected features that the training run did not produce
    SELECT count(*) AS n_rows
    FROM (
        SELECT name, prev_label, label
        FROM expected_crf_feature_new
        EXCEPT ALL
        SELECT name, prev_label_id, label_id
        FROM train_new_crf_feature
    ) AS expected_only
) AS missing
CROSS JOIN (
    -- features the training run produced that are not expected
    SELECT count(*) AS n_rows
    FROM (
        SELECT name, prev_label_id, label_id
        FROM train_new_crf_feature
        EXCEPT ALL
        SELECT name, prev_label, label
        FROM expected_crf_feature_new
    ) AS extracted_only
) AS extra;
!>)
m4_changequote(<!`!>,<!'!>)