| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| /* |
| * $Id$ |
| * |
| * Revision History |
| * =================== |
| * $Log: text.c,v $ |
| * Revision 1.3 2007/10/25 09:27:37 cktan |
| * fixed. |
| * |
| * Revision 1.2 2007/10/25 05:44:25 cktan |
| * more cleanup |
| * |
| * Revision 1.1 2007/10/24 20:25:23 cktan |
| * new |
| * |
| * Revision 1.2 2007/04/07 08:10:40 cmcdevitt |
| * Fixes for dbgen with large scale factors |
| * |
| * Revision 1.6 2006/07/31 17:23:09 jms |
| * fix to parallelism problem |
| * |
| * Revision 1.5 2006/05/18 23:50:00 jms |
| * commit text generation change with larger buffer |
| * |
| * Revision 1.4 2006/05/16 16:26:51 jms |
| * remove calls to FAKE_V_STR |
| * |
| * Revision 1.3 2006/05/16 15:55:58 jms |
| * first cut to Meikel |
| * |
| * Revision 1.2 2005/01/03 20:08:59 jms |
| * change line terminations |
| * |
| * Revision 1.1.1.1 2004/11/24 23:31:47 jms |
| * re-establish external server |
| * |
| * Revision 1.1.1.1 2003/08/07 17:58:34 jms |
| * recreation after CVS crash |
| * |
| * Revision 1.2 2003/08/07 17:58:34 jms |
| * Convery RNG to 64bit space as preparation for new large scale RNG |
| * |
| * Revision 1.1.1.1 2003/04/03 18:54:21 jms |
| * initial checkin |
| * |
| * |
| */ |
/*
 * text.c --- pseudo text generator for use in DBGEN 2.0
 *
 * Defined Routines:
 *    dbg_text() -- select and translate a sentence form
 */
| |
| #ifdef TEXT_TEST |
| #define DECLARER |
| #endif /* TEST */ |
| |
| #include "config.h" |
| #include <stdlib.h> |
| #if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */ |
| #include <unistd.h> |
| #include <sys/wait.h> |
| #endif /* WIN32 */ |
| #include <stdio.h> /* */ |
| #include <limits.h> |
| #include <math.h> |
| #include <ctype.h> |
| #include <signal.h> |
| #include <string.h> |
| #include <errno.h> |
| #ifdef HP |
| #include <strings.h> |
| #endif |
| #if (defined(WIN32)&&!defined(_POSIX_)) |
| #include <process.h> |
| #pragma warning(disable:4201) |
| #pragma warning(disable:4214) |
| #pragma warning(disable:4514) |
| #define WIN32_LEAN_AND_MEAN |
| #define NOATOM |
| #define NOGDICAPMASKS |
| #define NOMETAFILE |
| #define NOMINMAX |
| #define NOMSG |
| #define NOOPENFILE |
| #define NORASTEROPS |
| #define NOSCROLL |
| #define NOSOUND |
| #define NOSYSMETRICS |
| #define NOTEXTMETRIC |
| #define NOWH |
| #define NOCOMM |
| #define NOKANJI |
| #define NOMCX |
| #include <windows.h> |
| #pragma warning(default:4201) |
| #pragma warning(default:4214) |
| #endif |
| |
/*
 * Size, in bytes, of the static text pool that dbg_text() fills with
 * generated sentences on its first call and copies its results out of
 * on every call.
 */
#define TEXT_POOL_SIZE (300 * 1024 * 1024) /* 300MiB */
| |
| #include "dss.h" |
| #include "dsstypes.h" |
| |
| /* |
| * txt_vp() -- |
| * generate a verb phrase by |
| * 1) selecting a verb phrase form |
| * 2) parsing it to select parts of speech |
| * 3) selecting appropriate words |
| * 4) adding punctuation as required |
| * |
| * Returns: length of generated phrase |
| * Called By: txt_sentence() |
| * Calls: pick_str() |
| */ |
| static int |
| txt_vp(char *dest, int sd) |
| { |
| char syntax[MAX_GRAMMAR_LEN + 1], |
| *cptr, |
| *parse_target; |
| distribution *src; |
| int i, |
| res = 0; |
| |
| pick_str(&vp, sd, &syntax[0]); |
| parse_target = syntax; |
| while ((cptr = strtok(parse_target, " ")) != NULL) { |
| src = NULL; |
| switch(*cptr) { |
| case 'D': |
| src = &adverbs; |
| break; |
| case 'V': |
| src = &verbs; |
| break; |
| case 'X': |
| src = &auxillaries; |
| break; |
| } /* end of POS switch statement */ |
| i = pick_str(src, sd, dest); |
| i = strlen(DIST_MEMBER(src, i)); |
| dest += i; |
| res += i; |
| if (*(++cptr)) { |
| /* miscelaneous fillagree, like punctuation */ |
| dest += 1; |
| res += 1; |
| *dest = *cptr; |
| } |
| *dest = ' '; |
| dest++; |
| res++; |
| parse_target = NULL; |
| } /* end of while loop */ |
| |
| return(res); |
| } |
| |
| /* |
| * txt_np() -- |
| * generate a noun phrase by |
| * 1) selecting a noun phrase form |
| * 2) parsing it to select parts of speech |
| * 3) selecting appropriate words |
| * 4) adding punctuation as required |
| * |
| * Returns: length of generated phrase |
| * Called By: txt_sentence() |
| * Calls: pick_str(), |
| */ |
| static int |
| txt_np(char *dest, int sd) |
| { |
| char syntax[MAX_GRAMMAR_LEN + 1], |
| *cptr, |
| *parse_target; |
| distribution *src; |
| int i, |
| res = 0; |
| |
| |
| pick_str(&np, sd, &syntax[0]); |
| parse_target = syntax; |
| while ((cptr = strtok(parse_target, " ")) != NULL) { |
| src = NULL; |
| switch(*cptr) { |
| case 'A': |
| src = &articles; |
| break; |
| case 'J': |
| src = &adjectives; |
| break; |
| case 'D': |
| src = &adverbs; |
| break; |
| case 'N': |
| src = &nouns; |
| break; |
| } /* end of POS switch statement */ |
| i = pick_str(src, sd, dest); |
| i = strlen(DIST_MEMBER(src, i)); |
| dest += i; |
| res += i; |
| if (*(++cptr)) { |
| /* miscelaneous fillagree, like punctuation */ |
| *dest = *cptr; |
| dest += 1; |
| res += 1; |
| } |
| *dest = ' '; |
| dest++; |
| res++; |
| parse_target = NULL; |
| } /* end of while loop */ |
| |
| return(res); |
| } |
| |
| /* |
| * txt_sentence() -- |
| * generate a sentence by |
| * 1) selecting a sentence form |
| * 2) parsing it to select parts of speech or phrase types |
| * 3) selecting appropriate words |
| * 4) adding punctuation as required |
| * |
| * Returns: length of generated sentence |
| * Called By: dbg_text() |
| * Calls: pick_str(), txt_np(), txt_vp() |
| */ |
| static int |
| txt_sentence(char *dest, int sd) |
| { |
| char syntax[MAX_GRAMMAR_LEN + 1], |
| *cptr; |
| int i, |
| res = 0, |
| len = 0; |
| |
| |
| pick_str(&grammar, sd, syntax); |
| cptr = syntax; |
| |
| next_token: /* I hate goto's, but can't seem to have parent and child use strtok() */ |
| while (*cptr && *cptr == ' ') |
| cptr++; |
| if (*cptr == '\0') |
| goto done; |
| switch(*cptr) { |
| case 'V': |
| len = txt_vp(dest, sd); |
| break; |
| case 'N': |
| len = txt_np(dest, sd); |
| break; |
| case 'P': |
| i = pick_str(&prepositions, sd, dest); |
| len = strlen(DIST_MEMBER(&prepositions, i)); |
| strcpy((dest + len), " the "); |
| len += 5; |
| len += txt_np(dest + len, sd); |
| break; |
| case 'T': |
| i = pick_str(&terminators, sd, --dest); /*terminators should abut previous word */ |
| len = strlen(DIST_MEMBER(&terminators, i)); |
| break; |
| } /* end of POS switch statement */ |
| dest += len; |
| res += len; |
| cptr++; |
| if (*cptr && *cptr != ' ') { |
| /* miscelaneous fillagree, like punctuation */ |
| dest += 1; |
| res += 1; |
| *dest = *cptr; |
| } |
| goto next_token; |
| done: |
| *dest = '\0'; |
| return(--res); |
| } |
| |
| /* |
| * dbg_text() -- |
| * produce ELIZA-like text of random, bounded length, truncating the last |
| * generated sentence as required |
| */ |
| void |
| dbg_text(char *tgt, int min, int max, int sd) |
| { |
| DSS_HUGE hgLength = 0, |
| hgOffset, |
| wordlen = 0, |
| s_len, |
| needed; |
| char sentence[MAX_SENT_LEN + 1], |
| *cp; |
| static char szTextPool[TEXT_POOL_SIZE + 1]; |
| static int bInit = 0; |
| int nLifeNoise = 0; |
| |
| if (!bInit) { |
| cp = &szTextPool[0]; |
| if (o_verbose) |
| fprintf(stderr, "\nPreloading text ... "); |
| |
| while (wordlen < TEXT_POOL_SIZE) { |
| if (o_verbose && (wordlen > nLifeNoise)) { |
| nLifeNoise += 200000; |
| fprintf(stderr, "%3.0f%%\b\b\b\b", (100.0 * wordlen)/TEXT_POOL_SIZE); |
| } |
| |
| s_len = txt_sentence(sentence, 5); |
| if ( s_len < 0) |
| INTERNAL_ERROR("Bad sentence formation"); |
| needed = TEXT_POOL_SIZE - wordlen; |
| if (needed >= (s_len + 1)) { |
| /* need the entire sentence */ |
| strcpy(cp, sentence); |
| cp += s_len; |
| wordlen += s_len + 1; |
| *(cp++) = ' '; |
| } |
| else { |
| /* chop the new sentence off to match the length target */ |
| sentence[needed] = '\0'; |
| strcpy(cp, sentence); |
| wordlen += needed; |
| cp += needed; |
| } |
| } |
| *cp = '\0'; |
| bInit = 1; |
| if (o_verbose) |
| fprintf(stderr, "\n"); |
| } |
| |
| RANDOM(hgOffset, 0, TEXT_POOL_SIZE - max, sd); |
| RANDOM(hgLength, min, max, sd); |
| strncpy(&tgt[0], &szTextPool[hgOffset], (int)hgLength); |
| tgt[hgLength] = '\0'; |
| |
| return; |
| } |
| |
#ifdef TEXT_TEST
/*
 * Stand-alone test driver, compiled only when TEXT_TEST is defined.
 * It defines the distributions this file otherwise expects from the rest
 * of the program, loads them with read_dist(), and then prints generated
 * text forever.
 */
tdef tdefs[1] = { NULL };
distribution nouns,
    verbs,
    adjectives,
    adverbs,
    auxillaries,
    terminators,
    articles,
    prepositions,
    grammar,
    np,
    vp;

int
main(void)
{
    /* dbg_text() is asked for 400 characters below; +1 for the NUL */
    char prattle[401];

    /*
     * NOTE(review): dbg_text() tests o_verbose, not verbose -- confirm
     * which flag this driver is meant to set.
     */
    verbose = 1;

    read_dist (env_config (DIST_TAG, DIST_DFLT), "nouns", &nouns);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "verbs", &verbs);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "adjectives", &adjectives);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "adverbs", &adverbs);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "auxillaries", &auxillaries);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "terminators", &terminators);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "articles", &articles);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "prepositions", &prepositions);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "grammar", &grammar);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "np", &np);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "vp", &vp);

    /* runs until the process is interrupted */
    while (1) {
        dbg_text(&prattle[0], 300, 400, 0);
        printf("<%s>\n", prattle);
    }

    return(0);  /* not reached */
}
#endif /* TEXT_TEST */