experiments/flat/src/BuildGrammar.c - incubator-retired-corinthia - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include "Common.h"
 #include "BuildGrammar.h"
 #include "Util.h"
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>

 #define MAX_CHILDREN 64

 typedef struct Builder Builder;

 struct Builder {
     Grammar *gram;
     const char *input;
     int len;
 };

 static Expression *buildExpression(Builder *builder, Term *term);

 static char *unescapeLiteral(const char *escaped)
 {
     size_t escapedLen = strlen(escaped);
     size_t escapedPos = 0;
     size_t unescapedLen = 0;
     char *unescaped = (char *)malloc(escapedLen+1);

     while (escapedPos < escapedLen) {
         char c = escaped[escapedPos++];
         if ((c == '\\') && (escapedPos < escapedLen)) {
             c = escaped[escapedPos++];
             switch (c) {
                 case 'n':
                     unescaped[unescapedLen++] = '\n';
                     break;
                 case 'r':
                     unescaped[unescapedLen++] = '\r';
                     break;
                 case 't':
                     unescaped[unescapedLen++] = '\t';
                     break;
                 default:
                     unescaped[unescapedLen++] = c;
                     break;
             }
         }
         else {
             unescaped[unescapedLen++] = c;
         }
     }

     unescaped[unescapedLen] = '\0';
     return unescaped;
 }

 // Sanity checking functions
 //
 // These are just used to verify that a Term we're about to use is of the expected type.
 // Since the format of the parse tree is likely to change in these early stages of development,
 // this will catch cases where the code in this file has not been adapted to those changes
 //
 // The large number of assert statements in this file, most of which use these functions, will
 // likely only be necessary in the short term. Ideally we should be able to get to a point where
 // one can safely write code that consumes a parse tree without having so many sanity checks.

 static int isTerm(Term *term, ExprKind kind, int count)
 {
     return ((TermKind(term) == kind) && (TermCount(term) == count));
 }

 static int isSequence(Term *term, int count)
 {
     return isTerm(term,SequenceExpr,count);
 }

 static int isIdent(Term *term, const char *name)
 {
     return (isTerm(term,IdentExpr,1) && !strcmp(name,ExprIdentValue(TermType(term))));
 }

 // Extract a substring of the input based on a specific term, or one or more children of a term.
 //
 // Each Term object has a start and end field, which specify which portion of the input string
 // the term covers (that is, the portion of the input consumed durin parsing of that term). In
 // some cases we want all the text matched by a term, and in others we only want the text matched
 // by specific children. An example of the latter is a "pseudo-terminal" like Identifier where
 // we want all characters except the trailing whitespace.

 static char *termString(Builder *builder, Term *term)
 {
     assert(term->start >= 0);
     assert(term->end <= builder->len);
     int len = term->end - term->start;
     char *str = malloc(len+1);
     memcpy(str,&builder->input[term->start],len);
     str[len] = '\0';
     return str;
 }

 static char *identifierString(Builder *builder, Term *term)
 {
     assert(isIdent(term,"Identifier"));
     Term *body = TermChildAt(term,0);
     assert(isSequence(body,2));
     Term *content = TermChildAt(body,0);
     Term *spacing = TermChildAt(body,1);
     assert(TermKind(content) == StringExpr);
     assert(isIdent(spacing,"Spacing"));
     return termString(builder,content);
 }

 // For Terms of type ChoiceExpr, determine which of the choices the corresponding term is

 static int choiceIndex(Term *body)
 {
     assert(TermKind(body) == ChoiceExpr);
     assert(TermCount(body) == 1);
     Term *choice = TermChildAt(body,0);
     int match = -1;
     int choiceCount = ExprChoiceCount(TermType(body));
     for (int i = 0; i < choiceCount; i++) {
         if (TermType(choice) == ExpressionChildAt(TermType(body),i))
             match = i;
     }
     return match;
 }

 // Expression building functions (build*)
 //
 // There is one of these for each type of expression that can be present in the grammar. The
 // supplied Term object is, in all cases, of an IdentExpr type, whose single child is the body
 // of the corresponding grammar rule.
 //
 // For example, buildPrimary is called with an ExpressionType of IdentExpr "Primary", and the
 // body is a ChoiceExpr which can contain one of five possible child types, as given in the
 // PEG grammar.
 //
 // To see the exact expression tree of a given rule, look at its definition in Builtin.c

 static Expression *buildIdentifier(Builder *builder, Term *term)
 {
     char *str = identifierString(builder,term);
     Expression *result = ExpressionNewIdent(str);
     free(str);
     return result;
 }

 static Expression *buildLiteral(Builder *builder, Term *term)
 {
     // The Literal rule in the built-in PEG grammar contains two choices - one for single quotes
     // and the other for double quotes - which otherwise have the same structure. There are four
     // children, where child 0 and child 2 represent the quotes themselves, child 1 represents the
     // escaped representation of a string, and child 3 is any trailing whitespace.
     //
     // All we're interested in is the unescaped string, so we get child 2 and convert any escape
     // sequences (e.g. \" or \n) to the characters they represent, and return a new literal
     // expression with the unescaped string as its value.

     assert(isIdent(term,"Literal"));
     Term *body = TermChildAt(term,0);
     assert(TermKind(body) == ChoiceExpr);
     int index = choiceIndex(body);
     assert((index == 0) || (index == 1));
     Term *choice = TermChildAt(body,0);
     assert(isSequence(choice,4));
     Term *content = TermChildAt(choice,1);
     char *escaped = termString(builder,content);
     char *unescaped = unescapeLiteral(escaped);
     Expression *result = ExpressionNewLit(unescaped);
     free(escaped);
     free(unescaped);
     return result;
 }

 static int decodeRangeChar(Builder *builder, Term *charTerm)
 {
     // FIXME: Handle non-ASCII chars encoded as UTF-8, as well as numeric escape sequences
     // We don't need to allocate a string for unescaped here, we should just do it directly

     assert(isIdent(charTerm,"Char"));
     char *escaped = termString(builder,charTerm);
     char *unescaped = unescapeLiteral(escaped);
     int result;
     if (strlen(unescaped) == 0)
         result = '?';
     else if (unescaped[0] < 0) // Start of UTF-8 multibyte sequence
         result = '?';
     else
         result = unescaped[0];
     free(escaped);
     free(unescaped);
     return result;
 }

 static Expression *buildRange(Builder *builder, Term *term)
 {
     // A range expression, one or more of which appear inside a [...] character class expression,
     // matches a character in a range between a start and end value. We use a single expression
     // type to represent both exact matches (where the minimum and maximum are the same), and
     // "true" ranges (which match two or more possible characters).
     //
     // Note that the representation we use for Range expressions is a (start,end) pair, where a
     // character match must satisfy the condition start <= c < end (that is, the range includes the
     // start, but does *not* include the end). This representation is for the convenience of other
     // code that works with ranges. The ExpressionNewRange function however takes the minimum and
     // maximum values - that is, a matching character c must satisfy min <= c <= max.

     assert(isIdent(term,"Range"));
     Term *body = TermChildAt(term,0);
     int index = choiceIndex(body);
     Term *choice = TermChildAt(body,0);
     assert((index == 0) || (index == 1));
     if (index == 0) {
         assert(isSequence(choice,3));
         Term *minChar = TermChildAt(choice,0);
         Term *maxChar = TermChildAt(choice,2);
         assert(isIdent(minChar,"Char"));
         assert(isIdent(maxChar,"Char"));
         int min = decodeRangeChar(builder,minChar);
         int max = decodeRangeChar(builder,maxChar);
         return ExpressionNewRange(min,max);
     }
     else {
         assert(isIdent(choice,"Char"));
         int value = decodeRangeChar(builder,choice);
         return ExpressionNewRange(value,value);
     }
 }

 static Expression *buildClass(Builder *builder, Term *term)
 {
     // A character class expression is the same as a choice expression, except that it is only
     // supposed to contain range expressions.

     assert(isIdent(term,"Class"));
     Term *body = TermChildAt(term,0);
     assert(isSequence(body,4));
     Term *star = TermChildAt(body,1);
     assert(TermKind(star) == StarExpr);

     Expression *children[MAX_CHILDREN];
     int count = 0;
     for (TermList *item = TermChildren(star); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
         assert(isSequence(item->term,2));
         Term *rangeTerm = TermChildAt(item->term,1);
         Expression *rangeExpr = buildRange(builder,rangeTerm);
         children[count++] = rangeExpr;
     }

     return ExpressionNewClass(count,children);
 }

 static Expression *buildDot(Builder *builder, Term *term)
 {
     assert(isIdent(term,"DOT"));
     return ExpressionNewDot();
 }

 static Expression *buildPrimary(Builder *builder, Term *term)
 {
     // A primary expression can be one of five possibilities: An identifier (reference to another
     // rule in the grammar), a parenthesised expresion, a literal, a character class, or a dot
     // (which matches any character).
     //
     // The type of the term is an Expression with kind == ChoiceExpr, and the term has exactly
     // one child. We use choiceIndex to determine which of the five possible expression types
     // the choice matches, and then call through to the relevant function to build the appropriate
     // type of Expression object.

     assert(isIdent(term,"Primary"));
     Term *body = TermChildAt(term,0);
     assert(TermKind(body) == ChoiceExpr);
     assert(TermCount(body) == 1);
     Term *choice = TermChildAt(body,0);

     switch (choiceIndex(body)) {
         case 0: {
             assert(isSequence(choice,2));
             Term *identifier = TermChildAt(choice,0);
             return buildIdentifier(builder,identifier);
         }
         case 1: {
             assert(isSequence(choice,4));
             Term *expression = TermChildAt(choice,2);
             Expression *result = buildExpression(builder,expression);
             return ExpressionNewString(result);
         }
         case 2: {
             assert(isSequence(choice,3));
             Term *expression = TermChildAt(choice,1);
             return buildExpression(builder,expression);
         }
         case 3:
             return buildLiteral(builder,choice);
         case 4:
             return buildClass(builder,choice);
         case 5:
             return buildDot(builder,choice);
         default:
             assert(!"Invalid choice for Primary");
             return NULL;
     }
 }

 static Expression *buildSuffix(Builder *builder, Term *term)
 {
     // A suffix expression has two children. The second is optional, and is one of either QUESTION,
     // STAR, or PLUS - which indicate that the first child may occur zero or one times, zero or more
     // times, or one or more times. The first child is always present, and represents a primary
     // expression with no prefix or suffix.
     //
     // If a QUESTION, STAR, or PLUS suffix is given, we wrap the constructed primary expression
     // inside another Expression object of type OptExpr, StarExpr, or PlusExpr. Otherwise, we just
     // return the primary expression directly.

     assert(isIdent(term,"Suffix"));
     Term *body = TermChildAt(term,0);
     assert(isSequence(body,2));
     Term *primary = TermChildAt(body,0);
     Term *suffix = TermChildAt(body,1);
     assert(isIdent(primary,"Primary"));
     assert(TermKind(suffix) == OptExpr);

     Expression *primaryExpr = buildPrimary(builder,primary);

     assert((TermCount(suffix) == 0) || (TermCount(suffix) == 1));
     if (TermCount(suffix) == 1) {
         Term *suffixChild = TermChildAt(suffix,0);
         int index = choiceIndex(suffixChild);
         Term *choice = TermChildAt(suffixChild,0);
         switch (index) {
             case 0:
                 assert(isIdent(choice,"QUESTION"));
                 return ExpressionNewOpt(primaryExpr);
             case 1:
                 assert(isIdent(choice,"STAR"));
                 return ExpressionNewStar(primaryExpr);
             case 2:
                 assert(isIdent(choice,"PLUS"));
                 return ExpressionNewPlus(primaryExpr);
             default:
                 assert(!"Invalid choice for Suffix");
                 break;
         }
     }

     return primaryExpr;
 }

 static Expression *buildPrefix(Builder *builder, Term *term)
 {
     // A prefix expression has two children. The first is optional, and is one of either AND or
     // NOT - which indicate a positive or negative lookahead assertion. The second is not optional,
     // and represents a primary expression with an optional suffix.
     //
     // If an AND or NOT prefx is given, we wrap the constructed primary expression inside another
     // Expression object of type AndExpr or NotExpr. Otherwise, we just return the primary
     // expression directly.

     assert(isIdent(term,"Prefix"));
     Term *body = TermChildAt(term,0);
     assert(isSequence(body,2));
     Term *prefix = TermChildAt(body,0);
     Term *suffix = TermChildAt(body,1);
     assert(TermKind(prefix) == OptExpr);
     assert(isIdent(suffix,"Suffix"));

     Expression *suffixExpr = buildSuffix(builder,suffix);

     assert((TermCount(prefix) == 0) || (TermCount(prefix) == 1));
     if (TermCount(prefix) == 1) {
         Term *prefixChild = TermChildAt(prefix,0);
         int index = choiceIndex(prefixChild);
         Term *choice = TermChildAt(prefixChild,0);
         switch (index) {
             case 0:
                 assert(isIdent(choice,"AND"));
                 return ExpressionNewAnd(suffixExpr);
             case 1:
                 assert(isIdent(choice,"NOT"));
                 return ExpressionNewNot(suffixExpr);
             default:
                 assert(!"Invalid choice for Prefix");
                 break;
         }
     }

     return suffixExpr;
 }

 static Expression *buildSequence(Builder *builder, Term *term)
 {
     // An Sequence consists of one or more expressions, each of which has an optional prefix
     // and suffix
     assert(isIdent(term,"Sequence"));
     Term *body = TermChildAt(term,0);
     assert(TermKind(body) == StarExpr);

     Expression *children[MAX_CHILDREN];
     int count = 0;
     for (TermList *item = TermChildren(body); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
         children[count] = buildPrefix(builder,item->term);
         count++;
     }
     if (count == 1)
         return children[0];
     else
         return ExpressionNewSequence(count,children);
 }

 static Expression *buildExpression(Builder *builder, Term *term)
 {
     // An Expression consists of one or more choices
     assert(isIdent(term,"Expression"));
     Term *body = TermChildAt(term,0);
     assert(isSequence(body,2));

     Term *child0 = TermChildAt(body,0);
     Term *child1 = TermChildAt(body,1);

     assert(isIdent(child0,"Sequence"));
     assert(TermKind(child1) == StarExpr);

     Expression *initial = buildSequence(builder,child0);
     if (TermCount(child1) == 0)
         return initial;

     Expression *children[MAX_CHILDREN];
     children[0] = initial;
     int count = 1;
     for (TermList *item = TermChildren(child1); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
         assert(isSequence(item->term,2));
         children[count] = buildSequence(builder,TermChildAt(item->term,1));
         count++;
     }
     return ExpressionNewChoice(count,children);
 }

 static void buildGrammar(Builder *builder, Term *term)
 {
     assert(isSequence(term,3));
     Term *plus = TermChildAt(term,1);
     assert(TermKind(plus) == PlusExpr);

     for (TermList *plusItem = plus->children; plusItem != NULL; plusItem = plusItem->next) {
         Term *defIdent = plusItem->term;
         assert(isIdent(defIdent,"Definition"));
         Term *defSeq = TermChildAt(defIdent,0);
         assert(isSequence(defSeq,4));
         Term *identTerm = TermChildAt(defSeq,0);
         Term *exprTerm = TermChildAt(defSeq,2);
         assert(isIdent(identTerm,"Identifier"));
         assert(isIdent(exprTerm,"Expression"));

         char *ruleName = identifierString(builder,identTerm);
         Expression *ruleExpr = buildExpression(builder,exprTerm);
         GrammarDefine(builder->gram,ruleName,ruleExpr);
         free(ruleName);
     }

     GrammarResolve(builder->gram);
 }

 // This function creates a new Grammar object from the result of parsing a file usin the built-in
 // PEG grammar. The resulting Grammar object can then be used to parse other files which are written
 // in another language that is accepted by that grammar. The Term objects are constructed by the
 // parse function defined in Parser.c.

 Grammar *grammarFromTerm(Term *term, const char *input)
 {
     Grammar *gram = GrammarNew();

     Builder *builder = (Builder *)calloc(1,sizeof(Builder));
     builder->gram = gram;
     builder->input = input;
     builder->len = strlen(input);

     buildGrammar(builder,term);

     free(builder);

     return gram;
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#include "Common.h"
	#include "BuildGrammar.h"
	#include "Util.h"
	#include <stdlib.h>
	#include <stdio.h>
	#include <string.h>
	#include <assert.h>

	#define MAX_CHILDREN 64

	typedef struct Builder Builder;

	struct Builder {
	Grammar *gram;
	const char *input;
	int len;
	};

	static Expression buildExpression(Builder builder, Term *term);

	static char unescapeLiteral(const char escaped)
	{
	size_t escapedLen = strlen(escaped);
	size_t escapedPos = 0;
	size_t unescapedLen = 0;
	char unescaped = (char )malloc(escapedLen+1);

	while (escapedPos < escapedLen) {
	char c = escaped[escapedPos++];
	if ((c == '\\') && (escapedPos < escapedLen)) {
	c = escaped[escapedPos++];
	switch (c) {
	case 'n':
	unescaped[unescapedLen++] = '\n';
	break;
	case 'r':
	unescaped[unescapedLen++] = '\r';
	break;
	case 't':
	unescaped[unescapedLen++] = '\t';
	break;
	default:
	unescaped[unescapedLen++] = c;
	break;
	}
	}
	else {
	unescaped[unescapedLen++] = c;
	}
	}

	unescaped[unescapedLen] = '\0';
	return unescaped;
	}

	// Sanity checking functions
	//
	// These are just used to verify that a Term we're about to use is of the expected type.
	// Since the format of the parse tree is likely to change in these early stages of development,
	// this will catch cases where the code in this file has not been adapted to those changes
	//
	// The large number of assert statements in this file, most of which use these functions, will
	// likely only be necessary in the short term. Ideally we should be able to get to a point where
	// one can safely write code that consumes a parse tree without having so many sanity checks.

	static int isTerm(Term *term, ExprKind kind, int count)
	{
	return ((TermKind(term) == kind) && (TermCount(term) == count));
	}

	static int isSequence(Term *term, int count)
	{
	return isTerm(term,SequenceExpr,count);
	}

	static int isIdent(Term term, const char name)
	{
	return (isTerm(term,IdentExpr,1) && !strcmp(name,ExprIdentValue(TermType(term))));
	}

	// Extract a substring of the input based on a specific term, or one or more children of a term.
	//
	// Each Term object has a start and end field, which specify which portion of the input string
	// the term covers (that is, the portion of the input consumed durin parsing of that term). In
	// some cases we want all the text matched by a term, and in others we only want the text matched
	// by specific children. An example of the latter is a "pseudo-terminal" like Identifier where
	// we want all characters except the trailing whitespace.

	static char termString(Builder builder, Term *term)
	{
	assert(term->start >= 0);
	assert(term->end <= builder->len);
	int len = term->end - term->start;
	char *str = malloc(len+1);
	memcpy(str,&builder->input[term->start],len);
	str[len] = '\0';
	return str;
	}

	static char identifierString(Builder builder, Term *term)
	{
	assert(isIdent(term,"Identifier"));
	Term *body = TermChildAt(term,0);
	assert(isSequence(body,2));
	Term *content = TermChildAt(body,0);
	Term *spacing = TermChildAt(body,1);
	assert(TermKind(content) == StringExpr);
	assert(isIdent(spacing,"Spacing"));
	return termString(builder,content);
	}

	// For Terms of type ChoiceExpr, determine which of the choices the corresponding term is

	static int choiceIndex(Term *body)
	{
	assert(TermKind(body) == ChoiceExpr);
	assert(TermCount(body) == 1);
	Term *choice = TermChildAt(body,0);
	int match = -1;
	int choiceCount = ExprChoiceCount(TermType(body));
	for (int i = 0; i < choiceCount; i++) {
	if (TermType(choice) == ExpressionChildAt(TermType(body),i))
	match = i;
	}
	return match;
	}

	// Expression building functions (build*)
	//
	// There is one of these for each type of expression that can be present in the grammar. The
	// supplied Term object is, in all cases, of an IdentExpr type, whose single child is the body
	// of the corresponding grammar rule.
	//
	// For example, buildPrimary is called with an ExpressionType of IdentExpr "Primary", and the
	// body is a ChoiceExpr which can contain one of five possible child types, as given in the
	// PEG grammar.
	//
	// To see the exact expression tree of a given rule, look at its definition in Builtin.c

	static Expression buildIdentifier(Builder builder, Term *term)
	{
	char *str = identifierString(builder,term);
	Expression *result = ExpressionNewIdent(str);
	free(str);
	return result;
	}

	static Expression buildLiteral(Builder builder, Term *term)
	{
	// The Literal rule in the built-in PEG grammar contains two choices - one for single quotes
	// and the other for double quotes - which otherwise have the same structure. There are four
	// children, where child 0 and child 2 represent the quotes themselves, child 1 represents the
	// escaped representation of a string, and child 3 is any trailing whitespace.
	//
	// All we're interested in is the unescaped string, so we get child 2 and convert any escape
	// sequences (e.g. \" or \n) to the characters they represent, and return a new literal
	// expression with the unescaped string as its value.

	assert(isIdent(term,"Literal"));
	Term *body = TermChildAt(term,0);
	assert(TermKind(body) == ChoiceExpr);
	int index = choiceIndex(body);
	assert((index == 0) \|\| (index == 1));
	Term *choice = TermChildAt(body,0);
	assert(isSequence(choice,4));
	Term *content = TermChildAt(choice,1);
	char *escaped = termString(builder,content);
	char *unescaped = unescapeLiteral(escaped);
	Expression *result = ExpressionNewLit(unescaped);
	free(escaped);
	free(unescaped);
	return result;
	}

	static int decodeRangeChar(Builder builder, Term charTerm)
	{
	// FIXME: Handle non-ASCII chars encoded as UTF-8, as well as numeric escape sequences
	// We don't need to allocate a string for unescaped here, we should just do it directly

	assert(isIdent(charTerm,"Char"));
	char *escaped = termString(builder,charTerm);
	char *unescaped = unescapeLiteral(escaped);
	int result;
	if (strlen(unescaped) == 0)
	result = '?';
	else if (unescaped[0] < 0) // Start of UTF-8 multibyte sequence
	result = '?';
	else
	result = unescaped[0];
	free(escaped);
	free(unescaped);
	return result;
	}

	static Expression buildRange(Builder builder, Term *term)
	{
	// A range expression, one or more of which appear inside a [...] character class expression,
	// matches a character in a range between a start and end value. We use a single expression
	// type to represent both exact matches (where the minimum and maximum are the same), and
	// "true" ranges (which match two or more possible characters).
	//
	// Note that the representation we use for Range expressions is a (start,end) pair, where a
	// character match must satisfy the condition start <= c < end (that is, the range includes the
	// start, but does not include the end). This representation is for the convenience of other
	// code that works with ranges. The ExpressionNewRange function however takes the minimum and
	// maximum values - that is, a matching character c must satisfy min <= c <= max.

	assert(isIdent(term,"Range"));
	Term *body = TermChildAt(term,0);
	int index = choiceIndex(body);
	Term *choice = TermChildAt(body,0);
	assert((index == 0) \|\| (index == 1));
	if (index == 0) {
	assert(isSequence(choice,3));
	Term *minChar = TermChildAt(choice,0);
	Term *maxChar = TermChildAt(choice,2);
	assert(isIdent(minChar,"Char"));
	assert(isIdent(maxChar,"Char"));
	int min = decodeRangeChar(builder,minChar);
	int max = decodeRangeChar(builder,maxChar);
	return ExpressionNewRange(min,max);
	}
	else {
	assert(isIdent(choice,"Char"));
	int value = decodeRangeChar(builder,choice);
	return ExpressionNewRange(value,value);
	}
	}

	static Expression buildClass(Builder builder, Term *term)
	{
	// A character class expression is the same as a choice expression, except that it is only
	// supposed to contain range expressions.

	assert(isIdent(term,"Class"));
	Term *body = TermChildAt(term,0);
	assert(isSequence(body,4));
	Term *star = TermChildAt(body,1);
	assert(TermKind(star) == StarExpr);

	Expression *children[MAX_CHILDREN];
	int count = 0;
	for (TermList *item = TermChildren(star); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
	assert(isSequence(item->term,2));
	Term *rangeTerm = TermChildAt(item->term,1);
	Expression *rangeExpr = buildRange(builder,rangeTerm);
	children[count++] = rangeExpr;
	}

	return ExpressionNewClass(count,children);
	}

	static Expression buildDot(Builder builder, Term *term)
	{
	assert(isIdent(term,"DOT"));
	return ExpressionNewDot();
	}

	static Expression buildPrimary(Builder builder, Term *term)
	{
	// A primary expression can be one of five possibilities: An identifier (reference to another
	// rule in the grammar), a parenthesised expresion, a literal, a character class, or a dot
	// (which matches any character).
	//
	// The type of the term is an Expression with kind == ChoiceExpr, and the term has exactly
	// one child. We use choiceIndex to determine which of the five possible expression types
	// the choice matches, and then call through to the relevant function to build the appropriate
	// type of Expression object.

	assert(isIdent(term,"Primary"));
	Term *body = TermChildAt(term,0);
	assert(TermKind(body) == ChoiceExpr);
	assert(TermCount(body) == 1);
	Term *choice = TermChildAt(body,0);

	switch (choiceIndex(body)) {
	case 0: {
	assert(isSequence(choice,2));
	Term *identifier = TermChildAt(choice,0);
	return buildIdentifier(builder,identifier);
	}
	case 1: {
	assert(isSequence(choice,4));
	Term *expression = TermChildAt(choice,2);
	Expression *result = buildExpression(builder,expression);
	return ExpressionNewString(result);
	}
	case 2: {
	assert(isSequence(choice,3));
	Term *expression = TermChildAt(choice,1);
	return buildExpression(builder,expression);
	}
	case 3:
	return buildLiteral(builder,choice);
	case 4:
	return buildClass(builder,choice);
	case 5:
	return buildDot(builder,choice);
	default:
	assert(!"Invalid choice for Primary");
	return NULL;
	}
	}

	static Expression buildSuffix(Builder builder, Term *term)
	{
	// A suffix expression has two children. The second is optional, and is one of either QUESTION,
	// STAR, or PLUS - which indicate that the first child may occur zero or one times, zero or more
	// times, or one or more times. The first child is always present, and represents a primary
	// expression with no prefix or suffix.
	//
	// If a QUESTION, STAR, or PLUS suffix is given, we wrap the constructed primary expression
	// inside another Expression object of type OptExpr, StarExpr, or PlusExpr. Otherwise, we just
	// return the primary expression directly.

	assert(isIdent(term,"Suffix"));
	Term *body = TermChildAt(term,0);
	assert(isSequence(body,2));
	Term *primary = TermChildAt(body,0);
	Term *suffix = TermChildAt(body,1);
	assert(isIdent(primary,"Primary"));
	assert(TermKind(suffix) == OptExpr);

	Expression *primaryExpr = buildPrimary(builder,primary);

	assert((TermCount(suffix) == 0) \|\| (TermCount(suffix) == 1));
	if (TermCount(suffix) == 1) {
	Term *suffixChild = TermChildAt(suffix,0);
	int index = choiceIndex(suffixChild);
	Term *choice = TermChildAt(suffixChild,0);
	switch (index) {
	case 0:
	assert(isIdent(choice,"QUESTION"));
	return ExpressionNewOpt(primaryExpr);
	case 1:
	assert(isIdent(choice,"STAR"));
	return ExpressionNewStar(primaryExpr);
	case 2:
	assert(isIdent(choice,"PLUS"));
	return ExpressionNewPlus(primaryExpr);
	default:
	assert(!"Invalid choice for Suffix");
	break;
	}
	}

	return primaryExpr;
	}

	static Expression buildPrefix(Builder builder, Term *term)
	{
	// A prefix expression has two children. The first is optional, and is one of either AND or
	// NOT - which indicate a positive or negative lookahead assertion. The second is not optional,
	// and represents a primary expression with an optional suffix.
	//
	// If an AND or NOT prefx is given, we wrap the constructed primary expression inside another
	// Expression object of type AndExpr or NotExpr. Otherwise, we just return the primary
	// expression directly.

	assert(isIdent(term,"Prefix"));
	Term *body = TermChildAt(term,0);
	assert(isSequence(body,2));
	Term *prefix = TermChildAt(body,0);
	Term *suffix = TermChildAt(body,1);
	assert(TermKind(prefix) == OptExpr);
	assert(isIdent(suffix,"Suffix"));

	Expression *suffixExpr = buildSuffix(builder,suffix);

	assert((TermCount(prefix) == 0) \|\| (TermCount(prefix) == 1));
	if (TermCount(prefix) == 1) {
	Term *prefixChild = TermChildAt(prefix,0);
	int index = choiceIndex(prefixChild);
	Term *choice = TermChildAt(prefixChild,0);
	switch (index) {
	case 0:
	assert(isIdent(choice,"AND"));
	return ExpressionNewAnd(suffixExpr);
	case 1:
	assert(isIdent(choice,"NOT"));
	return ExpressionNewNot(suffixExpr);
	default:
	assert(!"Invalid choice for Prefix");
	break;
	}
	}

	return suffixExpr;
	}

	static Expression buildSequence(Builder builder, Term *term)
	{
	// An Sequence consists of one or more expressions, each of which has an optional prefix
	// and suffix
	assert(isIdent(term,"Sequence"));
	Term *body = TermChildAt(term,0);
	assert(TermKind(body) == StarExpr);

	Expression *children[MAX_CHILDREN];
	int count = 0;
	for (TermList *item = TermChildren(body); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
	children[count] = buildPrefix(builder,item->term);
	count++;
	}
	if (count == 1)
	return children[0];
	else
	return ExpressionNewSequence(count,children);
	}

	static Expression buildExpression(Builder builder, Term *term)
	{
	// An Expression consists of one or more choices
	assert(isIdent(term,"Expression"));
	Term *body = TermChildAt(term,0);
	assert(isSequence(body,2));

	Term *child0 = TermChildAt(body,0);
	Term *child1 = TermChildAt(body,1);

	assert(isIdent(child0,"Sequence"));
	assert(TermKind(child1) == StarExpr);

	Expression *initial = buildSequence(builder,child0);
	if (TermCount(child1) == 0)
	return initial;

	Expression *children[MAX_CHILDREN];
	children[0] = initial;
	int count = 1;
	for (TermList *item = TermChildren(child1); (item != NULL) && (count < MAX_CHILDREN); item = item->next) {
	assert(isSequence(item->term,2));
	children[count] = buildSequence(builder,TermChildAt(item->term,1));
	count++;
	}
	return ExpressionNewChoice(count,children);
	}

	static void buildGrammar(Builder builder, Term term)
	{
	assert(isSequence(term,3));
	Term *plus = TermChildAt(term,1);
	assert(TermKind(plus) == PlusExpr);

	for (TermList *plusItem = plus->children; plusItem != NULL; plusItem = plusItem->next) {
	Term *defIdent = plusItem->term;
	assert(isIdent(defIdent,"Definition"));
	Term *defSeq = TermChildAt(defIdent,0);
	assert(isSequence(defSeq,4));
	Term *identTerm = TermChildAt(defSeq,0);
	Term *exprTerm = TermChildAt(defSeq,2);
	assert(isIdent(identTerm,"Identifier"));
	assert(isIdent(exprTerm,"Expression"));

	char *ruleName = identifierString(builder,identTerm);
	Expression *ruleExpr = buildExpression(builder,exprTerm);
	GrammarDefine(builder->gram,ruleName,ruleExpr);
	free(ruleName);
	}

	GrammarResolve(builder->gram);
	}

	// This function creates a new Grammar object from the result of parsing a file usin the built-in
	// PEG grammar. The resulting Grammar object can then be used to parse other files which are written
	// in another language that is accepted by that grammar. The Term objects are constructed by the
	// parse function defined in Parser.c.

	Grammar grammarFromTerm(Term term, const char *input)
	{
	Grammar *gram = GrammarNew();

	Builder builder = (Builder )calloc(1,sizeof(Builder));
	builder->gram = gram;
	builder->input = input;
	builder->len = strlen(input);

	buildGrammar(builder,term);

	free(builder);

	return gram;
	}