| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.avro.util; |
| |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.util.List; |
| import java.util.regex.Pattern; |
| import java.util.regex.Matcher; |
| |
| /** Parser for files containing test cases consisting of |
| * <code><String,String></code> pairs, where the first string is |
| * the input to the test case, and the second string is the expected |
| * output of the test case. |
| * |
| * <p> A test-case file is a sequence of <a |
| * href="http://en.wikipedia.org/wiki/Here_document">here documents</a> |
| * ("heredocs"), very similar in syntax to Unix Shell heredocs. |
| * Heredocs labeled "INPUT" indicate the start of a new case, and |
| * these INPUT heredocs are the inputs of test cases. Following an |
| * "INPUT" heredoc can come zero or more "expected-output" heredocs. |
| * Each of these expected-output heredocs defines what we call a |
| * <dfn>subcase</dfn>. The assumption here is that for each |
| * interesting test input, there are often multiple different tests |
| * one could run, each with different expected outputs. |
| * |
| * <p> Consumers of this class call the {@link #find} method to find |
| * all subcases marked with a given label. For example, imagine the |
| * following test-case file: |
| * <blockquote> <pre> |
| * <<INPUT 0 |
| * <<VALUE 0 |
| * <<PPRINT 0 |
| * <<INPUT 1+1 |
| * <<VALUE 2 |
| * <<PPRINT 1 + 1 |
| * <<SEXP (+ 1 1) |
| * SEXP |
| * </pre> </blockquote> |
| * Calling {@link #find} on the label "VALUE" will return two test |
| * cases, the pair <code><"0","0"></code> and |
| * <code><"1+1","2"></code>. Calling it on the label "PPRINT" |
| * will return <code><"0","0"></code> and <code><"1+1","1 + |
| * 1"></code>. Notice that there need not be a subcase for every |
| * INPUT. In the case of "SEXP", for example, {@link #find} will |
| * return only the single pair <code><"1+1","(+ 1 1)"></code>. |
| * |
| * <p> There are two forms of heredocs, single-line and multi-line. |
| * The examples above (except "SEXP") are single-line heredocs. The |
| * general syntax for these is: |
| * <blockquote> <pre> |
| * ^<<([a-zA-Z][_a-zA-Z0-9]*) (.*)$ |
| * </pre> </blockquote> |
| * The first group in this regex is the label of the heredoc, and the |
| * second group is the text of the heredoc. A single space separates |
| * the two groups and is not part of the heredoc (subsequent spaces |
| * <em>will</em> be included in the heredoc). A "line terminator" as |
| * defined by the Java language (i.e., CR, LF, or CR followed by LF) |
| * terminates a single-line heredoc but is not included in the text |
| * of the heredoc. |
| * |
| * <p> As the name implies, multi-line heredocs are spread across |
| * multiple lines, as in this example: |
| * <blockquote> <pre> |
| * <<INPUT |
| * 1 |
| * +1 + |
| * 1 |
| * INPUT |
| * <<VALUE 3 |
| * <<PPRINT 1 + 1 + 1 |
| * </pre> </blockquote> |
| * In this case, the input to the test case is spread across multiple |
| * lines (the line terminators in these documents are preserved as |
| * part of the document text). Multi-line heredocs can be used for |
| * both the inputs of test cases and the expected outputs of them. |
| * |
| * <p> The syntax of multi-line heredocs obeys the following pseudo-regex: |
| * <blockquote> <pre> |
| * ^<<([a-zA-Z][_a-zA-Z0-9]*)$(.*)$^\1$ |
| * </pre> </blockquote> |
| * That is, as illustrated by the example, a multi-line heredoc named |
| * "LABEL" consists of the text <code>&lt;&lt;LABEL</code> on a line by |
| * itself, followed by the text of the heredoc, followed by the text |
| * <code>LABEL</code> on a line by itself (if LABEL starts a line but |
| * is not the <em>only</em> text on that line, then that entire line |
| * is part of the heredoc, and the heredoc is not terminated by that |
| * line). |
| * |
| * <p>In multi-line heredocs, neither the line terminator that |
| * terminates the start of the document, nor the one just before the |
| * label that ends the heredoc, are part of the text of the heredoc. |
| * Thus, for example, the text of the multi-line input from above |
| * would be exactly <code>"1\n+1 +\n1"</code>. If you want a new |
| * line at the end of a multi-line heredoc, put a blank line before |
| * the label ending the heredoc. |
| * |
| * <p>Also in multi-line heredocs, line-terminators within the heredoc |
| * are normalized to line-feeds ('\n'). Thus, for example, when a |
| * test file written on a Windows machine is parsed on any machine, |
| * the Windows-style line terminators within heredocs will be |
| * translated to Unix-style line terminators, no matter what platform |
| * the tests are run on. |
| * |
| * <p> Note that lines between heredocs are ignored, and can be used |
| * to provide spacing between and/or commentary on the test cases. |
| */ |
public class CaseFinder {
  /** Label of the heredoc that starts a new test case. */
  private static final String NEW_CASE_NAME = "INPUT";

  /** Regular expression describing a legal heredoc label. */
  private static final String LABEL_REGEX = "[a-zA-Z][_a-zA-Z0-9]*";

  /** Precompiled form of {@link #LABEL_REGEX}, used to validate
   * the label passed to {@link #find}. */
  private static final Pattern LABEL_PATTERN = Pattern.compile(LABEL_REGEX);

  /** Matches the first line of a heredoc: group 1 is the label, group
   * 2 is whatever follows it on the same line.  DOTALL is set because
   * {@link BufferedReader#readLine} only splits on CR and LF, so a
   * line may legitimately contain other characters (e.g. U+2028)
   * that <code>.</code> would otherwise refuse to match. */
  private static final Pattern START_LINE_PATTERN
    = Pattern.compile("^<<("+LABEL_REGEX+")(.*)$", Pattern.DOTALL);

  /** Scan test-case file <code>in</code> looking for test subcases
   * marked with <code>label</code>.  Any such cases are appended (in
   * order) to <code>cases</code> as two-element arrays of
   * <i>input</i> and <i>expected output</i>.  If <code>label</code>
   * equals the string <code>"INPUT"</code>, then the appended pairs
   * are &lt;<i>input</i>, <code>null</code>&gt;, one for every
   * heredoc named INPUT found in the stream.
   *
   * <p>Heredoc labels are compared exactly: searching for "VALUE"
   * does not match a heredoc labeled "VALUES", and a heredoc labeled
   * "INPUTS" does not start a new case.  For each case, only the
   * first subcase carrying <code>label</code> is reported.
   *
   * <p>Once <code>label</code> has been accepted, <code>in</code> is
   * closed before this method returns, whether it returns normally
   * or by throwing.
   *
   * @param in reader positioned at the start of the test-case file
   * @param label label of the subcases to collect
   * @param cases list that the found cases are appended to
   * @return the <code>cases</code> list that was passed in
   * @throws IllegalArgumentException if <code>label</code> is not a
   *   legal heredoc label (<code>in</code> is left untouched)
   * @throws IOException if reading fails or a heredoc is malformed
   *   or unterminated */
  public static List<Object[]> find(BufferedReader in, String label,
                                    List<Object[]> cases)
    throws IOException
  {
    if (! LABEL_PATTERN.matcher(label).matches())
      throw new IllegalArgumentException("Bad case subcase label: " + label);

    final boolean wantInputsOnly = label.equals(NEW_CASE_NAME);

    try {
      String line = in.readLine();
      while (line != null) {
        // Skip ahead to the start of the next case
        if (! NEW_CASE_NAME.equals(hereDocLabel(line))) {
          line = in.readLine();
          continue;
        }
        String input = processHereDoc(in, line);

        if (wantInputsOnly) {
          cases.add(new Object[] { input, null });
          line = in.readLine();
          continue;
        }

        // Look for a subcase named "label" before the next case begins
        String found;
        do {
          line = in.readLine();
          found = hereDocLabel(line);
        } while (line != null
                 && !NEW_CASE_NAME.equals(found) && !label.equals(found));

        if (line != null && label.equals(found)) {
          String expectedOutput = processHereDoc(in, line);
          cases.add(new Object[] { input, expectedOutput });
          // "line" still holds the subcase's start line; it is not a
          // new-case marker, so the next iteration skips past it.
        }
        // Otherwise "line" is null (EOF) or the start of the next
        // case, and the loop condition / next iteration handles it.
      }
    } finally {
      in.close();
    }
    return cases;
  }

  /** Returns the label of the heredoc started by <code>line</code>,
   * or <code>null</code> if <code>line</code> is <code>null</code>
   * or does not start a heredoc. */
  private static String hereDocLabel(String line) {
    if (line == null || !line.startsWith("<<")) return null;
    Matcher m = START_LINE_PATTERN.matcher(line);
    return m.matches() ? m.group(1) : null;
  }

  /** Reads and returns the content of a heredoc whose first line,
   * <code>docStart</code>, has already been read from
   * <code>in</code>.  For a single-line heredoc the text is taken
   * from <code>docStart</code> itself and nothing more is read.  For
   * a multi-line heredoc, lines are read up to and including the
   * terminating label line; the returned text is those lines joined
   * with <code>'\n'</code>, with no terminator after the last one.
   *
   * @throws IllegalArgumentException if <code>docStart</code> is not
   *   the first line of a heredoc
   * @throws IOException if a single-line heredoc lacks the space
   *   after its label, or if end-of-file is reached before a
   *   multi-line heredoc is terminated */
  private static String processHereDoc(BufferedReader in, String docStart)
    throws IOException
  {
    Matcher m = START_LINE_PATTERN.matcher(docStart);
    if (! m.matches())
      throw new IllegalArgumentException("Wasn't given the start of a heredoc (\""+docStart+"\")");
    String docName = m.group(1);

    // Anything after the label makes this a single-line heredoc
    String singleLineText = m.group(2);
    if (singleLineText.length() != 0) {
      if (! singleLineText.startsWith(" "))
        throw new IOException("Single-line heredoc missing initial space (\""+docStart+"\")");
      return singleLineText.substring(1);
    }

    // Multi-line heredoc: collect lines until one consists solely of
    // the label, separating (not terminating) them with '\n'
    StringBuilder text = new StringBuilder();
    boolean first = true;
    for (String line = in.readLine(); ; line = in.readLine()) {
      if (line == null)
        throw new IOException("Here document (" + docName
                              + ") terminated by end-of-file.");
      if (line.equals(docName))
        return text.toString();
      if (! first) text.append('\n');
      text.append(line);
      first = false;
    }
  }
}