| /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. |
| * Use of this file is governed by the BSD 3-clause license that |
| * can be found in the LICENSE.txt file in the project root. |
| */ |
| |
| #pragma once |
| |
| namespace antlr4 { |
| |
| /** |
| * Useful for rewriting out a buffered input token stream after doing some |
| * augmentation or other manipulations on it. |
| * |
| * <p> |
| * You can insert stuff, replace, and delete chunks. Note that the operations |
| * are done lazily--only if you convert the buffer to a {@link String} with |
| * {@link TokenStream#getText()}. This is very efficient because you are not |
| * moving data around all the time. As the buffer of tokens is converted to |
| * strings, the {@link #getText()} method(s) scan the input token stream and |
| * check to see if there is an operation at the current index. If so, the |
| * operation is done and then normal {@link String} rendering continues on the |
| * buffer. This is like having multiple Turing machine instruction streams |
| * (programs) operating on a single input tape. :)</p> |
| * |
| * <p> |
| * This rewriter makes no modifications to the token stream. It does not ask the |
| * stream to fill itself up nor does it advance the input cursor. The token |
| * stream {@link TokenStream#index()} will return the same value before and |
| * after any {@link #getText()} call.</p> |
| * |
| * <p> |
| * The rewriter only works on tokens that you have in the buffer and ignores the |
| * current input cursor. If you are buffering tokens on-demand, calling |
| * {@link #getText()} halfway through the input will only do rewrites for those |
| * tokens in the first half of the file.</p> |
| * |
| * <p> |
| * Since the operations are done lazily at {@link #getText}-time, operations do |
| * not screw up the token index values. That is, an insert operation at token |
| * index {@code i} does not change the index values for tokens |
| * {@code i}+1..n-1.</p> |
| * |
| * <p> |
| * Because operations never actually alter the buffer, you may always get the |
| * original token stream back without undoing anything. Since the instructions |
| * are queued up, you can easily simulate transactions and roll back any changes |
| * if there is an error just by removing instructions. For example,</p> |
| * |
| * <pre> |
| * CharStream input = new ANTLRFileStream("input"); |
| * TLexer lex = new TLexer(input); |
| * CommonTokenStream tokens = new CommonTokenStream(lex); |
| * T parser = new T(tokens); |
| * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens); |
| * parser.startRule(); |
| * </pre> |
| * |
| * <p> |
| * Then in the rules, you can execute (assuming rewriter is visible):</p> |
| * |
| * <pre> |
| * Token t,u; |
| * ... |
| * rewriter.insertAfter(t, "text to put after t"); |
| * rewriter.insertAfter(u, "text after u"); |
| * System.out.println(rewriter.getText()); |
| * </pre> |
| * |
| * <p> |
| * You can also have multiple "instruction streams" and get multiple rewrites |
| * from a single pass over the input. Just name the instruction streams and use |
| * that name again when printing the buffer. This could be useful for generating |
| * a C file and also its header file--all from the same buffer:</p> |
| * |
| * <pre> |
| * rewriter.insertAfter("pass1", t, "text to put after t"); |
| * rewriter.insertAfter("pass2", u, "text after u"); |
| * System.out.println(rewriter.getText("pass1")); |
| * System.out.println(rewriter.getText("pass2")); |
| * </pre> |
| * |
| * <p> |
| * If you don't use named rewrite streams, a "default" stream is used as the |
| * first example shows.</p> |
| */ |
| class ANTLR4CPP_PUBLIC TokenStreamRewriter { |
| public: |
| /// Name of the default program (instruction stream). Only declared here; |
| /// the value is defined out of line. |
| static const std::string DEFAULT_PROGRAM_NAME; |
| |
| // From C++17 on, static constexpr data members are implicitly inline and need |
| // no out-of-class definition. Older standards use an enum so the constants |
| // can be used without providing one. |
| // PROGRAM_INIT_SIZE: presumably the initial capacity of a program's |
| // instruction list -- TODO confirm in the implementation. |
| // MIN_TOKEN_INDEX: presumably the smallest valid token index. |
| #if __cplusplus >= 201703L |
| static constexpr size_t PROGRAM_INIT_SIZE = 100; |
| static constexpr size_t MIN_TOKEN_INDEX = 0; |
| #else |
| enum : size_t { |
| PROGRAM_INIT_SIZE = 100, |
| MIN_TOKEN_INDEX = 0, |
| }; |
| #endif |
| |
| /// Creates a rewriter for the given token stream. |
| /// NOTE(review): the pointer is presumably kept in `tokens` without taking |
| /// ownership -- confirm in the implementation. |
| TokenStreamRewriter(TokenStream *tokens); |
| virtual ~TokenStreamRewriter(); |
| |
| /// Returns the source token stream (presumably the one given at construction). |
| TokenStream *getTokenStream(); |
| |
| // NOTE(review): throughout this class, the overloads without a programName |
| // presumably act on DEFAULT_PROGRAM_NAME; their definitions are not visible |
| // in this header. |
| |
| /// Same as rollback(programName, instructionIndex) without a program name. |
| virtual void rollback(size_t instructionIndex); |
| |
| /// Rollback the instruction stream for a program so that |
| /// the indicated instruction (via instructionIndex) is no |
| /// longer in the stream. UNTESTED! |
| virtual void rollback(const std::string &programName, size_t instructionIndex); |
| |
| /// Same as deleteProgram(programName) without a program name. |
| virtual void deleteProgram(); |
| |
| /// Reset the program so that no instructions exist. |
| virtual void deleteProgram(const std::string &programName); |
| |
| /// Queue an insertion of `text` after a token, identified either by the |
| /// Token itself or by its index in the token stream. |
| virtual void insertAfter(Token *t, const std::string& text); |
| virtual void insertAfter(size_t index, const std::string& text); |
| virtual void insertAfter(const std::string &programName, Token *t, const std::string& text); |
| virtual void insertAfter(const std::string &programName, size_t index, const std::string& text); |
| |
| /// Queue an insertion of `text` before a token, identified either by the |
| /// Token itself or by its index in the token stream. |
| virtual void insertBefore(Token *t, const std::string& text); |
| virtual void insertBefore(size_t index, const std::string& text); |
| virtual void insertBefore(const std::string &programName, Token *t, const std::string& text); |
| virtual void insertBefore(const std::string &programName, size_t index, const std::string& text); |
| |
| /// Queue a replacement of a single token, or of the token range from..to, |
| /// with `text`. The range is presumably inclusive at both ends -- TODO confirm. |
| virtual void replace(size_t index, const std::string& text); |
| virtual void replace(size_t from, size_t to, const std::string& text); |
| virtual void replace(Token *indexT, const std::string& text); |
| virtual void replace(Token *from, Token *to, const std::string& text); |
| virtual void replace(const std::string &programName, size_t from, size_t to, const std::string& text); |
| virtual void replace(const std::string &programName, Token *from, Token *to, const std::string& text); |
| |
| /// Queue a deletion of a single token or of the token range from..to. |
| /// Capitalized because `delete` is a C++ keyword. Presumably implemented as a |
| /// replace with empty text -- TODO confirm. |
| virtual void Delete(size_t index); |
| virtual void Delete(size_t from, size_t to); |
| virtual void Delete(Token *indexT); |
| virtual void Delete(Token *from, Token *to); |
| virtual void Delete(const std::string &programName, size_t from, size_t to); |
| virtual void Delete(const std::string &programName, Token *from, Token *to); |
| |
| /// Public counterpart of getLastRewriteTokenIndex(programName) that takes no |
| /// program name. |
| virtual size_t getLastRewriteTokenIndex(); |
| |
| /// Return the text from the original tokens altered per the |
| /// instructions given to this rewriter. |
| virtual std::string getText(); |
| |
| /** Return the text from the original tokens altered per the |
| * instructions given to this rewriter in programName. |
| * |
| * NOTE(review): unlike the sibling overloads this one is non-virtual and |
| * takes programName by value. It is left unchanged because its out-of-line |
| * definition is not visible from this header. |
| */ |
| std::string getText(std::string programName); |
| |
| /// Return the text associated with the tokens in the interval from the |
| /// original token stream but with the alterations given to this rewriter. |
| /// The interval refers to the indexes in the original token stream. |
| /// We do not alter the token stream in any way, so the indexes |
| /// and intervals are still consistent. Includes any operations done |
| /// to the first and last token in the interval. So, if you did an |
| /// insertBefore on the first token, you would get that insertion. |
| /// The same is true if you do an insertAfter the stop token. |
| virtual std::string getText(const misc::Interval &interval); |
| |
| /// Same as getText(interval), but applies the instructions of programName. |
| virtual std::string getText(const std::string &programName, const misc::Interval &interval); |
| |
| protected: |
| /// Base class of all queued rewrite instructions. |
| class RewriteOperation { |
| public: |
| /// Token buffer index this operation applies to. |
| /// NOTE(review): this comment and the one on instructionIndex were |
| /// previously swapped relative to the member names; confirm against the |
| /// implementation. |
| size_t index; |
| |
| /// Text carried by the operation. |
| std::string text; |
| |
| /// What index into the rewrites list are we? |
| size_t instructionIndex; |
| |
| RewriteOperation(TokenStreamRewriter *outerInstance, size_t index); |
| RewriteOperation(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); |
| virtual ~RewriteOperation(); |
| |
| /// Execute the rewrite operation by possibly adding to the buffer. |
| /// Return the index of the next token to operate on. |
| virtual size_t execute(std::string *buf); |
| |
| /// Returns a printable description of this operation. |
| virtual std::string toString(); |
| |
| private: |
| /// The rewriter this operation was created for. |
| TokenStreamRewriter *const outerInstance; |
| |
| /// Presumably assigns initial values to the data members -- TODO confirm. |
| void InitializeInstanceFields(); |
| }; |
| |
| /// Instruction "I.i.u": insert text u before the token at index i. |
| class InsertBeforeOp : public RewriteOperation { |
| private: |
| // The base class keeps its own outerInstance private, so this class holds |
| // a separate pointer to the rewriter. |
| TokenStreamRewriter *const outerInstance; |
| |
| public: |
| InsertBeforeOp(TokenStreamRewriter *outerInstance, size_t index, const std::string& text); |
| |
| virtual size_t execute(std::string *buf) override; |
| }; |
| |
| /// Instruction "R.x-y.u": replace the tokens indexed x-y with text u. |
| class ReplaceOp : public RewriteOperation { |
| private: |
| // The base class keeps its own outerInstance private, so this class holds |
| // a separate pointer to the rewriter. |
| TokenStreamRewriter *const outerInstance; |
| |
| public: |
| /// Presumably the index of the last token of the replaced range (the |
| /// constructor's `to`), with the inherited `index` as the first -- TODO confirm. |
| size_t lastIndex; |
| |
| ReplaceOp(TokenStreamRewriter *outerInstance, size_t from, size_t to, const std::string& text); |
| virtual size_t execute(std::string *buf) override; |
| virtual std::string toString() override; |
| |
| private: |
| /// Presumably assigns initial values to the data members -- TODO confirm. |
| void InitializeInstanceFields(); |
| }; |
| |
| /// Our source stream |
| TokenStream *const tokens; |
| |
| /// You may have multiple, named streams of rewrite operations. |
| /// I'm calling these things "programs." |
| /// Maps String (name) -> rewrite (List) |
| /// NOTE(review): the raw RewriteOperation pointers are presumably owned by |
| /// this rewriter and released by it -- confirm in the implementation. |
| std::map<std::string, std::vector<RewriteOperation*>> _programs; |
| |
| /// <summary> |
| /// Map String (program name) -> Integer index </summary> |
| std::map<std::string, size_t> _lastRewriteTokenIndexes; |
| |
| /// Accessors for the per-program entry of _lastRewriteTokenIndexes |
| /// (presumably; the definitions are not visible here). |
| virtual size_t getLastRewriteTokenIndex(const std::string &programName); |
| virtual void setLastRewriteTokenIndex(const std::string &programName, size_t i); |
| |
| /// Returns a reference to the instruction list of the named program. |
| /// NOTE(review): presumably creates the program when it does not exist yet. |
| virtual std::vector<RewriteOperation*>& getProgram(const std::string &name); |
| |
| /// <summary> |
| /// We need to combine operations and report invalid operations (like |
| /// overlapping replaces that are not completely nested). Inserts to |
| /// same index need to be combined etc... Here are the cases: |
| /// |
| /// I.i.u I.j.v leave alone, nonoverlapping |
| /// I.i.u I.i.v combine: Iivu |
| /// |
| /// R.i-j.u R.x-y.v | i-j in x-y delete first R |
| /// R.i-j.u R.i-j.v delete first R |
| /// R.i-j.u R.x-y.v | x-y in i-j ERROR |
| /// R.i-j.u R.x-y.v | boundaries overlap ERROR |
| /// |
| /// Delete special case of replace (text==null): |
| /// D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) |
| /// |
| /// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before |
| /// we're not deleting i) |
| /// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping |
| /// R.x-y.v I.i.u | i in x-y ERROR |
| /// R.x-y.v I.x.u R.x-y.uv (combine, delete I) |
| /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping |
| /// |
| /// I.i.u = insert u before op @ index i |
| /// R.x-y.u = replace x-y indexed tokens with u |
| /// |
| /// First we need to examine replaces. For any replace op: |
| /// |
| /// 1. wipe out any insertions before op within that range. |
| /// 2. Drop any replace op before that is contained completely within |
| /// that range. |
| /// 3. Throw exception upon boundary overlap with any previous replace. |
| /// |
| /// Then we can deal with inserts: |
| /// |
| /// 1. for any inserts to same index, combine even if not adjacent. |
| /// 2. for any prior replace with same left boundary, combine this |
| /// insert with replace and delete this replace. |
| /// 3. throw exception if index in same range as previous replace |
| /// |
| /// Don't actually delete; make op null in list. Easier to walk list. |
| /// Later we can throw as we add to index -> op map. |
| /// |
| /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the |
| /// inserted stuff would be before the replace range. But, if you |
| /// add tokens in front of a method body '{' and then delete the method |
| /// body, I think the stuff before the '{' you added should disappear too. |
| /// |
| /// Return a map from token index to operation. |
| /// </summary> |
| virtual std::unordered_map<size_t, RewriteOperation*> reduceToSingleOperationPerIndex(std::vector<RewriteOperation*> &rewrites); |
| |
| /// NOTE(review): presumably concatenates the texts of two operations, |
| /// treating a null pointer as empty text -- confirm in the implementation. |
| virtual std::string catOpText(std::string *a, std::string *b); |
| |
| /// Get all operations before an index of a particular kind. |
| /// |
| /// Scans rewrites[0 .. min(before, rewrites.size())) and returns, in list |
| /// order, every entry whose dynamic type is T or derived from T. |
| /// The list is taken by const reference: it is only read, so there is no |
| /// reason to copy the whole instruction list on each call. |
| template <typename T> |
| std::vector<T *> getKindOfOps(const std::vector<RewriteOperation *> &rewrites, size_t before) { |
| std::vector<T *> matches; |
| // Never read past the end of the list, even if `before` is larger. |
| const size_t limit = before < rewrites.size() ? before : rewrites.size(); |
| for (size_t i = 0; i < limit; i++) { |
| // dynamic_cast yields nullptr both for nulled-out (deleted) slots and |
| // for operations of another kind, so a single check skips both. |
| if (T *op = dynamic_cast<T *>(rewrites[i])) { |
| matches.push_back(op); |
| } |
| } |
| return matches; |
| } |
| |
| private: |
| /// NOTE(review): presumably creates the instruction list for `name` and |
| /// returns a reference to it -- confirm in the implementation. |
| std::vector<RewriteOperation *>& initializeProgram(const std::string &name); |
| |
| }; |
| |
| } // namespace antlr4 |