freemarker-core/src/main/java/freemarker/core/DefaultTruncateBuiltinAlgorithm.java - freemarker - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package freemarker.core;

 import freemarker.template.SimpleScalar;
 import freemarker.template.TemplateException;
 import freemarker.template.TemplateModel;
 import freemarker.template.TemplateModelException;
 import freemarker.template.TemplateScalarModel;
 import freemarker.template.utility.ClassUtil;
 import freemarker.template.utility.NullArgumentException;

 /**
  * The default {@link TruncateBuiltinAlgorithm} implementation; see
  * {@link Configurable#setTruncateBuiltinAlgorithm(TruncateBuiltinAlgorithm)}.
  * To know the properties of this {@link TruncateBuiltinAlgorithm} implementation, see the
  * {@linkplain DefaultTruncateBuiltinAlgorithm#DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean,
  * TemplateMarkupOutputModel, Integer, Boolean, boolean, Double) constructor}. You can find more explanation and
  * examples in the documentation of the {@code truncate} built-in in the FreeMarker Manual.
  *
  * @since 2.3.29
  */
 public class DefaultTruncateBuiltinAlgorithm extends TruncateBuiltinAlgorithm {

     /** The plain text terminator that {@link #ASCII_INSTANCE} is constructed with; contains US-ASCII only. */
     public static final String STANDARD_ASCII_TERMINATOR = "[...]";

     /**
      * The plain text terminator that {@link #UNICODE_INSTANCE} is constructed with; same as
      * {@link #STANDARD_ASCII_TERMINATOR}, but with a single ellipsis character (U+2026) instead of the three dots.
      */
     public static final String STANDARD_UNICODE_TERMINATOR = "[\u2026]";

     /**
      * Used by {@link #ASCII_INSTANCE} and {@link #UNICODE_INSTANCE} as the markup terminator;
      * HTML {@code <span class='truncateTerminator'>[&#8230;]</span>}, where {@code &#8230;} is the ellipsis (&#8230;)
      * character. Note that while the ellipsis character is not in US-ASCII, this still works safely regardless of
      * output charset, as {@code &#8230;} itself only contains US-ASCII characters.
      */
     public static final TemplateHTMLOutputModel STANDARD_M_TERMINATOR;
     // Initialized in a static block because fromMarkup declares TemplateModelException; a failure here is
     // reported as IllegalStateException during class initialization.
     static {
         try {
             STANDARD_M_TERMINATOR = HTMLOutputFormat.INSTANCE.fromMarkup(
                     "<span class='truncateTerminator'>[&#8230;]</span>");
         } catch (TemplateModelException e) {
             throw new IllegalStateException(e);
         }
     }

     /**
      * The value that the constructor substitutes when its {@code wordBoundaryMinLength} argument is {@code null},
      * as is the case for {@link #ASCII_INSTANCE} and {@link #UNICODE_INSTANCE}.
      */
     public static final double DEFAULT_WORD_BOUNDARY_MIN_LENGTH = 0.75;

     /** Used if {@link #getMTerminatorLength(TemplateMarkupOutputModel)} can't detect the length. */
     private static final int FALLBACK_M_TERMINATOR_LENGTH = 3;

     /**
      * Selects where the truncation may happen: {@code CHAR_BOUNDARY} is passed by {@code truncateC}/{@code
      * truncateCM}, {@code WORD_BOUNDARY} by {@code truncateW}/{@code truncateWM}, and {@code AUTO} by {@code
      * truncate}/{@code truncateM}. With {@code AUTO}, word boundary truncation is attempted first (unless
      * {@code wordBoundaryMinLength} is 1.0), and character boundary truncation is the fallback.
      */
     private enum TruncationMode {
         CHAR_BOUNDARY, WORD_BOUNDARY, AUTO
     }

     /**
      * Instance that uses {@code "[...]"} as the {@code defaultTerminator} constructor argument, and thus is
      * safe to use for all output charsets. Because of that, this is the default of
      * {@link Configurable#setTruncateBuiltinAlgorithm(TruncateBuiltinAlgorithm)}. The
      * {@code defaultMTerminator} (markup terminator) is {@link #STANDARD_M_TERMINATOR}, the
      * {@code wordBoundaryMinLength} is {@link #DEFAULT_WORD_BOUNDARY_MIN_LENGTH}, and {@code addSpaceAtWordBoundary}
      * is {@code true}.
      */
     public static final DefaultTruncateBuiltinAlgorithm ASCII_INSTANCE = new DefaultTruncateBuiltinAlgorithm(
             STANDARD_ASCII_TERMINATOR, STANDARD_M_TERMINATOR, true);

     /**
      * Instance that uses {@code "[\u2026]"} as the {@code defaultTerminator} constructor argument, which contains
      * the ellipsis character ({@code "\u2026"}, U+2026), and thus only works with UTF-8, the cp125x charsets (like
      * cp1250), and some other rarely used ones. It does not work (the ellipsis becomes a question mark) with
      * ISO-8859-x charsets (like ISO-8859-1), which are probably the most often used charsets after UTF-8.
      *
      * <p>The {@code defaultMTerminator} (markup terminator) is {@link #STANDARD_M_TERMINATOR}, the
      * {@code wordBoundaryMinLength} is {@link #DEFAULT_WORD_BOUNDARY_MIN_LENGTH}, and {@code addSpaceAtWordBoundary}
      * is {@code true}.
      */
     public static final DefaultTruncateBuiltinAlgorithm UNICODE_INSTANCE = new DefaultTruncateBuiltinAlgorithm(
             STANDARD_UNICODE_TERMINATOR, STANDARD_M_TERMINATOR, true);

     // Plain text terminator used when the caller passes no terminator; never null (checked in the constructor).
     private final TemplateScalarModel defaultTerminator;
     // Either the explicitly given length, or defaultTerminator.length().
     private final int defaultTerminatorLength;
     // Either the explicitly given value, or what getTerminatorRemovesDots(String) returned at construction.
     private final boolean defaultTerminatorRemovesDots;

     // Markup terminator used by the markup-returning truncations when the caller passes no terminator; can be null.
     private final TemplateMarkupOutputModel<?> defaultMTerminator;
     // null if and only if defaultMTerminator is null and no explicit length applies (see the constructor).
     private final Integer defaultMTerminatorLength;
     // false when defaultMTerminator is null.
     private final boolean defaultMTerminatorRemovesDots;

     // Proportion of maxLength (0.0..1.0) that word boundary truncation must reach in AUTO mode.
     private final double wordBoundaryMinLength;
     // Whether a space is inserted before the terminator when the kept text ends at a word end.
     private final boolean addSpaceAtWordBoundary;

     /**
      * Convenience constructor taking both a plain text and a markup default terminator. Delegates to
      * {@link #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel,
      * Integer, Boolean, boolean, Double)}, passing {@code null} for every parameter that's not present here
      * (so the lengths, the "removes dots" flags, and {@code wordBoundaryMinLength} get their automatic values).
      */
     public DefaultTruncateBuiltinAlgorithm(
             String defaultTerminator,
             TemplateMarkupOutputModel<?> defaultMTerminator,
             boolean addSpaceAtWordBoundary) {
         this(defaultTerminator, null, null, defaultMTerminator, null, null, addSpaceAtWordBoundary, null);
     }

     /**
      * Convenience constructor for an instance that has only a plain text default terminator (no markup terminator).
      * Delegates to {@link #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel,
      * Integer, Boolean, boolean, Double)}, passing {@code null} for every parameter that's not present here.
      */
     public DefaultTruncateBuiltinAlgorithm(
             String defaultTerminator,
             boolean addSpaceAtWordBoundary) {
         this(defaultTerminator, null, null, null, null, null, addSpaceAtWordBoundary, null);
     }

     /**
      * Creates an instance with all settings specified explicitly; the other constructors delegate to this one.
      *
      * @param defaultTerminator
      *            The terminator to use if the invocation (like {@code s?truncate(20)}) doesn't specify it. The
      *            terminator is the text appended after a truncated string, to indicate that it was truncated.
      *            Typically it's {@code "[...]"} or {@code "..."}, or the same with the UNICODE ellipsis character.
      *            Can't be {@code null}.
      * @param defaultTerminatorLength
      *            The assumed length of {@code defaultTerminator}, or {@code null} if it should be obtained via
      *            {@code defaultTerminator.length()}.
      * @param defaultTerminatorRemovesDots
      *            Whether dots and ellipsis characters that the {@code defaultTerminator} touches should be removed.
      *            If {@code null}, this will be auto-detected based on whether {@code defaultTerminator} starts with
      *            a dot or an ellipsis. The goal is to avoid outcomes where we have more dots next to each other than
      *            there are in the terminator.
      * @param defaultMTerminator
      *            Similar to {@code defaultTerminator}, but is markup instead of plain text. This can be {@code null},
      *            in which case {@code defaultTerminator} will be used even if {@code ?truncate_m} or a similar
      *            built-in is called.
      * @param defaultMTerminatorLength
      *            The assumed length of the markup terminator, or {@code null} if it should be obtained via
      *            {@link #getMTerminatorLength}.
      * @param defaultMTerminatorRemovesDots
      *            Similar to {@code defaultTerminatorRemovesDots}, but for {@code defaultMTerminator}. If {@code
      *            null}, and {@code defaultMTerminator} is HTML/XML/XHTML, then it will be examined if the
      *            first character of the terminator that's outside an HTML/XML tag or comment is a dot or an ellipsis
      *            (after resolving numerical character references). For other kinds of markup it defaults to {@code
      *            true}, to be on the safe side.
      * @param addSpaceAtWordBoundary
      *            Whether to add a space before the terminator if the truncation happens directly after the end of a
      *            word. For example, when "too long sentence" is truncated, it will be like "too long [...]"
      *            instead of "too long[...]". When the truncation happens inside a word, this has no effect, i.e., it
      *            will always be like "too long se[...]" (no space before the terminator). Note that only whitespace
      *            is considered to be a word separator, not punctuation, so if this is {@code true}, you get results
      *            like "Some sentence. [...]".
      * @param wordBoundaryMinLength
      *            Used when {@link #truncate} or {@link #truncateM} has to decide between
      *            word boundary truncation and character boundary truncation; it's the minimum length, given as
      *            proportion of {@code maxLength}, that word boundary truncation has to produce. If the resulting
      *            length is less, we do character boundary truncation instead. For example, if {@code maxLength} is
      *            30, and this parameter is 0.85, then: 30*0.85 = 25.5, rounded up that's 26, so the resulting length
      *            must be at least 26. The result of character boundary truncation will always be accepted, even if
      *            it's still too short. If this parameter is {@code null}, then
      *            {@link #DEFAULT_WORD_BOUNDARY_MIN_LENGTH} will be used. If this parameter is 0, then truncation
      *            always happens at word boundary. If this parameter is 1.0, then truncation doesn't prefer word
      *            boundaries over other places.
      *
      * @throws IllegalArgumentException
      *             If {@code wordBoundaryMinLength} is less than 0 or greater than 1, or if examining one of the
      *             terminators has failed with {@link TemplateModelException}.
      */
     public DefaultTruncateBuiltinAlgorithm(
             String defaultTerminator, Integer defaultTerminatorLength,
             Boolean defaultTerminatorRemovesDots,
             TemplateMarkupOutputModel<?> defaultMTerminator, Integer defaultMTerminatorLength,
             Boolean defaultMTerminatorRemovesDots,
             boolean addSpaceAtWordBoundary, Double wordBoundaryMinLength) {
         NullArgumentException.check("defaultTerminator", defaultTerminator);
         this.defaultTerminator = new SimpleScalar(defaultTerminator);

         // Plain text terminator: explicit values win, otherwise derive them from the string itself.
         try {
             if (defaultTerminatorLength != null) {
                 this.defaultTerminatorLength = defaultTerminatorLength;
             } else {
                 this.defaultTerminatorLength = defaultTerminator.length();
             }

             if (defaultTerminatorRemovesDots != null) {
                 this.defaultTerminatorRemovesDots = defaultTerminatorRemovesDots;
             } else {
                 this.defaultTerminatorRemovesDots = getTerminatorRemovesDots(defaultTerminator);
             }
         } catch (TemplateModelException e) {
             throw new IllegalArgumentException("Failed to examine defaultTerminator", e);
         }

         // Markup terminator: optional; when absent the related final fields still need a value.
         this.defaultMTerminator = defaultMTerminator;
         if (defaultMTerminator == null) {
             this.defaultMTerminatorLength = null;
             this.defaultMTerminatorRemovesDots = false;
         } else {
             try {
                 if (defaultMTerminatorLength != null) {
                     this.defaultMTerminatorLength = defaultMTerminatorLength;
                 } else {
                     this.defaultMTerminatorLength = getMTerminatorLength(defaultMTerminator);
                 }

                 if (defaultMTerminatorRemovesDots != null) {
                     this.defaultMTerminatorRemovesDots = defaultMTerminatorRemovesDots;
                 } else {
                     this.defaultMTerminatorRemovesDots = getMTerminatorRemovesDots(defaultMTerminator);
                 }
             } catch (TemplateModelException e) {
                 throw new IllegalArgumentException("Failed to examine defaultMTerminator", e);
             }
         }

         if (wordBoundaryMinLength == null) {
             this.wordBoundaryMinLength = DEFAULT_WORD_BOUNDARY_MIN_LENGTH;
         } else {
             if (wordBoundaryMinLength < 0 || wordBoundaryMinLength > 1) {
                 throw new IllegalArgumentException("wordBoundaryMinLength must be between 0.0 and 1.0 (inclusive)");
             }
             this.wordBoundaryMinLength = wordBoundaryMinLength;
         }

         this.addSpaceAtWordBoundary = addSpaceAtWordBoundary;
     }

     /**
      * Truncates {@code s} in {@code AUTO} mode (word boundary truncation if it gives a long enough result,
      * otherwise character boundary truncation), never producing markup. The {@code env} argument isn't used by
      * this implementation.
      */
     @Override
     public TemplateScalarModel truncate(
             String s, int maxLength,
             TemplateScalarModel terminator, Integer terminatorLength,
             Environment env) throws TemplateException {
         final TemplateModel result = unifiedTruncate(
                 s, maxLength, terminator, terminatorLength, TruncationMode.AUTO, false);
         return (TemplateScalarModel) result;
     }

     /**
      * Truncates {@code s} strictly at word boundary ({@code WORD_BOUNDARY} mode), never producing markup.
      * The {@code env} argument isn't used by this implementation.
      */
     @Override
     public TemplateScalarModel truncateW(
             String s, int maxLength,
             TemplateScalarModel terminator, Integer terminatorLength,
             Environment env) throws TemplateException {
         final TemplateModel result = unifiedTruncate(
                 s, maxLength, terminator, terminatorLength, TruncationMode.WORD_BOUNDARY, false);
         return (TemplateScalarModel) result;
     }

     /**
      * Truncates {@code s} at character boundary ({@code CHAR_BOUNDARY} mode), never producing markup.
      * The {@code env} argument isn't used by this implementation.
      */
     @Override
     public TemplateScalarModel truncateC(
             String s, int maxLength,
             TemplateScalarModel terminator, Integer terminatorLength,
             Environment env) throws TemplateException {
         final TemplateModel result = unifiedTruncate(
                 s, maxLength, terminator, terminatorLength, TruncationMode.CHAR_BOUNDARY, false);
         return (TemplateScalarModel) result;
     }

     /**
      * Same as {@link #truncate}, but the terminator can be markup, and if no terminator was passed in then the
      * default markup terminator is used when there is one; hence the result can be markup. The {@code env}
      * argument isn't used by this implementation.
      */
     @Override
     public TemplateModel truncateM(
             String s, int maxLength,
             TemplateModel terminator, Integer terminatorLength,
             Environment env) throws TemplateException {
         final TemplateModel result = unifiedTruncate(
                 s, maxLength, terminator, terminatorLength, TruncationMode.AUTO, true);
         return result;
     }

     /**
      * Same as {@link #truncateW}, but the terminator can be markup, and if no terminator was passed in then the
      * default markup terminator is used when there is one; hence the result can be markup. The {@code env}
      * argument isn't used by this implementation.
      */
     @Override
     public TemplateModel truncateWM(
             String s, int maxLength,
             TemplateModel terminator, Integer terminatorLength,
             Environment env) throws TemplateException {
         final TemplateModel result = unifiedTruncate(
                 s, maxLength, terminator, terminatorLength, TruncationMode.WORD_BOUNDARY, true);
         return result;
     }

     /**
      * Same as {@link #truncateC}, but the terminator can be markup, and if no terminator was passed in then the
      * default markup terminator is used when there is one; hence the result can be markup. The {@code env}
      * argument isn't used by this implementation.
      */
     @Override
     public TemplateModel truncateCM(
             String s, int maxLength,
             TemplateModel terminator, Integer terminatorLength,
             Environment env) throws TemplateException {
         final TemplateModel result = unifiedTruncate(
                 s, maxLength, terminator, terminatorLength, TruncationMode.CHAR_BOUNDARY, true);
         return result;
     }

     /**
      * Returns the plain text default terminator as a string; see the similarly named parameter of the
      * {@linkplain #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel, Integer,
      * Boolean, boolean, Double) constructor}.
      *
      * @throws IllegalStateException
      *             If reading the string value of the stored terminator fails with {@link TemplateModelException}.
      */
     public String getDefaultTerminator() {
         final String terminatorText;
         try {
             terminatorText = defaultTerminator.getAsString();
         } catch (TemplateModelException e) {
             throw new IllegalStateException(e);
         }
         return terminatorText;
     }

     /**
      * See the similarly named parameter of the
      * {@linkplain #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel, Integer,
      * Boolean, boolean, Double) constructor}. If {@code null} was passed there, this is the length of the
      * default terminator string.
      */
     public int getDefaultTerminatorLength() {
         return this.defaultTerminatorLength;
     }

     /**
      * See the similarly named parameter of the
      * {@linkplain #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel, Integer,
      * Boolean, boolean, Double) constructor}. If {@code null} was passed there, this is the auto-detected value.
      */
     public boolean getDefaultTerminatorRemovesDots() {
         return this.defaultTerminatorRemovesDots;
     }

     /**
      * See the similarly named parameter of the
      * {@linkplain #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel, Integer,
      * Boolean, boolean, Double) constructor}. Can be {@code null}.
      */
     public TemplateMarkupOutputModel<?> getDefaultMTerminator() {
         return this.defaultMTerminator;
     }

     /**
      * See the similarly named parameter of the
      * {@linkplain #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel, Integer,
      * Boolean, boolean, Double) constructor}. This is {@code null} if there's no default markup terminator.
      */
     public Integer getDefaultMTerminatorLength() {
         return this.defaultMTerminatorLength;
     }

     /**
      * See the similarly named parameter of the
      * {@linkplain #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel, Integer,
      * Boolean, boolean, Double) constructor}. This is {@code false} if there's no default markup terminator.
      */
     public boolean getDefaultMTerminatorRemovesDots() {
         return this.defaultMTerminatorRemovesDots;
     }

     /**
      * See the similarly named parameter of the
      * {@linkplain #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel, Integer,
      * Boolean, boolean, Double) constructor}. If {@code null} was passed there, this is
      * {@link #DEFAULT_WORD_BOUNDARY_MIN_LENGTH}.
      */
     public double getWordBoundaryMinLength() {
         return this.wordBoundaryMinLength;
     }

     /**
      * See the similarly named parameter of the
      * {@linkplain #DefaultTruncateBuiltinAlgorithm(String, Integer, Boolean, TemplateMarkupOutputModel, Integer,
      * Boolean, boolean, Double) constructor}.
      */
     public boolean getAddSpaceAtWordBoundary() {
         return this.addSpaceAtWordBoundary;
     }

     /**
      * Returns the (estimated) length of the argument markup terminator. The intent is to count only the characters
      * that are visible for the user (like in the web browser).
      *
      * <p>The implementation in {@link DefaultTruncateBuiltinAlgorithm} works like this: if the output format of the
      * markup is {@link HTMLOutputFormat} or {@link XMLOutputFormat} (or a subclass of those), then it counts the
      * characters outside tags and comments, and inside CDATA sections (ignoring the CDATA section delimiters),
      * counting each character or entity reference as 1. For any other markup it doesn't know how to measure the
      * length, and simply returns 3.
      */
     @SuppressWarnings({"rawtypes", "unchecked"})
     protected int getMTerminatorLength(TemplateMarkupOutputModel<?> mTerminator) throws TemplateModelException {
         final MarkupOutputFormat outputFormat = mTerminator.getOutputFormat();
         if (!isHTMLOrXML(outputFormat)) {
             return FALLBACK_M_TERMINATOR_LENGTH;
         }
         return getLengthWithoutTags(outputFormat.getMarkupString(mTerminator));
     }

     /**
      * Tells if the dots touched by the plain text terminator should be removed.
      *
      * <p>The implementation in {@link DefaultTruncateBuiltinAlgorithm} returns {@code true} if the terminator starts
      * with a dot ({@code '.'}) or with an ellipsis (U+2026).
      *
      * @param terminator
      *            The terminator as plain text. Not {@code null}.
      */
     protected boolean getTerminatorRemovesDots(String terminator) throws TemplateModelException {
         if (terminator.startsWith(".")) {
             return true;
         }
         return terminator.startsWith("\u2026");
     }

     /**
      * Same as {@link #getTerminatorRemovesDots(String)}, but invoked for a markup terminator.
      *
      * <p>The implementation in {@link DefaultTruncateBuiltinAlgorithm} will skip HTML/XML tags and comments,
      * and resolves the relevant character references, to find out if the first character is a dot or an ellipsis.
      * But it only does this for HTML/XML/XHTML (or subclasses of those {@link MarkupOutputFormat}-s), otherwise it
      * always returns {@code true} to be on the safe side.
      */
     protected boolean getMTerminatorRemovesDots(TemplateMarkupOutputModel terminator) throws TemplateModelException {
         if (!isHTMLOrXML(terminator.getOutputFormat())) {
             // Unknown kind of markup; we can't look into it, so assume that dots must be removed.
             return true;
         }
         return doesHtmlOrXmlStartWithDot(terminator.getOutputFormat().getMarkupString(terminator));
     }

     /**
      * The common implementation behind all the {@code truncate...} methods; it deals with both character boundary
      * and word boundary truncation, hence it's "unified".
      *
      * @param terminator
      *            The terminator given by the caller, or {@code null} to use a default one; in the latter case the
      *            passed in {@code terminatorLength} is ignored, and the default markup terminator is chosen if
      *            {@code allowMarkupResult} is {@code true} and there's a default markup terminator, otherwise the
      *            default plain text terminator.
      * @param terminatorLength
      *            The assumed length of {@code terminator}, or {@code null} to let this method calculate it.
      *
      * @return {@code s} itself (wrapped) if it's not longer than {@code maxLength}; the terminator alone if nothing
      *         could be kept from {@code s}; otherwise the kept part of {@code s} followed by the terminator, as
      *         markup if the terminator is markup.
      *
      * @throws IllegalArgumentException
      *             If {@code maxLength} or the explicitly given {@code terminatorLength} is negative, or if the
      *             terminator is neither a string nor markup.
      */
     private TemplateModel unifiedTruncate(
             String s, int maxLength,
             TemplateModel terminator, Integer terminatorLength,
             TruncationMode mode, boolean allowMarkupResult)
             throws TemplateException {
         if (s.length() <= maxLength) {
             return new SimpleScalar(s);
         }
         if (maxLength < 0) {
             throw new IllegalArgumentException("maxLength can't be negative");
         }

         final Boolean removesDots;
         if (terminator != null) {
             if (terminatorLength == null) {
                 terminatorLength = getTerminatorLength(terminator);
             } else if (terminatorLength < 0) {
                 throw new IllegalArgumentException("terminatorLength can't be negative");
             }
             // Left unknown; it's only calculated if the truncation actually runs into a dot.
             removesDots = null;
         } else if (allowMarkupResult && defaultMTerminator != null) {
             terminator = defaultMTerminator;
             terminatorLength = defaultMTerminatorLength;
             removesDots = defaultMTerminatorRemovesDots;
         } else {
             terminator = defaultTerminator;
             terminatorLength = defaultTerminatorLength;
             removesDots = defaultTerminatorRemovesDots;
         }

         final StringBuilder keptPart = unifiedTruncateWithoutTerminatorAdded(
                 s, maxLength, terminator, terminatorLength, removesDots, mode);

         // We always show the terminator, even if that alone exceeds maxLength, as otherwise the user couldn't
         // tell that the string was truncated.
         if (keptPart == null || keptPart.length() == 0) {
             return terminator;
         }

         if (terminator instanceof TemplateScalarModel) {
             final String terminatorText = ((TemplateScalarModel) terminator).getAsString();
             keptPart.append(terminatorText);
             return new SimpleScalar(keptPart.toString());
         }
         if (terminator instanceof TemplateMarkupOutputModel) {
             // The kept part is plain text, so it has to be escaped before it's concatenated with the markup.
             TemplateMarkupOutputModel mTerminator = (TemplateMarkupOutputModel) terminator;
             MarkupOutputFormat format = mTerminator.getOutputFormat();
             return format.concat(format.fromPlainTextByEscaping(keptPart.toString()), mTerminator);
         }
         throw new IllegalArgumentException("Unsupported terminator type: "
                 + ClassUtil.getFTLTypeDescription(terminator));
     }

     /**
      * Calculates the part of {@code s} that's kept before the terminator; the terminator itself is not appended
      * here. Index variables named {@code ...LastCIdx} are the index of the last character of {@code s} that's kept;
      * the {@code cb} prefix stands for character boundary truncation, and {@code wb} for word boundary truncation.
      *
      * @param terminator
      *            Only used to lazily find out if the terminator removes dots, when {@code terminatorRemovesDots}
      *            is {@code null}.
      * @param terminatorRemovesDots
      *            {@code null} if it's not yet known; then it's calculated when (and only if) it's first needed.
      *
      * @return The kept part of {@code s}, with a space appended if the kept part ends at a word end and {@code
      *         addSpaceAtWordBoundary} is {@code true}. It's {@code null} if nothing can be kept from {@code s},
      *         which includes the case when word boundary truncation was mandatory but wasn't possible.
      */
     private StringBuilder unifiedTruncateWithoutTerminatorAdded(
             String s, int maxLength,
             TemplateModel terminator, int terminatorLength, Boolean terminatorRemovesDots,
             TruncationMode mode) throws TemplateModelException {
         // The last index that still leaves room for the terminator within maxLength.
         final int cbInitialLastCIdx = maxLength - terminatorLength - 1;
         int cbLastCIdx = cbInitialLastCIdx;

         // Why we do this here: If both Word Boundary and Character Boundary truncation will be attempted, then this way
         // we don't have to skip the WS twice.
         cbLastCIdx = skipTrailingWS(s, cbLastCIdx);
         if (cbLastCIdx < 0) {
             return null;
         }

         // && binds stronger than ||: AUTO mode attempts this only if word boundaries are preferred at all.
         if (mode == TruncationMode.AUTO && wordBoundaryMinLength < 1.0 || mode == TruncationMode.WORD_BOUNDARY) {
             // Do word boundary truncation. Might not be possible due to minLength restriction (see below), in which
             // case truncedS stays null.
             StringBuilder truncedS = null;
             {
                 // The space added before the terminator counts into the length that the terminator takes.
                 final int wordTerminatorLength = addSpaceAtWordBoundary ? terminatorLength + 1 : terminatorLength;
                 // The lowest acceptable index of the last kept character; only AUTO mode has such restriction.
                 final int minIdx = mode == TruncationMode.AUTO
                         ? Math.max(((int) Math.ceil(maxLength * wordBoundaryMinLength)) - wordTerminatorLength - 1, 0)
                         : 0;

                 int wbLastCIdx = Math.min(maxLength - wordTerminatorLength - 1, cbLastCIdx);
                 boolean followingCIsWS
                         = s.length() > wbLastCIdx + 1 ? Character.isWhitespace(s.charAt(wbLastCIdx + 1)) : true;
                 // Step backward until we find a non-WS character that is followed by WS (that is, a word end).
                 executeTruncateWB:
                 while (wbLastCIdx >= minIdx) {
                     char curC = s.charAt(wbLastCIdx);
                     boolean curCIsWS = Character.isWhitespace(curC);
                     if (!curCIsWS && followingCIsWS) {
                         // Note how we avoid getMTerminatorRemovesDots until we absolutely need its result.
                         if (!addSpaceAtWordBoundary && isDot(curC)) {
                             if (terminatorRemovesDots == null) {
                                 terminatorRemovesDots = getTerminatorRemovesDots(terminator);
                             }
                             if (terminatorRemovesDots) {
                                 while (wbLastCIdx >= minIdx && isDotOrWS(s.charAt(wbLastCIdx))) {
                                     wbLastCIdx--;
                                 }
                                 if (wbLastCIdx < minIdx) {
                                     // Removing the dots made the result too short; truncedS stays null.
                                     break executeTruncateWB;
                                 }
                             }
                         }

                         truncedS = new StringBuilder(wbLastCIdx + 1 + wordTerminatorLength);
                         truncedS.append(s, 0, wbLastCIdx + 1);
                         if (addSpaceAtWordBoundary) {
                             truncedS.append(' ');
                         }
                         break executeTruncateWB;
                     }

                     followingCIsWS = curCIsWS;
                     wbLastCIdx--;
                 } // executeTruncateWB: while (...)
             }
             // Return even a null result if falling back to character boundary truncation isn't allowed.
             if (truncedS != null
                     || mode == TruncationMode.WORD_BOUNDARY
                     || mode == TruncationMode.AUTO && wordBoundaryMinLength == 0.0) {
                 return truncedS;
             }
             // We are in TruncationMode.AUTO. truncateW wasn't possible, so fall back to character boundary truncation.
         }

         // Do character boundary truncation.

         // If the truncation point is a word boundary, and thus we add a space before the terminator, then we may run
         // out of the maxLength by 1. In that case we have to truncate one character earlier.
         if (cbLastCIdx == cbInitialLastCIdx && addSpaceAtWordBoundary  && isWordEnd(s, cbLastCIdx)) {
             cbLastCIdx--;
             if (cbLastCIdx < 0) {
                 return null;
             }
         }

         // Skip trailing WS, also trailing dots if necessary. This is repeated, as skipping dots can uncover WS, and
         // skipping WS can uncover dots.
         boolean skippedDots;
         do {
             skippedDots = false;

             cbLastCIdx = skipTrailingWS(s, cbLastCIdx);
             if (cbLastCIdx < 0) {
                 return null;
             }

             // Note how we avoid getMTerminatorRemovesDots until we absolutely need its result.
             if (isDot(s.charAt(cbLastCIdx)) && !(addSpaceAtWordBoundary && isWordEnd(s, cbLastCIdx))) {
                 if (terminatorRemovesDots == null) {
                     terminatorRemovesDots = getTerminatorRemovesDots(terminator);
                 }
                 if (terminatorRemovesDots) {
                     cbLastCIdx = skipTrailingDots(s, cbLastCIdx);
                     if (cbLastCIdx < 0) {
                         return null;
                     }
                     skippedDots = true;
                 }
             }
         } while (skippedDots);

         boolean addWordBoundarySpace = addSpaceAtWordBoundary && isWordEnd(s, cbLastCIdx);
         // The capacity is chosen so that the caller can append the terminator without reallocation.
         StringBuilder truncatedS = new StringBuilder(cbLastCIdx + 1 + (addWordBoundarySpace ? 1 : 0) + terminatorLength);
         truncatedS.append(s, 0, cbLastCIdx + 1);
         if (addWordBoundarySpace) {
             truncatedS.append(' ');
         }
         return truncatedS;
     }

     /**
      * Returns the length of the terminator: the string length for a {@link TemplateScalarModel}, otherwise what
      * {@link #getMTerminatorLength(TemplateMarkupOutputModel)} returns (the terminator is then cast to markup).
      */
     private int getTerminatorLength(TemplateModel terminator) throws TemplateModelException {
         if (terminator instanceof TemplateScalarModel) {
             final String terminatorText = ((TemplateScalarModel) terminator).getAsString();
             return terminatorText.length();
         }
         return getMTerminatorLength((TemplateMarkupOutputModel<?>) terminator);
     }

     /**
      * Dispatches to {@link #getTerminatorRemovesDots(String)} for a {@link TemplateScalarModel}, otherwise to
      * {@link #getMTerminatorRemovesDots(TemplateMarkupOutputModel)} (the terminator is then cast to markup).
      */
     private boolean getTerminatorRemovesDots(TemplateModel terminator) throws TemplateModelException {
         if (terminator instanceof TemplateScalarModel) {
             final String terminatorText = ((TemplateScalarModel) terminator).getAsString();
             return getTerminatorRemovesDots(terminatorText);
         }
         return getMTerminatorRemovesDots((TemplateMarkupOutputModel<?>) terminator);
     }

     /**
      * Starting from {@code lastCIdx} and going backward, finds the index of the first character that's not
      * whitespace. Returns a negative number if there's no such character.
      */
     private int skipTrailingWS(String s, int lastCIdx) {
         int idx = lastCIdx;
         for (; idx >= 0; idx--) {
             if (!Character.isWhitespace(s.charAt(idx))) {
                 break;
             }
         }
         return idx;
     }

     /**
      * Starting from {@code lastCIdx} and going backward, finds the index of the first character that's not a dot
      * or an ellipsis. Returns a negative number if there's no such character.
      */
     private int skipTrailingDots(String s, int lastCIdx) {
         int idx = lastCIdx;
         for (; idx >= 0; idx--) {
             if (!isDot(s.charAt(idx))) {
                 break;
             }
         }
         return idx;
     }

     /**
      * Tells if the character at {@code lastCIdx} is directly followed by whitespace, or by the end of the string.
      */
     private boolean isWordEnd(String s, int lastCIdx) {
         final int nextIdx = lastCIdx + 1;
         if (nextIdx >= s.length()) {
             return true;
         }
         return Character.isWhitespace(s.charAt(nextIdx));
     }

     /** Tells if the character is a dot ({@code '.'}) or an ellipsis (U+2026). */
     private static boolean isDot(char c) {
         switch (c) {
             case '.':
             case '\u2026':
                 return true;
             default:
                 return false;
         }
     }

     /** Tells if the character is whitespace, or is a dot or ellipsis as defined by {@code isDot}. */
     private static boolean isDotOrWS(char c) {
         if (isDot(c)) {
             return true;
         }
         return Character.isWhitespace(c);
     }

     /**
      * Tells if the output format is an {@link HTMLOutputFormat} or an {@link XMLOutputFormat} (including
      * subclasses of those), that is, if we know how to look into the markup.
      */
     private boolean isHTMLOrXML(MarkupOutputFormat<?> outputFormat) {
         if (outputFormat instanceof HTMLOutputFormat) {
             return true;
         }
         return outputFormat instanceof XMLOutputFormat;
     }

     /**
      * Returns the visible length of an HTML/XML fragment: tags and comments are ignored, each character or entity
      * reference counts as 1, and CDATA sections count with the length of their content (without the delimiters).
      * So for example, the length of {@code "<span>x&amp;y</span>"} is 3 (as visually it's {@code x&y}, which is 3
      * characters). Constructs that aren't closed (like a tag without {@code '>'}) are assumed to last until the end
      * of the string.
      */
     // Not private for testability
     static int getLengthWithoutTags(String s) {
         // Fixes/improvements here should be also done here: doesHtmlOrXmlStartWithDot

         final int len = s.length();
         int visibleLength = 0;
         int pos = 0;
         while (pos < len) {
             final char c = s.charAt(pos++);
             if (c == '&') {
                 // &...; counts as a single character
                 final int semicolon = s.indexOf(';', pos);
                 pos = semicolon < 0 ? len : semicolon + 1;
                 visibleLength++;
             } else if (c != '<') {
                 visibleLength++;
             } else if (s.startsWith("!--", pos)) {
                 // <!--...--> is invisible
                 final int close = s.indexOf("-->", pos + 3);
                 pos = close < 0 ? len : close + 3;
             } else if (s.startsWith("![CDATA[", pos)) {
                 // <![CDATA[...]]>; everything between the delimiters is visible as is
                 final int contentStart = pos + 8;
                 final int close = s.indexOf("]]>", contentStart);
                 final int contentEnd = close < 0 ? len : close;
                 visibleLength += contentEnd - contentStart;
                 pos = contentEnd + 3;
             } else {
                 // <...> is invisible
                 final int close = s.indexOf('>', pos);
                 pos = close < 0 ? len : close + 1;
             }
         }
         return visibleLength;
     }

     /**
      * Checks if the first visible character of the specified HTML or XML is a dot or an ellipsis; tags and
      * comments are skipped, the content of a CDATA section counts as is, and a character or entity reference is
      * examined with {@code isDotCharReference}. Returns {@code false} if there's no visible character at all.
      */
     // Not private for testability
     static boolean doesHtmlOrXmlStartWithDot(String s) {
         // Fixes/improvements here should be also done here: getLengthWithoutTags

         final int len = s.length();
         int pos = 0;
         while (pos < len) {
             final char c = s.charAt(pos++);
             if (c == '&') {
                 // &...; is the first visible character, so it decides the result
                 final int semicolon = s.indexOf(';', pos);
                 return isDotCharReference(s.substring(pos, semicolon < 0 ? len : semicolon));
             }
             if (c != '<') {
                 return isDot(c);
             }

             if (s.startsWith("!--", pos)) {
                 // <!--...--> is invisible
                 final int close = s.indexOf("-->", pos + 3);
                 pos = close < 0 ? len : close + 3;
             } else if (s.startsWith("![CDATA[", pos)) {
                 // <![CDATA[...]]>; if it's not empty, its first character decides the result
                 pos += 8;
                 if (pos < len && !s.startsWith("]]>", pos)) {
                     return isDot(s.charAt(pos));
                 }
                 pos += 3;
             } else {
                 // <...> is invisible
                 final int close = s.indexOf('>', pos);
                 pos = close < 0 ? len : close + 1;
             }
         }
         return false;
     }

     /**
      * Tells if the name of a character or entity reference (the part between the {@code '&'} and the {@code ';'})
      * denotes a dot or an ellipsis. A name that's longer than 2 characters and starts with {@code '#'} is parsed
      * as numerical reference, and is matched against U+2026 and U+002E; anything else is compared with the entity
      * names {@code hellip} and {@code period}.
      */
     // Not private for testability
     static boolean isDotCharReference(String name) {
         final boolean numerical = name.length() > 2 && name.charAt(0) == '#';
         if (!numerical) {
             return name.equals("hellip") || name.equals("period");
         }
         final int charCode = getCodeFromNumericalCharReferenceName(name);
         return charCode == 0x2026 || charCode == 0x2e;
     }

     /**
      * Parses the code point from the name of a numerical character reference, like from {@code "#8230"} or
      * {@code "#x2026"} (so the {@code '&'} and {@code ';'} must be already stripped). The first character of the
      * name is not examined, it's just assumed to be {@code '#'}. Only ASCII digits are accepted, plus
      * {@code a}-{@code f} and {@code A}-{@code F} if the number is hexadecimal (that is, if the second character
      * is {@code 'x'} or {@code 'X'}).
      *
      * @return The parsed number, or -1 if the name is malformed: it's shorter than 2 characters, it contains no
      *         digits, it contains a character that's not a valid digit, or the number doesn't fit into an
      *         {@code int}. The last is important, as otherwise an overflowing number could wrap around to the code
      *         of a dot or ellipsis (for example {@code "#x100002026"} would be parsed as 0x2026).
      */
     // Not private for testability
     static int getCodeFromNumericalCharReferenceName(String name) {
         final int len = name.length();
         if (len < 2) {
             // Nothing after the "#"; charAt(1) would throw.
             return -1;
         }

         final char marker = name.charAt(1);
         final boolean hex = marker == 'x' || marker == 'X';
         final int radix = hex ? 16 : 10;
         final int firstDigitPos = hex ? 2 : 1;
         if (firstDigitPos >= len) {
             // "#x" without digits isn't a reference to U+0000.
             return -1;
         }

         int code = 0;
         for (int pos = firstDigitPos; pos < len; pos++) {
             final char c = name.charAt(pos);
             final int digit;
             if (c >= '0' && c <= '9') {
                 digit = c - '0';
             } else if (hex && c >= 'a' && c <= 'f') {
                 digit = c - 'a' + 10;
             } else if (hex && c >= 'A' && c <= 'F') {
                 digit = c - 'A' + 10;
             } else {
                 return -1;
             }

             // Equivalent to "code * radix + digit > Integer.MAX_VALUE", but without overflowing.
             if (code > (Integer.MAX_VALUE - digit) / radix) {
                 return -1;
             }
             code = code * radix + digit;
         }
         return code;
     }

 }