docs/attachments/LUCENE-8207/LUCENE-8207.patch - lucene-jira-archive - Git at Google


 From 0ce2400e4eb5d08586f7253cbc1324acf39ee407 Mon Sep 17 00:00:00 2001
 From: Vincent Arnaud <vincent.arnaud90@laposte.net>
 Date: Fri, 23 Feb 2018 12:35:26 +0100
 Subject: [PATCH] Add case-insensitive matching to RegExp

 Add an inline (?i) marker; the expression that follows it is matched ignoring case.
 ---
  .../org/apache/lucene/util/automaton/Automata.java |  14 ++
  .../org/apache/lucene/util/automaton/RegExp.java   | 204 ++++++++++++++-------
  .../apache/lucene/util/automaton/TestRegExp.java   |  33 +++-
  3 files changed, 182 insertions(+), 69 deletions(-)

 diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
 index 294700b849..5639626dfc 100644
 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
 +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
 @@ -108,6 +108,20 @@ final public class Automata {
      return makeCharRange(c, c);
    }

 +  /**
 +   * Returns a new (deterministic) automaton that accepts the given codepoint
 +   * together with its lower-case and upper-case forms.
 +   */
 +  public static Automaton makeCharCaseInsensitive(int c) {
 +    final int lower = Character.toLowerCase(c);
 +    final int upper = Character.toUpperCase(c);
 +    Automaton a = makeChar(c);
 +    // Union in only the forms that differ, so a title-case c keeps all three.
 +    if (lower != c) a = Operations.union(a, makeChar(lower));
 +    if (upper != c && upper != lower) a = Operations.union(a, makeChar(upper));
 +    return a;
 +  }
 +
    /** Appends the specified character to the specified state, returning a new state. */
    public static int appendChar(Automaton a, int state, int c) {
      int newState = a.createState();
 diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
 index a643ddb5b5..98c41b697b 100644
 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
 +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
 @@ -351,6 +351,11 @@ public class RegExp {
    public static final int INTERVAL = 0x0020;

    /**
 +   * Syntax flag, enables case-insensitive matching via the inline <tt>(?i)</tt> marker.
 +   */
 +  public static final int CASE_INSENSITIVE = 0x0040;
 +
 +  /**
     * Syntax flag, enables all optional regexp syntax.
     */
    public static final int ALL = 0xffff;
 @@ -371,8 +376,11 @@ public class RegExp {
    int flags;
    int pos;

 -  RegExp() {
 +  boolean isCaseInsensitive;
 +
 +  RegExp(final boolean aIsInsensitive) {
      this.originalString = null;
 +    this.isCaseInsensitive = aIsInsensitive;
    }

    /**
 @@ -400,15 +408,16 @@ public class RegExp {
      originalString = s;
      flags = syntax_flags;
      RegExp e;
 -    if (s.length() == 0) e = makeString("");
 +    if (s.length() == 0) e = makeString("", false);
      else {
 -      e = parseUnionExp();
 +      e = parseUnionExp(false);
        if (pos < originalString.length()) throw new IllegalArgumentException(
            "end-of-string expected at position " + pos);
      }
      kind = e.kind;
      exp1 = e.exp1;
      exp2 = e.exp2;
 +    isCaseInsensitive = e.isCaseInsensitive;
      this.s = e.s;
      c = e.c;
      min = e.min;
 @@ -566,10 +575,29 @@ public class RegExp {
          a = MinimizationOperations.minimize(a, maxDeterminizedStates);
          break;
        case REGEXP_CHAR:
 -        a = Automata.makeChar(c);
 +        // isCaseInsensitive alone decides, as it does for ranges and strings: nodes
 +        // built by the parser never receive flags, so check() would always veto them.
 +        a = this.isCaseInsensitive
 +            ? Automata.makeCharCaseInsensitive(c)
 +            : Automata.makeChar(c);
          break;
        case REGEXP_CHAR_RANGE:
 -        a = Automata.makeCharRange(from, to);
 +        final List<Automaton> automatons = new ArrayList<>();
 +        automatons.add(Automata.makeCharRange(from, to));
 +        if (this.isCaseInsensitive) {
 +          // The range already accepts every i in [from, to]; only case variants
 +          // that fall outside of it need an automaton of their own.
 +          for (int i = from; i <= to; i++) {
 +            final int lower = Character.toLowerCase(i);
 +            final int upper = Character.toUpperCase(i);
 +            if (lower < from || lower > to) automatons.add(Automata.makeChar(lower));
 +            if (upper < from || upper > to) automatons.add(Automata.makeChar(upper));
 +          }
 +        }
 +        // A lone range needs no union/minimize pass.
 +        a = automatons.size() == 1
 +            ? automatons.get(0)
 +            : MinimizationOperations.minimize(Operations.union(automatons), maxDeterminizedStates);
          break;
        case REGEXP_ANYCHAR:
          a = Automata.makeAnyChar();
 @@ -578,7 +606,14 @@ public class RegExp {
          a = Automata.makeEmpty();
          break;
        case REGEXP_STRING:
 -        a = Automata.makeString(s);
 +        if (this.isCaseInsensitive && !s.isEmpty()) { // "" has no first code point to read
 +          a = makeCaracterAutomaton(s.codePointAt(0)); // code points, not UTF-16 units: surrogate pairs stay whole
 +          for (int i = Character.charCount(s.codePointAt(0)); i < s.length(); i += Character.charCount(s.codePointAt(i))) {
 +            a = Operations.concatenate(a, makeCaracterAutomaton(s.codePointAt(i)));
 +          }
 +        } else {
 +          a = Automata.makeString(s);
 +        }
          break;
        case REGEXP_ANYSTRING:
          a = Automata.makeAnyString();
 @@ -606,7 +641,18 @@ public class RegExp {
      }
      return a;
    }
 -
 +
 +
 +  /** Builds the automaton for one codepoint of this node, honouring the node's case sensitivity. */
 +  private Automaton makeCaracterAutomaton(final int c) {
 +    if (!this.isCaseInsensitive) {
 +      return Automata.makeChar(c);
 +    }
 +    // Delegate so there is a single definition of "c in either case"
 +    // rather than a second copy of the lower/upper union kept in this class.
 +    return Automata.makeCharCaseInsensitive(c);
 +  }
 +
    private void findLeaves(RegExp exp, Kind kind, List<Automaton> list,
        Map<String,Automaton> automata, AutomatonProvider automaton_provider,
        int maxDeterminizedStates) {
 @@ -854,7 +900,7 @@ public class RegExp {
    }

    static RegExp makeUnion(RegExp exp1, RegExp exp2) {
 -    RegExp r = new RegExp();
 +    RegExp r = new RegExp(false);
      r.kind = Kind.REGEXP_UNION;
      r.exp1 = exp1;
      r.exp2 = exp2;
 @@ -863,9 +909,18 @@ public class RegExp {

    static RegExp makeConcatenation(RegExp exp1, RegExp exp2) {
      if ((exp1.kind == Kind.REGEXP_CHAR || exp1.kind == Kind.REGEXP_STRING)
 -        && (exp2.kind == Kind.REGEXP_CHAR || exp2.kind == Kind.REGEXP_STRING)) return makeString(
 -        exp1, exp2);
 -    RegExp r = new RegExp();
 +        && (exp2.kind == Kind.REGEXP_CHAR || exp2.kind == Kind.REGEXP_STRING)) {
 +      if (exp1.isCaseInsensitive != exp2.isCaseInsensitive) {
 +        final RegExp r = new RegExp(false);
 +        r.kind = Kind.REGEXP_CONCATENATION;
 +        r.exp1 = exp1;
 +        r.exp2 = exp2;
 +        return r;
 +      } else {
 +        return makeString(exp1, exp2);
 +      }
 +    }
 +    RegExp r = new RegExp(false);
      r.kind = Kind.REGEXP_CONCATENATION;
      if (exp1.kind == Kind.REGEXP_CONCATENATION
          && (exp1.exp2.kind == Kind.REGEXP_CHAR || exp1.exp2.kind == Kind.REGEXP_STRING)
 @@ -885,16 +940,19 @@ public class RegExp {
    }

    static private RegExp makeString(RegExp exp1, RegExp exp2) {
 +    // One merged string node carries one sensitivity, so operands that disagree are refused.
 +    if (exp1.isCaseInsensitive != exp2.isCaseInsensitive) throw new IllegalArgumentException(
 +        "RegExp " + exp1 + " and " + exp2 + " should have the same case sensitivity to be concatenated.");
      StringBuilder b = new StringBuilder();
      if (exp1.kind == Kind.REGEXP_STRING) b.append(exp1.s);
      else b.appendCodePoint(exp1.c);
      if (exp2.kind == Kind.REGEXP_STRING) b.append(exp2.s);
      else b.appendCodePoint(exp2.c);
 -    return makeString(b.toString());
 +    return makeString(b.toString(), exp1.isCaseInsensitive);
    }

    static RegExp makeIntersection(RegExp exp1, RegExp exp2) {
 -    RegExp r = new RegExp();
 +    RegExp r = new RegExp(false);
      r.kind = Kind.REGEXP_INTERSECTION;
      r.exp1 = exp1;
      r.exp2 = exp2;
 @@ -902,21 +960,21 @@ public class RegExp {
    }

    static RegExp makeOptional(RegExp exp) {
 -    RegExp r = new RegExp();
 +    RegExp r = new RegExp(exp.isCaseInsensitive);
      r.kind = Kind.REGEXP_OPTIONAL;
      r.exp1 = exp;
      return r;
    }

    static RegExp makeRepeat(RegExp exp) {
 -    RegExp r = new RegExp();
 +    RegExp r = new RegExp(exp.isCaseInsensitive);
      r.kind = Kind.REGEXP_REPEAT;
      r.exp1 = exp;
      return r;
    }

    static RegExp makeRepeat(RegExp exp, int min) {
 -    RegExp r = new RegExp();
 +    RegExp r = new RegExp(exp.isCaseInsensitive);
      r.kind = Kind.REGEXP_REPEAT_MIN;
      r.exp1 = exp;
      r.min = min;
 @@ -924,7 +982,7 @@ public class RegExp {
    }

    static RegExp makeRepeat(RegExp exp, int min, int max) {
 -    RegExp r = new RegExp();
 +    RegExp r = new RegExp(exp.isCaseInsensitive);
      r.kind = Kind.REGEXP_REPEAT_MINMAX;
      r.exp1 = exp;
      r.min = min;
 @@ -933,63 +991,63 @@ public class RegExp {
    }

    static RegExp makeComplement(RegExp exp) {
 -    RegExp r = new RegExp();
 +    RegExp r = new RegExp(exp.isCaseInsensitive);
      r.kind = Kind.REGEXP_COMPLEMENT;
      r.exp1 = exp;
      return r;
    }

 -  static RegExp makeChar(int c) {
 -    RegExp r = new RegExp();
 +  static RegExp makeChar(int c, boolean isInsensitive) {
 +    RegExp r = new RegExp(isInsensitive);
      r.kind = Kind.REGEXP_CHAR;
      r.c = c;
      return r;
    }

 -  static RegExp makeCharRange(int from, int to) {
 +  static RegExp makeCharRange(int from, int to, boolean isInsensitive) {
      if (from > to)
        throw new IllegalArgumentException("invalid range: from (" + from + ") cannot be > to (" + to + ")");
 -    RegExp r = new RegExp();
 +    RegExp r = new RegExp(isInsensitive);
      r.kind = Kind.REGEXP_CHAR_RANGE;
      r.from = from;
      r.to = to;
      return r;
    }

 -  static RegExp makeAnyChar() {
 -    RegExp r = new RegExp();
 +  static RegExp makeAnyChar(boolean isInsensitive) {
 +    RegExp r = new RegExp(isInsensitive);
      r.kind = Kind.REGEXP_ANYCHAR;
      return r;
    }

 -  static RegExp makeEmpty() {
 -    RegExp r = new RegExp();
 +  static RegExp makeEmpty(boolean isInsensitive) {
 +    RegExp r = new RegExp(isInsensitive);
      r.kind = Kind.REGEXP_EMPTY;
      return r;
    }

 -  static RegExp makeString(String s) {
 -    RegExp r = new RegExp();
 +  static RegExp makeString(String s, boolean isInsensitive) {
 +    RegExp r = new RegExp(isInsensitive);
      r.kind = Kind.REGEXP_STRING;
      r.s = s;
      return r;
    }

 -  static RegExp makeAnyString() {
 -    RegExp r = new RegExp();
 +  static RegExp makeAnyString(boolean isInsensitive) {
 +    RegExp r = new RegExp(isInsensitive);
      r.kind = Kind.REGEXP_ANYSTRING;
      return r;
    }

 -  static RegExp makeAutomaton(String s) {
 -    RegExp r = new RegExp();
 +  static RegExp makeAutomaton(String s, boolean isInsensitive) {
 +    RegExp r = new RegExp(isInsensitive);
      r.kind = Kind.REGEXP_AUTOMATON;
      r.s = s;
      return r;
    }

 -  static RegExp makeInterval(int min, int max, int digits) {
 -    RegExp r = new RegExp();
 +  static RegExp makeInterval(int min, int max, int digits, boolean isInsensitive) {
 +    RegExp r = new RegExp(isInsensitive);
      r.kind = Kind.REGEXP_INTERVAL;
      r.min = min;
      r.max = max;
 @@ -1025,28 +1083,28 @@ public class RegExp {
      return (flags & flag) != 0;
    }

 -  final RegExp parseUnionExp() throws IllegalArgumentException {
 -    RegExp e = parseInterExp();
 -    if (match('|')) e = makeUnion(e, parseUnionExp());
 +  final RegExp parseUnionExp(boolean isInsensitive) throws IllegalArgumentException {
 +    RegExp e = parseInterExp(isInsensitive);
 +    if (match('|')) e = makeUnion(e, parseUnionExp(isInsensitive));
      return e;
    }

 -  final RegExp parseInterExp() throws IllegalArgumentException {
 -    RegExp e = parseConcatExp();
 +  final RegExp parseInterExp(boolean isInsensitive) throws IllegalArgumentException {
 +    RegExp e = parseConcatExp(isInsensitive);
      if (check(INTERSECTION) && match('&')) e = makeIntersection(e,
 -        parseInterExp());
 +        parseInterExp(isInsensitive));
      return e;
    }

 -  final RegExp parseConcatExp() throws IllegalArgumentException {
 -    RegExp e = parseRepeatExp();
 +  final RegExp parseConcatExp(boolean isInsensitive) throws IllegalArgumentException {
 +    RegExp e = parseRepeatExp(isInsensitive);
      if (more() && !peek(")|") && (!check(INTERSECTION) || !peek("&"))) e = makeConcatenation(
 -        e, parseConcatExp());
 +        e, parseConcatExp(isInsensitive));
      return e;
    }

 -  final RegExp parseRepeatExp() throws IllegalArgumentException {
 -    RegExp e = parseComplExp();
 +  final RegExp parseRepeatExp(boolean isInsensitive) throws IllegalArgumentException {
 +    RegExp e = parseComplExp(isInsensitive);
      while (peek("?*+{")) {
        if (match('?')) e = makeOptional(e);
        else if (match('*')) e = makeRepeat(e);
 @@ -1075,50 +1133,60 @@ public class RegExp {
      return e;
    }

 -  final RegExp parseComplExp() throws IllegalArgumentException {
 -    if (check(COMPLEMENT) && match('~')) return makeComplement(parseComplExp());
 -    else return parseCharClassExp();
 +  final RegExp parseComplExp(boolean isInsensitive) throws IllegalArgumentException {
 +    if (check(COMPLEMENT) && match('~')) return makeComplement(parseComplExp(isInsensitive));
 +    else return parseCharClassExp(isInsensitive);
    }

 -  final RegExp parseCharClassExp() throws IllegalArgumentException {
 +  final RegExp parseCharClassExp(boolean isInsensitive) throws IllegalArgumentException {
      if (match('[')) {
        boolean negate = false;
        if (match('^')) negate = true;
 -      RegExp e = parseCharClasses();
 -      if (negate) e = makeIntersection(makeAnyChar(), makeComplement(e));
 +      RegExp e = parseCharClasses(isInsensitive);
 +      if (negate) e = makeIntersection(makeAnyChar(isInsensitive), makeComplement(e));
        if (!match(']')) throw new IllegalArgumentException(
            "expected ']' at position " + pos);
        return e;
 -    } else return parseSimpleExp();
 +    } else return parseSimpleExp(isInsensitive);
    }

 -  final RegExp parseCharClasses() throws IllegalArgumentException {
 -    RegExp e = parseCharClass();
 +  final RegExp parseCharClasses(boolean isInsensitive) throws IllegalArgumentException {
 +    RegExp e = parseCharClass(isInsensitive);
      while (more() && !peek("]"))
 -      e = makeUnion(e, parseCharClass());
 +      e = makeUnion(e, parseCharClass(isInsensitive));
      return e;
    }

 -  final RegExp parseCharClass() throws IllegalArgumentException {
 +  final RegExp parseCharClass(boolean isInsensitive) throws IllegalArgumentException {
      int c = parseCharExp();
 -    if (match('-')) return makeCharRange(c, parseCharExp());
 -    else return makeChar(c);
 +    if (match('-')) return makeCharRange(c, parseCharExp(), isInsensitive);
 +    else return makeChar(c, isInsensitive);
    }

 -  final RegExp parseSimpleExp() throws IllegalArgumentException {
 -    if (match('.')) return makeAnyChar();
 -    else if (check(EMPTY) && match('#')) return makeEmpty();
 -    else if (check(ANYSTRING) && match('@')) return makeAnyString();
 +  final RegExp parseSimpleExp(boolean isInsensitive) throws IllegalArgumentException {
 +    if (match('.')) return makeAnyChar(isInsensitive);
 +    else if (check(EMPTY) && match('#')) return makeEmpty(isInsensitive);
 +    else if (check(ANYSTRING) && match('@')) return makeAnyString(isInsensitive);
      else if (match('"')) {
        int start = pos;
        while (more() && !peek("\""))
          next();
        if (!match('"')) throw new IllegalArgumentException(
            "expected '\"' at position " + pos);
 -      return makeString(originalString.substring(start, pos - 1));
 +      return makeString(originalString.substring(start, pos - 1), false);
      } else if (match('(')) {
 -      if (match(')')) return makeString("");
 -      RegExp e = parseUnionExp();
 +      if (match(')')) return makeString("", isInsensitive);
 +      // "(?i)" switches the following expression to case-insensitive matching, but
 +      // only when the CASE_INSENSITIVE syntax flag is enabled. If the flag is off or
 +      // the marker is incomplete, rewind and parse an ordinary parenthesised group.
 +      // Sensitivity travels through the parse arguments, so no field is set here.
 +      final int groupStart = pos;
 +      if (check(CASE_INSENSITIVE) && match('?') && match('i') && match(')')) {
 +        return parseInterExp(true);
 +      }
 +      pos = groupStart;
 +      // Fall through: the group body is parsed with the caller's sensitivity.
 +      RegExp e = parseUnionExp(isInsensitive);
        if (!match(')')) throw new IllegalArgumentException(
            "expected ')' at position " + pos);
        return e;
 @@ -1133,7 +1201,7 @@ public class RegExp {
        if (i == -1) {
          if (!check(AUTOMATON)) throw new IllegalArgumentException(
              "interval syntax error at position " + (pos - 1));
 -        return makeAutomaton(s);
 +        return makeAutomaton(s, isInsensitive);
        } else {
          if (!check(INTERVAL)) throw new IllegalArgumentException(
              "illegal identifier at position " + (pos - 1));
 @@ -1151,13 +1219,13 @@ public class RegExp {
              imin = imax;
              imax = t;
            }
 -          return makeInterval(imin, imax, digits);
 +          return makeInterval(imin, imax, digits, isInsensitive);
          } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                "interval syntax error at position " + (pos - 1));
          }
        }
 -    } else return makeChar(parseCharExp());
 +    } else return makeChar(parseCharExp(), isInsensitive);
    }

    final int parseCharExp() throws IllegalArgumentException {
 diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
 index 7d24939c34..a6f6d6fbce 100644
 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
 +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
 @@ -16,7 +16,6 @@
   */
  package org.apache.lucene.util.automaton;

 -
  import org.apache.lucene.util.LuceneTestCase;

  public class TestRegExp extends LuceneTestCase {
 @@ -83,4 +82,36 @@ public class TestRegExp extends LuceneTestCase {
      a = new RegExp("#?").toAutomaton(1000);
      assertTrue(a.toString().length() > 0);
    }
 +
 +  public void testWithCaseInsensitive() throws Exception { // moves (?i) rightwards; text before it stays case sensitive
 +    CharacterRunAutomaton run = new CharacterRunAutomaton(new RegExp("(?i)a-pa(zU|io)[O-R]apl").toAutomaton());
 +    assertTrue(run.run("a-pazUQapl"));
 +    assertTrue(run.run("a-pazUQaPl"));
 +    assertTrue(run.run("a-pazUpaPl"));
 +    assertTrue(run.run("a-paIOpaPl"));
 +    assertTrue(run.run("A-paIOpaPl"));
 +    assertFalse(run.run("o"));
 +    run = new CharacterRunAutomaton(new RegExp("a-pa(?i)(zU|io)[O-R]apl").toAutomaton());
 +    assertTrue(run.run("a-pazUQapl"));
 +    assertTrue(run.run("a-pazUQaPl"));
 +    assertTrue(run.run("a-pazUpaPl"));
 +    assertTrue(run.run("a-paIOpaPl"));
 +    assertFalse(run.run("A-paIOpaPl"));
 +    assertFalse(run.run("o"));
 +    run = new CharacterRunAutomaton(new RegExp("a-pa(zU|io)(?i)[O-R]apl").toAutomaton());
 +    assertTrue(run.run("a-pazUQapl")); // was asserted before the reassignment, i.e. against the previous pattern
 +    assertTrue(run.run("a-pazUQaPl"));
 +    assertTrue(run.run("a-pazUpaPl"));
 +    assertFalse(run.run("a-paIOpaPl"));
 +    assertFalse(run.run("A-paIOpaPl"));
 +    assertFalse(run.run("o"));
 +    run = new CharacterRunAutomaton(new RegExp("a-pa(zU|io)[O-R](?i)apl").toAutomaton());
 +    assertTrue(run.run("a-pazUQapl"));
 +    assertTrue(run.run("a-pazUQaPl"));
 +    assertFalse(run.run("a-pazUpaPl"));
 +    assertFalse(run.run("a-paIOpaPl"));
 +    assertFalse(run.run("A-paIOpaPl"));
 +    assertFalse(run.run("o"));
 +  }
 +
  }
 --
 2.11.0

	From 0ce2400e4eb5d08586f7253cbc1324acf39ee407 Mon Sep 17 00:00:00 2001
	From: Vincent Arnaud <vincent.arnaud90@laposte.net>
	Date: Fri, 23 Feb 2018 12:35:26 +0100
	Subject: [PATCH] Add case insensitive

	add ignore case
	---
	.../org/apache/lucene/util/automaton/Automata.java \| 14 ++
	.../org/apache/lucene/util/automaton/RegExp.java \| 204 ++++++++++++++-------
	.../apache/lucene/util/automaton/TestRegExp.java \| 33 +++-
	3 files changed, 182 insertions(+), 69 deletions(-)

	diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
	index 294700b849..5639626dfc 100644
	--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
	+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
	@@ -108,6 +108,20 @@ final public class Automata {
	return makeCharRange(c, c);
	}

	+ /**
	+ * Returns a new (deterministic) automaton that accepts a single codepoint of
	+ * the given value and its other case.
	+ */
	+ public static Automaton makeCharCaseInsensitive(int c) {
	+ Automaton a;
	+ if (Character.toLowerCase(c) != Character.toUpperCase(c)) {
	+ a = Operations.union(Automata.makeChar(Character.toLowerCase(c)), Automata.makeChar(Character.toUpperCase(c)));
	+ } else {
	+ a = Automata.makeChar(c);
	+ }
	+ return a;
	+ }
	+
	/** Appends the specified character to the specified state, returning a new state. */
	public static int appendChar(Automaton a, int state, int c) {
	int newState = a.createState();
	diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
	index a643ddb5b5..98c41b697b 100644
	--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
	+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
	@@ -351,6 +351,11 @@ public class RegExp {
	public static final int INTERVAL = 0x0020;

	/**
	+ * Syntax flag, enables case insensitive (<tt>(?i)</tt>).
	+ */
	+ public static final int CASE_INSENSITIVE = 0x0040;
	+
	+ /**
	* Syntax flag, enables all optional regexp syntax.
	*/
	public static final int ALL = 0xffff;
	@@ -371,8 +376,11 @@ public class RegExp {
	int flags;
	int pos;

	- RegExp() {
	+ boolean isCaseInsensitive;
	+
	+ RegExp(final boolean aIsInsensitive) {
	this.originalString = null;
	+ this.isCaseInsensitive = aIsInsensitive;
	}

	/**
	@@ -400,15 +408,16 @@ public class RegExp {
	originalString = s;
	flags = syntax_flags;
	RegExp e;
	- if (s.length() == 0) e = makeString("");
	+ if (s.length() == 0) e = makeString("", false);
	else {
	- e = parseUnionExp();
	+ e = parseUnionExp(false);
	if (pos < originalString.length()) throw new IllegalArgumentException(
	"end-of-string expected at position " + pos);
	}
	kind = e.kind;
	exp1 = e.exp1;
	exp2 = e.exp2;
	+ isCaseInsensitive = e.isCaseInsensitive;
	this.s = e.s;
	c = e.c;
	min = e.min;
	@@ -566,10 +575,29 @@ public class RegExp {
	a = MinimizationOperations.minimize(a, maxDeterminizedStates);
	break;
	case REGEXP_CHAR:
	- a = Automata.makeChar(c);
	+ if (this.isCaseInsensitive && check(CASE_INSENSITIVE)) {
	+ a = Automata.makeCharCaseInsensitive(c);
	+ } else {
	+ a = Automata.makeChar(c);
	+ }
	break;
	case REGEXP_CHAR_RANGE:
	- a = Automata.makeCharRange(from, to);
	+ final List<Automaton> automatons = new ArrayList<>();
	+ automatons.add(Automata.makeCharRange(from, to));
	+ if (this.isCaseInsensitive) {
	+ for (int i = from; i <= to; i++) {
	+ automatons.add(Automata.makeChar(i));
	+ if (Character.toLowerCase(i) != Character.toUpperCase(i)) {
	+ automatons.add(Automata.makeChar((i == Character.toLowerCase(i)) ? Character.toUpperCase(i) : Character.toLowerCase(i)));
	+ }
	+ }
	+ }
	+ if (automatons.size() == 1){
	+ a = automatons.get(0);
	+ } else {
	+ a = Operations.union(automatons);
	+ a = MinimizationOperations.minimize(a, maxDeterminizedStates);
	+ }
	break;
	case REGEXP_ANYCHAR:
	a = Automata.makeAnyChar();
	@@ -578,7 +606,14 @@ public class RegExp {
	a = Automata.makeEmpty();
	break;
	case REGEXP_STRING:
	- a = Automata.makeString(s);
	+ if (this.isCaseInsensitive) {
	+ a = makeCaracterAutomaton(s.charAt(0));
	+ for (int i = 1; i < s.length(); i++) {
	+ a = Operations.concatenate(a, makeCaracterAutomaton(s.charAt(i)));
	+ }
	+ } else {
	+ a = Automata.makeString(s);
	+ }
	break;
	case REGEXP_ANYSTRING:
	a = Automata.makeAnyString();
	@@ -606,7 +641,18 @@ public class RegExp {
	}
	return a;
	}
	-
	+
	+
	+ private Automaton makeCaracterAutomaton(final int c) {
	+ Automaton a;
	+ if (this.isCaseInsensitive && Character.toLowerCase(c) != Character.toUpperCase(c)) {
	+ a = Operations.union(Automata.makeChar(Character.toLowerCase(c)), Automata.makeChar(Character.toUpperCase(c)));
	+ } else {
	+ a = Automata.makeChar(c);
	+ }
	+ return a;
	+ }
	+
	private void findLeaves(RegExp exp, Kind kind, List<Automaton> list,
	Map<String,Automaton> automata, AutomatonProvider automaton_provider,
	int maxDeterminizedStates) {
	@@ -854,7 +900,7 @@ public class RegExp {
	}

	static RegExp makeUnion(RegExp exp1, RegExp exp2) {
	- RegExp r = new RegExp();
	+ RegExp r = new RegExp(false);
	r.kind = Kind.REGEXP_UNION;
	r.exp1 = exp1;
	r.exp2 = exp2;
	@@ -863,9 +909,18 @@ public class RegExp {

	static RegExp makeConcatenation(RegExp exp1, RegExp exp2) {
	if ((exp1.kind == Kind.REGEXP_CHAR \|\| exp1.kind == Kind.REGEXP_STRING)
	- && (exp2.kind == Kind.REGEXP_CHAR \|\| exp2.kind == Kind.REGEXP_STRING)) return makeString(
	- exp1, exp2);
	- RegExp r = new RegExp();
	+ && (exp2.kind == Kind.REGEXP_CHAR \|\| exp2.kind == Kind.REGEXP_STRING)) {
	+ if (exp1.isCaseInsensitive != exp2.isCaseInsensitive) {
	+ final RegExp r = new RegExp(false);
	+ r.kind = Kind.REGEXP_CONCATENATION;
	+ r.exp1 = exp1;
	+ r.exp2 = exp2;
	+ return r;
	+ } else {
	+ return makeString(exp1, exp2);
	+ }
	+ }
	+ RegExp r = new RegExp(false);
	r.kind = Kind.REGEXP_CONCATENATION;
	if (exp1.kind == Kind.REGEXP_CONCATENATION
	&& (exp1.exp2.kind == Kind.REGEXP_CHAR \|\| exp1.exp2.kind == Kind.REGEXP_STRING)
	@@ -885,16 +940,19 @@ public class RegExp {
	}

	static private RegExp makeString(RegExp exp1, RegExp exp2) {
	+ if (exp1.isCaseInsensitive != exp2.isCaseInsensitive) {
	+ throw new IllegalArgumentException("RegExp" + exp1 + " and " + exp2 + " should have the same case sensitivity to be concatenated.");
	+ }
	StringBuilder b = new StringBuilder();
	if (exp1.kind == Kind.REGEXP_STRING) b.append(exp1.s);
	else b.appendCodePoint(exp1.c);
	if (exp2.kind == Kind.REGEXP_STRING) b.append(exp2.s);
	else b.appendCodePoint(exp2.c);
	- return makeString(b.toString());
	+ return makeString(b.toString(), exp1.isCaseInsensitive);
	}

	static RegExp makeIntersection(RegExp exp1, RegExp exp2) {
	- RegExp r = new RegExp();
	+ RegExp r = new RegExp(false);
	r.kind = Kind.REGEXP_INTERSECTION;
	r.exp1 = exp1;
	r.exp2 = exp2;
	@@ -902,21 +960,21 @@ public class RegExp {
	}

	static RegExp makeOptional(RegExp exp) {
	- RegExp r = new RegExp();
	+ RegExp r = new RegExp(exp.isCaseInsensitive);
	r.kind = Kind.REGEXP_OPTIONAL;
	r.exp1 = exp;
	return r;
	}

	static RegExp makeRepeat(RegExp exp) {
	- RegExp r = new RegExp();
	+ RegExp r = new RegExp(exp.isCaseInsensitive);
	r.kind = Kind.REGEXP_REPEAT;
	r.exp1 = exp;
	return r;
	}

	static RegExp makeRepeat(RegExp exp, int min) {
	- RegExp r = new RegExp();
	+ RegExp r = new RegExp(exp.isCaseInsensitive);
	r.kind = Kind.REGEXP_REPEAT_MIN;
	r.exp1 = exp;
	r.min = min;
	@@ -924,7 +982,7 @@ public class RegExp {
	}

	static RegExp makeRepeat(RegExp exp, int min, int max) {
	- RegExp r = new RegExp();
	+ RegExp r = new RegExp(exp.isCaseInsensitive);
	r.kind = Kind.REGEXP_REPEAT_MINMAX;
	r.exp1 = exp;
	r.min = min;
	@@ -933,63 +991,63 @@ public class RegExp {
	}

	static RegExp makeComplement(RegExp exp) {
	- RegExp r = new RegExp();
	+ RegExp r = new RegExp(exp.isCaseInsensitive);
	r.kind = Kind.REGEXP_COMPLEMENT;
	r.exp1 = exp;
	return r;
	}

	- static RegExp makeChar(int c) {
	- RegExp r = new RegExp();
	+ static RegExp makeChar(int c, boolean isInsensitive) {
	+ RegExp r = new RegExp(isInsensitive);
	r.kind = Kind.REGEXP_CHAR;
	r.c = c;
	return r;
	}

	- static RegExp makeCharRange(int from, int to) {
	+ static RegExp makeCharRange(int from, int to, boolean isInsensitive) {
	if (from > to)
	throw new IllegalArgumentException("invalid range: from (" + from + ") cannot be > to (" + to + ")");
	- RegExp r = new RegExp();
	+ RegExp r = new RegExp(isInsensitive);
	r.kind = Kind.REGEXP_CHAR_RANGE;
	r.from = from;
	r.to = to;
	return r;
	}

	- static RegExp makeAnyChar() {
	- RegExp r = new RegExp();
	+ static RegExp makeAnyChar(boolean isInsensitive) {
	+ RegExp r = new RegExp(isInsensitive);
	r.kind = Kind.REGEXP_ANYCHAR;
	return r;
	}

	- static RegExp makeEmpty() {
	- RegExp r = new RegExp();
	+ static RegExp makeEmpty(boolean isInsensitive) {
	+ RegExp r = new RegExp(isInsensitive);
	r.kind = Kind.REGEXP_EMPTY;
	return r;
	}

	- static RegExp makeString(String s) {
	- RegExp r = new RegExp();
	+ static RegExp makeString(String s, boolean isInsensitive) {
	+ RegExp r = new RegExp(isInsensitive);
	r.kind = Kind.REGEXP_STRING;
	r.s = s;
	return r;
	}

	- static RegExp makeAnyString() {
	- RegExp r = new RegExp();
	+ static RegExp makeAnyString(boolean isInsensitive) {
	+ RegExp r = new RegExp(isInsensitive);
	r.kind = Kind.REGEXP_ANYSTRING;
	return r;
	}

	- static RegExp makeAutomaton(String s) {
	- RegExp r = new RegExp();
	+ static RegExp makeAutomaton(String s, boolean isInsensitive) {
	+ RegExp r = new RegExp(isInsensitive);
	r.kind = Kind.REGEXP_AUTOMATON;
	r.s = s;
	return r;
	}

	- static RegExp makeInterval(int min, int max, int digits) {
	- RegExp r = new RegExp();
	+ static RegExp makeInterval(int min, int max, int digits, boolean isInsensitive) {
	+ RegExp r = new RegExp(isInsensitive);
	r.kind = Kind.REGEXP_INTERVAL;
	r.min = min;
	r.max = max;
	@@ -1025,28 +1083,28 @@ public class RegExp {
	return (flags & flag) != 0;
	}

	- final RegExp parseUnionExp() throws IllegalArgumentException {
	- RegExp e = parseInterExp();
	- if (match('\|')) e = makeUnion(e, parseUnionExp());
	+ final RegExp parseUnionExp(boolean isInsensitive) throws IllegalArgumentException {
	+ RegExp e = parseInterExp(isInsensitive);
	+ if (match('\|')) e = makeUnion(e, parseUnionExp(isInsensitive));
	return e;
	}

	- final RegExp parseInterExp() throws IllegalArgumentException {
	- RegExp e = parseConcatExp();
	+ final RegExp parseInterExp(boolean isInsensitive) throws IllegalArgumentException {
	+ RegExp e = parseConcatExp(isInsensitive);
	if (check(INTERSECTION) && match('&')) e = makeIntersection(e,
	- parseInterExp());
	+ parseInterExp(isInsensitive));
	return e;
	}

	- final RegExp parseConcatExp() throws IllegalArgumentException {
	- RegExp e = parseRepeatExp();
	+ final RegExp parseConcatExp(boolean isInsensitive) throws IllegalArgumentException {
	+ RegExp e = parseRepeatExp(isInsensitive);
	if (more() && !peek(")\|") && (!check(INTERSECTION) \|\| !peek("&"))) e = makeConcatenation(
	- e, parseConcatExp());
	+ e, parseConcatExp(isInsensitive));
	return e;
	}

	- final RegExp parseRepeatExp() throws IllegalArgumentException {
	- RegExp e = parseComplExp();
	+ final RegExp parseRepeatExp(boolean isInsensitive) throws IllegalArgumentException {
	+ RegExp e = parseComplExp(isInsensitive);
	while (peek("?*+{")) {
	if (match('?')) e = makeOptional(e);
	else if (match('*')) e = makeRepeat(e);
	@@ -1075,50 +1133,60 @@ public class RegExp {
	return e;
	}

	- final RegExp parseComplExp() throws IllegalArgumentException {
	- if (check(COMPLEMENT) && match('~')) return makeComplement(parseComplExp());
	- else return parseCharClassExp();
	+ final RegExp parseComplExp(boolean isInsensitive) throws IllegalArgumentException {
	+ if (check(COMPLEMENT) && match('~')) return makeComplement(parseComplExp(isInsensitive));
	+ else return parseCharClassExp(isInsensitive);
	}

	- final RegExp parseCharClassExp() throws IllegalArgumentException {
	+ final RegExp parseCharClassExp(boolean isInsensitive) throws IllegalArgumentException {
	if (match('[')) {
	boolean negate = false;
	if (match('^')) negate = true;
	- RegExp e = parseCharClasses();
	- if (negate) e = makeIntersection(makeAnyChar(), makeComplement(e));
	+ RegExp e = parseCharClasses(isInsensitive);
	+ if (negate) e = makeIntersection(makeAnyChar(isInsensitive), makeComplement(e));
	if (!match(']')) throw new IllegalArgumentException(
	"expected ']' at position " + pos);
	return e;
	- } else return parseSimpleExp();
	+ } else return parseSimpleExp(isInsensitive);
	}

	- final RegExp parseCharClasses() throws IllegalArgumentException {
	- RegExp e = parseCharClass();
	+ final RegExp parseCharClasses(boolean isInsensitive) throws IllegalArgumentException {
	+ RegExp e = parseCharClass(isInsensitive);
	while (more() && !peek("]"))
	- e = makeUnion(e, parseCharClass());
	+ e = makeUnion(e, parseCharClass(isInsensitive));
	return e;
	}

	- final RegExp parseCharClass() throws IllegalArgumentException {
	+ final RegExp parseCharClass(boolean isInsensitive) throws IllegalArgumentException {
	int c = parseCharExp();
	- if (match('-')) return makeCharRange(c, parseCharExp());
	- else return makeChar(c);
	+ if (match('-')) return makeCharRange(c, parseCharExp(), isInsensitive);
	+ else return makeChar(c, isInsensitive);
	}

	- final RegExp parseSimpleExp() throws IllegalArgumentException {
	- if (match('.')) return makeAnyChar();
	- else if (check(EMPTY) && match('#')) return makeEmpty();
	- else if (check(ANYSTRING) && match('@')) return makeAnyString();
	+ final RegExp parseSimpleExp(boolean isInsensitive) throws IllegalArgumentException {
	+ if (match('.')) return makeAnyChar(isInsensitive);
	+ else if (check(EMPTY) && match('#')) return makeEmpty(isInsensitive);
	+ else if (check(ANYSTRING) && match('@')) return makeAnyString(isInsensitive);
	else if (match('"')) {
	int start = pos;
	while (more() && !peek("\""))
	next();
	if (!match('"')) throw new IllegalArgumentException(
	"expected '\"' at position " + pos);
	- return makeString(originalString.substring(start, pos - 1));
	+ return makeString(originalString.substring(start, pos - 1), false);
	} else if (match('(')) {
	- if (match(')')) return makeString("");
	- RegExp e = parseUnionExp();
	+ if (match(')')) return makeString("", isInsensitive);
	+ if (match('?')) {
	+ if (match('i')) {
	+ if (match(')')) {
	+ isCaseInsensitive = true;
	+ return parseInterExp(true);
	+ }
	+ pos -= Character.charCount('i');
	+ }
	+ pos -= Character.charCount('?');
	+ }
	+ RegExp e = parseUnionExp(isInsensitive);
	if (!match(')')) throw new IllegalArgumentException(
	"expected ')' at position " + pos);
	return e;
	@@ -1133,7 +1201,7 @@ public class RegExp {
	if (i == -1) {
	if (!check(AUTOMATON)) throw new IllegalArgumentException(
	"interval syntax error at position " + (pos - 1));
	- return makeAutomaton(s);
	+ return makeAutomaton(s, isInsensitive);
	} else {
	if (!check(INTERVAL)) throw new IllegalArgumentException(
	"illegal identifier at position " + (pos - 1));
	@@ -1151,13 +1219,13 @@ public class RegExp {
	imin = imax;
	imax = t;
	}
	- return makeInterval(imin, imax, digits);
	+ return makeInterval(imin, imax, digits, isInsensitive);
	} catch (NumberFormatException e) {
	throw new IllegalArgumentException(
	"interval syntax error at position " + (pos - 1));
	}
	}
	- } else return makeChar(parseCharExp());
	+ } else return makeChar(parseCharExp(), isInsensitive);
	}

	final int parseCharExp() throws IllegalArgumentException {
	diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
	index 7d24939c34..a6f6d6fbce 100644
	--- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
	+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestRegExp.java
	@@ -16,7 +16,6 @@
	*/
	package org.apache.lucene.util.automaton;

	-
	import org.apache.lucene.util.LuceneTestCase;

	public class TestRegExp extends LuceneTestCase {
	@@ -83,4 +82,36 @@ public class TestRegExp extends LuceneTestCase {
	a = new RegExp("#?").toAutomaton(1000);
	assertTrue(a.toString().length() > 0);
	}
	+
	+ public void testWithCaseInsensitive() throws Exception { // each pattern moves "(?i)" further right; text before the flag must still match case-sensitively
	+ CharacterRunAutomaton run = new CharacterRunAutomaton(new RegExp("(?i)a-pa(zU|io)[O-R]apl").toAutomaton()); // flag first: whole pattern ignores case
	+ assertTrue(run.run("a-pazUQapl"));
	+ assertTrue(run.run("a-pazUQaPl"));
	+ assertTrue(run.run("a-pazUpaPl"));
	+ assertTrue(run.run("a-paIOpaPl"));
	+ assertTrue(run.run("A-paIOpaPl"));
	+ assertFalse(run.run("o"));
	+ run = new CharacterRunAutomaton(new RegExp("a-pa(?i)(zU|io)[O-R]apl").toAutomaton()); // "a-pa" prefix stays case-sensitive
	+ assertTrue(run.run("a-pazUQapl"));
	+ assertTrue(run.run("a-pazUQaPl"));
	+ assertTrue(run.run("a-pazUpaPl"));
	+ assertTrue(run.run("a-paIOpaPl"));
	+ assertFalse(run.run("A-paIOpaPl"));
	+ assertFalse(run.run("o"));
	+ run = new CharacterRunAutomaton(new RegExp("a-pa(zU|io)(?i)[O-R]apl").toAutomaton()); // only "[O-R]apl" ignores case
	+ assertTrue(run.run("a-pazUQapl"));
	+ assertTrue(run.run("a-pazUQaPl"));
	+ assertTrue(run.run("a-pazUpaPl"));
	+ assertFalse(run.run("a-paIOpaPl"));
	+ assertFalse(run.run("A-paIOpaPl"));
	+ assertFalse(run.run("o"));
	+ run = new CharacterRunAutomaton(new RegExp("a-pa(zU|io)[O-R](?i)apl").toAutomaton()); // only the trailing "apl" ignores case
	+ assertTrue(run.run("a-pazUQapl"));
	+ assertTrue(run.run("a-pazUQaPl"));
	+ assertFalse(run.run("a-pazUpaPl"));
	+ assertFalse(run.run("a-paIOpaPl"));
	+ assertFalse(run.run("A-paIOpaPl"));
	+ assertFalse(run.run("o"));
	+ }
	+
	}
	--
	2.11.0