blob: 7b247c69a431edbd5150b36a247bcea9df4e423b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.transforms;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.testing.NeedsRunner;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * Tests for {@link Regex}.
 *
 * <p>Every test follows the same shape: create a small in-memory {@code PCollection<String>} with
 * {@link Create#of}, apply exactly one {@link Regex} transform, declare the expected contents with
 * {@link PAssert}, and run the pipeline. Tests are grouped by transform family ({@code find},
 * {@code matches}, {@code replace}, {@code split}); each family has a positive case and, where
 * applicable, a "None" case whose inputs are expected to produce no output.
 *
 * <p>NOTE(review): the class implements {@link Serializable} and keeps the pipeline rule {@code
 * transient}; presumably this is so the test instance can be serialized along with pipeline
 * functions without dragging the pipeline with it -- confirm before removing either.
 */
@RunWith(JUnit4.class)
public class RegexTest implements Serializable {

  /**
   * Sentence whose words are separated by whitespace runs of different lengths: two spaces after
   * "The", three after "brown", four after "over", and a single space everywhere else. Splitting
   * on a single whitespace character therefore yields 1, 2 and 3 empty tokens at those positions.
   */
  private static final String SENTENCE_WITH_SPACE_RUNS =
      "The  quick brown   fox jumps over    the lazy dog";

  /** Pipeline under test; a fresh instance is provided for each test method by the JUnit rule. */
  @Rule public final transient TestPipeline p = TestPipeline.create();

  // ---------------------------------------------------------------------------------------------
  // find
  // ---------------------------------------------------------------------------------------------

  /** {@code find(regex)}: expects the matched text for each element containing a match. */
  @Test
  @Category(NeedsRunner.class)
  public void testFind() {
    PCollection<String> output =
        p.apply(Create.of("aj", "xj", "yj", "zj")).apply(Regex.find("[xyz]"));

    // "aj" contains none of x/y/z, so only three results are expected.
    PAssert.that(output).containsInAnyOrder("x", "y", "z");
    p.run();
  }

  /** {@code find(regex, group)}: expects the text of capture group 1 for each matching element. */
  @Test
  @Category(NeedsRunner.class)
  public void testFindGroup() {
    PCollection<String> output =
        p.apply(Create.of("aj", "xj", "yj", "zj")).apply(Regex.find("([xyz])", 1));

    PAssert.that(output).containsInAnyOrder("x", "y", "z");
    p.run();
  }

  /** {@code find(regex)}: expects an empty output when no element contains a match. */
  @Test
  @Category(NeedsRunner.class)
  public void testFindNone() {
    PCollection<String> output = p.apply(Create.of("a", "b", "c", "d")).apply(Regex.find("[xyz]"));

    PAssert.that(output).empty();
    p.run();
  }

  /** {@code find(regex, groupName)}: expects the text of the named group for each match. */
  @Test
  @Category(NeedsRunner.class)
  public void testFindNameGroup() {
    PCollection<String> output =
        p.apply(Create.of("aj", "xj", "yj", "zj"))
            .apply(Regex.find("(?<namedgroup>[xyz])", "namedgroup"));

    PAssert.that(output).containsInAnyOrder("x", "y", "z");
    p.run();
  }

  /**
   * {@code findAll(regex)}: expects, per matching element, a list holding the whole match followed
   * by each capture group in order.
   */
  @Test
  @Category(NeedsRunner.class)
  public void testFindAllGroups() {
    PCollection<List<String>> output =
        p.apply(Create.of("aj", "xjx", "yjy", "zjz")).apply(Regex.findAll("([xyz])j([xyz])"));

    PAssert.that(output)
        .containsInAnyOrder(
            Arrays.asList("xjx", "x", "x"),
            Arrays.asList("yjy", "y", "y"),
            Arrays.asList("zjz", "z", "z"));
    p.run();
  }

  /** {@code find(regex, groupName)}: expects an empty output when no element contains a match. */
  @Test
  @Category(NeedsRunner.class)
  public void testFindNameNone() {
    PCollection<String> output =
        p.apply(Create.of("a", "b", "c", "d"))
            .apply(Regex.find("(?<namedgroup>[xyz])", "namedgroup"));

    PAssert.that(output).empty();
    p.run();
  }

  /** {@code findKV(regex, keyGroup, valueGroup)}: expects a KV built from groups 1 and 2. */
  @Test
  @Category(NeedsRunner.class)
  public void testKVFind() {
    PCollection<KV<String, String>> output =
        p.apply(Create.of("a b c")).apply(Regex.findKV("a (b) (c)", 1, 2));

    PAssert.that(output).containsInAnyOrder(KV.of("b", "c"));
    p.run();
  }

  /** {@code findKV(regex, keyGroup, valueGroup)}: expects no output for a non-matching element. */
  @Test
  @Category(NeedsRunner.class)
  public void testKVFindNone() {
    PCollection<KV<String, String>> output =
        p.apply(Create.of("x y z")).apply(Regex.findKV("a (b) (c)", 1, 2));

    PAssert.that(output).empty();
    p.run();
  }

  /** {@code findKV(regex, keyName, valueName)}: expects a KV built from the two named groups. */
  @Test
  @Category(NeedsRunner.class)
  public void testKVFindName() {
    PCollection<KV<String, String>> output =
        p.apply(Create.of("a b c"))
            .apply(Regex.findKV("a (?<keyname>b) (?<valuename>c)", "keyname", "valuename"));

    PAssert.that(output).containsInAnyOrder(KV.of("b", "c"));
    p.run();
  }

  /** {@code findKV(regex, keyName, valueName)}: expects no output for a non-matching element. */
  @Test
  @Category(NeedsRunner.class)
  public void testKVFindNameNone() {
    PCollection<KV<String, String>> output =
        p.apply(Create.of("x y z"))
            .apply(Regex.findKV("a (?<keyname>b) (?<valuename>c)", "keyname", "valuename"));

    PAssert.that(output).empty();
    p.run();
  }

  // ---------------------------------------------------------------------------------------------
  // matches
  // ---------------------------------------------------------------------------------------------

  /** {@code matches(regex)}: expects each element that matches the pattern to be emitted. */
  @Test
  @Category(NeedsRunner.class)
  public void testMatches() {
    PCollection<String> output =
        p.apply(Create.of("a", "x", "y", "z")).apply(Regex.matches("[xyz]"));

    PAssert.that(output).containsInAnyOrder("x", "y", "z");
    p.run();
  }

  /** {@code matches(regex)}: expects an empty output when no element matches. */
  @Test
  @Category(NeedsRunner.class)
  public void testMatchesNone() {
    PCollection<String> output =
        p.apply(Create.of("a", "b", "c", "d")).apply(Regex.matches("[xyz]"));

    PAssert.that(output).empty();
    p.run();
  }

  /** {@code matches(regex, group)}: expects the text of capture group 1 for each match. */
  @Test
  @Category(NeedsRunner.class)
  public void testMatchesGroup() {
    PCollection<String> output =
        p.apply(Create.of("a", "x xxx", "x yyy", "x zzz")).apply(Regex.matches("x ([xyz]*)", 1));

    PAssert.that(output).containsInAnyOrder("xxx", "yyy", "zzz");
    p.run();
  }

  /** {@code matches(regex, groupName)}: expects the text of the named group for each match. */
  @Test
  @Category(NeedsRunner.class)
  public void testMatchesName() {
    PCollection<String> output =
        p.apply(Create.of("a", "x xxx", "x yyy", "x zzz"))
            .apply(Regex.matches("x (?<namedgroup>[xyz]*)", "namedgroup"));

    PAssert.that(output).containsInAnyOrder("xxx", "yyy", "zzz");
    p.run();
  }

  /** {@code matches(regex, groupName)}: expects an empty output when no element matches. */
  @Test
  @Category(NeedsRunner.class)
  public void testMatchesNameNone() {
    PCollection<String> output =
        p.apply(Create.of("a", "b", "c", "d"))
            .apply(Regex.matches("x (?<namedgroup>[xyz]*)", "namedgroup"));

    PAssert.that(output).empty();
    p.run();
  }

  /**
   * {@code allMatches(regex)}: expects, per matching element, a list holding the whole match
   * followed by each capture group in order. "a x" is expected to contribute nothing.
   */
  @Test
  @Category(NeedsRunner.class)
  public void testAllMatches() {
    PCollection<List<String>> output =
        p.apply(Create.of("a x", "x x", "y y", "z z")).apply(Regex.allMatches("([xyz]) ([xyz])"));

    PAssert.that(output)
        .containsInAnyOrder(
            Arrays.asList("x x", "x", "x"),
            Arrays.asList("y y", "y", "y"),
            Arrays.asList("z z", "z", "z"));
    p.run();
  }

  /** {@code matchesKV(regex, keyGroup, valueGroup)}: expects a KV built from groups 1 and 2. */
  @Test
  @Category(NeedsRunner.class)
  public void testKVMatches() {
    PCollection<KV<String, String>> output =
        p.apply(Create.of("a b c")).apply(Regex.matchesKV("a (b) (c)", 1, 2));

    PAssert.that(output).containsInAnyOrder(KV.of("b", "c"));
    p.run();
  }

  /** {@code matchesKV(regex, keyGroup, valueGroup)}: expects no output for a non-match. */
  @Test
  @Category(NeedsRunner.class)
  public void testKVMatchesNone() {
    PCollection<KV<String, String>> output =
        p.apply(Create.of("x y z")).apply(Regex.matchesKV("a (b) (c)", 1, 2));

    PAssert.that(output).empty();
    p.run();
  }

  /**
   * {@code matchesKV(regex, keyName, valueName)}: expects a KV built from the two named groups.
   *
   * <p>This must call {@code matchesKV}, not {@code findKV}; the latter is already covered by
   * {@link #testKVFindName()} and would leave the named-group {@code matchesKV} overload untested.
   */
  @Test
  @Category(NeedsRunner.class)
  public void testKVMatchesName() {
    PCollection<KV<String, String>> output =
        p.apply(Create.of("a b c"))
            .apply(Regex.matchesKV("a (?<keyname>b) (?<valuename>c)", "keyname", "valuename"));

    PAssert.that(output).containsInAnyOrder(KV.of("b", "c"));
    p.run();
  }

  /**
   * {@code matchesKV(regex, keyName, valueName)}: expects no output for a non-matching element.
   *
   * <p>This must call {@code matchesKV}, not {@code findKV}; the latter is already covered by
   * {@link #testKVFindNameNone()}.
   */
  @Test
  @Category(NeedsRunner.class)
  public void testKVMatchesNameNone() {
    PCollection<KV<String, String>> output =
        p.apply(Create.of("x y z"))
            .apply(Regex.matchesKV("a (?<keyname>b) (?<valuename>c)", "keyname", "valuename"));

    PAssert.that(output).empty();
    p.run();
  }

  // ---------------------------------------------------------------------------------------------
  // replace
  // ---------------------------------------------------------------------------------------------

  /** {@code replaceAll}: expects every occurrence of the pattern to be replaced per element. */
  @Test
  @Category(NeedsRunner.class)
  public void testReplaceAll() {
    PCollection<String> output =
        p.apply(Create.of("xj", "yj", "zj")).apply(Regex.replaceAll("[xyz]", "new"));

    PAssert.that(output).containsInAnyOrder("newj", "newj", "newj");
    p.run();
  }

  /** {@code replaceAll}: expects elements without a match to pass through unchanged. */
  @Test
  @Category(NeedsRunner.class)
  public void testReplaceAllMixed() {
    PCollection<String> output =
        p.apply(Create.of("abc", "xj", "yj", "zj", "def")).apply(Regex.replaceAll("[xyz]", "new"));

    PAssert.that(output).containsInAnyOrder("abc", "newj", "newj", "newj", "def");
    p.run();
  }

  /** {@code replaceFirst}: expects only the first occurrence to be replaced per element. */
  @Test
  @Category(NeedsRunner.class)
  public void testReplaceFirst() {
    PCollection<String> output =
        p.apply(Create.of("xjx", "yjy", "zjz")).apply(Regex.replaceFirst("[xyz]", "new"));

    // The trailing x/y/z of each element is expected to survive.
    PAssert.that(output).containsInAnyOrder("newjx", "newjy", "newjz");
    p.run();
  }

  /** {@code replaceFirst}: expects elements without a match to pass through unchanged. */
  @Test
  @Category(NeedsRunner.class)
  public void testReplaceFirstMixed() {
    PCollection<String> output =
        p.apply(Create.of("abc", "xjx", "yjy", "zjz", "def"))
            .apply(Regex.replaceFirst("[xyz]", "new"));

    PAssert.that(output).containsInAnyOrder("abc", "newjx", "newjy", "newjz", "def");
    p.run();
  }

  // ---------------------------------------------------------------------------------------------
  // split
  // ---------------------------------------------------------------------------------------------

  /** {@code split(regex)}: expects one output element per word when splitting on {@code \W+}. */
  @Test
  @Category(NeedsRunner.class)
  public void testSplits() {
    PCollection<String> output =
        p.apply(Create.of("The quick brown fox jumps over the lazy dog"))
            .apply(Regex.split("\\W+"));

    PAssert.that(output)
        .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");
    p.run();
  }

  /**
   * {@code split(regex, true)}: expects empty tokens to be emitted.
   *
   * <p>The input has whitespace runs of length 2, 3 and 4; splitting on a single {@code \s} leaves
   * one empty token between each pair of adjacent separators, giving the 1, 2 and 3 empty strings
   * in the expected output. A single-spaced sentence would produce no empty tokens at all.
   */
  @Test
  @Category(NeedsRunner.class)
  public void testSplitsWithEmpty() {
    PCollection<String> output =
        p.apply(Create.of(SENTENCE_WITH_SPACE_RUNS)).apply(Regex.split("\\s", true));

    PAssert.that(output)
        .containsInAnyOrder(
            "The", "", "quick", "brown", "", "", "fox", "jumps", "over", "", "", "", "the", "lazy",
            "dog");
    p.run();
  }

  /**
   * {@code split(regex, false)}: expects empty tokens to be dropped.
   *
   * <p>Uses the same multi-space input as {@link #testSplitsWithEmpty()} so that there are empty
   * tokens to drop; with a single-spaced sentence the flag would have nothing to filter.
   */
  @Test
  @Category(NeedsRunner.class)
  public void testSplitsWithoutEmpty() {
    PCollection<String> output =
        p.apply(Create.of(SENTENCE_WITH_SPACE_RUNS)).apply(Regex.split("\\s", false));

    PAssert.that(output)
        .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");
    p.run();
  }
}