---
title: "Try Apache Beam"
---

Try Apache Beam

You can try an Apache Beam pipeline using our interactive notebooks.

{{< language-switcher java py go >}}

Interactive WordCount in Colab

This interactive notebook shows you what a simple, minimal version of WordCount looks like.

{{< highlight java >}} package samples.quickstart;

import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.transforms.Count; import org.apache.beam.sdk.transforms.Filter; import org.apache.beam.sdk.transforms.FlatMapElements; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.TypeDescriptors;

import java.util.Arrays;

public class WordCount { public static void main(String[] args) { String inputsDir = “data/*”; String outputsPrefix = “outputs/part”;

PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
Pipeline pipeline = Pipeline.create(options);
pipeline
    .apply("Read lines", TextIO.read().from(inputsDir))
    .apply("Find words", FlatMapElements.into(TypeDescriptors.strings())
        .via((String line) -> Arrays.asList(line.split("[^\\p{L}]+"))))
    .apply("Filter empty words", Filter.by((String word) -> !word.isEmpty()))
    .apply("Count words", Count.perElement())
    .apply("Write results", MapElements.into(TypeDescriptors.strings())
        .via((KV<String, Long> wordCount) ->
              wordCount.getKey() + ": " + wordCount.getValue()))
    .apply(TextIO.write().to(outputsPrefix));
pipeline.run();

} } {{< /highlight >}}

{{< paragraph class="language-java" >}} Run in Colab View on GitHub {{< /paragraph >}}

{{< paragraph class="language-java" >}} To learn how to install and run the Apache Beam Java SDK on your own computer, follow the instructions in the Java Quickstart. {{< /paragraph >}}

{{< highlight py >}} import apache_beam as beam import re

inputs_pattern = ‘data/*’ outputs_prefix = ‘outputs/part’

with beam.Pipeline() as pipeline: ( pipeline | ‘Read lines’ >> beam.io.ReadFromText(inputs_pattern) | ‘Find words’ >> beam.FlatMap(lambda line: re.findall(r"[a-zA-Z']+", line)) | ‘Pair words with 1’ >> beam.Map(lambda word: (word, 1)) | ‘Group and sum’ >> beam.CombinePerKey(sum) | ‘Format results’ >> beam.Map(lambda word_count: str(word_count)) | ‘Write results’ >> beam.io.WriteToText(outputs_prefix) ) {{< /highlight >}}

{{< paragraph class="language-py" >}} Run in Colab View on GitHub {{< /paragraph >}}

{{< paragraph class="language-py" >}} To learn how to install and run the Apache Beam Python SDK on your own computer, follow the instructions in the Python Quickstart. {{< /paragraph >}}

{{< highlight go >}} package main

import ( “context” “flag” “fmt” “regexp”

"github.com/apache/beam/sdks/v2/go/pkg/beam"
"github.com/apache/beam/sdks/v2/go/pkg/beam/io/textio"
"github.com/apache/beam/sdks/v2/go/pkg/beam/runners/direct"
"github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/stats"

_ "github.com/apache/beam/sdks/v2/go/pkg/beam/io/filesystem/local"

)

var ( input = flag.String(“input”, “data/*”, “File(s) to read.”) output = flag.String(“output”, “outputs/wordcounts.txt”, “Output filename.”) )

var wordRE = regexp.MustCompile([a-zA-Z]+('[a-z])?)

func main() { flag.Parse()

beam.Init()

pipeline := beam.NewPipeline()
root := pipeline.Root()

lines := textio.Read(root, *input)
words := beam.ParDo(root, func(line string, emit func(string)) {
	for _, word := range wordRE.FindAllString(line, -1) {
		emit(word)
	}
}, lines)
counted := stats.Count(root, words)
formatted := beam.ParDo(root, func(word string, count int) string {
	return fmt.Sprintf("%s: %v", word, count)
}, counted)
textio.Write(root, *output, formatted)

direct.Execute(context.Background(), pipeline)

} {{< /highlight >}}

{{< paragraph class="language-go" >}} Run in Colab View on GitHub {{< /paragraph >}}

{{< paragraph class="language-go" >}} To learn how to install and run the Apache Beam Go SDK on your own computer, follow the instructions in the Go Quickstart. {{< /paragraph >}}

For a more detailed explanation about how WordCount works, see the WordCount Example Walkthrough.

Next Steps

Please don't hesitate to reach out if you encounter any issues!