/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.examples.complete;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.reflect.Nullable;
import org.apache.beam.examples.common.ExampleBigQueryTableOptions;
import org.apache.beam.examples.common.ExampleOptions;
import org.apache.beam.examples.common.ExampleUtils;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.coders.AvroCoder;
import org.apache.beam.sdk.coders.DefaultCoder;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.windowing.SlidingWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;
import org.joda.time.Instant;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
/**
* A Beam example that runs in both batch and streaming modes with traffic sensor data.
* You can configure the running mode by setting {@literal --streaming} to true or false.
*
* <p>Concepts: The batch and streaming runners, GroupByKey, sliding windows.
*
* <p>This example analyzes traffic sensor data using SlidingWindows. For each window,
* it calculates the average speed over the window for some small set of predefined 'routes',
* and looks for 'slowdowns' in those routes. It writes its results to a BigQuery table.
*
* <p>The pipeline reads traffic sensor data from {@literal --inputFile}.
*
* <p>The example is configured to use the default BigQuery table from the example common package
* (there are no defaults for a general Beam pipeline).
* You can override them with the {@literal --bigQueryDataset} and {@literal --bigQueryTable}
* options. If the BigQuery table does not exist, the example will try to create it.
*
* <p>The example will try to cancel the pipeline when the process receives a termination
* signal (CTRL-C), and then exit.
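*
* <p>As a rough sketch, an invocation might look like the following; the placeholder values
* and the Maven exec plugin setup are assumptions that depend on your environment and runner:
* <pre>{@code
* mvn compile exec:java \
*     -Dexec.mainClass=org.apache.beam.examples.complete.TrafficRoutes \
*     -Dexec.args="--project=<YOUR_PROJECT> \
*                  --bigQueryDataset=<YOUR_DATASET> \
*                  --bigQueryTable=<YOUR_TABLE> \
*                  --windowDuration=3 \
*                  --windowSlideEvery=1"
* }</pre>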
*/
public class TrafficRoutes {
// Instantiate some small predefined San Diego routes to analyze
static Map<String, String> sdStations = buildStationInfo();
static final int WINDOW_DURATION = 3; // Default sliding window duration in minutes
static final int WINDOW_SLIDE_EVERY = 1; // Default window 'slide every' setting in minutes
/**
* This class holds information about a station reading's average speed.
*/
@DefaultCoder(AvroCoder.class)
static class StationSpeed implements Comparable<StationSpeed> {
@Nullable String stationId;
@Nullable Double avgSpeed;
@Nullable Long timestamp;
public StationSpeed() {}
public StationSpeed(String stationId, Double avgSpeed, Long timestamp) {
this.stationId = stationId;
this.avgSpeed = avgSpeed;
this.timestamp = timestamp;
}
public String getStationId() {
return this.stationId;
}
public Double getAvgSpeed() {
return this.avgSpeed;
}
@Override
public int compareTo(StationSpeed other) {
return Long.compare(this.timestamp, other.timestamp);
}
}
/**
* This class holds information about a route's speed/slowdown.
*/
@DefaultCoder(AvroCoder.class)
static class RouteInfo {
@Nullable String route;
@Nullable Double avgSpeed;
@Nullable Boolean slowdownEvent;
public RouteInfo() {}
public RouteInfo(String route, Double avgSpeed, Boolean slowdownEvent) {
this.route = route;
this.avgSpeed = avgSpeed;
this.slowdownEvent = slowdownEvent;
}
public String getRoute() {
return this.route;
}
public Double getAvgSpeed() {
return this.avgSpeed;
}
public Boolean getSlowdownEvent() {
return this.slowdownEvent;
}
}
/**
* Extract the timestamp field (the first comma-separated field, in MM/dd/yyyy HH:mm:ss format)
* from the input string, and use it as the element timestamp.
*/
static class ExtractTimestamps extends DoFn<String, String> {
private static final DateTimeFormatter dateTimeFormat =
DateTimeFormat.forPattern("MM/dd/yyyy HH:mm:ss");
@ProcessElement
public void processElement(ProcessContext c) {
String[] items = c.element().split(",");
String timestamp = tryParseTimestamp(items);
if (timestamp != null) {
try {
c.outputWithTimestamp(c.element(), new Instant(dateTimeFormat.parseMillis(timestamp)));
} catch (IllegalArgumentException e) {
// Skip the invalid input.
}
}
}
}
/**
* Keep only readings from stations along predefined 'routes', and output each reading's
* speed info keyed on its route.
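*
* <p>The station type, station ID, and average speed are read from CSV fields 4, 1, and 9,
* respectively (see the tryParse helpers at the bottom of the enclosing class).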
*/
static class ExtractStationSpeedFn extends DoFn<String, KV<String, StationSpeed>> {
@ProcessElement
public void processElement(ProcessContext c) {
String[] items = c.element().split(",");
String stationType = tryParseStationType(items);
// For this analysis, use only 'main line' station types
if (stationType != null && stationType.equals("ML")) {
Double avgSpeed = tryParseAvgSpeed(items);
String stationId = tryParseStationId(items);
// For this simple example, filter out everything but some hardwired routes.
if (avgSpeed != null && stationId != null && sdStations.containsKey(stationId)) {
StationSpeed stationSpeed =
new StationSpeed(stationId, avgSpeed, c.timestamp().getMillis());
// The tuple key is the 'route' name stored in the 'sdStations' hash.
KV<String, StationSpeed> outputValue = KV.of(sdStations.get(stationId), stationSpeed);
c.output(outputValue);
}
}
}
}
/**
* For a given route, track average speed for the window. Calculate whether
* traffic is currently slowing down, via a predefined threshold. If a supermajority of
* speeds in this sliding window are less than the previous reading for the same station,
* we call this a 'slowdown'.
* Note: these calculations are for example purposes only, and are unrealistic and oversimplified.
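*
* <p>For example, if a window holds per-station reading pairs 60&rarr;55, 62&rarr;58, and
* 64&rarr;66 mph, the transitions give slowdowns = 2 and speedups = 1; since 2 &gt;= 2 * 1,
* the window is flagged as a slowdown event.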
*/
static class GatherStats
extends DoFn<KV<String, Iterable<StationSpeed>>, KV<String, RouteInfo>> {
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
String route = c.element().getKey();
double speedSum = 0.0;
int speedCount = 0;
int speedups = 0;
int slowdowns = 0;
List<StationSpeed> infoList = Lists.newArrayList(c.element().getValue());
// StationSpeeds sort by embedded timestamp.
Collections.sort(infoList);
Map<String, Double> prevSpeeds = new HashMap<>();
// For all stations in the route, sum (non-null) speeds. Keep a count of the non-null speeds.
for (StationSpeed item : infoList) {
Double speed = item.getAvgSpeed();
if (speed != null) {
speedSum += speed;
speedCount++;
Double lastSpeed = prevSpeeds.get(item.getStationId());
if (lastSpeed != null) {
if (lastSpeed < speed) {
speedups += 1;
} else {
slowdowns += 1;
}
}
prevSpeeds.put(item.getStationId(), speed);
}
}
if (speedCount == 0) {
// No average to compute.
return;
}
double speedAvg = speedSum / speedCount;
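// Flag a slowdown when slowdown transitions outnumber speedup transitions at least two to one.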
boolean slowdownEvent = slowdowns >= 2 * speedups;
RouteInfo routeInfo = new RouteInfo(route, speedAvg, slowdownEvent);
c.output(KV.of(route, routeInfo));
}
}
/**
* Format the results of the slowdown calculations as TableRows, to save to BigQuery.
*/
static class FormatStatsFn extends DoFn<KV<String, RouteInfo>, TableRow> {
@ProcessElement
public void processElement(ProcessContext c) {
RouteInfo routeInfo = c.element().getValue();
TableRow row = new TableRow()
.set("avg_speed", routeInfo.getAvgSpeed())
.set("slowdown_event", routeInfo.getSlowdownEvent())
.set("route", c.element().getKey())
.set("window_timestamp", c.timestamp().toString());
c.output(row);
}
/**
* Defines the BigQuery schema used for the output.
*/
static TableSchema getSchema() {
List<TableFieldSchema> fields = new ArrayList<>();
fields.add(new TableFieldSchema().setName("route").setType("STRING"));
fields.add(new TableFieldSchema().setName("avg_speed").setType("FLOAT"));
fields.add(new TableFieldSchema().setName("slowdown_event").setType("BOOLEAN"));
fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
TableSchema schema = new TableSchema().setFields(fields);
return schema;
}
}
/**
* This PTransform extracts speed info from traffic station readings.
* It groups the readings by 'route' and analyzes traffic slowdown for that route.
* Lastly, it formats the results for BigQuery.
*/
static class TrackSpeed extends
PTransform<PCollection<KV<String, StationSpeed>>, PCollection<TableRow>> {
@Override
public PCollection<TableRow> expand(PCollection<KV<String, StationSpeed>> stationSpeed) {
// Apply a GroupByKey transform to collect a list of all station
// readings for a given route.
PCollection<KV<String, Iterable<StationSpeed>>> timeGroup = stationSpeed.apply(
GroupByKey.<String, StationSpeed>create());
// Analyze 'slowdown' over the route readings.
PCollection<KV<String, RouteInfo>> stats = timeGroup.apply(ParDo.of(new GatherStats()));
// Format the results for writing to BigQuery.
PCollection<TableRow> results = stats.apply(
ParDo.of(new FormatStatsFn()));
return results;
}
}
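/**
* Reads the traffic sensor input file and assigns each line an event timestamp parsed from its
* first field.
*/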
static class ReadFileAndExtractTimestamps extends PTransform<PBegin, PCollection<String>> {
private final String inputFile;
public ReadFileAndExtractTimestamps(String inputFile) {
this.inputFile = inputFile;
}
@Override
public PCollection<String> expand(PBegin begin) {
return begin
.apply(TextIO.read().from(inputFile))
.apply(ParDo.of(new ExtractTimestamps()));
}
}
/**
* Options supported by {@link TrafficRoutes}.
*
* <p>Inherits standard configuration options.
*/
public interface TrafficRoutesOptions extends ExampleOptions, ExampleBigQueryTableOptions {
@Description("Path of the file to read from")
@Default.String("gs://apache-beam-samples/traffic_sensor/"
+ "Freeways-5Minaa2010-01-01_to_2010-02-15_test2.csv")
String getInputFile();
void setInputFile(String value);
@Description("Numeric value of sliding window duration, in minutes")
@Default.Integer(WINDOW_DURATION)
Integer getWindowDuration();
void setWindowDuration(Integer value);
@Description("Numeric value of window 'slide every' setting, in minutes")
@Default.Integer(WINDOW_SLIDE_EVERY)
Integer getWindowSlideEvery();
void setWindowSlideEvery(Integer value);
}
/**
* Sets up and starts the pipeline, in batch or streaming mode depending on the
* {@literal --streaming} option.
*
* @throws IOException if there is a problem setting up resources
*/
public static void main(String[] args) throws IOException {
TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args)
.withValidation()
.as(TrafficRoutesOptions.class);
options.setBigQuerySchema(FormatStatsFn.getSchema());
// Using ExampleUtils to set up required resources.
ExampleUtils exampleUtils = new ExampleUtils(options);
exampleUtils.setup();
Pipeline pipeline = Pipeline.create(options);
TableReference tableRef = new TableReference();
tableRef.setProjectId(options.getProject());
tableRef.setDatasetId(options.getBigQueryDataset());
tableRef.setTableId(options.getBigQueryTable());
pipeline
.apply("ReadLines", new ReadFileAndExtractTimestamps(options.getInputFile()))
// Convert each input row into a (route, StationSpeed) pair.
.apply(ParDo.of(new ExtractStationSpeedFn()))
// Map the incoming data into sliding windows. With the defaults, a new three-minute
// window starts every minute, so each element lands in three overlapping windows.
.apply(Window.<KV<String, StationSpeed>>into(
SlidingWindows.of(Duration.standardMinutes(options.getWindowDuration()))
.every(Duration.standardMinutes(options.getWindowSlideEvery()))))
.apply(new TrackSpeed())
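// Write the rows to BigQuery. With the default CREATE_IF_NEEDED create disposition,
// the table is created from the schema below if it does not already exist.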
.apply(BigQueryIO.writeTableRows().to(tableRef)
.withSchema(FormatStatsFn.getSchema()));
// Run the pipeline.
PipelineResult result = pipeline.run();
// ExampleUtils will try to cancel the pipeline and the injector before the program exits.
exampleUtils.waitToFinish(result);
}
private static Double tryParseAvgSpeed(String[] inputItems) {
try {
return Double.parseDouble(tryParseString(inputItems, 9));
} catch (NumberFormatException | NullPointerException e) {
return null;
}
}
private static String tryParseStationType(String[] inputItems) {
return tryParseString(inputItems, 4);
}
private static String tryParseStationId(String[] inputItems) {
return tryParseString(inputItems, 1);
}
private static String tryParseTimestamp(String[] inputItems) {
return tryParseString(inputItems, 0);
}
private static String tryParseString(String[] inputItems, int index) {
// The index must be strictly less than the array length; '>=' would read past short rows.
return inputItems.length > index ? inputItems[index] : null;
}
/**
* Define some small hard-wired San Diego 'routes' to track based on sensor station ID.
*/
private static Map<String, String> buildStationInfo() {
Map<String, String> stations = new HashMap<>();
stations.put("1108413", "SDRoute1"); // from freeway 805 S
stations.put("1108699", "SDRoute2"); // from freeway 78 E
stations.put("1108702", "SDRoute2");
return stations;
}
}