<!-- blob: 548d6c9e139cfce0b71779c36498c8d6ac1d428a [file] [log] [blame] -->
<!doctype html><html lang=en class=no-js><head><meta charset=utf-8><meta http-equiv=x-ua-compatible content="IE=edge"><meta name=viewport content="width=device-width,initial-scale=1"><title>Real-time ML with Beam at Lyft</title><meta name=description content="Apache Beam is an open source, unified model and set of language-specific SDKs for defining and executing data processing workflows, and also data ingestion and integration flows, supporting Enterprise Integration Patterns (EIPs) and Domain Specific Languages (DSLs). Dataflow pipelines simplify the mechanics of large-scale batch and streaming data processing and can run on a number of runtimes like Apache Flink, Apache Spark, and Google Cloud Dataflow (a cloud service). Beam also brings DSL in different languages, allowing users to easily implement their data integration processes."><link href="https://fonts.googleapis.com/css?family=Roboto:100,300,400,500,700" rel=stylesheet><link rel=preload href=/scss/main.min.408fddfe3e8a45f87a5a8c9a839d77db667c1c534e5e5cd0d957ffc3dd6c14cf.css as=style><link href=/scss/main.min.408fddfe3e8a45f87a5a8c9a839d77db667c1c534e5e5cd0d957ffc3dd6c14cf.css rel=stylesheet integrity><script src=https://code.jquery.com/jquery-2.2.4.min.js></script><style>.body__contained img{max-width:100%}</style><script type=text/javascript src=/js/bootstrap.min.2979f9a6e32fc42c3e7406339ee9fe76b31d1b52059776a02b4a7fa6a4fd280a.js defer></script>
<!-- Site scripts. Every tag carries `defer`, so they are fetched in parallel and executed in document order once parsing has finished. -->
<script src="/js/language-switch-v2.min.121952b7980b920320ab229551857669209945e39b05ba2b433a565385ca44c6.js" defer></script>
<script src="/js/fix-menu.min.039174b67107465f2090a493f91e126f7aa797f29420f9edab8a54d9dd4b3d2d.js" defer></script>
<script src="/js/section-nav.min.1405fd5e70fab5f6c54037c269b1d137487d8f3d1b3009032525f6db3fbce991.js" defer></script>
<script src="/js/page-nav.min.af231204c9c52c5089d53a4c02739eacbb7f939e3be1c6ffcc212e0ac4dbf879.js" defer></script>
<script src="/js/expandable-list.min.75a4526624a3b8898fe7fb9e3428c205b581f8b38c7926922467aef17eac69f2.js" defer></script>
<script src="/js/copy-to-clipboard.min.364c06423d7e8993fc42bb4abc38c03195bc8386db26d18774ce775d08d5b18d.js" defer></script>
<script src="/js/calendar.min.336664054fa0f52b08bbd4e3c59b5cb6d63dcfb2b4d602839746516b0817446b.js" defer></script>
<script src="/js/fix-playground-nested-scroll.min.0283f1037cb1b9d5074c6eaf041292b524a8148a7cdb803d5ccd6d1fc4eb3253.js" defer></script>
<script src="/js/anchor-content-jump-fix.min.22d3240f81632e4c11179b9d2aaf37a40da9414333c43aa97344e8b21a7df0e4.js" defer></script>
<link rel=alternate type=application/rss+xml title="Apache Beam" href=/feed.xml><link rel=canonical href=/case-studies/lyft/ data-proofer-ignore><link rel="shortcut icon" type=image/x-icon href=/images/favicon.ico><link rel=stylesheet href=https://use.fontawesome.com/releases/v5.4.1/css/all.css integrity=sha384-5sAR7xN1Nv6T6+dT2mhtzEpVJvfS3NScPQTrOxhwjIuvcA67KV2R5Jz6kr4abQsz crossorigin=anonymous><link rel=stylesheet href=https://unpkg.com/swiper@8/swiper-bundle.min.css><script async src=https://platform.twitter.com/widgets.js></script>
<script>(function(e,t,n,s,o,i,a){e.GoogleAnalyticsObject=o,e[o]=e[o]||function(){(e[o].q=e[o].q||[]).push(arguments)},e[o].l=1*new Date,i=t.createElement(n),a=t.getElementsByTagName(n)[0],i.async=1,i.src=s,a.parentNode.insertBefore(i,a)})(window,document,"script","//www.google-analytics.com/analytics.js","ga"),ga("create","UA-73650088-1","auto"),ga("send","pageview")</script><script>(function(e,t,n,s,o,i){e.hj=e.hj||function(){(e.hj.q=e.hj.q||[]).push(arguments)},e._hjSettings={hjid:2182187,hjsv:6},o=t.getElementsByTagName("head")[0],i=t.createElement("script"),i.async=1,i.src=n+e._hjSettings.hjid+s+e._hjSettings.hjsv,o.appendChild(i)})(window,document,"https://static.hotjar.com/c/hotjar-",".js?sv=")</script></head><body class=body data-spy=scroll data-target=.page-nav data-offset=0><nav class="navigation-bar-mobile header navbar navbar-fixed-top"><div class=navbar-header><a href=/ class=navbar-brand><img alt=Brand style=height:46px;width:43px src=/images/beam_logo_navbar_mobile.png></a>
<!-- Quick links placed in the mobile navbar header, outside the collapsible #navbar container. -->
<a class="navbar-link" href="/get-started/">Get Started</a>
<a class="navbar-link" href="/documentation/">Documentation</a>
<button type=button class="navbar-toggle menu-open" aria-expanded=false aria-controls=navbar onclick=openMenu()>
<span class=sr-only>Toggle navigation</span>
<span class=icon-bar></span>
<span class=icon-bar></span>
<span class=icon-bar></span></button></div><div class="navbar-mask closed"></div><div id=navbar class="navbar-container closed"><button type=button class=navbar-toggle aria-expanded=false aria-controls=navbar id=closeMenu>
<span class=sr-only>Toggle navigation</span>
<span class=icon-bar></span>
<span class=icon-bar></span>
<span class=icon-bar></span></button><ul class="nav navbar-nav"><li><div class=searchBar-mobile><script>(function(){var t,n="012923275103528129024:4emlchv9wzi",e=document.createElement("script");e.type="text/javascript",e.async=!0,e.src="https://cse.google.com/cse.js?cx="+n,t=document.getElementsByTagName("script")[0],t.parentNode.insertBefore(e,t)})()</script><gcse:search></gcse:search></div></li><li><a class=navbar-link href=/about>About</a></li><li><a class=navbar-link href=/get-started/>Get Started</a></li><li><span class=navbar-link>Documentation</span><ul><li><a href=/documentation/>General</a></li><li><a href=/documentation/sdks/java/>Languages</a></li><li><a href=/documentation/runners/capability-matrix/>Runners</a></li><li><a href=/documentation/io/connectors/>I/O Connectors</a></li></ul></li><li><a class=navbar-link href=/roadmap/>Roadmap</a></li><li><a class=navbar-link href=/community/>Community</a></li><li><a class=navbar-link href=/contribute/>Contribute</a></li><li><a class=navbar-link href=/blog/>Blog</a></li><li><a class=navbar-link href=/case-studies/>Case Studies</a></li></ul><ul class="nav navbar-nav navbar-right"><li><a href=https://github.com/apache/beam/edit/master/website/www/site/content/en/case-studies/lyft.md data-proofer-ignore><svg xmlns="http://www.w3.org/2000/svg" width="25" height="24" fill="none" viewBox="0 0 25 24"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.75" d="M4.543 20h4l10.5-10.5c.53-.53.828-1.25.828-2s-.298-1.47-.828-2-1.25-.828-2-.828-1.47.298-2 .828L4.543 16v4zm9.5-13.5 4 4"/></svg></a></li><li class=dropdown><a href=# class=dropdown-toggle id=apache-dropdown data-toggle=dropdown role=button aria-haspopup=true aria-expanded=false><img src=https://www.apache.org/foundation/press/kit/feather_small.png alt="Apache Logo" style=height:20px>
&nbsp;Apache
<span class=arrow-icon><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="none" viewBox="0 0 20 20"><circle cx="10" cy="10" r="10" fill="#ff6d00"/><path stroke="#fff" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8.535 5.28l4.573 4.818-4.573 4.403"/></svg></span></a><ul class="dropdown-menu dropdown-menu-right"><li><a target=_blank href=https://www.apache.org/>ASF Homepage</a></li><li><a target=_blank href=https://www.apache.org/licenses/>License</a></li><li><a target=_blank href=https://www.apache.org/security/>Security</a></li><li><a target=_blank href=https://www.apache.org/foundation/thanks.html>Thanks</a></li><li><a target=_blank href=https://www.apache.org/foundation/sponsorship.html>Sponsorship</a></li><li><a target=_blank href=https://www.apache.org/foundation/policies/conduct>Code of Conduct</a></li></ul></li></ul></div></nav><nav class=navigation-bar-desktop><a href=/ class=navbar-logo><img src=/images/beam_logo_navbar.png alt="Beam Logo"></a><div class=navbar-bar-left><div class=navbar-links><a class=navbar-link href=/about>About</a>
<a class=navbar-link href=/get-started/>Get Started</a><li class="dropdown navbar-dropdown navbar-dropdown-documentation"><a href=# class="dropdown-toggle navbar-link" role=button aria-haspopup=true aria-expanded=false>Documentation
<span><svg xmlns="http://www.w3.org/2000/svg" width="12" height="11" fill="none" viewBox="0 0 12 11"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10.666 4.535 5.847 9.108 1.444 4.535"/></svg></span></a><ul class=dropdown-menu><li><a class=navbar-dropdown-menu-link href=/documentation/>General</a></li><li><a class=navbar-dropdown-menu-link href=/documentation/sdks/java/>Languages</a></li><li><a class=navbar-dropdown-menu-link href=/documentation/runners/capability-matrix/>Runners</a></li><li><a class=navbar-dropdown-menu-link href=/documentation/io/connectors/>I/O Connectors</a></li></ul></li><a class=navbar-link href=/roadmap/>Roadmap</a>
<!-- Desktop navigation bar: plain section links that follow the Documentation dropdown. -->
<a class="navbar-link" href="/community/">Community</a>
<a class="navbar-link" href="/contribute/">Contribute</a>
<a class="navbar-link" href="/blog/">Blog</a>
<a class=navbar-link href=/case-studies/>Case Studies</a></div><div id=iconsBar><a type=button onclick=showSearch()><svg xmlns="http://www.w3.org/2000/svg" width="25" height="24" fill="none" viewBox="0 0 25 24"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.75" d="M10.191 17c3.866.0 7-3.134 7-7s-3.134-7-7-7-7 3.134-7 7 3.134 7 7 7zm11 4-6-6"/></svg></a><a target=_blank href=https://github.com/apache/beam/edit/master/website/www/site/content/en/case-studies/lyft.md data-proofer-ignore><svg xmlns="http://www.w3.org/2000/svg" width="25" height="24" fill="none" viewBox="0 0 25 24"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.75" d="M4.543 20h4l10.5-10.5c.53-.53.828-1.25.828-2s-.298-1.47-.828-2-1.25-.828-2-.828-1.47.298-2 .828L4.543 16v4zm9.5-13.5 4 4"/></svg></a><li class="dropdown navbar-dropdown navbar-dropdown-apache"><a href=# class=dropdown-toggle role=button aria-haspopup=true aria-expanded=false><img src=https://www.apache.org/foundation/press/kit/feather_small.png alt="Apache Logo" style=height:20px>
&nbsp;Apache
<span class=arrow-icon><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="none" viewBox="0 0 20 20"><circle cx="10" cy="10" r="10" fill="#ff6d00"/><path stroke="#fff" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8.535 5.28l4.573 4.818-4.573 4.403"/></svg></span></a><ul class=dropdown-menu><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/>ASF Homepage</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/licenses/>License</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/security/>Security</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/foundation/thanks.html>Thanks</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/foundation/sponsorship.html>Sponsorship</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/foundation/policies/conduct>Code of Conduct</a></li></ul></li></div><div class="searchBar disappear"><script>(function(){var t,n="012923275103528129024:4emlchv9wzi",e=document.createElement("script");e.type="text/javascript",e.async=!0,e.src="https://cse.google.com/cse.js?cx="+n,t=document.getElementsByTagName("script")[0],t.parentNode.insertBefore(e,t)})()</script><gcse:search></gcse:search>
<a type=button onclick=endSearch()><svg xmlns="http://www.w3.org/2000/svg" width="25" height="25" fill="none" viewBox="0 0 25 25"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.75" d="M21.122 20.827 4.727 4.432M21.122 4.43 4.727 20.827"/></svg></a></div></div></nav><div class=header-push></div><div class="top-banners swiper"><div class=swiper-wrapper><div class=swiper-slide><a href=https://tour.beam.apache.org><img class=banner-img-desktop src=/images/banners/tour-of-beam/tour-of-beam-desktop.png alt="Start Tour of Beam">
<img class=banner-img-mobile src=/images/banners/tour-of-beam/tour-of-beam-mobile.png alt="Start Tour of Beam"></a></div><div class=swiper-slide><a href=https://beam.apache.org/documentation/ml/overview/><img class=banner-img-desktop src=/images/banners/machine-learning/machine-learning-desktop.jpg alt="Machine Learning">
<img class=banner-img-mobile src=/images/banners/machine-learning/machine-learning-mobile.jpg alt="Machine Learning"></a></div></div><div class=swiper-pagination></div><div class=swiper-button-prev></div><div class=swiper-button-next></div></div><script src=/js/swiper-bundle.min.min.e0e8f81b0b15728d35ff73c07f42ddbb17a108d6f23df4953cb3e60df7ade675.js></script>
<script src=/js/sliders/top-banners.min.afa7d0a19acf7a3b28ca369490b3d401a619562a2a4c9612577be2f66a4b9855.js></script>
<script>
// Global handlers for the navigation bars. They are invoked from inline
// onclick attributes (openMenu, showSearch, endSearch), so the function
// names must remain globals.

/** Sets the hint text on every text input matched by jQuery's `input:text` selector. */
function addPlaceholder() {
  $("input:text").attr("placeholder", "What are you looking for?");
}

/**
 * Opens the desktop search: removes the `disappear` class from `.searchBar`
 * and adds it to `#iconsBar`, swapping which of the two carries the class.
 */
function showSearch() {
  addPlaceholder();
  document.querySelector(".searchBar").classList.remove("disappear");
  document.querySelector("#iconsBar").classList.add("disappear");
}

/** Reverses showSearch(): `.searchBar` gets `disappear` back, `#iconsBar` loses it. */
function endSearch() {
  document.querySelector(".searchBar").classList.add("disappear");
  document.querySelector("#iconsBar").classList.remove("disappear");
}

/** Toggles the `fixedPosition` class on <body>. */
function blockScroll() {
  $("body").toggleClass("fixedPosition");
}

/** Click handler of the mobile menu button: sets the search placeholder, then toggles the body class. */
function openMenu() {
  addPlaceholder();
  blockScroll();
}
</script>
<!-- The company logo below carries an alt text so the image has a text alternative for assistive technology. -->
<div class="clearfix container-main-content"><nav class="page-nav clearfix" data-offset-top=90 data-offset-bottom=500><nav id=TableOfContents><ul><li><a href=#background>Background</a></li><li><a href=#democratizing-stream-processing>Democratizing Stream Processing</a></li><li><a href=#powering-real-time-machine-learning-pipelines>Powering Real-time Machine Learning Pipelines</a></li><li><a href=#amplifying-use-cases>Amplifying Use Cases</a></li><li><a href=#results>Results</a></li></ul></nav></nav><div class=case-study-page><article itemscope itemtype=http://schema.org/BlogPosting><div class="arrow-list header-top-margin" itemprop=articleBody><div class=case-study><div class=case-study-breadcrumbs><a href=/case-studies class=case-study-breadcrumbs-link>Case Studies</a><div class=case-study-breadcrumbs-separator><svg width="6" height="11" viewBox="0 0 6 11" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 9.5l4-4-4-4" stroke="#e6e6e6" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/></svg></div><div>Lyft</div></div><div class=case-study-content><div class=case-study-opinion><div class=case-study-opinion-img><img class="case-study-opinion-img-cropped case-study-opinion-img-center" src=/images/logos/powered-by/lyft.png alt="Lyft logo"></div><blockquote class=case-study-quote-block><p class=case-study-quote-text>“Lyft Marketplace team aims to improve our business efficiency by being nimble to 
real-world dynamics. Apache Beam has enabled us to meet the goal of having a robust and scalable ML infrastructure for improving model accuracy with features in real-time. These real-time features support critical functions like Forecasting, Primetime, Dispatch.”</p><div class=case-study-quote-author><div class=case-study-quote-author-img><img src=/images/case-study/lyft/ravi_kiran_magham.png></div><div class=case-study-quote-author-info><div class=case-study-quote-author-name>Ravi Kiran Magham</div><div class=case-study-quote-author-position>Software Engineer @ Lyft</div></div></div></blockquote></div><div class=case-study-post><h1 id=real-time-ml-with-beam-at-lyft>Real-time ML with Beam at Lyft</h1><h2 id=background>Background</h2><p><a href=https://www.lyft.com/>Lyft, Inc.</a> is an American mobility-as-a-service provider that offers ride-hailing, car and motorized scooter rentals, bicycle-sharing, food delivery, and business transportation solutions. Lyft is based in San Francisco, California, and <a href=https://www.lyft.com/rider/cities>operates in</a> 644 cities in the United States and 12 cities in Canada.</p><p>As you might expect from a company as large as Lyft, connecting drivers and riders in space and time at such a scale requires a powerful real-time streaming infrastructure. Ravi Kiran Magham, Software Engineer at Lyft, shared the story of how Apache Beam has become a mission-critical and integral real-time data processing technology for Lyft by enabling large-scale streaming data processing and machine learning pipelines.</p><h2 id=democratizing-stream-processing>Democratizing Stream Processing</h2><p>Lyft originally built streaming ETL pipelines to transform, enrich, and sink events generated by application services to their data lake in <a href=https://aws.amazon.com/s3/>AWS S3</a> using <a href=https://aws.amazon.com/kinesis/>Amazon Kinesis</a> and <a href=https://flink.apache.org/>Apache Flink</a>. 
Apache Flink is the foundation of Lyft’s streaming architecture and was chosen over Apache Spark due to its robust, fault-tolerant, and intuitive API for distributed stateful stream processing, exactly-once processing, and variety of I/O connectors.</p><p>Lyft’s popularity and growth were bringing new demands to data streaming infrastructure: more teams with diverse programming language preferences wanted to explore event-driven streaming applications, and build streaming features for real-time machine learning models to make business more efficient, enhance customer experiences, and provide time-sensitive compliance operations. The Data Platform team looked into improving the prime time (surge pricing) computation for the Marketplace team, which had a service orchestrating an ensemble of ML models, exchanging data over <a href=https://redis.com/>Redis</a>. The teams aimed at reducing code complexity and improving latency (from 5 to &lt; 1 min end to end). With Python being a prerequisite by the Marketplace team and Java being heavily used by the Data Platform team, Lyft started exploring the <a href=/>Apache Beam</a> <a href=/roadmap/portability/>portability framework</a> in 2019 to democratize streaming for all teams.</p><blockquote class="case-study-quote-block case-study-quote-wrapped"><p class=case-study-quote-text>The Apache Beam portability and multi-language capabilities were the key pique and the primary reason for us to start exploring Beam in a bigger way.</p><div class=case-study-quote-author><div class=case-study-quote-author-img><img src=/images/case-study/lyft/ravi_kiran_magham.png></div><div class=case-study-quote-author-info><div class=case-study-quote-author-name>Ravi Kiran Magham</div><div class=case-study-quote-author-position>Software Engineer @ Lyft</div></div></div></blockquote><p>Apache Beam provides a solution to the programming language and data processing engine dilemma, as it offers a variety of <a 
href=/documentation/basics/#runner>runners</a> (including the <a href=/documentation/runners/flink/>Beam Flink runner</a> for Apache Flink) and a <a href=/documentation/sdks/java/>variety of programming language SDKs</a>. Apache Beam offers an ultimate level of portability with its concept of “write once, run anywhere” and its ability to create <a href=/documentation/programming-guide/#multi-language-pipelines>multi-language pipelines - data pipelines</a> that use transforms from more than one programming language.</p><blockquote class="case-study-quote-block case-study-quote-wrapped"><p class=case-study-quote-text>Leveraging Apache Beam has been a “win-win” decision for us because our data infra teams use Java but we are able to offer Python SDK for our product teams, as it has been the de-facto language that they prefer. We write streaming pipelines with ease and comfort and run them on the Beam Flink runner.</p><div class=case-study-quote-author><div class=case-study-quote-author-img><img src=/images/case-study/lyft/ravi_kiran_magham.png></div><div class=case-study-quote-author-info><div class=case-study-quote-author-name>Ravi Kiran Magham</div><div class=case-study-quote-author-position>Software Engineer @ Lyft</div></div></div></blockquote><p>The Data Platform team built a control plane of in-house services and <a href=https://github.com/lyft/flinkk8soperator>FlinkK8sOperator</a> to manage Flink applications on a Kubernetes cluster and deploy streaming Apache Beam and Apache Flink jobs. Lyft uses a blue/green deployment strategy on critical pipelines to minimize any downtime and uses custom macros for improved observability and seamless integration of the CI/CD deployments. 
To improve developer productivity, the Data Platform team offers a lightweight, YAML-based DSL to abstract the source and sink configurations, and provides reusable Apache Beam PTransforms for filtering and enrichment of incoming events.</p><h2 id=powering-real-time-machine-learning-pipelines>Powering Real-time Machine Learning Pipelines</h2><p>Lyft Marketplace plays a pivotal role in optimizing fleet demand and supply prediction, dynamic pricing, ETA calculation, and more. The Apache Beam Python SDK and Flink Runner enable the team to be nimble to change and support the demands for real-time ML – streaming feature generation and model execution. The Data Platform team has extended the streaming infrastructure to support Continual Learning use cases. Apache Beam powers continuous training of ML models with real-time data over larger windows of 2 hours to identify and fine-tune biases in cost and ETA.</p><div class=post-scheme><img src=/images/case-study/lyft/apache_beam_ml_features_generation.svg alt="Apache Beam Feature Generation and ML Model Execution">
<span>Apache Beam Feature Generation and ML Model Execution</span></div><p>Lyft separated Feature Generation and ML Model Execution into multiple streaming pipelines. The streaming Apache Beam pipeline generates features in real-time and writes them to a Kafka topic to be consumed by the model execution pipeline. Based on user configuration, the features are replicated and keyed out by model ID to <a href=/blog/stateful-processing/>stateful</a> ParDo transforms, which leverage <a href=/documentation/programming-guide/#timers>timers</a> and/or data (feature) availability to invoke ML models. Features are stored in a global window and the <a href=/documentation/programming-guide/#state-and-timers>state</a> is explicitly cleaned up. The ML models run as part of the Model Serving infrastructure and their output can be an input feature to another ML model. To support this DAG workflow, Apache Beam pipelines write the output to Kafka and feed it to the model execution streaming pipeline for processing, in addition to writing it to Redis.</p><p>The complex real-time Feature Generation involves processing ~4 million events of 1KB per minute with sub-second latency, generating ~100 features on multiple event attributes across space and time granularities (1 and 5 minutes). Apache Beam allowed the Lyft Marketplace team to reduce latency by <a href=https://conferences.oreilly.com/strata/strata-ca-2019/cdn.oreillystatic.com/en/assets/1/event/290/The%20magic%20behind%20your%20Lyft%20ride%20prices_%20A%20case%20study%20on%20machine%20learning%20and%20streaming%20Presentation.pdf>60%</a>, significantly simplify the code, and onboard many teams and use cases onto streaming.</p><blockquote class="case-study-quote-block case-study-quote-wrapped"><p class=case-study-quote-text>The Marketplace team are <a href=https://eng.lyft.com/gotchas-of-stream-processing-data-skewness-cfba58eb45d4>heavy users of Apache Beam</a> for real-time feature computation and model executions. 
Processing events in real-time with a sub-second latency allows our ML models to understand marketplace dynamics early and make informed decisions.</p><div class=case-study-quote-author><div class=case-study-quote-author-img><img src=/images/case-study/lyft/ravi_kiran_magham.png></div><div class=case-study-quote-author-info><div class=case-study-quote-author-name>Ravi Kiran Magham</div><div class=case-study-quote-author-position>Software Engineer @ Lyft</div></div></div></blockquote><h2 id=amplifying-use-cases>Amplifying Use Cases</h2><p>Lyft has leveraged Apache Beam for more than 60 use cases and enabled them to complete critical business commitments and improve real-time user experiences.</p><p>For example, Lyft&rsquo;s Map Data Delivery team moved from a batch process to a streaming pipeline for identifying road closures in real-time. Their Routing Engine uses this information to determine the best routes, improve ETA and provide a better driver and customer experience. The job processes ~400k events per second, conflates streams of data coming from 3rd party road closures and real-time traffic data to determine actual closures and publish them as events to Kafka. A custom S3 PTransform allows for the job to regularly publish a snapshot of closures for downstream batch processing.</p><p>Apache Beam enabled Lyft to optimize a very specific use case that relates to reporting pick-ups and drop-offs at airports. Airports require mobility applications to report every pick-up and drop-off and match them with the time of fleet entry and exit. Failing to do so results in a lower compliance score and even risk of being penalized. Originally, Lyft had a complicated implementation using the <a href=https://docs.aws.amazon.com/streams/latest/dev/kinesis-record-processor-implementation-app-py.html>KCL library</a> to consume events and store them in Redis. 
Python worker processes ran at regular intervals to consume data from Redis, join and enrich the data with service API calls, and send the output to airport applications. With that implementation, late-arriving updates and out-of-order events significantly impacted the completeness score. Lyft migrated the use case to a streaming Apache Beam pipeline with state and timers to keep events in a global window and manage sessions. Apache Beam helped Lyft achieve a top compliance score by improving the latency of event reporting from 5 to 2 seconds and reducing missing entry/exit data to 1.3%.</p><p>Like many companies shaking up standard business models, Lyft relies on open-source software and likes to give back to the community. Many of the big data frameworks, tools, and implementations developed by Lyft are open-sourced on their <a href=https://github.com/orgs/lyft/repositories>GitHub</a>. Lyft has been an ample Apache Beam contributor since 2018, and Lyft engineers have presented their Apache Beam integrations at various events, such as <a href="https://www.youtube.com/watch?v=D_NA-LY1xP0">Beam Summit North America</a>, <a href=https://2019.berlinbuzzwords.de/sites/2019.berlinbuzzwords.de/files/media/documents/streaming_at_lyft_-_berlin_buzzwords_2019.pdf>Berlin Buzzwords</a>, <a href=https://conferences.oreilly.com/strata/strata-ca-2019/cdn.oreillystatic.com/en/assets/1/event/290/The%20magic%20behind%20your%20Lyft%20ride%20prices_%20A%20case%20study%20on%20machine%20learning%20and%20streaming%20Presentation.pdf>O’Reilly Strata Data & AI</a>, and more.</p><h2 id=results>Results</h2><p>The portability of the Apache Beam model is the key to distributed execution. It enabled Lyft to run mission-critical data pipelines written in a non-JVM language on a JVM-based runner. Thus, they avoided code rewrites and sidestepped the potential cost of many API styles and runtime environments, reducing pipeline development time from multiple days to just hours. 
Full isolation of user code and native CPython execution without library restrictions resulted in easy onboarding and adoption. Apache Beam’s multi-language and cross-language capabilities solved Lyft’s programming language dilemma. With the unified programming model, Lyft is no longer tied to a specific technology stack.</p><p>Apache Beam enabled Lyft to switch from batch ML model training to real-time ML training with granular control of data freshness using windowing. Their data engineering and product teams can use both Python and Java, based on the appropriateness for a particular task or their preference. Apache Beam has helped Lyft successfully build and scale 60+ streaming pipelines processing events at very low latencies in near-real-time. New use cases keep coming, and Lyft is planning on leveraging <a href=/documentation/dsls/sql/overview/>Beam SQL</a> and the <a href=/documentation/sdks/go/>Go SDK</a> to provide a full range of Apache Beam multi-language capabilities for their teams.</p><div class=case-study-feedback id=case-study-feedback><p class=case-study-feedback-title>Was this information useful?</p><div><button class="btn case-study-feedback-btn" onclick='sendCaseStudyFeedback(!0,"Lyft")'>Yes</button>
<button class="btn case-study-feedback-btn" onclick='sendCaseStudyFeedback(!1,"Lyft")'>No</button></div></div></div><div class=clear-nav></div></div></div></div></article></div></div><footer class=footer><div class=footer__contained><div class=footer__cols><div class="footer__cols__col footer__cols__col__logos"><div class=footer__cols__col__logo><img src=/images/beam_logo_circle.svg class=footer__logo alt="Beam logo"></div><div class=footer__cols__col__logo><img src=/images/apache_logo_circle.svg class=footer__logo alt="Apache logo"></div></div><div class=footer-wrapper><div class=wrapper-grid><div class=footer__cols__col><div class=footer__cols__col__title>Start</div><div class=footer__cols__col__link><a href=/get-started/beam-overview/>Overview</a></div><div class=footer__cols__col__link><a href=/get-started/quickstart-java/>Quickstart (Java)</a></div><div class=footer__cols__col__link><a href=/get-started/quickstart-py/>Quickstart (Python)</a></div><div class=footer__cols__col__link><a href=/get-started/quickstart-go/>Quickstart (Go)</a></div><div class=footer__cols__col__link><a href=/get-started/downloads/>Downloads</a></div></div><div class=footer__cols__col><div class=footer__cols__col__title>Docs</div><div class=footer__cols__col__link><a href=/documentation/programming-guide/>Concepts</a></div><div class=footer__cols__col__link><a href=/documentation/pipelines/design-your-pipeline/>Pipelines</a></div><div class=footer__cols__col__link><a href=/documentation/runners/capability-matrix/>Runners</a></div></div><div class=footer__cols__col><div class=footer__cols__col__title>Community</div><div class=footer__cols__col__link><a href=/contribute/>Contribute</a></div><div class=footer__cols__col__link><a href=https://projects.apache.org/committee.html?beam target=_blank>Team<img src=/images/external-link-icon.png width=14 height=14 alt="External link."></a></div><div class=footer__cols__col__link><a href=/community/presentation-materials/>Media</a></div><div 
class=footer__cols__col__link><a href=/community/in-person/>Events/Meetups</a></div><div class=footer__cols__col__link><a href=/community/contact-us/>Contact Us</a></div></div><div class=footer__cols__col><div class=footer__cols__col__title>Resources</div><div class=footer__cols__col__link><a href=/blog/>Blog</a></div><div class=footer__cols__col__link><a href=https://github.com/apache/beam>GitHub</a></div></div></div><div class=footer__bottom>&copy;
<a href=https://www.apache.org>The Apache Software Foundation</a>
| <a href=/privacy_policy>Privacy Policy</a>
| <a href=/feed.xml>RSS Feed</a><br><br>Apache Beam, Apache, Beam, the Beam logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation. All other products or name brands are trademarks of their respective holders, including The Apache Software Foundation.</div></div><div class="footer__cols__col footer__cols__col__logos"><div class=footer__cols__col--group><div class=footer__cols__col__logo><a href=https://github.com/apache/beam><img src=/images/logos/social-icons/github-logo-150.png class=footer__logo alt="Github logo"></a></div><div class=footer__cols__col__logo><a href=https://www.linkedin.com/company/apache-beam/><img src=/images/logos/social-icons/linkedin-logo-150.png class=footer__logo alt="Linkedin logo"></a></div></div><div class=footer__cols__col--group><div class=footer__cols__col__logo><a href=https://twitter.com/apachebeam><img src=/images/logos/social-icons/twitter-logo-150.png class=footer__logo alt="Twitter logo"></a></div><div class=footer__cols__col__logo><a href=https://www.youtube.com/channel/UChNnb_YO_7B0HlW6FhAXZZQ><img src=/images/logos/social-icons/youtube-logo-150.png class=footer__logo alt="Youtube logo"></a></div></div></div></div></div></footer>
<script>
/**
 * Click handler for the "Was this information useful?" buttons.
 *
 * Sends a Google Analytics event (category "Case Study Feedback", action
 * "Click") labelled "<caseStudyName>, yes" or "<caseStudyName>, no", then
 * replaces the contents of #case-study-feedback with a thank-you message.
 *
 * @param {boolean} wasUseful - true when "Yes" was clicked, false for "No".
 * @param {string} caseStudyName - label prefix for the event, e.g. "Lyft".
 */
function sendCaseStudyFeedback(wasUseful, caseStudyName) {
  // The confirmation must not depend on analytics: if the `ga` global is
  // absent (bootstrap snippet blocked or removed), calling it would throw
  // and the visitor would get no response to the click.
  if (typeof ga === "function") {
    ga("send", "event", "Case Study Feedback", "Click", caseStudyName + (wasUseful ? ", yes" : ", no"));
  }

  var feedbackBox = document.getElementById("case-study-feedback");
  // Guard against the feedback container being missing from the page.
  if (feedbackBox) {
    feedbackBox.innerHTML = '<p class="case-study-feedback-title">Thank you for your feedback!</p>';
  }
}
</script></body></html>