| <!DOCTYPE html>
|
| <html lang="en"> |
| <head> |
|   <!-- Core document metadata; the generator tag below marks this page as Hugo output. --> |
|   <meta charset="utf-8"> |
|   <title>Efficient, Low Latency Ingestion to Large Tables via Apache Flink and Apache Iceberg | Community Over Code Europe</title> |
|   <meta name="viewport" content="width=device-width, initial-scale=1"> |
|   <meta name="description" content="Community Over Code Europe is the annual gathering in Europe of the Apache Software Foundation community."> |
|   <meta name="generator" content="Hugo 0.119.0"> |
| |
|   <!-- Open Graph metadata for link previews. --> |
|   <meta property="og:title" content="Efficient, Low Latency Ingestion to Large Tables via Apache Flink and Apache Iceberg"> |
|   <meta property="og:description" content="One of the primary challenges of data ingestion is the tradeoff between the latency of data availability for the downstream systems and the extent to which data is optimised for efficient reading. When ingesting continuous incoming data streams with low latency, Apache Flink is a data processing engine that shines. Apache Iceberg is one of the most popular table formats for large tables. To get the best of both worlds, and continuously ingest data and see near real-time changes to tables queried by various engines, tight integration is needed between these two Apache projects."> |
|   <meta property="og:type" content="article"> |
|   <meta property="og:url" content="https://eu.communityovercode.org/sessions/2024/efficient-low-latency-ingestion-to-large-tables-via-apache-flink-and-apache-iceberg/"> |
|   <meta property="og:image" content="https://eu.communityovercode.org/images/card.jpg"> |
|   <meta property="article:section" content="sessions"> |
| |
|   <!-- Twitter card metadata; mirrors the Open Graph values above. --> |
|   <meta name="twitter:card" content="summary_large_image"> |
|   <meta name="twitter:image" content="https://eu.communityovercode.org/images/card.jpg"> |
|   <meta name="twitter:title" content="Efficient, Low Latency Ingestion to Large Tables via Apache Flink and Apache Iceberg"> |
|   <meta name="twitter:description" content="One of the primary challenges of data ingestion is the tradeoff between the latency of data availability for the downstream systems and the extent to which data is optimised for efficient reading. When ingesting continuous incoming data streams with low latency, Apache Flink is a data processing engine that shines. Apache Iceberg is one of the most popular table formats for large tables. To get the best of both worlds, and continuously ingest data and see near real-time changes to tables queried by various engines, tight integration is needed between these two Apache projects."> |
| |
|   <!-- Plugin stylesheets; kept ahead of the main stylesheet so the cascade order is unchanged. --> |
|   <link rel="stylesheet" href="/plugins/bootstrap.min.css"> |
|   <link rel="stylesheet" href="/plugins/bootstrap-table.min.css"> |
|   <link rel="stylesheet" href="/plugins/fontawesome.css"> |
| |
|   <!-- Main stylesheet; the v= query string is a cache-busting version stamp. --> |
|   <link rel="stylesheet" href="/scss/style.min.css?v=202406201555" media="screen"> |
| |
|   <!-- Favicon, declared under both rel spellings. --> |
|   <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> |
|   <link rel="icon" href="/favicon.ico" type="image/x-icon"> |
| </head> |
| <body class="interior">
|
| <!-- Site header: fixed-top primary navigation with a collapsible menu. -->
| <header class="header-bar">
|   <nav class="navbar navbar-expand-lg main-nav navbar-light fixed-top">
|     <a class="navbar-brand ml-4 pb-2" href="/">
|       <img src="/images/coc-logo-color.svg" alt="Community Over Code Europe" class="img-fluid logo-b">
|     </a>
|     <!-- Toggler for the collapsible menu; targets #navigation below. -->
|     <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navigation"
|             aria-controls="navigation" aria-expanded="false" aria-label="Toggle navigation">
|       <span class="navbar-toggler-icon"></span>
|     </button>
|     <div class="collapse navbar-collapse text-center my-auto" id="navigation">
|       <ul class="navbar-nav me-auto align-items-center">
|         <li class="nav-item dropdown">
|           <!-- data-bs-toggle matches the data-bs-* attribute form the navbar toggler already uses;
|                the legacy data-toggle is kept for any script or style still keyed on it. -->
|           <a class="nav-link dropdown-toggle" href="#" role="button" data-toggle="dropdown" data-bs-toggle="dropdown"
|              aria-haspopup="true" aria-expanded="false">About</a>
|           <div class="dropdown-menu">
|             <a class="dropdown-item" href="/about">Community Over Code</a>
|             <a class="dropdown-item" href="/about-the-asf">About the ASF</a>
|             <a class="dropdown-item" href="/diversity-and-inclusion">Diversity &amp; Inclusion</a>
|           </div>
|         </li>
|         <li class="nav-item">
|           <a class="nav-link" href="/program">Program</a>
|         </li>
|         <li class="nav-item">
|           <a class="nav-link" href="/speakers">Speakers</a>
|         </li>
|         <li class="nav-item dropdown">
|           <!-- Same dual data attribute as the About dropdown above. -->
|           <a class="nav-link dropdown-toggle" href="#" role="button" data-toggle="dropdown" data-bs-toggle="dropdown"
|              aria-haspopup="true" aria-expanded="false">Venue</a>
|           <div class="dropdown-menu">
|             <a class="dropdown-item" href="/venue">About the venue</a>
|             <a class="dropdown-item" href="/how-to-get-there">How to get there</a>
|           </div>
|         </li>
|         <li class="nav-item">
|           <a class="nav-link" href="/#latest-news">News</a>
|         </li>
|         <li class="nav-item">
|           <a class="nav-link" href="/faq">FAQ</a>
|         </li>
|         <li class="nav-item">
|           <a id="nav-button" href="/tickets" class="btn btn-orange text-white btn-rounded">Tickets</a>
|         </li>
|       </ul>
|     </div>
|   </nav>
| </header>
|
|
|
|
|
| <!-- Page banner: the session title (the page's only h1) followed by an empty divider element. -->
| <section class="page-header">
|   <div class="container">
|     <div class="row justify-content-center">
|       <div class="col-lg-8">
|         <div class="content text-center">
|           <h1 class="mb-3">Efficient, Low Latency Ingestion to Large Tables via Apache Flink and Apache Iceberg</h1>
|           <div class="divider mx-auto mb-4 bg-secondary"></div>
|         </div>
|       </div>
|     </div>
|   </div>
| </section>
|
| |
| |
| |
| <!-- Session detail: speaker sidebar (d-none d-lg-block, so only displayed from the lg breakpoint up) |
|      beside the schedule slot, a compact byline for smaller viewports, and the abstract. --> |
| <section class="speaker-detail"> |
|   <div class="container"> |
|     <div class="row mt-4"> |
|       <div class="image-column col-lg-3 d-none d-lg-block"> |
|         <div class="schedule-block col-lg-10 col-md-12 col-sm-12"> |
|           <div class="sec-title text-center"> |
|             <span class="title">Speaker(s):</span> |
|             <div class="speaker-info" style="margin-bottom: 20px;"> |
|               <figure class="thumb my-3"> |
|                 <a href="/speakers/marton-balassi/"> |
|                   <div class="img-container"> |
|                     <!-- alt names the person pictured rather than exposing the source image path. --> |
|                     <img src="/images/speakers/marton-balassi_hud740ec621704666a399972511b756adb_156847_400x0_resize_q75_h2_box.webp" alt="Photo of Marton Balassi" class="img-fluid rounded-circle"> |
|                   </div> |
|                   <h5 class="name">Marton Balassi</h5> |
|                 </a> |
|               </figure> |
|               <figure class="thumb my-3"> |
|                 <a href="/speakers/peter-vary/"> |
|                   <div class="img-container"> |
|                     <!-- alt names the person pictured rather than exposing the source image path. --> |
|                     <img src="/images/speakers/peter-vary_hub7386a9c8c142272313e3834b6c2f30e_171278_400x0_resize_q75_h2_box.webp" alt="Photo of Peter Vary" class="img-fluid rounded-circle"> |
|                   </div> |
|                   <h5 class="name">Peter Vary</h5> |
|                 </a> |
|               </figure> |
|             </div> |
|           </div> |
|         </div> |
|       </div> |
|       <div class="info-column col-lg-9 col-md-12 col-sm-12"> |
|         <div class="inner-column"> |
|           <div class="text-box"> |
|             <div class="session-meta" id="date"> |
|               <!-- Schedule slot: date, time range and room. --> |
|               <div> |
|                 <em>Jun-05 11:10-11:40 in Symphony</em> |
|               </div> |
|               <!-- Inline byline; d-lg-none hides it where the speaker sidebar is displayed. --> |
|               <div class="d-lg-none d-xl-none"> |
|                 By |
|                 <a class="speaker-inline-item" href="https://eu.communityovercode.org/speakers/marton-balassi/">Marton Balassi</a> |
|                 <a class="speaker-inline-item" href="https://eu.communityovercode.org/speakers/peter-vary/">Peter Vary</a> |
|               </div> |
|               <!-- Session abstract. --> |
|               <div class="content mt-4"> |
|                 <p>One of the primary challenges of data ingestion is the tradeoff between the latency of data availability for the downstream systems and the extent to which data is optimised for efficient reading. When ingesting continuous incoming data streams with low latency, Apache Flink is a data processing engine that shines. Apache Iceberg is one of the most popular table formats for large tables. To get the best of both worlds, and continuously ingest data and see near real-time changes to tables queried by various engines, tight integration is needed between these two Apache projects.</p> |
|                 <p>Basic integration has been available in open source for a long time, but when processing high volume data, the performance becomes crucial. Near real-time read from Iceberg tables needs frequent commits, and each commit creates a new set of files. On the other hand, reading from Iceberg tables is more optimal when the number of files is smaller. There are several ongoing projects to balance these needs and keep the number of files small. Balanced writes help when the number of partitions is comparable to the parallelization level. Performing periodic compaction helps when the write throughput is more important and additional resources could be used to rewrite the data in a more optimal format.</p> |
|                 <p>Development of these new features required changes in both the Apache Flink and the Apache Iceberg code base. In our talk we discuss the planning process coordinating two Apache communities, the implementation and the synchronization between projects. We compare our approach with alternative solutions like Apache Hudi and Apache Paimon, highlight the pros and cons of the different solutions, and showcase the possibilities in a brief demo.</p> |
|               </div> |
|             </div> |
|           </div> |
|         </div> |
|       </div> |
|     </div> |
|   </div> |
| </section> |
| |
| |
| |
|
|
|
|
| <!-- Site footer: logo and contact/social icons, policy links, and the code-of-conduct notice. -->
| <footer>
|   <div class="container-fluid">
|     <div class="container py-5">
|       <div class="d-flex justify-content-between">
|         <div class="col-6 col-md-4 col-lg-3">
|           <div class="mb-3"><img src="/images/logo-h.svg" class="img-fluid" alt="Community Over Code Europe"></div>
|           <!-- Icon-only links: the icon is aria-hidden, so the link's title attribute carries its name. -->
|           <ul class="list-inline mb-0">
|             <li class="list-inline-item mx-2 h3" data-toggle="tooltip" data-placement="top" title="" aria-label="Email us" data-original-title="Email us">
|               <a title="Email us" target="_blank" href="mailto:coceu@sg.com.mx?subject=[EU]">
|                 <i class="fa fa-envelope" aria-hidden="true"></i>
|               </a>
|             </li>
|             <li class="list-inline-item mx-2 h3" data-toggle="tooltip" data-placement="top" title="" aria-label="Slack" data-original-title="Slack">
|               <!-- rel="noopener noreferrer": the new tab gets no window.opener handle and no referrer. -->
|               <a title="Slack" target="_blank" rel="noopener noreferrer" href="https://s.apache.org/apachecon-slack">
|                 <i class="fab fa-slack" aria-hidden="true"></i>
|               </a>
|             </li>
|             <li class="list-inline-item mx-2 h3" data-toggle="tooltip" data-placement="top" title="" aria-label="Watch us on YouTube" data-original-title="Watch us on YouTube">
|               <!-- rel="noopener noreferrer": the new tab gets no window.opener handle and no referrer. -->
|               <a title="Watch us on YouTube" target="_blank" rel="noopener noreferrer" href="https://www.youtube.com/@communityovercode">
|                 <i class="fab fa-youtube" aria-hidden="true"></i>
|               </a>
|             </li>
|           </ul>
|         </div>
|         <div class="col-md-6 text-right">
|           <div class="footer-links">
|             <ul>
|               <li><a href="/coc">Code of Conduct</a></li>
|               <li><a href="/accessibility">Accessibility</a></li>
|               <li><a href="/privacy">Privacy Policy</a></li>
|               <li><a href="/team">Organizers</a></li>
|               <li><a href="https://communityovercode.org/wp-content/uploads/2023/12/community-over-code-prospectus-2024.pdf">Prospectus</a></li>
|             </ul>
|           </div>
|         </div>
|       </div>
|     </div>
|   </div>
|   <div class="footer-section footer-section__policies-section bg-dark">
|     <div class="container my-0 footer-section__policies-section--disclaimer">
|       Community Over Code operates under the terms of <a href="https://apache.org/foundation/policies/conduct">The ASF Code of Conduct</a>.
|     </div>
|   </div>
| </footer>
|
|
|
|
|
| <!-- JS Plugins: each script is included exactly once, jQuery first. -->
| <script src="/plugins/jquery.min.js"></script>
| <script src="/plugins/bootstrap.bundle.min.js"></script>
| <script src="/plugins/bootstrap-table.min.js"></script>
| <script src="https://js.tito.io/v2"></script>
|
| <!-- Matomo analytics bootstrap: queues tracker commands, then injects matomo.js asynchronously. -->
| <script>
|   var _paq = window._paq = window._paq || [];
|   // Commands are queued on _paq; disableCookies is pushed before trackPageView.
|   _paq.push(["disableCookies"]);
|   _paq.push(['trackPageView']);
|   _paq.push(['enableLinkTracking']);
|   (function() {
|     var u="https://analytics.apache.org/";
|     _paq.push(['setTrackerUrl', u+'matomo.php']);
|     _paq.push(['setSiteId', '39']);
|     // Insert the tracker script ahead of the first <script> element in the document.
|     var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
|     g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
|   })();
| </script>
|
|
|
| </body>
|
|
|
| </html> |