blob: 5a61d59ea82bfc4ae21500958d05566df5565321 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Apache Flink: Apache Flink 是什么?</title>
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Bootstrap -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<link rel="stylesheet" href="/css/flink.css">
<link rel="stylesheet" href="/css/syntax.css">
<!-- Blog RSS feed -->
<link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" />
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
<!-- We need to load Jquery in the header for custom google analytics event tracking-->
<script src="/js/jquery.min.js"></script>
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
</head>
<body>
<!-- Main content. -->
<div class="container">
<div class="row">
<div id="sidebar" class="col-sm-3">
<!-- Top navbar. -->
<nav class="navbar navbar-default">
<!-- The logo. -->
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<div class="navbar-logo">
<a href="/zh/">
<img alt="Apache Flink" src="/img/flink-header-logo.svg" width="147px" height="73px">
</a>
</div>
</div><!-- /.navbar-header -->
<!-- The navigation links. -->
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav navbar-main">
<!-- First menu section explains visitors what Flink is -->
<!-- What is Stream Processing? -->
<!--
<li><a href="/zh/streamprocessing1.html">What is Stream Processing?</a></li>
-->
<!-- What is Flink? -->
<li><a href="/zh/flink-architecture.html">Apache Flink 是什么?</a></li>
<ul class="nav navbar-nav navbar-subnav">
<li class="active">
<a href="/zh/flink-architecture.html">架构</a>
</li>
<li >
<a href="/zh/flink-applications.html">应用</a>
</li>
<li >
<a href="/zh/flink-operations.html">运维</a>
</li>
</ul>
<!-- What is Stateful Functions? -->
<li><a href="/zh/stateful-functions.html">What is Stateful Functions?</a></li>
<!-- Use cases -->
<li><a href="/zh/usecases.html">应用场景</a></li>
<!-- Powered by -->
<li><a href="/zh/poweredby.html">Flink 用户</a></li>
&nbsp;
<!-- Second menu section aims to support Flink users -->
<!-- Downloads -->
<li><a href="/zh/downloads.html">下载</a></li>
<!-- Getting Started -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">教程<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/getting-started/index.html" target="_blank">With Flink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1/getting-started/project-setup.html" target="_blank">With Flink Stateful Functions <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="/zh/training.html">Training Course</a></li>
</ul>
</li>
<!-- Documentation -->
<li class="dropdown">
<a class="dropdown-toggle" data-toggle="dropdown" href="#">文档<span class="caret"></span></a>
<ul class="dropdown-menu">
<li><a href="https://ci.apache.org/projects/flink/flink-docs-release-1.11" target="_blank">Flink 1.11 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-docs-master" target="_blank">Flink Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-release-2.1" target="_blank">Flink Stateful Functions 2.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li><a href="https://ci.apache.org/projects/flink/flink-statefun-docs-master" target="_blank">Flink Stateful Functions Master (Latest Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
</ul>
</li>
<!-- getting help -->
<li><a href="/zh/gettinghelp.html">获取帮助</a></li>
<!-- Blog -->
<li><a href="/blog/"><b>Flink 博客</b></a></li>
<!-- Flink-packages -->
<li>
<a href="https://flink-packages.org" target="_blank">flink-packages.org <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Third menu section aim to support community and contributors -->
<!-- Community -->
<li><a href="/zh/community.html">社区 &amp; 项目信息</a></li>
<!-- Roadmap -->
<li><a href="/zh/roadmap.html">开发计划</a></li>
<!-- Contribute -->
<li><a href="/zh/contributing/how-to-contribute.html">如何参与贡献</a></li>
<!-- GitHub -->
<li>
<a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a>
</li>
&nbsp;
<!-- Language Switcher -->
<li>
<a href="/flink-architecture.html">English</a>
</li>
</ul>
<ul class="nav navbar-nav navbar-bottom">
<hr />
<!-- Twitter -->
<li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<!-- Visualizer -->
<li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<hr />
<li><a href="https://apache.org" target="_blank">Apache Software Foundation <small><span class="glyphicon glyphicon-new-window"></span></small></a></li>
<li>
<style>
.smalllinks:link {
display: inline-block !important; background: none; padding-top: 0px; padding-bottom: 0px; padding-right: 0px; min-width: 75px;
}
</style>
<a class="smalllinks" href="https://www.apache.org/licenses/" target="_blank">License</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/security/" target="_blank">Security</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/sponsorship.html" target="_blank">Donate</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
<a class="smalllinks" href="https://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a> <small><span class="glyphicon glyphicon-new-window"></span></small>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</nav>
</div>
<div class="col-sm-9">
<div class="row-fluid">
<div class="col-sm-12">
<h1>Apache Flink 是什么?</h1>
<hr />
<div class="row">
<div class="col-sm-12" style="background-color: #f8f8f8;">
<h2>
架构 &nbsp;
<span class="glyphicon glyphicon-chevron-right"></span> &nbsp;
<a href="/zh/flink-applications.html">应用</a> &nbsp;
<span class="glyphicon glyphicon-chevron-right"></span> &nbsp;
<a href="/zh/flink-operations.html">运维</a>
</h2>
</div>
</div>
<hr />
<p>Apache Flink 是一个框架和分布式处理引擎,用于在<em>无边界和有边界</em>数据流上进行有状态的计算。Flink 能在所有常见集群环境中运行,并能以内存速度和任意规模进行计算。</p>
<p>接下来,我们来介绍一下 Flink 架构中的重要方面。</p>
<!--
<div class="row front-graphic">
<img src="/img/flink-home-graphic-update3.png" width="800px" />
</div>
-->
<h2 id="section">处理无界和有界数据</h2>
<p>任何类型的数据都可以形成一种事件流。信用卡交易、传感器测量、机器日志、网站或移动应用程序上的用户交互记录,所有这些数据都形成一种流。</p>
<p>数据可以被作为 <em>无界</em> 或者 <em>有界</em> 流来处理。</p>
<ol>
<li>
<p><strong>无界流</strong> 有定义流的开始,但没有定义流的结束。它们会无休止地产生数据。无界流的数据必须持续处理,即数据被摄取后需要立刻处理。我们不能等到所有数据都到达再处理,因为输入是无限的,在任何时候输入都不会完成。处理无界数据通常要求以特定顺序摄取事件,例如事件发生的顺序,以便能够推断结果的完整性。</p>
</li>
<li>
<p><strong>有界流</strong> 有定义流的开始,也有定义流的结束。有界流可以在摄取所有数据后再进行计算。有界流所有数据可以被排序,所以并不需要有序摄取。有界流处理通常被称为批处理</p>
</li>
</ol>
<div class="row front-graphic">
<img src="/img/bounded-unbounded.png" width="600px" />
</div>
<p><strong>Apache Flink 擅长处理无界和有界数据集</strong> 精确的时间控制和状态化使得 Flink 的运行时(runtime)能够运行任何处理无界流的应用。有界流则由一些专为固定大小数据集特殊设计的算法和数据结构进行内部处理,产生了出色的性能。</p>
<p>通过探索 Flink 之上构建的 <a href="/zh/usecases.html">用例</a> 来加深理解。</p>
<h2 id="section-1">部署应用到任意地方</h2>
<p>Apache Flink 是一个分布式系统,它需要计算资源来执行应用程序。Flink 集成了所有常见的集群资源管理器,例如 <a href="https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/YARN.html">Hadoop YARN</a><a href="https://mesos.apache.org">Apache Mesos</a><a href="https://kubernetes.io/">Kubernetes</a>,但同时也可以作为独立集群运行。</p>
<p>Flink 被设计为能够很好地工作在上述每个资源管理器中,这是通过资源管理器特定(resource-manager-specific)的部署模式实现的。Flink 可以采用与当前资源管理器相适应的方式进行交互。</p>
<p>部署 Flink 应用程序时,Flink 会根据应用程序配置的并行性自动标识所需的资源,并从资源管理器请求这些资源。在发生故障的情况下,Flink 通过请求新资源来替换发生故障的容器。提交或控制应用程序的所有通信都是通过 REST 调用进行的,这可以简化 Flink 与各种环境中的集成。</p>
<!-- Add this section once library deployment mode is supported. -->
<!--
Flink 提供了两种应用程序部署模式,即 *框架模式* 和 *库模式*
* 在 **框架部署模式** 中,客户端将 Flink 应用程序提交到一个运行中的 Flink 服务中,由该服务负责执行提交的应用程序。这是大多数数据处理框架、查询引擎或数据库系统的通用部署模型。
* 在 **库部署模式中**,Flink 应用程序与 Flink 主可执行程序一起打包成 (Docker) 映像。另一个独立于作业的映像包含可执行的 Flink 工作程序。当从作业映像启动容器时,将启动 Flink 主进程并自动加载嵌入的应用程序。从工作镜像启动的容器,引导 Flink 工作进程自动连接到主进程。容器管理器(比如 Kubernetes)监控正在运行的容器并自动重启失败的容器。在这种模式下,你不需要在集群中安装和维护 Flink 服务。只需将 Flink 作为库打包到应用程序中。这种模型在部署微服务时非常流行。
<div class="row front-graphic">
<img src="/img/deployment-modes.png" width="600px" />
</div>
-->
<h2 id="section-2">运行任意规模应用</h2>
<p>Flink 旨在任意规模上运行有状态流式应用。因此,应用程序被并行化为可能数千个任务,这些任务分布在集群中并发执行。所以应用程序能够充分利用无尽的 CPU、内存、磁盘和网络 IO。而且 Flink 很容易维护非常大的应用程序状态。其异步和增量的检查点算法对处理延迟产生最小的影响,同时保证精确一次状态的一致性。</p>
<p><a href="/zh/poweredby.html">Flink 用户报告了其生产环境中一些令人印象深刻的扩展性数字</a></p>
<ul>
<li>处理<strong>每天处理数万亿的事件</strong>,</li>
<li>应用维护<strong>几TB大小的状态</strong>, 和</li>
<li>应用<strong>在数千个内核上运行</strong></li>
</ul>
<h2 id="section-3">利用内存性能</h2>
<p>有状态的 Flink 程序针对本地状态访问进行了优化。任务的状态始终保留在内存中,如果状态大小超过可用内存,则会保存在能高效访问的磁盘数据结构中。任务通过访问本地(通常在内存中)状态来进行所有的计算,从而产生非常低的处理延迟。Flink 通过定期和异步地对本地状态进行持久化存储来保证故障场景下精确一次的状态一致性。</p>
<div class="row front-graphic">
<img src="/img/local-state.png" width="600px" />
</div>
<hr />
<div class="row">
<div class="col-sm-12" style="background-color: #f8f8f8;">
<h2>
架构 &nbsp;
<span class="glyphicon glyphicon-chevron-right"></span> &nbsp;
<a href="/zh/flink-applications.html">应用</a> &nbsp;
<span class="glyphicon glyphicon-chevron-right"></span> &nbsp;
<a href="/zh/flink-operations.html">运维</a>
</h2>
</div>
</div>
<hr />
</div>
</div>
</div>
</div>
<hr />
<div class="row">
<div class="footer text-center col-sm-12">
<p>Copyright © 2014-2019 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p>
<p>Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p>
<p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a href="/blog/feed.xml">RSS feed</a></p>
</div>
</div>
</div><!-- /.container -->
<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery.matchHeight/0.7.0/jquery.matchHeight-min.js"></script>
<script src="/js/codetabs.js"></script>
<script src="/js/stickysidebar.js"></script>
<!-- Google Analytics -->
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-52545728-1', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>