blob: 0d994505fcd767a90bdb6e164d3700d0441dc8d5 [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Ttl</title>
<link rel="stylesheet" href="/zh/assets/css/app.css">
<link rel="shortcut icon" href="/zh/assets/images/favicon.ico">
<link rel="stylesheet" href="/zh/assets/css/utilities.min.css">
<link rel="stylesheet" href="/zh/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>Ttl | Pegasus</title>
<meta name="generator" content="Jekyll v4.3.2" />
<meta property="og:title" content="Ttl" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="原理 Pegasus支持TTL(Time-To-Live)功能,即在写入数据的时候,可以指定数据的过期时间。一旦过期,数据对用户就是不可见的,通过get/multiGet等查询接口获取不到数据,就跟数据没有写入一样。" />
<meta property="og:description" content="原理 Pegasus支持TTL(Time-To-Live)功能,即在写入数据的时候,可以指定数据的过期时间。一旦过期,数据对用户就是不可见的,通过get/multiGet等查询接口获取不到数据,就跟数据没有写入一样。" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2023-11-23T14:51:44+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Ttl" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2023-11-23T14:51:44+00:00","datePublished":"2023-11-23T14:51:44+00:00","description":"原理 Pegasus支持TTL(Time-To-Live)功能,即在写入数据的时候,可以指定数据的过期时间。一旦过期,数据对用户就是不可见的,通过get/multiGet等查询接口获取不到数据,就跟数据没有写入一样。","headline":"Ttl","mainEntityOfPage":{"@type":"WebPage","@id":"/api/ttl"},"url":"/api/ttl"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/zh/">
<img src="/assets/images/pegasus-logo-inv.png" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">Pegasus产品文档</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/downloads"
class="">
下载
</a>
</li>
</ul>
<p class="menu-label">编译构建</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/build/compile-by-docker"
class="">
使用Docker完成编译(推荐)
</a>
</li>
<li>
<a href="/zh/docs/build/compile-from-source"
class="">
从源码编译
</a>
</li>
</ul>
<p class="menu-label">客户端库</p>
<ul class="menu-list">
<li>
<a href="/zh/clients/java-client"
class="">
Java客户端
</a>
</li>
<li>
<a href="/zh/clients/cpp-client"
class="">
C++客户端
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang客户端
</a>
</li>
<li>
<a href="/zh/clients/python2-client"
class="">
Python2客户端
</a>
</li>
<li>
<a href="/zh/clients/python3-client"
class="">
Python3客户端
</a>
</li>
<li>
<a href="/zh/clients/node-client"
class="">
NodeJS客户端
</a>
</li>
<li>
<a href="/zh/clients/scala-client"
class="">
Scala客户端
</a>
</li>
</ul>
<p class="menu-label">生态工具</p>
<ul class="menu-list">
<li>
<a href="/zh/docs/tools/shell"
class="">
Pegasus Shell 工具
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
集群管理命令行
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
数据访问命令行
</a>
</li>
</ul>
<p class="menu-label">用户接口</p>
<ul class="menu-list">
<li>
<a href="/zh/api/ttl"
class="is-active">
TTL
</a>
</li>
<li>
<a href="/zh/api/single-atomic"
class="">
单行原子操作
</a>
</li>
<li>
<a href="/zh/api/redis"
class="">
Redis适配
</a>
</li>
<li>
<a href="/zh/api/geo"
class="">
GEO支持
</a>
</li>
<li>
<a href="/zh/api/http"
class="">
HTTP接口
</a>
</li>
</ul>
<p class="menu-label">高效运维</p>
<ul class="menu-list">
<li>
<a href="/zh/administration/deployment"
class="">
集群部署
</a>
</li>
<li>
<a href="/zh/administration/config"
class="">
配置说明
</a>
</li>
<li>
<a href="/zh/administration/rebalance"
class="">
负载均衡
</a>
</li>
<li>
<a href="/zh/administration/monitoring"
class="">
可视化监控
</a>
</li>
<li>
<a href="/zh/administration/rolling-update"
class="">
集群升级
</a>
</li>
<li>
<a href="/zh/administration/scale-in-out"
class="">
集群扩容缩容
</a>
</li>
<li>
<a href="/zh/administration/resource-management"
class="">
资源管理
</a>
</li>
<li>
<a href="/zh/administration/cold-backup"
class="">
冷备份
</a>
</li>
<li>
<a href="/zh/administration/meta-recovery"
class="">
元数据恢复
</a>
</li>
<li>
<a href="/zh/administration/replica-recovery"
class="">
Replica数据恢复
</a>
</li>
<li>
<a href="/zh/administration/zk-migration"
class="">
Zookeeper迁移
</a>
</li>
<li>
<a href="/zh/administration/table-migration"
class="">
Table迁移
</a>
</li>
<li>
<a href="/zh/administration/table-soft-delete"
class="">
Table软删除
</a>
</li>
<li>
<a href="/zh/administration/table-env"
class="">
Table环境变量
</a>
</li>
<li>
<a href="/zh/administration/remote-commands"
class="">
远程命令
</a>
</li>
<li>
<a href="/zh/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/zh/administration/duplication"
class="">
跨机房同步
</a>
</li>
<li>
<a href="/zh/administration/compression"
class="">
数据压缩
</a>
</li>
<li>
<a href="/zh/administration/throttling"
class="">
流量控制
</a>
</li>
<li>
<a href="/zh/administration/experiences"
class="">
运维经验
</a>
</li>
<li>
<a href="/zh/administration/manual-compact"
class="">
Manual Compact功能
</a>
</li>
<li>
<a href="/zh/administration/usage-scenario"
class="">
Usage Scenario功能
</a>
</li>
<li>
<a href="/zh/administration/bad-disk"
class="">
坏盘检修
</a>
</li>
<li>
<a href="/zh/administration/whitelist"
class="">
白名单
</a>
</li>
<li>
<a href="/zh/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/zh/administration/hotspot-detection"
class="">
热点检测
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/zh/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /api/ttl"><strong>En</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Pegasus产品文档
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/downloads"
class="navbar-item ">
下载
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
编译构建
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/build/compile-by-docker"
class="navbar-item ">
使用Docker完成编译(推荐)
</a>
<a href="/zh/docs/build/compile-from-source"
class="navbar-item ">
从源码编译
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
客户端库
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/clients/java-client"
class="navbar-item ">
Java客户端
</a>
<a href="/zh/clients/cpp-client"
class="navbar-item ">
C++客户端
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang客户端
</a>
<a href="/zh/clients/python2-client"
class="navbar-item ">
Python2客户端
</a>
<a href="/zh/clients/python3-client"
class="navbar-item ">
Python3客户端
</a>
<a href="/zh/clients/node-client"
class="navbar-item ">
NodeJS客户端
</a>
<a href="/zh/clients/scala-client"
class="navbar-item ">
Scala客户端
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
生态工具
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/docs/tools/shell"
class="navbar-item ">
Pegasus Shell 工具
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
集群管理命令行
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
数据访问命令行
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
用户接口
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/api/ttl"
class="navbar-item is-active">
TTL
</a>
<a href="/zh/api/single-atomic"
class="navbar-item ">
单行原子操作
</a>
<a href="/zh/api/redis"
class="navbar-item ">
Redis适配
</a>
<a href="/zh/api/geo"
class="navbar-item ">
GEO支持
</a>
<a href="/zh/api/http"
class="navbar-item ">
HTTP接口
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
高效运维
</span>
</a>
<div class="navbar-dropdown">
<a href="/zh/administration/deployment"
class="navbar-item ">
集群部署
</a>
<a href="/zh/administration/config"
class="navbar-item ">
配置说明
</a>
<a href="/zh/administration/rebalance"
class="navbar-item ">
负载均衡
</a>
<a href="/zh/administration/monitoring"
class="navbar-item ">
可视化监控
</a>
<a href="/zh/administration/rolling-update"
class="navbar-item ">
集群升级
</a>
<a href="/zh/administration/scale-in-out"
class="navbar-item ">
集群扩容缩容
</a>
<a href="/zh/administration/resource-management"
class="navbar-item ">
资源管理
</a>
<a href="/zh/administration/cold-backup"
class="navbar-item ">
冷备份
</a>
<a href="/zh/administration/meta-recovery"
class="navbar-item ">
元数据恢复
</a>
<a href="/zh/administration/replica-recovery"
class="navbar-item ">
Replica数据恢复
</a>
<a href="/zh/administration/zk-migration"
class="navbar-item ">
Zookeeper迁移
</a>
<a href="/zh/administration/table-migration"
class="navbar-item ">
Table迁移
</a>
<a href="/zh/administration/table-soft-delete"
class="navbar-item ">
Table软删除
</a>
<a href="/zh/administration/table-env"
class="navbar-item ">
Table环境变量
</a>
<a href="/zh/administration/remote-commands"
class="navbar-item ">
远程命令
</a>
<a href="/zh/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/zh/administration/duplication"
class="navbar-item ">
跨机房同步
</a>
<a href="/zh/administration/compression"
class="navbar-item ">
数据压缩
</a>
<a href="/zh/administration/throttling"
class="navbar-item ">
流量控制
</a>
<a href="/zh/administration/experiences"
class="navbar-item ">
运维经验
</a>
<a href="/zh/administration/manual-compact"
class="navbar-item ">
Manual Compact功能
</a>
<a href="/zh/administration/usage-scenario"
class="navbar-item ">
Usage Scenario功能
</a>
<a href="/zh/administration/bad-disk"
class="navbar-item ">
坏盘检修
</a>
<a href="/zh/administration/whitelist"
class="navbar-item ">
白名单
</a>
<a href="/zh/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/zh/administration/hotspot-detection"
class="navbar-item ">
热点检测
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<!--If you don't want a url to be relativized, you can add a space explicitly into the href to
prevents a url from being relativized by polyglot.-->
<a class="button is-light is-outlined is-inverted" href=" /api/ttl"><strong>En</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">TTL</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<h1 id="原理">原理</h1>
<p>Pegasus支持TTL(Time-To-Live)功能,即在写入数据的时候,可以指定数据的过期时间。一旦过期,数据对用户就是不可见的,通过get/multiGet等查询接口获取不到数据,就跟数据没有写入一样。</p>
<p>设置的时候,用户通常都是提供<code class="language-plaintext highlighter-rouge">ttl_seconds</code>参数,表示从当前时间开始计算,多少秒之后为过期时间。如果为0,表示不设置TTL,即数据用不过期。</p>
<p>用户通常有疑问,数据过期后对用户不可见是怎么做到的呢?数据会被立即删除吗?实际上不是这样的,这要从TTL的实现原理说起。</p>
<p>Pegasus的TTL是通过在RocksDB中存储数据时记录数据的过期时间,然后在查询时对过期时间进行检查和过滤来实现的。如下图:</p>
<p><img src="/assets/images/pegasus-ttl.png" alt="pegasus-ttl.png" class="img-responsive" /></p>
<p><strong>写入过程:</strong></p>
<ul>
<li>在写入数据时,用户在客户端通过<code class="language-plaintext highlighter-rouge">ttl_seconds</code>参数设置TTL时间,客户端先计算数据的过期时间<code class="language-plaintext highlighter-rouge">ExpireTime = CurrentTime + ttl_seconds</code>,然后通过RPC将数据和ExpireTime一起传给ReplicaServer端执行。</li>
<li>ReplicaServer收到写请求后,经过各种处理(包括写日志、replication复制等),最后将数据存储到RocksDB中。在存储value的时候,会将ExpireTime放在value头部固定的4个字节中。</li>
</ul>
<p><strong>读取过程:</strong></p>
<ul>
<li>用户通过客户端查询指定key对应的value数据。</li>
<li>ReplicaServer收到读请求后,先从RocksDB获取到key对应的value,然后从value头部提取出ExpireTime:
<ul>
<li>如果ExpireTime == 0,表示数据没有设置TTL,是有效的。</li>
<li>如果ExpireTime &gt; 0,表示数据设置了TTL,则将ExpireTime与当前时间进行对比:如果没有过期,则数据是有效的;如果已经过期,则数据是无效的,返回NotFound。</li>
</ul>
</li>
</ul>
<p><strong>数据删除:</strong></p>
<ul>
<li>数据过期后,并不能立即从RocksDB中消失,而是通过compaction来进行删除的。我们定制了RocksDB的CompactionFilter,使其在compaction过程中检查数据value头部的ExpireTime,如果已经过期,则数据会被扔掉,不会出现在新生成的文件中。</li>
<li>因为删除过程是异步的,与compaction的执行时机和频率有关,所以数据过期与数据删除通常不是同时发生的,唯一能保证的是数据删除肯定发生在数据过期之后。未删除的过期数据会占据磁盘空间,这点是需要考虑到的。</li>
</ul>
<h1 id="接口">接口</h1>
<p>我们在客户端和Shell工具都提供了设置和查询TTL的接口。</p>
<p>Pegasus Java Client中以下接口可以查询和设置TTL:</p>
<ul>
<li><a href="/zh/clients/java-client#ttl">ttl</a>:获取指定数据的TTL信息。</li>
<li><a href="/zh/clients/java-client#set">set</a><a href="/zh/clients/java-client#batchset">batchSet</a>:都提供了设置TTL的参数,其中batchSet是在SetItem中设置的。</li>
<li><a href="/zh/clients/java-client#multiset">multiSet</a><a href="/zh/clients/java-client#batchmultiset">batchMultiSet</a>:都提供了设置TTL的参数。</li>
<li><a href="/zh/clients/java-client#batchmultiset">incr</a>:从v1.11.1版本开始,incr接口也提供了修改TTL的功能。</li>
<li><a href="/zh/clients/java-client#checkandset">checkAndSet</a>:在CheckAndSetOptions中提供了设置TTL的参数。</li>
</ul>
<p>Shell工具中以下命令可以查询和设置TTL:</p>
<ul>
<li><a href="/zh/overview/shell#ttl">ttl</a>命令:获取指定数据的TTL信息。</li>
<li><a href="/zh/overview/shell#set">set</a><a href="/zh/overview/shell#multi_set">multi_set</a>命令:都提供了设置TTL的参数。</li>
</ul>
<h1 id="表级ttl">表级TTL</h1>
<p>从v1.11.2版本开始,Pegasus支持表级TTL功能。</p>
<p>实现原理:</p>
<ul>
<li>用户在<a href="/zh/administration/table-env">Table环境变量</a>中设置<code class="language-plaintext highlighter-rouge">default_ttl</code>环境变量。</li>
<li>MetaServer将环境变量异步地通知到各个ReplicaServer,使该表的每个replica都获取到该环境变量,这个过程大约有几秒到几十秒不等的延迟,但是不会超过一分钟。</li>
<li>replica获得环境变量后,解析获得default_ttl配置,并立即开始生效。生效之后:
<ul>
<li>用户新写入的数据,如果TTL=0(使用默认TTL=0或者显式设置TTL=0),则将数据的实际TTL设置为default_ttl。</li>
<li>RocksDB在进行compaction的时候,如果compact输入文件的原数据没有TTL,则将compact输出文件的新数据的TTL设置为default_ttl。这个过程依赖于compaction的触发时机,所以时间点是不确定的。</li>
<li>如果执行<a href="/zh/administration/manual-compact">Manual Compact</a>,那么所有文件都会经过compaction处理,原来没有TTL的数据都会设置TTL为default_ttl。</li>
</ul>
</li>
</ul>
<p>考虑这样的场景:业务方在初期写入数据时没有设置TTL,后来改变需求,希望所有数据都加TTL,并且以前没有设置TTL的数据从现在开始计算TTL,那么就可以通过<code class="language-plaintext highlighter-rouge">表级TTL</code>加上<code class="language-plaintext highlighter-rouge">Manual Compact</code>的功能实现这个目的。</p>
<h1 id="通过ttl计算数据写入时间">通过TTL计算数据写入时间</h1>
<p>如果数据写入时设置了TTL,就可以通过TTL计算出数据写入时间。依据的公式是:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>TTLExpireTime = InsertTime + TTLSeconds = now + TTLRemainingSeconds
==&gt;
InsertTime = now + TTLRemainingSeconds - TTLSeconds
</code></pre></div></div>
<p>其中:</p>
<ul>
<li>TTLRemainingSeconds:通过<a href="/zh/overview/shell#ttl">Shell的ttl命令</a>获取。</li>
<li>now:执行Shell ttl命令的时间。</li>
<li>TTLSeconds:用户知道数据写入时设置的TTL。</li>
</ul>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
本页导航
</p>
<ul class="menu-list">
<li><a href="#原理">原理</a></li>
<li><a href="#接口">接口</a></li>
<li><a href="#表级ttl">表级TTL</a></li>
<li><a href="#通过ttl计算数据写入时间">通过TTL计算数据写入时间</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
docsearch({
container: '#docsearch',
appId: 'QRN30RBW0S',
indexName: 'pegasus-apache',
apiKey: 'd3a3252fa344359766707a106c4ed88f',
debug: true
});
</script>
</body>
</html>