blob: c04f2f88bd320fd58c6b6e9416bb05f6ea3e41b6 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Pegasus | Ttl</title>
<link rel="stylesheet" href="/assets/css/app.css">
<link rel="shortcut icon" href="/assets/images/favicon.ico">
<link rel="stylesheet" href="/assets/css/utilities.min.css">
<link rel="stylesheet" href="/assets/css/docsearch.v3.css">
<script src="/assets/js/jquery.min.js"></script>
<script src="/assets/js/all.min.js"></script>
<script src="/assets/js/docsearch.v3.js"></script>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<title>Ttl | Pegasus</title>
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Ttl" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="Principle" />
<meta property="og:description" content="Principle" />
<meta property="og:site_name" content="Pegasus" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2024-04-22T06:39:52+00:00" />
<meta name="twitter:card" content="summary" />
<meta property="twitter:title" content="Ttl" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"BlogPosting","dateModified":"2024-04-22T06:39:52+00:00","datePublished":"2024-04-22T06:39:52+00:00","description":"Principle","headline":"Ttl","mainEntityOfPage":{"@type":"WebPage","@id":"/api/ttl"},"url":"/api/ttl"}</script>
<!-- End Jekyll SEO tag -->
</head>
<body>
<div class="dashboard is-full-height">
<!-- left panel -->
<div class="dashboard-panel is-medium is-hidden-mobile pl-0">
<div class="dashboard-panel-header has-text-centered">
<a href="/">
<img src="/assets/images/pegasus-logo-inv.png" alt="Pegasus home" style="width: 80%;">
</a>
</div>
<div class="dashboard-panel-main is-scrollable pl-6">
<aside class="menu">
<p class="menu-label">The Pegasus documentation</p>
<ul class="menu-list">
<li>
<a href="/docs/downloads"
class="">
Downloads
</a>
</li>
</ul>
<p class="menu-label">Building Pegasus</p>
<ul class="menu-list">
<li>
<a href="/docs/build/compile-by-docker"
class="">
Compile by docker (recommended)
</a>
</li>
<li>
<a href="/docs/build/compile-from-source"
class="">
Compile from source
</a>
</li>
</ul>
<p class="menu-label">Client Libs</p>
<ul class="menu-list">
<li>
<a href="/clients/java-client"
class="">
Java Client
</a>
</li>
<li>
<a href="/clients/cpp-client"
class="">
C++ Client
</a>
</li>
<li>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="">
Golang Client
</a>
</li>
<li>
<a href="/clients/python-client"
class="">
Python Client
</a>
</li>
<li>
<a href="/clients/node-client"
class="">
NodeJS Client
</a>
</li>
<li>
<a href="/clients/scala-client"
class="">
Scala Client
</a>
</li>
</ul>
<p class="menu-label">Tools</p>
<ul class="menu-list">
<li>
<a href="/docs/tools/shell"
class="">
Pegasus Shell
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/admin-cli"
class="">
Admin CLI
</a>
</li>
<li>
<a href="https://github.com/pegasus-kv/pegic"
class="">
Pegasus data access CLI
</a>
</li>
</ul>
<p class="menu-label">API</p>
<ul class="menu-list">
<li>
<a href="/api/ttl"
class="is-active">
TTL(Time To Live)
</a>
</li>
<li>
<a href="/api/single-atomic"
class="">
Single-Atomic Operations
</a>
</li>
<li>
<a href="/api/redis"
class="">
Redis Adaption
</a>
</li>
<li>
<a href="/api/geo"
class="">
GEO Support
</a>
</li>
<li>
<a href="/api/http"
class="">
HTTP API
</a>
</li>
</ul>
<p class="menu-label">Admin</p>
<ul class="menu-list">
<li>
<a href="/administration/deployment"
class="">
Deployment
</a>
</li>
<li>
<a href="/administration/config"
class="">
Configurations
</a>
</li>
<li>
<a href="/administration/rebalance"
class="">
Rebalance
</a>
</li>
<li>
<a href="/administration/monitoring"
class="">
Monitoring
</a>
</li>
<li>
<a href="/administration/rolling-update"
class="">
Rolling Restart and Upgrade
</a>
</li>
<li>
<a href="/administration/scale-in-out"
class="">
Scale-in and Scale-out
</a>
</li>
<li>
<a href="/administration/resource-management"
class="">
Resource Management
</a>
</li>
<li>
<a href="/administration/cold-backup"
class="">
Cold Backup
</a>
</li>
<li>
<a href="/administration/meta-recovery"
class="">
Metadata Recovery
</a>
</li>
<li>
<a href="/administration/replica-recovery"
class="">
Replica Data Recovery
</a>
</li>
<li>
<a href="/administration/zk-migration"
class="">
Zookeeper Migration
</a>
</li>
<li>
<a href="/administration/table-migration"
class="">
Table Migration
</a>
</li>
<li>
<a href="/administration/table-soft-delete"
class="">
Table Soft-Delete
</a>
</li>
<li>
<a href="/administration/table-env"
class="">
Table Environment Variables
</a>
</li>
<li>
<a href="/administration/remote-commands"
class="">
Remote Command
</a>
</li>
<li>
<a href="/administration/partition-split"
class="">
Partition-Split
</a>
</li>
<li>
<a href="/administration/duplication"
class="">
Duplication
</a>
</li>
<li>
<a href="/administration/compression"
class="">
Data Compression
</a>
</li>
<li>
<a href="/administration/throttling"
class="">
Throttling
</a>
</li>
<li>
<a href="/administration/experiences"
class="">
Experiences
</a>
</li>
<li>
<a href="/administration/manual-compact"
class="">
Manual Compact
</a>
</li>
<li>
<a href="/administration/usage-scenario"
class="">
Usage Scenario
</a>
</li>
<li>
<a href="/administration/bad-disk"
class="">
Bad Disk Repair
</a>
</li>
<li>
<a href="/administration/whitelist"
class="">
Replica Server Whitelist
</a>
</li>
<li>
<a href="/administration/backup-request"
class="">
Backup Request
</a>
</li>
<li>
<a href="/administration/hotspot-detection"
class="">
Hotspot Detection
</a>
</li>
</ul>
</aside>
</div>
</div>
<!-- main section -->
<div class="dashboard-main is-scrollable">
<nav class="navbar is-hidden-desktop">
<div class="navbar-brand">
<a href="/" class="navbar-item">
<!-- Pegasus Icon -->
<img src="/assets/images/pegasus-square.png" alt="Pegasus home">
</a>
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<a class="button is-light is-outlined is-inverted" href="/zh/api/ttl" lang="zh" hreflang="zh"><strong>中文</strong></a>
</div>
<a role="button" class="navbar-burger burger" aria-label="menu" aria-expanded="false" data-target="navMenu">
<!-- Appears in mobile mode only -->
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
<span aria-hidden="true"></span>
</a>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-end">
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
The Pegasus documentation
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/downloads"
class="navbar-item ">
Downloads
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Building Pegasus
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/build/compile-by-docker"
class="navbar-item ">
Compile by docker (recommended)
</a>
<a href="/docs/build/compile-from-source"
class="navbar-item ">
Compile from source
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Client Libs
</span>
</a>
<div class="navbar-dropdown">
<a href="/clients/java-client"
class="navbar-item ">
Java Client
</a>
<a href="/clients/cpp-client"
class="navbar-item ">
C++ Client
</a>
<a href="https://github.com/apache/incubator-pegasus/tree/master/go-client"
class="navbar-item ">
Golang Client
</a>
<a href="/clients/python-client"
class="navbar-item ">
Python Client
</a>
<a href="/clients/node-client"
class="navbar-item ">
NodeJS Client
</a>
<a href="/clients/scala-client"
class="navbar-item ">
Scala Client
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Tools
</span>
</a>
<div class="navbar-dropdown">
<a href="/docs/tools/shell"
class="navbar-item ">
Pegasus Shell
</a>
<a href="https://github.com/pegasus-kv/admin-cli"
class="navbar-item ">
Admin CLI
</a>
<a href="https://github.com/pegasus-kv/pegic"
class="navbar-item ">
Pegasus data access CLI
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
API
</span>
</a>
<div class="navbar-dropdown">
<a href="/api/ttl"
class="navbar-item is-active">
TTL(Time To Live)
</a>
<a href="/api/single-atomic"
class="navbar-item ">
Single-Atomic Operations
</a>
<a href="/api/redis"
class="navbar-item ">
Redis Adaption
</a>
<a href="/api/geo"
class="navbar-item ">
GEO Support
</a>
<a href="/api/http"
class="navbar-item ">
HTTP API
</a>
</div>
</div>
<!--dropdown-->
<div class="navbar-item has-dropdown is-hoverable">
<a href=""
class="navbar-link ">
<span>
Admin
</span>
</a>
<div class="navbar-dropdown">
<a href="/administration/deployment"
class="navbar-item ">
Deployment
</a>
<a href="/administration/config"
class="navbar-item ">
Configurations
</a>
<a href="/administration/rebalance"
class="navbar-item ">
Rebalance
</a>
<a href="/administration/monitoring"
class="navbar-item ">
Monitoring
</a>
<a href="/administration/rolling-update"
class="navbar-item ">
Rolling Restart and Upgrade
</a>
<a href="/administration/scale-in-out"
class="navbar-item ">
Scale-in and Scale-out
</a>
<a href="/administration/resource-management"
class="navbar-item ">
Resource Management
</a>
<a href="/administration/cold-backup"
class="navbar-item ">
Cold Backup
</a>
<a href="/administration/meta-recovery"
class="navbar-item ">
Metadata Recovery
</a>
<a href="/administration/replica-recovery"
class="navbar-item ">
Replica Data Recovery
</a>
<a href="/administration/zk-migration"
class="navbar-item ">
Zookeeper Migration
</a>
<a href="/administration/table-migration"
class="navbar-item ">
Table Migration
</a>
<a href="/administration/table-soft-delete"
class="navbar-item ">
Table Soft-Delete
</a>
<a href="/administration/table-env"
class="navbar-item ">
Table Environment Variables
</a>
<a href="/administration/remote-commands"
class="navbar-item ">
Remote Command
</a>
<a href="/administration/partition-split"
class="navbar-item ">
Partition-Split
</a>
<a href="/administration/duplication"
class="navbar-item ">
Duplication
</a>
<a href="/administration/compression"
class="navbar-item ">
Data Compression
</a>
<a href="/administration/throttling"
class="navbar-item ">
Throttling
</a>
<a href="/administration/experiences"
class="navbar-item ">
Experiences
</a>
<a href="/administration/manual-compact"
class="navbar-item ">
Manual Compact
</a>
<a href="/administration/usage-scenario"
class="navbar-item ">
Usage Scenario
</a>
<a href="/administration/bad-disk"
class="navbar-item ">
Bad Disk Repair
</a>
<a href="/administration/whitelist"
class="navbar-item ">
Replica Server Whitelist
</a>
<a href="/administration/backup-request"
class="navbar-item ">
Backup Request
</a>
<a href="/administration/hotspot-detection"
class="navbar-item ">
Hotspot Detection
</a>
</div>
</div>
</div>
</div>
</nav>
<nav class="navbar is-hidden-mobile">
<div class="navbar-start w-full">
<div class="navbar-item pl-0 w-full">
<!--TODO(wutao): Given the limitation of docsearch that couldn't handle multiple input,
I make searchbox only shown in desktop. Fix this issue when docsearch.js v3 released.
Related issue: https://github.com/algolia/docsearch/issues/230-->
<div id="docsearch"></div>
</div>
</div>
<div class="navbar-end">
<div class="navbar-item">
<!--A simple language switch button that only supports zh and en.-->
<!--IF its language is zh, then switches to en.-->
<a class="button is-light is-outlined is-inverted" href="/zh/api/ttl" lang="zh" hreflang="zh"><strong>中文</strong></a>
</div>
</div>
</nav>
<section class="hero is-info lg:mr-3">
<div class="hero-body">
<p class="title is-size-2 is-centered">TTL(Time To Live)</p>
</div>
</section>
<section class="section" style="padding-top: 2rem;">
<div class="content">
<h1 id="principle">Principle</h1>
<p>Pegasus supports the TTL (Time To Live) feature, which means the expiration time of data can be specified when writing it. Once data has expired, it is invisible to the user and can no longer be accessed through interfaces such as get/multiGet.</p>
<p>Users set TTL via the <code class="language-plaintext highlighter-rouge">ttl_seconds</code> parameter which represents the number of seconds after which the data will expire, starting from the current time. Zero means that TTL is not set, that is, the data will never expire.</p>
<p>How is TTL implemented? Will the data be deleted from the disk immediately? Next, let’s talk about the implementation principle of TTL.</p>
<p>Simply speaking, Pegasus TTL is achieved by recording the expiration time of data when writing and checking the expiration time during queries. As shown in the following figure:</p>
<p><img src="/assets/images/pegasus-ttl.png" alt="pegasus-ttl.png" class="img-responsive" /></p>
<p><strong>Writing process</strong></p>
<ul>
<li>When writing data, the user specifies the TTL through the <code class="language-plaintext highlighter-rouge">ttl_seconds</code> parameter on the client side. The client first calculates the expiration time of the data as <code class="language-plaintext highlighter-rouge">ExpireTime = CurrentTime + ttl_seconds</code>, and then passes the data and <code class="language-plaintext highlighter-rouge">ExpireTime</code> together to the ReplicaServer through RPC.</li>
<li>After receiving a write request, ReplicaServer undergoes various processes (including writing WAL, replication, etc.) and finally stores the data in RocksDB. When storing values, <code class="language-plaintext highlighter-rouge">ExpireTime</code> will be placed in the value header.</li>
</ul>
<p><strong>Reading process</strong></p>
<ul>
<li>Users query the value data corresponding to the specified key through the client</li>
<li>After receiving a read request, ReplicaServer first retrieves the value corresponding to the key from RocksDB, and then extracts the <code class="language-plaintext highlighter-rouge">ExpireTime</code> from the value header:
<ul>
<li>If ExpireTime == 0, no TTL has been set for the data, so it is always valid.</li>
<li>If ExpireTime &gt; 0, it indicates that TTL has been set for the data, and further comparison is made:
<ul>
<li>If ExpireTime &gt; now, the data has not expired and the user data in value is returned</li>
<li>If ExpireTime &lt;= now, the data has expired and <code class="language-plaintext highlighter-rouge">NotFound</code> is returned</li>
</ul>
</li>
</ul>
</li>
</ul>
<p><strong>Data deletion</strong></p>
<ul>
<li>After the data expires, it is not immediately removed from RocksDB; instead, it is garbage-collected through <a href="https://github.com/facebook/rocksdb/wiki/Compaction">compaction</a>.</li>
<li>Pegasus uses a custom RocksDB <a href="https://github.com/facebook/rocksdb/wiki/Compaction-Filter">CompactionFilter</a> that checks the <code class="language-plaintext highlighter-rouge">ExpireTime</code> in the value header of the data during the compaction process. If the data has expired, it is discarded and will not appear in the newly generated file.</li>
<li>Because the GC process of expired data is asynchronous and depends on the timing and frequency of compaction execution, data expiration and deletion usually do not occur simultaneously. The only guarantee is that data deletion will definitely occur after data expiration.</li>
<li>Expired but undeleted data will still occupy disk space.</li>
</ul>
<h1 id="interface">Interface</h1>
<p>We provide interfaces for setting and querying TTL on both the client drivers and shell tools.</p>
<p>Taking Pegasus Java Client as an example, the interfaces for obtaining TTL include:</p>
<ul>
<li><a href="/clients/java-client#ttl">ttl</a></li>
</ul>
<p>The interfaces for setting TTL include:</p>
<ul>
<li><a href="/clients/java-client#set">set</a></li>
<li><a href="/clients/java-client#batchset">batchSet</a></li>
<li><a href="/clients/java-client#multiset">multiSet</a></li>
<li><a href="/clients/java-client#batchmultiset">batchMultiSet</a></li>
<li><a href="/clients/java-client#incr">incr</a> (Since Pegasus v1.11.1)</li>
<li><a href="/clients/java-client#checkandset">checkAndSet</a></li>
</ul>
<p>The following commands in Shell tools can query/set TTL:</p>
<ul>
<li><a href="/docs/tools/shell/#ttl">ttl</a></li>
<li><a href="/docs/tools/shell/#set">set</a></li>
<li><a href="/docs/tools/shell/#multi_set">multi_set</a></li>
</ul>
<h1 id="table-level-ttl">Table level TTL</h1>
<p>Since Pegasus v1.11.2, Pegasus supports table level TTL functionality.</p>
<h2 id="implementation-principle">Implementation principle</h2>
<ul>
<li>Users set <code class="language-plaintext highlighter-rouge">default_ttl</code> environment variable in the <a href="/administration/table-env">Table environment variable</a></li>
<li>MetaServer synchronizes environment variables to each ReplicaServer asynchronously, so that each replica of the table obtains the environment variable</li>
<li>After a replica obtains the environment variable, it parses out the <code class="language-plaintext highlighter-rouge">default_ttl</code> parameter, which takes effect immediately. Afterward:
<ul>
<li>If the <code class="language-plaintext highlighter-rouge">ExpireTime</code> of the user’s newly written data is 0, the actual <code class="language-plaintext highlighter-rouge">ExpireTime</code> of the data will be set according to <code class="language-plaintext highlighter-rouge">default_ttl</code> (i.e. current time + <code class="language-plaintext highlighter-rouge">default_ttl</code>)</li>
<li>When RocksDB performs compaction, if the original data in the compaction input file <strong>does not have</strong> an <code class="language-plaintext highlighter-rouge">ExpireTime</code>, then the <code class="language-plaintext highlighter-rouge">ExpireTime</code> of the new data in the compaction output file will be set according to <code class="language-plaintext highlighter-rouge">default_ttl</code> (i.e. current time + <code class="language-plaintext highlighter-rouge">default_ttl</code>)</li>
<li>Because the execution timing of background compaction is uncertain, the time at which data without a TTL has <code class="language-plaintext highlighter-rouge">default_ttl</code> applied as its TTL is also uncertain</li>
<li>If you want to set the TTL for all data quickly, you can use <a href="/administration/manual-compact">Manual Compact</a>. All data will then be processed by compaction, and data without a TTL will have its TTL set to <code class="language-plaintext highlighter-rouge">default_ttl</code></li>
</ul>
</li>
</ul>
<h2 id="application-scenarios">Application scenarios</h2>
<ul>
<li>The disk space occupied by data tables is increasing. Users want to reduce disk space usage, improve query performance by garbage-collecting data, or reduce disk and CPU consumption</li>
<li>All or part of the data in the table has no TTL set</li>
<li>The validity of data without TTL is related to its write time. For example, data written more than a month ago will no longer be queried and can be discarded</li>
</ul>
<p>In scenarios where all three conditions are met, disk cleanup and resource release can be achieved by using table level TTL together with Manual Compact.</p>
<h1 id="calculate-data-write-time-through-ttl">Calculate data write time through TTL</h1>
<p>If TTL is set during data writing, the time of data writing can be calculated using TTL.</p>
<p>Since:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>ExpireTime = InsertTime + TTLSeconds = now + TTLRemainingSeconds
</code></pre></div></div>
<p>Therefore:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>InsertTime = now + TTLRemainingSeconds - TTLSeconds
</code></pre></div></div>
<p>Among them:</p>
<ul>
<li>now: The time when the Shell ttl command is executed.</li>
<li>TTLRemainingSeconds: Obtained through <a href="/docs/tools/shell/#ttl">Shell’s ttl command</a>.</li>
<li>TTLSeconds: The TTL set by the user when writing data.</li>
</ul>
</div>
</section>
<footer class="footer">
<div class="container">
<div class="content is-small has-text-centered">
<div style="margin-bottom: 20px;">
<a href="http://incubator.apache.org">
<img src="/assets/images/egg-logo.png"
width="15%"
alt="Apache Incubator"/>
</a>
</div>
Copyright &copy; 2023 <a href="http://www.apache.org">The Apache Software Foundation</a>.
Licensed under the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version
2.0</a>.
<br><br>
Apache Pegasus is an effort undergoing incubation at The Apache Software Foundation (ASF),
sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
until a further review indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects. While incubation status is
not necessarily a reflection of the completeness or stability of the code, it does indicate that the
project has yet to be fully endorsed by the ASF.
<br><br>
Apache Pegasus, Pegasus, Apache, the Apache feather logo, and the Apache Pegasus project logo are either
registered trademarks or trademarks of The Apache Software Foundation in the United States and other
countries.
</div>
</div>
</footer>
</div>
<!-- right panel -->
<div class="dashboard-panel is-small is-scrollable is-hidden-mobile">
<p class="menu-label">
<span class="icon">
<i class="fa fa-bars" aria-hidden="true"></i>
</span>
Table of contents
</p>
<ul class="menu-list">
<li><a href="#principle">Principle</a></li>
<li><a href="#interface">Interface</a></li>
<li><a href="#table-level-ttl">Table level TTL</a>
<ul>
<li><a href="#implementation-principle">Implementation principle</a></li>
<li><a href="#application-scenarios">Application scenarios</a></li>
</ul>
</li>
<li><a href="#calculate-data-write-time-through-ttl">Calculate data write time through TTL</a></li>
</ul>
</div>
</div>
<script src="/assets/js/app.js" type="text/javascript"></script>
<script>
// Initialize the DocSearch widget and mount it into the element with
// id "docsearch" (the search box container in the desktop navbar).
docsearch({
  container: '#docsearch',
  appId: 'QRN30RBW0S',
  indexName: 'pegasus-apache',
  // NOTE(review): presumably a search-only public API key; confirm it carries
  // no write/admin permissions, since it is exposed to every visitor.
  apiKey: 'd3a3252fa344359766707a106c4ed88f',
  // Debug mode is intended for inspecting the search modal during development;
  // keep it off on the published site.
  debug: false
});
</script>
</body>
</html>