blob: 38ae61e3cd744d439bab1a3672f771e2f8f8c3a6 [file] [log] [blame]
<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>Message deduplication · Apache Pulsar</title><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta name="generator" content="Docusaurus"/><meta name="description" content="**Message deduplication** is a feature of Pulsar that, when enabled, ensures that each message produced on Pulsar topics is persisted to disk *only once*, even if the message is produced more than once. Message deduplication essentially unburdens Pulsar applications of the responsibility of ensuring deduplication and instead handles it automatically on the server side."/><meta name="docsearch:version" content="2.4.0"/><meta name="docsearch:language" content="en"/><meta property="og:title" content="Message deduplication · Apache Pulsar"/><meta property="og:type" content="website"/><meta property="og:url" content="https://pulsar.apache.org/"/><meta property="og:description" content="**Message deduplication** is a feature of Pulsar that, when enabled, ensures that each message produced on Pulsar topics is persisted to disk *only once*, even if the message is produced more than once. Message deduplication essentially unburdens Pulsar applications of the responsibility of ensuring deduplication and instead handles it automatically on the server side."/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://pulsar.apache.org/img/pulsar.svg"/><link rel="shortcut icon" href="/img/pulsar.ico"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/atom-one-dark.min.css"/><link rel="alternate" type="application/atom+xml" href="https://pulsar.apache.org/blog/atom.xml" title="Apache Pulsar Blog ATOM Feed"/><link rel="alternate" type="application/rss+xml" href="https://pulsar.apache.org/blog/feed.xml" title="Apache Pulsar Blog RSS Feed"/><link rel="stylesheet" href="/css/code-blocks-buttons.css"/><script type="text/javascript" src="https://buttons.github.io/buttons.js"></script><script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.0/clipboard.min.js"></script><script type="text/javascript" src="/js/custom.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/en"><img class="logo" src="/img/pulsar.svg" alt="Apache Pulsar"/></a><a href="/en/versions"><h3>2.4.0</h3></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class="siteNavGroupActive"><a href="/docs/en/2.4.0/getting-started-standalone" target="_self">Docs</a></li><li class=""><a href="/en/download" target="_self">Download</a></li><li class="siteNavGroupActive"><a href="/docs/en/2.4.0/client-libraries" target="_self">Clients</a></li><li class=""><a href="#restapis" target="_self">REST APIs</a></li><li class=""><a href="#cli" target="_self">Cli</a></li><li class=""><a href="/blog/" target="_self">Blog</a></li><li class=""><a href="#community" target="_self">Community</a></li><li class=""><a href="#apache" target="_self">Apache</a></li><li class=""><a href="https://pulsar-next.staged.apache.org/" target="_self">New Website (Beta)</a></li><span><li><a id="languages-menu" href="#"><img class="languages-icon" src="/img/language.svg" alt="Languages icon"/>English</a><div id="languages-dropdown" class="hide"><ul id="languages-dropdown-items"><li><a href="/docs/ja/2.4.0/cookbooks-deduplication">日本語</a></li><li><a href="/docs/fr/2.4.0/cookbooks-deduplication">Français</a></li><li><a href="/docs/ko/2.4.0/cookbooks-deduplication">한국어</a></li><li><a href="/docs/zh-CN/2.4.0/cookbooks-deduplication">中文</a></li><li><a href="/docs/zh-TW/2.4.0/cookbooks-deduplication">繁體中文</a></li><li><a href="https://crowdin.com/project/apache-pulsar" target="_blank" rel="noreferrer noopener">Help Translate</a></li></ul></div></li><script>
const languagesMenuItem = document.getElementById("languages-menu");
const languagesDropDown = document.getElementById("languages-dropdown");
languagesMenuItem.addEventListener("click", function(event) {
event.preventDefault();
if (languagesDropDown.className == "hide") {
languagesDropDown.className = "visible";
} else {
languagesDropDown.className = "hide";
}
});
</script></span></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i></i><span>Cookbooks</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle">Get Started</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/pulsar-2.0">Pulsar 2.0</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/getting-started-standalone">Run Pulsar locally</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/getting-started-docker">Run Pulsar in Docker</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/client-libraries">Use Pulsar with client libraries</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Concepts and Architecture</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-overview">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-messaging">Messaging</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-architecture-overview">Architecture</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-clients">Clients</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-replication">Geo Replication</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-multi-tenancy">Multi Tenancy</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-authentication">Authentication and Authorization</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-topic-compaction">Topic Compaction</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-tiered-storage">Tiered Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/concepts-schema-registry">Schema Registry</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Pulsar Functions</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-overview">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-quickstart">Get started</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-api">API</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-deploying">Deploying functions</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-guarantees">Processing guarantees</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-state">State Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-metrics">Metrics</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-worker">Admin: Pulsar Functions Worker</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-runtime">Admin: Configure Functions runtime</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/functions-debugging">Debugging</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/window-functions-context">Window Functions: Context</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Pulsar IO</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/io-overview">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/io-quickstart">Get started</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/io-managing">Managing Connectors</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/io-connectors">Builtin Connectors</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/io-develop">Developing Connectors</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/io-cdc">CDC Connector</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Pulsar SQL</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/sql-overview">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/sql-getting-started">Get Started</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/sql-deployment-configurations">Deployment and Configuration</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Deployment</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/deploy-aws">Amazon Web Services</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/deploy-kubernetes">Kubernetes</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/deploy-bare-metal">Bare metal</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/deploy-bare-metal-multi-cluster">Bare metal multi-cluster</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/deploy-monitoring">Monitoring</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Administration</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/administration-zk-bk">ZooKeeper and BookKeeper</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/administration-geo">Geo-replication</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/administration-dashboard">Dashboard</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/administration-stats">Pulsar statistics</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/administration-load-balance">Load balance</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/administration-proxy">Pulsar proxy</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Security</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-overview">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-tls-transport">Transport Encryption using TLS</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-tls-authentication">Authentication using TLS</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-token-client">Client Authentication using tokens</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-token-admin">Token authentication admin</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-athenz">Authentication using Athenz</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-kerberos">Authentication using Kerberos</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-jwt">Authentication using JWT</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-authorization">Authorization and ACLs</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-encryption">End-to-End Encryption</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/security-extending">Extending</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Client Libraries</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/client-libraries-java">Java</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/client-libraries-go">Go</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/client-libraries-python">Python</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/client-libraries-cpp">C++</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/client-libraries-websocket">WebSocket</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Admin API</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-overview">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-clusters">Clusters</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-tenants">Tenants</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-brokers">Brokers</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-namespaces">Namespaces</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-permissions">Permissions</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-persistent-topics">Persistent topics</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-non-persistent-topics">Non-Persistent topics</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-partitioned-topics">Partitioned topics</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/admin-api-schemas">Schemas</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Adaptors</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/adaptors-kafka">Kafka client wrapper</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/adaptors-spark">Apache Spark</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/adaptors-storm">Apache Storm</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Cookbooks</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/cookbooks-tiered-storage">Tiered Storage</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/cookbooks-compaction">Topic compaction</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/en/2.4.0/cookbooks-deduplication">Message deduplication</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/cookbooks-non-persistent">Non-persistent messaging</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/cookbooks-partitioned">Partitioned Topics</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/cookbooks-retention-expiry">Message retention and expiry</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/cookbooks-encryption">Encryption</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/cookbooks-message-queue">Message queue</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/cookbooks-bookkeepermetadata">BookKeeper Ledger Metadata</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Development</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/develop-tools">Simulation tools</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/developing-binary-protocol">Binary protocol</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/develop-schema">Custom schema storage</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/develop-load-manager">Modular load manager</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/develop-cpp">Building Pulsar C++ client</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Reference</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/reference-terminology">Terminology</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/reference-cli-tools">Pulsar CLI tools</a></li><li class="navListItem"><a class="navItem" href="/docs/en/2.4.0/reference-configuration">Pulsar configuration</a></li></ul></div></div></section></div><script>
var coll = document.getElementsByClassName('collapsible');
var checkActiveCategory = true;
for (var i = 0; i < coll.length; i++) {
var links = coll[i].nextElementSibling.getElementsByTagName('*');
if (checkActiveCategory){
for (var j = 0; j < links.length; j++) {
if (links[j].classList.contains('navListItemActive')){
coll[i].nextElementSibling.classList.toggle('hide');
coll[i].childNodes[1].classList.toggle('rotate');
checkActiveCategory = false;
break;
}
}
}
coll[i].addEventListener('click', function() {
var arrow = this.childNodes[1];
arrow.classList.toggle('rotate');
var content = this.nextElementSibling;
content.classList.toggle('hide');
});
}
document.addEventListener('DOMContentLoaded', function() {
createToggler('#navToggler', '#docsNav', 'docsSliderActive');
createToggler('#tocToggler', 'body', 'tocActive');
var headings = document.querySelector('.toc-headings');
headings && headings.addEventListener('click', function(event) {
var el = event.target;
while(el !== headings){
if (el.tagName === 'A') {
document.body.classList.remove('tocActive');
break;
} else{
el = el.parentNode;
}
}
}, false);
function createToggler(togglerSelector, targetSelector, className) {
var toggler = document.querySelector(togglerSelector);
var target = document.querySelector(targetSelector);
if (!toggler) {
return;
}
toggler.onclick = function(event) {
event.preventDefault();
target.classList.toggle(className);
};
}
});
</script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"><a class="edit-page-link button" href="https://github.com/apache/pulsar/edit/master/site2/docs/cookbooks-deduplication.md" target="_blank" rel="noreferrer noopener">Edit</a><h1 id="__docusaurus" class="postHeaderTitle">Message deduplication</h1></header><article><div><span><p><strong>Message deduplication</strong> is a feature of Pulsar that, when enabled, ensures that each message produced on Pulsar topics is persisted to disk <em>only once</em>, even if the message is produced more than once. Message deduplication essentially unburdens Pulsar applications of the responsibility of ensuring deduplication and instead handles it automatically on the server side.</p>
<p>Using message deduplication in Pulsar involves making some <a href="#configuration">configuration changes</a> to your Pulsar brokers as well as some minor changes to the behavior of Pulsar <a href="#clients">clients</a>.</p>
<blockquote>
<p>For a more thorough theoretical explanation of message deduplication, see the <a href="/docs/en/2.4.0/concepts-messaging#message-deduplication">Concepts and Architecture</a> document.</p>
</blockquote>
<h2><a class="anchor" aria-hidden="true" id="how-it-works"></a><a href="#how-it-works" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>How it works</h2>
<p>Message deduplication can be enabled and disabled on a per-namespace basis. By default, it is <em>disabled</em> on all namespaces and can enabled in the following ways:</p>
<ul>
<li>Using the <a href="#enabling"><code>pulsar-admin namespaces</code></a> interface</li>
<li>As a broker-level <a href="#default">default</a> for all namespaces</li>
</ul>
<h2><a class="anchor" aria-hidden="true" id="configuration-for-message-deduplication"></a><a href="#configuration-for-message-deduplication" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Configuration for message deduplication</h2>
<p>You can configure message deduplication in Pulsar using the <a href="/docs/en/2.4.0/reference-configuration#broker"><code>broker.conf</code></a> configuration file. The following deduplication-related parameters are available:</p>
<table>
<thead>
<tr><th style="text-align:left">Parameter</th><th style="text-align:left">Description</th><th style="text-align:left">Default</th></tr>
</thead>
<tbody>
<tr><td style="text-align:left"><code>brokerDeduplicationEnabled</code></td><td style="text-align:left">Sets the default behavior for message deduplication in the Pulsar <a href="/docs/en/2.4.0/reference-terminology#broker">broker</a>. If set to <code>true</code>, message deduplication will be enabled by default on all namespaces; if set to <code>false</code> (the default), deduplication will have to be <a href="#enabling">enabled</a> and <a href="#disabling">disabled</a> on a per-namespace basis.</td><td style="text-align:left"><code>false</code></td></tr>
<tr><td style="text-align:left"><code>brokerDeduplicationMaxNumberOfProducers</code></td><td style="text-align:left">The maximum number of producers for which information will be stored for deduplication purposes.</td><td style="text-align:left"><code>10000</code></td></tr>
<tr><td style="text-align:left"><code>brokerDeduplicationEntriesInterval</code></td><td style="text-align:left">The number of entries after which a deduplication informational snapshot is taken. A larger interval will lead to fewer snapshots being taken, though this would also lengthen the topic recovery time (the time required for entries published after the snapshot to be replayed).</td><td style="text-align:left"><code>1000</code></td></tr>
<tr><td style="text-align:left"><code>brokerDeduplicationProducerInactivityTimeoutMinutes</code></td><td style="text-align:left">The time of inactivity (in minutes) after which the broker will discard deduplication information related to a disconnected producer.</td><td style="text-align:left"><code>360</code> (6 hours)</td></tr>
</tbody>
</table>
<h3><a class="anchor" aria-hidden="true" id="setting-the-broker-level-default-default"></a><a href="#setting-the-broker-level-default-default" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Setting the broker-level default {#default}</h3>
<p>By default, message deduplication is <em>disabled</em> on all Pulsar namespaces. To enable it by default on all namespaces, set the <code>brokerDeduplicationEnabled</code> parameter to <code>true</code> and re-start the broker.</p>
<p>Regardless of the value of <code>brokerDeduplicationEnabled</code>, <a href="#enabling">enabling</a> and <a href="#disabling">disabling</a> via the CLI will override the broker-level default.</p>
<h3><a class="anchor" aria-hidden="true" id="enabling-message-deduplication-enabling"></a><a href="#enabling-message-deduplication-enabling" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Enabling message deduplication {#enabling}</h3>
<p>You can enable message deduplication on specific namespaces, regardless of the the <a href="#default">default</a> for the broker, using the <a href="/docs/en/2.4.0/reference-pulsar-admin#namespace-set-deduplication"><code>pulsar-admin namespace set-deduplication</code></a> command. You can use the <code>--enable</code>/<code>-e</code> flag and specify the namespace. Here's an example with <code>&lt;tenant&gt;/&lt;namespace&gt;</code>:</p>
<pre><code class="hljs css language-bash">$ bin/pulsar-admin namespaces <span class="hljs-built_in">set</span>-deduplication \
public/default \
--<span class="hljs-built_in">enable</span> <span class="hljs-comment"># or just -e</span>
</code></pre>
<h3><a class="anchor" aria-hidden="true" id="disabling-message-deduplication-disabling"></a><a href="#disabling-message-deduplication-disabling" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Disabling message deduplication {#disabling}</h3>
<p>You can disable message deduplication on a specific namespace using the same method shown <a href="#enabling">above</a>, except using the <code>--disable</code>/<code>-d</code> flag instead. Here's an example with <code>&lt;tenant&gt;/&lt;namespace&gt;</code>:</p>
<pre><code class="hljs css language-bash">$ bin/pulsar-admin namespaces <span class="hljs-built_in">set</span>-deduplication \
public/default \
--<span class="hljs-built_in">disable</span> <span class="hljs-comment"># or just -d</span>
</code></pre>
<h2><a class="anchor" aria-hidden="true" id="message-deduplication-and-pulsar-clients-clients"></a><a href="#message-deduplication-and-pulsar-clients-clients" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Message deduplication and Pulsar clients {#clients}</h2>
<p>If you enable message deduplication in your Pulsar brokers, you won't need to make any major changes to your Pulsar clients. There are, however, two settings that you need to provide for your client producers:</p>
<ol>
<li>The producer must be given a name</li>
<li>The message send timeout needs to be set to infinity (i.e. no timeout)</li>
</ol>
<p>Instructions for <a href="#java">Java</a>, <a href="#python">Python</a>, and <a href="#cpp">C++</a> clients can be found below.</p>
<h3><a class="anchor" aria-hidden="true" id="java-clients-java"></a><a href="#java-clients-java" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Java clients {#java}</h3>
<p>To enable message deduplication on a <a href="/docs/en/2.4.0/client-libraries-java#producers">Java producer</a>, set the producer name using the <code>producerName</code> setter and set the timeout to 0 using the <code>sendTimeout</code> setter. Here's an example:</p>
<pre><code class="hljs css language-java"><span class="hljs-keyword">import</span> org.apache.pulsar.client.api.Producer;
<span class="hljs-keyword">import</span> org.apache.pulsar.client.api.PulsarClient;
<span class="hljs-keyword">import</span> java.util.concurrent.TimeUnit;
PulsarClient pulsarClient = PulsarClient.builder()
.serviceUrl(<span class="hljs-string">"pulsar://localhost:6650"</span>)
.build();
Producer producer = pulsarClient.newProducer()
.producerName(<span class="hljs-string">"producer-1"</span>)
.topic(<span class="hljs-string">"persistent://public/default/topic-1"</span>)
.sendTimeout(<span class="hljs-number">0</span>, TimeUnit.SECONDS)
.create();
</code></pre>
<h3><a class="anchor" aria-hidden="true" id="python-clients-python"></a><a href="#python-clients-python" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Python clients {#python}</h3>
<p>To enable message deduplication on a <a href="/docs/en/2.4.0/client-libraries-python#producers">Python producer</a>, set the producer name using <code>producer_name</code> and the timeout to 0 using <code>send_timeout_millis</code>. Here's an example:</p>
<pre><code class="hljs css language-python"><span class="hljs-keyword">import</span> pulsar
client = pulsar.Client(<span class="hljs-string">"pulsar://localhost:6650"</span>)
producer = client.create_producer(
<span class="hljs-string">"persistent://public/default/topic-1"</span>,
producer_name=<span class="hljs-string">"producer-1"</span>,
send_timeout_millis=<span class="hljs-number">0</span>)
</code></pre>
<h3><a class="anchor" aria-hidden="true" id="c-clients-cpp"></a><a href="#c-clients-cpp" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>C++ clients {#cpp}</h3>
<p>To enable message deduplication on a <a href="/docs/en/2.4.0/client-libraries-cpp#producer">C++ producer</a>, set the producer name using <code>producer_name</code> and the timeout to 0 using <code>send_timeout_millis</code>. Here's an example:</p>
<pre><code class="hljs css language-cpp"><span class="hljs-meta">#<span class="hljs-meta-keyword">include</span> <span class="hljs-meta-string">&lt;pulsar/Client.h&gt;</span></span>
<span class="hljs-built_in">std</span>::<span class="hljs-built_in">string</span> serviceUrl = <span class="hljs-string">"pulsar://localhost:6650"</span>;
<span class="hljs-built_in">std</span>::<span class="hljs-built_in">string</span> topic = <span class="hljs-string">"persistent://some-tenant/ns1/topic-1"</span>;
<span class="hljs-built_in">std</span>::<span class="hljs-built_in">string</span> producerName = <span class="hljs-string">"producer-1"</span>;
<span class="hljs-function">Client <span class="hljs-title">client</span><span class="hljs-params">(serviceUrl)</span></span>;
ProducerConfiguration producerConfig;
producerConfig.setSendTimeout(<span class="hljs-number">0</span>);
producerConfig.setProducerName(producerName);
Producer producer;
Result result = client.createProducer(topic, producerConfig, producer);
</code></pre>
</span></div></article></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/en/2.4.0/cookbooks-compaction"><span class="arrow-prev"></span><span>Topic compaction</span></a><a class="docs-next button" href="/docs/en/2.4.0/cookbooks-non-persistent"><span>Non-persistent messaging</span><span class="arrow-next"></span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#how-it-works">How it works</a></li><li><a href="#configuration-for-message-deduplication">Configuration for message deduplication</a><ul class="toc-headings"><li><a href="#setting-the-broker-level-default-default">Setting the broker-level default {#default}</a></li><li><a href="#enabling-message-deduplication-enabling">Enabling message deduplication {#enabling}</a></li><li><a href="#disabling-message-deduplication-disabling">Disabling message deduplication {#disabling}</a></li></ul></li><li><a href="#message-deduplication-and-pulsar-clients-clients">Message deduplication and Pulsar clients {#clients}</a><ul class="toc-headings"><li><a href="#java-clients-java">Java clients {#java}</a></li><li><a href="#python-clients-python">Python clients {#python}</a></li><li><a href="#c-clients-cpp">C++ clients {#cpp}</a></li></ul></li></ul></nav></div><footer class="nav-footer" id="footer"><section class="copyright">Copyright © 2022 The Apache Software Foundation. All Rights Reserved. Apache, Apache Pulsar and the Apache feather logo are trademarks of The Apache Software Foundation.</section><span><script>
const community = document.querySelector("a[href='#community']").parentNode;
const communityMenu =
'<li>' +
'<a id="community-menu" href="#">Community <span style="font-size: 0.75em">&nbsp;▼</span></a>' +
'<div id="community-dropdown" class="hide">' +
'<ul id="community-dropdown-items">' +
'<li><a href="/en/contact">Contact</a></li>' +
'<li><a href="/en/contributing">Contributing</a></li>' +
'<li><a href="/en/coding-guide">Coding guide</a></li>' +
'<li><a href="/en/events">Events</a></li>' +
'<li><a href="https://twitter.com/Apache_Pulsar" target="_blank">Twitter &#x2750</a></li>' +
'<li><a href="https://github.com/apache/pulsar/wiki" target="_blank">Wiki &#x2750</a></li>' +
'<li><a href="https://github.com/apache/pulsar/issues" target="_blank">Issue tracking &#x2750</a></li>' +
'<li><a href="https://pulsar-summit.org/" target="_blank">Pulsar Summit &#x2750</a></li>' +
'<li>&nbsp;</li>' +
'<li><a href="/en/resources">Resources</a></li>' +
'<li><a href="/en/team">Team</a></li>' +
'<li><a href="/en/powered-by">Powered By</a></li>' +
'</ul>' +
'</div>' +
'</li>';
community.innerHTML = communityMenu;
const communityMenuItem = document.getElementById("community-menu");
const communityDropDown = document.getElementById("community-dropdown");
communityMenuItem.addEventListener("click", function(event) {
event.preventDefault();
if (communityDropDown.className == 'hide') {
communityDropDown.className = 'visible';
} else {
communityDropDown.className = 'hide';
}
});
</script></span></footer></div><script>window.twttr=(function(d,s, id){var js,fjs=d.getElementsByTagName(s)[0],t=window.twttr||{};if(d.getElementById(id))return t;js=d.createElement(s);js.id=id;js.src='https://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js, fjs);t._e = [];t.ready = function(f) {t._e.push(f);};return t;}(document, 'script', 'twitter-wjs'));</script></body></html>