<!DOCTYPE HTML>
<html lang="en-US">
<head>
  <meta charset="UTF-8">
  <title>Spark Configuration</title>
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <meta name="generator" content="Jekyll v4.3.4">
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
  <link rel="stylesheet" href="/css/screen.css">
  <link rel="icon" type="image/x-icon" href="/favicon.ico">
  <!--[if lt IE 9]>
  <script src="/js/html5shiv.min.js"></script>
  <script src="/js/respond.min.js"></script>
  <![endif]-->
  <!-- Matomo -->
  <script>
    // Tracker command queue: reuse window._paq if it already exists, otherwise start an empty array.
    // NOTE(review): presumably matomo.js (loaded below) drains this queue once it arrives — confirm against Matomo docs.
    var _paq = window._paq = window._paq || [];
    /* tracker methods like "setCustomDimension" should be called before "trackPageView" */
    // Judging by the command names, these two opt in to honoring Do Not Track and turn off
    // tracking cookies; they are queued ahead of 'trackPageView' in line with the note above.
    _paq.push(["setDoNotTrack", true]);
    _paq.push(["disableCookies"]);
    _paq.push(['trackPageView']);
    _paq.push(['enableLinkTracking']);
    (function() {
      // Base URL that both the tracker endpoint (matomo.php) and the tracker script (matomo.js) are built from.
      var u="https://analytics.apache.org/";
      _paq.push(['setTrackerUrl', u+'matomo.php']);
      _paq.push(['setSiteId', '68']);
      // Create an async <script> element for matomo.js and insert it before the first <script> in the document.
      var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
      g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
    })();
  </script>
  <!-- End Matomo Code -->
</head>


<body class="wrap">
  <header role="banner">
  <nav class="mobile-nav show-on-mobiles">
    <ul>
  <li class="">
    <a href="/">Home</a>
  </li>
  <li class="">
    <a href="/releases/"><span class="show-on-mobiles">Rel</span>
                         <span class="hide-on-mobiles">Releases</span></a>
  </li>
  <li class="current">
    <a href="/docs/"><span class="show-on-mobiles">Doc</span>
                     <span class="hide-on-mobiles">Documentation</span></a>
  </li>
  <li class="">
    <a href="/talks/"><span class="show-on-mobiles">Talk</span>
                      <span class="hide-on-mobiles">Talks</span></a>
  </li>
  <li class="">
    <a href="/news/">News</a>
  </li>
  <li class="">
    <a href="/develop/"><span class="show-on-mobiles">Dev</span>
                        <span class="hide-on-mobiles">Develop</span></a>
  </li>
  <li class="">
    <a href="/help/">Help</a>
  </li>
</ul>

  </nav>
  <div class="grid">
    <div class="unit one-quarter center-on-mobiles">
      <h1>
        <a href="/">
          <span class="sr-only">Apache ORC</span>
          <img src="/img/logo.png" width="249" height="101" alt="ORC Logo">
        </a>
      </h1>
    </div>
    <nav class="main-nav unit three-quarters hide-on-mobiles">
      <ul>
  <li class="">
    <a href="/">Home</a>
  </li>
  <li class="">
    <a href="/releases/"><span class="show-on-mobiles">Rel</span>
                         <span class="hide-on-mobiles">Releases</span></a>
  </li>
  <li class="current">
    <a href="/docs/"><span class="show-on-mobiles">Doc</span>
                     <span class="hide-on-mobiles">Documentation</span></a>
  </li>
  <li class="">
    <a href="/talks/"><span class="show-on-mobiles">Talk</span>
                      <span class="hide-on-mobiles">Talks</span></a>
  </li>
  <li class="">
    <a href="/news/">News</a>
  </li>
  <li class="">
    <a href="/develop/"><span class="show-on-mobiles">Dev</span>
                        <span class="hide-on-mobiles">Develop</span></a>
  </li>
  <li class="">
    <a href="/help/">Help</a>
  </li>
</ul>

    </nav>
  </div>
</header>


    <section class="docs">
    <div class="grid">

      <div class="docs-nav-mobile unit whole show-on-mobiles">
  <select onchange="if (this.value) window.location.href=this.value">
    <option value="">Navigate the docs…</option>
    
    <optgroup label="Overview">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/index.html">Background</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
      <option value="/docs/adopters.html">ORC Adopters</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/types.html">Types</option>
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/indexes.html">Indexes</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
      <option value="/docs/acid.html">ACID support</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Installing">
      


  

  
    
  
    
  
    
      <option value="/docs/building.html">Building ORC</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using in Spark">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/spark-ddl.html">Spark DDL</option>
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/spark-config.html">Spark Configuration</option>
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using in Python">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/pyarrow.html">PyArrow</option>
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/dask.html">Dask</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using in Hive">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/hive-ddl.html">Hive DDL</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/hive-config.html">Hive Configuration</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using in MapReduce">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/mapred.html">Using in MapRed</option>
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/mapreduce.html">Using in MapReduce</option>
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Using ORC Core">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/core-java.html">Using Core Java</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
      <option value="/docs/core-cpp.html">Using Core C++</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
      <option value="/docs/core-java-config.html">ORC Java configuration</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
    <optgroup label="Tools">
      


  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/cpp-tools.html">C++ Tools</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <option value="/docs/java-tools.html">Java Tools</option>
    
  
    
  
    
  
    
  
    
  
    
  
    
  


    </optgroup>
    
  </select>
</div>


      <div class="unit four-fifths">
        <article>
          <h1>Spark Configuration</h1>
          <h2 id="table-properties">Table properties</h2>

<p>Tables stored as ORC files use table properties to control their behavior. By
using table properties, the table owner ensures that all clients store data
with the same options.</p>

<table>
  <thead>
    <tr>
      <th style="text-align: left">Key</th>
      <th style="text-align: left">Default</th>
      <th style="text-align: left">Notes</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td style="text-align: left">orc.compress</td>
      <td style="text-align: left">ZSTD</td>
      <td style="text-align: left">high level compression = {NONE, ZLIB, SNAPPY, LZO, LZ4, ZSTD}</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.compress.size</td>
      <td style="text-align: left">262,144</td>
      <td style="text-align: left">compression chunk size</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.stripe.size</td>
      <td style="text-align: left">67,108,864</td>
      <td style="text-align: left">memory buffer in bytes for writing</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.row.index.stride</td>
      <td style="text-align: left">10,000</td>
      <td style="text-align: left">number of rows between index entries</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.create.index</td>
      <td style="text-align: left">true</td>
      <td style="text-align: left">whether the ORC writer creates indexes as part of the file or not</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.bloom.filter.columns</td>
      <td style="text-align: left">""</td>
      <td style="text-align: left">comma separated list of column names</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.bloom.filter.fpp</td>
      <td style="text-align: left">0.01</td>
      <td style="text-align: left">bloom filter false positive rate</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.key.provider</td>
      <td style="text-align: left">"hadoop"</td>
      <td style="text-align: left">key provider</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.encrypt</td>
      <td style="text-align: left">""</td>
      <td style="text-align: left">list of keys and columns to encrypt with</td>
    </tr>
    <tr>
      <td style="text-align: left">orc.mask</td>
      <td style="text-align: left">""</td>
      <td style="text-align: left">masks to apply to the encrypted columns</td>
    </tr>
  </tbody>
</table>

<p>For example, to create an ORC table with encrypted and masked columns (compressed with the default Zstandard codec):</p>

<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>CREATE TABLE encrypted (
  ssn STRING,
  email STRING,
  name STRING
)
USING ORC
OPTIONS (
  hadoop.security.key.provider.path "kms://http@localhost:9600/kms",
  orc.key.provider "hadoop",
  orc.encrypt "pii:ssn,email",
  orc.mask "nullify:ssn;sha256:email"
)
</code></pre></div></div>

<h2 id="configuration-properties">Configuration properties</h2>

<p>There are more Spark configuration properties related to ORC files:</p>

<table>
  <thead>
    <tr>
      <th style="text-align: left">Key</th>
      <th style="text-align: left">Default</th>
      <th style="text-align: left">Notes</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td style="text-align: left">spark.sql.orc.impl</td>
      <td style="text-align: left">native</td>
      <td style="text-align: left">The name of ORC implementation. It can be one of <code class="language-plaintext highlighter-rouge">native</code> or <code class="language-plaintext highlighter-rouge">hive</code>. <code class="language-plaintext highlighter-rouge">native</code> means the native ORC support. <code class="language-plaintext highlighter-rouge">hive</code> means the ORC library in Hive.</td>
    </tr>
    <tr>
      <td style="text-align: left">spark.sql.orc.enableVectorizedReader</td>
      <td style="text-align: left">true</td>
      <td style="text-align: left">Enables vectorized ORC decoding in <code class="language-plaintext highlighter-rouge">native</code> implementation.</td>
    </tr>
    <tr>
      <td style="text-align: left">spark.sql.orc.mergeSchema</td>
      <td style="text-align: left">false</td>
      <td style="text-align: left">When true, the ORC data source merges schemas collected from all data files, otherwise the schema is picked from a random data file.</td>
    </tr>
    <tr>
      <td style="text-align: left">spark.sql.hive.convertMetastoreOrc</td>
      <td style="text-align: left">true</td>
      <td style="text-align: left">When set to false, Spark SQL will use the Hive SerDe for ORC tables instead of the built-in support.</td>
    </tr>
  </tbody>
</table>

          





  
  

  
  

  
  

  
  

  
  

  
  

  
  

  
  
    <div class="section-nav">
      <div class="left align-right">
          
            
            
            <a href="/docs/spark-ddl.html" class="prev">Back</a>
          
      </div>
      <div class="right align-left">
          
            
            
            <a href="/docs/pyarrow.html" class="next">Next</a>
          
      </div>
    </div>
    <div class="clear"></div>
    

        </article>
      </div>

      <div class="unit one-fifth hide-on-mobiles">
  <aside>
    
    <h4>Overview</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/index.html">Background</a></li>
      


  

  

  
    
  
    
      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/types.html">Types</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
      


  

  

  
    
      <li class=""><a href="/docs/acid.html">ACID support</a></li>
      


</ul>

    
    <h4>Installing</h4>
    

<ul>

  

  

  
    
  
    
  
    
      <li class=""><a href="/docs/building.html">Building ORC</a></li>
      


</ul>

    
    <h4>Using in Spark</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/spark-ddl.html">Spark DDL</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class="current"><a href="/docs/spark-config.html">Spark Configuration</a></li>
      


</ul>

    
    <h4>Using in Python</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/pyarrow.html">PyArrow</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/dask.html">Dask</a></li>
      


</ul>

    
    <h4>Using in Hive</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
      


</ul>

    
    <h4>Using in MapReduce</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
      


</ul>

    
    <h4>Using ORC Core</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
      


  

  

  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/core-cpp.html">Using Core C++</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/core-java-config.html">ORC Java configuration</a></li>
      


</ul>

    
    <h4>Tools</h4>
    

<ul>

  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/cpp-tools.html">C++ Tools</a></li>
      


  

  

  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
  
    
      <li class=""><a href="/docs/java-tools.html">Java Tools</a></li>
      


</ul>

    
  </aside>
</div>


      <div class="clear"></div>

    </div>
  </section>


  <footer role="contentinfo">
  <p style="margin-left: 20px; margin-right: 20px; text-align: center">The contents of this website are &copy;&nbsp;2025
     <a href="https://www.apache.org/">Apache Software Foundation</a>
     under the terms of the <a
      href="https://www.apache.org/licenses/LICENSE-2.0.html">
      Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
      of the Apache Software Foundation.</p>
</footer>

  <script>
  /**
   * Create the permalink anchor that gets attached to a heading.
   *
   * @param {string} id - Heading id; becomes the fragment part of the link target.
   * @returns {HTMLAnchorElement} A detached "header-link" anchor whose href is "#" + id.
   */
  var anchorForId = function (id) {
    var permalink = document.createElement("a");
    permalink.title = "Permalink";
    permalink.href = "#" + id;
    permalink.className = "header-link";
    // Content is a screen-reader-only label followed by an <i class="fa fa-link"> element.
    permalink.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>";
    return permalink;
  };

  /**
   * Append a permalink anchor to every heading of the given level that has a non-empty id.
   *
   * @param {number} level - Heading level; concatenated with "h" to form the tag name to search for.
   * @param {Element} containingElement - Element whose descendant headings are searched.
   */
  var linkifyAnchors = function (level, containingElement) {
    var headings = containingElement.getElementsByTagName("h" + level);
    Array.prototype.forEach.call(headings, function (heading) {
      // A heading without an id has no fragment to link to, so it is left untouched.
      if (typeof heading.id === "undefined" || heading.id === "") {
        return;
      }
      heading.appendChild(anchorForId(heading.id));
    });
  };

  /**
   * Once the document's ready state is "complete", run linkifyAnchors for heading levels
   * 1 through 6 on the first "docs" element, falling back to the first "news" element.
   * Does nothing if neither element is present.
   */
  document.onreadystatechange = function () {
    // Earlier ready states fire this handler too; only act on the final one.
    if (this.readyState !== "complete") {
      return;
    }
    var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0];
    if (contentBlock) {
      var level = 1;
      while (level <= 6) {
        linkifyAnchors(level, contentBlock);
        level += 1;
      }
    }
  };
</script>


</body>
</html>
