blob: 3017273d308eb308ce7b3145ce65a4a0700705a1 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Format | Apache Arrow</title>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<meta name="generator" content="Jekyll v4.4.1" />
<meta property="og:title" content="Format" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="Arrow Format" />
<meta property="og:description" content="Arrow Format" />
<link rel="canonical" href="https://arrow.apache.org/overview/" />
<meta property="og:url" content="https://arrow.apache.org/overview/" />
<meta property="og:site_name" content="Apache Arrow" />
<meta property="og:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png" />
<meta property="og:type" content="website" />
<meta name="twitter:card" content="summary_large_image" />
<meta property="twitter:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png" />
<meta property="twitter:title" content="Format" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"WebPage","description":"Arrow Format","headline":"Format","image":"https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"https://arrow.apache.org/img/logo.png"}},"url":"https://arrow.apache.org/overview/"}</script>
<!-- End Jekyll SEO tag -->
<!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16.png" id="light1">
<link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32.png" id="light2">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="/img/apple-touch-icon.png" id="light3">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="/img/apple-touch-icon-120x120.png" id="light4">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="/img/apple-touch-icon-76x76.png" id="light5">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="/img/apple-touch-icon-60x60.png" id="light6">
<!-- dark mode favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16-dark.png" id="dark1">
<link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32-dark.png" id="dark2">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="/img/apple-touch-icon-dark.png" id="dark3">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="/img/apple-touch-icon-120x120-dark.png" id="dark4">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="/img/apple-touch-icon-76x76-dark.png" id="dark5">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="/img/apple-touch-icon-60x60-dark.png" id="dark6">
<script>
// Switch to the dark-mode favicons if prefers-color-scheme: dark
function onUpdate() {
light1 = document.querySelector('link#light1');
light2 = document.querySelector('link#light2');
light3 = document.querySelector('link#light3');
light4 = document.querySelector('link#light4');
light5 = document.querySelector('link#light5');
light6 = document.querySelector('link#light6');
dark1 = document.querySelector('link#dark1');
dark2 = document.querySelector('link#dark2');
dark3 = document.querySelector('link#dark3');
dark4 = document.querySelector('link#dark4');
dark5 = document.querySelector('link#dark5');
dark6 = document.querySelector('link#dark6');
if (matcher.matches) {
light1.remove();
light2.remove();
light3.remove();
light4.remove();
light5.remove();
light6.remove();
document.head.append(dark1);
document.head.append(dark2);
document.head.append(dark3);
document.head.append(dark4);
document.head.append(dark5);
document.head.append(dark6);
} else {
dark1.remove();
dark2.remove();
dark3.remove();
dark4.remove();
dark5.remove();
dark6.remove();
document.head.append(light1);
document.head.append(light2);
document.head.append(light3);
document.head.append(light4);
document.head.append(light5);
document.head.append(light6);
}
}
matcher = window.matchMedia('(prefers-color-scheme: dark)');
matcher.addListener(onUpdate);
onUpdate();
</script>
<link href="/css/main.css" rel="stylesheet">
<link href="/css/syntax.css" rel="stylesheet">
<script src="/javascript/main.js"></script>
<!-- Matomo -->
<script>
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
<link type="application/atom+xml" rel="alternate" href="https://arrow.apache.org/feed.xml" title="Apache Arrow" />
</head>
<body class="wrap">
<header>
<nav class="navbar navbar-expand-md navbar-dark bg-dark">
<a class="navbar-brand no-padding" href="/"><img src="/img/arrow-inverse-300px.png" height="40px"></a>
<button class="navbar-toggler ml-auto" type="button" data-toggle="collapse" data-target="#arrow-navbar" aria-controls="arrow-navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<!-- Collect the nav links, forms, and other content for toggling -->
<div class="collapse navbar-collapse justify-content-end" id="arrow-navbar">
<ul class="nav navbar-nav">
<li class="nav-item"><a class="nav-link" href="/overview/" role="button" aria-haspopup="true" aria-expanded="false">Overview</a></li>
<li class="nav-item"><a class="nav-link" href="/faq/" role="button" aria-haspopup="true" aria-expanded="false">FAQ</a></li>
<li class="nav-item"><a class="nav-link" href="/blog" role="button" aria-haspopup="true" aria-expanded="false">Blog</a></li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownGetArrow" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Get Arrow
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownGetArrow">
<a class="dropdown-item" href="/install/">Install</a>
<a class="dropdown-item" href="/release/">Releases</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownDocumentation" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Docs
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownDocumentation">
<a class="dropdown-item" href="/docs">Project Docs</a>
<a class="dropdown-item" href="/docs/format/Columnar.html">Format</a>
<hr>
<a class="dropdown-item" href="/docs/c_glib">C GLib</a>
<a class="dropdown-item" href="/docs/cpp">C++</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/csharp/README.md" target="_blank" rel="noopener">C#</a>
<a class="dropdown-item" href="https://godoc.org/github.com/apache/arrow/go/arrow" target="_blank" rel="noopener">Go</a>
<a class="dropdown-item" href="/docs/java">Java</a>
<a class="dropdown-item" href="/docs/js">JavaScript</a>
<a class="dropdown-item" href="/julia/">Julia</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/matlab/README.md" target="_blank" rel="noopener">MATLAB</a>
<a class="dropdown-item" href="/docs/python">Python</a>
<a class="dropdown-item" href="/docs/r">R</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/ruby/README.md" target="_blank" rel="noopener">Ruby</a>
<a class="dropdown-item" href="https://docs.rs/arrow/latest" target="_blank" rel="noopener">Rust</a>
<a class="dropdown-item" href="/swift">Swift</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownSource" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Source
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownSource">
<a class="dropdown-item" href="https://github.com/apache/arrow" target="_blank" rel="noopener">Main Repo</a>
<hr>
<a class="dropdown-item" href="https://github.com/apache/arrow/tree/main/c_glib" target="_blank" rel="noopener">C GLib</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/tree/main/cpp" target="_blank" rel="noopener">C++</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/tree/main/csharp" target="_blank" rel="noopener">C#</a>
<a class="dropdown-item" href="https://github.com/apache/arrow-go" target="_blank" rel="noopener">Go</a>
<a class="dropdown-item" href="https://github.com/apache/arrow-java" target="_blank" rel="noopener">Java</a>
<a class="dropdown-item" href="https://github.com/apache/arrow-js" target="_blank" rel="noopener">JavaScript</a>
<a class="dropdown-item" href="https://github.com/apache/arrow-julia" target="_blank" rel="noopener">Julia</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/tree/main/matlab" target="_blank" rel="noopener">MATLAB</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/tree/main/python" target="_blank" rel="noopener">Python</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/tree/main/r" target="_blank" rel="noopener">R</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/tree/main/ruby" target="_blank" rel="noopener">Ruby</a>
<a class="dropdown-item" href="https://github.com/apache/arrow-rs" target="_blank" rel="noopener">Rust</a>
<a class="dropdown-item" href="https://github.com/apache/arrow-swift" target="_blank" rel="noopener">Swift</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownSubprojects" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Subprojects
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownSubprojects">
<a class="dropdown-item" href="/adbc">ADBC</a>
<a class="dropdown-item" href="/docs/format/Flight.html">Arrow Flight</a>
<a class="dropdown-item" href="/docs/format/FlightSql.html">Arrow Flight SQL</a>
<a class="dropdown-item" href="https://datafusion.apache.org" target="_blank" rel="noopener">DataFusion</a>
<a class="dropdown-item" href="/nanoarrow">nanoarrow</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownCommunity" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Community
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownCommunity">
<a class="dropdown-item" href="/community/">Communication</a>
<a class="dropdown-item" href="/docs/developers/index.html">Contributing</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/issues" target="_blank" rel="noopener">Issue Tracker</a>
<a class="dropdown-item" href="/committers/">Governance</a>
<a class="dropdown-item" href="/use_cases/">Use Cases</a>
<a class="dropdown-item" href="/powered_by/">Powered By</a>
<a class="dropdown-item" href="/visual_identity/">Visual Identity</a>
<a class="dropdown-item" href="/security/">Security</a>
<a class="dropdown-item" href="https://www.apache.org/foundation/policies/conduct.html" target="_blank" rel="noopener">Code of Conduct</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdownASF" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
ASF Links
</a>
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdownASF">
<a class="dropdown-item" href="https://www.apache.org/" target="_blank" rel="noopener">ASF Website</a>
<a class="dropdown-item" href="https://www.apache.org/licenses/" target="_blank" rel="noopener">License</a>
<a class="dropdown-item" href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener">Donate</a>
<a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener">Thanks</a>
<a class="dropdown-item" href="https://www.apache.org/security/" target="_blank" rel="noopener">Security</a>
</div>
</li>
</ul>
</div>
<!-- /.navbar-collapse -->
</nav>
</header>
<div class="container p-4 pt-5">
<div class="col-lg-10 mx-auto">
<main role="main" class="pb-5">
<!--
-->
<h2>Apache Arrow Overview</h2>
<p>Apache Arrow is a multi-language toolbox for building high performance
applications that process and transport large data sets. It is designed to both
improve the performance of analytical algorithms and the efficiency of moving
data from one system or programming language to another.</p>
<p>A critical component of Apache Arrow is its <strong>in-memory columnar format</strong>, a
standardized, language-agnostic specification for representing structured,
table-like datasets in-memory. This data format has a rich data type system
(included nested and user-defined data types) designed to support the needs of
analytic database systems, data frame libraries, and more.</p>
<div class="row mt-4">
<div class="col-md-6">
<h3>Columnar is Fast</h3>
<p>
The Apache Arrow format allows computational routines and execution
engines to maximize their efficiency when scanning and iterating large
chunks of data. In particular, the contiguous columnar layout enables
vectorization using the latest SIMD (Single Instruction, Multiple Data)
operations included in modern processors.
</p>
</div>
<div class="col-md-6">
<img src="/img/simd.png" alt="SIMD" class="img-fluid mx-auto">
</div>
</div>
<div class="row mt-4">
<div class="col-md-6">
<img src="/img/copy.png" alt="common data layer" class="img-fluid mx-auto">
<img src="/img/shared.png" alt="common data layer" class="img-fluid mx-auto">
</div>
<div class="col-md-6">
<h3>Standardization Saves</h3>
<p>
Without a standard columnar data format, every database and language has
to implement its own internal data format. This generates a lot of
waste. Moving data from one system to another involves costly
serialization and deserialization. In addition, common algorithms must
often be rewritten for each data format.
</p>
<p>
Arrow's in-memory columnar data format is an out-of-the-box solution to
these problems. Systems that use or support Arrow can transfer data
between them at little-to-no cost. Moreover, they don't need to implement
custom connectors for every other system. On top of these savings, a
standardized memory format facilitates reuse of libraries of algorithms,
even across languages.
</p>
</div>
</div>
<div class="row mt-4">
<div class="col-md-12">
<h3>Arrow Libraries</h3>
<p>
The Arrow project contains libraries that enable you to work with data
in the Arrow columnar format in many languages. The
<a href="/docs/cpp/">C++</a>,
<a href="https://github.com/apache/arrow/blob/main/csharp/README.md" target="_blank" rel="noopener">C#</a>,
<a href="https://godoc.org/github.com/apache/arrow/go/arrow" target="_blank" rel="noopener">Go</a>,
<a href="/docs/java/">Java</a>,
<a href="/docs/js/">JavaScript</a>,
<a href="/julia/">Julia</a>,
<a href="https://docs.rs/crate/arrow/" target="_blank" rel="noopener">Rust</a>,
and <a href="https://github.com/apache/arrow/blob/main/swift" target="_blank" rel="noopener">Swift</a>
libraries contain distinct implementations of the Arrow format. These
libraries are
<a href="/docs/status.html">integration-tested</a>
against each other to ensure their fidelity to the format. In addition,
Arrow libraries for
<a href="/docs/c_glib/">C (GLib)</a>,
<a href="https://github.com/apache/arrow/blob/main/matlab/README.md" target="_blank" rel="noopener">MATLAB</a>,
<a href="/docs/python/">Python</a>,
<a href="/docs/r/">R</a>,
and <a href="https://github.com/apache/arrow/blob/main/ruby/README.md" target="_blank" rel="noopener">Ruby</a>
are built on top of the C++ library.
</p>
<p>
These official libraries enable third-party projects to work with Arrow
data without having to implement the Arrow columnar format
themselves. They also contain many software components that assist with
systems problems related to getting data in and out of remote storage
systems and moving Arrow-formatted data over network interfaces, among
other <a href="/use_cases/">use cases</a>.
</p>
</div>
</div>
</main>
</div>
<hr>
<footer class="footer">
<div class="row">
<div class="col-md-9">
<p>Apache Arrow, Arrow, Apache, the Apache logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
<p>© 2016-2025 The Apache Software Foundation</p>
</div>
<div class="col-md-3">
<a class="d-sm-none d-md-inline pr-2" href="https://www.apache.org/events/current-event.html" target="_blank" rel="noopener">
<img src="https://www.apache.org/events/current-event-234x60.png">
</a>
</div>
</div>
</footer>
</div>
</body>
</html>