blob: 98a8fb07a41eb01bfe35fc66b50e8d37db55460f [file] [log] [blame]
<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- The above meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>Powered by | Apache Arrow</title>
<!-- Begin Jekyll SEO tag v2.8.0 -->
<meta name="generator" content="Jekyll v4.3.3" />
<meta property="og:title" content="Powered by" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="List of projects powered by Apache Arrow" />
<meta property="og:description" content="List of projects powered by Apache Arrow" />
<link rel="canonical" href="https://arrow.apache.org/powered_by/" />
<meta property="og:url" content="https://arrow.apache.org/powered_by/" />
<meta property="og:site_name" content="Apache Arrow" />
<meta property="og:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png" />
<meta property="og:type" content="website" />
<meta name="twitter:card" content="summary_large_image" />
<meta property="twitter:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png" />
<meta property="twitter:title" content="Powered by" />
<meta name="twitter:site" content="@ApacheArrow" />
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"WebPage","description":"List of projects powered by Apache Arrow","headline":"Powered by","image":"https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"https://arrow.apache.org/img/logo.png"}},"url":"https://arrow.apache.org/powered_by/"}</script>
<!-- End Jekyll SEO tag -->
<!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16.png" id="light1">
<link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32.png" id="light2">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="/img/apple-touch-icon.png" id="light3">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="/img/apple-touch-icon-120x120.png" id="light4">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="/img/apple-touch-icon-76x76.png" id="light5">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="/img/apple-touch-icon-60x60.png" id="light6">
<!-- dark mode favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16-dark.png" id="dark1">
<link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32-dark.png" id="dark2">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="/img/apple-touch-icon-dark.png" id="dark3">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="/img/apple-touch-icon-120x120-dark.png" id="dark4">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="/img/apple-touch-icon-76x76-dark.png" id="dark5">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="/img/apple-touch-icon-60x60-dark.png" id="dark6">
<script>
// Switch to the dark-mode favicons if prefers-color-scheme: dark
/**
 * Swap the favicon <link> elements in <head> to match the color scheme.
 *
 * Reads the global `matcher` MediaQueryList. When it matches, the light-mode
 * links (#light1..#light6) are detached and the dark-mode links
 * (#dark1..#dark6) are appended to <head>; otherwise the reverse happens.
 *
 * The link elements are looked up once and cached on the function object.
 * A detached element can no longer be found through document.querySelector,
 * so re-querying on every call would return null after the first swap:
 * head.append(null) would insert a "null" text node and the following swap
 * would throw on null.remove().
 */
function onUpdate() {
  var links = onUpdate.faviconLinks;
  if (!links) {
    links = { light: [], dark: [] };
    for (var n = 1; n <= 6; n++) {
      links.light.push(document.querySelector('link#light' + n));
      links.dark.push(document.querySelector('link#dark' + n));
    }
    // Ignore ids that are absent from the page instead of failing later.
    links.light = links.light.filter(Boolean);
    links.dark = links.dark.filter(Boolean);
    onUpdate.faviconLinks = links;
  }

  var useDark = matcher.matches;
  var toHide = useDark ? links.light : links.dark;
  var toShow = useDark ? links.dark : links.light;

  toHide.forEach(function (link) {
    link.remove();
  });
  // Appending (re-)attaches the link at the end of <head>, in id order.
  toShow.forEach(function (link) {
    document.head.append(link);
  });
}
// Track the user's color-scheme preference. `matcher` is declared with var at
// script top level so it stays a global: onUpdate() reads it on every call.
var matcher = window.matchMedia('(prefers-color-scheme: dark)');
// MediaQueryList.addListener() is deprecated in favor of the standard
// "change" event; keep it only as a fallback for browsers whose
// MediaQueryList does not implement addEventListener.
if (typeof matcher.addEventListener === 'function') {
  matcher.addEventListener('change', onUpdate);
} else {
  matcher.addListener(onUpdate);
}
// Select the right favicon set for the initial page load.
onUpdate();
</script>
<!-- Request the web font over HTTPS explicitly instead of inheriting the page's scheme -->
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
<link href="/css/main.css" rel="stylesheet">
<link href="/css/syntax.css" rel="stylesheet">
<script src="/javascript/main.js"></script>
<!-- Matomo -->
<script>
// Matomo command queue, shared with (and later drained by) matomo.js.
var _paq = window._paq = window._paq || [];
/* Tracker methods such as "setCustomDimension" must be queued before "trackPageView". */
/* Cookie tracking is disabled explicitly to avoid privacy issues. */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function (trackerBase) {
  // Tell the tracker where to report and which site this page belongs to.
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '20']);

  // Load matomo.js asynchronously by inserting it before the first <script>.
  var loader = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';
  firstScript.parentNode.insertBefore(loader, firstScript);
})("https://analytics.apache.org/");
</script>
<!-- End Matomo Code -->
</head>
<body class="wrap">
<header>
<nav class="navbar navbar-expand-md navbar-dark bg-dark">
<a class="navbar-brand no-padding" href="/"><img src="/img/arrow-inverse-300px.png" height="40" alt="Apache Arrow home"/></a>
<button class="navbar-toggler ml-auto" type="button" data-toggle="collapse" data-target="#arrow-navbar" aria-controls="arrow-navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<!-- Collect the nav links, forms, and other content for toggling -->
<div class="collapse navbar-collapse justify-content-end" id="arrow-navbar">
<ul class="nav navbar-nav">
<!-- Plain links that navigate directly: no button role or popup/expanded state -->
<li class="nav-item"><a class="nav-link" href="/overview/">Overview</a></li>
<li class="nav-item"><a class="nav-link" href="/faq/">FAQ</a></li>
<li class="nav-item"><a class="nav-link" href="/blog">Blog</a></li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#"
id="navbarDropdownGetArrow" role="button" data-toggle="dropdown"
aria-haspopup="true" aria-expanded="false">
Get Arrow
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownGetArrow">
<a class="dropdown-item" href="/install/">Install</a>
<a class="dropdown-item" href="/release/">Releases</a>
<a class="dropdown-item" href="https://github.com/apache/arrow">Source Code</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#"
id="navbarDropdownDocumentation" role="button" data-toggle="dropdown"
aria-haspopup="true" aria-expanded="false">
Documentation
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownDocumentation">
<a class="dropdown-item" href="/docs">Project Docs</a>
<a class="dropdown-item" href="/docs/format/Columnar.html">Format</a>
<hr/>
<a class="dropdown-item" href="/docs/c_glib">C GLib</a>
<a class="dropdown-item" href="/docs/cpp">C++</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/csharp/README.md">C#</a>
<a class="dropdown-item" href="https://godoc.org/github.com/apache/arrow/go/arrow">Go</a>
<a class="dropdown-item" href="/docs/java">Java</a>
<a class="dropdown-item" href="/docs/js">JavaScript</a>
<a class="dropdown-item" href="/julia/">Julia</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/matlab/README.md">MATLAB</a>
<a class="dropdown-item" href="/docs/python">Python</a>
<a class="dropdown-item" href="/docs/r">R</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/ruby/README.md">Ruby</a>
<a class="dropdown-item" href="https://docs.rs/arrow/latest">Rust</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#"
id="navbarDropdownSubprojects" role="button" data-toggle="dropdown"
aria-haspopup="true" aria-expanded="false">
Subprojects
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownSubprojects">
<a class="dropdown-item" href="/adbc">ADBC</a>
<a class="dropdown-item" href="/docs/format/Flight.html">Arrow Flight</a>
<a class="dropdown-item" href="/docs/format/FlightSql.html">Arrow Flight SQL</a>
<a class="dropdown-item" href="https://datafusion.apache.org">DataFusion</a>
<a class="dropdown-item" href="/nanoarrow">nanoarrow</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#"
id="navbarDropdownCommunity" role="button" data-toggle="dropdown"
aria-haspopup="true" aria-expanded="false">
Community
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdownCommunity">
<a class="dropdown-item" href="/community/">Communication</a>
<a class="dropdown-item" href="/docs/developers/index.html">Contributing</a>
<a class="dropdown-item" href="https://github.com/apache/arrow/issues">Issue Tracker</a>
<a class="dropdown-item" href="/committers/">Governance</a>
<a class="dropdown-item" href="/use_cases/">Use Cases</a>
<a class="dropdown-item" href="/powered_by/">Powered By</a>
<a class="dropdown-item" href="/visual_identity/">Visual Identity</a>
<a class="dropdown-item" href="/security/">Security</a>
<a class="dropdown-item" href="https://www.apache.org/foundation/policies/conduct.html">Code of Conduct</a>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#"
id="navbarDropdownASF" role="button" data-toggle="dropdown"
aria-haspopup="true" aria-expanded="false">
ASF Links
</a>
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdownASF">
<a class="dropdown-item" href="https://www.apache.org/">ASF Website</a>
<a class="dropdown-item" href="https://www.apache.org/licenses/">License</a>
<a class="dropdown-item" href="https://www.apache.org/foundation/sponsorship.html">Donate</a>
<a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a>
<a class="dropdown-item" href="https://www.apache.org/security/">Security</a>
</div>
</li>
</ul>
</div><!-- /.navbar-collapse -->
</nav>
</header>
<div class="container p-4 pt-5">
<div class="col-lg-8 mx-auto">
<main role="main" class="pb-5">
<!--
-->
<h2 id="project-and-product-names-using-apache-arrow">Project and Product Names Using “Apache Arrow”</h2>
<p>Organizations creating products and projects for use with Apache Arrow, along
with associated marketing materials, should take care to respect the trademark
in “Apache Arrow” and its logo. Please refer to <a href="https://www.apache.org/foundation/marks/">ASF Trademarks Guidance</a>
and associated <a href="https://www.apache.org/foundation/marks/faq/">FAQ</a> for comprehensive and authoritative guidance on proper
usage of ASF trademarks.</p>
<p>Names that do not include “Apache Arrow” at all have no potential trademark
issue with the Apache Arrow project. This is recommended.</p>
<p>Names like “Apache Arrow BigCoProduct” are not OK, nor are names including
“Apache Arrow” in general. The above links, however, describe some exceptions,
like for names such as “BigCoProduct, powered by Apache Arrow” or
“BigCoProduct for Apache Arrow”.</p>
<p>It is common practice to create software identifiers (Maven coordinates, module
names, etc.) like “arrow-foo”. These are permitted. Nominative use of trademarks
in descriptions is also always allowed, as in “BigCoProduct is a widget for
Apache Arrow”.</p>
<p>Projects and documents that want to include a logo for Apache Arrow should use
the official logo, and adhere to the guidelines listed on the <a href="/visual_identity/">Visual Identity</a> page:</p>
<p><img src="/img/arrow-logo_horizontal_black-txt_white-bg.png" alt="Apache Arrow logo" style="max-width: 100%;" /></p>
<h2 id="projects-powered-by-apache-arrow">Projects Powered By Apache Arrow</h2>
<p>To add yourself to the list, please open a <a href="https://github.com/apache/arrow-site/edit/main/powered_by.md">pull request</a> adding your
organization name, URL, a list of which Arrow components you are using, and a
short description of your use case.</p>
<ul>
<li><strong><a href="https://parquet.apache.org/">Apache Parquet</a>:</strong> A columnar storage format available to any project
in the Hadoop ecosystem, regardless of the choice of data processing
framework, data model or programming language. The C++ and Java
implementations provide vectorized reads and writes to/from Arrow data
structures.</li>
<li><strong><a href="https://spark.apache.org/">Apache Spark</a>:</strong> Apache Spark™ is a fast and general engine for
large-scale data processing. Spark uses Apache Arrow to
<ol>
<li>improve performance of conversion between Spark DataFrame and pandas DataFrame</li>
<li>enable a set of vectorized user-defined functions (<code class="language-plaintext highlighter-rouge">pandas_udf</code>) in PySpark.</li>
</ol>
</li>
<li><strong><a href="https://www.esri.com/en-us/arcgis/products/arcgis-python-libraries/libraries/arcpy">ArcPy</a>:</strong> ArcPy is Esri’s comprehensive and powerful API for working within
the ArcGIS suite of products to perform and automate spatial analysis, data management,
and conversion tasks (license required). ArcPy supports Arrow Tables as input
and output.</li>
<li><strong><a href="https://github.com/awslabs/aws-data-wrangler">AWS Data Wrangler</a>:</strong> Extends the power of the pandas library to AWS, connecting
DataFrames and AWS data related services such as Amazon Redshift, AWS Glue, Amazon Athena,
Amazon EMR, Amazon QuickSight, etc.</li>
<li><strong><a href="https://bodo.ai">Bodo</a>:</strong> Bodo is a universal Python analytics engine that democratizes High Performance
Computing (HPC) architecture for mainstream enterprises, allowing Python analytics workloads to
scale efficiently. Bodo uses Arrow to support I/O for Parquet files, as well as internal support for data operations.</li>
<li><strong><a href="https://clickhouse.com/docs/en/interfaces/formats/#data-format-arrow">ClickHouse</a>:</strong> An open-source analytical database management system.
ClickHouse is using Apache Arrow for data import and export, and for direct querying of external datasets
in Arrow, ArrowStream, Parquet and ORC formats.</li>
<li><strong><a href="https://github.com/cloudquery/cloudquery">CloudQuery</a>:</strong> An open-source high performance ELT framework powered by Apache Arrow’s type system.</li>
<li><strong><a href="https://cylondata.org/">Cylon</a>:</strong> An open-source high performance distributed data processing library
that can be seamlessly integrated with existing Big Data and AI/ML frameworks. Cylon
uses Arrow memory format and exposes language bindings to C++, Java, and Python.</li>
<li><strong><a href="https://github.com/dask/dask">Dask</a>:</strong> Python library for parallel and distributed execution of
dynamic task graphs. Dask supports using pyarrow for accessing Parquet
files.</li>
<li><strong><a href="https://github.com/RandomFractals/vscode-data-preview">Data Preview</a>:</strong> Data Preview is a Visual Studio Code extension
for viewing text and binary data files. Data Preview uses Arrow JS API
for loading, transforming and saving Arrow data files and schemas.</li>
<li><strong><a href="https://github.com/delta-io/delta-rs">delta-rs</a>:</strong> A native Rust library for Delta Lake, with bindings to Python.
It can be integrated with Apache Arrow, increasing the efficiency of data exchange
over the network.</li>
<li><strong><a href="https://www.dremio.com/">Dremio</a>:</strong> A self-service data platform. Dremio makes it easy for
users to discover, curate, accelerate, and share data from any source.
It includes a distributed SQL execution engine based on Apache Arrow.
Dremio reads data from any source (RDBMS, HDFS, S3, NoSQL) into Arrow
buffers, and provides fast SQL access via ODBC, JDBC, and REST for BI,
Python, R, and more (all backed by Apache Arrow).</li>
<li><strong><a href="https://github.com/uwdata/falcon">Falcon</a>:</strong> An interactive data exploration tool with coordinated views.
Falcon loads Arrow files using the Arrow JavaScript module. Since Arrow does
not need to be parsed (unlike text-based formats such as CSV and JSON), startup cost
is significantly reduced.</li>
<li><strong><a href="https://fastdata.io/">FASTDATA.io</a>:</strong> Plasma Engine (unrelated to Arrow’s Plasma In-Memory
Object Store) exploits the massive parallel processing power of GPUs for
stream and batch processing. It supports Arrow as input and output, uses
Arrow internally to maximize performance, and can be used with existing
Apache Spark™ APIs.</li>
<li><strong><a href="https://github.com/abs-tudelft/fletcher">Fletcher</a>:</strong> Fletcher is a framework that can integrate FPGA
accelerators with tools and frameworks that use the Apache Arrow in-memory
format. From a set of Arrow Schemas, Fletcher generates highly optimized
hardware structures that allow accelerator kernels to read and write
RecordBatches at system bandwidth through easy-to-use interfaces.</li>
<li><strong><a href="https://github.com/locationtech/geomesa">GeoMesa</a>:</strong> A suite of tools that enables large-scale geospatial query
and analytics on distributed computing systems. GeoMesa supports query
results in the Arrow IPC format, which can then be used for in-browser
visualizations and/or further analytics.</li>
<li><strong><a href="http://gpuopenanalytics.com">GOAI</a>:</strong> Open GPU-Accelerated Analytics Initiative for Arrow-powered
analytics across GPU tools and vendors.</li>
<li><strong><a href="https://github.com/coady/graphique">graphique</a>:</strong> GraphQL service for Arrow tables and Parquet data sets. The schema for a query API is derived automatically.</li>
<li><strong><a href="https://www.graphistry.com">Graphistry</a>:</strong> Supercharged Visual Investigation Platform used by
teams for security, anti-fraud, and related investigations. The Graphistry
team uses Arrow in its NodeJS GPU backend and client libraries, and is an
early contributing member to GOAI and Arrow[JS] focused on bringing these
technologies to the enterprise.</li>
<li><strong><a href="https://github.com/GrepTimeTeam/greptimedb/">GreptimeDB</a>:</strong> GreptimeDB is an open-source time-series database with a special focus on scalability, analytical capabilities and efficiency.
It’s designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage.
GreptimeDB uses Apache Arrow as the memory model and Apache Parquet as the persistent file format.</li>
<li><strong><a href="https://hash.ai">HASH</a>:</strong> HASH is an open-core platform for building, running, and learning
from simulations, with an in-browser IDE. HASH Engine uses Apache Arrow to power
the datastore for simulation state during computation, enabling zero-copy data transfer.</li>
<li><strong><a href="https://github.com/huggingface/datasets">Hugging Face Datasets</a>:</strong> A machine learning datasets library and hub
for accessing, processing and sharing datasets for audio, computer vision,
natural language processing, and tabular tasks. Dataset objects are wrappers around
Arrow Tables and memory-mapped from disk to support out-of-core parallel processing
for machine learning workflows.</li>
<li><strong><a href="https://iceburst.io">iceburst</a>:</strong> A real-time data lake for monitoring and security built
directly on top of Amazon S3. Our approach is simple: ingest the OpenTelemetry data in an S3 bucket as
Parquet files in Iceberg table format and query them using DuckDB with millisecond retrieval and zero egress cost.
Parquet is converted to Arrow format in-memory enhancing both speed and efficiency.</li>
<li><strong><a href="https://www.inaccel.com/">InAccel</a>:</strong> A machine learning acceleration framework which leverages
FPGAs-as-a-service. InAccel supports dataframes backed by Apache Arrow to
serve as input for our implemented ML algorithms. Those dataframes can be
accessed from the FPGAs with a single DMA operation by implementing a shared
memory communication schema.</li>
<li><strong><a href="https://github.com/influxdata/influxdb_iox">InfluxDB IOx</a>:</strong> InfluxDB IOx is an open source time series database
written in Rust. It is the future core of InfluxDB; supporting
industry standard SQL, InfluxQL, and Flux. IOx uses Apache Arrow as its in-memory
format, Apache Parquet as its persistence format and Apache Arrow Flight for RPC.</li>
<li><strong><a href="https://kaskada.io">Kaskada</a>:</strong> An open source event processing engine written in Rust and
built on Apache Arrow.</li>
<li><strong><a href="https://github.com/gpuopenanalytics/libgdf">libgdf</a>:</strong> A C library of CUDA-based analytics functions and GPU IPC
support for structured data. Uses the Arrow IPC format and targets the Arrow
memory layout in its analytic functions. This work is part of the <a href="https://gpuopenanalytics.com/">GPU Open
Analytics Initiative</a>.</li>
<li><strong><a href="https://www.mathworks.com">MATLAB</a>:</strong> A numerical computing environment for engineers and
scientists. MATLAB uses Apache Arrow to support reading and writing Parquet
and Feather files.</li>
<li><strong><a href="https://github.com/omnisci/mapd-core">OmniSci</a> (formerly MapD):</strong> In-memory columnar SQL engine designed to run
on both GPUs and CPUs. OmniSci supports Arrow for data ingest and data interchange
via CUDA IPC handles. This work is part of the <a href="https://gpuopenanalytics.com/">GPU Open Analytics Initiative</a>.</li>
<li><strong><a href="https://openobserve.ai">OpenObserve</a>:</strong> Petabyte scale observability tool for logs, metrics, and traces with visualizations. High focus on usability and simplicity. Supports OpenTelemetry and many existing log and metrics forwarders.</li>
<li><strong><a href="https://pandas.pydata.org">pandas</a>:</strong> data analysis toolkit for Python programmers. pandas
supports reading and writing Parquet files using pyarrow. Several pandas
core developers are also contributors to Apache Arrow.</li>
<li><strong><a href="https://github.com/innobi/pantab">pantab</a>:</strong> Allows high performance read/writes of popular dataframe libraries
like pandas, polars, pyarrow, etc… to/from Tableau’s Hyper database. pantab uses nanoarrow
and the Arrow PyCapsule interface to make that exchange process seamless.</li>
<li><strong><a href="https://parseable.io">Parseable</a>:</strong> Log analytics platform built for scale and usability. Ingest logs from anywhere and unify logs with Parseable. Parseable uses Arrow as the intermediary, in-memory data format for log data ingestion.</li>
<li><strong><a href="https://github.com/jpmorganchase/perspective">Perspective</a>:</strong> Perspective is a streaming data visualization engine in JavaScript for building real-time &amp; user-configurable analytics entirely in the browser.</li>
<li><strong><a href="https://github.com/uber/petastorm">Petastorm</a>:</strong> Petastorm enables single machine or distributed training
and evaluation of deep learning models directly from datasets in Apache
Parquet format. Petastorm supports popular Python-based machine learning
(ML) frameworks such as Tensorflow, Pytorch, and PySpark. It can also be
used from pure Python code.</li>
<li><strong><a href="https://github.com/pola-rs/polars">Polars</a>:</strong> Polars is a blazingly fast DataFrame library and query engine
that aims to utilize modern hardware efficiently
(e.g. multi-threading, SIMD vectorization, hiding memory latencies).
Polars is built upon Apache Arrow and uses its columnar memory, compute kernels,
and several IO utilities. Polars is written in Rust and available in Rust and Python.</li>
<li><strong><a href="https://github.com/tradewelltech/protarrow">protarrow</a>:</strong> A Python library for converting from Apache Arrow to Protocol Buffers and back.</li>
<li><strong><a href="https://quiltdata.com/">Quilt Data</a>:</strong> Quilt is a data package manager, designed to make
managing data as easy as managing code. It supports Parquet format via
pyarrow for data access.</li>
<li><strong><a href="https://github.com/ray-project/ray">Ray</a>:</strong> A flexible, high-performance distributed execution framework
with a focus on machine learning and AI applications. Uses Arrow to
efficiently store Python data structures containing large arrays of numerical
data. Data can be accessed with zero-copy by multiple processes using the
<a href="https://ray-project.github.io/2017/08/08/plasma-in-memory-object-store.html">Plasma shared memory object store</a> which originated from Ray and is part
of Arrow now.</li>
<li><strong><a href="https://red-data-tools.github.io/">Red Data Tools</a>:</strong> A project that provides data processing
tools for Ruby. It provides <a href="https://github.com/red-data-tools/red-arrow/">Red Arrow</a>, a set of Ruby bindings
for Apache Arrow based on Apache Arrow GLib. Red Arrow is a core
library for it. It also provides many Ruby libraries to integrate
existing Ruby libraries with Apache Arrow. They use Red Arrow.</li>
<li><strong><a href="https://www.paradigm4.com">SciDB</a>:</strong> Paradigm4’s SciDB is a scalable, scientific
database management system that helps researchers integrate and
analyze diverse, multi-dimensional, high resolution data - like
genomic, clinical, images, sensor, environmental, and IoT data -
all in one analytical platform. <a href="https://github.com/Paradigm4/stream">SciDB streaming</a> and
<a href="https://github.com/Paradigm4/accelerated_io_tools">accelerated_io_tools</a> are powered by Apache Arrow.</li>
<li><strong><a href="https://github.com/TileDB-Inc/TileDB">TileDB</a>:</strong> TileDB is an open-source, cloud-optimized engine for storing
and accessing dense/sparse multi-dimensional arrays and dataframes. It is an
embeddable C++ library that works on Linux, macOS, and Windows, which comes
with numerous APIs and integrations. We use Arrow in our <a href="https://github.com/TileDB-Inc/TileDB-VCF">TileDB-VCF</a>
project for genomics to achieve zero-copying when accessing TileDB data from
Spark and Dask.</li>
<li><strong><a href="https://github.com/blue-yonder/turbodbc">Turbodbc</a>:</strong> Python module to access relational databases via the Open
Database Connectivity (ODBC) interface. It provides the ability to return
Arrow Tables and RecordBatches in addition to the Python Database API
Specification 2.0.</li>
<li><strong><a href="https://unum.cloud/ukv/">UKV</a>:</strong> Open NoSQL binary database interface, with support for
LevelDB, RocksDB, UDisk, and in-memory Key-Value Stores. It extends
their functionality to support Document Collections, Graphs, and Vector
Search, similar to RedisJSON, RedisGraph, and RediSearch, and brings
familiar structured bindings on top, mimicking tools like pandas and NetworkX.
All UKV interfaces are compatible with Apache Arrow columnar format,
which minimizes copies when passing data between different language
runtimes. UKV also uses Apache Arrow Flight RPC for client-server communication.</li>
<li><strong><a href="https://github.com/vaexio/vaex">Vaex</a>:</strong> Out-of-Core hybrid Apache Arrow/NumPy DataFrame for Python,
ML, visualize and explore big tabular data at a billion rows per second.</li>
<li><strong><a href="https://github.com/tenzir/vast">VAST</a>:</strong> A network telemetry engine for data-driven security
investigations. VAST uses Arrow as a standardized data plane to provide a
high-bandwidth output path for downstream analytics. This makes it easy and
efficient to access security data via pyarrow and other available bindings.</li>
</ul>
</main>
</div>
<hr/>
<footer class="footer">
<div class="row">
<div class="col-md-9">
<p>Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
<p>&copy; 2016-2024 The Apache Software Foundation</p>
</div>
<div class="col-md-3">
<a class="d-sm-none d-md-inline pr-2" href="https://www.apache.org/events/current-event.html">
<img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache Software Foundation event"/>
</a>
</div>
</div>
</footer>
</div>
</body>
</html>