| <!DOCTYPE html> |
| <html lang="en-US"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <!-- The above meta tags *must* come first in the head; any other head content must come *after* these tags --> |
| |
| <title>Powered by | Apache Arrow</title> |
| |
| |
| <!-- Begin Jekyll SEO tag v2.8.0 --> |
| <meta name="generator" content="Jekyll v4.3.3" /> |
| <meta property="og:title" content="Powered by" /> |
| <meta property="og:locale" content="en_US" /> |
| <meta name="description" content="List of projects powered by Apache Arrow" /> |
| <meta property="og:description" content="List of projects powered by Apache Arrow" /> |
| <link rel="canonical" href="https://arrow.apache.org/powered_by/" /> |
| <meta property="og:url" content="https://arrow.apache.org/powered_by/" /> |
| <meta property="og:site_name" content="Apache Arrow" /> |
| <meta property="og:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png" /> |
| <meta property="og:type" content="website" /> |
| <meta name="twitter:card" content="summary_large_image" /> |
| <meta property="twitter:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png" /> |
| <meta property="twitter:title" content="Powered by" /> |
| <meta name="twitter:site" content="@ApacheArrow" /> |
| <script type="application/ld+json"> |
| {"@context":"https://schema.org","@type":"WebPage","description":"List of projects powered by Apache Arrow","headline":"Powered by","image":"https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png","publisher":{"@type":"Organization","logo":{"@type":"ImageObject","url":"https://arrow.apache.org/img/logo.png"}},"url":"https://arrow.apache.org/powered_by/"}</script> |
| <!-- End Jekyll SEO tag --> |
| |
| |
| <!-- favicons --> |
| <link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16.png" id="light1"> |
| <link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32.png" id="light2"> |
| <link rel="apple-touch-icon" type="image/png" sizes="180x180" href="/img/apple-touch-icon.png" id="light3"> |
| <link rel="apple-touch-icon" type="image/png" sizes="120x120" href="/img/apple-touch-icon-120x120.png" id="light4"> |
| <link rel="apple-touch-icon" type="image/png" sizes="76x76" href="/img/apple-touch-icon-76x76.png" id="light5"> |
| <link rel="apple-touch-icon" type="image/png" sizes="60x60" href="/img/apple-touch-icon-60x60.png" id="light6"> |
| <!-- dark mode favicons --> |
| <link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16-dark.png" id="dark1"> |
| <link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32-dark.png" id="dark2"> |
| <link rel="apple-touch-icon" type="image/png" sizes="180x180" href="/img/apple-touch-icon-dark.png" id="dark3"> |
| <link rel="apple-touch-icon" type="image/png" sizes="120x120" href="/img/apple-touch-icon-120x120-dark.png" id="dark4"> |
| <link rel="apple-touch-icon" type="image/png" sizes="76x76" href="/img/apple-touch-icon-76x76-dark.png" id="dark5"> |
| <link rel="apple-touch-icon" type="image/png" sizes="60x60" href="/img/apple-touch-icon-60x60-dark.png" id="dark6"> |
| |
| <script> |
| // Swap the favicon set to follow the user's prefers-color-scheme setting. |
| // |
| // The twelve <link> elements are looked up ONCE, while they are all still in |
| // the document, and the references are kept. onUpdate() detaches the set that |
| // should be hidden, so looking the elements up again on a later call would |
| // return null for the detached set: document.head.append(null) would then |
| // insert the text "null" into <head>, and the following toggle would throw |
| // on null.remove(). |
| var faviconSlots = [1, 2, 3, 4, 5, 6]; |
| var lightIcons = faviconSlots.map(function (n) { |
|   return document.querySelector('link#light' + n); |
| }).filter(Boolean);  // tolerate a missing <link> instead of throwing later |
| var darkIcons = faviconSlots.map(function (n) { |
|   return document.querySelector('link#dark' + n); |
| }).filter(Boolean); |
| |
| var matcher = window.matchMedia('(prefers-color-scheme: dark)'); |
| |
| // Detach the icon set that does not match the current color scheme and |
| // (re-)attach the one that does. Safe to call any number of times. |
| function onUpdate() { |
|   var useDark = matcher.matches; |
|   var hidden = useDark ? lightIcons : darkIcons; |
|   var shown = useDark ? darkIcons : lightIcons; |
|   hidden.forEach(function (icon) { |
|     icon.remove(); |
|   }); |
|   // append() re-attaches a detached node, or moves an attached one to the |
|   // end of <head>; either way the shown set ends up last, in slot order. |
|   shown.forEach(function (icon) { |
|     document.head.append(icon); |
|   }); |
| } |
| |
| if (typeof matcher.addEventListener === 'function') { |
|   matcher.addEventListener('change', onUpdate); |
| } else { |
|   // Engines where MediaQueryList is not an EventTarget only offer the |
|   // deprecated addListener(). |
|   matcher.addListener(onUpdate); |
| } |
| onUpdate(); |
| </script> |
| |
| <!-- Explicit https: a protocol-relative URL inherits the page scheme and fails when the page is opened from file:// --> |
| <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900"> |
| |
| <link href="/css/main.css" rel="stylesheet"> |
| <link href="/css/syntax.css" rel="stylesheet"> |
| <script src="/javascript/main.js"></script> |
| |
| <!-- Matomo --> |
| <script> |
| // Matomo bootstrap: queue tracker commands on the shared _paq array, then |
| // inject the asynchronously loaded matomo.js before the first <script>. |
| var _paq = window._paq = window._paq || []; |
| /* Tracker methods such as "setCustomDimension" must be queued before "trackPageView". */ |
| /* Cookie tracking is disabled explicitly to avoid privacy issues. */ |
| _paq.push(['disableCookies']); |
| _paq.push(['trackPageView']); |
| _paq.push(['enableLinkTracking']); |
| (function() { |
|   var baseUrl = "https://analytics.apache.org/"; |
|   _paq.push(['setTrackerUrl', baseUrl + 'matomo.php']); |
|   _paq.push(['setSiteId', '20']); |
| |
|   var loader = document.createElement('script'); |
|   var firstScript = document.getElementsByTagName('script')[0]; |
|   loader.async = true; |
|   loader.src = baseUrl + 'matomo.js'; |
|   firstScript.parentNode.insertBefore(loader, firstScript); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| |
| </head> |
| |
| |
| <body class="wrap"> |
| <header> |
| <nav class="navbar navbar-expand-md navbar-dark bg-dark"> |
| |
| <!-- The logo is the link's only content, so its alt text is the link's accessible name --> |
| <a class="navbar-brand no-padding" href="/"><img src="/img/arrow-inverse-300px.png" height="40" alt="Apache Arrow home"/></a> |
| |
| <button class="navbar-toggler ml-auto" type="button" data-toggle="collapse" data-target="#arrow-navbar" aria-controls="arrow-navbar" aria-expanded="false" aria-label="Toggle navigation"> |
| <span class="navbar-toggler-icon"></span> |
| </button> |
| |
| <!-- Collect the nav links, forms, and other content for toggling --> |
| <div class="collapse navbar-collapse justify-content-end" id="arrow-navbar"> |
| <ul class="nav navbar-nav"> |
| <!-- Plain navigation links: no button role or popup state, since they simply navigate --> |
| <li class="nav-item"><a class="nav-link" href="/overview/">Overview</a></li> |
| <li class="nav-item"><a class="nav-link" href="/faq/">FAQ</a></li> |
| <li class="nav-item"><a class="nav-link" href="/blog">Blog</a></li> |
| <li class="nav-item dropdown"> |
| <a class="nav-link dropdown-toggle" href="#" |
| id="navbarDropdownGetArrow" role="button" data-toggle="dropdown" |
| aria-haspopup="true" aria-expanded="false"> |
| Get Arrow |
| </a> |
| <div class="dropdown-menu" aria-labelledby="navbarDropdownGetArrow"> |
| <a class="dropdown-item" href="/install/">Install</a> |
| <a class="dropdown-item" href="/release/">Releases</a> |
| <a class="dropdown-item" href="https://github.com/apache/arrow">Source Code</a> |
| </div> |
| </li> |
| <li class="nav-item dropdown"> |
| <a class="nav-link dropdown-toggle" href="#" |
| id="navbarDropdownDocumentation" role="button" data-toggle="dropdown" |
| aria-haspopup="true" aria-expanded="false"> |
| Documentation |
| </a> |
| <div class="dropdown-menu" aria-labelledby="navbarDropdownDocumentation"> |
| <a class="dropdown-item" href="/docs">Project Docs</a> |
| <a class="dropdown-item" href="/docs/format/Columnar.html">Format</a> |
| <hr/> |
| <a class="dropdown-item" href="/docs/c_glib">C GLib</a> |
| <a class="dropdown-item" href="/docs/cpp">C++</a> |
| <a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/csharp/README.md">C#</a> |
| <a class="dropdown-item" href="https://godoc.org/github.com/apache/arrow/go/arrow">Go</a> |
| <a class="dropdown-item" href="/docs/java">Java</a> |
| <a class="dropdown-item" href="/docs/js">JavaScript</a> |
| <a class="dropdown-item" href="/julia/">Julia</a> |
| <a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/matlab/README.md">MATLAB</a> |
| <a class="dropdown-item" href="/docs/python">Python</a> |
| <a class="dropdown-item" href="/docs/r">R</a> |
| <a class="dropdown-item" href="https://github.com/apache/arrow/blob/main/ruby/README.md">Ruby</a> |
| <a class="dropdown-item" href="https://docs.rs/arrow/latest">Rust</a> |
| </div> |
| </li> |
| <li class="nav-item dropdown"> |
| <a class="nav-link dropdown-toggle" href="#" |
| id="navbarDropdownSubprojects" role="button" data-toggle="dropdown" |
| aria-haspopup="true" aria-expanded="false"> |
| Subprojects |
| </a> |
| <div class="dropdown-menu" aria-labelledby="navbarDropdownSubprojects"> |
| <a class="dropdown-item" href="/adbc">ADBC</a> |
| <a class="dropdown-item" href="/docs/format/Flight.html">Arrow Flight</a> |
| <a class="dropdown-item" href="/docs/format/FlightSql.html">Arrow Flight SQL</a> |
| <a class="dropdown-item" href="https://datafusion.apache.org">DataFusion</a> |
| <a class="dropdown-item" href="/nanoarrow">nanoarrow</a> |
| </div> |
| </li> |
| <li class="nav-item dropdown"> |
| <a class="nav-link dropdown-toggle" href="#" |
| id="navbarDropdownCommunity" role="button" data-toggle="dropdown" |
| aria-haspopup="true" aria-expanded="false"> |
| Community |
| </a> |
| <div class="dropdown-menu" aria-labelledby="navbarDropdownCommunity"> |
| <a class="dropdown-item" href="/community/">Communication</a> |
| <a class="dropdown-item" href="/docs/developers/index.html">Contributing</a> |
| <a class="dropdown-item" href="https://github.com/apache/arrow/issues">Issue Tracker</a> |
| <a class="dropdown-item" href="/committers/">Governance</a> |
| <a class="dropdown-item" href="/use_cases/">Use Cases</a> |
| <a class="dropdown-item" href="/powered_by/">Powered By</a> |
| <a class="dropdown-item" href="/visual_identity/">Visual Identity</a> |
| <a class="dropdown-item" href="/security/">Security</a> |
| <a class="dropdown-item" href="https://www.apache.org/foundation/policies/conduct.html">Code of Conduct</a> |
| </div> |
| </li> |
| <li class="nav-item dropdown"> |
| <a class="nav-link dropdown-toggle" href="#" |
| id="navbarDropdownASF" role="button" data-toggle="dropdown" |
| aria-haspopup="true" aria-expanded="false"> |
| ASF Links |
| </a> |
| <div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdownASF"> |
| <a class="dropdown-item" href="https://www.apache.org/">ASF Website</a> |
| <a class="dropdown-item" href="https://www.apache.org/licenses/">License</a> |
| <a class="dropdown-item" href="https://www.apache.org/foundation/sponsorship.html">Donate</a> |
| <a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a> |
| <a class="dropdown-item" href="https://www.apache.org/security/">Security</a> |
| </div> |
| </li> |
| </ul> |
| </div><!-- /.navbar-collapse --> |
| </nav> |
| |
| </header> |
| |
| <div class="container p-4 pt-5"> |
| <div class="col-lg-8 mx-auto"> |
| <main role="main" class="pb-5"> |
| <!-- |
| |
| --> |
| |
| <h2 id="project-and-product-names-using-apache-arrow">Project and Product Names Using “Apache Arrow”</h2> |
| |
| <p>Organizations creating products and projects for use with Apache Arrow, along |
| with associated marketing materials, should take care to respect the trademark |
| in “Apache Arrow” and its logo. Please refer to <a href="https://www.apache.org/foundation/marks/">ASF Trademarks Guidance</a> |
| and associated <a href="https://www.apache.org/foundation/marks/faq/">FAQ</a> for comprehensive and authoritative guidance on proper |
| usage of ASF trademarks.</p> |
| |
| <p>Names that do not include “Apache Arrow” at all have no potential trademark |
| issue with the Apache Arrow project. This is recommended.</p> |
| |
| <p>Names like “Apache Arrow BigCoProduct” are not OK, nor are names including |
| “Apache Arrow” in general. The above links, however, describe some exceptions, |
| like for names such as “BigCoProduct, powered by Apache Arrow” or |
| “BigCoProduct for Apache Arrow”.</p> |
| |
| <p>It is common practice to create software identifiers (Maven coordinates, module |
| names, etc.) like “arrow-foo”. These are permitted. Nominative use of trademarks |
| in descriptions is also always allowed, as in “BigCoProduct is a widget for |
| Apache Arrow”.</p> |
| |
| <p>Projects and documents that want to include a logo for Apache Arrow should use |
| the official logo, and adhere to the guidelines listed on the <a href="/visual_identity/">Visual Identity</a> page:</p> |
| |
| <p><img src="/img/arrow-logo_horizontal_black-txt_white-bg.png" style="max-width: 100%;" alt="Apache Arrow logo" /></p> |
| |
| <h2 id="projects-powered-by-apache-arrow">Projects Powered By Apache Arrow</h2> |
| |
| <p>To add yourself to the list, please open a <a href="https://github.com/apache/arrow-site/edit/main/powered_by.md">pull request</a> adding your |
| organization name, URL, a list of which Arrow components you are using, and a |
| short description of your use case.</p> |
| |
| <ul> |
| <li><strong><a href="https://parquet.apache.org/">Apache Parquet</a>:</strong> A columnar storage format available to any project |
| in the Hadoop ecosystem, regardless of the choice of data processing |
| framework, data model or programming language. The C++ and Java |
| implementations provide vectorized reads and writes to/from Arrow data |
| structures.</li> |
| <li><strong><a href="https://spark.apache.org/">Apache Spark</a>:</strong> Apache Spark™ is a fast and general engine for |
| large-scale data processing. Spark uses Apache Arrow to |
| <ol> |
| <li>improve performance of conversion between Spark DataFrame and pandas DataFrame</li> |
| <li>enable a set of vectorized user-defined functions (<code class="language-plaintext highlighter-rouge">pandas_udf</code>) in PySpark.</li> |
| </ol> |
| </li> |
| <li><strong><a href="https://www.esri.com/en-us/arcgis/products/arcgis-python-libraries/libraries/arcpy">ArcPy</a>:</strong> ArcPy is Esri’s comprehensive and powerful API for working within |
| the ArcGIS suite of products to perform and automate spatial analysis, data management, |
| and conversion tasks (license required). ArcPy supports Arrow Tables as input |
| and output.</li> |
| <li><strong><a href="https://github.com/awslabs/aws-data-wrangler">AWS Data Wrangler</a>:</strong> Extends the power of Pandas library to AWS connecting |
| DataFrames and AWS data related services such as Amazon Redshift, AWS Glue, Amazon Athena, |
| Amazon EMR, Amazon QuickSight, etc.</li> |
| <li><strong><a href="https://bodo.ai">Bodo</a>:</strong> Bodo is a universal Python analytics engine that democratizes High Performance |
| Computing (HPC) architecture for mainstream enterprises, allowing Python analytics workloads to |
| scale efficiently. Bodo uses Arrow to support I/O for Parquet files, as well as internal support for data operations.</li> |
| <li><strong><a href="https://clickhouse.com/docs/en/interfaces/formats/#data-format-arrow">ClickHouse</a>:</strong> An open-source analytical database management system. |
| ClickHouse is using Apache Arrow for data import and export, and for direct querying of external datasets |
| in Arrow, ArrowStream, Parquet and ORC formats.</li> |
| <li><strong><a href="https://github.com/cloudquery/cloudquery">CloudQuery</a></strong>: An open-source high performance ELT framework powered by Apache Arrow’s type system.</li> |
| <li><strong><a href="https://cylondata.org/">Cylon</a>:</strong> An open-source high performance distributed data processing library |
| that can be seamlessly integrated with existing Big Data and AI/ML frameworks. Cylon |
| uses Arrow memory format and exposes language bindings to C++, Java, and Python.</li> |
| <li><strong><a href="https://github.com/dask/dask">Dask</a>:</strong> Python library for parallel and distributed execution of |
| dynamic task graphs. Dask supports using pyarrow for accessing Parquet |
| files</li> |
| <li><strong><a href="https://github.com/RandomFractals/vscode-data-preview">Data Preview</a>:</strong> Data Preview is a Visual Studio Code extension |
| for viewing text and binary data files. Data Preview uses Arrow JS API |
| for loading, transforming and saving Arrow data files and schemas.</li> |
| <li><strong><a href="https://github.com/delta-io/delta-rs">delta-rs</a>:</strong> A native Rust library for Delta Lake, with bindings to Python. |
| It can be integrated with Apache Arrow, increasing the efficiency of data exchange |
| over the network</li> |
| <li><strong><a href="https://www.dremio.com/">Dremio</a>:</strong> A self-service data platform. Dremio makes it easy for |
| users to discover, curate, accelerate, and share data from any source. |
| It includes a distributed SQL execution engine based on Apache Arrow. |
| Dremio reads data from any source (RDBMS, HDFS, S3, NoSQL) into Arrow |
| buffers, and provides fast SQL access via ODBC, JDBC, and REST for BI, |
| Python, R, and more (all backed by Apache Arrow).</li> |
| <li><strong><a href="https://github.com/uwdata/falcon">Falcon</a>:</strong> An interactive data exploration tool with coordinated views. |
| Falcon loads Arrow files using the Arrow JavaScript module. Since Arrow does |
| not need to be parsed (like text-based formats like CSV and JSON), startup cost |
| is significantly minimized.</li> |
| <li><strong><a href="https://fastdata.io/">FASTDATA.io</a></strong>: Plasma Engine (unrelated to Arrow’s Plasma In-Memory |
| Object Store) exploits the massive parallel processing power of GPUs for |
| stream and batch processing. It supports Arrow as input and output, uses |
| Arrow internally to maximize performance, and can be used with existing |
| Apache Spark™ APIs.</li> |
| <li><strong><a href="https://github.com/abs-tudelft/fletcher">Fletcher</a>:</strong> Fletcher is a framework that can integrate FPGA |
| accelerators with tools and frameworks that use the Apache Arrow in-memory |
| format. From a set of Arrow Schemas, Fletcher generates highly optimized |
| hardware structures that allow accelerator kernels to read and write |
| RecordBatches at system bandwidth through easy-to-use interfaces.</li> |
| <li><strong><a href="https://github.com/locationtech/geomesa">GeoMesa</a>:</strong> A suite of tools that enables large-scale geospatial query |
| and analytics on distributed computing systems. GeoMesa supports query |
| results in the Arrow IPC format, which can then be used for in-browser |
| visualizations and/or further analytics.</li> |
| <li><strong><a href="https://gpuopenanalytics.com">GOAI</a>:</strong> Open GPU-Accelerated Analytics Initiative for Arrow-powered |
| analytics across GPU tools and vendors</li> |
| <li><strong><a href="https://github.com/coady/graphique">graphique</a>:</strong> GraphQL service for Arrow tables and Parquet data sets. The schema for a query API is derived automatically.</li> |
| <li><strong><a href="https://www.graphistry.com">Graphistry</a>:</strong> Supercharged Visual Investigation Platform used by |
| teams for security, anti-fraud, and related investigations. The Graphistry |
| team uses Arrow in its NodeJS GPU backend and client libraries, and is an |
| early contributing member to GOAI and Arrow[JS] focused on bringing these |
| technologies to the enterprise.</li> |
| <li><strong><a href="https://github.com/GrepTimeTeam/greptimedb/">GreptimeDB</a>:</strong> GreptimeDB is an open-source time-series database with a special focus on scalability, analytical capabilities and efficiency. |
| It’s designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage. |
| GreptimeDB uses Apache Arrow as the memory model and Apache Parquet as the persistent file format.</li> |
| <li><strong><a href="https://hash.ai">HASH</a>:</strong> HASH is an open-core platform for building, running, and learning |
| from simulations, with an in-browser IDE. HASH Engine uses Apache Arrow to power |
| the datastore for simulation state during computation, enabling zero-copy data</li> |
| <li><strong><a href="https://github.com/huggingface/datasets">Hugging Face Datasets</a>:</strong> A machine learning datasets library and hub |
| for accessing, processing and sharing datasets for audio, computer vision, |
| natural language processing, and tabular tasks. Dataset objects are wrappers around |
| Arrow Tables and memory-mapped from disk to support out-of-core parallel processing |
| for machine learning workflows.</li> |
| <li><strong><a href="https://iceburst.io">iceburst</a>:</strong> A real-time data lake for monitoring and security built |
| directly on top of Amazon S3. Our approach is simple: ingest the OpenTelemetry data in an S3 bucket as |
| Parquet files in Iceberg table format and query them using DuckDB with millisecond retrieval and zero egress cost. |
| Parquet is converted to Arrow format in-memory enhancing both speed and efficiency.</li> |
| <li><strong><a href="https://www.inaccel.com/">InAccel</a>:</strong> A machine learning acceleration framework which leverages |
| FPGAs-as-a-service. InAccel supports dataframes backed by Apache Arrow to |
| serve as input for our implemented ML algorithms. Those dataframes can be |
| accessed from the FPGAs with a single DMA operation by implementing a shared |
| memory communication schema.</li> |
| <li><strong><a href="https://github.com/influxdata/influxdb_iox">InfluxDB IOx</a>:</strong> InfluxDB IOx is an open source time series database |
| written in Rust. It is the future core of InfluxDB; supporting |
| industry standard SQL, InfluxQL, and Flux. IOx uses Apache Arrow as its in-memory |
| format, Apache Parquet as its persistence format and Apache Arrow Flight for RPC.</li> |
| <li><strong><a href="https://kaskada.io">Kaskada</a>:</strong> An open source event processing engine written in Rust and |
| built on Apache Arrow.</li> |
| <li><strong><a href="https://github.com/gpuopenanalytics/libgdf">libgdf</a>:</strong> A C library of CUDA-based analytics functions and GPU IPC |
| support for structured data. Uses the Arrow IPC format and targets the Arrow |
| memory layout in its analytic functions. This work is part of the <a href="https://gpuopenanalytics.com/">GPU Open |
| Analytics Initiative</a></li> |
| <li><strong><a href="https://www.mathworks.com">MATLAB</a>:</strong> A numerical computing environment for engineers and |
| scientists. MATLAB uses Apache Arrow to support reading and writing Parquet |
| and Feather files.</li> |
| <li><strong><a href="https://github.com/omnisci/mapd-core">OmniSci</a> (formerly MapD):</strong> In-memory columnar SQL engine designed to run |
| on both GPUs and CPUs. OmniSci supports Arrow for data ingest and data interchange |
| via CUDA IPC handles. This work is part of the <a href="https://gpuopenanalytics.com/">GPU Open Analytics Initiative</a></li> |
| <li><strong><a href="https://openobserve.ai">OpenObserve</a>:</strong> Petabyte scale observability tool for logs, metrics, and traces with visualizations. High focus on usability and simplicity. Supports opentelemetry and many existing log and metrics forwarders.</li> |
| <li><strong><a href="https://pandas.pydata.org">pandas</a>:</strong> data analysis toolkit for Python programmers. pandas |
| supports reading and writing Parquet files using pyarrow. Several pandas |
| core developers are also contributors to Apache Arrow.</li> |
| <li><strong><a href="https://github.com/innobi/pantab">pantab</a>:</strong> Allows high performance read/writes of popular dataframe libraries |
| like pandas, polars, pyarrow, etc… to/from Tableau’s Hyper database. pantab uses nanoarrow |
| and the Arrow PyCapsule interface to make that exchange process seamless.</li> |
| <li><strong><a href="https://parseable.io">Parseable</a>:</strong> Log analytics platform built for scale and usability. Ingest logs from anywhere and unify logs with Parseable. Parseable uses Arrow as the intermediary, in-memory data format for log data ingestion.</li> |
| <li><strong><a href="https://github.com/jpmorganchase/perspective">Perspective</a>:</strong> Perspective is a streaming data visualization engine in JavaScript for building real-time & user-configurable analytics entirely in the browser.</li> |
| <li><strong><a href="https://github.com/uber/petastorm">Petastorm</a>:</strong> Petastorm enables single machine or distributed training |
| and evaluation of deep learning models directly from datasets in Apache |
| Parquet format. Petastorm supports popular Python-based machine learning |
| (ML) frameworks such as Tensorflow, Pytorch, and PySpark. It can also be |
| used from pure Python code.</li> |
| <li><strong><a href="https://github.com/pola-rs/polars">Polars</a>:</strong> Polars is a blazingly fast DataFrame library and query engine |
| that aims to utilize modern hardware efficiently |
| (e.g. multi-threading, SIMD vectorization, hiding memory latencies). |
| Polars is built upon Apache Arrow and uses its columnar memory, compute kernels, |
| and several IO utilities. Polars is written in Rust and available in Rust and Python.</li> |
| <li><strong><a href="https://github.com/tradewelltech/protarrow">protarrow</a>:</strong> A Python library for converting from Apache Arrow to Protocol Buffers and back.</li> |
| <li><strong><a href="https://quiltdata.com/">Quilt Data</a>:</strong> Quilt is a data package manager, designed to make |
| managing data as easy as managing code. It supports Parquet format via |
| pyarrow for data access.</li> |
| <li><strong><a href="https://github.com/ray-project/ray">Ray</a>:</strong> A flexible, high-performance distributed execution framework |
| with a focus on machine learning and AI applications. Uses Arrow to |
| efficiently store Python data structures containing large arrays of numerical |
| data. Data can be accessed with zero-copy by multiple processes using the |
| <a href="https://ray-project.github.io/2017/08/08/plasma-in-memory-object-store.html">Plasma shared memory object store</a> which originated from Ray and is part |
| of Arrow now.</li> |
| <li><strong><a href="https://red-data-tools.github.io/">Red Data Tools</a>:</strong> A project that provides data processing |
| tools for Ruby. It provides <a href="https://github.com/red-data-tools/red-arrow/">Red Arrow</a>, a set of Ruby bindings |
| for Apache Arrow based on Apache Arrow GLib. Red Arrow is a core |
| library for it. It also provides many Ruby libraries to integrate |
| existing Ruby libraries with Apache Arrow. They use Red Arrow.</li> |
| <li><strong><a href="https://www.paradigm4.com">SciDB</a>:</strong> Paradigm4’s SciDB is a scalable, scientific |
| database management system that helps researchers integrate and |
| analyze diverse, multi-dimensional, high resolution data - like |
| genomic, clinical, images, sensor, environmental, and IoT data - |
| all in one analytical platform. <a href="https://github.com/Paradigm4/stream">SciDB streaming</a> and |
| <a href="https://github.com/Paradigm4/accelerated_io_tools">accelerated_io_tools</a> are powered by Apache Arrow.</li> |
| <li><strong><a href="https://github.com/TileDB-Inc/TileDB">TileDB</a>:</strong> TileDB is an open-source, cloud-optimized engine for storing |
| and accessing dense/sparse multi-dimensional arrays and dataframes. It is an |
| embeddable C++ library that works on Linux, macOS, and Windows, which comes |
| with numerous APIs and integrations. We use Arrow in our <a href="https://github.com/TileDB-Inc/TileDB-VCF">TileDB-VCF</a> |
| project for genomics to achieve zero-copying when accessing TileDB data from |
| Spark and Dask.</li> |
| <li><strong><a href="https://github.com/blue-yonder/turbodbc">Turbodbc</a>:</strong> Python module to access relational databases via the Open |
| Database Connectivity (ODBC) interface. It provides the ability to return |
| Arrow Tables and RecordBatches in addition to the Python Database API |
| Specification 2.0.</li> |
| <li><strong><a href="https://unum.cloud/ukv/">UKV</a>:</strong> Open NoSQL binary database interface, with support for |
| LevelDB, RocksDB, UDisk, and in-memory Key-Value Stores. It extends |
| their functionality to support Document Collections, Graphs, and Vector |
| Search, similar to RedisJSON, RedisGraph, and RediSearch, and brings |
| familiar structured bindings on top, mimicking tools like pandas and NetworkX. |
| All UKV interfaces are compatible with Apache Arrow columnar format, |
| which minimizes copies when passing data between different language |
| runtimes. UKV also uses Apache Arrow Flight RPC for client-server communication.</li> |
| <li><strong><a href="https://github.com/vaexio/vaex">Vaex</a>:</strong> Out-of-Core hybrid Apache Arrow/NumPy DataFrame for Python, |
| ML, visualize and explore big tabular data at a billion rows per second.</li> |
| <li><strong><a href="https://github.com/tenzir/vast">VAST</a>:</strong> A network telemetry engine for data-driven security |
| investigations. VAST uses Arrow as a standardized data plane to provide a |
| high-bandwidth output path for downstream analytics. This makes it easy and |
| efficient to access security data via pyarrow and other available bindings.</li> |
| </ul> |
| |
| |
| </main> |
| </div> |
| |
| <hr/> |
| <footer class="footer"> |
| <div class="row"> |
| <div class="col-md-9"> |
| <p>Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p> |
| <p>© 2016-2024 The Apache Software Foundation</p> |
| </div> |
| <div class="col-md-3"> |
| <a class="d-sm-none d-md-inline pr-2" href="https://www.apache.org/events/current-event.html"> |
| <img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache Software Foundation event"/> |
| </a> |
| </div> |
| </div> |
| </footer> |
| |
| </div> |
| </body> |
| </html> |