r/man/Partitioning.Rd - arrow - Git at Google

 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/dataset.R
 \name{Partitioning}
 \alias{Partitioning}
 \alias{DirectoryPartitioning}
 \alias{HivePartitioning}
 \alias{DirectoryPartitioningFactory}
 \alias{HivePartitioningFactory}
 \title{Define Partitioning for a Dataset}
 \description{
 Pass a \code{Partitioning} object to a \link{FileSystemDatasetFactory}'s \verb{$create()}
 method to indicate how the file paths should be interpreted to define
 partitioning.

 \code{DirectoryPartitioning} describes how to interpret raw path segments, in
 order. For example, \code{schema(year = int16(), month = int8())} would define
 partitions for file paths like "2019/01/file.parquet",
 "2019/02/file.parquet", etc.

 \code{HivePartitioning} is for Hive-style partitioning, which embeds field
 names and values in path segments, such as
 "/year=2019/month=2/data.parquet". Because fields are named in the path
 segments, order does not matter.

 \code{PartitioningFactory} subclasses instruct the \code{DatasetFactory} to detect
 partition features from the file paths.
 }
 \section{Factory}{

 The \code{DirectoryPartitioning$create()} and \code{HivePartitioning$create()}
 methods each take a \link{Schema} as their single input argument. The helper
 function \code{\link[=hive_partition]{hive_partition(...)}} is shorthand for
 \code{HivePartitioning$create(schema(...))}.

 With \code{DirectoryPartitioningFactory$create()}, you can provide just the
 names of the path segments (in our example, \code{c("year", "month")}), and
 the \code{DatasetFactory} will infer the data types for those partition variables.
 \code{HivePartitioningFactory$create()} takes no arguments: both variable names
 and their types can be inferred from the file paths. \code{hive_partition()} with
 no arguments returns a \code{HivePartitioningFactory}.
 }
	% Generated by roxygen2: do not edit by hand
	% Please edit documentation in R/dataset.R
	\name{Partitioning}
	\alias{Partitioning}
	\alias{DirectoryPartitioning}
	\alias{HivePartitioning}
	\alias{DirectoryPartitioningFactory}
	\alias{HivePartitioningFactory}
	\title{Define Partitioning for a Dataset}
	\description{
	Pass a \code{Partitioning} object to a \link{FileSystemDatasetFactory}'s \verb{$create()}
	method to indicate how the file paths should be interpreted to define
	partitioning.

	\code{DirectoryPartitioning} describes how to interpret raw path segments, in
	order. For example, \code{schema(year = int16(), month = int8())} would define
	partitions for file paths like "2019/01/file.parquet",
	"2019/02/file.parquet", etc.

	\code{HivePartitioning} is for Hive-style partitioning, which embeds field
	names and values in path segments, such as
	"/year=2019/month=2/data.parquet". Because fields are named in the path
	segments, order does not matter.

	\code{PartitioningFactory} subclasses instruct the \code{DatasetFactory} to detect
	partition features from the file paths.
	}
	\section{Factory}{

	The \code{DirectoryPartitioning$create()} and \code{HivePartitioning$create()}
	methods each take a \link{Schema} as their single input argument. The helper
	function \code{\link[=hive_partition]{hive_partition(...)}} is shorthand for
	\code{HivePartitioning$create(schema(...))}.

	With \code{DirectoryPartitioningFactory$create()}, you can provide just the
	names of the path segments (in our example, \code{c("year", "month")}), and
	the \code{DatasetFactory} will infer the data types for those partition variables.
	\code{HivePartitioningFactory$create()} takes no arguments: both variable names
	and their types can be inferred from the file paths. \code{hive_partition()} with
	no arguments returns a \code{HivePartitioningFactory}.
	}