blob: 87dc84553c331212eb909b474671119a651429a4 [file] [log] [blame]
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `arrow-row/src/variable.rs`."><title>variable.rs - source</title><script>if(window.location.protocol!=="file:")document.head.insertAdjacentHTML("beforeend","SourceSerif4-Regular-46f98efaafac5295.ttf.woff2,FiraSans-Regular-018c141bf0843ffd.woff2,FiraSans-Medium-8f9a781e4970d388.woff2,SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2,SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2".split(",").map(f=>`<link rel="preload" as="font" type="font/woff2" crossorigin href="../../static.files/${f}">`).join(""))</script><link rel="stylesheet" href="../../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../../static.files/rustdoc-dd39b87e5fcfba68.css"><meta name="rustdoc-vars" data-root-path="../../" data-static-root-path="../../static.files/" data-current-crate="arrow_row" data-themes="" data-resource-suffix="" data-rustdoc-version="1.80.0-nightly (8c127df75 2024-05-16)" data-channel="nightly" data-search-js="search-d52510db62a78183.js" data-settings-js="settings-4313503d2e1961c2.js" ><script src="../../static.files/storage-118b08c4c78b968e.js"></script><script defer src="../../static.files/src-script-e66d777a5a92e9b2.js"></script><script defer src="../../src-files.js"></script><script defer src="../../static.files/main-20a3ad099b048cf2.js"></script><noscript><link rel="stylesheet" href="../../static.files/noscript-df360f571f6edeae.css"></noscript><link rel="alternate icon" type="image/png" href="../../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc src"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav 
class="sidebar"><div class="src-sidebar-title"><h2>Files</h2></div></nav><div class="sidebar-resizer"></div><main><rustdoc-search></rustdoc-search><section id="main-content" class="content"><div class="example-wrap"><div data-nosnippet><pre class="src-line-numbers"><a href="#1" id="1">1</a>
<a href="#2" id="2">2</a>
<a href="#3" id="3">3</a>
<a href="#4" id="4">4</a>
<a href="#5" id="5">5</a>
<a href="#6" id="6">6</a>
<a href="#7" id="7">7</a>
<a href="#8" id="8">8</a>
<a href="#9" id="9">9</a>
<a href="#10" id="10">10</a>
<a href="#11" id="11">11</a>
<a href="#12" id="12">12</a>
<a href="#13" id="13">13</a>
<a href="#14" id="14">14</a>
<a href="#15" id="15">15</a>
<a href="#16" id="16">16</a>
<a href="#17" id="17">17</a>
<a href="#18" id="18">18</a>
<a href="#19" id="19">19</a>
<a href="#20" id="20">20</a>
<a href="#21" id="21">21</a>
<a href="#22" id="22">22</a>
<a href="#23" id="23">23</a>
<a href="#24" id="24">24</a>
<a href="#25" id="25">25</a>
<a href="#26" id="26">26</a>
<a href="#27" id="27">27</a>
<a href="#28" id="28">28</a>
<a href="#29" id="29">29</a>
<a href="#30" id="30">30</a>
<a href="#31" id="31">31</a>
<a href="#32" id="32">32</a>
<a href="#33" id="33">33</a>
<a href="#34" id="34">34</a>
<a href="#35" id="35">35</a>
<a href="#36" id="36">36</a>
<a href="#37" id="37">37</a>
<a href="#38" id="38">38</a>
<a href="#39" id="39">39</a>
<a href="#40" id="40">40</a>
<a href="#41" id="41">41</a>
<a href="#42" id="42">42</a>
<a href="#43" id="43">43</a>
<a href="#44" id="44">44</a>
<a href="#45" id="45">45</a>
<a href="#46" id="46">46</a>
<a href="#47" id="47">47</a>
<a href="#48" id="48">48</a>
<a href="#49" id="49">49</a>
<a href="#50" id="50">50</a>
<a href="#51" id="51">51</a>
<a href="#52" id="52">52</a>
<a href="#53" id="53">53</a>
<a href="#54" id="54">54</a>
<a href="#55" id="55">55</a>
<a href="#56" id="56">56</a>
<a href="#57" id="57">57</a>
<a href="#58" id="58">58</a>
<a href="#59" id="59">59</a>
<a href="#60" id="60">60</a>
<a href="#61" id="61">61</a>
<a href="#62" id="62">62</a>
<a href="#63" id="63">63</a>
<a href="#64" id="64">64</a>
<a href="#65" id="65">65</a>
<a href="#66" id="66">66</a>
<a href="#67" id="67">67</a>
<a href="#68" id="68">68</a>
<a href="#69" id="69">69</a>
<a href="#70" id="70">70</a>
<a href="#71" id="71">71</a>
<a href="#72" id="72">72</a>
<a href="#73" id="73">73</a>
<a href="#74" id="74">74</a>
<a href="#75" id="75">75</a>
<a href="#76" id="76">76</a>
<a href="#77" id="77">77</a>
<a href="#78" id="78">78</a>
<a href="#79" id="79">79</a>
<a href="#80" id="80">80</a>
<a href="#81" id="81">81</a>
<a href="#82" id="82">82</a>
<a href="#83" id="83">83</a>
<a href="#84" id="84">84</a>
<a href="#85" id="85">85</a>
<a href="#86" id="86">86</a>
<a href="#87" id="87">87</a>
<a href="#88" id="88">88</a>
<a href="#89" id="89">89</a>
<a href="#90" id="90">90</a>
<a href="#91" id="91">91</a>
<a href="#92" id="92">92</a>
<a href="#93" id="93">93</a>
<a href="#94" id="94">94</a>
<a href="#95" id="95">95</a>
<a href="#96" id="96">96</a>
<a href="#97" id="97">97</a>
<a href="#98" id="98">98</a>
<a href="#99" id="99">99</a>
<a href="#100" id="100">100</a>
<a href="#101" id="101">101</a>
<a href="#102" id="102">102</a>
<a href="#103" id="103">103</a>
<a href="#104" id="104">104</a>
<a href="#105" id="105">105</a>
<a href="#106" id="106">106</a>
<a href="#107" id="107">107</a>
<a href="#108" id="108">108</a>
<a href="#109" id="109">109</a>
<a href="#110" id="110">110</a>
<a href="#111" id="111">111</a>
<a href="#112" id="112">112</a>
<a href="#113" id="113">113</a>
<a href="#114" id="114">114</a>
<a href="#115" id="115">115</a>
<a href="#116" id="116">116</a>
<a href="#117" id="117">117</a>
<a href="#118" id="118">118</a>
<a href="#119" id="119">119</a>
<a href="#120" id="120">120</a>
<a href="#121" id="121">121</a>
<a href="#122" id="122">122</a>
<a href="#123" id="123">123</a>
<a href="#124" id="124">124</a>
<a href="#125" id="125">125</a>
<a href="#126" id="126">126</a>
<a href="#127" id="127">127</a>
<a href="#128" id="128">128</a>
<a href="#129" id="129">129</a>
<a href="#130" id="130">130</a>
<a href="#131" id="131">131</a>
<a href="#132" id="132">132</a>
<a href="#133" id="133">133</a>
<a href="#134" id="134">134</a>
<a href="#135" id="135">135</a>
<a href="#136" id="136">136</a>
<a href="#137" id="137">137</a>
<a href="#138" id="138">138</a>
<a href="#139" id="139">139</a>
<a href="#140" id="140">140</a>
<a href="#141" id="141">141</a>
<a href="#142" id="142">142</a>
<a href="#143" id="143">143</a>
<a href="#144" id="144">144</a>
<a href="#145" id="145">145</a>
<a href="#146" id="146">146</a>
<a href="#147" id="147">147</a>
<a href="#148" id="148">148</a>
<a href="#149" id="149">149</a>
<a href="#150" id="150">150</a>
<a href="#151" id="151">151</a>
<a href="#152" id="152">152</a>
<a href="#153" id="153">153</a>
<a href="#154" id="154">154</a>
<a href="#155" id="155">155</a>
<a href="#156" id="156">156</a>
<a href="#157" id="157">157</a>
<a href="#158" id="158">158</a>
<a href="#159" id="159">159</a>
<a href="#160" id="160">160</a>
<a href="#161" id="161">161</a>
<a href="#162" id="162">162</a>
<a href="#163" id="163">163</a>
<a href="#164" id="164">164</a>
<a href="#165" id="165">165</a>
<a href="#166" id="166">166</a>
<a href="#167" id="167">167</a>
<a href="#168" id="168">168</a>
<a href="#169" id="169">169</a>
<a href="#170" id="170">170</a>
<a href="#171" id="171">171</a>
<a href="#172" id="172">172</a>
<a href="#173" id="173">173</a>
<a href="#174" id="174">174</a>
<a href="#175" id="175">175</a>
<a href="#176" id="176">176</a>
<a href="#177" id="177">177</a>
<a href="#178" id="178">178</a>
<a href="#179" id="179">179</a>
<a href="#180" id="180">180</a>
<a href="#181" id="181">181</a>
<a href="#182" id="182">182</a>
<a href="#183" id="183">183</a>
<a href="#184" id="184">184</a>
<a href="#185" id="185">185</a>
<a href="#186" id="186">186</a>
<a href="#187" id="187">187</a>
<a href="#188" id="188">188</a>
<a href="#189" id="189">189</a>
<a href="#190" id="190">190</a>
<a href="#191" id="191">191</a>
<a href="#192" id="192">192</a>
<a href="#193" id="193">193</a>
<a href="#194" id="194">194</a>
<a href="#195" id="195">195</a>
<a href="#196" id="196">196</a>
<a href="#197" id="197">197</a>
<a href="#198" id="198">198</a>
<a href="#199" id="199">199</a>
<a href="#200" id="200">200</a>
<a href="#201" id="201">201</a>
<a href="#202" id="202">202</a>
<a href="#203" id="203">203</a>
<a href="#204" id="204">204</a>
<a href="#205" id="205">205</a>
<a href="#206" id="206">206</a>
<a href="#207" id="207">207</a>
<a href="#208" id="208">208</a>
<a href="#209" id="209">209</a>
<a href="#210" id="210">210</a>
<a href="#211" id="211">211</a>
<a href="#212" id="212">212</a>
<a href="#213" id="213">213</a>
<a href="#214" id="214">214</a>
<a href="#215" id="215">215</a>
<a href="#216" id="216">216</a>
<a href="#217" id="217">217</a>
<a href="#218" id="218">218</a>
<a href="#219" id="219">219</a>
<a href="#220" id="220">220</a>
<a href="#221" id="221">221</a>
<a href="#222" id="222">222</a>
<a href="#223" id="223">223</a>
<a href="#224" id="224">224</a>
<a href="#225" id="225">225</a>
<a href="#226" id="226">226</a>
<a href="#227" id="227">227</a>
<a href="#228" id="228">228</a>
<a href="#229" id="229">229</a>
<a href="#230" id="230">230</a>
<a href="#231" id="231">231</a>
<a href="#232" id="232">232</a>
<a href="#233" id="233">233</a>
<a href="#234" id="234">234</a>
<a href="#235" id="235">235</a>
<a href="#236" id="236">236</a>
<a href="#237" id="237">237</a>
<a href="#238" id="238">238</a>
<a href="#239" id="239">239</a>
<a href="#240" id="240">240</a>
<a href="#241" id="241">241</a>
<a href="#242" id="242">242</a>
<a href="#243" id="243">243</a>
<a href="#244" id="244">244</a>
<a href="#245" id="245">245</a>
<a href="#246" id="246">246</a>
<a href="#247" id="247">247</a>
<a href="#248" id="248">248</a>
<a href="#249" id="249">249</a>
<a href="#250" id="250">250</a>
<a href="#251" id="251">251</a>
<a href="#252" id="252">252</a>
<a href="#253" id="253">253</a>
<a href="#254" id="254">254</a>
<a href="#255" id="255">255</a>
<a href="#256" id="256">256</a>
<a href="#257" id="257">257</a>
<a href="#258" id="258">258</a>
<a href="#259" id="259">259</a>
<a href="#260" id="260">260</a>
<a href="#261" id="261">261</a>
<a href="#262" id="262">262</a>
<a href="#263" id="263">263</a>
<a href="#264" id="264">264</a>
<a href="#265" id="265">265</a>
<a href="#266" id="266">266</a>
</pre></div><pre class="rust"><code><span class="comment">// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
</span><span class="kw">use </span><span class="kw">crate</span>::null_sentinel;
<span class="kw">use </span>arrow_array::builder::BufferBuilder;
<span class="kw">use </span>arrow_array::<span class="kw-2">*</span>;
<span class="kw">use </span>arrow_buffer::bit_util::ceil;
<span class="kw">use </span>arrow_buffer::MutableBuffer;
<span class="kw">use </span>arrow_data::ArrayDataBuilder;
<span class="kw">use </span>arrow_schema::{DataType, SortOptions};
<span class="doccomment">/// The block size of the variable length encoding, in bytes of value data
///
/// Every encoded block is followed by one extra trailing byte holding either
/// the continuation token or the number of bytes used in the block
</span><span class="kw">pub const </span>BLOCK_SIZE: usize = <span class="number">32</span>;
<span class="doccomment">/// The first block is split into `MINI_BLOCK_COUNT` mini-blocks
///
/// This helps to reduce the space amplification for small strings
</span><span class="kw">pub const </span>MINI_BLOCK_COUNT: usize = <span class="number">4</span>;
<span class="doccomment">/// The mini block size, in bytes of value data (`BLOCK_SIZE / MINI_BLOCK_COUNT`)
</span><span class="kw">pub const </span>MINI_BLOCK_SIZE: usize = BLOCK_SIZE / MINI_BLOCK_COUNT;
<span class="doccomment">/// The continuation token, stored in the trailing byte of a block that is followed by another block
</span><span class="kw">pub const </span>BLOCK_CONTINUATION: u8 = <span class="number">0xFF</span>;
<span class="doccomment">/// Indicates an empty string (first byte of the encoding, bit-inverted for descending order)
</span><span class="kw">pub const </span>EMPTY_SENTINEL: u8 = <span class="number">1</span>;
<span class="doccomment">/// Indicates a non-empty string (first byte of the encoding, bit-inverted for descending order)
</span><span class="kw">pub const </span>NON_EMPTY_SENTINEL: u8 = <span class="number">2</span>;
<span class="doccomment">/// Returns the length of the encoded representation of a byte array, including the leading sentinel byte
///
/// `None` stands for a null value. Only the length of the slice is inspected; the
/// actual computation is delegated to `padded_length`.
</span><span class="attr">#[inline]
</span><span class="kw">pub fn </span>encoded_len(a: <span class="prelude-ty">Option</span>&lt;<span class="kw-2">&amp;</span>[u8]&gt;) -&gt; usize {
padded_length(a.map(|x| x.len()))
}
<span class="doccomment">/// Returns the number of bytes needed to encode a value of the given length
///
/// - `None` (null): 1 byte, the sentinel only
/// - `Some(0)` (empty): 1 byte, the sentinel only
/// - up to `BLOCK_SIZE` bytes: the sentinel plus one `MINI_BLOCK_SIZE + 1` byte
///   mini-block per started `MINI_BLOCK_SIZE` bytes of data
/// - longer values: the sentinel, the `MINI_BLOCK_COUNT` mini-blocks of the first
///   block, and one `BLOCK_SIZE + 1` byte block per started `BLOCK_SIZE` bytes of
///   the remaining data
///
/// NOTE(review): this relies on `ceil(a, b)` being a rounding-up division that
/// yields 0 for `a == 0`; confirm against `arrow_buffer::bit_util::ceil`.
</span><span class="attr">#[inline]
</span><span class="kw">pub fn </span>padded_length(a: <span class="prelude-ty">Option</span>&lt;usize&gt;) -&gt; usize {
<span class="kw">match </span>a {
<span class="prelude-val">Some</span>(a) <span class="kw">if </span>a &lt;= BLOCK_SIZE =&gt; <span class="number">1 </span>+ ceil(a, MINI_BLOCK_SIZE) * (MINI_BLOCK_SIZE + <span class="number">1</span>),
<span class="comment">// Each miniblock ends with a 1 byte continuation, therefore add
// `(MINI_BLOCK_COUNT - 1)` additional bytes over non-miniblock size
// (the remaining 1 of `MINI_BLOCK_COUNT` accounts for the leading sentinel byte)
</span><span class="prelude-val">Some</span>(a) =&gt; MINI_BLOCK_COUNT + ceil(a, BLOCK_SIZE) * (BLOCK_SIZE + <span class="number">1</span>),
<span class="prelude-val">None </span>=&gt; <span class="number">1</span>,
}
}
<span class="doccomment">/// Variable length values are encoded as
///
/// - single `0_u8` if null
/// - single `1_u8` if empty array
/// - `2_u8` if not empty, followed by one or more blocks
///
/// where a block is encoded as
///
/// - [`BLOCK_SIZE`] bytes of string data, padded with 0s
/// - `0xFF_u8` if this is not the last block for this string
/// - otherwise the length of the block as a `u8`
///
/// The first [`BLOCK_SIZE`] bytes of a value are written as up to [`MINI_BLOCK_COUNT`]
/// mini-blocks of [`MINI_BLOCK_SIZE`] bytes, each followed by its own trailing byte
/// (see `encode_one`). For descending order the bytes written for empty and
/// non-empty values are bit-inverted.
///
/// # Arguments
///
/// - `data`: output buffer the encoded values are written into
/// - `offsets`: write positions into `data`; the first entry is skipped and each
///   following entry is paired with one value from `i` and advanced by the number
///   of bytes written for that value
/// - `i`: the values to encode, `None` meaning null
/// - `opts`: sort options forwarded to `encode_one`
///
/// # Panics
///
/// Panics if an offset lies beyond the end of `data`, or if `data` has too little
/// room after an offset to hold the encoded value
</span><span class="kw">pub fn </span>encode&lt;<span class="lifetime">'a</span>, I: Iterator&lt;Item = <span class="prelude-ty">Option</span>&lt;<span class="kw-2">&amp;</span><span class="lifetime">'a </span>[u8]&gt;&gt;&gt;(
data: <span class="kw-2">&amp;mut </span>[u8],
offsets: <span class="kw-2">&amp;mut </span>[usize],
i: I,
opts: SortOptions,
) {
// Entry 0 of `offsets` is skipped; entry `n + 1` is the write cursor for value `n`
<span class="kw">for </span>(offset, maybe_val) <span class="kw">in </span>offsets.iter_mut().skip(<span class="number">1</span>).zip(i) {
<span class="kw-2">*</span>offset += encode_one(<span class="kw-2">&amp;mut </span>data[<span class="kw-2">*</span>offset..], maybe_val, opts);
}
}
/// Encodes a single value into the start of `out`, returning the number of bytes written
///
/// - `None` writes the single byte returned by `null_sentinel(opts)`
/// - an empty slice writes the single byte [`EMPTY_SENTINEL`]
/// - any other value writes [`NON_EMPTY_SENTINEL`] followed by its blocks: values of up
///   to [`BLOCK_SIZE`] bytes use mini-blocks only, longer values use mini-blocks for
///   the first [`BLOCK_SIZE`] bytes and full-size blocks for the rest
///
/// When `opts.descending` is set, every byte written for an empty or non-empty value
/// (sentinel included) is bit-inverted.
///
/// # Panics
///
/// Panics if `out` is too short to hold the encoded value
<span class="kw">pub fn </span>encode_one(out: <span class="kw-2">&amp;mut </span>[u8], val: <span class="prelude-ty">Option</span>&lt;<span class="kw-2">&amp;</span>[u8]&gt;, opts: SortOptions) -&gt; usize {
<span class="kw">match </span>val {
<span class="prelude-val">Some</span>([]) =&gt; {
out[<span class="number">0</span>] = <span class="kw">match </span>opts.descending {
<span class="bool-val">true </span>=&gt; !EMPTY_SENTINEL,
<span class="bool-val">false </span>=&gt; EMPTY_SENTINEL,
};
<span class="number">1
</span>}
<span class="prelude-val">Some</span>(val) =&gt; {
<span class="comment">// Write `2_u8` to demarcate as non-empty, non-null string
// (written un-inverted here; the descending case inverts it below with the rest)
</span>out[<span class="number">0</span>] = NON_EMPTY_SENTINEL;
<span class="kw">let </span>len = <span class="kw">if </span>val.len() &lt;= BLOCK_SIZE {
// Short value: mini-blocks only, plus 1 for the sentinel byte
<span class="number">1 </span>+ encode_blocks::&lt;MINI_BLOCK_SIZE&gt;(<span class="kw-2">&amp;mut </span>out[<span class="number">1</span>..], val)
} <span class="kw">else </span>{
<span class="kw">let </span>(initial, rem) = val.split_at(BLOCK_SIZE);
<span class="kw">let </span>offset = encode_blocks::&lt;MINI_BLOCK_SIZE&gt;(<span class="kw-2">&amp;mut </span>out[<span class="number">1</span>..], initial);
// `offset` counts bytes after the sentinel, so `out[offset]` is the trailing byte
// of the last mini-block. `encode_blocks` stored a length there; replace it with
// a continuation token because full-size blocks follow.
out[offset] = BLOCK_CONTINUATION;
<span class="number">1 </span>+ offset + encode_blocks::&lt;BLOCK_SIZE&gt;(<span class="kw-2">&amp;mut </span>out[<span class="number">1 </span>+ offset..], rem)
};
<span class="kw">if </span>opts.descending {
<span class="comment">// Invert bits
</span>out[..len].iter_mut().for_each(|v| <span class="kw-2">*</span>v = !<span class="kw-2">*</span>v)
}
len
}
<span class="prelude-val">None </span>=&gt; {
out[<span class="number">0</span>] = null_sentinel(opts);
<span class="number">1
</span>}
}
}
<span class="doccomment">/// Writes `val` in `SIZE` blocks with the appropriate continuation tokens
///
/// Each block occupies `SIZE + 1` bytes of `out`: `SIZE` bytes of data followed by a
/// trailing byte. The trailing byte is [`BLOCK_CONTINUATION`] for every block except
/// the last, whose trailing byte holds the number of data bytes used in that block.
///
/// Returns the number of bytes of `out` covered by the written blocks.
///
/// When the last block is only partially filled, the bytes between the copied data
/// and the trailing byte are not written by this function.
/// NOTE(review): presumably callers pass a zero-initialised buffer so that these
/// bytes act as the 0 padding described for the format; verify against the caller.
///
/// # Panics
///
/// Panics if `val` is empty (there is then no trailing byte to write) or if `out`
/// is shorter than the space required for the blocks
</span><span class="attr">#[inline]
</span><span class="kw">fn </span>encode_blocks&lt;<span class="kw">const </span>SIZE: usize&gt;(out: <span class="kw-2">&amp;mut </span>[u8], val: <span class="kw-2">&amp;</span>[u8]) -&gt; usize {
<span class="kw">let </span>block_count = ceil(val.len(), SIZE);
<span class="kw">let </span>end_offset = block_count * (SIZE + <span class="number">1</span>);
<span class="kw">let </span>to_write = <span class="kw-2">&amp;mut </span>out[..end_offset];
<span class="kw">let </span>chunks = val.chunks_exact(SIZE);
<span class="kw">let </span>remainder = chunks.remainder();
// Copy every complete `SIZE` byte chunk of `val` into its `SIZE + 1` byte output block
<span class="kw">for </span>(input, output) <span class="kw">in </span>chunks.clone().zip(to_write.chunks_exact_mut(SIZE + <span class="number">1</span>)) {
<span class="kw">let </span>input: <span class="kw-2">&amp;</span>[u8; SIZE] = input.try_into().unwrap();
<span class="kw">let </span>out_block: <span class="kw-2">&amp;mut </span>[u8; SIZE] = (<span class="kw-2">&amp;mut </span>output[..SIZE]).try_into().unwrap();
<span class="kw-2">*</span>out_block = <span class="kw-2">*</span>input;
<span class="comment">// Indicate that there are further blocks to follow
</span>output[SIZE] = BLOCK_CONTINUATION;
}
<span class="kw">if </span>!remainder.is_empty() {
// The final, partially filled block starts after all complete blocks
<span class="kw">let </span>start_offset = (block_count - <span class="number">1</span>) * (SIZE + <span class="number">1</span>);
to_write[start_offset..start_offset + remainder.len()].copy_from_slice(remainder);
<span class="kw-2">*</span>to_write.last_mut().unwrap() = remainder.len() <span class="kw">as </span>u8;
} <span class="kw">else </span>{
<span class="comment">// We must overwrite the continuation marker written by the loop above
// (the last block is completely full, so its length is `SIZE`)
</span><span class="kw-2">*</span>to_write.last_mut().unwrap() = SIZE <span class="kw">as </span>u8;
}
end_offset
}
/// Walks the blocks of the single encoded value at the start of `row`
///
/// `f` is invoked once per block with the data bytes of that block, exactly as they
/// are stored in `row`; for descending order they are therefore still bit-inverted
/// and the caller has to invert them. `f` is not invoked when the first byte is not
/// the non-empty sentinel (empty or null value).
///
/// Returns the number of bytes of `row` occupied by the encoded value.
///
/// # Panics
///
/// Panics if `row` ends before the encoded value does
<span class="kw">fn </span>decode_blocks(row: <span class="kw-2">&amp;</span>[u8], options: SortOptions, <span class="kw-2">mut </span>f: <span class="kw">impl </span>FnMut(<span class="kw-2">&amp;</span>[u8])) -&gt; usize {
// For descending order the markers were bit-inverted when written, so compare
// against the inverted constants
<span class="kw">let </span>(non_empty_sentinel, continuation) = <span class="kw">match </span>options.descending {
<span class="bool-val">true </span>=&gt; (!NON_EMPTY_SENTINEL, !BLOCK_CONTINUATION),
<span class="bool-val">false </span>=&gt; (NON_EMPTY_SENTINEL, BLOCK_CONTINUATION),
};
<span class="kw">if </span>row[<span class="number">0</span>] != non_empty_sentinel {
<span class="comment">// Empty or null string
</span><span class="kw">return </span><span class="number">1</span>;
}
<span class="comment">// Extracts the block length from the sentinel
</span><span class="kw">let </span>block_len = |sentinel: u8| <span class="kw">match </span>options.descending {
<span class="bool-val">true </span>=&gt; !sentinel <span class="kw">as </span>usize,
<span class="bool-val">false </span>=&gt; sentinel <span class="kw">as </span>usize,
};
// Skip the leading sentinel byte
<span class="kw">let </span><span class="kw-2">mut </span>idx = <span class="number">1</span>;
// The first block is stored as mini-blocks, each with its own trailing byte
<span class="kw">for _ in </span><span class="number">0</span>..MINI_BLOCK_COUNT {
<span class="kw">let </span>sentinel = row[idx + MINI_BLOCK_SIZE];
<span class="kw">if </span>sentinel != continuation {
// Last block of the value: the trailing byte holds the number of data bytes
f(<span class="kw-2">&amp;</span>row[idx..idx + block_len(sentinel)]);
<span class="kw">return </span>idx + MINI_BLOCK_SIZE + <span class="number">1</span>;
}
f(<span class="kw-2">&amp;</span>row[idx..idx + MINI_BLOCK_SIZE]);
idx += MINI_BLOCK_SIZE + <span class="number">1</span>;
}
// All mini-blocks were continued, the rest of the value uses full-size blocks
<span class="kw">loop </span>{
<span class="kw">let </span>sentinel = row[idx + BLOCK_SIZE];
<span class="kw">if </span>sentinel != continuation {
f(<span class="kw-2">&amp;</span>row[idx..idx + block_len(sentinel)]);
<span class="kw">return </span>idx + BLOCK_SIZE + <span class="number">1</span>;
}
f(<span class="kw-2">&amp;</span>row[idx..idx + BLOCK_SIZE]);
idx += BLOCK_SIZE + <span class="number">1</span>;
}
}
<span class="doccomment">/// Returns the number of value bytes stored in the encoded value at the start of `row`
///
/// This is the length of the value after decoding, obtained by summing the block
/// lengths reported by `decode_blocks`; it is 0 for empty and null values.
</span><span class="kw">fn </span>decoded_len(row: <span class="kw-2">&amp;</span>[u8], options: SortOptions) -&gt; usize {
<span class="kw">let </span><span class="kw-2">mut </span>len = <span class="number">0</span>;
decode_blocks(row, options, |block| len += block.len());
len
}
<span class="doccomment">/// Decodes a binary array from `rows` with the provided `options`
///
/// One value is decoded from the start of every entry of `rows`, and each entry is
/// advanced past the bytes that were consumed. A value is treated as null when its
/// first byte equals `null_sentinel(options)`.
///
/// # Panics
///
/// Panics with "offset overflow" if the accumulated value length does not fit the
/// offset type `I`, and if a row is empty or ends before its encoded value does
</span><span class="kw">pub fn </span>decode_binary&lt;I: OffsetSizeTrait&gt;(
rows: <span class="kw-2">&amp;mut </span>[<span class="kw-2">&amp;</span>[u8]],
options: SortOptions,
) -&gt; GenericBinaryArray&lt;I&gt; {
<span class="kw">let </span>len = rows.len();
<span class="kw">let </span><span class="kw-2">mut </span>null_count = <span class="number">0</span>;
// Validity bitmap: entry `x` is valid unless its row starts with the null sentinel
<span class="kw">let </span>nulls = MutableBuffer::collect_bool(len, |x| {
<span class="kw">let </span>valid = rows[x][<span class="number">0</span>] != null_sentinel(options);
null_count += !valid <span class="kw">as </span>usize;
valid
});
// First pass over the rows: total decoded size, used to size the values buffer
<span class="kw">let </span>values_capacity = rows.iter().map(|row| decoded_len(row, options)).sum();
<span class="kw">let </span><span class="kw-2">mut </span>offsets = BufferBuilder::&lt;I&gt;::new(len + <span class="number">1</span>);
offsets.append(I::zero());
<span class="kw">let </span><span class="kw-2">mut </span>values = MutableBuffer::new(values_capacity);
// Second pass: copy the data bytes, advance each row and record the end offset
<span class="kw">for </span>row <span class="kw">in </span>rows {
<span class="kw">let </span>offset = decode_blocks(row, options, |b| values.extend_from_slice(b));
<span class="kw-2">*</span>row = <span class="kw-2">&amp;</span>row[offset..];
offsets.append(I::from_usize(values.len()).expect(<span class="string">"offset overflow"</span>))
}
// The bytes were copied as stored; undo the bit inversion used for descending order
<span class="kw">if </span>options.descending {
values.as_slice_mut().iter_mut().for_each(|o| <span class="kw-2">*</span>o = !<span class="kw-2">*</span>o)
}
<span class="kw">let </span>d = <span class="kw">match </span>I::IS_LARGE {
<span class="bool-val">true </span>=&gt; DataType::LargeBinary,
<span class="bool-val">false </span>=&gt; DataType::Binary,
};
<span class="kw">let </span>builder = ArrayDataBuilder::new(d)
.len(len)
.null_count(null_count)
.null_bit_buffer(<span class="prelude-val">Some</span>(nulls.into()))
.add_buffer(offsets.finish())
.add_buffer(values.into());
<span class="comment">// SAFETY:
// Valid by construction above
</span><span class="kw">unsafe </span>{ GenericBinaryArray::from(builder.build_unchecked()) }
}
<span class="doccomment">/// Decodes a string array from `rows` with the provided `options`
///
/// The values are first decoded with `decode_binary`, which also advances every
/// entry of `rows` past the bytes it consumed.
///
/// If `validate_utf8` is true the binary array is converted through
/// `GenericStringArray::from`. NOTE(review): that conversion is expected to check
/// the data and panic on invalid UTF-8; confirm against `arrow_array`.
/// Otherwise only the data type is replaced and the array is built without checks.
///
/// # Safety
///
/// The row must contain valid UTF-8 data
</span><span class="kw">pub unsafe fn </span>decode_string&lt;I: OffsetSizeTrait&gt;(
rows: <span class="kw-2">&amp;mut </span>[<span class="kw-2">&amp;</span>[u8]],
options: SortOptions,
validate_utf8: bool,
) -&gt; GenericStringArray&lt;I&gt; {
<span class="kw">let </span>decoded = decode_binary::&lt;I&gt;(rows, options);
<span class="kw">if </span>validate_utf8 {
<span class="kw">return </span>GenericStringArray::from(decoded);
}
// Unchecked path: keep the buffers of the binary array, swap in the string data type
<span class="kw">let </span>builder = decoded
.into_data()
.into_builder()
.data_type(GenericStringArray::&lt;I&gt;::DATA_TYPE);
<span class="comment">// SAFETY:
// Row data must have come from a valid UTF-8 array
</span>GenericStringArray::from(builder.build_unchecked())
}
</code></pre></div></section></main></body></html>