| <!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="Source of the Rust file `/root/.cargo/registry/src/github.com-1ecc6299db9ec823/unicode-ident-1.0.9/src/lib.rs`."><meta name="keywords" content="rust, rustlang, rust-lang"><title>lib.rs - source</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../../SourceSerif4-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../FiraSans-Regular.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../FiraSans-Medium.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../SourceCodePro-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../SourceSerif4-Bold.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../../SourceCodePro-Semibold.ttf.woff2"><link rel="stylesheet" href="../../normalize.css"><link rel="stylesheet" href="../../rustdoc.css" id="mainThemeStyle"><link rel="stylesheet" href="../../ayu.css" disabled><link rel="stylesheet" href="../../dark.css" disabled><link rel="stylesheet" href="../../light.css" id="themeStyle"><script id="default-settings" ></script><script src="../../storage.js"></script><script defer src="../../source-script.js"></script><script defer src="../../source-files.js"></script><script defer src="../../main.js"></script><noscript><link rel="stylesheet" href="../../noscript.css"></noscript><link rel="alternate icon" type="image/png" href="../../favicon-16x16.png"><link rel="alternate icon" type="image/png" href="../../favicon-32x32.png"><link rel="icon" type="image/svg+xml" href="../../favicon.svg"></head><body class="rustdoc source"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav 
class="sidebar"><a class="sidebar-logo" href="../../unicode_ident/index.html"><div class="logo-container"><img class="rust-logo" src="../../rust-logo.svg" alt="logo"></div></a></nav><main><div class="width-limiter"><nav class="sub"><a class="sub-logo-container" href="../../unicode_ident/index.html"><img class="rust-logo" src="../../rust-logo.svg" alt="logo"></a><form class="search-form"><div class="search-container"><span></span><input class="search-input" name="search" autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to search, ‘?’ for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../../wheel.svg"></a></div></div></form></nav><section id="main-content" class="content"><div class="example-wrap"><pre class="src-line-numbers"><span id="1">1</span> |
| <span id="2">2</span> |
| <span id="3">3</span> |
| <span id="4">4</span> |
| <span id="5">5</span> |
| <span id="6">6</span> |
| <span id="7">7</span> |
| <span id="8">8</span> |
| <span id="9">9</span> |
| <span id="10">10</span> |
| <span id="11">11</span> |
| <span id="12">12</span> |
| <span id="13">13</span> |
| <span id="14">14</span> |
| <span id="15">15</span> |
| <span id="16">16</span> |
| <span id="17">17</span> |
| <span id="18">18</span> |
| <span id="19">19</span> |
| <span id="20">20</span> |
| <span id="21">21</span> |
| <span id="22">22</span> |
| <span id="23">23</span> |
| <span id="24">24</span> |
| <span id="25">25</span> |
| <span id="26">26</span> |
| <span id="27">27</span> |
| <span id="28">28</span> |
| <span id="29">29</span> |
| <span id="30">30</span> |
| <span id="31">31</span> |
| <span id="32">32</span> |
| <span id="33">33</span> |
| <span id="34">34</span> |
| <span id="35">35</span> |
| <span id="36">36</span> |
| <span id="37">37</span> |
| <span id="38">38</span> |
| <span id="39">39</span> |
| <span id="40">40</span> |
| <span id="41">41</span> |
| <span id="42">42</span> |
| <span id="43">43</span> |
| <span id="44">44</span> |
| <span id="45">45</span> |
| <span id="46">46</span> |
| <span id="47">47</span> |
| <span id="48">48</span> |
| <span id="49">49</span> |
| <span id="50">50</span> |
| <span id="51">51</span> |
| <span id="52">52</span> |
| <span id="53">53</span> |
| <span id="54">54</span> |
| <span id="55">55</span> |
| <span id="56">56</span> |
| <span id="57">57</span> |
| <span id="58">58</span> |
| <span id="59">59</span> |
| <span id="60">60</span> |
| <span id="61">61</span> |
| <span id="62">62</span> |
| <span id="63">63</span> |
| <span id="64">64</span> |
| <span id="65">65</span> |
| <span id="66">66</span> |
| <span id="67">67</span> |
| <span id="68">68</span> |
| <span id="69">69</span> |
| <span id="70">70</span> |
| <span id="71">71</span> |
| <span id="72">72</span> |
| <span id="73">73</span> |
| <span id="74">74</span> |
| <span id="75">75</span> |
| <span id="76">76</span> |
| <span id="77">77</span> |
| <span id="78">78</span> |
| <span id="79">79</span> |
| <span id="80">80</span> |
| <span id="81">81</span> |
| <span id="82">82</span> |
| <span id="83">83</span> |
| <span id="84">84</span> |
| <span id="85">85</span> |
| <span id="86">86</span> |
| <span id="87">87</span> |
| <span id="88">88</span> |
| <span id="89">89</span> |
| <span id="90">90</span> |
| <span id="91">91</span> |
| <span id="92">92</span> |
| <span id="93">93</span> |
| <span id="94">94</span> |
| <span id="95">95</span> |
| <span id="96">96</span> |
| <span id="97">97</span> |
| <span id="98">98</span> |
| <span id="99">99</span> |
| <span id="100">100</span> |
| <span id="101">101</span> |
| <span id="102">102</span> |
| <span id="103">103</span> |
| <span id="104">104</span> |
| <span id="105">105</span> |
| <span id="106">106</span> |
| <span id="107">107</span> |
| <span id="108">108</span> |
| <span id="109">109</span> |
| <span id="110">110</span> |
| <span id="111">111</span> |
| <span id="112">112</span> |
| <span id="113">113</span> |
| <span id="114">114</span> |
| <span id="115">115</span> |
| <span id="116">116</span> |
| <span id="117">117</span> |
| <span id="118">118</span> |
| <span id="119">119</span> |
| <span id="120">120</span> |
| <span id="121">121</span> |
| <span id="122">122</span> |
| <span id="123">123</span> |
| <span id="124">124</span> |
| <span id="125">125</span> |
| <span id="126">126</span> |
| <span id="127">127</span> |
| <span id="128">128</span> |
| <span id="129">129</span> |
| <span id="130">130</span> |
| <span id="131">131</span> |
| <span id="132">132</span> |
| <span id="133">133</span> |
| <span id="134">134</span> |
| <span id="135">135</span> |
| <span id="136">136</span> |
| <span id="137">137</span> |
| <span id="138">138</span> |
| <span id="139">139</span> |
| <span id="140">140</span> |
| <span id="141">141</span> |
| <span id="142">142</span> |
| <span id="143">143</span> |
| <span id="144">144</span> |
| <span id="145">145</span> |
| <span id="146">146</span> |
| <span id="147">147</span> |
| <span id="148">148</span> |
| <span id="149">149</span> |
| <span id="150">150</span> |
| <span id="151">151</span> |
| <span id="152">152</span> |
| <span id="153">153</span> |
| <span id="154">154</span> |
| <span id="155">155</span> |
| <span id="156">156</span> |
| <span id="157">157</span> |
| <span id="158">158</span> |
| <span id="159">159</span> |
| <span id="160">160</span> |
| <span id="161">161</span> |
| <span id="162">162</span> |
| <span id="163">163</span> |
| <span id="164">164</span> |
| <span id="165">165</span> |
| <span id="166">166</span> |
| <span id="167">167</span> |
| <span id="168">168</span> |
| <span id="169">169</span> |
| <span id="170">170</span> |
| <span id="171">171</span> |
| <span id="172">172</span> |
| <span id="173">173</span> |
| <span id="174">174</span> |
| <span id="175">175</span> |
| <span id="176">176</span> |
| <span id="177">177</span> |
| <span id="178">178</span> |
| <span id="179">179</span> |
| <span id="180">180</span> |
| <span id="181">181</span> |
| <span id="182">182</span> |
| <span id="183">183</span> |
| <span id="184">184</span> |
| <span id="185">185</span> |
| <span id="186">186</span> |
| <span id="187">187</span> |
| <span id="188">188</span> |
| <span id="189">189</span> |
| <span id="190">190</span> |
| <span id="191">191</span> |
| <span id="192">192</span> |
| <span id="193">193</span> |
| <span id="194">194</span> |
| <span id="195">195</span> |
| <span id="196">196</span> |
| <span id="197">197</span> |
| <span id="198">198</span> |
| <span id="199">199</span> |
| <span id="200">200</span> |
| <span id="201">201</span> |
| <span id="202">202</span> |
| <span id="203">203</span> |
| <span id="204">204</span> |
| <span id="205">205</span> |
| <span id="206">206</span> |
| <span id="207">207</span> |
| <span id="208">208</span> |
| <span id="209">209</span> |
| <span id="210">210</span> |
| <span id="211">211</span> |
| <span id="212">212</span> |
| <span id="213">213</span> |
| <span id="214">214</span> |
| <span id="215">215</span> |
| <span id="216">216</span> |
| <span id="217">217</span> |
| <span id="218">218</span> |
| <span id="219">219</span> |
| <span id="220">220</span> |
| <span id="221">221</span> |
| <span id="222">222</span> |
| <span id="223">223</span> |
| <span id="224">224</span> |
| <span id="225">225</span> |
| <span id="226">226</span> |
| <span id="227">227</span> |
| <span id="228">228</span> |
| <span id="229">229</span> |
| <span id="230">230</span> |
| <span id="231">231</span> |
| <span id="232">232</span> |
| <span id="233">233</span> |
| <span id="234">234</span> |
| <span id="235">235</span> |
| <span id="236">236</span> |
| <span id="237">237</span> |
| <span id="238">238</span> |
| <span id="239">239</span> |
| <span id="240">240</span> |
| <span id="241">241</span> |
| <span id="242">242</span> |
| <span id="243">243</span> |
| <span id="244">244</span> |
| <span id="245">245</span> |
| <span id="246">246</span> |
| <span id="247">247</span> |
| <span id="248">248</span> |
| <span id="249">249</span> |
| <span id="250">250</span> |
| <span id="251">251</span> |
| <span id="252">252</span> |
| <span id="253">253</span> |
| <span id="254">254</span> |
| <span id="255">255</span> |
| <span id="256">256</span> |
| <span id="257">257</span> |
| <span id="258">258</span> |
| <span id="259">259</span> |
| <span id="260">260</span> |
| <span id="261">261</span> |
| <span id="262">262</span> |
| <span id="263">263</span> |
| <span id="264">264</span> |
| <span id="265">265</span> |
| <span id="266">266</span> |
| <span id="267">267</span> |
| <span id="268">268</span> |
| <span id="269">269</span> |
| </pre><pre class="rust"><code><span class="doccomment">//! [![github]](https://github.com/dtolnay/unicode-ident)&ensp;[![crates-io]](https://crates.io/crates/unicode-ident)&ensp;[![docs-rs]](https://docs.rs/unicode-ident) |
| //! |
| //! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github |
| //! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust |
| //! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs |
| //! |
| //! <br> |
| //! |
| //! Implementation of [Unicode Standard Annex #31][tr31] for determining which |
| //! `char` values are valid in programming language identifiers. |
| //! |
| //! [tr31]: https://www.unicode.org/reports/tr31/ |
| //! |
| //! This crate is a better optimized implementation of the older `unicode-xid` |
| //! crate. This crate uses less static storage, and is able to classify both |
| //! ASCII and non-ASCII codepoints with better performance, 2&ndash;10&times; |
| //! faster than `unicode-xid`. |
| //! |
| //! <br> |
| //! |
| //! ## Comparison of performance |
| //! |
| //! The following table shows a comparison between five Unicode identifier |
| //! implementations. |
| //! |
| //! - `unicode-ident` is this crate; |
| //! - [`unicode-xid`] is a widely used crate run by the "unicode-rs" org; |
| //! - `ucd-trie` and `fst` are two data structures supported by the |
| //! [`ucd-generate`] tool; |
| //! - [`roaring`] is a Rust implementation of Roaring bitmap. |
| //! |
| //! The *static storage* column shows the total size of `static` tables that the |
| //! crate bakes into your binary, measured in 1000s of bytes. |
| //! |
| //! The remaining columns show the **cost per call** to evaluate whether a |
| //! single `char` has the XID\_Start or XID\_Continue Unicode property, |
| //! comparing across different ratios of ASCII to non-ASCII codepoints in the |
| //! input data. |
| //! |
| //! [`unicode-xid`]: https://github.com/unicode-rs/unicode-xid |
| //! [`ucd-generate`]: https://github.com/BurntSushi/ucd-generate |
| //! [`roaring`]: https://github.com/RoaringBitmap/roaring-rs |
| //! |
| //! | | static storage | 0% nonascii | 1% | 10% | 100% nonascii | |
| //! |---|---|---|---|---|---| |
| //! | **`unicode-ident`** | 9.75 K | 0.96 ns | 0.95 ns | 1.09 ns | 1.55 ns | |
| //! | **`unicode-xid`** | 11.34 K | 1.88 ns | 2.14 ns | 3.48 ns | 15.63 ns | |
| //! | **`ucd-trie`** | 9.95 K | 1.29 ns | 1.28 ns | 1.36 ns | 2.15 ns | |
| //! | **`fst`** | 133 K | 55.1 ns | 54.9 ns | 53.2 ns | 28.5 ns | |
| //! | **`roaring`** | 66.1 K | 2.78 ns | 3.09 ns | 3.37 ns | 4.70 ns | |
| //! |
| //! Source code for the benchmark is provided in the *bench* directory of this |
| //! repo and may be repeated by running `cargo criterion`. |
| //! |
| //! <br> |
| //! |
| //! ## Comparison of data structures |
| //! |
| //! #### unicode-xid |
| //! |
| //! They use a sorted array of character ranges, and do a binary search to look |
| //! up whether a given character lands inside one of those ranges. |
| //! |
| //! ```rust |
| //! # const _: &str = stringify! { |
| //! static XID_Continue_table: [(char, char); 763] = [ |
| //! ('\u{30}', '\u{39}'), // 0-9 |
| //! ('\u{41}', '\u{5a}'), // A-Z |
| //! # " |
| //! … |
| //! # " |
| //! ('\u{e0100}', '\u{e01ef}'), |
| //! ]; |
| //! # }; |
| //! ``` |
| //! |
| //! The static storage used by this data structure scales with the number of |
| //! contiguous ranges of identifier codepoints in Unicode. Every table entry |
| //! consumes 8 bytes, because it consists of a pair of 32-bit `char` values. |
| //! |
| //! In some ranges of the Unicode codepoint space, this is quite a sparse |
| //! representation &ndash; there are some ranges where tens of thousands of |
| //! adjacent codepoints are all valid identifier characters. In other places, |
| //! the representation is quite inefficient. A character like `µ` (U+00B5) |
| //! which is surrounded by non-identifier codepoints consumes 64 bits in the |
| //! table, while it would be just 1 bit in a dense bitmap. |
| //! |
| //! On a system with 64-byte cache lines, binary searching the table touches 7 |
| //! cache lines on average. Each cache line fits only 8 table entries. |
| //! Additionally, the branching performed during the binary search is probably |
| //! mostly unpredictable to the branch predictor. |
| //! |
| //! Overall, the crate ends up being about 10&times; slower on non-ASCII input |
| //! compared to the fastest crate. |
| //! |
| //! A potential improvement would be to pack the table entries more compactly. |
| //! Rust's `char` type is a 21-bit integer padded to 32 bits, which means every |
| //! table entry is holding 22 bits of wasted space, adding up to 3.9 K. They |
| //! could instead fit every table entry into 6 bytes, leaving out some of the |
| //! padding, for a 25% improvement in space used. With some cleverness it may be |
| //! possible to fit in 5 bytes or even 4 bytes by storing a low char and an |
| //! extent, instead of low char and high char. I don't expect that performance |
| //! would improve much but this could be the most efficient for space across all |
| //! the libraries, needing only about 7 K to store. |
| //! |
| //! #### ucd-trie |
| //! |
| //! Their data structure is a compressed trie set specifically tailored for |
| //! Unicode codepoints. The design is credited to Raph Levien in |
| //! [rust-lang/rust#33098]. |
| //! |
| //! [rust-lang/rust#33098]: https://github.com/rust-lang/rust/pull/33098 |
| //! |
| //! ```rust |
| //! pub struct TrieSet { |
| //! tree1_level1: &'static [u64; 32], |
| //! tree2_level1: &'static [u8; 992], |
| //! tree2_level2: &'static [u64], |
| //! tree3_level1: &'static [u8; 256], |
| //! tree3_level2: &'static [u8], |
| //! tree3_level3: &'static [u64], |
| //! } |
| //! ``` |
| //! |
| //! It represents codepoint sets using a trie to achieve prefix compression. The |
| //! final states of the trie are embedded in leaves or "chunks", where each |
| //! chunk is a 64-bit integer. Each bit position of the integer corresponds to |
| //! whether a particular codepoint is in the set or not. These chunks are not |
| //! just a compact representation of the final states of the trie, but are also |
| //! a form of suffix compression. In particular, if multiple ranges of 64 |
| //! contiguous codepoints have the same Unicode properties, then they all map to |
| //! the same chunk in the final level of the trie. |
| //! |
| //! Being tailored for Unicode codepoints, this trie is partitioned into three |
| //! disjoint sets: tree1, tree2, tree3. The first set corresponds to codepoints |
| //! \[0, 0x800), the second \[0x800, 0x10000) and the third \[0x10000, |
| //! 0x110000). These partitions conveniently correspond to the space of 1 or 2 |
| //! byte UTF-8 encoded codepoints, 3 byte UTF-8 encoded codepoints and 4 byte |
| //! UTF-8 encoded codepoints, respectively. |
| //! |
| //! Lookups in this data structure are significantly more efficient than binary |
| //! search. A lookup touches either 1, 2, or 3 cache lines based on which of the |
| //! trie partitions is being accessed. |
| //! |
| //! One possible performance improvement would be for this crate to expose a way |
| //! to query based on a UTF-8 encoded string, returning the Unicode property |
| //! corresponding to the first character in the string. Without such an API, the |
| //! caller is required to tokenize their UTF-8 encoded input data into `char`, |
| //! hand the `char` into `ucd-trie`, only for `ucd-trie` to undo that work by |
| //! converting back into the variable-length representation for trie traversal. |
| //! |
| //! #### fst |
| //! |
| //! Uses a [finite state transducer][fst]. This representation is built into |
| //! [ucd-generate] but I am not aware of any advantage over the `ucd-trie` |
| //! representation. In particular `ucd-trie` is optimized for storing Unicode |
| //! properties while `fst` is not. |
| //! |
| //! [fst]: https://github.com/BurntSushi/fst |
| //! [ucd-generate]: https://github.com/BurntSushi/ucd-generate |
| //! |
| //! As far as I can tell, the main thing that causes `fst` to have large size |
| //! and slow lookups for this use case relative to `ucd-trie` is that it does |
| //! not specialize for the fact that only 21 of the 32 bits in a `char` are |
| //! meaningful. There are some dense arrays in the structure with large ranges |
| //! that could never possibly be used. |
| //! |
| //! #### roaring |
| //! |
| //! This crate is a pure-Rust implementation of [Roaring Bitmap], a data |
| //! structure designed for storing sets of 32-bit unsigned integers. |
| //! |
| //! [Roaring Bitmap]: https://roaringbitmap.org/about/ |
| //! |
| //! Roaring bitmaps are compressed bitmaps which tend to outperform conventional |
| //! compressed bitmaps such as WAH, EWAH or Concise. In some instances, they can |
| //! be hundreds of times faster and they often offer significantly better |
| //! compression. |
| //! |
| //! In this use case the performance was reasonably competitive but still |
| //! substantially slower than the Unicode-optimized crates. Meanwhile the |
| //! compression was significantly worse, requiring 6&times; as much storage for |
| //! the data structure. |
| //! |
| //! I also benchmarked the [`croaring`] crate which is an FFI wrapper around the |
| //! C reference implementation of Roaring Bitmap. This crate was consistently |
| //! about 15% slower than pure-Rust `roaring`, which could just be FFI overhead. |
| //! I did not investigate further. |
| //! |
| //! [`croaring`]: https://crates.io/crates/croaring |
| //! |
| //! #### unicode-ident |
| //! |
| //! This crate is most similar to the `ucd-trie` library, in that it's based on |
| //! bitmaps stored in the leaves of a trie representation, achieving both prefix |
| //! compression and suffix compression. |
| //! |
| //! The key differences are: |
| //! |
| //! - Uses a single 2-level trie, rather than 3 disjoint partitions of different |
| //! depth each. |
| //! - Uses significantly larger chunks: 512 bits rather than 64 bits. |
| //! - Compresses the XID\_Start and XID\_Continue properties together |
| //! simultaneously, rather than duplicating identical trie leaf chunks across |
| //! the two. |
| //! |
| //! The following diagram shows the XID\_Start and XID\_Continue Unicode boolean |
| //! properties in uncompressed form, in row-major order: |
| //! |
| //! <table> |
| //! <tr><th>XID_Start</th><th>XID_Continue</th></tr> |
| //! <tr> |
| //! <td><img alt="XID_Start bitmap" width="256" src="https://user-images.githubusercontent.com/1940490/168647353-c6eeb922-afec-49b2-9ef5-c03e9d1e0760.png"></td> |
| //! <td><img alt="XID_Continue bitmap" width="256" src="https://user-images.githubusercontent.com/1940490/168647367-f447cca7-2362-4d7d-8cd7-d21c011d329b.png"></td> |
| //! </tr> |
| //! </table> |
| //! |
| //! Uncompressed, these would take 140 K to store, which is beyond what would be |
| //! reasonable. However, as you can see there is a large degree of similarity |
| //! between the two bitmaps and across the rows, which lends itself well to |
| //! compression. |
| //! |
| //! This crate stores one 512-bit "row" of the above bitmaps in the leaf level |
| //! of a trie, and a single additional level to index into the leaves. It turns |
| //! out there are 124 unique 512-bit chunks across the two bitmaps so 7 bits are |
| //! sufficient to index them. |
| //! |
| //! The chunk size of 512 bits is selected as the size that minimizes the total |
| //! size of the data structure. A smaller chunk, like 256 or 128 bits, would |
| //! achieve better deduplication but require a larger index. A larger chunk |
| //! would increase redundancy in the leaf bitmaps. 512 bit chunks are the |
| //! optimum for total size of the index plus leaf bitmaps. |
| //! |
| //! In fact since there are only 124 unique chunks, we can use an 8-bit index |
| //! with a spare bit to index at the half-chunk level. This achieves an |
| //! additional 8.5% compression by eliminating redundancies between the second |
| //! half of any chunk and the first half of any other chunk. Note that this is |
| //! not the same as using chunks which are half the size, because it does not |
| //! necessitate raising the size of the trie's first level. |
| //! |
| //! In contrast to binary search or the `ucd-trie` crate, performing lookups in |
| //! this data structure is straight-line code with no need for branching. |
| |
| </span><span class="attribute">#![no_std] |
| #![doc(html_root_url = <span class="string">"https://docs.rs/unicode-ident/1.0.9"</span>)] |
| #![allow(clippy::doc_markdown, clippy::must_use_candidate)] |
| |
| #[rustfmt::skip] |
| </span><span class="kw">mod </span>tables; |
| |
| <span class="kw">use </span><span class="kw">crate</span>::tables::{ASCII_CONTINUE, ASCII_START, CHUNK, LEAF, TRIE_CONTINUE, TRIE_START}; |
| |
| <span class="kw">pub fn </span>is_xid_start(ch: char) -> bool { |
| <span class="kw">if </span>ch.is_ascii() { |
| <span class="kw">return </span>ASCII_START.<span class="number">0</span>[ch <span class="kw">as </span>usize]; |
| } |
| <span class="kw">let </span>chunk = <span class="kw-2">*</span>TRIE_START.<span class="number">0</span>.get(ch <span class="kw">as </span>usize / <span class="number">8 </span>/ CHUNK).unwrap_or(<span class="kw-2">&</span><span class="number">0</span>); |
| <span class="kw">let </span>offset = chunk <span class="kw">as </span>usize * CHUNK / <span class="number">2 </span>+ ch <span class="kw">as </span>usize / <span class="number">8 </span>% CHUNK; |
| <span class="kw">unsafe </span>{ LEAF.<span class="number">0</span>.get_unchecked(offset) }.wrapping_shr(ch <span class="kw">as </span>u32 % <span class="number">8</span>) & <span class="number">1 </span>!= <span class="number">0 |
| </span>} |
| |
| <span class="kw">pub fn </span>is_xid_continue(ch: char) -> bool { |
| <span class="kw">if </span>ch.is_ascii() { |
| <span class="kw">return </span>ASCII_CONTINUE.<span class="number">0</span>[ch <span class="kw">as </span>usize]; |
| } |
| <span class="kw">let </span>chunk = <span class="kw-2">*</span>TRIE_CONTINUE.<span class="number">0</span>.get(ch <span class="kw">as </span>usize / <span class="number">8 </span>/ CHUNK).unwrap_or(<span class="kw-2">&</span><span class="number">0</span>); |
| <span class="kw">let </span>offset = chunk <span class="kw">as </span>usize * CHUNK / <span class="number">2 </span>+ ch <span class="kw">as </span>usize / <span class="number">8 </span>% CHUNK; |
| <span class="kw">unsafe </span>{ LEAF.<span class="number">0</span>.get_unchecked(offset) }.wrapping_shr(ch <span class="kw">as </span>u32 % <span class="number">8</span>) & <span class="number">1 </span>!= <span class="number">0 |
| </span>} |
| </code></pre></div> |
| </section></div></main><div id="rustdoc-vars" data-root-path="../../" data-current-crate="unicode_ident" data-themes="ayu,dark,light" data-resource-suffix="" data-rustdoc-version="1.66.0-nightly (5c8bff74b 2022-10-21)" ></div></body></html> |