| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "id": "3425268d-6430-4e52-9019-969d61ef5458", |
| "metadata": {}, |
| "source": [ |
| "# SedonaDB + Delta Lake\n", |
| "\n", |
| "This page shows how to read and write Delta Lake tables with SedonaDB.\n", |
| "\n", |
| "Make sure you run `pip install deltalake` to run the code snippets below." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 42, |
| "id": "83f5ca25-8059-4624-bd00-e44cd172d9c2", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "from deltalake import write_deltalake, DeltaTable\n", |
| "import sedona.db\n", |
| "import pyarrow.compute as pc\n", |
| "\n", |
| "sd = sedona.db.connect()" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "1a90b1bf-99f9-4f50-987b-f1f30bf9988a", |
| "metadata": {}, |
| "source": [ |
| "Read in a GeoParquet dataset into a SedonaDB DataFrame." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 43, |
| "id": "a37f3a30-3267-4bfc-8e5b-e234053927af", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "countries = sd.read_parquet(\n", |
| " \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet\"\n", |
| ")" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "1f82b047-8182-4120-82d7-945ff38ecbca", |
| "metadata": {}, |
| "source": [ |
| "## Create a Delta Lake table with WKT\n", |
| "\n", |
| "Now write the DataFrame to a Delta Lake table. Notice that you must convert the geometry column to Well-Known Text (WKT) or Well-Known Binary (WKB) before writing to the Delta table.\n", |
| "\n", |
| "Delta Lake does not support geometry columns.\n", |
| "\n", |
| "This example shows how to create a Delta table with a WKT column. The section that follows shows how to create a Delta table with a WKB column." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 44, |
| "id": "35bdc296-d9ef-4ea2-9ba5-dac1cfd115ef", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "countries.to_view(\"countries\", True)\n", |
| "df = sd.sql(\n", |
| " \"select name, continent, ST_AsText(geometry) as geometry_wkt from countries\"\n", |
| ")\n", |
| "table_path = \"/tmp/delta_with_wkt\"\n", |
| "write_deltalake(table_path, df.to_pandas(), mode=\"overwrite\")" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "974a5558-fb18-49b7-a304-fa93bdd363e8", |
| "metadata": {}, |
| "source": [ |
| "## Read Delta table into SedonaDB\n", |
| "\n", |
| "Now read the Delta table back into a SedonaDB DataFrame." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 45, |
| "id": "c15d4605-483a-4041-973a-547098bdeef4", |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "┌─────────────────────────────┬───────────────┬────────────────────────────────────────────────────┐\n", |
| "│ name ┆ continent ┆ geometry_wkt │\n", |
| "│ utf8 ┆ utf8 ┆ utf8 │\n", |
| "╞═════════════════════════════╪═══════════════╪════════════════════════════════════════════════════╡\n", |
| "│ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180 -16.067132663642447,180 -16.55… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ United Republic of Tanzania ┆ Africa ┆ POLYGON((33.90371119710453 -0.9500000000000001,34… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Western Sahara ┆ Africa ┆ POLYGON((-8.665589565454809 27.656425889592356,-8… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Canada ┆ North America ┆ MULTIPOLYGON(((-122.84000000000003 49.00000000000… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ United States of America ┆ North America ┆ MULTIPOLYGON(((-122.84000000000003 49.00000000000… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Kazakhstan ┆ Asia ┆ POLYGON((87.35997033076265 49.21498078062912,86.5… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Uzbekistan ┆ Asia ┆ POLYGON((55.96819135928291 41.30864166926936,55.9… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Papua New Guinea ┆ Oceania ┆ MULTIPOLYGON(((141.00021040259185 -2.600151055515… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Indonesia ┆ Asia ┆ MULTIPOLYGON(((141.00021040259185 -2.600151055515… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Argentina ┆ South America ┆ MULTIPOLYGON(((-68.63401022758323 -52.63637045887… │\n", |
| "└─────────────────────────────┴───────────────┴────────────────────────────────────────────────────┘\n" |
| ] |
| } |
| ], |
| "source": [ |
| "dt = DeltaTable(table_path)\n", |
| "arrow_table = dt.to_pyarrow_table()\n", |
| "df = sd.create_data_frame(arrow_table)\n", |
| "df.show()" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "a44fd9bd-85d2-4f1b-9df4-cc98ac6223f0", |
| "metadata": {}, |
| "source": [ |
| "Notice that the `geometry_wkt` column is `utf8`. It's not a geometry column.\n", |
| "\n", |
| "Let's convert the `geometry_wkt` column to a geometry column." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 46, |
| "id": "9052c4af-4478-4038-81ae-cf9c0ba84345", |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "┌─────────────────────────────┬───────────────┬────────────────────────────────────────────────────┐\n", |
| "│ name ┆ continent ┆ geom │\n", |
| "│ utf8 ┆ utf8 ┆ geometry │\n", |
| "╞═════════════════════════════╪═══════════════╪════════════════════════════════════════════════════╡\n", |
| "│ Fiji ┆ Oceania ┆ MULTIPOLYGON(((180 -16.067132663642447,180 -16.55… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ United Republic of Tanzania ┆ Africa ┆ POLYGON((33.90371119710453 -0.9500000000000001,34… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Western Sahara ┆ Africa ┆ POLYGON((-8.665589565454809 27.656425889592356,-8… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Canada ┆ North America ┆ MULTIPOLYGON(((-122.84000000000003 49.00000000000… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ United States of America ┆ North America ┆ MULTIPOLYGON(((-122.84000000000003 49.00000000000… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Kazakhstan ┆ Asia ┆ POLYGON((87.35997033076265 49.21498078062912,86.5… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Uzbekistan ┆ Asia ┆ POLYGON((55.96819135928291 41.30864166926936,55.9… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Papua New Guinea ┆ Oceania ┆ MULTIPOLYGON(((141.00021040259185 -2.600151055515… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Indonesia ┆ Asia ┆ MULTIPOLYGON(((141.00021040259185 -2.600151055515… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Argentina ┆ South America ┆ MULTIPOLYGON(((-68.63401022758323 -52.63637045887… │\n", |
| "└─────────────────────────────┴───────────────┴────────────────────────────────────────────────────┘\n" |
| ] |
| } |
| ], |
| "source": [ |
| "df.to_view(\"my_table\", True)\n", |
| "res = sd.sql(\"\"\"\n", |
| "SELECT\n", |
| " name,\n", |
| " continent,\n", |
| " ST_GeomFromWKT(geometry_wkt) as geom\n", |
| "from my_table\n", |
| "\"\"\")\n", |
| "res.show()" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "802be3a7-5b1d-4f02-85f9-9460743a921d", |
| "metadata": {}, |
| "source": [ |
| "Confirm the schema of the DataFrame." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 47, |
| "id": "5758bfec-86c0-4a14-8e4e-201149a59fac", |
| "metadata": {}, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": [ |
| "SedonaSchema with 3 fields:\n", |
| " name: utf8<Utf8>\n", |
| " continent: utf8<Utf8>\n", |
| " geom: geometry<Wkb>" |
| ] |
| }, |
| "execution_count": 47, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "res.schema" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "7c975e52-5b2d-4ee9-a7c6-51cb3304c0d6", |
| "metadata": {}, |
| "source": [ |
| "## Filter countries in a particular geographic region\n", |
| "\n", |
| "Now, let's grab some countries in the western portion of South America using a polygon region.\n", |
| "\n", |
| "SedonaDB can run these types of queries on geometric data." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 48, |
| "id": "c210e29a-170b-4a01-b464-c7d2f79998eb", |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "┌──────────┬───────────────┬───────────────────────────────────────────────────────────────────────┐\n", |
| "│ name ┆ continent ┆ geom │\n", |
| "│ utf8 ┆ utf8 ┆ geometry │\n", |
| "╞══════════╪═══════════════╪═══════════════════════════════════════════════════════════════════════╡\n", |
| "│ Chile ┆ South America ┆ MULTIPOLYGON(((-68.63401022758323 -52.63637045887449,-68.63335000000… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Peru ┆ South America ┆ POLYGON((-69.89363521999663 -4.2981869441943275,-70.7947688463023 -4… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Colombia ┆ South America ┆ POLYGON((-66.87632585312258 1.253360500489336,-67.0650481838525 1.13… │\n", |
| "├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Ecuador ┆ South America ┆ POLYGON((-75.37322323271385 -0.1520317521204504,-75.23372270374195 -… │\n", |
| "└──────────┴───────────────┴───────────────────────────────────────────────────────────────────────┘\n" |
| ] |
| } |
| ], |
| "source": [ |
| "res = sd.sql(\"\"\"\n", |
| "SELECT\n", |
| " name,\n", |
| " continent,\n", |
| " ST_GeomFromWKT(geometry_wkt) as geom\n", |
| "FROM my_table\n", |
| "WHERE ST_Intersects(\n", |
| " ST_GeomFromWKT(geometry_wkt),\n", |
| " ST_GeomFromWKT('POLYGON((-81 5, -75 5, -75 -56, -81 -56, -81 5))')\n", |
| ")\n", |
| "\"\"\")\n", |
| "res.show()" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "996a56e2-a5b6-42f8-88b0-90d3a0c4c30c", |
| "metadata": {}, |
| "source": [ |
| "## Create a Delta Lake table with WKB\n", |
| "\n", |
| "You can also create a Delta table with WKB.\n", |
| "\n", |
| "WKB is binary, can be compressed more effectively than WKT, and results in smaller file sizes.\n", |
| "\n", |
| "The following example shows how to store the cities dataset in a Delta table with the geometry data stored as WKB.\n", |
| "\n", |
| "It also demonstrates how to add a `bbox` column to the Delta table, enabling more efficient filtering." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 49, |
| "id": "143a8485-773f-4b94-b487-40ad63706669", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "cities = sd.read_parquet(\n", |
| " \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet\"\n", |
| ")\n", |
| "cities.to_view(\"cities\", True)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 50, |
| "id": "36bce474-51b8-468e-931e-347c19624221", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "df = sd.sql(\"\"\"\n", |
| "select \n", |
| " name, \n", |
| " ST_AsBinary(geometry) as geometry_wkb,\n", |
| " STRUCT(\n", |
| " ST_XMin(geometry) as xmin,\n", |
| " ST_YMin(geometry) as ymin,\n", |
| " ST_XMax(geometry) as xmax,\n", |
| " ST_YMax(geometry) as ymax\n", |
| " ) as bbox\n", |
| "from cities\n", |
| "\"\"\")\n", |
| "table_path = \"/tmp/delta_with_wkb\"\n", |
| "write_deltalake(table_path, df.to_pandas(), mode=\"overwrite\", schema_mode=\"overwrite\")" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "cfb1739c-6e60-4330-be98-48b2faa4c6cf", |
| "metadata": {}, |
| "source": [ |
| "Read the Delta table and filter it to only include cities in the eastern half of North America." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 51, |
| "id": "590d056d-60bb-4637-9981-eea721c559a2", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "dt = DeltaTable(table_path)\n", |
| "dataset = dt.to_pyarrow_dataset()\n", |
| "filter_expr = (\n", |
| " (pc.field(\"bbox\", \"xmax\") >= -97.0)\n", |
| " & (pc.field(\"bbox\", \"xmin\") <= -67.0)\n", |
| " & (pc.field(\"bbox\", \"ymax\") >= 25.0)\n", |
| " & (pc.field(\"bbox\", \"ymin\") <= 50.0)\n", |
| ")\n", |
| "filtered_table = dataset.to_table(filter=filter_expr)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 52, |
| "id": "65578ebb-4797-4aaa-9377-87893f9a7694", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "df = sd.create_data_frame(filtered_table.to_pandas())" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 53, |
| "id": "a0cb1afd-153b-413d-8f04-0d533e717b30", |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "┌──────────────────┬──────────────────────────────────────────────┐\n", |
| "│ name ┆ geom │\n", |
| "│ utf8 ┆ geometry │\n", |
| "╞══════════════════╪══════════════════════════════════════════════╡\n", |
| "│ Ottawa ┆ POINT(-75.7019612 45.4186427) │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Nassau ┆ POINT(-77.3500438 25.0833901) │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Houston ┆ POINT(-95.34843625672217 29.741272831862542) │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Miami ┆ POINT(-80.2260519 25.7895566) │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Atlanta ┆ POINT(-84.36764186571386 33.73945728378348) │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Chicago ┆ POINT(-87.63523655322338 41.847961283364114) │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Toronto ┆ POINT(-79.38945855491194 43.66464454743429) │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ Washington, D.C. ┆ POINT(-77.0113644 38.9014952) │\n", |
| "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", |
| "│ New York ┆ POINT(-73.99571754361698 40.72156174972766) │\n", |
| "└──────────────────┴──────────────────────────────────────────────┘\n" |
| ] |
| } |
| ], |
| "source": [ |
| "df.to_view(\"us_east_cities\", True)\n", |
| "sd.sql(\"select name, ST_GeomFromWKB(geometry_wkb) as geom from us_east_cities\").show()" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3 (ipykernel)", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.12.4" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 5 |
| } |