Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: add use_arrow to geopandas tests with sql statement #306

Merged
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
302631a
TST add use_arrow for geopandas tests with sql statement
theroggy Oct 2, 2023
e1a0601
Try fixing skip_features for gdal>=3.8
theroggy Oct 2, 2023
04e27a9
Try if a default sql + skip behaves different than LIMIT
theroggy Oct 3, 2023
8f57933
Merge remote-tracking branch 'upstream/main' into TST-add-use_arrow-f…
theroggy Oct 15, 2023
d74ccfc
Revert change to sql_skip_max test
theroggy Oct 15, 2023
21757aa
Try SQLITE dialect in test
theroggy Oct 15, 2023
dcaf384
Try if sql_dialect is not specified
theroggy Oct 15, 2023
cbe0637
Check if offset and skip are combined in gdal3.8
theroggy Oct 15, 2023
1aca3d6
Update test_geopandas_io.py
theroggy Oct 15, 2023
b15d116
Some more experimental testing
theroggy Oct 15, 2023
1e5bd58
Another experimental gdal3.8 test
theroggy Oct 15, 2023
4311baf
Test diff if skip after open arrow
theroggy Oct 15, 2023
4c42649
Revert experimental changes to test test_read_sql_skip_max
theroggy Oct 15, 2023
9dcdbc1
local skip filtering if sql stmt not based on gdal version
theroggy Oct 15, 2023
49a2476
Merge remote-tracking branch 'upstream/main' into TST-add-use_arrow-f…
theroggy Oct 21, 2023
64236cb
Revert changes to when filtering is done by gdal now the sql offset +…
theroggy Oct 25, 2023
659631b
part 2
theroggy Oct 25, 2023
aabf37b
Merge remote-tracking branch 'upstream/main' into TST-add-use_arrow-f…
theroggy Dec 2, 2023
f6e9e36
Merge remote-tracking branch 'upstream/main' into TST-add-use_arrow-f…
theroggy Dec 4, 2023
c559795
Merge remote-tracking branch 'upstream/main' into TST-add-use_arrow-f…
theroggy Jan 23, 2024
0f64f86
Rollback changes to comment
theroggy Jan 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 61 additions & 21 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,21 +576,27 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
# The geometry column cannot be specified when using the
# default OGRSQL dialect but is returned nonetheless, so 4 columns.
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
df = read_dataframe(
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
)
assert len(df.columns) == 4
assert len(df) == 177

# Should return single row
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
df = read_dataframe(
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
)
assert len(df) == 1
assert len(df.columns) == 6
assert df.iloc[0].iso_a3 == "CAN"

sql = """SELECT *
FROM naturalearth_lowres
WHERE iso_a3 IN ('CAN', 'USA', 'MEX')"""
df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
df = read_dataframe(
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
)
assert len(df.columns) == 6
assert len(df) == 3
assert df.iso_a3.tolist() == ["CAN", "USA", "MEX"]
Expand All @@ -599,7 +605,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
FROM naturalearth_lowres
WHERE iso_a3 IN ('CAN', 'USA', 'MEX')
ORDER BY name"""
df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
df = read_dataframe(
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
)
assert len(df.columns) == 6
assert len(df) == 3
assert df.iso_a3.tolist() == ["CAN", "MEX", "USA"]
Expand All @@ -608,47 +616,61 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
sql = """SELECT *
FROM naturalearth_lowres
WHERE POP_EST >= 10000000 AND POP_EST < 100000000"""
df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
df = read_dataframe(
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
)
assert len(df) == 75
assert len(df.columns) == 6
assert df.pop_est.min() >= 10000000
assert df.pop_est.max() < 100000000

# Should match no items.
sql = "SELECT * FROM naturalearth_lowres WHERE ISO_A3 = 'INVALID'"
df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
df = read_dataframe(
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
)
assert len(df) == 0


def test_read_sql_invalid(naturalearth_lowres_all_ext):
def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
if naturalearth_lowres_all_ext.suffix == ".gpkg":
with pytest.raises(Exception, match="In ExecuteSQL().*"):
read_dataframe(naturalearth_lowres_all_ext, sql="invalid")
read_dataframe(
naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
)
else:
with pytest.raises(Exception, match="SQL Expression Parsing Error"):
read_dataframe(naturalearth_lowres_all_ext, sql="invalid")
read_dataframe(
naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
)

with pytest.raises(
ValueError, match="'sql' paramater cannot be combined with 'layer'"
):
read_dataframe(naturalearth_lowres_all_ext, sql="whatever", layer="invalid")
read_dataframe(
naturalearth_lowres_all_ext,
sql="whatever",
layer="invalid",
use_arrow=use_arrow,
)


def test_read_sql_columns_where(naturalearth_lowres_all_ext):
def test_read_sql_columns_where(naturalearth_lowres_all_ext, use_arrow):
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
df = read_dataframe(
naturalearth_lowres_all_ext,
sql=sql,
sql_dialect="OGRSQL",
columns=["iso_a3_renamed", "name"],
where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
use_arrow=use_arrow,
)
assert len(df.columns) == 3
assert len(df) == 3
assert df.iso_a3_renamed.tolist() == ["CAN", "USA", "MEX"]


def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext, use_arrow):
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
df = read_dataframe(
naturalearth_lowres_all_ext,
Expand All @@ -657,13 +679,14 @@ def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
columns=["iso_a3_renamed", "name"],
where="iso_a3_renamed IN ('CRI', 'PAN')",
bbox=(-85, 8, -80, 10),
use_arrow=use_arrow,
)
assert len(df.columns) == 3
assert len(df) == 2
assert df.iso_a3_renamed.tolist() == ["PAN", "CRI"]


def test_read_sql_skip_max(naturalearth_lowres_all_ext):
def test_read_sql_skip_max(naturalearth_lowres_all_ext, use_arrow):
sql = """SELECT *
FROM naturalearth_lowres
WHERE iso_a3 IN ('CAN', 'MEX', 'USA')
Expand All @@ -674,20 +697,29 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
skip_features=1,
max_features=1,
sql_dialect="OGRSQL",
use_arrow=use_arrow,
)
assert len(df.columns) == 6
assert len(df) == 1
assert df.iso_a3.tolist() == ["MEX"]

sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
df = read_dataframe(
naturalearth_lowres_all_ext, sql=sql, max_features=3, sql_dialect="OGRSQL"
naturalearth_lowres_all_ext,
sql=sql,
max_features=3,
sql_dialect="OGRSQL",
use_arrow=use_arrow,
)
assert len(df) == 1

sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
df = read_dataframe(
naturalearth_lowres_all_ext, sql=sql, skip_features=1, sql_dialect="OGRSQL"
naturalearth_lowres_all_ext,
sql=sql,
sql_dialect="OGRSQL",
skip_features=1,
use_arrow=use_arrow,
)
assert len(df) == 0

Expand All @@ -698,10 +730,12 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
[ext for ext in ALL_EXTS if ext != ".gpkg"],
indirect=["naturalearth_lowres"],
)
def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres, use_arrow):
# Should return singular item
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="SQLITE")
df = read_dataframe(
naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
)
assert len(df) == 1
assert len(df.columns) == 6
assert df.iloc[0].iso_a3 == "CAN"
Expand All @@ -711,7 +745,9 @@ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3
FROM naturalearth_lowres
WHERE ISO_A3 = 'CAN'"""
df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="SQLITE")
df = read_dataframe(
naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
)
assert len(df) == 1
assert len(df.columns) == 4
assert df.iloc[0].geometry.area > area_canada
Expand All @@ -721,12 +757,14 @@ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
@pytest.mark.parametrize(
"naturalearth_lowres", [".gpkg"], indirect=["naturalearth_lowres"]
)
def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres):
def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
# "INDIRECT_SQLITE" prohibits GDAL from passing the SQL statement to sqlite.
# Because the statement is processed within GDAL it is possible to use
# spatialite functions even if sqlite isn't built with spatialite support.
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE")
df = read_dataframe(
naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
)
assert len(df) == 1
assert len(df.columns) == 6
assert df.iloc[0].iso_a3 == "CAN"
Expand All @@ -736,7 +774,9 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres):
sql = """SELECT ST_Buffer(geom, 5) AS geometry, name, pop_est, iso_a3
FROM naturalearth_lowres
WHERE ISO_A3 = 'CAN'"""
df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE")
df = read_dataframe(
naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
)
assert len(df) == 1
assert len(df.columns) == 4
assert df.iloc[0].geometry.area > area_canada
Expand Down