From e87b4def21f9a0bc00ccd67eec57ed33556f09bf Mon Sep 17 00:00:00 2001
From: David Katz <41651296+DavidKatz-il@users.noreply.github.com>
Date: Mon, 6 May 2024 14:14:56 +0000
Subject: [PATCH 1/2] Support json type in athena2pandas

---
 awswrangler/_data_types.py |  2 +-
 tests/unit/test_athena.py  | 41 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/awswrangler/_data_types.py b/awswrangler/_data_types.py
index 54949d109..b8cf9ec18 100644
--- a/awswrangler/_data_types.py
+++ b/awswrangler/_data_types.py
@@ -376,7 +376,7 @@ def athena2pandas(dtype: str, dtype_backend: str | None = None) -> str:  # noqa:
         return "decimal" if dtype_backend != "pyarrow" else "double[pyarrow]"
     if dtype in ("binary", "varbinary"):
         return "bytes" if dtype_backend != "pyarrow" else "binary[pyarrow]"
-    if any(dtype.startswith(t) for t in ["array", "row", "map", "struct"]):
+    if any(dtype.startswith(t) for t in ["array", "row", "map", "struct", "json"]):
         return "object"
     if dtype == "geometry":
         return "string"
diff --git a/tests/unit/test_athena.py b/tests/unit/test_athena.py
index eda53c0ab..d8e9503d0 100644
--- a/tests/unit/test_athena.py
+++ b/tests/unit/test_athena.py
@@ -560,6 +560,47 @@ def test_athena_read_list(glue_database):
     assert df["col0"].iloc[0] == "[1, 2, 3]"
 
 
+def test_athena_read_json(glue_database):
+    sql = """
+        WITH dataset AS (
+        SELECT
+            CAST('HELLO ATHENA' AS JSON) AS some_str,
+            CAST(12345 AS JSON) AS some_int,
+            CAST(MAP(ARRAY['a', 'b'], ARRAY[1,2]) AS JSON) AS some_map
+        )
+        SELECT * FROM dataset
+    """
+    df = wr.athena.read_sql_query(sql=sql, database=glue_database, ctas_approach=False)
+    assert len(df) == 1
+    assert len(df.index) == 1
+    assert len(df.columns) == 3
+    assert df["some_str"].iloc[0] == '"HELLO ATHENA"'
+    assert df["some_int"].iloc[0] == '12345'
+    assert df["some_map"].iloc[0] == '{"a":1,"b":2}'
+
+
+def test_athena_read_json_extract(glue_database):
+    sql = """
+        WITH dataset AS (
+          SELECT '{"name": "Susan Smith",
+                   "org": "engineering",
+                   "projects": [{"name":"project1", "completed":false},
+                   {"name":"project2", "completed":true}]}'
+            AS myblob
+        )
+        SELECT
+          json_extract(myblob, '$.name') AS name,
+          json_extract(myblob, '$.projects') AS projects
+        FROM dataset
+    """
+    df = wr.athena.read_sql_query(sql=sql, database=glue_database, ctas_approach=False)
+    assert len(df) == 1
+    assert len(df.index) == 1
+    assert len(df.columns) == 2
+    assert df["name"].iloc[0] == '"Susan Smith"'
+    assert df["projects"].iloc[0] == '[{"name":"project1","completed":false},{"name":"project2","completed":true}]'
+
+
 def test_sanitize_dataframe_column_names():
     with pytest.warns(UserWarning, match=r"Duplicate*"):
         test_df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

From 0803d85ab9f8750756cf889a0252b9fa4d8c1a6c Mon Sep 17 00:00:00 2001
From: David Katz <41651296+DavidKatz-il@users.noreply.github.com>
Date: Mon, 6 May 2024 14:30:21 +0000
Subject: [PATCH 2/2] Fix formatting: use double quotes in test_athena_read_json

---
 tests/unit/test_athena.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_athena.py b/tests/unit/test_athena.py
index d8e9503d0..ba8d458be 100644
--- a/tests/unit/test_athena.py
+++ b/tests/unit/test_athena.py
@@ -575,7 +575,7 @@ def test_athena_read_json(glue_database):
     assert len(df.index) == 1
     assert len(df.columns) == 3
     assert df["some_str"].iloc[0] == '"HELLO ATHENA"'
-    assert df["some_int"].iloc[0] == '12345'
+    assert df["some_int"].iloc[0] == "12345"
     assert df["some_map"].iloc[0] == '{"a":1,"b":2}'