coiled · jrbourbeau · May 23, 2023 · May 16, 2023 · May 16, 2023
diff --git a/dask_snowflake/core.py b/dask_snowflake/core.py
@@ -77,6 +77,7 @@ def to_snowflake(
     df: dd.DataFrame,
     name: str,
     connection_kwargs: dict,
+    compute: bool = True,
 ):
     """Write a Dask DataFrame to a Snowflake table.
 
@@ -89,6 +90,10 @@ def to_snowflake(
     connection_kwargs:
         Connection arguments used when connecting to Snowflake with
         ``snowflake.connector.connect``.
+    compute:
+        Whether or not to compute immediately. If ``True``, write DataFrame
+        partitions to Snowflake immediately. If ``False``, return a list of
+        delayed objects that can be computed later. Defaults to ``True``.
 
     Examples
     --------
@@ -113,12 +118,14 @@ def to_snowflake(
     # We run `ensure_db_exists` on the cluster to ensure we capture the
     # right partner application ID.
     ensure_db_exists(df._meta, name, connection_kwargs).compute()
-    dask.compute(
-        [
-            write_snowflake(partition, name, connection_kwargs)
-            for partition in df.to_delayed()
-        ]
-    )
+    parts = [
+        write_snowflake(partition, name, connection_kwargs)
+        for partition in df.to_delayed()
+    ]
+    if compute:
+        dask.compute(parts)
+    else:
+        return parts
 
 
 def _fetch_batches(chunks: list[ArrowResultBatch], arrow_options: dict):

diff --git a/dask_snowflake/tests/test_core.py b/dask_snowflake/tests/test_core.py
@@ -79,6 +79,29 @@ def test_read_empty_result(table, connection_kwargs, client):
     assert len(result.columns) == 0
 
 
+def test_to_snowflake_compute_false(table, connection_kwargs, client):
+    result = to_snowflake(
+        ddf, name=table, connection_kwargs=connection_kwargs, compute=False
+    )
+    assert isinstance(result, list)
+    assert len(result) == ddf.npartitions
+
+    dask.compute(result)
+
+    ddf2 = read_snowflake(
+        f"SELECT * FROM {table}",
+        connection_kwargs=connection_kwargs,
+        npartitions=2,
+    )
+    # FIXME: Why does read_snowflake return lower-case column names?
+    ddf2.columns = ddf2.columns.str.upper()
+    # FIXME: We need to sort the DataFrame because partitions are written
+    # in a non-sequential order.
+    dd.utils.assert_eq(
+        df, ddf2.sort_values(by="A").reset_index(drop=True), check_dtype=False
+    )
+
+
 def test_arrow_options(table, connection_kwargs, client):
     # We use a single partition Dask DataFrame to ensure the
     # categories used below are always in the same order.