From d891924e1ee241784d2234c30fcc82e1e71534d0 Mon Sep 17 00:00:00 2001
From: John Bodley <4567245+john-bodley@users.noreply.github.com>
Date: Fri, 28 Apr 2023 07:38:44 +1200
Subject: [PATCH 1/6] fix(tests): Ensure deterministic zip file ordering for
 CSV uploads

---
 tests/integration_tests/csv_upload_tests.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/integration_tests/csv_upload_tests.py b/tests/integration_tests/csv_upload_tests.py
index 97b83bb8fc9e5..72a8d9e57ef6c 100644
--- a/tests/integration_tests/csv_upload_tests.py
+++ b/tests/integration_tests/csv_upload_tests.py
@@ -20,6 +20,7 @@
 import logging
 import os
 import shutil
+import zipfile
 from typing import Dict, Optional, Union
 
 from unittest import mock
@@ -119,7 +120,10 @@ def create_columnar_files():
     os.mkdir(ZIP_DIRNAME)
     pd.DataFrame({"a": ["john", "paul"], "b": [1, 2]}).to_parquet(PARQUET_FILENAME1)
     pd.DataFrame({"a": ["max", "bob"], "b": [3, 4]}).to_parquet(PARQUET_FILENAME2)
-    shutil.make_archive(ZIP_DIRNAME, "zip", ZIP_DIRNAME)
+    
+    with zipfile.ZipFile(ZIP_FILENAME, "w") as archive:
+        for filename in [PARQUET_FILENAME1, PARQUET_FILENAME2]:
+            archive.write(filename, filename)
     yield
     os.remove(ZIP_FILENAME)
     shutil.rmtree(ZIP_DIRNAME)
@@ -556,4 +560,4 @@ def test_import_parquet(mock_event_logger):
 
     with test_db.get_sqla_engine_with_context() as engine:
         data = engine.execute(f"SELECT * from {PARQUET_UPLOAD_TABLE}").fetchall()
-        assert data == [("max", 3), ("bob", 4), ("john", 1), ("paul", 2)]
+        assert data == [("john", 1), ("paul", 2), ("max", 3), ("bob", 4)]

From cf70389240660ff2e0c95724ba2f9019d563b76d Mon Sep 17 00:00:00 2001
From: John Bodley <4567245+john-bodley@users.noreply.github.com>
Date: Fri, 28 Apr 2023 07:50:12 +1200
Subject: [PATCH 2/6] Update csv_upload_tests.py

---
 tests/integration_tests/csv_upload_tests.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/integration_tests/csv_upload_tests.py b/tests/integration_tests/csv_upload_tests.py
index 72a8d9e57ef6c..51035c1db5f0d 100644
--- a/tests/integration_tests/csv_upload_tests.py
+++ b/tests/integration_tests/csv_upload_tests.py
@@ -122,8 +122,9 @@ def create_columnar_files():
     pd.DataFrame({"a": ["max", "bob"], "b": [3, 4]}).to_parquet(PARQUET_FILENAME2)
     
     with zipfile.ZipFile(ZIP_FILENAME, "w") as archive:
-        for filename in [PARQUET_FILENAME1, PARQUET_FILENAME2]:
-            archive.write(filename, filename)
+        archive.write(PARQUET_FILENAME1)
+        archive.write(PARQUET_FILENAME2)
+
     yield
     os.remove(ZIP_FILENAME)
     shutil.rmtree(ZIP_DIRNAME)

From 1b4125f3b446e932d526f2afd8bf165d462c29c0 Mon Sep 17 00:00:00 2001
From: John Bodley <4567245+john-bodley@users.noreply.github.com>
Date: Fri, 28 Apr 2023 08:02:58 +1200
Subject: [PATCH 3/6] Update csv_upload_tests.py

---
 tests/integration_tests/csv_upload_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration_tests/csv_upload_tests.py b/tests/integration_tests/csv_upload_tests.py
index 51035c1db5f0d..be1660780b932 100644
--- a/tests/integration_tests/csv_upload_tests.py
+++ b/tests/integration_tests/csv_upload_tests.py
@@ -120,7 +120,7 @@ def create_columnar_files():
     os.mkdir(ZIP_DIRNAME)
     pd.DataFrame({"a": ["john", "paul"], "b": [1, 2]}).to_parquet(PARQUET_FILENAME1)
     pd.DataFrame({"a": ["max", "bob"], "b": [3, 4]}).to_parquet(PARQUET_FILENAME2)
-    
+
     with zipfile.ZipFile(ZIP_FILENAME, "w") as archive:
         archive.write(PARQUET_FILENAME1)
         archive.write(PARQUET_FILENAME2)

From 9e9b1c6b2ec83aa92cd65fac9af94701a02f0d61 Mon Sep 17 00:00:00 2001
From: John Bodley <4567245+john-bodley@users.noreply.github.com>
Date: Fri, 28 Apr 2023 08:17:52 +1200
Subject: [PATCH 4/6] Update csv_upload_tests.py

---
 tests/integration_tests/csv_upload_tests.py | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/tests/integration_tests/csv_upload_tests.py b/tests/integration_tests/csv_upload_tests.py
index be1660780b932..0809f4007a82b 100644
--- a/tests/integration_tests/csv_upload_tests.py
+++ b/tests/integration_tests/csv_upload_tests.py
@@ -20,7 +20,6 @@
 import logging
 import os
 import shutil
-import zipfile
 from typing import Dict, Optional, Union
 
 from unittest import mock
@@ -120,11 +119,7 @@ def create_columnar_files():
     os.mkdir(ZIP_DIRNAME)
     pd.DataFrame({"a": ["john", "paul"], "b": [1, 2]}).to_parquet(PARQUET_FILENAME1)
     pd.DataFrame({"a": ["max", "bob"], "b": [3, 4]}).to_parquet(PARQUET_FILENAME2)
-
-    with zipfile.ZipFile(ZIP_FILENAME, "w") as archive:
-        archive.write(PARQUET_FILENAME1)
-        archive.write(PARQUET_FILENAME2)
-
+    shutil.make_archive(ZIP_DIRNAME, "zip", ZIP_DIRNAME)
     yield
     os.remove(ZIP_FILENAME)
     shutil.rmtree(ZIP_DIRNAME)
@@ -279,7 +274,7 @@ def test_import_csv_enforced_schema(mock_event_logger):
 
     with get_upload_db().get_sqla_engine_with_context() as engine:
         data = engine.execute(
-            f"SELECT * from {ADMIN_SCHEMA_NAME}.{CSV_UPLOAD_TABLE_W_SCHEMA}"
+            f"SELECT * from {ADMIN_SCHEMA_NAME}.{CSV_UPLOAD_TABLE_W_SCHEMA} ORDER BY b"
         ).fetchall()
         assert data == [("john", 1), ("paul", 2)]
 
@@ -390,12 +385,12 @@ def test_import_csv(mock_event_logger):
     )
     # make sure that john and empty string are replaced with None
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
+        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY b").fetchall()
         assert data == [(None, 1, "x"), ("paul", 2, None)]
         # default null values
         upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
         # make sure that john and empty string are replaced with None
-        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
+        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY b").fetchall()
         assert data == [("john", 1, "x"), ("paul", 2, None)]
 
     # cleanup
@@ -413,7 +408,7 @@ def test_import_csv(mock_event_logger):
     # or an int to a float
     # file upload should work as normal
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
+        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY b").fetchall()
         assert data == [("john", 1), ("paul", 2)]
 
     # cleanup
@@ -485,7 +480,7 @@ def test_import_excel(mock_event_logger):
     )
 
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {EXCEL_UPLOAD_TABLE}").fetchall()
+        data = engine.execute(f"SELECT * from {EXCEL_UPLOAD_TABLE} ORDER BY b").fetchall()
         assert data == [(0, "john", 1), (1, "paul", 2)]
 
 
@@ -549,7 +544,7 @@ def test_import_parquet(mock_event_logger):
     assert success_msg_f1 in resp
 
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {PARQUET_UPLOAD_TABLE}").fetchall()
+        data = engine.execute(f"SELECT * from {PARQUET_UPLOAD_TABLE} ORDER BY b").fetchall()
         assert data == [("john", 1), ("paul", 2)]
 
     # replace table with zip file
@@ -560,5 +555,5 @@ def test_import_parquet(mock_event_logger):
     assert success_msg_f2 in resp
 
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {PARQUET_UPLOAD_TABLE}").fetchall()
+        data = engine.execute(f"SELECT * from {PARQUET_UPLOAD_TABLE} ORDER BY b").fetchall()
         assert data == [("john", 1), ("paul", 2), ("max", 3), ("bob", 4)]

From 2bb6407b812472e91f1e1b4fbffec17a309070cc Mon Sep 17 00:00:00 2001
From: John Bodley <john.bodley@gmail.com>
Date: Fri, 28 Apr 2023 08:33:44 +1200
Subject: [PATCH 5/6] pre-commit

---
 tests/integration_tests/csv_upload_tests.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/integration_tests/csv_upload_tests.py b/tests/integration_tests/csv_upload_tests.py
index 0809f4007a82b..4d77f6113b179 100644
--- a/tests/integration_tests/csv_upload_tests.py
+++ b/tests/integration_tests/csv_upload_tests.py
@@ -480,7 +480,9 @@ def test_import_excel(mock_event_logger):
     )
 
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {EXCEL_UPLOAD_TABLE} ORDER BY b").fetchall()
+        data = engine.execute(
+            f"SELECT * from {EXCEL_UPLOAD_TABLE} ORDER BY b"
+        ).fetchall()
         assert data == [(0, "john", 1), (1, "paul", 2)]
 
 
@@ -544,7 +546,9 @@ def test_import_parquet(mock_event_logger):
     assert success_msg_f1 in resp
 
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {PARQUET_UPLOAD_TABLE} ORDER BY b").fetchall()
+        data = engine.execute(
+            f"SELECT * from {PARQUET_UPLOAD_TABLE} ORDER BY b"
+        ).fetchall()
         assert data == [("john", 1), ("paul", 2)]
 
     # replace table with zip file
@@ -555,5 +559,7 @@ def test_import_parquet(mock_event_logger):
     assert success_msg_f2 in resp
 
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {PARQUET_UPLOAD_TABLE} ORDER BY b").fetchall()
+        data = engine.execute(
+            f"SELECT * from {PARQUET_UPLOAD_TABLE} ORDER BY b"
+        ).fetchall()
         assert data == [("john", 1), ("paul", 2), ("max", 3), ("bob", 4)]

From 50db0de87ffd7f13690129fe42b2dd0ea210d09b Mon Sep 17 00:00:00 2001
From: John Bodley <john.bodley@gmail.com>
Date: Fri, 28 Apr 2023 08:45:28 +1200
Subject: [PATCH 6/6] Fix ORDER BY

---
 tests/integration_tests/csv_upload_tests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration_tests/csv_upload_tests.py b/tests/integration_tests/csv_upload_tests.py
index 4d77f6113b179..70f984775db59 100644
--- a/tests/integration_tests/csv_upload_tests.py
+++ b/tests/integration_tests/csv_upload_tests.py
@@ -385,12 +385,12 @@ def test_import_csv(mock_event_logger):
     )
     # make sure that john and empty string are replaced with None
     with test_db.get_sqla_engine_with_context() as engine:
-        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY b").fetchall()
+        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY c").fetchall()
         assert data == [(None, 1, "x"), ("paul", 2, None)]
         # default null values
         upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"})
         # make sure that john and empty string are replaced with None
-        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY b").fetchall()
+        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE} ORDER BY c").fetchall()
         assert data == [("john", 1, "x"), ("paul", 2, None)]
 
     # cleanup