sodadata · sdebruyn · Sep 10, 2024 · Sep 10, 2024 · Sep 10, 2024 · Sep 10, 2024
diff --git a/.env.example b/.env.example
@@ -51,3 +51,6 @@ CONTRACTS_POSTGRES_PASSWORD=***
 CONTRACTS_POSTGRES_DATABASE=***
 
 ATLAN_API_KEY=***
+
+FABRIC_ENDPOINT=***
+FABRIC_DWH=***
diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -175,4 +175,4 @@ zipp==3.19.2
 
 # The following packages are considered to be unsafe in a requirements file:
 # pip
-# setuptools
+# setuptools
diff --git a/pytest.ini b/pytest.ini
@@ -24,3 +24,4 @@ pythonpath =
     soda/teradata/tests
     soda/contracts/tests
     soda/oracle/tests
+    soda/fabric/tests
diff --git a/requirements.txt b/requirements.txt
@@ -22,3 +22,4 @@
 ./soda/teradata
 ./soda/contracts
 ./soda/atlan
+./soda/fabric
diff --git a/soda/core/tests/data_source/test_bug_double_metric_computation.py b/soda/core/tests/data_source/test_bug_double_metric_computation.py
@@ -5,7 +5,7 @@
 
 
 @pytest.mark.skipif(
-    test_data_source == "sqlserver",
+    test_data_source in ["fabric", "sqlserver"],
     reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
 )
 def test_double_metric_computation(data_source_fixture: DataSourceFixture):

diff --git a/soda/core/tests/data_source/test_data_source_specific_aggregation_functions.py b/soda/core/tests/data_source/test_data_source_specific_aggregation_functions.py
@@ -31,7 +31,7 @@ def test_data_source_specific_statistics_aggregation_metrics(data_source_fixture
         supported_checks.pop("stddev_samp")
     # TODO see what's going wrong with Vertica later:
     # Message: Function APPROXIMATE_PERCENTILE(int) does not exist
-    if test_data_source in ["sqlserver", "mysql", "spark_df", "oracle", "vertica"]:
+    if test_data_source in ["sqlserver", "mysql", "spark_df", "oracle", "vertica", "fabric"]:
         supported_checks = {}
 
     if supported_checks:

diff --git a/soda/core/tests/data_source/test_distribution_check.py b/soda/core/tests/data_source/test_distribution_check.py
@@ -132,7 +132,7 @@ def test_distribution_sql(data_source_fixture: DataSourceFixture, mock_file_syst
             table_name=table_name,
             schema_name=f"{data_source_fixture.data_source.database}.{data_source_fixture.schema_name}.",
         )
-    elif test_data_source == "sqlserver":
+    elif test_data_source in ["fabric", "sqlserver"]:
         expectation = "SELECT TOP 1000000 \n  cst_size \nFROM {schema_name}{table_name}"
         assert scan._checks[0].query.sql == expectation.format(
             table_name=table_name, schema_name=f"{data_source_fixture.schema_name}."
@@ -498,7 +498,7 @@ def test_continuous_distribution_check_large_sample_size(data_source_fixture: Da
     data_source_name = data_source_fixture.data_source_name
     if data_source_name in ["spark_df", "dask"]:
         assert sorted(distro_check.query.rows) == sorted([[1.0], [1.0], [2.0], [2.0], [3.0]])
-    elif data_source_name in ["snowflake", "bigquery", "sqlserver"]:
+    elif data_source_name in ["snowflake", "bigquery", "sqlserver", "fabric"]:
         assert len(distro_check.query.rows) == 5
     else:
         assert distro_check.query.rows == sorted([(1.0,), (1.0,), (2.0,), (2.0,), (3.0,)])

diff --git a/soda/core/tests/data_source/test_formats.py b/soda/core/tests/data_source/test_formats.py
@@ -7,6 +7,10 @@ def test_formats(data_source_fixture: DataSourceFixture):
     table_name = data_source_fixture.ensure_test_table(customers_test_table)
 
     test_definitions = {
+        "email": {
+            "passing_values": ["info@soda.io", "some+email@gmail.com", "a@b.be"],
+            "failing_values": ["", "a", " ", "1.5", "4,2", "@@@@@"],
+        },
         "integer": {
             "passing_values": ["0", "1234567890", "-0", "- 1234567890", "+0", "+1"],
             "failing_values": ["", "a", " ", "1.5", "4,2"],
@@ -133,7 +137,7 @@ def test_formats(data_source_fixture: DataSourceFixture):
         },
     }
 
-    if test_data_source == "sqlserver":
+    if test_data_source in ["fabric", "sqlserver"]:
         test_definitions.pop("percentage")  # Partially supported.
         test_definitions.pop("date us")  # Partially supported.
         test_definitions.pop("date eu")  # Partially supported.
@@ -159,7 +163,7 @@ def assert_format_values(format, data_source_fixture: DataSourceFixture, table_n
     def set_up_expression(value: str, format: str) -> str:
         expression = data_source.get_default_format_expression(f"'{value}'", format)
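-        # Special handling for sqlserver and teradata - expression matching cannot be used in the SELECT statement, so wrap it in CASE ... THEN ... ELSE for this test.
-        if test_data_source in ["sqlserver", "teradata"]:
+        # Special handling for sqlserver, teradata and fabric - expression matching cannot be used in the SELECT statement, so wrap it in CASE ... THEN ... ELSE for this test.
+        if test_data_source in ["sqlserver", "teradata", "fabric"]: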
             expression = f"CASE WHEN {expression} THEN 1 ELSE 0 END"
 
         return expression

diff --git a/soda/core/tests/data_source/test_freshness.py b/soda/core/tests/data_source/test_freshness.py
@@ -116,7 +116,7 @@ def test_freshness_with_table_filter(data_source_fixture: DataSourceFixture):
     table_name = data_source_fixture.ensure_test_table(customers_test_table)
     where_cond = (
         f"""CONVERT(DATETIME,'${{START_TIME}}') <= ts AND ts < CONVERT(DATETIME,'${{END_TIME}}')"""
-        if test_data_source == "sqlserver"
+        if test_data_source in ["fabric", "sqlserver"]
         else f"""TIMESTAMP '${{START_TIME}}' <= ts AND ts < TIMESTAMP '${{END_TIME}}'"""
     )
 
@@ -146,7 +146,7 @@ def test_freshness_with_table_filter(data_source_fixture: DataSourceFixture):
 def test_freshness_no_rows(data_source_fixture: DataSourceFixture):
     table_name = data_source_fixture.ensure_test_table(customers_test_table)
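-    # There is no boolean type and variables in Teradata
-    cond = "1 = 0" if test_data_source in ["sqlserver", "teradata"] else "FALSE"
+    # There is no boolean type and variables in Teradata; SQL Server and Fabric have no FALSE literal either, so use an always-false comparison
+    cond = "1 = 0" if test_data_source in ["sqlserver", "teradata", "fabric"] else "FALSE"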
     scan = data_source_fixture.create_test_scan()
     scan.add_variables(
         {
@@ -174,7 +174,7 @@ def test_freshness_with_check_filter(data_source_fixture: DataSourceFixture):
     table_name = data_source_fixture.ensure_test_table(customers_test_table)
     where_cond = (
         f"""CONVERT(DATETIME,'${{START_TIME}}') <= ts AND ts < CONVERT(DATETIME,'${{END_TIME}}')"""
-        if test_data_source == "sqlserver"
+        if test_data_source in ["fabric", "sqlserver"]
         else f"""TIMESTAMP '${{START_TIME}}' <= ts AND ts < TIMESTAMP '${{END_TIME}}'"""
     )
 
@@ -206,7 +206,7 @@ def test_freshness_with_check_filter(data_source_fixture: DataSourceFixture):
 def test_freshness_check_filter_no_rows(data_source_fixture: DataSourceFixture):
     table_name = data_source_fixture.ensure_test_table(customers_test_table)
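-    # There is no boolean type and variables in Teradata
-    cond = "1 = 0" if test_data_source in ["sqlserver", "teradata"] else "FALSE"
+    # There is no boolean type and variables in Teradata; SQL Server and Fabric have no FALSE literal either, so use an always-false comparison
+    cond = "1 = 0" if test_data_source in ["sqlserver", "teradata", "fabric"] else "FALSE"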
     scan = data_source_fixture.create_test_scan()
     scan.add_variables(
         {

diff --git a/soda/core/tests/data_source/test_invalid.py b/soda/core/tests/data_source/test_invalid.py
@@ -86,7 +86,7 @@ def test_valid_min_max(data_source_fixture: DataSourceFixture):
 
 
 @pytest.mark.skipif(
-    test_data_source == "sqlserver",
+    test_data_source in ["fabric", "sqlserver"],
     reason="Full regex support is not supported by SQLServer",
 )
 def test_valid_format_email(data_source_fixture: DataSourceFixture):
@@ -107,7 +107,7 @@ def test_valid_format_email(data_source_fixture: DataSourceFixture):
 
 
 @pytest.mark.skipif(
-    test_data_source == "sqlserver",
+    test_data_source in ["fabric", "sqlserver"],
     reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
 )
 def test_column_configured_invalid_and_missing_values(data_source_fixture: DataSourceFixture):

diff --git a/soda/core/tests/data_source/test_metric_check_filter.py b/soda/core/tests/data_source/test_metric_check_filter.py
@@ -65,7 +65,7 @@ def test_missing_filtered_sample_query(data_source_fixture: DataSourceFixture):
 
 
 @pytest.mark.skipif(
-    test_data_source == "sqlserver",
+    test_data_source in ["fabric", "sqlserver"],
     reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
 )
 def test_valid_filtered(data_source_fixture: DataSourceFixture):
@@ -88,7 +88,7 @@ def test_valid_filtered(data_source_fixture: DataSourceFixture):
 
 
 @pytest.mark.skipif(
-    test_data_source == "sqlserver",
+    test_data_source in ["fabric", "sqlserver"],
     reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
 )
 def test_valid_percentage_filtered(data_source_fixture: DataSourceFixture):

diff --git a/soda/core/tests/data_source/test_numerical_metric_checks_on_text_columns.py b/soda/core/tests/data_source/test_numerical_metric_checks_on_text_columns.py
@@ -5,7 +5,7 @@
 
 
 @pytest.mark.skipif(
-    test_data_source in ["sqlserver"],
+    test_data_source in ["sqlserver", "fabric"],
     reason="Full regex support is not supported by SQLServer. REGEXP_REPLACE is used in this check but it is not supported.",
 )
 def test_numeric_metric_checks_on_text_column(data_source_fixture: DataSourceFixture):
@@ -32,7 +32,7 @@ def test_numeric_metric_checks_on_text_column(data_source_fixture: DataSourceFix
 
 
 @pytest.mark.skipif(
-    test_data_source in ["sqlserver"],
+    test_data_source in ["sqlserver", "fabric"],
     reason="Full regex support is not supported by SQLServer. REGEXP_REPLACE is used in this check but it is not supported.",
 )
 def test_numeric_metric_checks_on_text_column_local_format(data_source_fixture: DataSourceFixture):

diff --git a/soda/core/tests/data_source/test_percentage_metrics.py b/soda/core/tests/data_source/test_percentage_metrics.py
@@ -5,7 +5,7 @@
 
 
 @pytest.mark.skipif(
-    test_data_source == "sqlserver",
+    test_data_source in ["fabric", "sqlserver"],
     reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
 )
 def test_default_missing_percentage(data_source_fixture: DataSourceFixture):

diff --git a/soda/core/tests/data_source/test_table_filter.py b/soda/core/tests/data_source/test_table_filter.py
@@ -14,7 +14,7 @@ def test_filter_on_date(data_source_fixture: DataSourceFixture):
     scan.add_variables(
         {"DATE_LOWER": "2020-06-23", "DATE_UPPER": "2020-06-24"}
     )  # use DATE_LOWER and DATE_UPPER to avoid issues with dask
-    date_expr = "" if test_data_source == "sqlserver" else "DATE"
+    date_expr = "" if test_data_source in ["fabric", "sqlserver"] else "DATE"
     scan.add_sodacl_yaml_str(
         f"""
           filter {table_name} [daily]:
@@ -69,7 +69,7 @@ def test_table_filter_on_timestamp(data_source_fixture: DataSourceFixture):
     table_name = data_source_fixture.ensure_test_table(customers_test_table)
 
     scan = data_source_fixture.create_test_scan()
-    if test_data_source == "sqlserver":
+    if test_data_source in ["fabric", "sqlserver"]:
         where_cond = f"""CONVERT(DATETIME, '${{ts_start}}') <= ts AND ts <  CONVERT(DATETIME,'${{ts_end}}')"""
     elif test_data_source == "dask":
         where_cond = f"""\"'${{ts_start}}' <= ts AND ts < '${{ts_end}}'\""""

diff --git a/soda/core/tests/data_source/test_user_defined_metric_checks.py b/soda/core/tests/data_source/test_user_defined_metric_checks.py
@@ -10,7 +10,7 @@ def test_user_defined_table_expression_metric_check(data_source_fixture: DataSou
     table_name = data_source_fixture.ensure_test_table(customers_test_table)
 
     scan = data_source_fixture.create_test_scan()
-    length_expr = "LEN" if data_source_fixture.data_source_name == "sqlserver" else "LENGTH"
+    length_expr = "LEN" if data_source_fixture.data_source_name in ["sqlserver", "fabric"] else "LENGTH"
 
     ones_expression = f"SUM({length_expr}(cst_size_txt))"