Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Azure SQL, Synapse, and Microsoft Fabric and extend support for SQL Server #2160

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,6 @@ CONTRACTS_POSTGRES_PASSWORD=***
CONTRACTS_POSTGRES_DATABASE=***

ATLAN_API_KEY=***

FABRIC_ENDPOINT=***
FABRIC_DWH=***
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,4 +175,4 @@ zipp==3.19.2

# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools
# setuptools
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ pythonpath =
soda/teradata/tests
soda/contracts/tests
soda/oracle/tests
soda/fabric/tests
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@
./soda/teradata
./soda/contracts
./soda/atlan
./soda/fabric
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@pytest.mark.skipif(
test_data_source == "sqlserver",
test_data_source in ["fabric", "sqlserver"],
reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
)
def test_double_metric_computation(data_source_fixture: DataSourceFixture):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_data_source_specific_statistics_aggregation_metrics(data_source_fixture
supported_checks.pop("stddev_samp")
# TODO see what's going wrong with Vertica later:
# Message: Function APPROXIMATE_PERCENTILE(int) does not exist
if test_data_source in ["sqlserver", "mysql", "spark_df", "oracle", "vertica"]:
if test_data_source in ["sqlserver", "mysql", "spark_df", "oracle", "vertica", "fabric"]:
supported_checks = {}

if supported_checks:
Expand Down
4 changes: 2 additions & 2 deletions soda/core/tests/data_source/test_distribution_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_distribution_sql(data_source_fixture: DataSourceFixture, mock_file_syst
table_name=table_name,
schema_name=f"{data_source_fixture.data_source.database}.{data_source_fixture.schema_name}.",
)
elif test_data_source == "sqlserver":
elif test_data_source in ["fabric", "sqlserver"]:
expectation = "SELECT TOP 1000000 \n cst_size \nFROM {schema_name}{table_name}"
assert scan._checks[0].query.sql == expectation.format(
table_name=table_name, schema_name=f"{data_source_fixture.schema_name}."
Expand Down Expand Up @@ -498,7 +498,7 @@ def test_continuous_distribution_check_large_sample_size(data_source_fixture: Da
data_source_name = data_source_fixture.data_source_name
if data_source_name in ["spark_df", "dask"]:
assert sorted(distro_check.query.rows) == sorted([[1.0], [1.0], [2.0], [2.0], [3.0]])
elif data_source_name in ["snowflake", "bigquery", "sqlserver"]:
elif data_source_name in ["snowflake", "bigquery", "sqlserver", "fabric"]:
assert len(distro_check.query.rows) == 5
else:
assert distro_check.query.rows == sorted([(1.0,), (1.0,), (2.0,), (2.0,), (3.0,)])
Expand Down
8 changes: 6 additions & 2 deletions soda/core/tests/data_source/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ def test_formats(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)

test_definitions = {
"email": {
"passing_values": ["info@soda.io", "some+email@gmail.com", "a@b.be"],
"failing_values": ["", "a", " ", "1.5", "4,2", "@@@@@"],
},
"integer": {
"passing_values": ["0", "1234567890", "-0", "- 1234567890", "+0", "+1"],
"failing_values": ["", "a", " ", "1.5", "4,2"],
Expand Down Expand Up @@ -133,7 +137,7 @@ def test_formats(data_source_fixture: DataSourceFixture):
},
}

if test_data_source == "sqlserver":
if test_data_source in ["fabric", "sqlserver"]:
test_definitions.pop("percentage") # Partially supported.
test_definitions.pop("date us") # Partially supported.
test_definitions.pop("date eu") # Partially supported.
Expand All @@ -159,7 +163,7 @@ def assert_format_values(format, data_source_fixture: DataSourceFixture, table_n
def set_up_expression(value: str, format: str) -> str:
expression = data_source.get_default_format_expression(f"'{value}'", format)
# Special handling for sqlserver, fabric and teradata - expression matching cannot be used in the SELECT statement, so wrap it in CASE ... THEN ... ELSE for this test.
if test_data_source in ["sqlserver", "teradata"]:
if test_data_source in ["sqlserver", "teradata", "fabric"]:
expression = f"CASE WHEN {expression} THEN 1 ELSE 0 END"

return expression
Expand Down
8 changes: 4 additions & 4 deletions soda/core/tests/data_source/test_freshness.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def test_freshness_with_table_filter(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)
where_cond = (
f"""CONVERT(DATETIME,'${{START_TIME}}') <= ts AND ts < CONVERT(DATETIME,'${{END_TIME}}')"""
if test_data_source == "sqlserver"
if test_data_source in ["fabric", "sqlserver"]
else f"""TIMESTAMP '${{START_TIME}}' <= ts AND ts < TIMESTAMP '${{END_TIME}}'"""
)

Expand Down Expand Up @@ -146,7 +146,7 @@ def test_freshness_with_table_filter(data_source_fixture: DataSourceFixture):
def test_freshness_no_rows(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)
# There is no boolean type or variables in Teradata; the same "1 = 0" workaround is used for sqlserver and fabric.
cond = "1 = 0" if test_data_source in ["sqlserver", "teradata"] else "FALSE"
cond = "1 = 0" if test_data_source in ["sqlserver", "teradata", "fabric"] else "FALSE"
scan = data_source_fixture.create_test_scan()
scan.add_variables(
{
Expand Down Expand Up @@ -174,7 +174,7 @@ def test_freshness_with_check_filter(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)
where_cond = (
f"""CONVERT(DATETIME,'${{START_TIME}}') <= ts AND ts < CONVERT(DATETIME,'${{END_TIME}}')"""
if test_data_source == "sqlserver"
if test_data_source in ["fabric", "sqlserver"]
else f"""TIMESTAMP '${{START_TIME}}' <= ts AND ts < TIMESTAMP '${{END_TIME}}'"""
)

Expand Down Expand Up @@ -206,7 +206,7 @@ def test_freshness_with_check_filter(data_source_fixture: DataSourceFixture):
def test_freshness_check_filter_no_rows(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)
# There is no boolean type or variables in Teradata; the same "1 = 0" workaround is used for sqlserver and fabric.
cond = "1 = 0" if test_data_source in ["sqlserver", "teradata"] else "FALSE"
cond = "1 = 0" if test_data_source in ["sqlserver", "teradata", "fabric"] else "FALSE"
scan = data_source_fixture.create_test_scan()
scan.add_variables(
{
Expand Down
4 changes: 2 additions & 2 deletions soda/core/tests/data_source/test_invalid.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_valid_min_max(data_source_fixture: DataSourceFixture):


@pytest.mark.skipif(
test_data_source == "sqlserver",
test_data_source in ["fabric", "sqlserver"],
reason="Full regex support is not supported by SQLServer",
)
def test_valid_format_email(data_source_fixture: DataSourceFixture):
Expand All @@ -107,7 +107,7 @@ def test_valid_format_email(data_source_fixture: DataSourceFixture):


@pytest.mark.skipif(
test_data_source == "sqlserver",
test_data_source in ["fabric", "sqlserver"],
reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
)
def test_column_configured_invalid_and_missing_values(data_source_fixture: DataSourceFixture):
Expand Down
4 changes: 2 additions & 2 deletions soda/core/tests/data_source/test_metric_check_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_missing_filtered_sample_query(data_source_fixture: DataSourceFixture):


@pytest.mark.skipif(
test_data_source == "sqlserver",
test_data_source in ["fabric", "sqlserver"],
reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
)
def test_valid_filtered(data_source_fixture: DataSourceFixture):
Expand All @@ -88,7 +88,7 @@ def test_valid_filtered(data_source_fixture: DataSourceFixture):


@pytest.mark.skipif(
test_data_source == "sqlserver",
test_data_source in ["fabric", "sqlserver"],
reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
)
def test_valid_percentage_filtered(data_source_fixture: DataSourceFixture):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@pytest.mark.skipif(
test_data_source in ["sqlserver"],
test_data_source in ["sqlserver", "fabric"],
reason="Full regex support is not supported by SQLServer. REGEXP_REPLACE is used in this check but it is not supported.",
)
def test_numeric_metric_checks_on_text_column(data_source_fixture: DataSourceFixture):
Expand All @@ -32,7 +32,7 @@ def test_numeric_metric_checks_on_text_column(data_source_fixture: DataSourceFix


@pytest.mark.skipif(
test_data_source in ["sqlserver"],
test_data_source in ["sqlserver", "fabric"],
reason="Full regex support is not supported by SQLServer. REGEXP_REPLACE is used in this check but it is not supported.",
)
def test_numeric_metric_checks_on_text_column_local_format(data_source_fixture: DataSourceFixture):
Expand Down
2 changes: 1 addition & 1 deletion soda/core/tests/data_source/test_percentage_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@pytest.mark.skipif(
test_data_source == "sqlserver",
test_data_source in ["fabric", "sqlserver"],
reason="Full regex support is not supported by SQLServer. 'Percentage' format is supported but with limited functionality.",
)
def test_default_missing_percentage(data_source_fixture: DataSourceFixture):
Expand Down
4 changes: 2 additions & 2 deletions soda/core/tests/data_source/test_table_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_filter_on_date(data_source_fixture: DataSourceFixture):
scan.add_variables(
{"DATE_LOWER": "2020-06-23", "DATE_UPPER": "2020-06-24"}
) # use DATE_LOWER and DATE_UPPER to avoid issues with dask
date_expr = "" if test_data_source == "sqlserver" else "DATE"
date_expr = "" if test_data_source in ["fabric", "sqlserver"] else "DATE"
scan.add_sodacl_yaml_str(
f"""
filter {table_name} [daily]:
Expand Down Expand Up @@ -69,7 +69,7 @@ def test_table_filter_on_timestamp(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)

scan = data_source_fixture.create_test_scan()
if test_data_source == "sqlserver":
if test_data_source in ["fabric", "sqlserver"]:
where_cond = f"""CONVERT(DATETIME, '${{ts_start}}') <= ts AND ts < CONVERT(DATETIME,'${{ts_end}}')"""
elif test_data_source == "dask":
where_cond = f"""\"'${{ts_start}}' <= ts AND ts < '${{ts_end}}'\""""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_user_defined_table_expression_metric_check(data_source_fixture: DataSou
table_name = data_source_fixture.ensure_test_table(customers_test_table)

scan = data_source_fixture.create_test_scan()
length_expr = "LEN" if data_source_fixture.data_source_name == "sqlserver" else "LENGTH"
length_expr = "LEN" if data_source_fixture.data_source_name in ["sqlserver", "fabric"] else "LENGTH"

ones_expression = f"SUM({length_expr}(cst_size_txt))"

Expand Down
Loading