Skip to content

Commit

Permalink
Add option to specify type specific date truncation functions (#9238)
Browse files Browse the repository at this point in the history
  • Loading branch information
villebro authored Mar 5, 2020
1 parent 7d572d9 commit ef2ebbd
Show file tree
Hide file tree
Showing 27 changed files with 109 additions and 79 deletions.
2 changes: 2 additions & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ assists people when migrating to a new version.

## Next

* [9238](https://github.com/apache/incubator-superset/pull/9238): the config option `TIME_GRAIN_ADDON_FUNCTIONS` has been renamed to `TIME_GRAIN_ADDON_EXPRESSIONS` to better reflect the content of the dictionary.

* [9218](https://github.com/apache/incubator-superset/pull/9218): SQLite connections have been disabled by default
for analytics databases. You can optionally enable SQLite by setting `PREVENT_UNSAFE_DB_CONNECTIONS` to `False`.
It is not recommended to change this setting, as arbitrary SQLite connections can lead to security vulnerabilities.
Expand Down
5 changes: 3 additions & 2 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,13 +360,14 @@ def _try_json_readsha(filepath, length): # pylint: disable=unused-argument
TIME_GRAIN_ADDONS: Dict[str, str] = {}

# Implementation of additional time grains per engine.
# The column to be truncated is denoted `{col}` in the expression.
# For example: To implement 2 second time grain on clickhouse engine:
# TIME_GRAIN_ADDON_FUNCTIONS = {
# TIME_GRAIN_ADDON_EXPRESSIONS = {
# 'clickhouse': {
# 'PT2S': 'toDateTime(intDiv(toUInt32(toDateTime({col})), 2)*2)'
# }
# }
TIME_GRAIN_ADDON_FUNCTIONS: Dict[str, Dict[str, str]] = {}
TIME_GRAIN_ADDON_EXPRESSIONS: Dict[str, Dict[str, str]] = {}

# ---------------------------------------------------
# List of viz_types not allowed in your environment
Expand Down
4 changes: 3 additions & 1 deletion superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,9 @@ def get_timestamp_expression(
col = literal_column(self.expression)
else:
col = column(self.column_name)
time_expr = db.db_engine_spec.get_timestamp_expr(col, pdf, time_grain)
time_expr = db.db_engine_spec.get_timestamp_expr(
col, pdf, time_grain, self.type
)
return self.table.make_sqla_column_compatible(time_expr, label)

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
class AthenaEngineSpec(BaseEngineSpec):
engine = "awsathena"

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))",
"PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))",
Expand Down
33 changes: 21 additions & 12 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
"""Abstract class for database engine specific configurations"""

engine = "base" # str as defined in sqlalchemy.engine.engine
_time_grain_functions: Dict[Optional[str], str] = {}
_date_trunc_functions: Dict[str, str] = {}
_time_grain_expressions: Dict[Optional[str], str] = {}
time_groupby_inline = False
limit_method = LimitMethod.FORCE_LIMIT
time_secondary_columns = False
Expand Down Expand Up @@ -204,22 +205,31 @@ def get_engine(

@classmethod
def get_timestamp_expr(
cls, col: ColumnClause, pdf: Optional[str], time_grain: Optional[str]
cls,
col: ColumnClause,
pdf: Optional[str],
time_grain: Optional[str],
type_: Optional[str] = None,
) -> TimestampExpression:
"""
Construct a TimestampExpression to be used in a SQLAlchemy query.
:param col: Target column for the TimestampExpression
:param pdf: date format (seconds or milliseconds)
:param time_grain: time grain, e.g. P1Y for 1 year
:param type_: the source column type
:return: TimestampExpression object
"""
if time_grain:
time_expr = cls.get_time_grain_functions().get(time_grain)
time_expr = cls.get_time_grain_expressions().get(time_grain)
if not time_expr:
raise NotImplementedError(
f"No grain spec for {time_grain} for database {cls.engine}"
)
if type_ and "{func}" in time_expr:
date_trunc_function = cls._date_trunc_functions.get(type_)
if date_trunc_function:
time_expr = time_expr.replace("{func}", date_trunc_function)
else:
time_expr = "{col}"

Expand All @@ -240,31 +250,30 @@ def get_time_grains(cls) -> Tuple[TimeGrain, ...]:
"""

ret_list = []
time_grain_functions = cls.get_time_grain_functions()
time_grains = builtin_time_grains.copy()
time_grains.update(config["TIME_GRAIN_ADDONS"])
for duration, func in time_grain_functions.items():
for duration, func in cls.get_time_grain_expressions().items():
if duration in time_grains:
name = time_grains[duration]
ret_list.append(TimeGrain(name, _(name), func, duration))
return tuple(ret_list)

@classmethod
def get_time_grain_functions(cls) -> Dict[Optional[str], str]:
def get_time_grain_expressions(cls) -> Dict[Optional[str], str]:
"""
Return a dict of all supported time grains including any potential added grains
but excluding any potentially blacklisted grains in the config file.
:return: All time grain functions supported by the engine
:return: All time grain expressions supported by the engine
"""
# TODO: use @memoize decorator or similar to avoid recomputation on every call
time_grain_functions = cls._time_grain_functions.copy()
grain_addon_functions = config["TIME_GRAIN_ADDON_FUNCTIONS"]
time_grain_functions.update(grain_addon_functions.get(cls.engine, {}))
time_grain_expressions = cls._time_grain_expressions.copy()
grain_addon_expressions = config["TIME_GRAIN_ADDON_EXPRESSIONS"]
time_grain_expressions.update(grain_addon_expressions.get(cls.engine, {}))
blacklist: List[str] = config["TIME_GRAIN_BLACKLIST"]
for key in blacklist:
time_grain_functions.pop(key)
return time_grain_functions
time_grain_expressions.pop(key)
return time_grain_expressions

@classmethod
def make_select_compatible(
Expand Down
29 changes: 19 additions & 10 deletions superset/db_engine_specs/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,23 @@ class BigQueryEngineSpec(BaseEngineSpec):
"""
arraysize = 5000

_time_grain_functions = {
_date_trunc_functions = {
"DATE": "DATE_TRUNC",
"DATETIME": "DATETIME_TRUNC",
"TIME": "TIME_TRUNC",
"TIMESTAMP": "TIMESTAMP_TRUNC",
}

_time_grain_expressions = {
None: "{col}",
"PT1S": "TIMESTAMP_TRUNC({col}, SECOND)",
"PT1M": "TIMESTAMP_TRUNC({col}, MINUTE)",
"PT1H": "TIMESTAMP_TRUNC({col}, HOUR)",
"P1D": "TIMESTAMP_TRUNC({col}, DAY)",
"P1W": "TIMESTAMP_TRUNC({col}, WEEK)",
"P1M": "TIMESTAMP_TRUNC({col}, MONTH)",
"P0.25Y": "TIMESTAMP_TRUNC({col}, QUARTER)",
"P1Y": "TIMESTAMP_TRUNC({col}, YEAR)",
"PT1S": "{func}({col}, SECOND)",
"PT1M": "{func}({col}, MINUTE)",
"PT1H": "{func}({col}, HOUR)",
"P1D": "{func}({col}, DAY)",
"P1W": "{func}({col}, WEEK)",
"P1M": "{func}({col}, MONTH)",
"P0.25Y": "{func}({col}, QUARTER)",
"P1Y": "{func}({col}, YEAR)",
}

@classmethod
Expand All @@ -68,13 +75,15 @@ def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
return f"CAST('{dttm.date().isoformat()}' AS DATE)"
if tt == "DATETIME":
return f"""CAST('{dttm.isoformat(timespec="microseconds")}' AS DATETIME)"""
if tt == "TIME":
return f"""CAST('{dttm.strftime("%H:%M:%S.%f")}' AS TIME)"""
if tt == "TIMESTAMP":
return f"""CAST('{dttm.isoformat(timespec="microseconds")}' AS TIMESTAMP)"""
return None

@classmethod
def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple]:
data = super(BigQueryEngineSpec, cls).fetch_data(cursor, limit)
data = super().fetch_data(cursor, limit)
if data and type(data[0]).__name__ == "Row":
data = [r.values() for r in data] # type: ignore
return data
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class ClickHouseEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
time_secondary_columns = True
time_groupby_inline = True

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1M": "toStartOfMinute(toDateTime({col}))",
"PT5M": "toDateTime(intDiv(toUInt32(toDateTime({col})), 300)*300)",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/db2.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Db2EngineSpec(BaseEngineSpec):
force_column_alias_quotes = True
max_column_name_length = 30

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "CAST({col} as TIMESTAMP)" " - MICROSECOND({col}) MICROSECONDS",
"PT1M": "CAST({col} as TIMESTAMP)"
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/dremio.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class DremioBaseEngineSpec(BaseEngineSpec):

engine = "dremio"

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "DATE_TRUNC('second', {col})",
"PT1M": "DATE_TRUNC('minute', {col})",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/drill.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class DrillEngineSpec(BaseEngineSpec):

engine = "drill"

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "NEARESTDATE({col}, 'SECOND')",
"PT1M": "NEARESTDATE({col}, 'MINUTE')",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/druid.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class DruidEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
allows_joins = False
allows_subqueries = True

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "FLOOR({col} TO SECOND)",
"PT1M": "FLOOR({col} TO MINUTE)",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ElasticSearchEngineSpec(BaseEngineSpec): # pylint: disable=abstract-metho
allows_joins = False
allows_subqueries = True

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "HISTOGRAM({col}, INTERVAL 1 SECOND)",
"PT1M": "HISTOGRAM({col}, INTERVAL 1 MINUTE)",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/exasol.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class ExasolEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
max_column_name_length = 128

# Exasol's DATE_TRUNC function is PostgreSQL compatible
_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "DATE_TRUNC('second', {col})",
"PT1M": "DATE_TRUNC('minute', {col})",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/hana.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class HanaEngineSpec(PostgresBaseEngineSpec):
force_column_alias_quotes = True
max_column_name_length = 30

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,20))",
"PT1M": "TO_TIMESTAMP(SUBSTRING(TO_TIMESTAMP({col}),0,17) || '00')",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/impala.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ImpalaEngineSpec(BaseEngineSpec):

engine = "impala"

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1M": "TRUNC({col}, 'MI')",
"PT1H": "TRUNC({col}, 'HH')",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/kylin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class KylinEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method

engine = "kylin"

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO SECOND) AS TIMESTAMP)",
"PT1M": "CAST(FLOOR(CAST({col} AS TIMESTAMP) TO MINUTE) AS TIMESTAMP)",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class MssqlEngineSpec(BaseEngineSpec):
limit_method = LimitMethod.WRAP_SQL
max_column_name_length = 128

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "DATEADD(second, DATEDIFF(second, '2000-01-01', {col}), '2000-01-01')",
"PT1M": "DATEADD(minute, DATEDIFF(minute, 0, {col}), 0)",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class MySQLEngineSpec(BaseEngineSpec):
engine = "mysql"
max_column_name_length = 64

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "DATE_ADD(DATE({col}), "
"INTERVAL (HOUR({col})*60*60 + MINUTE({col})*60"
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class OracleEngineSpec(BaseEngineSpec):
force_column_alias_quotes = True
max_column_name_length = 30

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "CAST({col} as DATE)",
"PT1M": "TRUNC(CAST({col} as DATE), 'MI')",
Expand Down
10 changes: 7 additions & 3 deletions superset/db_engine_specs/pinot.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
allows_column_aliases = False

# Pinot does its own conversion below
_time_grain_functions: Dict[Optional[str], str] = {
_time_grain_expressions: Dict[Optional[str], str] = {
"PT1S": "1:SECONDS",
"PT1M": "1:MINUTES",
"PT1H": "1:HOURS",
Expand All @@ -51,7 +51,11 @@ class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method

@classmethod
def get_timestamp_expr(
cls, col: ColumnClause, pdf: Optional[str], time_grain: Optional[str]
cls,
col: ColumnClause,
pdf: Optional[str],
time_grain: Optional[str],
type_: Optional[str] = None,
) -> TimestampExpression:
is_epoch = pdf in ("epoch_s", "epoch_ms")

Expand All @@ -75,7 +79,7 @@ def get_timestamp_expr(
else:
seconds_or_ms = "MILLISECONDS" if pdf == "epoch_ms" else "SECONDS"
tf = f"1:{seconds_or_ms}:EPOCH"
granularity = cls.get_time_grain_functions().get(time_grain)
granularity = cls.get_time_grain_expressions().get(time_grain)
if not granularity:
raise NotImplementedError("No pinot grain spec for " + str(time_grain))
# In pinot the output is a string since there is no timestamp column like pg
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class PostgresBaseEngineSpec(BaseEngineSpec):

engine = ""

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "DATE_TRUNC('second', {col})",
"PT1M": "DATE_TRUNC('minute', {col})",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
class PrestoEngineSpec(BaseEngineSpec):
engine = "presto"

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))",
"PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
force_column_alias_quotes = True
max_column_name_length = 256

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "DATE_TRUNC('SECOND', {col})",
"PT1M": "DATE_TRUNC('MINUTE', {col})",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
class SqliteEngineSpec(BaseEngineSpec):
engine = "sqlite"

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1S": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
"PT1M": "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
Expand Down
2 changes: 1 addition & 1 deletion superset/db_engine_specs/teradata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class TeradataEngineSpec(BaseEngineSpec):
limit_method = LimitMethod.WRAP_SQL
max_column_name_length = 30 # since 14.10 this is 128

_time_grain_functions = {
_time_grain_expressions = {
None: "{col}",
"PT1M": "TRUNC(CAST({col} as DATE), 'MI')",
"PT1H": "TRUNC(CAST({col} as DATE), 'HH')",
Expand Down
6 changes: 3 additions & 3 deletions tests/db_engine_specs/base_engine_spec_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,13 @@ def test_limit_with_non_token_limit(self):
def test_time_grain_blacklist(self):
with app.app_context():
app.config["TIME_GRAIN_BLACKLIST"] = ["PT1M"]
time_grain_functions = SqliteEngineSpec.get_time_grain_functions()
time_grain_functions = SqliteEngineSpec.get_time_grain_expressions()
self.assertNotIn("PT1M", time_grain_functions)

def test_time_grain_addons(self):
with app.app_context():
app.config["TIME_GRAIN_ADDONS"] = {"PTXM": "x seconds"}
app.config["TIME_GRAIN_ADDON_FUNCTIONS"] = {
app.config["TIME_GRAIN_ADDON_EXPRESSIONS"] = {
"sqlite": {"PTXM": "ABC({col})"}
}
time_grains = SqliteEngineSpec.get_time_grains()
Expand All @@ -174,7 +174,7 @@ def test_engine_time_grain_validity(self):
for engine in engines.values():
if engine is not BaseEngineSpec:
# make sure time grain expressions have been defined
self.assertGreater(len(engine.get_time_grain_functions()), 0)
self.assertGreater(len(engine.get_time_grain_expressions()), 0)
# make sure all defined time grains are supported
defined_grains = {grain.duration for grain in engine.get_time_grains()}
intersection = time_grains.intersection(defined_grains)
Expand Down
Loading

0 comments on commit ef2ebbd

Please sign in to comment.