Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Ability to share saved queries #24301

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions superset-frontend/src/SqlLab/actions/sqlLab.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,8 @@ export function updateQueryEditor(alterations) {
export function scheduleQuery(query) {
return dispatch =>
SupersetClient.post({
endpoint: '/savedqueryviewapi/api/create',
endpoint: '/api/v1/saved_query/',
postPayload: query,
stringify: false,
})
.then(() =>
dispatch(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ const ShareSqlLabQuery = ({
}: ShareSqlLabQueryProps) => {
const theme = useTheme();

const { dbId, name, schema, autorun, sql, remoteId, templateParams } =
const { dbId, name, schema, autorun, sql, remoteId, templateParams, uuid } =
useQueryEditor(queryEditorId, [
'dbId',
'name',
Expand All @@ -57,6 +57,7 @@ const ShareSqlLabQuery = ({
'sql',
'remoteId',
'templateParams',
'uuid',
]);

const getCopyUrlForKvStore = (callback: Function) => {
Expand All @@ -76,10 +77,10 @@ const ShareSqlLabQuery = ({
const getCopyUrlForSavedQuery = (callback: Function) => {
let savedQueryToastContent;

if (remoteId) {
if (uuid) {
savedQueryToastContent = `${
window.location.origin + window.location.pathname
}?savedQueryId=${remoteId}`;
}?savedQueryId=${uuid}`;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a concern that by switching to UUID existing URLs will break?

Copy link
Member Author

@jfrag1 jfrag1 Jun 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Existing URLs will still work just fine as the get saved query endpoint still accepts numerical id's

callback(savedQueryToastContent);
} else {
savedQueryToastContent = t('Please save the query to enable sharing');
Expand Down
1 change: 1 addition & 0 deletions superset-frontend/src/SqlLab/fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ export const defaultQueryEditor = {
remoteId: null,
hideLeftBar: false,
templateParams: '{}',
uuid: '8a7f4812-fa3a-4cf8-86f3-67a55fcc9102',
};

export const extraQueryEditor1 = {
Expand Down
1 change: 1 addition & 0 deletions superset-frontend/src/SqlLab/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export interface QueryEditor {
selectedText?: string;
queryLimit?: number;
description?: string;
uuid: string | null;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we use an optional string like the ones above?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding is that uuid: string | null means the key will always be there, but may be null, and uuid?: string means that the key may or may not be there, but if it is it's a string. This way seemed more accurate to me, I copied from the type def for the remoteId here (which is the saved query numerical id), since it should be the same for UUID

}

export type toastState = {
Expand Down
6 changes: 3 additions & 3 deletions superset-frontend/src/pages/SavedQueryList/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,10 @@ function SavedQueryList({
};

const copyQueryLink = useCallback(
(id: number) => {
(uuid: string) => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does TypeScript support a UUID type?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not that I'm aware of, didn't find anything on a quick google search either

copyTextToClipboard(() =>
Promise.resolve(
`${window.location.origin}/superset/sqllab?savedQueryId=${id}`,
`${window.location.origin}/superset/sqllab?savedQueryId=${uuid}`,
),
)
.then(() => {
Expand Down Expand Up @@ -390,7 +390,7 @@ function SavedQueryList({
handleSavedQueryPreview(original.id);
};
const handleEdit = () => openInSqlLab(original.id);
const handleCopy = () => copyQueryLink(original.id);
const handleCopy = () => copyQueryLink(original.uuid);
const handleExport = () => handleBulkSavedQueryExport([original]);
const handleDelete = () => setQueryCurrentlyDeleting(original);

Expand Down
4 changes: 2 additions & 2 deletions superset/dashboards/dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@
from superset import security_manager
from superset.dao.base import BaseDAO
from superset.dashboards.commands.exceptions import DashboardNotFoundError
from superset.dashboards.filters import DashboardAccessFilter, is_uuid
from superset.dashboards.filters import DashboardAccessFilter
from superset.extensions import db
from superset.models.core import FavStar, FavStarClassName
from superset.models.dashboard import Dashboard, id_or_slug_filter
from superset.models.slice import Slice
from superset.utils.core import get_user_id
from superset.utils.core import get_user_id, is_uuid
from superset.utils.dashboard_filter_scopes_converter import copy_filter_scopes

logger = logging.getLogger(__name__)
Expand Down
4 changes: 2 additions & 2 deletions superset/dashboards/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
from superset import db, is_feature_enabled, security_manager
from superset.connectors.sqla.models import SqlaTable
from superset.models.core import Database, FavStar
from superset.models.dashboard import Dashboard, is_uuid
from superset.models.dashboard import Dashboard
from superset.models.embedded_dashboard import EmbeddedDashboard
from superset.models.slice import Slice
from superset.security.guest_token import GuestTokenResourceType, GuestUser
from superset.utils.core import get_user_id
from superset.utils.core import get_user_id, is_uuid
from superset.utils.filters import get_dataset_access_filters
from superset.views.base import BaseFilter
from superset.views.base_api import BaseFavoriteFilter, BaseTagFilter
Expand Down
10 changes: 1 addition & 9 deletions superset/models/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
from superset.tasks.utils import get_current_user
from superset.thumbnails.digest import get_dashboard_digest
from superset.utils import core as utils
from superset.utils.core import get_user_id
from superset.utils.core import get_user_id, is_uuid
from superset.utils.decorators import debounce

metadata = Model.metadata # pylint: disable=no-member
Expand Down Expand Up @@ -452,14 +452,6 @@ def get(cls, id_or_slug: str | int) -> Dashboard:
return qry.one_or_none()


def is_uuid(value: str | int) -> bool:
try:
uuid.UUID(str(value))
return True
except ValueError:
return False


def is_int(value: str | int) -> bool:
try:
int(value)
Expand Down
4 changes: 2 additions & 2 deletions superset/models/sql_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def to_dict(self) -> dict[str, Any]:
def pop_tab_link(self) -> Markup:
return Markup(
f"""
<a href="/superset/sqllab?savedQueryId={self.id}">
<a href="/superset/sqllab?savedQueryId={self.uuid}">
<i class="fa fa-link"></i>
</a>
"""
Expand All @@ -425,7 +425,7 @@ def sqlalchemy_uri(self) -> URL:
return self.database.sqlalchemy_uri

def url(self) -> str:
return f"/superset/sqllab?savedQueryId={self.id}"
return f"/superset/sqllab?savedQueryId={self.uuid}"

@property
def sql_tables(self) -> list[Table]:
Expand Down
56 changes: 55 additions & 1 deletion superset/queries/saved_queries/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from zipfile import is_zipfile, ZipFile

from flask import g, request, Response, send_file
from flask_appbuilder.api import expose, protect, rison, safe
from flask_appbuilder.api import expose, permission_name, protect, rison, safe
from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_babel import ngettext

Expand All @@ -47,6 +47,7 @@
from superset.queries.saved_queries.commands.importers.dispatcher import (
ImportSavedQueriesCommand,
)
from superset.queries.saved_queries.dao import SavedQueryDAO
from superset.queries.saved_queries.filters import (
SavedQueryAllTextFilter,
SavedQueryFavoriteFilter,
Expand All @@ -57,6 +58,7 @@
get_delete_ids_schema,
get_export_ids_schema,
openapi_spec_methods_override,
SavedQueryGetSchema,
)
from superset.views.base_api import (
BaseSupersetModelRestApi,
Expand Down Expand Up @@ -99,6 +101,7 @@ class SavedQueryRestApi(BaseSupersetModelRestApi):
"sql",
"sql_tables",
"template_parameters",
"uuid",
]
list_columns = [
"changed_on_delta_humanized",
Expand All @@ -118,6 +121,7 @@ class SavedQueryRestApi(BaseSupersetModelRestApi):
"schema",
"sql",
"sql_tables",
"uuid",
]
if is_feature_enabled("TAGGING_SYSTEM"):
list_columns += ["tags.id", "tags.name", "tags.type"]
Expand All @@ -129,6 +133,7 @@ class SavedQueryRestApi(BaseSupersetModelRestApi):
"schema",
"sql",
"template_parameters",
"extra_json",
]
edit_columns = add_columns
order_columns = [
Expand Down Expand Up @@ -158,9 +163,12 @@ class SavedQueryRestApi(BaseSupersetModelRestApi):
"get_delete_ids_schema": get_delete_ids_schema,
"get_export_ids_schema": get_export_ids_schema,
}
openapi_spec_component_schemas = (SavedQueryGetSchema,)
openapi_spec_tag = "Queries"
openapi_spec_methods = openapi_spec_methods_override

saved_query_get_schema = SavedQueryGetSchema()

related_field_filters = {
"database": "database_name",
}
Expand All @@ -174,6 +182,52 @@ def pre_add(self, item: SavedQuery) -> None:
def pre_update(self, item: SavedQuery) -> None:
self.pre_add(item)

@expose("/<_id>", methods=("GET",))
@protect()
@safe
@permission_name("get")
@statsd_metrics
@event_logger.log_this_with_context(
action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.get",
log_to_statsd=False,
)
def get(self, _id: str, **kwargs: Any) -> Response:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def get(self, _id: str, **kwargs: Any) -> Response:
def get(self, id_: str, **kwargs: Any) -> Response:

The convention is to add (rather than prepend) an underscore to variable names which conflict with the global namespace.

Additionally can't the ID be an integer (rather than a string) or UUID.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the string type captures both integer id's and UUIDs. The dao will then cast the type appropriately depending on which is passed

"""Get a saved query by id or uuid
---
get:
summary: >-
Gets a saved query
description: >-
Users who did not create a saved query can only discover it via UUID
parameters:
- in: path
name: _id
schema:
type: string
description: ID or UUID of the saved query
responses:
200:
description: Saved query
content:
application/json:
schema:
type: object
properties:
result:
$ref: '#/components/schemas/SavedQueryGetSchema'
401:
$ref: '#/components/responses/401'
404:
$ref: '#/components/responses/404'
500:
$ref: '#/components/responses/500'
"""
saved_query = SavedQueryDAO.get_by_id(_id)
if not saved_query:
return self.response_404()
result = self.saved_query_get_schema.dump(saved_query)
return self.response(200, result=result)

@expose("/", methods=("DELETE",))
@protect()
@safe
Expand Down
20 changes: 20 additions & 0 deletions superset/queries/saved_queries/dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@
# specific language governing permissions and limitations
# under the License.
import logging
import uuid
from typing import Optional

from flask_appbuilder.models.sqla.interface import SQLAInterface
from sqlalchemy.exc import SQLAlchemyError

from superset.dao.base import BaseDAO
from superset.dao.exceptions import DAODeleteFailedError
from superset.extensions import db
from superset.models.sql_lab import SavedQuery
from superset.queries.saved_queries.filters import SavedQueryFilter
from superset.utils.core import is_uuid

logger = logging.getLogger(__name__)

Expand All @@ -44,3 +47,20 @@ def bulk_delete(models: Optional[list[SavedQuery]], commit: bool = True) -> None
except SQLAlchemyError as ex:
db.session.rollback()
raise DAODeleteFailedError() from ex

@classmethod
def get_by_id(cls, _id: str) -> Optional[SavedQuery]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe this is the right approach, i.e., per this code block the UUID option is bypass the security check (by way of the filter on line #63). The correct approach would be to generate a permalink similar to charts and dashboards.

cc @michael-s-molina and @villebro as we've discussed this previously.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bypassing the security check if a UUID is passed is intentional, it allows users to access saved queries via the API only if the UUID link has been shared with them. There is precedence for this approach in the codebase, see the dashboard DAO: https://github.com/apache/superset/blob/master/superset/dashboards/dao.py#L47

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use the permalink logic for this purpose and probably migrate the dashboard DAO too. Keeping multiple strategies for sharing resources only increases code complexity and opens the door for inconsistent security checks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@michael-s-molina or @john-bodley given the discussion in the all hands meetup, did we come to a decision on whether we can move towards UUIDs here and save the permalink feature for a moment in time state that you want to share?

Copy link
Member

@john-bodley john-bodley Jun 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eschutho I’m not sure that the UUID seems correct both from a security and consistency perspective.

It feels like an internal representation that we’re now exposing to circumvent security, i.e., it feels akin to saying you enter a bar and have to present your ID (an integer or slug) for verification, however if you merely wave a UUID you bypass the bouncer. Granted it’s harder to forge a UUID as opposed to guess a valid ID number, but that still doesn’t seem secure.

In my opinion the precedence which was set previously (the example @jfrag1 linked to—which is the same one I was referring to in the town hall) should be thought of as the exception rather than the rule, i.e., that logic would also need to be updated in a future PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @michael-s-molina's comment regarding making this a SIP. I understand that this feels somewhat like a blocker to what feels like a somewhat benign fix, but I think it's important that we flush out (and detangle) the key concepts that Michael and myself raised to set the right precedence, as opposed to a bandaid solution.

Historically UUIDs were obfuscated and solely used to make charts, dashboards, etc. globally unique across shared Superset instances, however their use within the API is more of an organic phenomenon and thus I think there's merit in pausing, taking a step back, and evaluating how we think about UUIDs, shareable links, etc. in a more holistic manner.

I agree that finding a matching UUID is near impossible, but using UUIDs for security (via cryptography) isn't valid. Note this is in no way a criticism of your work, but more with the state of the Superset code base. At Airbnb we support thousands of weekly active users and are constantly fixing issues related to problematic features and/or inconsistent/non-coherent/complex functionality (you can see the vast majority of my commits to this repo are actually fixes and/or mechanism to improve quality).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agreed with using permalink as long term solution for copy link or any other share with link feature as opposed to directly sharing with raw uuid. Even though uuid is impossible to guess, it's still security through obscurity(link can be leaked unintentionally).
And we should be consistent for authorization policy on a resource regardless of the format of their IDs.
But like @jfrag1 said this PR is to fix a breaking changes of the other PR. We will need to either revert that breaking PR or allow exception of this PR for now until formal SIP is made around how we represent/export resource application wide.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree that a SIP on this functionality is a good idea. We were planning to write one up, I think @john-bodley is also interested in writing it. If we think this PR is too premature at this point, what do you all think about us reverting this one line back to the old api so that we can fix the sharing capabilities that are currently broken, and then we can talk about a longer-term solution in an upcoming SIP? cc @hughhhh who is also helping on this.

Copy link
Member

@john-bodley john-bodley Jun 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zephyring, @jfrag1, @eschutho, @michael-s-molina, et al. I'm supportive of "reverting" the original PR to get us back to square one and then with said SIP we can proceed. I suspect the old API endpoint is a legacy FAB REST API which was slated for deletion in 2.3.X (we're now using FAB 4.3.2+).

If the one line revert to the old API isn't via I think the alternative is something akin to what we do at Airbnb, where our PR description states:

Though this logic,

class SavedQueryFilter(BaseFilter):  # pylint: disable=too-few-public-methods
   def apply(self, query: BaseQuery, value: Any) -> BaseQuery:
       """
       Filter saved queries to only those created by current user.

       :returns: flask-sqlalchemy query
       """
       
       return query.filter(
           SavedQuery.created_by == g.user  # pylint: disable=comparison-with-callable
       )

which, restricts only the creator of the saved query to access it, has been around for years we suspect it only came into force when the /api/v1/saved_query/{pk} API was enabled in Q4 of 2022 which prevented people from sharing saved queries.

This PR simply relaxes the filter to allow anyone to access a saved query.

Thus I think an alternative to changing the API endpoint would be to simply replace these lines with return query.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

opened #24434 to revert the original change which broke this functionality

if is_uuid(_id):
return (
db.session.query(SavedQuery)
.filter(SavedQuery.uuid == uuid.UUID(_id))
.scalar()
)
try:
query = db.session.query(SavedQuery).filter(SavedQuery.id == int(_id))
except ValueError: # Invalid id
return None
query = cls.base_filter("id", SQLAInterface(SavedQuery, db.session)).apply(
query, None
)
return query.scalar()
49 changes: 48 additions & 1 deletion superset/queries/saved_queries/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from marshmallow import fields, Schema
from typing import Any

from marshmallow import fields, post_dump, Schema
from marshmallow.validate import Length

from superset.models.sql_lab import SavedQuery

openapi_spec_methods_override = {
"get": {
"get": {
Expand Down Expand Up @@ -48,3 +52,46 @@ class ImportV1SavedQuerySchema(Schema):
uuid = fields.UUID(required=True)
version = fields.String(required=True)
database_uuid = fields.UUID(required=True)


class SavedQueryCreatorSchema(Schema):
id = fields.Integer(metadata={"description": "Creator's ID"})
first_name = fields.String(metadata={"description": "Creator's first name"})
last_name = fields.String(metadata={"description": "Creator's last name"})


class SavedQueryDatabaseSchema(Schema):
database_name = fields.String(metadata={"description": "Database's name"})
id = fields.Integer(metadata={"description": "Database's ID"})


class SavedQuerySqlTableSchema(Schema):
table = fields.String()
schema = fields.String()
catalog = fields.String()


class SavedQueryGetSchema(Schema):
changed_on_delta_humanized = fields.String(
metadata={"description": "How long ago the query was changed"}
)
created_by = fields.Nested(SavedQueryCreatorSchema)
database = fields.Nested(SavedQueryDatabaseSchema)
description = fields.String(metadata={"description": "Query's description"})
id = fields.Integer(metadata={"description": "Query's ID"})
label = fields.String(metadata={"description": "Query's label"})
schema = fields.String(metadata={"description": "Query's schema"})
sql = fields.String(metadata={"description": "Query's SQL"})
sql_tables = fields.List(
fields.Nested(SavedQuerySqlTableSchema),
metadata={"description": "Tables accessed by query SQL"},
)
uuid = fields.UUID(metadata={"description": "Query's UUID"})

# pylint: disable=no-self-use, unused-argument
@post_dump(pass_original=True)
def post_dump(
self, serialized: dict[str, Any], saved_query: SavedQuery, **kwargs: Any
) -> dict[str, Any]:
serialized["changed_on_delta_humanized"] = saved_query.changed_on_humanized
return serialized
8 changes: 8 additions & 0 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1974,3 +1974,11 @@ def remove_extra_adhoc_filters(form_data: dict[str, Any]) -> None:
form_data[key] = [
filter_ for filter_ in value or [] if not filter_.get("isExtra")
]


def is_uuid(value: str | int) -> bool:
try:
uuid.UUID(str(value))
return True
except ValueError:
return False
Loading