Skip to content

Commit

Permalink
chore: Migrating reports to AuthWebdriverProxy (apache#10567)
Browse files Browse the repository at this point in the history
* Migrating reports to AuthWebdriverProxy

* Extracting out webdriver proxy / Adding thumbnail tests to CI

* Adding license

* Adding license again

* Empty commit

* Adding thumbnail tests to CI

* Switching thumbnail test to Postgres

* Linting

* Adding mypy:ignore / removing thumbnail tests from CI

* Putting ignore statement back

* Updating docs

* First cut at authprovider

* First cut at authprovider mostly working - still needs more tests

* Auth provider tests added

* Linting

* Linting again...

* Linting again...

* Busting CI cache

* Reverting workflow change

* Fixing dataclasses

* Reverting back to master

* linting?

* Reverting installation.rst

* Reverting package-lock.json

* Addressing feedback

* Blacking

* Lazy logging strings

* UPDATING.md note
  • Loading branch information
craig-rueda authored and Ofeknielsen committed Oct 5, 2020
1 parent 35edac3 commit ea938c3
Show file tree
Hide file tree
Showing 15 changed files with 376 additions and 240 deletions.
2 changes: 2 additions & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ assists people when migrating to a new version.

## Next

* [10567](https://github.com/apache/incubator-superset/pull/10567): The default WEBDRIVER_OPTION_ARGS are Chrome-specific. If you're using Firefox, WEBDRIVER_OPTION_ARGS should contain only `--headless`.

* [10241](https://github.com/apache/incubator-superset/pull/10241): Change to the Alpha role: users with this role now have access to "Annotation Layers", "Css Templates" and "Import Dashboards".

* [10324](https://github.com/apache/incubator-superset/pull/10324): Facebook Prophet has been introduced as an optional dependency to add support for timeseries forecasting in the chart data API. To enable this feature, install Superset with the optional dependency `prophet` or directly `pip install fbprophet`.
Expand Down
10 changes: 3 additions & 7 deletions scripts/tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ function reset_db() {
echo --------------------
echo Reseting test DB
echo --------------------
docker-compose stop superset-tests-worker
RESET_DB_CMD="psql \"postgresql://superset:superset@127.0.0.1:5432\" <<-EOF
docker-compose stop superset-tests-worker superset || true
RESET_DB_CMD="psql \"postgresql://${DB_USER}:${DB_PASSWORD}@127.0.0.1:5432\" <<-EOF
DROP DATABASE IF EXISTS ${DB_NAME};
CREATE DATABASE ${DB_NAME};
\\c ${DB_NAME}
Expand All @@ -53,10 +53,6 @@ function test_init() {
echo Superset init
echo --------------------
superset init
echo --------------------
echo Load examples
echo --------------------
pytest -s tests/load_examples_test.py
}

#
Expand Down Expand Up @@ -142,5 +138,5 @@ fi

if [ $RUN_TESTS -eq 1 ]
then
pytest -x -s --ignore=load_examples_test "${TEST_MODULE}"
pytest -x -s "${TEST_MODULE}"
fi
5 changes: 5 additions & 0 deletions superset/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
db,
feature_flag_manager,
jinja_context_manager,
machine_auth_provider_factory,
manifest_processor,
migrate,
results_backend_manager,
Expand Down Expand Up @@ -468,6 +469,7 @@ def init_app_in_ctx(self) -> None:
self.configure_fab()
self.configure_url_map_converters()
self.configure_data_sources()
self.configure_auth_provider()

# Hook that provides administrators a handle on the Flask APP
# after initialization
Expand Down Expand Up @@ -499,6 +501,9 @@ def init_app(self) -> None:

self.post_init()

def configure_auth_provider(self) -> None:
    """Initialize the machine auth provider factory with this Flask app."""
    flask_app = self.flask_app
    machine_auth_provider_factory.init_app(flask_app)

def setup_event_logger(self) -> None:
_event_logger["event_logger"] = get_event_logger_from_cfg_value(
self.flask_app.config.get("EVENT_LOGGER", DBEventLogger())
Expand Down
18 changes: 18 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,11 @@ class CeleryConfig: # pylint: disable=too-few-public-methods
# * Emails are sent using dry-run mode (logging only)
SCHEDULED_EMAIL_DEBUG_MODE = False

# This auth provider is used by background (offline) tasks that need to access
# protected resources. Can be overridden by end users in order to support
# custom auth mechanisms
MACHINE_AUTH_PROVIDER_CLASS = "superset.utils.machine_auth.MachineAuthProvider"

# Email reports - minimum time resolution (in minutes) for the crontab
EMAIL_REPORTS_CRON_RESOLUTION = 15

Expand Down Expand Up @@ -795,9 +800,22 @@ class CeleryConfig: # pylint: disable=too-few-public-methods
# Window size - this will impact the rendering of the data
WEBDRIVER_WINDOW = {"dashboard": (1600, 2000), "slice": (3000, 1200)}

# An optional override to the default auth hook used to provide auth to the
# offline webdriver
WEBDRIVER_AUTH_FUNC = None

# Any config options to be passed as-is to the webdriver
WEBDRIVER_CONFIGURATION: Dict[Any, Any] = {}

# Additional args to be passed as arguments to the config object
# Note: these options are Chrome-specific. For FF, these should
# only include the "--headless" arg
WEBDRIVER_OPTION_ARGS = [
"--force-device-scale-factor=2.0",
"--high-dpi-support=2.0",
"--headless",
]

# The base URL to query for accessing the user interface
WEBDRIVER_BASEURL = "http://0.0.0.0:8080/"
# The base URL for the email report hyperlinks.
Expand Down
2 changes: 2 additions & 0 deletions superset/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

from superset.utils.cache_manager import CacheManager
from superset.utils.feature_flag_manager import FeatureFlagManager
from superset.utils.machine_auth import MachineAuthProviderFactory

if TYPE_CHECKING:
from superset.jinja_context import ( # pylint: disable=unused-import
Expand Down Expand Up @@ -139,6 +140,7 @@ def get_manifest_files(self, bundle: str, asset_type: str) -> List[str]:
event_logger = LocalProxy(lambda: _event_logger.get("event_logger"))
feature_flag_manager = FeatureFlagManager()
jinja_context_manager = JinjaContextManager()
machine_auth_provider_factory = MachineAuthProviderFactory()
manifest_processor = UIManifestProcessor(APP_DIR)
migrate = Migrate()
results_backend_manager = ResultsBackendManager()
Expand Down
85 changes: 18 additions & 67 deletions superset/tasks/schedules.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
Callable,
Dict,
Iterator,
List,
NamedTuple,
Optional,
Tuple,
Expand All @@ -42,17 +41,16 @@
import simplejson as json
from celery.app.task import Task
from dateutil.tz import tzlocal
from flask import current_app, render_template, Response, session, url_for
from flask import current_app, render_template, url_for
from flask_babel import gettext as __
from flask_login import login_user
from retry.api import retry_call
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import chrome, firefox
from selenium.webdriver.remote.webdriver import WebDriver
from sqlalchemy.exc import NoSuchColumnError, ResourceClosedError
from werkzeug.http import parse_cookie

from superset import app, db, security_manager, thumbnail_cache
from superset.extensions import celery_app
from superset.extensions import celery_app, machine_auth_provider_factory
from superset.models.alerts import Alert, AlertLog
from superset.models.core import Database
from superset.models.dashboard import Dashboard
Expand All @@ -66,14 +64,15 @@
from superset.sql_parse import ParsedQuery
from superset.tasks.slack_util import deliver_slack_msg
from superset.utils.core import get_email_address_list, send_email_smtp
from superset.utils.screenshots import ChartScreenshot
from superset.utils.screenshots import ChartScreenshot, WebDriverProxy
from superset.utils.urls import get_url_path

# pylint: disable=too-few-public-methods

if TYPE_CHECKING:
# pylint: disable=unused-import
from werkzeug.datastructures import TypeConversionDict
from flask_appbuilder.security.sqla.models import User


# Globals
Expand Down Expand Up @@ -191,27 +190,6 @@ def _generate_report_content(
return ReportContent(body, data, images, slack_message, screenshot)


def _get_auth_cookies() -> List["TypeConversionDict[Any, Any]"]:
# Login with the user specified to get the reports
with app.test_request_context():
user = security_manager.find_user(config["EMAIL_REPORTS_USER"])
login_user(user)

# A mock response object to get the cookie information from
response = Response()
app.session_interface.save_session(app, session, response)

cookies = []

# Set the cookies in the driver
for name, value in response.headers:
if name.lower() == "set-cookie":
cookie = parse_cookie(value)
cookies.append(cookie["session"])

return cookies


def _get_url_path(view: str, user_friendly: bool = False, **kwargs: Any) -> str:
with app.test_request_context():
base_url = (
Expand All @@ -220,44 +198,14 @@ def _get_url_path(view: str, user_friendly: bool = False, **kwargs: Any) -> str:
return urllib.parse.urljoin(str(base_url), url_for(view, **kwargs))


def create_webdriver() -> Union[
chrome.webdriver.WebDriver, firefox.webdriver.WebDriver
]:
# Create a webdriver for use in fetching reports
if config["EMAIL_REPORTS_WEBDRIVER"] == "firefox":
driver_class = firefox.webdriver.WebDriver
options = firefox.options.Options()
elif config["EMAIL_REPORTS_WEBDRIVER"] == "chrome":
driver_class = chrome.webdriver.WebDriver
options = chrome.options.Options()

options.add_argument("--headless")

# Prepare args for the webdriver init
kwargs = dict(options=options)
kwargs.update(config["WEBDRIVER_CONFIGURATION"])

# Initialize the driver
driver = driver_class(**kwargs)

# Some webdrivers need an initial hit to the welcome URL
# before we set the cookie
welcome_url = _get_url_path("Superset.welcome")

# Hit the welcome URL and check if we were asked to login
driver.get(welcome_url)
elements = driver.find_elements_by_id("loginbox")

# This indicates that we were not prompted for a login box.
if not elements:
return driver
def create_webdriver() -> WebDriver:
    """
    Build a webdriver of the type named by the EMAIL_REPORTS_WEBDRIVER config
    entry and authenticate it as the reports user.

    :return: Whatever ``WebDriverProxy.auth`` returns for the reports user
    """
    driver_type = config["EMAIL_REPORTS_WEBDRIVER"]
    proxy = WebDriverProxy(driver_type=driver_type)
    reports_user = get_reports_user()
    return proxy.auth(reports_user)

# Set the cookies in the driver
for cookie in _get_auth_cookies():
info = dict(name="session", value=cookie)
driver.add_cookie(info)

return driver
def get_reports_user() -> "User":
    """
    Resolve the user named by the EMAIL_REPORTS_USER config entry.

    :return: The result of the security manager's ``find_user`` lookup
    """
    username = config["EMAIL_REPORTS_USER"]
    return security_manager.find_user(username)


def destroy_webdriver(
Expand Down Expand Up @@ -364,12 +312,15 @@ def _get_slice_data(slc: Slice, delivery_type: EmailDeliveryType) -> ReportConte
"Superset.slice", slice_id=slc.id, user_friendly=True
)

cookies = {}
for cookie in _get_auth_cookies():
cookies["session"] = cookie
# Login on behalf of the "reports" user in order to get cookies to deal with auth
auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies(
get_reports_user()
)
# Build something like "session=cool_sess.val;other-cookie=awesome_other_cookie"
cookie_str = ";".join([f"{key}={val}" for key, val in auth_cookies.items()])

opener = urllib.request.build_opener()
opener.addheaders.append(("Cookie", f"session={cookies['session']}"))
opener.addheaders.append(("Cookie", cookie_str))
response = opener.open(slice_url)
if response.getcode() != 200:
raise URLError(response.getcode())
Expand Down
5 changes: 2 additions & 3 deletions superset/tasks/thumbnails.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
"""Utility functions used across Superset"""

import logging
from typing import Optional, Tuple
from typing import Optional

from flask import current_app

from superset import app, security_manager, thumbnail_cache
from superset.extensions import celery_app
from superset.utils.screenshots import ChartScreenshot, DashboardScreenshot
from superset.utils.webdriver import WindowSize

logger = logging.getLogger(__name__)

WindowSize = Tuple[int, int]


@celery_app.task(name="cache_chart_thumbnail", soft_time_limit=300)
def cache_chart_thumbnail(
Expand Down
113 changes: 113 additions & 0 deletions superset/utils/machine_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import importlib
import logging
from typing import Callable, Dict, Optional, TYPE_CHECKING

from flask import current_app, Flask, has_request_context, request, Response, session
from flask_login import login_user
from selenium.webdriver.remote.webdriver import WebDriver
from werkzeug.http import parse_cookie

from superset.utils.urls import headless_url

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
# pylint: disable=unused-import
from flask_appbuilder.security.sqla.models import User


class MachineAuthProvider:
    """
    Provides authentication for background (offline) tasks that need to access
    protected resources, by generating flask-login style session cookies for a
    user and installing them on a webdriver.
    """

    def __init__(
        self,
        auth_webdriver_func_override: Optional[
            Callable[[WebDriver, "User"], WebDriver]
        ] = None,
    ):
        """
        :param auth_webdriver_func_override: Optional callable that fully replaces
            the default ``authenticate_webdriver`` behavior. ``None`` (the value
            of the default ``WEBDRIVER_AUTH_FUNC`` config) keeps the default.
        """
        # This is here in order to allow for the authenticate_webdriver func to be
        # overridden via config, as opposed to the entire provider implementation
        self._auth_webdriver_func_override = auth_webdriver_func_override

    def authenticate_webdriver(self, driver: WebDriver, user: "User") -> WebDriver:
        """
        Default AuthDriverFuncType type that sets a session cookie flask-login style

        :param driver: The webdriver to authenticate
        :param user: The user to authenticate as. When falsy, the cookies of the
            current request (if there is one) are forwarded instead.
        :return: The WebDriver passed in (fluent)
        """
        # Short-circuit this method if we have an override configured
        if self._auth_webdriver_func_override:
            return self._auth_webdriver_func_override(driver, user)

        # Setting cookies requires doing a request first
        driver.get(headless_url("/login/"))

        if user:
            cookies = self.get_auth_cookies(user)
        elif has_request_context() and request.cookies:
            # `request` is only usable inside a request context; touching it
            # from a background task would raise a RuntimeError otherwise
            cookies = request.cookies
        else:
            cookies = {}

        for cookie_name, cookie_val in cookies.items():
            driver.add_cookie(dict(name=cookie_name, value=cookie_val))

        return driver

    @staticmethod
    def get_auth_cookies(user: "User") -> Dict[str, str]:
        """
        Log the given user in within a test request context and collect the
        cookies that the session interface would set on the response.

        :param user: The user to log in
        :return: Mapping of cookie name to cookie value
        """
        # Login with the user specified to get the reports
        with current_app.test_request_context("/login"):
            login_user(user)
            # A mock response object to get the cookie information from
            response = Response()
            current_app.session_interface.save_session(current_app, session, response)

        cookies: Dict[str, str] = {}

        # Grab any "set-cookie" headers from the login response
        for name, value in response.headers:
            if name.lower() != "set-cookie":
                continue
            # This yields a MultiDict, which is ordered -- something like
            # MultiDict([('session', 'value-we-want), ('HttpOnly', ''), etc...
            # Therefore, we just need to grab the first tuple and add it to our
            # final dict
            first_pair = next(iter(parse_cookie(value).items()), None)
            if first_pair is None:
                # Nothing could be parsed out of this header; skip it rather
                # than failing with an IndexError
                continue
            cookie_name, cookie_val = first_pair
            cookies[cookie_name] = cookie_val

        return cookies


class MachineAuthProviderFactory:
    """
    Lazily builds the auth provider named by the MACHINE_AUTH_PROVIDER_CLASS
    config entry once a Flask app is available.
    """

    def __init__(self) -> None:
        # Populated by init_app(); stays None until then
        self._auth_provider: Optional[MachineAuthProvider] = None

    def init_app(self, app: Flask) -> None:
        """
        Import and instantiate the configured auth provider class.

        :param app: The Flask app whose config names the provider class
            (MACHINE_AUTH_PROVIDER_CLASS) and the optional webdriver auth
            override (WEBDRIVER_AUTH_FUNC)
        :raises ImportError: If the configured value is not a fully qualified
            "module.ClassName" path, or the module cannot be imported
        """
        auth_provider_fqclass = app.config["MACHINE_AUTH_PROVIDER_CLASS"]
        # Split "package.module.ClassName" on the last dot
        module_name, _, class_name = auth_provider_fqclass.rpartition(".")
        if not module_name or not class_name:
            raise ImportError(
                "MACHINE_AUTH_PROVIDER_CLASS must be a fully qualified class "
                f"path such as 'package.module.ClassName', got "
                f"{auth_provider_fqclass!r}"
            )
        auth_provider_class = getattr(
            importlib.import_module(module_name), class_name
        )

        # The override is optional, so tolerate configs that do not define it
        self._auth_provider = auth_provider_class(
            app.config.get("WEBDRIVER_AUTH_FUNC")
        )

    @property
    def instance(self) -> MachineAuthProvider:
        """The provider built by init_app(), or None if it has not run yet."""
        return self._auth_provider  # type: ignore
Loading

0 comments on commit ea938c3

Please sign in to comment.