Skip to content

Commit

Permalink
Merge branch 'main' into ensure_serialized_data_meassured_correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
crusaderky committed Mar 13, 2023
2 parents 6c2f4ad + 700f14a commit 52eee9e
Show file tree
Hide file tree
Showing 29 changed files with 570 additions and 260 deletions.
1 change: 1 addition & 0 deletions continuous_integration/environment-3.10.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ dependencies:
- git+https://github.com/dask/zict
- git+https://github.com/fsspec/filesystem_spec
- keras
- gilknocker
1 change: 1 addition & 0 deletions continuous_integration/environment-3.11.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ dependencies:
- git+https://github.com/dask/zict
- git+https://github.com/fsspec/filesystem_spec
- keras
- gilknocker
1 change: 1 addition & 0 deletions continuous_integration/environment-3.8.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ dependencies:
- git+https://github.com/dask/dask
- git+https://github.com/jcrist/crick # Only tested here
- keras
- gilknocker
1 change: 1 addition & 0 deletions continuous_integration/environment-3.9.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ dependencies:
- pip:
- git+https://github.com/dask/dask
- keras
- gilknocker
2 changes: 1 addition & 1 deletion continuous_integration/environment-mindeps.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- defaults
dependencies:
- python=3.8
- click=7.0
- click=8.0
- cloudpickle=1.5.0
- cytoolz=0.10.1
- jinja2=2.10.3
Expand Down
3 changes: 2 additions & 1 deletion continuous_integration/recipes/dask/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ requirements:
- {{ pin_compatible('dask-core', max_pin='x.x.x.x') }}
- {{ pin_compatible('distributed', exact=True) }}
- cytoolz >=0.8.2
- lz4
- lz4 >=4.3.2
- numpy >=1.21
- pandas >=1.3
- bokeh >=2.4.2,<3
- jinja2 >=2.10.3
- pyarrow >=7.0

run_constrained:
- openssl !=1.1.1e
Expand Down
2 changes: 1 addition & 1 deletion continuous_integration/recipes/distributed/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ requirements:
- dask-core {{ dask_version }}
run:
- python >=3.8
- click >=7.0
- click >=8.0
- cloudpickle >=1.5.0
- cytoolz >=0.10.1
- {{ pin_compatible('dask-core', max_pin='x.x.x.x') }}
Expand Down
3 changes: 0 additions & 3 deletions distributed/cli/tests/test_dask_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,9 +523,6 @@ async def test_dashboard_non_standard_ports(c, s, requires_default_ports):
requests.get("http://localhost:4833/status/")


@pytest.mark.filterwarnings(
"ignore:pkg_resources is deprecated as an API:DeprecationWarning"
)
def test_version_option():
runner = CliRunner()
result = runner.invoke(main, ["--version"])
Expand Down
54 changes: 40 additions & 14 deletions distributed/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
)
from dask.widgets import get_template

from distributed.core import ErrorMessage
from distributed.utils import wait_for

try:
Expand Down Expand Up @@ -3530,22 +3531,40 @@ def restart(self, timeout=no_default, wait_for_workers=True):
)

async def _restart_workers(
self, workers: list[str], timeout: int | float | None = None
) -> dict[str, str]:
results: dict[str, str] = await self.scheduler.broadcast(
msg={"op": "restart", "timeout": timeout}, workers=workers, nanny=True
self,
workers: list[str],
timeout: int | float | None = None,
raise_for_error: bool = True,
) -> dict[str, str | ErrorMessage]:
info = self.scheduler_info()
name_to_addr = {meta["name"]: addr for addr, meta in info["workers"].items()}
worker_addrs = [name_to_addr.get(w, w) for w in workers]

restart_out: dict[str, str | ErrorMessage] = await self.scheduler.broadcast(
msg={"op": "restart", "timeout": timeout},
workers=worker_addrs,
nanny=True,
)
timeout_workers = {
key: value for key, value in results.items() if value == "timed out"
}
if timeout_workers:

# Map keys back to original `workers` input names/addresses
results = {w: restart_out[w_addr] for w, w_addr in zip(workers, worker_addrs)}

timeout_workers = [w for w, status in results.items() if status == "timed out"]
if timeout_workers and raise_for_error:
raise TimeoutError(
f"The following workers failed to restart with {timeout} seconds: {list(timeout_workers.keys())}"
f"The following workers failed to restart with {timeout} seconds: {timeout_workers}"
)

errored: list[ErrorMessage] = [m for m in results.values() if "exception" in m] # type: ignore
if errored and raise_for_error:
raise pickle.loads(errored[0]["exception"]) # type: ignore
return results

def restart_workers(
self, workers: list[str], timeout: int | float | None = None
self,
workers: list[str],
timeout: int | float | None = None,
raise_for_error: bool = True,
) -> dict[str, str]:
"""Restart a specified set of workers
Expand All @@ -3558,14 +3577,19 @@ def restart_workers(
Parameters
----------
workers : list[str]
Workers to restart.
Workers to restart. This can be a list of worker addresses, names, or a both.
timeout : int | float | None
Number of seconds to wait
raise_for_error: bool (default True)
Whether to raise a :py:class:`TimeoutError` if restarting worker(s) doesn't
finish within ``timeout``, or another exception caused from restarting
worker(s).
Returns
-------
dict[str, str]
Mapping of worker and restart status.
Mapping of worker and restart status, the keys will match the original
values passed in via ``workers``.
Notes
-----
Expand Down Expand Up @@ -3593,15 +3617,17 @@ def restart_workers(
Client.restart
"""
info = self.scheduler_info()
for worker in workers:
if info["workers"][worker]["nanny"] is None:

for worker, meta in info["workers"].items():
if (worker in workers or meta["name"] in workers) and meta["nanny"] is None:
raise ValueError(
f"Restarting workers requires a nanny to be used. Worker {worker} has type {info['workers'][worker]['type']}."
)
return self.sync(
self._restart_workers,
workers=workers,
timeout=timeout,
raise_for_error=raise_for_error,
)

async def _upload_large_file(self, local_filename, remote_filename=None):
Expand Down
8 changes: 6 additions & 2 deletions distributed/dashboard/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@


@memoize
def template_variables():
def template_variables(scheduler):
from distributed.diagnostics.nvml import device_get_count

template_variables = {
Expand All @@ -146,6 +146,7 @@ def template_variables():
if "individual" in x
]
+ [{"url": "hardware", "name": "Hardware"}],
"jupyter": scheduler.jupyter,
}
template_variables["plots"] = sorted(
template_variables["plots"], key=lambda d: d["name"]
Expand All @@ -155,7 +156,10 @@ def template_variables():

def connect(application, http_server, scheduler, prefix=""):
bokeh_app = BokehApplication(
applications, scheduler, prefix=prefix, template_variables=template_variables()
applications,
scheduler,
prefix=prefix,
template_variables=template_variables(scheduler),
)
application.add_application(bokeh_app)
bokeh_app.initialize(IOLoop.current())
Expand Down
3 changes: 3 additions & 0 deletions distributed/distributed-schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1141,6 +1141,9 @@ properties:
host-cpu:
type: boolean
description: Should we include host-wide CPU usage, with very granular breakdown?
gil-contention:
type: boolean
description: Should we include GIL contention metrics, requires `gilknocker` to be installed.

rmm:
type: object
Expand Down
1 change: 1 addition & 0 deletions distributed/distributed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ distributed:
interval: 500ms
disk: true # Monitor host-wide disk I/O
host-cpu: false # Monitor host-wide CPU usage, with very granular breakdown
gil-contention: false # Monitor GIL contention
event-loop: tornado
rmm:
pool-size: null
15 changes: 12 additions & 3 deletions distributed/http/static/css/base.css
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ body {
background-color: #eaaa6d;
}

.navbar .pull-right {
float: right;
.pull-right {
float: right !important;
}

#dask-logo img {
Expand Down Expand Up @@ -81,6 +81,7 @@ body {
.navbar li:not(#dask-logo):not(#navbar-toggle-icon) a {
display: none;
}

#navbar-toggle-icon a {
display: block;
}
Expand All @@ -90,10 +91,12 @@ body {
.navbar.responsive li:not(#navbar-toggle-icon) {
float: none;
}

.navbar.responsive li:not(#dask-logo):not(#navbar-toggle-icon) a {
display: block;
text-align: left;
}

.navbar.responsive #navbar-toggle-icon a {
position: absolute;
right: 0;
Expand Down Expand Up @@ -128,7 +131,7 @@ minimum supported version */
top: 40px;
background-color: #EEE;
min-width: 160px;
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
box-shadow: 0px 8px 16px 0px rgba(0, 0, 0, 0.2);
z-index: 2;
max-height: 90%;
overflow-y: scroll;
Expand All @@ -141,3 +144,9 @@ minimum supported version */
.dropdown:hover .dropdown-content {
display: block;
}

.jupyter-link img {
height: 1.3rem;
margin-bottom: -0.3rem;
margin-right: 0.25rem;
}
90 changes: 90 additions & 0 deletions distributed/http/static/images/jupyter.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions distributed/http/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@
<li class="pull-right">
<a href="https://docs.dask.org/en/stable/" target="_blank">Documentation</a>
</li>
{% if jupyter %}
<li class="pull-right">
<a class="jupyter-link" href="jupyter/lab" target="_blank">
<img src="statics/images/jupyter.svg" />
Launch Jupyter
</a>
</li>
{% endif %}
</ul>
</div>
<div class="content">
Expand Down
Loading

0 comments on commit 52eee9e

Please sign in to comment.