Skip to content

Commit

Permalink
Patch release 2.3.1 (#20021)
Browse files Browse the repository at this point in the history
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Etay Livne <53942171+EtayLivne@users.noreply.github.com>
Co-authored-by: Etay Livne <etay.livne@mobileye.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: thomas chaton <thomas@grid.ai>
  • Loading branch information
5 people authored Jun 27, 2024
1 parent a42484c commit 8b69285
Show file tree
Hide file tree
Showing 50 changed files with 757 additions and 445 deletions.
12 changes: 6 additions & 6 deletions .github/checkgroup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,9 @@ subprojects:
- "!*.md"
- "!**/*.md"
checks:
- "app-pytest (macOS-11, lightning, 3.8, latest)"
- "app-pytest (macOS-11, lightning, 3.8, oldest)"
- "app-pytest (macOS-11, app, 3.9, latest)"
- "app-pytest (macOS-12, lightning, 3.8, latest)"
- "app-pytest (macOS-12, lightning, 3.8, oldest)"
- "app-pytest (macOS-12, app, 3.9, latest)"
- "app-pytest (macOS-12, app, 3.11, latest)"
- "app-pytest (ubuntu-20.04, lightning, 3.8, latest)"
- "app-pytest (ubuntu-20.04, lightning, 3.8, oldest)"
Expand All @@ -270,9 +270,9 @@ subprojects:
- "!*.md"
- "!**/*.md"
checks:
- "app-examples (macOS-11, lightning, 3.9, latest)"
- "app-examples (macOS-11, lightning, 3.9, oldest)"
- "app-examples (macOS-11, app, 3.9, latest)"
- "app-examples (macOS-12, lightning, 3.9, latest)"
- "app-examples (macOS-12, lightning, 3.9, oldest)"
- "app-examples (macOS-12, app, 3.9, latest)"
- "app-examples (ubuntu-20.04, lightning, 3.9, latest)"
- "app-examples (ubuntu-20.04, lightning, 3.9, oldest)"
- "app-examples (ubuntu-20.04, app, 3.9, latest)"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci-examples-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, macOS-11, windows-2022]
os: [ubuntu-20.04, macOS-12, windows-2022]
pkg-name: ["lightning"]
python-version: ["3.9"]
requires: ["oldest", "latest"]
include:
# "app" installs the standalone package
- { os: "macOS-11", pkg-name: "app", python-version: "3.9", requires: "latest" }
- { os: "macOS-12", pkg-name: "app", python-version: "3.9", requires: "latest" }
- { os: "ubuntu-20.04", pkg-name: "app", python-version: "3.9", requires: "latest" }
- { os: "windows-2022", pkg-name: "app", python-version: "3.9", requires: "latest" }
# Timeout: https://stackoverflow.com/a/59076067/4521646
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci-tests-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ["ubuntu-20.04", "macOS-11", "windows-2022"]
os: ["ubuntu-20.04", "macOS-12", "windows-2022"]
pkg-name: ["lightning"]
python-version: ["3.8"]
requires: ["oldest", "latest"]
Expand All @@ -48,7 +48,7 @@ jobs:
- { os: "ubuntu-22.04", pkg-name: "app", python-version: "3.11", requires: "latest" }
- { os: "windows-2022", pkg-name: "app", python-version: "3.11", requires: "latest" }
# "app" installs the standalone package
- { os: "macOS-11", pkg-name: "app", python-version: "3.9", requires: "latest" }
- { os: "macOS-12", pkg-name: "app", python-version: "3.9", requires: "latest" }
- { os: "ubuntu-20.04", pkg-name: "app", python-version: "3.9", requires: "latest" }
- { os: "windows-2022", pkg-name: "app", python-version: "3.8", requires: "latest" }
# Timeout: https://stackoverflow.com/a/59076067/4521646
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
gh_env.write("DOCKER_TAGS=" + ",".join(tags))
shell: python

- uses: docker/build-push-action@v5
- uses: docker/build-push-action@v6
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
Expand Down Expand Up @@ -119,7 +119,7 @@ jobs:
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- uses: docker/build-push-action@v5
- uses: docker/build-push-action@v6
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
Expand Down Expand Up @@ -151,7 +151,7 @@ jobs:
- name: Build Conda Docker
# publish master/release
continue-on-error: true
uses: docker/build-push-action@v5
uses: docker/build-push-action@v6
with:
file: dockers/nvidia/Dockerfile
push: false
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ exclude = [
"src/lightning/app/cli/component-template",
"src/lightning/app/cli/pl-app-template",
"src/lightning/app/cli/react-ui-template",
"src/lightning/app/launcher/utils.py",
]
install_types = "True"
non_interactive = "True"
Expand Down
6 changes: 4 additions & 2 deletions requirements/app/app.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
lightning-cloud == 0.5.69 # Must be pinned to ensure compatibility
lightning-cloud == 0.5.70 # Must be pinned to ensure compatibility
packaging
typing-extensions >=4.4.0, <4.10.0
deepdiff >=5.7.0, <6.6.0
fsspec[http] >=2022.5.0, <2023.11.0
croniter >=1.3.0, <1.5.0 # strict; TODO: for now until we find something more robust.
traitlets >=5.3.0, <5.12.0
arrow >=1.2.0, <1.3.0
lightning-utilities >=0.8.0, <0.12.0
lightning-utilities >=0.10.0, <0.12.0
beautifulsoup4 >=4.8.0, <4.13.0
inquirer >=2.10.0, <3.2.0
psutil <5.9.6
Expand All @@ -27,3 +27,5 @@ urllib3 <2.0.0
uvicorn <0.24.0
websocket-client <1.7.0
websockets <11.1.0
numpy >=1.17.2, <2.0
msgpack
7 changes: 4 additions & 3 deletions requirements/ci.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
setuptools
wheel
setuptools <70.1.1
wheel <0.44.0
awscli >=1.30.0, <1.31.0
twine ==4.0.1
importlib-metadata <8.0.0
wget
packaging
packaging <24.2
2 changes: 1 addition & 1 deletion requirements/fabric/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ torch >=2.0.0, <2.4.0
fsspec[http] >=2022.5.0, <2024.4.0
packaging >=20.0, <=23.1
typing-extensions >=4.4.0, <4.10.0
lightning-utilities >=0.8.0, <0.12.0
lightning-utilities >=0.10.0, <0.12.0
2 changes: 1 addition & 1 deletion requirements/pytorch/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ fsspec[http] >=2022.5.0, <2024.4.0
torchmetrics >=0.7.0, <1.3.0 # needed for using fixed compare_version
packaging >=20.0, <=23.1
typing-extensions >=4.4.0, <4.10.0
lightning-utilities >=0.8.0, <0.12.0
lightning-utilities >=0.10.0, <0.12.0
4 changes: 2 additions & 2 deletions requirements/pytorch/extra.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

# extended list of package dependencies to reach full functionality
matplotlib>3.1, <3.9.0
omegaconf >=2.0.5, <2.4.0
hydra-core >=1.0.5, <1.4.0
omegaconf >=2.2.3, <2.4.0
hydra-core >=1.2.0, <1.4.0
jsonargparse[signatures] >=4.27.7, <4.28.0
rich >=12.3.0, <13.6.0
tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute
Expand Down
110 changes: 109 additions & 1 deletion src/lightning/app/cli/lightning_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,19 @@
from lightning.app.cli.lightning_cli_delete import delete
from lightning.app.cli.lightning_cli_launch import launch
from lightning.app.cli.lightning_cli_list import get_list
from lightning.app.core.constants import ENABLE_APP_COMMENT_COMMAND_EXECUTION, get_lightning_cloud_url
from lightning.app.core.constants import (
APP_SERVER_HOST,
APP_SERVER_PORT,
ENABLE_APP_COMMENT_COMMAND_EXECUTION,
get_lightning_cloud_url,
)
from lightning.app.launcher.launcher import (
run_lightning_flow,
run_lightning_work,
serve_frontend,
start_application_server,
start_flow_and_servers,
)
from lightning.app.runners.cloud import CloudRuntime
from lightning.app.runners.runtime import dispatch
from lightning.app.runners.runtime_type import RuntimeType
Expand Down Expand Up @@ -393,3 +405,99 @@ def _prepare_file(file: str) -> str:
return file

raise FileNotFoundError(f"The provided file {file} hasn't been found.")


@run.command("server")
@click.argument("file", type=click.Path(exists=True))
@click.option("--queue-id", help="ID for identifying queue", default="", type=str)
@click.option("--host", help="Application running host", default=APP_SERVER_HOST, type=str)
@click.option("--port", help="Application running port", default=APP_SERVER_PORT, type=int)
def run_server(file: str, queue_id: str, host: str, port: int) -> None:
    """Build the application object from the given application file and use it to run the application server.

    This is used by the cloud runners to start the status server for the application.
    """
    # NOTE: click renders the docstring above as this command's ``--help`` text, so it is kept user-facing;
    # implementation notes belong in comments like this one.
    #
    # ``file`` must exist on disk (enforced by ``click.Path(exists=True)``); ``host`` and ``port`` default to
    # ``APP_SERVER_HOST`` / ``APP_SERVER_PORT`` and ``queue_id`` defaults to an empty string.
    logger.debug(f"Run Server: {file} {queue_id} {host} {port}")
    start_application_server(file, host, port, queue_id=queue_id)


@run.command("flow")
@click.argument("file", type=click.Path(exists=True))
@click.option("--queue-id", help="ID for identifying queue", default="", type=str)
@click.option("--base-url", help="Base url at which the app server is hosted", default="")
def run_flow(file: str, queue_id: str, base_url: str) -> None:
    """Build the application object from the given application file, proxy all the work components and run the
    application flow defined in the root component.

    It does exactly what a singleprocess dispatcher would do but with proxied work components.
    """
    # NOTE: click renders the docstring above as this command's ``--help`` text, so it is kept user-facing.
    #
    # ``file`` must exist on disk (enforced by ``click.Path(exists=True)``); ``queue_id`` and ``base_url`` both
    # default to an empty string and are forwarded unchanged to the launcher.
    logger.debug(f"Run Flow: {file} {queue_id} {base_url}")
    run_lightning_flow(file, queue_id=queue_id, base_url=base_url)


@run.command("work")
@click.argument("file", type=click.Path(exists=True))
@click.option("--work-name", type=str)
@click.option("--queue-id", help="ID for identifying queue", default="", type=str)
def run_work(file: str, work_name: str, queue_id: str) -> None:
    """Unlike other entrypoints, this command will take the file path or module details for a work component and run
    that by fetching the states from the queues."""
    # The docstring doubles as the click ``--help`` text, hence its wording is left untouched.
    # ``--work-name`` has no default, so ``work_name`` is ``None`` when the option is omitted.
    debug_message = f"Run Work: {file} {work_name} {queue_id}"
    logger.debug(debug_message)
    run_lightning_work(file=file, work_name=work_name, queue_id=queue_id)


@run.command("frontend")
@click.argument("file", type=click.Path(exists=True))
@click.option("--flow-name")
@click.option("--host")
@click.option("--port", type=int)
def run_frontend(file: str, flow_name: str, host: str, port: int) -> None:
    """Serve the frontend specified by the given flow."""
    # None of the options declares a default, so ``flow_name``, ``host`` and ``port`` are ``None`` when omitted
    # and are forwarded as such to ``serve_frontend``.
    #
    # The port is included in the debug output so this command logs every argument it received, like the
    # other ``run`` sub-commands do.
    logger.debug(f"Run Frontend: {file} {flow_name} {host} {port}")
    serve_frontend(file=file, flow_name=flow_name, host=host, port=port)


@run.command("flow-and-servers")
@click.argument("file", type=click.Path(exists=True))
@click.option("--queue-id", help="ID for identifying queue", default="", type=str)
@click.option("--base-url", help="Base url at which the app server is hosted", default="")
@click.option("--host", help="Application running host", default=APP_SERVER_HOST, type=str)
@click.option("--port", help="Application running port", default=APP_SERVER_PORT, type=int)
@click.option(
    "--flow-port",
    help="Pair of flow name and frontend port",
    type=(str, int),
    multiple=True,
)
def run_flow_and_servers(
    file: str,
    base_url: str,
    queue_id: str,
    host: str,
    port: int,
    flow_port: Tuple[Tuple[str, int], ...],
) -> None:
    """Build the application object from the given application file and use it to run the application flow
    defined in the root component, the application server and all the flow frontends.

    This is used by the cloud runners to start the flow, the status server and all frontends for the application.
    """
    # NOTE: click renders the docstring above as this command's ``--help`` text, so it is kept user-facing.
    #
    # ``--flow-port`` is declared with ``multiple=True`` and a ``(str, int)`` tuple type, so click collects every
    # occurrence into a tuple of ``(flow_name, port)`` pairs of arbitrary length (empty when the option is never
    # passed) - hence the variadic ``Tuple[..., ...]`` annotation.
    logger.debug(f"Run Flow: {file} {queue_id} {base_url}")
    logger.debug(f"Run Server: {file} {queue_id} {host} {port}.")
    logger.debug(f"Run Frontend's: {flow_port}")
    start_flow_and_servers(
        entrypoint_file=file,
        base_url=base_url,
        queue_id=queue_id,
        host=host,
        port=port,
        flow_names_and_ports=flow_port,
    )
5 changes: 5 additions & 0 deletions src/lightning/app/core/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
FLOW_DURATION_SAMPLES,
FLOW_DURATION_THRESHOLD,
FRONTEND_DIR,
SHOULD_START_WORKS_WITH_FLOW,
STATE_ACCUMULATE_WAIT,
)
from lightning.app.core.queues import BaseQueue
Expand Down Expand Up @@ -144,6 +145,7 @@ def __init__(
self.threads: List[threading.Thread] = []
self.exception = None
self.collect_changes: bool = True
self._should_start_works_with_flow: bool = SHOULD_START_WORKS_WITH_FLOW

self.status: Optional[AppStatus] = None
# TODO: Enable ready locally for opening the UI.
Expand Down Expand Up @@ -733,6 +735,9 @@ def _send_flow_to_work_deltas(self, state: dict) -> None:
self.flow_to_work_delta_queues[w.name].put(deep_diff)

def _start_with_flow_works(self) -> None:
if not self._should_start_works_with_flow:
return

for w in self.works:
if w._start_with_flow:
parallel = w.parallel
Expand Down
34 changes: 33 additions & 1 deletion src/lightning/app/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
from pathlib import Path
from typing import Optional
from typing import Any, Optional

import lightning_cloud.env

Expand Down Expand Up @@ -101,6 +102,37 @@ def get_lightning_cloud_url() -> str:

# Integer read from the ``BATCH_DELTA_COUNT`` environment variable (defaults to 128).
BATCH_DELTA_COUNT = int(os.getenv("BATCH_DELTA_COUNT", "128"))
# Float read from the ``CHECK_ERROR_QUEUE_INTERVAL`` environment variable (defaults to 30).
# NOTE(review): presumably expressed in seconds - confirm against the code that consumes it.
CHECK_ERROR_QUEUE_INTERVAL = float(os.getenv("CHECK_ERROR_QUEUE_INTERVAL", "30"))
# Boolean flag parsed through ``int``: the variable must hold an integer string such as "0" or "1"
# (defaults to "1", i.e. ``True``); any non-zero integer enables it.
SHOULD_START_WORKS_WITH_FLOW = bool(int(os.getenv("SHOULD_START_WORKS_WITH_FLOW", "1")))
# ``True`` when the ``LIGHTNING_CLOUD_WORK_NAME`` environment variable is not set at all.
IS_RUNNING_IN_FLOW = os.getenv("LIGHTNING_CLOUD_WORK_NAME", None) is None


class DistributedPluginChecker:
    """Detect, from environment variables, whether this process runs as part of a distributed plugin.

    ``DISTRIBUTED_ARGUMENTS`` is parsed as JSON when it is set to a non-empty value. An instance is truthy only
    when the parsed arguments are non-empty and ``LIGHTNING_CLOUD_WORK_NAME`` is set to a non-empty value.

    The environment is read once, when the instance is created.
    """

    def __init__(self) -> None:
        raw_arguments = os.getenv("DISTRIBUTED_ARGUMENTS", None)
        # An unset or empty variable is stored unchanged (falsy); anything else must be valid JSON and
        # ``json.loads`` is left to raise on malformed input.
        self.distributed_arguments = json.loads(raw_arguments) if raw_arguments else raw_arguments

        self.running_distributed_plugin = bool(
            self.distributed_arguments and os.getenv("LIGHTNING_CLOUD_WORK_NAME")
        )

    def __bool__(self) -> bool:
        """Return ``True`` when distributed arguments are present and a cloud work name is set."""
        return self.running_distributed_plugin

    def should_create_work(self, work: Any) -> bool:
        """Tell whether ``work`` should be created.

        Args:
            work: Any object exposing a dotted ``name`` attribute whose last component may be an integer rank.

        Returns:
            ``True`` when no distributed arguments are configured, when the last component of ``work.name`` is
            not an integer, or when that integer is greater than or equal to the configured ``num_instances``
            (0 when the key is missing). ``False`` otherwise.

        """
        if not self.distributed_arguments:
            return True

        num_nodes = self.distributed_arguments.get("num_instances", 0)
        try:
            node_rank = int(work.name.split(".")[-1])
        except ValueError:
            # The name carries no numeric rank suffix, so this work cannot be matched against
            # ``num_instances``; create it as usual instead of failing with an opaque ``ValueError``.
            return True

        # Only the start with flow works are skipped for performance purposes
        return node_rank >= num_nodes


# TODO (tchaton): Add LitData and JobPlugin optimizations
# A single ``DistributedPluginChecker`` instance bound to two names; its constructor reads the
# ``DISTRIBUTED_ARGUMENTS`` and ``LIGHTNING_CLOUD_WORK_NAME`` environment variables when this statement runs.
PLUGIN_CHECKER = IS_DISTRIBUTED_PLUGIN = DistributedPluginChecker()


def enable_multiple_works_in_default_container() -> bool:
Expand Down
5 changes: 5 additions & 0 deletions src/lightning/app/core/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,11 @@ def load_state_dict(self, flow_state, children_states, strict) -> None:
elif strict:
raise ValueError(f"The component {child_name} wasn't instantiated for the component {self.name}")

def stop_works(self, works: List[Any]) -> None:
    """Ask the attached backend to stop the given works.

    Args:
        works: The works to stop; the list is handed to the backend's ``stop_works`` unchanged.

    Raises:
        RuntimeError: If no backend is attached to this flow (``self._backend`` is ``None``).

    """
    if self._backend is not None:
        self._backend.stop_works(works)
        return
    raise RuntimeError("Your flow should have a backend attached. Found None.")


class _RootFlow(LightningFlow):
def __init__(self, work: LightningWork) -> None:
Expand Down
Loading

0 comments on commit 8b69285

Please sign in to comment.