Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A small reporting hack #3621

Merged
merged 4 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions lib/pbench/cli/server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ class DateParser(ParamType):
def convert(
self, value: Any, param: Optional[Parameter], ctx: Optional[Context]
) -> Any:
if isinstance(value, datetime.datetime):
return value

try:
return parser.parse(value)
except Exception as e:
self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}")
if isinstance(value, str):
try:
value = parser.parse(value)
except Exception as e:
self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}")
if not isinstance(value, datetime.datetime):
self.fail(f"{value!r} ({type(value).__name__}) is unsupported.")
if value.tzinfo is None:
value = value.replace(tzinfo=datetime.timezone.utc)
return value


class Detail:
Expand Down
120 changes: 93 additions & 27 deletions lib/pbench/cli/server/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
import re
import shutil
import time
from typing import Any, Iterator, Optional, Union
from typing import Any, Optional, Union

import click
import humanize
from sqlalchemy import cast, inspect, Row, select, text
from sqlalchemy import cast, inspect, select, text
from sqlalchemy.orm import Query

from pbench.cli import pass_cli_context
from pbench.cli.server import config_setup, Detail, Verify, Watch
from pbench.cli.server import config_setup, DateParser, Detail, Verify, Watch
from pbench.cli.server.options import common_options
from pbench.common.logger import get_pbench_logger
from pbench.server import BadConfig
Expand Down Expand Up @@ -368,30 +369,82 @@ def columnize(
click.echo(line)


def summarize_dates(rows: Iterator[Row], width: int = 80):
def summarize_dates(base_query: Query, options: dict[str, Any]):
"""Collect and report statistics

The caller supplies a base query providing a "date" column, effectively
"SELECT dataset.upload AS date" so that we can filter on the date and
process each match.

Args:
base_query: a SQLAlchemy Query producing a "date" column
options: The Click option dictionary
"""
width: int = options.get("width")
since = options.get("since")
until = options.get("until")

if since and until and since > until:
raise Exception("The --until value must be later than the --since value")

by_year = defaultdict(int)
by_month = defaultdict(int)
by_day = defaultdict(int)
by_weekday = defaultdict(int)
by_hour = defaultdict(int)

day = datetime.datetime.now(datetime.timezone.utc).replace(
hour=0, minute=0, second=0, microsecond=0
start = (
since if since else datetime.datetime.fromtimestamp(0.0, datetime.timezone.utc)
)
end = until if until else datetime.datetime.now(datetime.timezone.utc)

# It's convenient to use `--until YYYY-MM-01` to see a month (though
# technically that would include a YYYY-MM-01:00:00.00 timestamp), but
# bucketizing the day or week based on that anomaly isn't very useful, so
# back up the "day" one millisecond to move it into the last day of the
# previous month.
day = end - datetime.timedelta(milliseconds=1)
day = day.replace(hour=0, minute=0, second=0, microsecond=0)
webbnh marked this conversation as resolved.
Show resolved Hide resolved
month = day.replace(day=1)
year = month.replace(month=1)
week = day - datetime.timedelta(days=7)

first: Optional[datetime.datetime] = None
last: Optional[datetime.datetime] = None

this_year = 0
this_month = 0
this_week = 0
this_day = 0
in_range = 0

filters = []

# Create a subquery from our basic select parameters so that we can use
# the label (SQL "AS date") in our WHERE filter clauses. (In a direct query
# PostgreSQL doesn't allow filtering on renamed columns.)
subquery = base_query.subquery()
query = Database.db_session.query(subquery.c.date).order_by(subquery.c.date)

if since:
verifier.status(f"Filter since {since}")
filters.append(subquery.c.date >= since)
if until:
verifier.status(f"Filter until {until}")
filters.append(subquery.c.date <= until)
if filters:
query = query.filter(*filters)
rows = query.execution_options(stream_results=True).yield_per(SQL_CHUNK)

for row in rows:
date: datetime.datetime = row[0]
if not isinstance(date, datetime.datetime):
detailer.message(f"Got non-datetime row {row}")
continue
if not first:
first = date
last = date
in_range += 1
by_year[date.year] += 1
by_month[date.month] += 1
by_day[date.day] += 1
Expand All @@ -407,10 +460,22 @@ def summarize_dates(rows: Iterator[Row], width: int = 80):
if date >= day:
this_day += 1

click.echo(f" {this_year:,d} this year ({year:%Y})")
click.echo(f" {this_month:,d} this month ({month:%B %Y})")
click.echo(f" {this_week:,d} this week ({week:%B %d} to {day:%B %d})")
click.echo(f" {this_day:,d} today ({day:%d %B %Y})")
if not first:
click.echo(
f" No datasets found between {start:%Y-%m-%d %H:%M} and {end:%Y-%m-%d %H:%M}"
)
return

click.echo(f" {in_range:,d} from {first:%Y-%m-%d %H:%M} to {last:%Y-%m-%d %H:%M}")

if start < year:
click.echo(f" {this_year:,d} in year {year:%Y}")
if start < month:
click.echo(f" {this_month:,d} in month {month:%B %Y}")
if start < week:
click.echo(f" {this_week:,d} in week {week:%B %d} to {day:%B %d}")
if start < day:
click.echo(f" {this_day:,d} on {day:%d %B %Y}")

click.echo(" Total by year:")
columnize(by_year, width)
Expand All @@ -429,30 +494,21 @@ def report_creation(options: dict[str, Any]):

watcher.update("analyzing upload patterns")

rows = (
Database.db_session.query(
cast(Metadata.value["pbench", "date"].as_string(), TZDateTime)
)
.filter(Metadata.key == "metalog")
.execution_options(stream_results=True)
.yield_per(SQL_CHUNK)
)
rows = Database.db_session.query(
cast(Metadata.value["pbench", "date"].as_string(), TZDateTime).label("date")
).filter(Metadata.key == "metalog")
click.echo("Dataset statistics by creation date:")
summarize_dates(rows, options.get("width"))
summarize_dates(rows, options)


def report_uploads(options: dict[str, Any]):
"""Report dataset statistics by upload date"""

watcher.update("analyzing upload patterns")

rows = (
Database.db_session.query(Dataset.uploaded)
.execution_options(stream_results=True)
.yield_per(SQL_CHUNK)
)
rows = Database.db_session.query(Dataset.uploaded.label("date"))
click.echo("Dataset statistics by upload date:")
summarize_dates(rows, options.get("width"))
summarize_dates(rows, options)


def report_audit():
Expand Down Expand Up @@ -664,14 +720,24 @@ def report_states():
@click.option(
"--progress", "-p", type=float, default=0.0, help="Show periodic progress messages"
)
@click.option(
"--since",
type=DateParser(),
help="Confine statistics to datasets uploaded/created since date/time",
)
@click.option("--sql", "-s", default=False, is_flag=True, help="Display SQL statistics")
@click.option(
"--states", "-S", default=False, is_flag=True, help="Display operational states"
)
@click.option(
"--statistics",
type=click.Choice(["creation", "upload"], case_sensitive=False),
help="Show upload statistics",
help="Show dataset statistics by creation or upload timestamp",
)
@click.option(
"--until",
type=DateParser(),
help="Confine statistics to datasets uploaded/created until date/time",
)
@click.option(
"--verify", "-v", default=False, is_flag=True, help="Display intermediate messages"
Expand Down Expand Up @@ -733,7 +799,7 @@ def report(context: object, **kwargs):
logger.exception("An error occurred discovering the file tree: {}", exc)
if kwargs.get("verify"):
raise
click.secho(exc, err=True, bg="red")
click.secho(exc, err=True, fg="red")
rv = 2 if isinstance(exc, BadConfig) else 1

click.get_current_context().exit(rv)
Loading