From 952511bfc759d1573c5b3e84785deaf4441454fd Mon Sep 17 00:00:00 2001 From: Jacob Chapman <7908073+chapmanjacobd@users.noreply.github.com> Date: Tue, 10 Sep 2024 12:53:31 +0800 Subject: [PATCH] 2.9.049 --- .github/README.md | 2 +- tests/tablefiles/test_markdown_tables.py | 6 +++ .../test_lb_markdown_tables_skip_header.yml | 8 ++++ xklb/__main__.py | 2 +- xklb/createdb/tables_add.py | 1 + xklb/tablefiles/columns.py | 1 + xklb/tablefiles/eda.py | 1 + xklb/tablefiles/markdown_tables.py | 1 + xklb/tablefiles/mcda.py | 1 + xklb/utils/arggroups.py | 1 + xklb/utils/file_utils.py | 40 +++++++++++++++++-- 11 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 tests/tablefiles/test_markdown_tables/test_lb_markdown_tables_skip_header.yml diff --git a/.github/README.md b/.github/README.md index 7eab4c7a..2cd36a92 100644 --- a/.github/README.md +++ b/.github/README.md @@ -99,7 +99,7 @@ To stop playing press Ctrl+C in either the terminal or mpv
List all subcommands $ library - library (v2.9.048; 86 subcommands) + library (v2.9.049; 86 subcommands) Create database subcommands: ╭─────────────────┬──────────────────────────────────────────╮ diff --git a/tests/tablefiles/test_markdown_tables.py b/tests/tablefiles/test_markdown_tables.py index 772e7e06..297d7a19 100644 --- a/tests/tablefiles/test_markdown_tables.py +++ b/tests/tablefiles/test_markdown_tables.py @@ -40,3 +40,9 @@ def test_lb_markdown_tables_transpose(mock_stdin, assert_unchanged, capsys): lb(["markdown-tables", "--from-json", "--transpose", "--to-json"]) captured = capsys.readouterr().out assert_unchanged([json.loads(s) for s in captured.splitlines()]) + + +def test_lb_markdown_tables_skip_header(assert_unchanged, capsys): + lb(["markdown-tables", "--skip-header", "--start-row=1", "--to-json", "tests/data/test.xlsx"]) + captured = capsys.readouterr().out + assert_unchanged([json.loads(s) for s in captured.splitlines()]) diff --git a/tests/tablefiles/test_markdown_tables/test_lb_markdown_tables_skip_header.yml b/tests/tablefiles/test_markdown_tables/test_lb_markdown_tables_skip_header.yml new file mode 100644 index 00000000..3d873120 --- /dev/null +++ b/tests/tablefiles/test_markdown_tables/test_lb_markdown_tables_skip_header.yml @@ -0,0 +1,8 @@ +- column0: 0 + column1: 1 + column2: 3 + column3: 5 +- column0: 1 + column1: 2 + column2: 4 + column3: 6 diff --git a/xklb/__main__.py b/xklb/__main__.py index cfe6578b..8808211b 100644 --- a/xklb/__main__.py +++ b/xklb/__main__.py @@ -5,7 +5,7 @@ from xklb.utils import argparse_utils, iterables from xklb.utils.log_utils import log -__version__ = "2.9.048" +__version__ = "2.9.049" progs = { "Create database subcommands": { diff --git a/xklb/createdb/tables_add.py b/xklb/createdb/tables_add.py index 8dfcdb3a..2501778d 100644 --- a/xklb/createdb/tables_add.py +++ b/xklb/createdb/tables_add.py @@ -47,6 +47,7 @@ def table_add(args, path): mimetype=args.mimetype, join_tables=args.join_tables, transpose=args.transpose, + skip_headers=args.skip_headers, ) for i, df in enumerate(dfs): if args.table_rename: diff --git a/xklb/tablefiles/columns.py b/xklb/tablefiles/columns.py index 42682604..dba0aa7d 100644 --- a/xklb/tablefiles/columns.py +++ b/xklb/tablefiles/columns.py @@ -36,6 +36,7 @@ def file_columns(args, path): mimetype=args.mimetype, join_tables=args.join_tables, transpose=args.transpose, + skip_headers=args.skip_headers, ): df_name = df.name diff --git a/xklb/tablefiles/eda.py b/xklb/tablefiles/eda.py index 2e61c0ec..b49bed9e 100644 --- a/xklb/tablefiles/eda.py +++ b/xklb/tablefiles/eda.py @@ -190,6 +190,7 @@ def file_eda(args, path): mimetype=args.mimetype, join_tables=args.join_tables, transpose=args.transpose, + skip_headers=args.skip_headers, ) if getattr(args, "repl", False): breakpoint() diff --git a/xklb/tablefiles/markdown_tables.py b/xklb/tablefiles/markdown_tables.py index 6c82b843..39872326 100644 --- a/xklb/tablefiles/markdown_tables.py +++ b/xklb/tablefiles/markdown_tables.py @@ -32,6 +32,7 @@ def file_markdown(args, path): mimetype=args.mimetype, join_tables=args.join_tables, transpose=args.transpose, + skip_headers=args.skip_headers, ): if getattr(args, "repl", False): breakpoint() diff --git a/xklb/tablefiles/mcda.py b/xklb/tablefiles/mcda.py index 3da6b59c..cf111006 100644 --- a/xklb/tablefiles/mcda.py +++ b/xklb/tablefiles/mcda.py @@ -247,6 +247,7 @@ def file_mcda(args, path): mimetype=args.mimetype, join_tables=args.join_tables, transpose=args.transpose, + skip_headers=args.skip_headers, ) for df in dfs: diff --git a/xklb/utils/arggroups.py b/xklb/utils/arggroups.py index 8853950d..c073c7b4 100644 --- a/xklb/utils/arggroups.py +++ b/xklb/utils/arggroups.py @@ -1323,6 +1323,7 @@ def table_like(parent_parser): parser.add_argument("--table-name", "--table", "-t", help="Load from a specific table by name") parser.add_argument("--table-index", type=int, help="Load from a specific table by index") parser.add_argument("--table-rename", "--rename-table", "--as-table-name", help="Load to specific table by name") + parser.add_argument("--skip-headers", "--ignore-headers", action="store_true") parser.add_argument("--start-row", "--skiprows", type=int, default=None, help="Skip reading N rows") parser.add_argument( "--end-row", diff --git a/xklb/utils/file_utils.py b/xklb/utils/file_utils.py index 6b23654c..fe33c13c 100644 --- a/xklb/utils/file_utils.py +++ b/xklb/utils/file_utils.py @@ -466,6 +466,7 @@ def read_file_to_dataframes( mimetype=None, join_tables=False, transpose=False, + skip_headers=False, ): import pandas as pd @@ -508,7 +509,13 @@ def read_file_to_dataframes( "excel spreadsheet subheader", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ): - excel_data = pd.read_excel(path, sheet_name=table_name or table_index, nrows=end_row, skiprows=start_row) + excel_data = pd.read_excel( + path, + sheet_name=table_name or table_index, + nrows=end_row, + skiprows=start_row, + header=None if skip_headers else 0, + ) dfs = [] if isinstance(excel_data, pd.DataFrame): worksheet_names = excel_data.index.levels[0] # type: ignore @@ -549,7 +556,11 @@ def read_file_to_dataframes( "csv", "text/csv", ): - dfs = [pd.read_csv(path, nrows=end_row, skiprows=start_row or 0, encoding=encoding)] + dfs = [ + pd.read_csv( + path, nrows=end_row, skiprows=start_row or 0, encoding=encoding, header=None if skip_headers else 0 + ) + ] elif mimetype in ( "plain", "plaintext", @@ -557,13 +568,31 @@ def read_file_to_dataframes( "text/wsv", "text/whitespace-separated-values", ): - dfs = [pd.read_csv(path, delim_whitespace=True, nrows=end_row, skiprows=start_row or 0, encoding=encoding)] + dfs = [ + pd.read_csv( + path, + delim_whitespace=True, + nrows=end_row, + skiprows=start_row or 0, + encoding=encoding, + header=None if skip_headers else 0, + ) + ] elif mimetype in ( "tsv", "text/tsv", "text/tab-separated-values", ): - dfs = [pd.read_csv(path, delimiter="\t", nrows=end_row, skiprows=start_row or 0, encoding=encoding)] + dfs = [ + pd.read_csv( + path, + delimiter="\t", + nrows=end_row, + skiprows=start_row or 0, + encoding=encoding, + header=None if skip_headers else 0, + ) + ] elif mimetype in ("parq", "parquet", "application/parquet"): dfs = [pd.read_parquet(path)] elif mimetype in ("pkl", "pickle", "application/octet-stream"): @@ -618,6 +647,9 @@ def read_file_to_dataframes( if not hasattr(df, "name"): df.name = str(table_index_as_name) + if skip_headers: + df.columns = [f"column{i}" for i in range(len(df.columns))] + if transpose: def t(df):