Updating python reference

altering invalid arguments
Merging main
2026-04-18 13:43:29 -05:00 · 2026-04-18 13:41:54 -05:00 · 2026-04-18 13:39:37 -05:00 · 2026-04-18 13:35:32 -05:00 · 2026-04-18 11:19:38 -05:00
4 changed files with 125 additions and 4 deletions
--- a/generic_loader/.gitignore
+++ b/generic_loader/.gitignore
@@ -1,5 +1,5 @@
 /.venv
 /samples
 /.env
-/__pycache__
-/venv
+__pycache__/
+venv/
--- a/generic_loader/load_sas.py
+++ b/generic_loader/load_sas.py
@@ -4,8 +4,8 @@ Library-style functions plus a thin CLI wrapper. Designed so an orchestrator
 can wrap the library for directory/batch mode; orchestration is out of scope
 here.

-Python 3.9 compatible (target is an air-gapped host that currently only has
-3.9). ``from __future__ import annotations`` lets us use PEP 585 generics
+Python 3.14 compatible (target is an air-gapped host that currently only has
+3.14). ``from __future__ import annotations`` lets us use PEP 585 generics
 as annotations; runtime-resolved type uses (dataclass defaults, etc.) stick
 to ``typing``.

--- a/generic_loader/requirements.txt
+++ b/generic_loader/requirements.txt
--- a/utils/file_viewer.py
+++ b/utils/file_viewer.py
@@ -0,0 +1,121 @@
+"""Standalone utility to download a .sas7bdat file from S3 and print a
+column-level summary of the first 10 rows.
+
+Configure the four constants below, then run::
+
+    python3 file_viewer.py
+
+Python 3.14 compatible.
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+
+import boto3
+import pandas as pd
+import pyreadstat
+
+
+# ---------------------------------------------------------------------------
+# Configuration — edit these before running
+# ---------------------------------------------------------------------------
+
+# Name of the bucket the script downloads from.
+S3_BUCKET: str = "my-bucket"
+"""S3 bucket name."""
+
+# The final path component of this key is reused as the local file name.
+S3_KEY: str = "path/to/file.sas7bdat"
+"""Object key (path) within the bucket to the .sas7bdat file."""
+
+# Created at startup when it does not exist yet.
+LOCAL_FOLDER: str = "./downloads"
+"""Local directory to download the file into."""
+
+# Handed to boto3.Session(profile_name=...) when the S3 client is built.
+AWS_PROFILE: str = "default"
+"""AWS CLI profile name used for authentication."""
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _download_from_s3(bucket: str, key: str, local_path: str) -> None:
+    """Fetch one S3 object and store it at *local_path*.
+
+    The S3 client comes from a boto3 session bound to the module-level
+    ``AWS_PROFILE``. One progress line is printed before the transfer starts
+    and another after it has finished.
+
+    Args:
+        bucket: Name of the bucket holding the object.
+        key: Key of the object inside *bucket*.
+        local_path: Destination file path for the downloaded object.
+    """
+    # Build the client first so session/profile problems surface before any
+    # progress output is printed.
+    client = boto3.Session(profile_name=AWS_PROFILE).client("s3")
+    source_uri = f"s3://{bucket}/{key}"
+    print(f"Downloading {source_uri} -> {local_path}")
+    client.download_file(bucket, key, local_path)
+    print("Download complete.")
+
+
+def _read_sas_head(path: str, row_count: int = 10) -> pd.DataFrame:
+    """Load only the leading rows of a .sas7bdat file into a DataFrame.
+
+    Args:
+        path: Location of the .sas7bdat file on disk.
+        row_count: Value forwarded to pyreadstat as ``row_limit``.
+
+    Returns:
+        The DataFrame produced by pyreadstat; the metadata object returned
+        alongside it is discarded.
+    """
+    # NOTE(review): pyreadstat documents row_limit=0 as "no limit" -- confirm
+    # before calling this with row_count=0.
+    frame, _metadata = pyreadstat.read_sas7bdat(
+        path,
+        row_offset=0,
+        row_limit=row_count,
+    )
+    return frame
+
+
+def _sample_values(series: pd.Series, n: int = 3) -> str:
+    """Format the first few non-null entries of *series* for display.
+
+    Args:
+        series: Column to draw samples from.
+        n: Maximum number of values to include.
+
+    Returns:
+        The ``repr`` of each selected value joined by ``", "``, or
+        ``"(all null)"`` when no value was selected.
+    """
+    picked = series.dropna().head(n).tolist()
+    return ", ".join(map(repr, picked)) if picked else "(all null)"
+
+
+def _print_summary(df: pd.DataFrame) -> None:
+    """Print a column-by-column summary table of *df* to stdout.
+
+    One table row is emitted per column, showing the column label, its dtype
+    and up to three non-null sample values. Each table column is padded to
+    the width of its widest cell or of its header, whichever is longer. A
+    frame without any columns produces a table that contains only headers.
+
+    Args:
+        df: Frame to summarize.
+    """
+    # Iterate (label, Series) pairs so duplicated labels still yield one
+    # Series each; labels are stringified so non-string labels can be
+    # measured and padded like any other cell.
+    rows = [
+        (str(label), str(column.dtype), _sample_values(column, 3))
+        for label, column in df.items()
+    ]
+
+    headers = ("Column Name", "Data Type", "Sample Values (up to 3)")
+
+    # Seed every width with the header length inside a list so max() always
+    # receives an iterable, even when there are no rows at all.
+    widths = [
+        max([len(header), *(len(row[index]) for row in rows)])
+        for index, header in enumerate(headers)
+    ]
+
+    fmt = "  " + "  ".join(f"{{:<{width}}}" for width in widths)
+    sep = "  " + "  ".join("-" * width for width in widths)
+
+    print()
+    print(f"  Summary of first {len(df)} row(s)  ({len(df.columns)} columns)")
+    print(sep)
+    print(fmt.format(*headers))
+    print(sep)
+    for row in rows:
+        print(fmt.format(*row))
+    print(sep)
+    print()
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+if __name__ == "__main__":
+    # Step 1: make sure the download folder exists, then fetch the object
+    # into it under the last path component of the S3 key.
+    os.makedirs(LOCAL_FOLDER, exist_ok=True)
+    local_path = os.path.join(LOCAL_FOLDER, os.path.basename(S3_KEY))
+
+    try:
+        _download_from_s3(S3_BUCKET, S3_KEY, local_path)
+    except Exception as download_failure:
+        # Any download problem ends the run with exit status 1.
+        print(f"S3 download error: {download_failure}", file=sys.stderr)
+        sys.exit(1)
+
+    # Step 2: read the first 10 rows and print the summary; a read problem
+    # ends the run with exit status 2 so it can be told apart from step 1.
+    try:
+        df = _read_sas_head(local_path, row_count=10)
+    except Exception as read_failure:
+        print(f"File read error: {read_failure}", file=sys.stderr)
+        sys.exit(2)
+    else:
+        _print_summary(df)
Author	SHA1	Message	Date
michael-corey	2d95711d9d	Updating python reference	2026-04-18 13:43:29 -05:00
michael-corey	f1e99d887d	altering invalid arguments	2026-04-18 13:41:54 -05:00
michael-corey	f101eacffd	Merging main	2026-04-18 13:39:37 -05:00
michael-corey	edb9146682	moving files	2026-04-18 13:35:32 -05:00
michael-corey	6b12ab969b	adding file_viewer	2026-04-18 11:19:38 -05:00