# foxtrot/utils/file_viewer.py

"""Standalone utility to download a SAS or delimited text file from S3 and
print a column-level summary of the first *N* rows.

Supported formats
-----------------
* **SAS** – ``.sas7bdat``, ``.xpt``, ``.xport``  (read via *pyreadstat*)
* **Text** – ``.csv``, ``.tsv``, ``.txt``  (read via *pandas.read_csv*)

Configure the four constants below (``--s3-key`` overrides ``S3_KEY`` and
``--local`` skips S3 entirely), then run::

    python3 file_viewer.py
    python3 file_viewer.py --local path/to/file.csv
    python3 file_viewer.py --local path/to/data.tsv --delimiter $'\\t'

Python 3.14 compatible.
"""

from __future__ import annotations

import argparse
import os
import sys

import boto3
import pandas as pd
import pyreadstat


# ---------------------------------------------------------------------------
# Supported file extensions
# ---------------------------------------------------------------------------

# Every entry is lowercase and includes the leading dot, so it can be compared
# directly against ``os.path.splitext(path)[1].lower()`` as the readers and the
# CLI entry point do.
SAS_EXTENSIONS: set[str] = {".sas7bdat", ".xpt", ".xport"}
"""File extensions recognised as SAS data files."""

TEXT_EXTENSIONS: set[str] = {".txt", ".csv", ".tsv"}
"""File extensions recognised as delimited text files."""

# Derived from the two sets above; extend those rather than editing this one.
SUPPORTED_EXTENSIONS: set[str] = SAS_EXTENSIONS | TEXT_EXTENSIONS
"""Union of all supported file extensions."""


# ---------------------------------------------------------------------------
# Configuration — edit these before running (--s3-key overrides S3_KEY; --local skips S3)
# ---------------------------------------------------------------------------

# Used only in S3 download mode; there is no CLI option for the bucket.
S3_BUCKET: str = "my-bucket"
"""S3 bucket name."""

# Default object key; the ``--s3-key`` CLI option takes precedence when given.
S3_KEY: str = "path/to/file.sas7bdat"
"""Object key (path) within the bucket to a supported data file."""

# Created on demand (``os.makedirs(..., exist_ok=True)``) in S3 download mode;
# the downloaded file keeps the base name of the S3 key.
LOCAL_FOLDER: str = "./downloads"
"""Local directory to download the file into."""

# Passed as ``profile_name`` to ``boto3.Session`` when downloading.
AWS_PROFILE: str = "default"
"""AWS CLI profile name used for authentication."""


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _ensure_local_copy(bucket: str, key: str, local_path: str) -> None:
    """Make sure *local_path* holds a copy of ``s3://bucket/key``.

    The object's ``ContentLength`` is fetched with ``head_object`` and compared
    with the size of any file already present at *local_path*.  When the two
    sizes are equal the download is skipped; otherwise (or when no local file
    exists) the object is downloaded, overwriting *local_path*.

    Only byte sizes are compared, never contents.  The S3 client is created
    from the module-level :data:`AWS_PROFILE`.  Progress messages are printed
    to stdout.
    """
    client = boto3.Session(profile_name=AWS_PROFILE).client("s3")
    expected_bytes = client.head_object(Bucket=bucket, Key=key)["ContentLength"]

    if os.path.exists(local_path):
        present_bytes = os.path.getsize(local_path)
        if present_bytes != expected_bytes:
            # Stale or truncated copy: report it, then fall through to download.
            print(
                f"Local file {local_path} size ({present_bytes} bytes) differs from "
                f"S3 ({expected_bytes} bytes); re-downloading."
            )
        else:
            print(
                f"Local file {local_path} already matches s3://{bucket}/{key} "
                f"({present_bytes} bytes); skipping download."
            )
            return

    print(f"Downloading s3://{bucket}/{key} -> {local_path}")
    client.download_file(bucket, key, local_path)
    print("Download complete.")


# -- SAS readers -------------------------------------------------------------


def _read_sas_head(path: str, row_count: int = 10) -> pd.DataFrame:
    """Read the first *row_count* rows of a SAS file (``.sas7bdat``, ``.xpt``, ``.xport``).

    Parameters
    ----------
    path : str
        Path to the SAS file; the reader is chosen from its extension
        (case-insensitive).
    row_count : int, optional
        Number of rows to read (default ``10``).  ``0`` returns an empty
        frame that still carries the file's column names.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If *row_count* is negative or the extension is not a SAS extension.
    """
    # Reject negatives up front, mirroring pandas' ``nrows`` validation on the
    # text path, instead of handing them to the native reader.
    if row_count < 0:
        raise ValueError(f"row_count must be >= 0, got {row_count}")

    ext = os.path.splitext(path)[1].lower()
    if ext == ".sas7bdat":
        reader = pyreadstat.read_sas7bdat
    elif ext in {".xpt", ".xport"}:
        reader = pyreadstat.read_xport
    else:
        raise ValueError(f"Unsupported SAS extension: {ext}")

    if row_count == 0:
        # pyreadstat treats ``row_limit=0`` as "no limit", which would load the
        # whole file.  ``metadataonly=True`` yields the column names with no rows.
        df, _ = reader(path, metadataonly=True)
    else:
        df, _ = reader(path, row_offset=0, row_limit=row_count)
    return df


# -- Text readers ------------------------------------------------------------


def _read_text_head(
    path: str,
    row_count: int = 10,
    delimiter: str = ",",
    encoding: str = "utf-8",
    quotechar: str = '"',
) -> pd.DataFrame:
    """Load the leading rows of a delimited text file into a DataFrame.

    Parameters
    ----------
    path : str
        Location of the ``.csv``, ``.tsv``, or ``.txt`` file.
    row_count : int, optional
        How many data rows to read (default ``10``).
    delimiter : str, optional
        Field separator (default ``","``).  Callers reading ``.tsv`` files
        are expected to pass ``"\\t"``.
    encoding : str, optional
        Text encoding of the file (default ``"utf-8"``).
    quotechar : str, optional
        Character that wraps quoted fields (default ``'"'``).

    Returns
    -------
    pandas.DataFrame
        Whatever ``pandas.read_csv`` produces for the given options.
    """
    csv_options = {
        "sep": delimiter,
        "encoding": encoding,
        "quotechar": quotechar,
        "nrows": row_count,
    }
    return pd.read_csv(path, **csv_options)


# -- Unified reader ----------------------------------------------------------


def _read_head(
    path: str,
    row_count: int = 10,
    delimiter: str | None = None,
    encoding: str = "utf-8",
    quotechar: str = '"',
) -> pd.DataFrame:
    """Load the leading rows of any supported data file.

    The file type is chosen from the (case-insensitive) extension of *path*:
    SAS extensions go to :func:`_read_sas_head`, everything else that is
    supported goes to :func:`_read_text_head`.

    Parameters
    ----------
    path : str
        Location of the data file.
    row_count : int, optional
        How many data rows to read (default ``10``).
    delimiter : str or None, optional
        Field separator for text files.  ``None`` selects a default based on
        the extension: tab (``"\\t"``) for ``.tsv``, ``","`` otherwise.
        Ignored for SAS files.
    encoding : str, optional
        Text-file encoding (default ``"utf-8"``).  Ignored for SAS files.
    quotechar : str, optional
        Text-file quote character (default ``'"'``).  Ignored for SAS files.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the extension is not in :data:`SUPPORTED_EXTENSIONS`.
    """
    suffix = os.path.splitext(path)[1].lower()

    if suffix not in SUPPORTED_EXTENSIONS:
        known = sorted(SUPPORTED_EXTENSIONS)
        raise ValueError(
            f"Unsupported file extension '{suffix}'. "
            f"Supported extensions: {known}"
        )

    if suffix not in SAS_EXTENSIONS:
        # Text branch: fill in the separator when the caller left it open.
        separator = delimiter
        if separator is None:
            separator = "\t" if suffix == ".tsv" else ","
        return _read_text_head(
            path,
            row_count=row_count,
            delimiter=separator,
            encoding=encoding,
            quotechar=quotechar,
        )

    return _read_sas_head(path, row_count=row_count)


# -- Display -----------------------------------------------------------------


def _sample_values(series: pd.Series, n: int = 3) -> str:
    """Format up to *n* leading non-null values of *series* for display.

    Each value is rendered with ``repr`` and the results are joined with
    ``", "``.  When nothing is left after dropping nulls (or *n* selects no
    values) the literal ``"(all null)"`` is returned.
    """
    picked = series.dropna().head(n).tolist()
    return ", ".join(map(repr, picked)) if picked else "(all null)"


def _print_summary(df: pd.DataFrame) -> None:
    """Print a column-by-column summary table of *df* to stdout.

    One line is printed per column, showing its name, its dtype, and up to
    three non-null sample values (via :func:`_sample_values`).  Column widths
    adapt to the longest entry, never narrower than the header text.

    A frame with no columns prints just the header, and column labels that
    are not strings (or are duplicated) are handled without error.
    """
    # ``df.items()`` yields one (label, Series) pair per column even when
    # labels repeat, whereas ``df[label]`` would return a DataFrame then.
    # Labels are stringified so ``len()`` works for non-string labels too.
    rows = [
        (str(label), str(column.dtype), _sample_values(column, 3))
        for label, column in df.items()
    ]

    hdr_name = "Column Name"
    hdr_dtype = "Data Type"
    hdr_samples = "Sample Values (up to 3)"

    # A single list goes to ``max``: with zero columns, the star-args form
    # ``max(n, *())`` collapses to ``max(n)`` and raises TypeError.
    w_name = max([len(hdr_name), *(len(r[0]) for r in rows)])
    w_dtype = max([len(hdr_dtype), *(len(r[1]) for r in rows)])
    w_samples = max([len(hdr_samples), *(len(r[2]) for r in rows)])

    fmt = f"  {{:<{w_name}}}  {{:<{w_dtype}}}  {{:<{w_samples}}}"
    sep = f"  {'-' * w_name}  {'-' * w_dtype}  {'-' * w_samples}"

    print()
    print(f"  Summary of first {len(df)} row(s)  ({len(df.columns)} columns)")
    print(sep)
    print(fmt.format(hdr_name, hdr_dtype, hdr_samples))
    print(sep)
    for name, dtype, samples in rows:
        print(fmt.format(name, dtype, samples))
    print(sep)
    print()


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the file viewer.

    Options defined:

    * ``--local FILE`` / ``--s3-key KEY`` — mutually exclusive input source.
    * ``--rows N`` — integer row count, default ``10``.
    * ``--delimiter``, ``--encoding``, ``--quotechar`` — text-file options,
      shown under their own help group.
    """
    extension_list = ", ".join(sorted(SUPPORTED_EXTENSIONS))

    cli = argparse.ArgumentParser(
        description=(
            "Download a SAS or delimited text file from S3 (or read a local "
            "file) and print a column-level summary of the first N rows.\n\n"
            "Supported extensions: " + extension_list
        ),
        # Raw formatter keeps the blank line embedded in the description.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Input source: a local file or an S3 key, never both.
    source_group = cli.add_mutually_exclusive_group()
    source_group.add_argument(
        "--local",
        metavar="FILE",
        default=None,
        help=(
            "Path to a local data file to summarise (skips S3 download).  "
            "Supported extensions: " + extension_list
        ),
    )
    source_group.add_argument(
        "--s3-key",
        metavar="KEY",
        default=None,
        help="Override the S3_KEY constant with this object key.",
    )

    cli.add_argument(
        "--rows",
        type=int,
        default=10,
        metavar="N",
        help="Number of rows to read (default: 10).",
    )

    # Options that only matter for delimited text input.
    text_opts = cli.add_argument_group(
        "text file options",
        "These options apply only to .csv / .tsv / .txt files.",
    )
    text_opts.add_argument(
        "--delimiter",
        default=None,
        help=(
            'Column delimiter for text files (default: "," for .csv/.txt, '
            '"\\t" for .tsv).  Use $\'\\t\' in the shell for a literal tab.'
        ),
    )
    text_opts.add_argument(
        "--encoding",
        default="utf-8",
        help='File encoding for text files (default: "utf-8").',
    )
    text_opts.add_argument(
        "--quotechar",
        default='"',
        help='Quote character for text files (default: \'"\').',
    )

    return cli


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------


if __name__ == "__main__":
    # Script entry point: resolve the input file (local path or S3 download),
    # read its first N rows, and print the column summary.
    # Exit codes: 1 = missing local file or S3 failure, 2 = read failure;
    # an unsupported extension is reported through ``parser.error``.
    cli = _build_parser()
    opts = cli.parse_args()

    if not opts.local:
        # ---- S3 download mode ----------------------------------------------
        s3_key = opts.s3_key or S3_KEY
        suffix = os.path.splitext(s3_key)[1].lower()
        if suffix not in SUPPORTED_EXTENSIONS:
            cli.error(
                f"Unsupported file extension '{suffix}' in S3 key. "
                f"Supported: {sorted(SUPPORTED_EXTENSIONS)}"
            )

        # The download lands in LOCAL_FOLDER under the key's base name.
        os.makedirs(LOCAL_FOLDER, exist_ok=True)
        local_path = os.path.join(LOCAL_FOLDER, os.path.basename(s3_key))

        try:
            _ensure_local_copy(S3_BUCKET, s3_key, local_path)
        except Exception as exc:
            print(f"S3 download error: {exc}", file=sys.stderr)
            sys.exit(1)
    else:
        # ---- Local file mode -----------------------------------------------
        local_path = opts.local
        suffix = os.path.splitext(local_path)[1].lower()
        if suffix not in SUPPORTED_EXTENSIONS:
            cli.error(
                f"Unsupported file extension '{suffix}'. "
                f"Supported: {sorted(SUPPORTED_EXTENSIONS)}"
            )
        if not os.path.isfile(local_path):
            print(f"File not found: {local_path}", file=sys.stderr)
            sys.exit(1)

    # ---- Read & summarise --------------------------------------------------
    read_options = {
        "row_count": opts.rows,
        "delimiter": opts.delimiter,
        "encoding": opts.encoding,
        "quotechar": opts.quotechar,
    }
    try:
        df = _read_head(local_path, **read_options)
    except Exception as exc:
        print(f"File read error: {exc}", file=sys.stderr)
        sys.exit(2)

    _print_summary(df)
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								"""Standalone utility to download a SAS or delimited text file from S3 and
 								print a column-level summary of the first *N* rows.
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								Supported formats
 								-----------------
 								* **SAS** – ``.sas7bdat``, ``.xpt``, ``.xport``  (read via *pyreadstat*)
 								* **Text** – ``.csv``, ``.tsv``, ``.txt``  (read via *pandas.read_csv*)
 								Configure the four constants below **or** use the CLI arguments, then run::
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
 								    python3 file_viewer.py
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								    python3 file_viewer.py --local path/to/file.csv
 								    python3 file_viewer.py --local path/to/data.tsv --delimiter $'\\t'
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
-												Updating python reference

											
										
										
											2026-04-18 18:43:29 +00:00
+								Python 3.14 compatible.
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								"""
 								from __future__ import annotations
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								import argparse
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								import os
 								import sys
 								import boto3
 								import pandas as pd
 								import pyreadstat
 								# ---------------------------------------------------------------------------
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								# Supported file extensions
 								# ---------------------------------------------------------------------------
 								SAS_EXTENSIONS: set[str] = {".sas7bdat", ".xpt", ".xport"}
 								"""File extensions recognised as SAS data files."""
 								TEXT_EXTENSIONS: set[str] = {".txt", ".csv", ".tsv"}
 								"""File extensions recognised as delimited text files."""
 								SUPPORTED_EXTENSIONS: set[str] = SAS_EXTENSIONS | TEXT_EXTENSIONS
 								"""Union of all supported file extensions."""
 								# ---------------------------------------------------------------------------
 								# Configuration — edit these before running (or use CLI arguments)
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								# ---------------------------------------------------------------------------
 								S3_BUCKET: str = "my-bucket"
 								"""S3 bucket name."""
 								S3_KEY: str = "path/to/file.sas7bdat"
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								"""Object key (path) within the bucket to a supported data file."""
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
 								LOCAL_FOLDER: str = "./downloads"
 								"""Local directory to download the file into."""
 								AWS_PROFILE: str = "default"
 								"""AWS CLI profile name used for authentication."""
 								# ---------------------------------------------------------------------------
 								# Helpers
 								# ---------------------------------------------------------------------------
-												adding local check

											
										
										
											2026-04-20 13:25:27 +00:00
+								def _ensure_local_copy(bucket: str, key: str, local_path: str) -> None:
 								    """Download *key* from *bucket* to *local_path*, skipping if already present.
 								    If *local_path* exists and its size matches the S3 object's size, the
 								    download is skipped and a message is printed.
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
 								    Supports any file whose extension is in :data:`SUPPORTED_EXTENSIONS`.
-												adding local check

											
										
										
											2026-04-20 13:25:27 +00:00
+								    """
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								    session = boto3.Session(profile_name=AWS_PROFILE)
 								    s3 = session.client("s3")
-												adding local check

											
										
										
											2026-04-20 13:25:27 +00:00
 								    remote_size = s3.head_object(Bucket=bucket, Key=key)["ContentLength"]
 								    if os.path.exists(local_path):
 								        local_size = os.path.getsize(local_path)
 								        if local_size == remote_size:
 								            print(
 								                f"Local file {local_path} already matches s3://{bucket}/{key} "
 								                f"({local_size} bytes); skipping download."
 								            )
 								            return
 								        print(
 								            f"Local file {local_path} size ({local_size} bytes) differs from "
 								            f"S3 ({remote_size} bytes); re-downloading."
 								        )
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								    print(f"Downloading s3://{bucket}/{key} -> {local_path}")
 								    s3.download_file(bucket, key, local_path)
 								    print("Download complete.")
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								# -- SAS readers -------------------------------------------------------------
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								def _read_sas_head(path: str, row_count: int = 10) -> pd.DataFrame:
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								    """Read the first *row_count* rows of a SAS file (``.sas7bdat``, ``.xpt``, ``.xport``)."""
 								    ext = os.path.splitext(path)[1].lower()
 								    if ext == ".sas7bdat":
 								        df, _ = pyreadstat.read_sas7bdat(path, row_offset=0, row_limit=row_count)
 								    elif ext in {".xpt", ".xport"}:
 								        df, _ = pyreadstat.read_xport(path, row_offset=0, row_limit=row_count)
 								    else:
 								        raise ValueError(f"Unsupported SAS extension: {ext}")
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								    return df
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								# -- Text readers ------------------------------------------------------------
 								def _read_text_head(
 								    path: str,
 								    row_count: int = 10,
 								    delimiter: str = ",",
 								    encoding: str = "utf-8",
 								    quotechar: str = '"',
 								) -> pd.DataFrame:
 								    """Read the first *row_count* rows of a delimited text file.
 								    Parameters
 								    ----------
 								    path : str
 								        Path to the ``.csv``, ``.tsv``, or ``.txt`` file.
 								    row_count : int, optional
 								        Number of data rows to read (default ``10``).
 								    delimiter : str, optional
 								        Column delimiter (default ``","``).  For ``.tsv`` files the caller
 								        should pass ``"\\t"``.
 								    encoding : str, optional
 								        File encoding (default ``"utf-8"``).
 								    quotechar : str, optional
 								        Character used to quote fields (default ``'"'``).
 								    """
 								    return pd.read_csv(
 								        path,
 								        sep=delimiter,
 								        encoding=encoding,
 								        quotechar=quotechar,
 								        nrows=row_count,
 								    )
 								# -- Unified reader ----------------------------------------------------------
 								def _read_head(
 								    path: str,
 								    row_count: int = 10,
 								    delimiter: str | None = None,
 								    encoding: str = "utf-8",
 								    quotechar: str = '"',
 								) -> pd.DataFrame:
 								    """Read the first *row_count* rows of a supported data file.
 								    Auto-detects the file type from its extension and delegates to the
 								    appropriate reader.  For ``.tsv`` files the delimiter defaults to tab
 								    (``"\\t"``); for other text files it defaults to ``","``.
 								    Parameters
 								    ----------
 								    path : str
 								        Path to the data file.
 								    row_count : int, optional
 								        Number of data rows to read (default ``10``).
 								    delimiter : str or None, optional
 								        Column delimiter for text files.  ``None`` means *auto-detect*
 								        (tab for ``.tsv``, comma otherwise).
 								    encoding : str, optional
 								        Encoding for text files (default ``"utf-8"``).
 								    quotechar : str, optional
 								        Quote character for text files (default ``'"'``).
 								    Returns
 								    -------
 								    pandas.DataFrame
 								    """
 								    ext = os.path.splitext(path)[1].lower()
 								    if ext not in SUPPORTED_EXTENSIONS:
 								        raise ValueError(
 								            f"Unsupported file extension '{ext}'. "
 								            f"Supported extensions: {sorted(SUPPORTED_EXTENSIONS)}"
 								        )
 								    if ext in SAS_EXTENSIONS:
 								        return _read_sas_head(path, row_count=row_count)
 								    # --- Text file path ---
 								    if delimiter is None:
 								        delimiter = "\t" if ext == ".tsv" else ","
 								    return _read_text_head(
 								        path,
 								        row_count=row_count,
 								        delimiter=delimiter,
 								        encoding=encoding,
 								        quotechar=quotechar,
 								    )
 								# -- Display -----------------------------------------------------------------
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								def _sample_values(series: pd.Series, n: int = 3) -> str:
 								    """Return up to *n* non-null sample values as a comma-separated string."""
 								    non_null = series.dropna()
 								    samples = non_null.head(n).tolist()
 								    if not samples:
 								        return "(all null)"
 								    return ", ".join(repr(v) for v in samples)
 								def _print_summary(df: pd.DataFrame) -> None:
 								    """Print a nicely formatted summary table to stdout."""
 								    # Pre-compute column data
 								    rows = []
 								    for col in df.columns:
 								        rows.append((col, str(df[col].dtype), _sample_values(df[col], 3)))
 								    # Determine column widths
 								    hdr_name = "Column Name"
 								    hdr_dtype = "Data Type"
 								    hdr_samples = "Sample Values (up to 3)"
 								    w_name = max(len(hdr_name), *(len(r[0]) for r in rows))
 								    w_dtype = max(len(hdr_dtype), *(len(r[1]) for r in rows))
 								    w_samples = max(len(hdr_samples), *(len(r[2]) for r in rows))
 								    fmt = f"  {{:<{w_name}}}  {{:<{w_dtype}}}  {{:<{w_samples}}}"
 								    sep = f"  {'-' * w_name}  {'-' * w_dtype}  {'-' * w_samples}"
 								    print()
 								    print(f"  Summary of first {len(df)} row(s)  ({len(df.columns)} columns)")
 								    print(sep)
 								    print(fmt.format(hdr_name, hdr_dtype, hdr_samples))
 								    print(sep)
 								    for name, dtype, samples in rows:
 								        print(fmt.format(name, dtype, samples))
 								    print(sep)
 								    print()
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								# ---------------------------------------------------------------------------
 								# CLI
 								# ---------------------------------------------------------------------------
 								def _build_parser() -> argparse.ArgumentParser:
 								    """Build the argument parser for the file-viewer CLI."""
 								    parser = argparse.ArgumentParser(
 								        description=(
 								            "Download a SAS or delimited text file from S3 (or read a local "
 								            "file) and print a column-level summary of the first N rows.\n\n"
 								            "Supported extensions: "
 								            + ", ".join(sorted(SUPPORTED_EXTENSIONS))
 								        ),
 								        formatter_class=argparse.RawDescriptionHelpFormatter,
 								    )
 								    source = parser.add_mutually_exclusive_group()
 								    source.add_argument(
 								        "--local",
 								        metavar="FILE",
 								        default=None,
 								        help=(
 								            "Path to a local data file to summarise (skips S3 download).  "
 								            "Supported extensions: "
 								            + ", ".join(sorted(SUPPORTED_EXTENSIONS))
 								        ),
 								    )
 								    source.add_argument(
 								        "--s3-key",
 								        metavar="KEY",
 								        default=None,
 								        help="Override the S3_KEY constant with this object key.",
 								    )
 								    parser.add_argument(
 								        "--rows",
 								        type=int,
 								        default=10,
 								        metavar="N",
 								        help="Number of rows to read (default: 10).",
 								    )
 								    # Text-file-specific options
 								    text_group = parser.add_argument_group(
 								        "text file options",
 								        "These options apply only to .csv / .tsv / .txt files.",
 								    )
 								    text_group.add_argument(
 								        "--delimiter",
 								        default=None,
 								        help=(
 								            'Column delimiter for text files (default: "," for .csv/.txt, '
 								            '"\\t" for .tsv).  Use $\'\\t\' in the shell for a literal tab.'
 								        ),
 								    )
 								    text_group.add_argument(
 								        "--encoding",
 								        default="utf-8",
 								        help='File encoding for text files (default: "utf-8").',
 								    )
 								    text_group.add_argument(
 								        "--quotechar",
 								        default='"',
 								        help='Quote character for text files (default: \'"\').',
 								    )
 								    return parser
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								# ---------------------------------------------------------------------------
 								# Main
 								# ---------------------------------------------------------------------------
 								if __name__ == "__main__":
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								    parser = _build_parser()
 								    args = parser.parse_args()
 								    if args.local:
 								        # ---- Local file mode -----------------------------------------------
 								        local_path = args.local
 								        ext = os.path.splitext(local_path)[1].lower()
 								        if ext not in SUPPORTED_EXTENSIONS:
 								            parser.error(
 								                f"Unsupported file extension '{ext}'. "
 								                f"Supported: {sorted(SUPPORTED_EXTENSIONS)}"
 								            )
 								        if not os.path.isfile(local_path):
 								            print(f"File not found: {local_path}", file=sys.stderr)
 								            sys.exit(1)
 								    else:
 								        # ---- S3 download mode ----------------------------------------------
 								        s3_key = args.s3_key or S3_KEY
 								        ext = os.path.splitext(s3_key)[1].lower()
 								        if ext not in SUPPORTED_EXTENSIONS:
 								            parser.error(
 								                f"Unsupported file extension '{ext}' in S3 key. "
 								                f"Supported: {sorted(SUPPORTED_EXTENSIONS)}"
 								            )
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								        os.makedirs(LOCAL_FOLDER, exist_ok=True)
 								        local_filename = os.path.basename(s3_key)
 								        local_path = os.path.join(LOCAL_FOLDER, local_filename)
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								        try:
 								            _ensure_local_copy(S3_BUCKET, s3_key, local_path)
 								        except Exception as exc:
 								            print(f"S3 download error: {exc}", file=sys.stderr)
 								            sys.exit(1)
 								    # ---- Read & summarise --------------------------------------------------
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								    try:
-												adding text file support

											
										
										
											2026-04-22 01:05:26 +00:00
+								        df = _read_head(
 								            local_path,
 								            row_count=args.rows,
 								            delimiter=args.delimiter,
 								            encoding=args.encoding,
 								            quotechar=args.quotechar,
 								        )
-												adding file_viewer

											
										
										
											2026-04-18 16:19:38 +00:00
+								    except Exception as exc:
 								        print(f"File read error: {exc}", file=sys.stderr)
 								        sys.exit(2)
 								    _print_summary(df)