moving to env file #10

Merged
mc merged 1 commit from move_to_env into main 2026-04-22 15:37:35 +00:00
5 changed files with 33 additions and 9 deletions

3
.gitignore vendored
View File

@ -1,6 +1,7 @@
/.venv
/samples
/.env
.env
!.env.example
__pycache__/
venv/
*/__pycache__/

View File

@ -3,3 +3,6 @@ PGPORT=5432
PGUSER=
PGPASSWORD=
PGDATABASE=
S3_BUCKET=my-bucket
AWS_PROFILE=default

View File

@ -35,6 +35,10 @@ import sys
from dataclasses import dataclass, field
from typing import List, Set, Tuple
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
# ---------------------------------------------------------------------------
# Dependency check
# ---------------------------------------------------------------------------
@ -75,10 +79,10 @@ FILE_EXTENSIONS: Set[str] = SUPPORTED_EXTENSIONS
# Module-level configuration for this script.
INPUT_FILE: str = "s3_directories.txt"
"""Path to the text file containing one S3 prefix per line."""
# NOTE(review): the literal assignment below is immediately superseded by the
# env-backed assignment that follows it — it looks like the pre-change line of
# this diff hunk; confirm only the env-backed form remains in the real file.
S3_BUCKET: str = "my-bucket"
# Taken from the S3_BUCKET environment variable when set; otherwise "my-bucket".
S3_BUCKET: str = os.environ.get("S3_BUCKET", "my-bucket")
"""S3 bucket name (all prefixes are assumed to live in this bucket)."""
# NOTE(review): same pattern — this literal is overridden by the next line.
AWS_PROFILE: str = "default"
# Taken from the AWS_PROFILE environment variable when set; otherwise "default".
AWS_PROFILE: str = os.environ.get("AWS_PROFILE", "default")
"""AWS CLI profile name used for authentication."""
# Text-file reading defaults (used when downloading / previewing text files)

View File

@ -21,6 +21,10 @@ import argparse
import os
import sys
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
import boto3
import pandas as pd
import pyreadstat
@ -44,7 +48,7 @@ SUPPORTED_EXTENSIONS: set[str] = SAS_EXTENSIONS | TEXT_EXTENSIONS
# Configuration — set S3_BUCKET / AWS_PROFILE in the environment (or a .env file),
# edit the defaults below, or use CLI arguments
# ---------------------------------------------------------------------------
# NOTE(review): the literal assignment below is immediately superseded by the
# env-backed assignment that follows it — it looks like the pre-change line of
# this diff hunk; confirm only the env-backed form remains in the real file.
S3_BUCKET: str = "my-bucket"
# Taken from the S3_BUCKET environment variable when set; otherwise "my-bucket".
S3_BUCKET: str = os.environ.get("S3_BUCKET", "my-bucket")
"""S3 bucket name."""
# Hard-coded placeholder object key; unlike the bucket, it is not read from the environment here.
S3_KEY: str = "path/to/file.sas7bdat"
@ -53,7 +57,7 @@ S3_KEY: str = "path/to/file.sas7bdat"
# Hard-coded default download directory (not read from the environment here).
LOCAL_FOLDER: str = "./downloads"
"""Local directory to download the file into."""
# NOTE(review): the literal assignment below is immediately superseded by the
# env-backed assignment that follows it — it looks like the pre-change line of
# this diff hunk; confirm only the env-backed form remains in the real file.
AWS_PROFILE: str = "default"
# Taken from the AWS_PROFILE environment variable when set; otherwise "default".
AWS_PROFILE: str = os.environ.get("AWS_PROFILE", "default")
"""AWS CLI profile name used for authentication."""

View File

@ -92,6 +92,7 @@ Exit codes:
from __future__ import annotations
import argparse
import os
import re
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
@ -99,6 +100,10 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
import boto3
import yaml
@ -226,15 +231,20 @@ def load_download_config(path: Path) -> DownloadConfig:
f"Config at {path} must be a YAML mapping at the top level."
)
missing = [
k for k in ("bucket", "prefix", "local_folder") if k not in raw
]
# 'bucket' can fall back to the S3_BUCKET env var, so only flag it as
# missing when neither the YAML key nor the env var is present.
required_always = ("prefix", "local_folder")
missing = [k for k in required_always if k not in raw]
if "bucket" not in raw and not os.environ.get("S3_BUCKET"):
missing.insert(0, "bucket")
if missing:
raise ValueError(
f"Config {path} missing required keys: {', '.join(missing)}"
)
bucket = str(raw["bucket"]).strip()
bucket = str(raw["bucket"]).strip() if raw.get("bucket") else ""
if not bucket:
bucket = os.environ.get("S3_BUCKET", "")
if not bucket:
raise ValueError(f"Config {path}: 'bucket' must be a non-empty string.")
@ -256,6 +266,8 @@ def load_download_config(path: Path) -> DownloadConfig:
aws_profile = raw.get("aws_profile")
if aws_profile is not None:
aws_profile = str(aws_profile).strip() or None
if aws_profile is None:
aws_profile = os.environ.get("AWS_PROFILE") or None
auto_detect = bool(raw.get("auto_detect", True))
extensions = _parse_extensions(raw.get("extensions"), f"Config {path}")