diff --git a/.gitignore b/.gitignore index 64af339..0ea982e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ /.venv /samples -/.env +.env +!.env.example __pycache__/ venv/ */__pycache__/ \ No newline at end of file diff --git a/generic_loader/.env.example b/generic_loader/.env.example index 5be8065..11161dc 100644 --- a/generic_loader/.env.example +++ b/generic_loader/.env.example @@ -3,3 +3,6 @@ PGPORT=5432 PGUSER= PGPASSWORD= PGDATABASE= + +S3_BUCKET=my-bucket +AWS_PROFILE=default diff --git a/utils/data_explorer.py b/utils/data_explorer.py index e2825c2..ec53ba0 100644 --- a/utils/data_explorer.py +++ b/utils/data_explorer.py @@ -35,6 +35,10 @@ import sys from dataclasses import dataclass, field from typing import List, Set, Tuple +from dotenv import find_dotenv, load_dotenv + +load_dotenv(find_dotenv()) + # --------------------------------------------------------------------------- # Dependency check # --------------------------------------------------------------------------- @@ -75,10 +79,10 @@ FILE_EXTENSIONS: Set[str] = SUPPORTED_EXTENSIONS INPUT_FILE: str = "s3_directories.txt" """Path to the text file containing one S3 prefix per line.""" -S3_BUCKET: str = "my-bucket" +S3_BUCKET: str = os.environ.get("S3_BUCKET", "my-bucket") """S3 bucket name (all prefixes are assumed to live in this bucket).""" -AWS_PROFILE: str = "default" +AWS_PROFILE: str = os.environ.get("AWS_PROFILE", "default") """AWS CLI profile name used for authentication.""" # Text-file reading defaults (used when downloading / previewing text files) diff --git a/utils/file_viewer.py b/utils/file_viewer.py index 6c6343d..85f42eb 100644 --- a/utils/file_viewer.py +++ b/utils/file_viewer.py @@ -21,6 +21,10 @@ import argparse import os import sys +from dotenv import find_dotenv, load_dotenv + +load_dotenv(find_dotenv()) + import boto3 import pandas as pd import pyreadstat @@ -44,7 +48,7 @@ SUPPORTED_EXTENSIONS: set[str] = SAS_EXTENSIONS | TEXT_EXTENSIONS # Configuration — edit these before running (or use CLI arguments) # --------------------------------------------------------------------------- -S3_BUCKET: str = "my-bucket" +S3_BUCKET: str = os.environ.get("S3_BUCKET", "my-bucket") """S3 bucket name.""" S3_KEY: str = "path/to/file.sas7bdat" @@ -53,7 +57,7 @@ S3_KEY: str = "path/to/file.sas7bdat" LOCAL_FOLDER: str = "./downloads" """Local directory to download the file into.""" -AWS_PROFILE: str = "default" +AWS_PROFILE: str = os.environ.get("AWS_PROFILE", "default") """AWS CLI profile name used for authentication.""" diff --git a/utils/s3_download.py b/utils/s3_download.py index d1a2f2e..0cbb3b4 100644 --- a/utils/s3_download.py +++ b/utils/s3_download.py @@ -92,6 +92,7 @@ Exit codes: from __future__ import annotations import argparse +import os import re import sys from concurrent.futures import ThreadPoolExecutor, as_completed @@ -99,6 +100,10 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, Optional, Tuple +from dotenv import find_dotenv, load_dotenv + +load_dotenv(find_dotenv()) + import boto3 import yaml @@ -226,15 +231,20 @@ def load_download_config(path: Path) -> DownloadConfig: f"Config at {path} must be a YAML mapping at the top level." ) - missing = [ - k for k in ("bucket", "prefix", "local_folder") if k not in raw - ] + # 'bucket' can fall back to the S3_BUCKET env var, so only flag it as + # missing when neither the YAML key nor the env var is present. + required_always = ("prefix", "local_folder") + missing = [k for k in required_always if k not in raw] + if "bucket" not in raw and not os.environ.get("S3_BUCKET"): + missing.insert(0, "bucket") if missing: raise ValueError( f"Config {path} missing required keys: {', '.join(missing)}" ) - bucket = str(raw["bucket"]).strip() + bucket = str(raw["bucket"]).strip() if raw.get("bucket") else "" + if not bucket: + bucket = os.environ.get("S3_BUCKET", "") if not bucket: raise ValueError(f"Config {path}: 'bucket' must be a non-empty string.") @@ -256,6 +266,8 @@ def load_download_config(path: Path) -> DownloadConfig: aws_profile = raw.get("aws_profile") if aws_profile is not None: aws_profile = str(aws_profile).strip() or None + if aws_profile is None: + aws_profile = os.environ.get("AWS_PROFILE") or None auto_detect = bool(raw.get("auto_detect", True)) extensions = _parse_extensions(raw.get("extensions"), f"Config {path}")