moving to env file #10

Merged
mc merged 1 commit from move_to_env into main 2026-04-22 15:37:35 +00:00
5 changed files with 33 additions and 9 deletions
Showing only changes of commit 75bbf5fcd2 - Show all commits

3
.gitignore vendored
View File

@@ -1,6 +1,7 @@
/.venv /.venv
/samples /samples
/.env .env
!.env.example
__pycache__/ __pycache__/
venv/ venv/
*/__pycache__/ */__pycache__/

View File

@@ -3,3 +3,6 @@ PGPORT=5432
PGUSER= PGUSER=
PGPASSWORD= PGPASSWORD=
PGDATABASE= PGDATABASE=
S3_BUCKET=my-bucket
AWS_PROFILE=default

View File

@@ -35,6 +35,10 @@ import sys
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List, Set, Tuple from typing import List, Set, Tuple
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Dependency check # Dependency check
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -75,10 +79,10 @@ FILE_EXTENSIONS: Set[str] = SUPPORTED_EXTENSIONS
INPUT_FILE: str = "s3_directories.txt" INPUT_FILE: str = "s3_directories.txt"
"""Path to the text file containing one S3 prefix per line.""" """Path to the text file containing one S3 prefix per line."""
S3_BUCKET: str = "my-bucket" S3_BUCKET: str = os.environ.get("S3_BUCKET", "my-bucket")
"""S3 bucket name (all prefixes are assumed to live in this bucket).""" """S3 bucket name (all prefixes are assumed to live in this bucket)."""
AWS_PROFILE: str = "default" AWS_PROFILE: str = os.environ.get("AWS_PROFILE", "default")
"""AWS CLI profile name used for authentication.""" """AWS CLI profile name used for authentication."""
# Text-file reading defaults (used when downloading / previewing text files) # Text-file reading defaults (used when downloading / previewing text files)

View File

@@ -21,6 +21,10 @@ import argparse
import os import os
import sys import sys
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
import boto3 import boto3
import pandas as pd import pandas as pd
import pyreadstat import pyreadstat
@@ -44,7 +48,7 @@ SUPPORTED_EXTENSIONS: set[str] = SAS_EXTENSIONS | TEXT_EXTENSIONS
# Configuration — edit these before running (or use CLI arguments) # Configuration — edit these before running (or use CLI arguments)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
S3_BUCKET: str = "my-bucket" S3_BUCKET: str = os.environ.get("S3_BUCKET", "my-bucket")
"""S3 bucket name.""" """S3 bucket name."""
S3_KEY: str = "path/to/file.sas7bdat" S3_KEY: str = "path/to/file.sas7bdat"
@@ -53,7 +57,7 @@ S3_KEY: str = "path/to/file.sas7bdat"
LOCAL_FOLDER: str = "./downloads" LOCAL_FOLDER: str = "./downloads"
"""Local directory to download the file into.""" """Local directory to download the file into."""
AWS_PROFILE: str = "default" AWS_PROFILE: str = os.environ.get("AWS_PROFILE", "default")
"""AWS CLI profile name used for authentication.""" """AWS CLI profile name used for authentication."""

View File

@@ -92,6 +92,7 @@ Exit codes:
from __future__ import annotations from __future__ import annotations
import argparse import argparse
import os
import re import re
import sys import sys
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -99,6 +100,10 @@ from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
import boto3 import boto3
import yaml import yaml
@@ -226,15 +231,20 @@ def load_download_config(path: Path) -> DownloadConfig:
f"Config at {path} must be a YAML mapping at the top level." f"Config at {path} must be a YAML mapping at the top level."
) )
missing = [ # 'bucket' can fall back to the S3_BUCKET env var, so only flag it as
k for k in ("bucket", "prefix", "local_folder") if k not in raw # missing when neither the YAML key nor the env var is present.
] required_always = ("prefix", "local_folder")
missing = [k for k in required_always if k not in raw]
if "bucket" not in raw and not os.environ.get("S3_BUCKET"):
missing.insert(0, "bucket")
if missing: if missing:
raise ValueError( raise ValueError(
f"Config {path} missing required keys: {', '.join(missing)}" f"Config {path} missing required keys: {', '.join(missing)}"
) )
bucket = str(raw["bucket"]).strip() bucket = str(raw["bucket"]).strip() if raw.get("bucket") else ""
if not bucket:
bucket = os.environ.get("S3_BUCKET", "")
if not bucket: if not bucket:
raise ValueError(f"Config {path}: 'bucket' must be a non-empty string.") raise ValueError(f"Config {path}: 'bucket' must be a non-empty string.")
@@ -256,6 +266,8 @@ def load_download_config(path: Path) -> DownloadConfig:
aws_profile = raw.get("aws_profile") aws_profile = raw.get("aws_profile")
if aws_profile is not None: if aws_profile is not None:
aws_profile = str(aws_profile).strip() or None aws_profile = str(aws_profile).strip() or None
if aws_profile is None:
aws_profile = os.environ.get("AWS_PROFILE") or None
auto_detect = bool(raw.get("auto_detect", True)) auto_detect = bool(raw.get("auto_detect", True))
extensions = _parse_extensions(raw.get("extensions"), f"Config {path}") extensions = _parse_extensions(raw.get("extensions"), f"Config {path}")