moving to env file
This commit is contained in:
parent
0632e110e5
commit
f63d684d51
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,6 +1,7 @@
|
|||||||
/.venv
|
/.venv
|
||||||
/samples
|
/samples
|
||||||
/.env
|
.env
|
||||||
|
!.env.example
|
||||||
__pycache__/
|
__pycache__/
|
||||||
venv/
|
venv/
|
||||||
*/__pycache__/
|
*/__pycache__/
|
||||||
@@ -3,3 +3,6 @@ PGPORT=5432
|
|||||||
PGUSER=
|
PGUSER=
|
||||||
PGPASSWORD=
|
PGPASSWORD=
|
||||||
PGDATABASE=
|
PGDATABASE=
|
||||||
|
|
||||||
|
S3_BUCKET=my-bucket
|
||||||
|
AWS_PROFILE=default
|
||||||
|
|||||||
@@ -35,6 +35,10 @@ import sys
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import List, Set, Tuple
|
from typing import List, Set, Tuple
|
||||||
|
|
||||||
|
from dotenv import find_dotenv, load_dotenv
|
||||||
|
|
||||||
|
load_dotenv(find_dotenv())
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Dependency check
|
# Dependency check
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -75,10 +79,10 @@ FILE_EXTENSIONS: Set[str] = SUPPORTED_EXTENSIONS
|
|||||||
INPUT_FILE: str = "s3_directories.txt"
|
INPUT_FILE: str = "s3_directories.txt"
|
||||||
"""Path to the text file containing one S3 prefix per line."""
|
"""Path to the text file containing one S3 prefix per line."""
|
||||||
|
|
||||||
S3_BUCKET: str = "my-bucket"
|
S3_BUCKET: str = os.environ.get("S3_BUCKET", "my-bucket")
|
||||||
"""S3 bucket name (all prefixes are assumed to live in this bucket)."""
|
"""S3 bucket name (all prefixes are assumed to live in this bucket)."""
|
||||||
|
|
||||||
AWS_PROFILE: str = "default"
|
AWS_PROFILE: str = os.environ.get("AWS_PROFILE", "default")
|
||||||
"""AWS CLI profile name used for authentication."""
|
"""AWS CLI profile name used for authentication."""
|
||||||
|
|
||||||
# Text-file reading defaults (used when downloading / previewing text files)
|
# Text-file reading defaults (used when downloading / previewing text files)
|
||||||
|
|||||||
@@ -21,6 +21,10 @@ import argparse
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from dotenv import find_dotenv, load_dotenv
|
||||||
|
|
||||||
|
load_dotenv(find_dotenv())
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pyreadstat
|
import pyreadstat
|
||||||
@@ -44,7 +48,7 @@ SUPPORTED_EXTENSIONS: set[str] = SAS_EXTENSIONS | TEXT_EXTENSIONS
|
|||||||
# Configuration — edit these before running (or use CLI arguments)
|
# Configuration — edit these before running (or use CLI arguments)
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
S3_BUCKET: str = "my-bucket"
|
S3_BUCKET: str = os.environ.get("S3_BUCKET", "my-bucket")
|
||||||
"""S3 bucket name."""
|
"""S3 bucket name."""
|
||||||
|
|
||||||
S3_KEY: str = "path/to/file.sas7bdat"
|
S3_KEY: str = "path/to/file.sas7bdat"
|
||||||
@@ -53,7 +57,7 @@ S3_KEY: str = "path/to/file.sas7bdat"
|
|||||||
LOCAL_FOLDER: str = "./downloads"
|
LOCAL_FOLDER: str = "./downloads"
|
||||||
"""Local directory to download the file into."""
|
"""Local directory to download the file into."""
|
||||||
|
|
||||||
AWS_PROFILE: str = "default"
|
AWS_PROFILE: str = os.environ.get("AWS_PROFILE", "default")
|
||||||
"""AWS CLI profile name used for authentication."""
|
"""AWS CLI profile name used for authentication."""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -92,6 +92,7 @@ Exit codes:
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
@@ -99,6 +100,10 @@ from dataclasses import dataclass, field
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from dotenv import find_dotenv, load_dotenv
|
||||||
|
|
||||||
|
load_dotenv(find_dotenv())
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
@@ -226,15 +231,20 @@ def load_download_config(path: Path) -> DownloadConfig:
|
|||||||
f"Config at {path} must be a YAML mapping at the top level."
|
f"Config at {path} must be a YAML mapping at the top level."
|
||||||
)
|
)
|
||||||
|
|
||||||
missing = [
|
# 'bucket' can fall back to the S3_BUCKET env var, so only flag it as
|
||||||
k for k in ("bucket", "prefix", "local_folder") if k not in raw
|
# missing when neither the YAML key nor the env var is present.
|
||||||
]
|
required_always = ("prefix", "local_folder")
|
||||||
|
missing = [k for k in required_always if k not in raw]
|
||||||
|
if "bucket" not in raw and not os.environ.get("S3_BUCKET"):
|
||||||
|
missing.insert(0, "bucket")
|
||||||
if missing:
|
if missing:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Config {path} missing required keys: {', '.join(missing)}"
|
f"Config {path} missing required keys: {', '.join(missing)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
bucket = str(raw["bucket"]).strip()
|
bucket = str(raw["bucket"]).strip() if raw.get("bucket") else ""
|
||||||
|
if not bucket:
|
||||||
|
bucket = os.environ.get("S3_BUCKET", "")
|
||||||
if not bucket:
|
if not bucket:
|
||||||
raise ValueError(f"Config {path}: 'bucket' must be a non-empty string.")
|
raise ValueError(f"Config {path}: 'bucket' must be a non-empty string.")
|
||||||
|
|
||||||
@@ -256,6 +266,8 @@ def load_download_config(path: Path) -> DownloadConfig:
|
|||||||
aws_profile = raw.get("aws_profile")
|
aws_profile = raw.get("aws_profile")
|
||||||
if aws_profile is not None:
|
if aws_profile is not None:
|
||||||
aws_profile = str(aws_profile).strip() or None
|
aws_profile = str(aws_profile).strip() or None
|
||||||
|
if aws_profile is None:
|
||||||
|
aws_profile = os.environ.get("AWS_PROFILE") or None
|
||||||
|
|
||||||
auto_detect = bool(raw.get("auto_detect", True))
|
auto_detect = bool(raw.get("auto_detect", True))
|
||||||
extensions = _parse_extensions(raw.get("extensions"), f"Config {path}")
|
extensions = _parse_extensions(raw.get("extensions"), f"Config {path}")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user