diff --git a/utils/file_viewer.py b/utils/file_viewer.py
index 01fa0c1..0b3303d 100644
--- a/utils/file_viewer.py
+++ b/utils/file_viewer.py
@@ -40,10 +40,30 @@ AWS_PROFILE: str = "default"
 # ---------------------------------------------------------------------------
 
 
-def _download_from_s3(bucket: str, key: str, local_path: str) -> None:
-    """Download *key* from *bucket* to *local_path* using a named session."""
+def _ensure_local_copy(bucket: str, key: str, local_path: str) -> None:
+    """Download *key* from *bucket* to *local_path*, skipping if already present.
+
+    If *local_path* exists and its size matches the S3 object's size, the
+    download is skipped and a message is printed.
+    """
     session = boto3.Session(profile_name=AWS_PROFILE)
     s3 = session.client("s3")
+
+    remote_size = s3.head_object(Bucket=bucket, Key=key)["ContentLength"]
+
+    if os.path.exists(local_path):
+        local_size = os.path.getsize(local_path)
+        if local_size == remote_size:
+            print(
+                f"Local file {local_path} already matches s3://{bucket}/{key} "
+                f"({local_size} bytes); skipping download."
+            )
+            return
+        print(
+            f"Local file {local_path} size ({local_size} bytes) differs from "
+            f"S3 ({remote_size} bytes); re-downloading."
+        )
+
     print(f"Downloading s3://{bucket}/{key} -> {local_path}")
     s3.download_file(bucket, key, local_path)
     print("Download complete.")
@@ -106,7 +126,7 @@ if __name__ == "__main__":
     local_path = os.path.join(LOCAL_FOLDER, local_filename)
 
     try:
-        _download_from_s3(S3_BUCKET, S3_KEY, local_path)
+        _ensure_local_copy(S3_BUCKET, S3_KEY, local_path)
     except Exception as exc:
         print(f"S3 download error: {exc}", file=sys.stderr)
         sys.exit(1)