2026-04-21 22:32:18 +00:00
1 changed files with 12 additions and 0 deletions
--- a/generic_loader/load_sas.py
+++ b/generic_loader/load_sas.py
@ -227,6 +227,7 @@ import math
 import os
 import re
 import sys
 import warnings
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Tuple
@ -239,8 +240,19 @@ import pyarrow.csv as pa_csv
 import pyreadstat
 import yaml
 from dotenv import load_dotenv
 from pandas.errors import PerformanceWarning
 from tqdm import tqdm
 # ``_prepare_for_copy`` builds its output frame one column at a time with
 # ``out[name] = ...``. On wide SAS files (~100+ columns) pandas prints a
 # ``PerformanceWarning: DataFrame is highly fragmented`` once per chunk to
 # nudge callers toward ``pd.concat(axis=1, ...)``. The fragmentation only
 # matters for row-oriented ops or in-place ``.copy()``; we hand the frame
 # straight to ``pyarrow.Table.from_pandas`` which reads columns
 # independently, so the warning is pure noise for our pipeline.
 #
 # The filter is installed at import time and is process-wide, so it matches
 # on the message prefix as well as the category: only the fragmentation
 # warning is silenced and every other ``PerformanceWarning`` still surfaces.
 # ``message`` is a regex that must match the start of the warning text.
 warnings.filterwarnings(
     "ignore",
     message=r"DataFrame is highly fragmented",
     category=PerformanceWarning,
 )
 logger = logging.getLogger(__name__)