advanced_analyzer #8

Merged
dp merged 23 commits from advanced_analyzer into main 2026-04-21 22:32:18 +00:00
Showing only changes of commit a46f0518f6 - Show all commits

View File

@ -227,6 +227,7 @@ import math
import os
import re
import sys
import warnings
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
@ -239,8 +240,19 @@ import pyarrow.csv as pa_csv
import pyreadstat
import yaml
from dotenv import load_dotenv
from pandas.errors import PerformanceWarning
from tqdm import tqdm
# ``_prepare_for_copy`` builds its output frame one column at a time with
# ``out[name] = ...``. On wide SAS files (~100+ columns) pandas prints a
# ``PerformanceWarning: DataFrame is highly fragmented`` once per chunk to
# nudge callers toward ``pd.concat(axis=1, ...)``. The fragmentation only
# matters for row-oriented ops or in-place ``.copy()``; we hand the frame
# straight to ``pyarrow.Table.from_pandas`` which reads columns
# independently, so the warning is pure noise for our pipeline.
#
# Filter it at import time. The filter matches on BOTH the category and the
# start of the message text (``message`` is a case-insensitive regex matched
# against the beginning of the warning), so every other pandas
# ``PerformanceWarning`` (e.g. unsorted-index lookups) still surfaces.
warnings.filterwarnings(
    "ignore",
    message=r"DataFrame is highly fragmented",
    category=PerformanceWarning,
)

logger = logging.getLogger(__name__)