advanced_analyzer #8
@ -227,6 +227,7 @@ import math
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import warnings
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
@ -239,8 +240,19 @@ import pyarrow.csv as pa_csv
|
||||
import pyreadstat
|
||||
import yaml
|
||||
from dotenv import load_dotenv
|
||||
from pandas.errors import PerformanceWarning
|
||||
from tqdm import tqdm
|
||||
|
||||
# ``_prepare_for_copy`` builds its output frame one column at a time with
|
||||
# ``out[name] = ...``. On wide SAS files (~100+ columns) pandas prints a
|
||||
# ``PerformanceWarning: DataFrame is highly fragmented`` once per chunk to
|
||||
# nudge callers toward ``pd.concat(axis=1, ...)``. The fragmentation only
|
||||
# matters for row-oriented ops or in-place ``.copy()``; we hand the frame
|
||||
# straight to ``pyarrow.Table.from_pandas`` which reads columns
|
||||
# independently, so the warning is pure noise for our pipeline. Filter it
|
||||
# at import time - narrow category match so nothing else is suppressed.
|
||||
# Match on the message as well as the category: a category-only filter would
# also hide every other pandas ``PerformanceWarning`` raised anywhere in the
# process, not just the fragmentation notice. ``message`` is a regex that the
# ``warnings`` module matches case-insensitively against the start of the
# warning text.
warnings.filterwarnings(
    "ignore",
    message=r"DataFrame is highly fragmented",
    category=PerformanceWarning,
)
|
||||
|
||||
|
||||
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user