advanced_analyzer #8
@ -227,6 +227,7 @@ import math
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import warnings
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||||
@ -239,8 +240,19 @@ import pyarrow.csv as pa_csv
|
|||||||
import pyreadstat
|
import pyreadstat
|
||||||
import yaml
|
import yaml
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from pandas.errors import PerformanceWarning
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
# ``_prepare_for_copy`` builds its output frame one column at a time with
|
||||||
|
# ``out[name] = ...``. On wide SAS files (~100+ columns) pandas prints a
|
||||||
|
# ``PerformanceWarning: DataFrame is highly fragmented`` once per chunk to
|
||||||
|
# nudge callers toward ``pd.concat(axis=1, ...)``. The fragmentation only
|
||||||
|
# matters for row-oriented ops or in-place ``.copy()``; we hand the frame
|
||||||
|
# straight to ``pyarrow.Table.from_pandas`` which reads columns
|
||||||
|
# independently, so the warning is pure noise for our pipeline. Filter it
|
||||||
|
# at import time; the filter is limited to pandas' ``PerformanceWarning``.
|
||||||
|
# Match on the message prefix as well as the category: a category-only filter
# would also hide every other pandas ``PerformanceWarning`` raised anywhere in
# the process (e.g. lexsort-depth or unsorted-index warnings), not just the
# fragmentation notice. ``message`` is a regex matched case-insensitively
# against the start of the warning text.
warnings.filterwarnings(
    "ignore",
    message="DataFrame is highly fragmented",
    category=PerformanceWarning,
)
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user