advanced_analyzer #8

Merged
dp merged 23 commits from advanced_analyzer into main 2026-04-21 22:32:18 +00:00
Showing only changes of commit 1265489276 - Show all commits

View File

@ -1794,6 +1794,17 @@ def _prepare_for_copy(df: pd.DataFrame, columns: Dict[str, ColumnSpec]) -> pd.Da
series.replace({"": None}), errors="coerce"
)
out[name] = parsed.dt.date
elif pd.api.types.is_numeric_dtype(series):
# pyreadstat couldn't decode the SAS format (some
# ``DATEw.``/``YYMMDDw.`` variants and all custom formats slip
# through) so the column came back as float64: days since
# 1960-01-01, the SAS epoch. Without this branch the raw
# number would hit COPY and Postgres rejects it with
# ``invalid input syntax for type date``.
parsed = pd.to_datetime(
series, unit="D", origin="1960-01-01", errors="coerce",
)
out[name] = parsed.dt.date
else:
out[name] = series
elif pg in ("TIMESTAMP", "TIMESTAMP WITHOUT TIME ZONE", "TIMESTAMP WITH TIME ZONE"):
@ -1803,6 +1814,14 @@ def _prepare_for_copy(df: pd.DataFrame, columns: Dict[str, ColumnSpec]) -> pd.Da
out[name] = pd.to_datetime(
series.replace({"": None}), errors="coerce"
)
elif pd.api.types.is_numeric_dtype(series):
# Same story as the DATE branch above, but SAS datetimes are
# *seconds* since 1960-01-01 (fractional seconds for
# ``DATETIMEw.d``). Example caught in the wild:
# ``1915465463.615`` -> 2020-09-11 17:44:23.615.
out[name] = pd.to_datetime(
series, unit="s", origin="1960-01-01", errors="coerce",
)
else:
out[name] = series
elif pg in ("TIME", "TIME WITHOUT TIME ZONE", "TIME WITH TIME ZONE"):