Enhance date and timestamp handling in _prepare_for_copy function in load_sas.py
Added support for numeric date and datetime conversions from SAS formats. Implemented logic to handle float64 representations of dates (days since 1960-01-01) and datetimes (seconds since 1960-01-01), ensuring proper parsing and preventing errors during data copying to Postgres. This enhancement improves compatibility with various SAS date formats.
This commit is contained in:
parent
2dd247b067
commit
1265489276
@ -1794,6 +1794,17 @@ def _prepare_for_copy(df: pd.DataFrame, columns: Dict[str, ColumnSpec]) -> pd.Da
|
||||
series.replace({"": None}), errors="coerce"
|
||||
)
|
||||
out[name] = parsed.dt.date
|
||||
elif pd.api.types.is_numeric_dtype(series):
|
||||
# pyreadstat couldn't decode the SAS format (some
|
||||
# ``DATEw.``/``YYMMDDw.`` variants and all custom formats slip
|
||||
# through) so the column came back as float64: days since
|
||||
# 1960-01-01, the SAS epoch. Without this branch the raw
|
||||
# number would hit COPY and Postgres rejects it with
|
||||
# ``invalid input syntax for type date``.
|
||||
parsed = pd.to_datetime(
|
||||
series, unit="D", origin="1960-01-01", errors="coerce",
|
||||
)
|
||||
out[name] = parsed.dt.date
|
||||
else:
|
||||
out[name] = series
|
||||
elif pg in ("TIMESTAMP", "TIMESTAMP WITHOUT TIME ZONE", "TIMESTAMP WITH TIME ZONE"):
|
||||
@ -1803,6 +1814,14 @@ def _prepare_for_copy(df: pd.DataFrame, columns: Dict[str, ColumnSpec]) -> pd.Da
|
||||
out[name] = pd.to_datetime(
|
||||
series.replace({"": None}), errors="coerce"
|
||||
)
|
||||
elif pd.api.types.is_numeric_dtype(series):
|
||||
# Same story as the DATE branch above, but SAS datetimes are
|
||||
# *seconds* since 1960-01-01 (fractional seconds for
|
||||
# ``DATETIMEw.d``). Example caught in the wild:
|
||||
# ``1915465463.615`` -> 2020-09-11 17:44:23.615.
|
||||
out[name] = pd.to_datetime(
|
||||
series, unit="s", origin="1960-01-01", errors="coerce",
|
||||
)
|
||||
else:
|
||||
out[name] = series
|
||||
elif pg in ("TIME", "TIME WITHOUT TIME ZONE", "TIME WITH TIME ZONE"):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user