Add widening compatibility checks in load_sas.py for type inference

Introduced a set of widening-compatible type pairs so that the schema compatibility check accepts a narrower inferred type when it fits within a wider target type (for example, INTEGER into BIGINT). This prevents unnecessary mismatches when type inference yields different integer ranges across the files of a cluster load; the inverse (narrowing) direction remains a hard failure. Added a warning message that informs users whenever a narrower type is accepted.
This commit is contained in:
David Peterson 2026-04-20 21:08:13 -05:00
parent f84e127796
commit 5e347f50ef

View File

@ -977,6 +977,30 @@ def _normalize_type(pg_type: str) -> str:
return _TYPE_NORMALIZATION.get(stripped, stripped.lower()) return _TYPE_NORMALIZATION.get(stripped, stripped.lower())
# Widening pairs, each written as (inferred_from_source, existing_in_target).
#
# An incoming spec that is narrower than the target column is accepted: the
# value is guaranteed to fit, and ``_prepare_for_copy`` already emits ``COPY``
# payloads that Postgres silently promotes to the wider column type.
#
# The INVERSE direction remains a hard failure. A BIGINT value does not fit in
# an INTEGER column, so a cluster whose first file held only small ints must
# not accept a later file containing a value past int32.
#
# This matters most on cluster loads where file 1 pushed the target to BIGINT
# (a single value > 2_147_483_647) while file N sits entirely within int32
# range: strict equality would reject file N even though the copy is safe.
#
# Integer -> DOUBLE PRECISION: lossless for every int32 value, but exact for
# int64 only up to a magnitude of 2**53.
# NOTE(review): the original rationale was that this covers every value pandas
# would have carried through as Int64 anyway - confirm, because int64 values
# beyond 2**53 are rounded when stored as DOUBLE PRECISION.
_WIDENING_COMPATIBLE: set = {
    (narrow, wide)
    for narrow, wider_targets in (
        ("smallint", ("integer", "bigint")),
        ("integer", ("bigint", "double precision")),
        ("bigint", ("double precision",)),
        ("real", ("double precision",)),
    )
    for wide in wider_targets
}
def _assert_schema_compatible( def _assert_schema_compatible(
conn, schema: str, table: str, columns: Dict[str, ColumnSpec] conn, schema: str, table: str, columns: Dict[str, ColumnSpec]
) -> None: ) -> None:
@ -1003,11 +1027,22 @@ def _assert_schema_compatible(
inferred_norm = _normalize_type(spec.postgres_type) inferred_norm = _normalize_type(spec.postgres_type)
target_norm = _normalize_type(target_type) target_norm = _normalize_type(target_type)
if inferred_norm != target_norm: if inferred_norm != target_norm:
mismatches.append( if (inferred_norm, target_norm) in _WIDENING_COMPATIBLE:
f"column {name!r}: inferred {spec.postgres_type} " # Narrower inferred type fits inside the wider target.
f"(normalized {inferred_norm!r}) but target is {target_type} " # Accept silently-but-noisily so the operator knows the
f"(normalized {target_norm!r})" # file came in with a smaller range than the cluster's
) # target was sized for.
warnings.append(
f"column {name!r}: inferred {spec.postgres_type} "
f"(narrower than target {target_type}); accepting - "
f"values fit in the wider target type"
)
else:
mismatches.append(
f"column {name!r}: inferred {spec.postgres_type} "
f"(normalized {inferred_norm!r}) but target is {target_type} "
f"(normalized {target_norm!r})"
)
target_is_notnull = (target_nullable == "NO") target_is_notnull = (target_nullable == "NO")
if spec.nullable and target_is_notnull: if spec.nullable and target_is_notnull:
warnings.append( warnings.append(