2026-04-21 22:32:18 +00:00
1 changed files with 40 additions and 5 deletions
--- a/generic_loader/load_sas.py
+++ b/generic_loader/load_sas.py
@@ -977,6 +977,30 @@ def _normalize_type(pg_type: str) -> str:
    return _TYPE_NORMALIZATION.get(stripped, stripped.lower())


+# Widening pairs: (inferred_from_source, existing_in_target). When the
+# incoming spec is narrower than the target we accept it - the value is
+# guaranteed to fit, and ``_prepare_for_copy`` already emits ``COPY``
+# payloads that Postgres silently promotes to the wider column type. The
+# INVERSE direction stays a hard failure: a BIGINT value does not fit in
+# an INTEGER column, so we must not let a cluster whose first file had
+# only small ints accept a later file with a value past int32. The
+# accepted (widening) case comes up most often on cluster loads where
+# file 1 pushed the target to BIGINT (a single value > 2_147_483_647)
+# and file N sits entirely within int32 range - strict equality would
+# reject file N even though the copy is trivially safe.
+_WIDENING_COMPATIBLE: set = {
+    ("smallint", "integer"),
+    ("smallint", "bigint"),
+    ("integer", "bigint"),
+    ("real", "double precision"),
+    # INTEGER / BIGINT into DOUBLE PRECISION is lossless for int32 and
+    # exact up to 2**53 for int64, which covers every value pandas could
+    # have carried through as Int64 without wrapping anyway.
+    ("integer", "double precision"),
+    ("bigint", "double precision"),
+}
+
+
 def _assert_schema_compatible(
    conn, schema: str, table: str, columns: Dict[str, ColumnSpec]
 ) -> None:
@@ -1003,6 +1027,17 @@ def _assert_schema_compatible(
        inferred_norm = _normalize_type(spec.postgres_type)
        target_norm = _normalize_type(target_type)
        if inferred_norm != target_norm:
+            if (inferred_norm, target_norm) in _WIDENING_COMPATIBLE:
+                # Narrower inferred type fits inside the wider target.
+                # Accept it, but record a warning so the operator knows
+                # the file came in with a smaller range than the
+                # cluster's target was sized for.
+                warnings.append(
+                    f"column {name!r}: inferred {spec.postgres_type} "
+                    f"(narrower than target {target_type}); accepting - "
+                    f"values fit in the wider target type"
+                )
+            else:
                mismatches.append(
                    f"column {name!r}: inferred {spec.postgres_type} "
                    f"(normalized {inferred_norm!r}) but target is {target_type} "