diff --git a/generic_loader/load_sas.py b/generic_loader/load_sas.py index 90a4615..3882580 100644 --- a/generic_loader/load_sas.py +++ b/generic_loader/load_sas.py @@ -977,6 +977,30 @@ def _normalize_type(pg_type: str) -> str: return _TYPE_NORMALIZATION.get(stripped, stripped.lower()) +# Widening pairs of normalized type names: (inferred_from_source, existing_in_target). When the +# incoming spec is narrower than the target we accept it - every value is +# guaranteed to fit, and ``_prepare_for_copy`` already emits ``COPY`` +# payloads that Postgres silently promotes to the wider column type. The +# INVERSE direction stays a hard failure: a BIGINT value does not fit in +# an INTEGER column, so we must not let a cluster whose first file had +# only small ints accept a later file with a value past int32. Comes up +# most often on cluster loads where file 1 pushed the target to BIGINT +# (a single value > 2_147_483_647) and file N happens to sit entirely +# within int32 range - strict equality would reject file N even though +# the copy is trivially safe. +_WIDENING_COMPATIBLE: set = { + ("smallint", "integer"), + ("smallint", "bigint"), + ("integer", "bigint"), + ("real", "double precision"), + # INTEGER into DOUBLE PRECISION is lossless; BIGINT is exact only up to + # 2**53 in magnitude. NOTE(review): larger int64 values would be rounded - + # confirm the source can never carry them before relying on this pair. 
+ ("integer", "double precision"), + ("bigint", "double precision"), +} + + def _assert_schema_compatible( conn, schema: str, table: str, columns: Dict[str, ColumnSpec] ) -> None: @@ -1003,11 +1027,22 @@ def _assert_schema_compatible( inferred_norm = _normalize_type(spec.postgres_type) target_norm = _normalize_type(target_type) if inferred_norm != target_norm: - mismatches.append( - f"column {name!r}: inferred {spec.postgres_type} " - f"(normalized {inferred_norm!r}) but target is {target_type} " - f"(normalized {target_norm!r})" - ) + if (inferred_norm, target_norm) in _WIDENING_COMPATIBLE: + # Narrower inferred type fits inside the wider target. + # Accept it, but record a warning so the operator knows the + # file came in with a smaller range than the cluster's + # target was sized for. + warnings.append( + f"column {name!r}: inferred {spec.postgres_type} " + f"(narrower than target {target_type}); accepting - " + f"values fit in the wider target type" + ) + else: + mismatches.append( + f"column {name!r}: inferred {spec.postgres_type} " + f"(normalized {inferred_norm!r}) but target is {target_type} " + f"(normalized {target_norm!r})" + ) target_is_notnull = (target_nullable == "NO") if spec.nullable and target_is_notnull: warnings.append(