diff --git a/generic_loader/load_folder.py b/generic_loader/load_folder.py
index 5136fc1..82fe459 100644
--- a/generic_loader/load_folder.py
+++ b/generic_loader/load_folder.py
@@ -90,6 +90,25 @@ Exit codes:
 * Auto-detect groups remaining files by ``re.sub(r'\\d+$', '', stem)`` with
   any trailing ``_`` / ``-`` stripped afterward. Stems without trailing
   digits become singleton clusters named after the stem.
+* Within a cluster, files are sorted **numerically** by the last digit
+  group in the stem, so ``..._9_...`` comes before ``..._10_...`` /
+  ``..._40_...`` regardless of zero-padding. The first file in that
+  order drives schema inference; the rest are checked against that
+  schema via :func:`load_sas.assert_schema_compatible`. Gaps in the
+  numeric sequence (missing ``3``, ``7``, ``14``, ...) are irrelevant -
+  whatever files are present get loaded in numeric order.
+* Auto-detect only recognizes *trailing* digit runs. File names where
+  the varying number sits in the middle of the stem (surrounded by
+  other name components) are not grouped by auto-detect - each becomes
+  its own singleton cluster. Use an explicit pattern to bucket them::
+
+      clusters:
+        - pattern: '^year2020_regionA_\\d+_detail\\.sas7bdat$'
+          tablename: year2020_regionA_detail
+
+  The regex matches any digit width, so ``9`` and ``40`` land in the
+  same cluster, and the numeric sort above puts ``9`` before ``40``
+  provided that number is the last digit group in the stem.
 
 4. Library usage
 ----------------
@@ -487,6 +506,7 @@ def load_folder_config(path: Path) -> FolderConfig:
 
 
 _TRAILING_DIGIT_RE = re.compile(r"\d+$")
+_DIGIT_GROUP_RE = re.compile(r"\d+")  # any digit run, anywhere in the stem
 
 
 def _auto_prefix(stem: str) -> str:
@@ -501,6 +521,22 @@ def _auto_prefix(stem: str) -> str:
     return stripped or stem
 
 
+def _cluster_sort_key(path: Path) -> Tuple[int, str]:
+    """Build the ``(number, stem)`` key that orders files inside a cluster.
+
+    ``number`` is the integer value of the LAST digit run in the stem, so
+    ``_9`` precedes ``_10`` / ``_40`` at any width and ``foo_9_detail``
+    precedes ``foo_40_detail``; the first file in this order is the one
+    used for schema inference. A stem with no digits gets ``-1`` (sorts
+    ahead of numbered files); the stem breaks ties reproducibly.
+    """
+    stem = path.stem
+    runs = _DIGIT_GROUP_RE.findall(stem)
+    if not runs:
+        return (-1, stem)
+    return (int(runs[-1]), stem)
+
+
 def _list_sas_files(folder: Path) -> List[Path]:
     files: List[Path] = []
     for p in sorted(folder.iterdir()):
@@ -582,7 +618,7 @@ def discover_clusters(cfg: FolderConfig) -> List[ClusterSpec]:
         clusters.append(
             ClusterSpec(
                 tablename=patt.tablename,
-                files=sorted(matched),
+                files=sorted(matched, key=_cluster_sort_key),
                 if_exists=patt.if_exists or cfg.if_exists,
                 include=patt.include if patt.include is not None else cfg.include,
                 exclude=patt.exclude if patt.exclude is not None else cfg.exclude,
@@ -603,7 +639,7 @@ def discover_clusters(cfg: FolderConfig) -> List[ClusterSpec]:
             clusters.append(
                 ClusterSpec(
                     tablename=key,
-                    files=sorted(buckets[key]),
+                    files=sorted(buckets[key], key=_cluster_sort_key),
                     if_exists=cfg.if_exists,
                     include=cfg.include,
                     exclude=cfg.exclude,
diff --git a/generic_loader/sample_folder_config.yaml b/generic_loader/sample_folder_config.yaml
index 066d840..5740c3f 100644
--- a/generic_loader/sample_folder_config.yaml
+++ b/generic_loader/sample_folder_config.yaml
@@ -19,6 +19,12 @@ if_exists: replace
 # auto-grouped with its peers by stripping trailing digits (and any trailing
 # _ / -) from the file stem. Files with no trailing digits become their own
 # singleton cluster.
+#
+# Auto-detect only recognizes *trailing* digit runs. If your file names put
+# the varying number in the middle of the stem (e.g. surrounded by year,
+# region, and detail components), auto-detect will NOT group them - each
+# file becomes its own singleton cluster. Use an explicit pattern instead;
+# see the embedded-digit example near the bottom of this file.
 auto_detect: true
 
 # Folder-level column filter. Every file in every cluster passes through
@@ -92,7 +98,20 @@ clusters:
   #     - region
   #     - year
 
-  # With only the gq pattern explicit, auto_detect: true will still bucket
-  # group_b1.xpt + group_b2.xpt into a "group_b" cluster and the lone
+  # Embedded-digit example. When the varying number sits in the MIDDLE of
+  # the stem (e.g. year2020_regionA_40_detail.sas7bdat,
+  # year2020_regionA_41_detail.sas7bdat, ...), auto-detect will NOT group
+  # them - each file becomes its own singleton cluster. An explicit
+  # pattern groups them correctly. The \d+ matches any digit width, and
+  # files within the cluster are sorted numerically by the last digit
+  # group in the stem, so _9_ sorts before _40_ regardless of zero-
+  # padding. Gaps in the numeric sequence (missing 3, 7, 14, ...) are
+  # fine - whatever files are present get loaded in numeric order.
+  #
+  # - pattern: '^year2020_regionA_\d+_detail\.sas7bdat$'
+  #   tablename: year2020_regionA_detail
+
+  # With only the group_a pattern explicit, auto_detect: true will still
+  # bucket group_b1.xpt + group_b2.xpt into a "group_b" cluster and the lone
   # standalone.xpt into a "standalone" cluster. See generate_sample_folder.py
   # for the fixture that exercises exactly this layout.