From 7aeb8222bb6912ae95d37e82787d7bdc861b2b0e Mon Sep 17 00:00:00 2001
From: Scott Schenkein <schenksj@yahoo.com>
Date: Sun, 21 Jun 2026 12:48:01 -0400
Subject: [PATCH 1/2] =?UTF-8?q?feat:=20contrib=20Delta=20serde=20+=20nativ?=
 =?UTF-8?q?e=20exec=20=E2=80=94=20end-to-end=20native=20reads=20[Delta=20c?=
 =?UTF-8?q?ontrib=20split,=20part=204b]?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Part 4b of the Delta Lake contrib PR breakup (tracking: #4366). The red-to-green moment: a
`-Pcontrib-delta` build now does END-TO-END native Delta reads. `CometExecRule`'s scanHandler
lookup (wired in part 2) now resolves -- the serde converts the `CometDeltaScanMarker` (planted
by part 4a's DeltaScanRule) into a `CometDeltaNativeScanExec` that reads through delta-kernel-rs
(parts 3a/3b).

- `CometDeltaNativeScan.scala` — the serde: marker -> native scan operator (schema annotation,
  column mapping, row tracking, partition handling). CDF conversion is deferred to part 5 (the
  `convertCdf` path is carved out here to avoid a compile dependency on `CometDeltaCdfScanExec`).
  `ScanImpl` is not redefined — part 4a moved it to `DeltaScanMetadata`.
- `CometDeltaNativeScanExec.scala` — the exec (`CometScanWithPlanData`): synthesises file
  partitions from kernel scan tasks, applies DPP pruning. Interim error semantics (until part 8):
  the `perPartitionFilePaths` / `FAILED_READ_FILE` plumbing is omitted, so a Delta read failure
  surfaces as a generic `CometNativeException` (the `CometExecRDD` param defaults to empty).
- `Native.scala` — JNI declarations binding the part-3a Rust entry points.
- `DeltaPlanDataInjector.scala` — registers under `OpStruct::DELTA_SCAN`; part 1's reflective
  registry picks it up, so per-partition Delta data is injected at execution.

No core / earlier-unit edits — the reflective wiring already reaches the serde + injector the
moment these classes land.

Tests (gated, end-to-end native reads): CometDeltaNativeSuite (19), CometDeltaColumnMappingSuite (5),
CometDeltaFeaturesSuite (8), CometDeltaCoverageSuite (24), CometDeltaColumnMappingPhysicalNameReproSuite
(1) — all pass. CometDeltaTestBase re-gains the native-read helpers (kept part 4a's marker helpers
that are still used). CometDeltaMarkerSuite updated: with the serde present, a claimed scan now
engages `CometDeltaNativeScanExec` (it no longer leaves the marker in the plan), so its assertions
moved from marker-presence to native engagement.

Verification: gated JVM test-compile, 60 contrib tests across 6 suites, spotless/scalastyle,
check-suites, gate-verify (default build still 0 Delta symbols) — all green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../contrib/delta/CometDeltaNativeScan.scala  | 1522 +++++++++++++++++
 .../apache/comet/contrib/delta/Native.scala   |   82 +
 .../sql/comet/CometDeltaNativeScanExec.scala  |  554 ++++++
 .../sql/comet/DeltaPlanDataInjector.scala     |   91 +
 ...aColumnMappingPhysicalNameReproSuite.scala |   53 +
 .../delta/CometDeltaColumnMappingSuite.scala  |  211 +++
 .../delta/CometDeltaCoverageSuite.scala       |  516 ++++++
 .../delta/CometDeltaFeaturesSuite.scala       |  269 +++
 .../contrib/delta/CometDeltaMarkerSuite.scala |   53 +-
 .../contrib/delta/CometDeltaNativeSuite.scala |  490 ++++++
 .../contrib/delta/CometDeltaTestBase.scala    |   97 +-
 11 files changed, 3879 insertions(+), 59 deletions(-)
 create mode 100644 contrib/delta/src/main/scala/org/apache/comet/contrib/delta/CometDeltaNativeScan.scala
 create mode 100644 contrib/delta/src/main/scala/org/apache/comet/contrib/delta/Native.scala
 create mode 100644 contrib/delta/src/main/scala/org/apache/spark/sql/comet/CometDeltaNativeScanExec.scala
 create mode 100644 contrib/delta/src/main/scala/org/apache/spark/sql/comet/DeltaPlanDataInjector.scala
 create mode 100644 contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaColumnMappingPhysicalNameReproSuite.scala
 create mode 100644 contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaColumnMappingSuite.scala
 create mode 100644 contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaCoverageSuite.scala
 create mode 100644 contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaFeaturesSuite.scala
 create mode 100644 contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaNativeSuite.scala

diff --git a/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/CometDeltaNativeScan.scala b/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/CometDeltaNativeScan.scala
new file mode 100644
index 0000000000..db0fde87e9
--- /dev/null
+++ b/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/CometDeltaNativeScan.scala
@@ -0,0 +1,1522 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.delta
+
+import java.util.Locale
+
+import scala.collection.mutable.ListBuffer
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{And, BoundReference, Expression, InterpretedPredicate}
+import org.apache.spark.sql.comet.{CometDeltaNativeScanExec, CometDeltaScanMarker, CometNativeExec, SerializedPlan}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types._
+
+import org.apache.comet.{CometConf, ConfigEntry}
+// Typed Delta proto messages (`DeltaScan`, `DeltaScanCommon`, `DeltaScanTaskList`) live in
+// core's operator.proto alongside IcebergScan, so they are imported from core's generated
+// `OperatorOuterClass` below. They used to be contrib-private: generated by
+// protoc-jar-maven-plugin from contrib/delta/native/src/proto/delta_operator.proto with
+// `option java_package = "org.apache.comet.contrib.delta.proto"`. That contrib-private
+// package is not imported here.
+import org.apache.comet.serde.OperatorOuterClass.{DeltaScan, DeltaScanCommon, DeltaScanTaskList}
+import org.apache.comet.objectstore.NativeConfig
+import org.apache.comet.serde.{CometOperatorSerde, Compatible, ExprOuterClass, OperatorOuterClass, SupportLevel}
+import org.apache.comet.serde.ExprOuterClass.Expr
+import org.apache.comet.serde.OperatorOuterClass.Operator
+import org.apache.comet.serde.QueryPlanSerde.exprToProto
+import org.apache.comet.serde.operator.schema2Proto
+
+/**
+ * Validation and serde logic for the native Delta Lake scan.
+ *
+ * `convert()` calls `Native.planDeltaScan` to enumerate files via `delta-kernel-rs`, builds the
+ * `DeltaScanCommon` proto with schemas/filters/options, applies static partition pruning, and
+ * stashes the task list in a ThreadLocal. `createExec()` retrieves it and builds a
+ * `CometDeltaNativeScanExec` with split-mode serialization: common data serialized once at
+ * planning time, per-partition task lists materialized lazily at execution time. DPP filters are
+ * applied at execution time in the exec's `serializedPartitionData`.
+ *
+ * Wiring into core:
+ * Delta-scan serde + exec factory. Extends Comet's core `CometOperatorSerde` trait so
+ * the existing convertToComet path in `CometExecRule` invokes it just like the
+ * built-in handlers (CometNativeScan, CometIcebergNativeScan, ...). What is NOT here
+ * is any *extension/discovery* SPI -- core's `CometExecRule` resolves this object via
+ * `DeltaIntegration.scanHandler` (one reflective class lookup, no ServiceLoader, no
+ * registry). The wire format is the typed `OpStruct::DeltaScan` variant.
+ */
+object CometDeltaNativeScan extends CometOperatorSerde[CometDeltaScanMarker] with Logging {
+
+  // Single source of truth for the Spark `_metadata.*` file-level column names the native
+  // Delta scan synthesises. These were repeated verbatim across several scan-planning
+  // methods below; defined once here so the emit-name lists can't drift (a dropped name =>
+  // N-1 columns where Spark expected N -- the class of bug behind several CDC/row-tracking
+  // failures). Mirror the native `META_*` consts in contrib/delta/native/src/synthetic_columns.rs.
+  private[delta] val SparkFileMetadataNames: Set[String] = Seq(
+    "file_path",
+    "file_name",
+    "file_size",
+    "file_block_start",
+    "file_block_length",
+    "file_modification_time").toSet
+
+  // Row-tracking constants stored per file; only row-tracking-enabled tables carry them.
+  // The pair travels together: `base_row_id` without `default_row_commit_version` leaves
+  // row-tracking/CDC reads with N-1 columns where Spark expected N.
+  private[delta] val PerFileRowTrackingNames: Set[String] =
+    Seq("base_row_id", "default_row_commit_version").toSet
+
+  // Union of the two groups above: every per-file metadata column name.
+  private[delta] val PerFileMetadataNames: Set[String] =
+    SparkFileMetadataNames.union(PerFileRowTrackingNames)
+
+  /**
+   * `kind` string for the `ContribOp` envelope this serde produces. The native side's
+   * `comet-contrib-delta` rlib registers `DeltaScanPlanner` under this same kind via
+   * `register_contrib_planner(DELTA_SCAN_KIND, ...)` in `contrib/delta/native/src/lib.rs`. Keep
+   * the two in sync.
+   */
+  val DeltaScanKind: String = "delta-scan"
+
+  /** Private lazy handle to the native library - one instance per JVM. */
+  private lazy val nativeLib = new org.apache.comet.contrib.delta.Native()
+
+  // Phase 5: stash the raw task-list bytes between convert() and createExec()
+  // so the exec can do per-partition splitting at execution time. Single-threaded
+  // during planning so a simple ThreadLocal is safe.
+  private val lastTaskListBytes = new ThreadLocal[Array[Byte]]()
+
+  // When a scan projects a per-file `_metadata.file_path` column, `DeltaScanRule` sets
+  // `oneTaskPerPartition = true` in the marker's `DeltaScanMetadata`. We read it here to (a) skip
+  // byte-range splitting in splitTasks and (b) emit `oneTaskPerPartition = true` on the
+  // CometDeltaNativeScanExec so packTasks keeps each task in its own partition -- the native
+  // plan emits one parquet file-group per file, so multiple files in one Spark partition would
+  // drop the 2nd+ files' rows.
+  /**
+   * True for Delta's MATERIALISED row-tracking column names
+   * (`_row-id-col-<uuid>` / `_row-commit-version-col-<uuid>`). These are real parquet
+   * columns persisted when a file is rewritten to keep row IDs stable, read from the
+   * file by name -- NOT synthesised. (Distinct from the logical `row_id` /
+   * `row_commit_version` synthetic columns, which ARE synthesised from baseRowId +
+   * row_index when no materialised column exists.)
+   */
+  private[delta] def isMaterializedRowTrackingName(name: String): Boolean = {
+    val lowered = name.toLowerCase(Locale.ROOT)
+    Seq("_row-id-col-", "_row-commit-version-col-").exists(prefix => lowered.startsWith(prefix))
+  }
+
+  // Kernel's marker for a metadata column: field metadata `delta.metadataSpec` whose value is the
+  // spec's text (e.g. "row_id"). Matches `ColumnMetadataKey::MetadataSpec` + `MetadataColumnSpec`
+  // in delta-kernel-rs; it rides through Spark's `StructType.json` into kernel's `StructType` serde.
+  private val KernelMetadataSpecKey = "delta.metadataSpec"
+  private val KernelRowIdSpec = "row_id"
+
+  /**
+   * If `f` is a materialised row-ID column (`_row-id-col-*`), return it re-marked as kernel's RowId
+   * metadata column so kernel resolves it from `delta.rowTracking.materializedRowIdColumnName` by
+   * name (no column-mapping id/physicalName needed) and generates the value. Otherwise return `f`
+   * unchanged. Only invoked under active column mapping (see caller).
+   */
+  private def asKernelRowIdMetadataColumnIfMaterialized(f: StructField): StructField =
+    f.name.toLowerCase(Locale.ROOT) match {
+      case lowered if lowered.startsWith("_row-id-col-") =>
+        // Keep whatever metadata the field already has and add kernel's RowId marker to it.
+        val marked = new MetadataBuilder()
+          .withMetadata(f.metadata)
+          .putString(KernelMetadataSpecKey, KernelRowIdSpec)
+          .build()
+        f.copy(dataType = LongType, nullable = true, metadata = marked)
+      case _ => f // not a materialised row-id column: returned unchanged
+    }
+
+  /*
+   * Notes for `translateDeltaFieldIdToParquet` (defined further below): it translates Delta's
+   * `delta.columnMapping.id` metadata key to Spark+parquet's standard `parquet.field.id` key on
+   * every StructField at every level of nesting. Required for column-mapping `id` mode: Delta
+   * writes parquet files with `PARQUET:field_id` metadata (i.e. the same field IDs it stores in
+   * its own metadata), but Spark's `ParquetUtils.hasFieldId` -- and therefore Comet's
+   * serialisers -- only look at `parquet.field.id`. Without this translation, `use_field_id=true`
+   * would still find no IDs on the Spark schema and silently degrade to name-based matching.
+   *
+   * Top-level field metadata gets the new entry merged in via `MetadataBuilder`; nested
+   * StructTypes recurse; ArrayType and MapType walk into their element/key/value types.
+   * Fields without `delta.columnMapping.id` are passed through unchanged (e.g. partition
+   * columns, synthetic row-index columns, struct-leaf fields the metadata strip elided).
+   */
+  /**
+   * Lower-cased names that may appear in `scan.requiredSchema` but are NOT real parquet columns:
+   * Delta/Spark synthetic columns plus the per-file `_metadata.*` / row-tracking columns, all
+   * synthesised natively after the scan. They must be excluded from the kernel read projection
+   * (kernel would look for non-existent file columns). Mirrors the `syntheticNames` set used
+   * later when stripping `required_schema` for the proto.
+   *
+   * The per-file names are taken from [[PerFileMetadataNames]] (declared earlier in this object,
+   * so it is initialised first) rather than re-listed here, so the two lists cannot drift.
+   * Materialised row-tracking columns (`_row-id-col-*` / `_row-commit-version-col-*`) are real
+   * parquet columns and are deliberately NOT here.
+   */
+  private[delta] val SyntheticReadFieldNames: Set[String] = {
+    // Synthetic per-row columns; their names are owned by DeltaReflection.
+    val syntheticRowColumns = Set(
+      DeltaReflection.RowIndexColumnName,
+      DeltaReflection.TmpMetadataRowIndexColumnName,
+      DeltaReflection.IsRowDeletedColumnName,
+      DeltaReflection.RowIdColumnName,
+      DeltaReflection.RowCommitVersionColumnName)
+    (syntheticRowColumns ++ PerFileMetadataNames).map(_.toLowerCase(Locale.ROOT))
+  }
+
+  /**
+   * The query's data-read columns -- `scan.requiredSchema` minus synthetic/metadata columns
+   * (partition columns are never in `requiredSchema`: Spark gives the data half) -- serialized as
+   * an Arrow IPC schema message for the driver's `scan.with_schema(...)`. Pure-logical names at
+   * every nesting level, so kernel resolves the projected physical names + field-ids itself and
+   * returns `scan.physical_schema()` / `scan.logical_schema()` for the executor. Empty array when
+   * there are no data columns to read (partition-/synthetic-only scan) -- the driver then skips the
+   * projection and the executor drives the row count without a parquet read.
+   */
+  /**
+   * The kernel-read data-read schema as Delta schema JSON (`StructType.json`) -- the single carrier
+   * the driver feeds to kernel's `ScanBuilder::with_schema`. It is the query's data columns
+   * (`requiredSchema` minus synthetic/metadata columns), each drawn from `annotatedSource` when
+   * present so it carries `delta.columnMapping.physicalName` + `id` at every nesting level (the same
+   * Delta-JSON format kernel reads from the log). `annotatedSource` should be the ANALYSIS-TIME
+   * schema, falling back to the live snapshot schema -- so kernel resolves the physical names the
+   * query was PLANNED with and null-fills columns whose field-id changed since analysis (Delta's
+   * schema-on-read escape hatch). For a column the source doesn't cover (non-column-mapping tables,
+   * or no source at all) the `requiredSchema` field is used as-is (no annotations needed). Returns
+   * `""` for a read with zero data columns (partition-/synthetic-only) -- no `with_schema` then.
+   */
+  private[delta] def dataReadSchemaJson(
+      annotatedSource: Option[StructType],
+      requiredSchema: StructType,
+      partitionSchema: StructType = new StructType(),
+      rowTrackingActive: Boolean = false): String = {
+    def lower(name: String): String = name.toLowerCase(Locale.ROOT)
+    // Names to drop from the read schema. `row_id` / `row_commit_version` are SYNTHETIC (derived
+    // from baseRowId + row_index, never read by kernel) only while row tracking is enabled. With
+    // it disabled a table may own ordinary columns with those names: the proto `required_schema`
+    // keeps them (its emit flags follow the same gating), so the read schema MUST keep them too,
+    // or the executor is handed `required_schema` data columns for which no kernel schema was
+    // shipped ("missing kernel data-column schemas").
+    val ordinaryWhenUntracked =
+      Set(DeltaReflection.RowIdColumnName, DeltaReflection.RowCommitVersionColumnName).map(lower)
+    val excluded =
+      if (rowTrackingActive) SyntheticReadFieldNames
+      else SyntheticReadFieldNames -- ordinaryWhenUntracked
+    val readColumns = requiredSchema.fields.filter(f => !excluded.contains(lower(f.name)))
+    if (readColumns.nonEmpty) {
+      // Prefer the annotated field (case-insensitive name match), else the field as supplied.
+      val annotatedByName = annotatedSource.fold(Map.empty[String, StructField]) { source =>
+        source.fields.map(f => lower(f.name) -> f).toMap
+      }
+      def annotated(f: StructField): StructField = annotatedByName.getOrElse(lower(f.name), f)
+      // `requiredSchema` is only the data half, so partition columns are appended here whenever a
+      // `partitionSchema` is supplied; kernel's per-file transform then INJECTS them (the
+      // max-kernel path) rather than Comet appending them. They are looked up in the annotated
+      // schema as well, so column-mapping physical names / field-ids ride along. The AddFiles
+      // route passes an empty `partitionSchema`: its identity transform cannot inject partitions,
+      // so they stay Comet-appended there until that route also moves to kernel enumeration.
+      val withPartitions = readColumns.map(annotated) ++ partitionSchema.fields.map(annotated)
+      // Materialised row-id columns (`_row-id-col-*`, added by OPTIMIZE/UPDATE/MERGE) are matched
+      // by NAME and carry NO column-mapping annotation. Under ACTIVE column mapping kernel's
+      // logical with_schema wants physicalName AND id on every regular field, so a plain data
+      // field fails ("lacks delta.columnMapping.physicalName/id"). Instead the field is marked
+      // `delta.metadataSpec` = `row_id`, i.e. requested as kernel's RowId METADATA column: kernel
+      // reads `delta.rowTracking.materializedRowIdColumnName` by name (bypassing CM
+      // make_physical), adds a row_index helper and emits `GenerateRowId` (coalesce(materialised,
+      // baseRowId+row_index)) on the per-file transform, so row_id is correct even under CM-id.
+      // Active CM is detected from a projected field carrying a physicalName; plain tables read
+      // the materialised column fine as a data field, so they are left untouched.
+      // RowCommitVersion has no kernel metadata-column support (Error::unsupported), so
+      // `_row-commit-version-col-*` is left as-is. See state_info.rs RowId handling +
+      // CometDeltaRowTrackingMaterializedSuite (M3).
+      val columnMappingActive =
+        withPartitions.exists(_.metadata.contains(DeltaReflection.PhysicalNameMetadataKey))
+      val kernelFields =
+        if (columnMappingActive) withPartitions.map(asKernelRowIdMetadataColumnIfMaterialized)
+        else withPartitions
+      StructType(kernelFields).json
+    } else {
+      // Zero data columns (partition-only / synthetic-only reads): no kernel read schema; the
+      // executor drives the row count without a parquet read and the partition columns are filled
+      // separately. (Kernel can't drive a zero-column scan, so partitions aren't projected here.)
+      ""
+    }
+  }
+
+  /**
+   * Kernel read schema for the in-worker synthesis path (`synthesize_in_worker`): data + partitions,
+   * plus `row_index` as a kernel `RowIndex` metadata column and `row_id` as a kernel `RowId` metadata
+   * column (kernel injects/generates them). The WORKER-only synthetics -- `is_row_deleted`,
+   * `row_commit_version`, and Spark `_metadata.*` per-file constants -- are EXCLUDED (kernel doesn't
+   * read them; the executor produces them). Returns "" when nothing is read from parquet.
+   */
+  private[delta] def synthesizeReadSchemaJson(
+      annotatedSource: Option[StructType],
+      requiredSchema: StructType,
+      partitionSchema: StructType): String = {
+    def lower(name: String): String = name.toLowerCase(Locale.ROOT)
+    // Prefer the annotated field (case-insensitive name match); else use the field as supplied.
+    val byName =
+      annotatedSource.map(_.fields.map(f => lower(f.name) -> f).toMap).getOrElse(Map.empty)
+    val pick = (f: StructField) => byName.getOrElse(lower(f.name), f)
+    // Synthetics the executor produces itself (NOT read from kernel). The per-file names come
+    // from `PerFileMetadataNames` instead of being re-listed here, so this set cannot drift from
+    // the other per-file name lists in this object.
+    val workerOnly: Set[String] =
+      (Set(DeltaReflection.IsRowDeletedColumnName, DeltaReflection.RowCommitVersionColumnName) ++
+        PerFileMetadataNames).map(lower)
+    // Either spelling of the row-index column is served by kernel's `row_index` metadata column.
+    def isRowIndex(n: String): Boolean =
+      n.equalsIgnoreCase(DeltaReflection.RowIndexColumnName) ||
+        n.equalsIgnoreCase(DeltaReflection.TmpMetadataRowIndexColumnName)
+    // The logical row-id column and the materialised `_row-id-col-*` both map to kernel's RowId.
+    def isRowId(n: String): Boolean =
+      n.equalsIgnoreCase(DeltaReflection.RowIdColumnName) ||
+        lower(n).startsWith("_row-id-col-")
+    // Classify each requested field: dropped, kernel metadata column, or ordinary data column.
+    val kept: Array[StructField] = requiredSchema.fields.flatMap { f =>
+      val lc = lower(f.name)
+      if (workerOnly.contains(lc)) {
+        None // worker-side constant; not read from kernel
+      } else if (isRowIndex(f.name)) {
+        Some(asKernelMetadataColumn(f.name, "row_index"))
+      } else if (isRowId(f.name)) {
+        Some(asKernelMetadataColumn(f.name, KernelRowIdSpec))
+      } else {
+        // Real data column -- includes the MATERIALISED `_row-commit-version-col-*` (kernel has no
+        // RowCommitVersion metadata column, so it's read from parquet by name, null-filled when a
+        // file lacks it).
+        Some(pick(f))
+      }
+    }
+    // Partition columns are appended after the data/metadata columns, annotated the same way.
+    val all = kept ++ partitionSchema.fields.map(pick)
+    // Empty string signals that nothing is read from parquet.
+    if (all.isEmpty) "" else StructType(all).json
+  }
+
+  /** A LONG field marked as kernel's `<spec>` metadata column (`delta.metadataSpec`). */
+  private def asKernelMetadataColumn(name: String, spec: String): StructField =
+    StructField(
+      name,
+      dataType = LongType,
+      nullable = true,
+      metadata = new MetadataBuilder().putString(KernelMetadataSpecKey, spec).build())
+
+  private[delta] def translateDeltaFieldIdToParquet(field: StructField): StructField = {
+    val translatedType = translateDataTypeFieldIds(field.dataType)
+    val meta = field.metadata
+    val deltaIdKey = DeltaReflection.FieldIdMetadataKey
+    val parquetIdKey = DeltaReflection.ParquetFieldIdMetadataKey
+    val translatedMeta =
+      if (!meta.contains(deltaIdKey) || meta.contains(parquetIdKey)) meta // nothing to translate
+      else {
+        val deltaId = meta.getLong(deltaIdKey)
+        new MetadataBuilder().withMetadata(meta).putLong(parquetIdKey, deltaId).build()
+      }
+    field.copy(dataType = translatedType, metadata = translatedMeta)
+  }
+
+  private def translateDataTypeFieldIds(dt: DataType): DataType =
+    dt match {
+      // Struct: translate every member field (each one recurses into its own data type).
+      case struct: StructType =>
+        StructType(struct.fields.map(translateDeltaFieldIdToParquet))
+      // Array: rewrite the element type, keep `containsNull`.
+      case array: ArrayType =>
+        array.copy(elementType = translateDataTypeFieldIds(array.elementType))
+      // Map: rewrite key and value types, keep `valueContainsNull`.
+      case map: MapType =>
+        val keyType = translateDataTypeFieldIds(map.keyType)
+        val valueType = translateDataTypeFieldIds(map.valueType)
+        map.copy(keyType = keyType, valueType = valueType)
+      case other => other // every other type is returned unchanged
+    }
+
+  private[delta] def scanNeedsOneTaskPerPartition(scan: CometDeltaScanMarker): Boolean =
+    scan.deltaMetadata.oneTaskPerPartition
+
+  /**
+   * True when the native plan will emit one parquet file-group per file (core_glue's
+   * `need_per_file_groups`): any `_metadata.*` virtual column / per-file row-tracking
+   * constant (`base_row_id`, `default_row_commit_version`) is requested, or a synthesized
+   * row-index / is-row-deleted / row_id / row_commit_version column is. These are all
+   * per-file values, so each file becomes its own group. When a Spark partition packs
+   * several files, those per-file groups execute concurrently and the synthetic-column
+   * append can mis-align with / drop whole groups (non-deterministically) -- the same class
+   * of bug fixed for materialised row-tracking columns and `input_file_name()`. Forcing one
+   * file per partition keeps every native plan single-file-group. See
+   * CometDeltaDefaultRowCommitVersionReproSuite / DefaultRowCommitVersionSuite,
+   * [[isMaterializedRowTrackingName]], and `project_concurrent_missing_column_drop`.
+   */
+  private[delta] def needsPerFileGroups(scan: CometDeltaScanMarker): Boolean = {
+    def lower(name: String): String = name.toLowerCase(Locale.ROOT)
+    val outNames = scan.output.map(a => lower(a.name)).toSet
+    val reqNames = scan.requiredSchema.fieldNames.map(lower).toSet
+    // Synthesized columns (never physical): row index + is-row-deleted.
+    val syntheticNames = Set(
+      "__delta_internal_row_index",
+      "_tmp_metadata_row_index",
+      "__delta_internal_is_row_deleted")
+    // row_id / row_commit_version are synthesized (-> per-file) only when row tracking is
+    // enabled; otherwise they are ordinary user column names (see the emit-flag gating in
+    // `convert`), so don't force per-file groups for them.
+    val rowTrackingEnabled =
+      DeltaReflection.extractMetadataConfiguration(scan.relation).exists { cfg =>
+        cfg.get(DeltaReflection.EnableRowTrackingProp).exists(_.equalsIgnoreCase("true")) ||
+          cfg.contains(DeltaReflection.MaterializedRowIdColumnProp) ||
+          cfg.contains(DeltaReflection.MaterializedRowCommitVersionColumnProp)
+      }
+    // `reqNames` is lower-cased, so the row-tracking names are lower-cased before the lookup;
+    // comparing against the raw constants would silently miss any mixed-case spelling.
+    val rowTrackingNames =
+      Set(DeltaReflection.RowIdColumnName, DeltaReflection.RowCommitVersionColumnName).map(lower)
+    outNames.exists(PerFileMetadataNames.contains) ||
+      reqNames.exists(syntheticNames.contains) ||
+      (rowTrackingEnabled && reqNames.exists(rowTrackingNames.contains))
+  }
+
+  /**
+   * Reflectively resolve Hadoop's AWSCredentialProviderList for an s3/s3a URI and merge
+   * the resulting (access, secret, optional token) triple into `baseOptions` under the
+   * standard `fs.s3a.access.key` / `fs.s3a.secret.key` / `fs.s3a.session.token` keys --
+   * the same keys `NativeConfig.extractObjectStoreOptions` would have picked up if the
+   * user had set them explicitly in `core-site.xml`.
+   *
+   * Reflection is intentional: `hadoop-aws` is an optional dep; on a default Comet
+   * deployment without S3 support on the classpath, `Class.forName` fails and we return
+   * the base options unchanged. Non-s3/s3a URIs return base options unchanged too --
+   * Azure / GCS / OSS resolve their own credential chains in kernel-rs's object_store
+   * (or via the static keys already in `baseOptions`).
+   *
+   * Skip when the user has already set explicit static keys (don't overwrite an explicit
+   * config with a resolved IAM-instance token).
+   *
+   * If reflection succeeds but credential resolution fails (e.g. IMDS unreachable, no
+   * provider configured), log a warning and return `baseOptions` -- the engine will
+   * still try anonymous access or surface a clearer error than a silent crash on first
+   * S3 read.
+   */
+  // Cached reflective binding for the S3A credential chain. Resolved once per JVM.
+  // The whole augment path is invoked on every Delta scan -- without caching, each scan
+  // pays a Class.forName + getMethod round-trip just to find the bridge available.
+  //
+  // `None` means we tried once and failed (hadoop-aws not on classpath, signature drift,
+  // etc.) -- subsequent calls short-circuit.
+  private case class S3ACredentialBinding(
+      createProviderList: java.lang.reflect.Method,
+      getCredentials: java.lang.reflect.Method,
+      getAccessKey: java.lang.reflect.Method,
+      getSecretKey: java.lang.reflect.Method,
+      sessionCredsCls: Option[Class[_]],
+      getSessionToken: Option[java.lang.reflect.Method])
+
+  @volatile private var s3aCredentialBindingCache: Option[Option[S3ACredentialBinding]] = None
+
+  // Resolve the reflective S3A binding on first use and cache the outcome, including failure:
+  // the cache holds `Some(None)` after a failed lookup so later calls short-circuit.
+  private def s3aCredentialBinding: Option[S3ACredentialBinding] =
+    s3aCredentialBindingCache.getOrElse {
+      val binding = try {
+        // scalastyle:off classforname
+        val utilsCls = Class.forName("org.apache.hadoop.fs.s3a.S3AUtils")
+        // scalastyle:on classforname
+        val createMethod = utilsCls.getMethod(
+          "createAWSCredentialProviderList",
+          classOf[java.net.URI],
+          classOf[org.apache.hadoop.conf.Configuration])
+        // Accessors are looked up once on the DECLARED return types; Method.invoke dispatches
+        // virtually, so the same Method objects serve whatever subclass is returned at runtime.
+        val providerListCls = createMethod.getReturnType
+        val getCredentialsMethod = providerListCls.getMethod("getCredentials")
+        val credentialsCls = getCredentialsMethod.getReturnType
+        val getAccessKeyMethod = credentialsCls.getMethod("getAWSAccessKeyId")
+        val getSecretKeyMethod = credentialsCls.getMethod("getAWSSecretKey")
+        val (sessionCredsCls, getSessionTokenMethod) = try {
+          // scalastyle:off classforname
+          val cls = Class.forName("com.amazonaws.auth.AWSSessionCredentials")
+          // scalastyle:on classforname
+          (Some(cls), Some(cls.getMethod("getSessionToken")))
+        } catch { case _: ReflectiveOperationException | _: LinkageError => (None, None) }
+        Some(
+          S3ACredentialBinding(
+            createMethod,
+            getCredentialsMethod,
+            getAccessKeyMethod,
+            getSecretKeyMethod,
+            sessionCredsCls,
+            getSessionTokenMethod))
+      } catch {
+        // hadoop-aws (or a class it links against) is missing, or signatures drifted -- mark as
+        // unavailable for this JVM. LinkageError is not NonFatal, so match it explicitly.
+        case _: ReflectiveOperationException | _: LinkageError => None
+        case scala.util.control.NonFatal(e) =>
+          logWarning(
+            s"S3A credential-chain reflection lookup failed; falling back to static-only " +
+              s"keys in Delta log replay: ${e.getMessage}",
+            e)
+          None
+      }
+      s3aCredentialBindingCache = Some(binding)
+      binding
+    }
+
+  /**
+   * Adds S3 credentials resolved via the reflected S3A provider chain to `baseOptions`. Returns
+   * `baseOptions` unchanged for non-s3/s3a URIs, when both static keys are already present, when
+   * no S3A binding is available, or when resolution fails (a warning is logged).
+   */
+  private[delta] def augmentWithResolvedAwsCredentials(
+      baseOptions: Map[String, String],
+      tableRootUri: java.net.URI,
+      hadoopConf: org.apache.hadoop.conf.Configuration): Map[String, String] = {
+    val scheme = Option(tableRootUri.getScheme).map(_.toLowerCase(Locale.ROOT)).getOrElse("")
+    val hasStaticKeys = Seq("fs.s3a.access.key", "fs.s3a.secret.key").forall(baseOptions.contains)
+    if ((scheme != "s3" && scheme != "s3a") || hasStaticKeys) return baseOptions
+    def text(v: AnyRef): Option[String] = Option(v).map(_.toString).filter(_.nonEmpty)
+    s3aCredentialBinding.fold(baseOptions) { binding =>
+      try {
+        val providerList = binding.createProviderList.invoke(null, tableRootUri, hadoopConf)
+        try {
+          val creds = binding.getCredentials.invoke(providerList)
+          val sessionToken = (binding.sessionCredsCls, binding.getSessionToken) match {
+            case (Some(cls), Some(m)) if cls.isInstance(creds) => text(m.invoke(creds))
+            case _ => None
+          }
+          baseOptions ++ Seq(
+            "fs.s3a.access.key" -> text(binding.getAccessKey.invoke(creds)),
+            "fs.s3a.secret.key" -> text(binding.getSecretKey.invoke(creds)),
+            "fs.s3a.session.token" -> sessionToken).collect { case (k, Some(v)) => k -> v }
+        } finally providerList match {
+          // Release the provider list after copying the keys; a failing close() is ignored.
+          case c: AutoCloseable =>
+            try c.close() catch { case scala.util.control.NonFatal(_) => () }
+          case _ =>
+        }
+      } catch {
+        case scala.util.control.NonFatal(e) =>
+          // Method.invoke wraps callee failures, so the useful message is on the cause.
+          val cause = Option(e.getCause).getOrElse(e)
+          logWarning(
+            s"Delta log-replay credential resolution failed for $tableRootUri: " +
+              s"${cause.getMessage}; falling back to static-only keys in storage options",
+            e)
+          baseOptions
+      }
+    }
+  }
+
+  override def enabledConfig: Option[ConfigEntry[Boolean]] = // this serde's enable flag
+    Some(DeltaConf.COMET_DELTA_NATIVE_ENABLED)
+
+  // Reports every marker as Compatible(); the `operator` argument is not inspected.
+  override def getSupportLevel(operator: CometDeltaScanMarker): SupportLevel = Compatible()
+  override def convert(
+      scan: CometDeltaScanMarker,
+      builder: Operator.Builder,
+      childOp: OperatorOuterClass.Operator*): Option[OperatorOuterClass.Operator] = {
+
+    // Resolve the table root via the HadoopFsRelation API - standard Spark, no spark-delta
+    // compile-time dep required.
+    val relation = scan.relation
+    val tableRoot = DeltaReflection.extractTableRoot(relation).getOrElse {
+      logWarning(
+        s"CometDeltaNativeScan: unable to extract table root from relation " +
+          s"${relation.location}; falling back to Spark's Delta reader.")
+      return None
+    }
+
+    // Detect Delta synthetic columns the surrounding plan requested. We strip them
+    // from the proto schemas sent to native so the parquet reader doesn't look for
+    // columns that don't exist on disk, and set the proto emit flags so the dispatcher
+    // wraps the parquet scan in `DeltaSyntheticColumnsExec` to append them back.
+    //   - `__delta_internal_row_index` / `__delta_internal_is_row_deleted` are
+    //     UPDATE/DELETE/MERGE internals (#144).
+    //   - `row_id` / `row_commit_version` are row-tracking columns when the table has
+    //     `delta.enableRowTracking=true` but no materialised columns -- synthesised
+    //     from baseRowId + physical row index per task.
+    // Row index can appear under either name in the scan output: the canonical
+    // `__delta_internal_row_index` (Delta synthetic-column path), or the
+    // intermediate `_tmp_metadata_row_index` (Delta's
+    // `DeltaParquetFileFormat.TMP_METADATA_ROW_INDEX_COLUMN_NAME`, used for plans
+    // that read `_metadata.row_index` from row-tracking-enabled tables before
+    // Delta projects the alias). Both cases go through the same native synthesis
+    // -- just with a different output column name.
+    val rowIndexCanonicalPresent = scan.requiredSchema.fieldNames.exists(
+      _.equalsIgnoreCase(DeltaReflection.RowIndexColumnName))
+    val rowIndexTmpMetadataPresent = scan.requiredSchema.fieldNames.exists(
+      _.equalsIgnoreCase(DeltaReflection.TmpMetadataRowIndexColumnName))
+    // Both names denote the same physical value (the parquet row index), but they can
+    // appear together in a single scan: DELETE/UPDATE/MERGE on a DV-enabled table with
+    // `spark.databricks.delta.deletionVectors.useMetadataRowIndex=false` reads files that
+    // already carry a deletion vector. There the scan needs `_metadata.row_index`
+    // (-> `_tmp_metadata_row_index`) to APPLY the existing DV and the explicit
+    // `__delta_internal_row_index` column to build the NEW DV bitmap. Native synthesis
+    // emits a single row-index column under one name, and the final-reorder Projection
+    // names its outputs from the wrapped (native) schema, so it cannot produce two
+    // distinctly-named row-index outputs. Rather than misname them, fall back to Spark's
+    // Delta reader for this scan. This shape only arises in Delta's internal DV-maintenance
+    // read (never a user query), so there is no user-facing perf impact; the common
+    // useMetadataRowIndex=true path (a single row-index name) is unaffected.
+    // Repro: CometDeltaDeleteWithDVReproSuite; regression: DeleteSQLWithDeletionVectorsSuite.
+    if (rowIndexCanonicalPresent && rowIndexTmpMetadataPresent) {
+      logInfo(
+        "CometDeltaNativeScan: scan.requiredSchema requests both " +
+          s"${DeltaReflection.RowIndexColumnName} and " +
+          s"${DeltaReflection.TmpMetadataRowIndexColumnName} (DV-maintenance read with " +
+          "useMetadataRowIndex=false); falling back to Spark's Delta reader for this scan.")
+      return None
+    }
+    val emitRowIndex = rowIndexCanonicalPresent || rowIndexTmpMetadataPresent
+    val rowIndexColumnAlias: String =
+      if (rowIndexTmpMetadataPresent && !rowIndexCanonicalPresent)
+        DeltaReflection.TmpMetadataRowIndexColumnName
+      else ""
+    val emitIsRowDeleted = scan.requiredSchema.fieldNames.exists(
+      _.equalsIgnoreCase(DeltaReflection.IsRowDeletedColumnName))
+    // `row_id` / `row_commit_version` are reserved names ONLY when row tracking is enabled --
+    // then they are metadata columns we synthesize (baseRowId + row_index, etc.). With row
+    // tracking disabled they are ordinary user column names with no special meaning, and a
+    // user table may legitimately have a physical column called `row_id`. Deriving the emit
+    // flags purely from the column name mistook such a user column for the synthetic, stripped
+    // it from the parquet read, and synthesized garbage (RowIdSuite "row_id column with row ids
+    // disabled" -> NPE/wrong values). Gate the synthetic emit on row tracking actually being
+    // enabled on the table. Repro: CometDeltaRowIdColumnCollisionReproSuite.
+    val rowTrackingEnabled: Boolean =
+      DeltaReflection.extractMetadataConfiguration(relation).exists { cfg =>
+        cfg.get(DeltaReflection.EnableRowTrackingProp).exists(_.equalsIgnoreCase("true")) ||
+          cfg.contains(DeltaReflection.MaterializedRowIdColumnProp) ||
+          cfg.contains(DeltaReflection.MaterializedRowCommitVersionColumnProp)
+      }
+    val emitRowId = rowTrackingEnabled &&
+      scan.requiredSchema.fieldNames.exists(_.equalsIgnoreCase(DeltaReflection.RowIdColumnName))
+    val emitRowCommitVersion = rowTrackingEnabled &&
+      scan.requiredSchema.fieldNames.exists(
+        _.equalsIgnoreCase(DeltaReflection.RowCommitVersionColumnName))
+
+    val ignoreMissingFiles =
+      SQLConf.get.ignoreMissingFiles ||
+        relation.options.get("ignoremissingfiles").contains("true")
+
+    // Cloud storage options for kernel log replay and the native parquet reader,
+    // keyed identically to NativeScan. See `resolveStorageOptions`.
+    val storageOptions: java.util.Map[String, String] = resolveStorageOptions(scan, tableRoot)
+
+    // Honor Delta's time-travel options (versionAsOf / timestampAsOf) via the Delta-
+    // resolved snapshot version sitting on the FileIndex. Delta's analysis phase pins
+    // the exact snapshot before we ever see the plan, so by the time the marker is
+    // built, `relation.location` is a `PreparedDeltaFileIndex` whose toString looks like
+    // `Delta[version=0, file:/...]`. We parse the version out via
+    // `DeltaReflection.extractSnapshotVersion` and pass it through to kernel.
+    //
+    // When no version can be extracted (non-Delta file index, parser miss, etc.) we pass
+    // -1 which asks kernel for the current latest snapshot.
+    val snapshotVersion: Long =
+      DeltaReflection.extractSnapshotVersion(relation).getOrElse(-1L)
+
+    // Serialize the data filters so kernel can apply stats-based file pruning during log replay.
+    val predicateBytes: Array[Byte] = serializeSupportedDataFilters(scan)
+
+    // Stage B/C: produce synthetic columns inside DeltaKernelScanExec (kernel metadata columns +
+    // engine-side DV-invert / per-file constants), retiring the stacked DeltaSyntheticColumnsExec.
+    // Needs kernel's per-file transform (row_id GenerateRowId + partition injection), which only the
+    // kernel-enumeration path ships. Regular reads always take it; DML rewrites (TahoeBatchFileIndex)
+    // get it via kernel-enumerate + path-filter (set below, only if every touched file matched).
+    // In-worker synthesis is now the ONLY native synthesis path; the legacy stacked
+    // DeltaSyntheticColumnsExec is removed (#82). Regular reads always synthesize in-worker; subset
+    // reads (DML rewrites) do too when every touched file matched kernel enumeration, otherwise they
+    // decline to vanilla Spark (the `case None` branch below). A read that would have needed the old
+    // exec therefore either synthesizes in-worker or declines -- it never reaches a non-synthesize
+    // native path.
+    var synthesizeInWorker: Boolean =
+      !DeltaReflection.isSubsetFileIndex(relation.location)
+
+    // Column name list for resolving BoundReference indices to kernel column
+    // names. Must match the order of scan.output because exprToProto binds
+    // attribute references by position in that schema.
+    val columnNames: Array[String] = scan.output.map(_.name).toArray
+
+    // --- 1. Get the active file list. ---
+    //
+    // Two code paths:
+    //   (a) Exact-subset FileIndex (`TahoeBatchFileIndex`, `CdcAddFileIndex`,
+    //       `TahoeRemoveFileIndex`, `TahoeChangeFileIndex`): Delta's streaming
+    //       micro-batch reads AND MERGE / UPDATE / DELETE post-join rewrites carry
+    //       an exact `addFiles: Seq[AddFile]` on the FileIndex. Kernel log replay
+    //       against the snapshot would return a DIFFERENT file set (the whole
+    //       snapshot, or a version's deltas), which is a correctness hazard --
+    //       empty streaming batches, MERGE rewrites that see the whole table
+    //       instead of only touched files. Build the DeltaScanTaskList proto
+    //       directly from those AddFiles, skipping kernel.
+    //   (b) Regular scan against a snapshot (`PreparedDeltaFileIndex` /
+    //       `TahoeLogFileIndex` -- the vast majority): call kernel for log replay.
+    //       Kernel reproduces the pruned active file set from the pinned snapshot +
+    //       the shipped data predicate, and ships its OWN per-file transform so
+    //       partition injection / column-mapping relabel / row-tracking come from
+    //       kernel rather than Comet-side reconstruction.
+    val taskListBytes =
+      if (DeltaReflection.isSubsetFileIndex(relation.location)) {
+        // Pass BOTH the scan's partition filters AND data filters through
+        // so `refreshedSnapshotFiles` (which queries
+        // `snapshot.filesForScan(filters, ...)`) re-applies the same
+        // partition pruning + stats-based data-skipping Delta did at
+        // planning time. Without this, on `PreparedDeltaFileIndex` the
+        // refresh path returns ALL files, breaking stats-based file
+        // pruning (e.g. StatsCollectionSuite "gather stats" -- the
+        // partition column is `odd` but the test filter is on `id` which
+        // is a data column; only data-filter skipping makes the assertion
+        // `recordsScanned(df.where("id = 1")) == 1` hold).
+        DeltaReflection.extractBatchAddFiles(
+          relation.location,
+          scan.partitionFilters ++ scan.dataFilters) match {
+          case Some(addFiles) =>
+            // DV handling: the driver only ships a DV DESCRIPTOR per AddFile
+            // (storage type / path / offset / size, KB-scale). The executor decodes
+            // via `dv_reader::read_dv_indexes` on first poll. Pre-#218 we called
+            // `materializeDeletedRowIndexes` here and shipped the expanded
+            // `Vec<u64>` -- a single 99M-row DV is a ~1 GB `long[]` retained on the
+            // driver heap until the scan finishes. Matches the Iceberg contrib's
+            // `IcebergScanCommon.delete_files_pool` pattern (driver = references,
+            // executor = decode). If a DV file is missing/corrupt the executor
+            // surfaces a `SparkException` -- same observable behaviour as before,
+            // just at execution rather than planning.
+            // Option (a): DML rewrites (TahoeBatchFileIndex -- touched files are a subset of the
+            // pinned snapshot) get kernel's per-file transforms by enumerating the snapshot and
+            // filtering to the touched AddFile paths, so they synthesize in-worker like regular
+            // reads. If ANY touched file is missing from kernel's enumeration (or it throws), the
+            // scan declines to vanilla Spark (`case None` below) -- never wrong files.
+            // CDC indexes (files outside the snapshot) are excluded by isDmlRewriteFileIndex.
+            val dmlSynthBytes: Option[Array[Byte]] =
+              if (DeltaReflection.isDmlRewriteFileIndex(relation.location)) {
+                try {
+                  val annotated = scan.deltaMetadata.analyzedSchema.orElse(
+                    DeltaReflection.extractSnapshotSchema(relation))
+                  val projJson = CometDeltaNativeScan.synthesizeReadSchemaJson(
+                    annotated,
+                    scan.requiredSchema,
+                    relation.partitionSchema)
+                  // Empty predicate: the touched AddFile set is the authoritative selection; kernel
+                  // stats pruning could drop a touched file and force a needless decline.
+                  val kernelTaskList = DeltaScanTaskList.parseFrom(
+                    nativeLib.planDeltaScan(
+                      tableRoot,
+                      snapshotVersion,
+                      storageOptions,
+                      Array.emptyByteArray,
+                      columnNames,
+                      projJson))
+                  // Resolve touched AddFile paths the SAME way the native side resolves kernel paths
+                  // (table_root + rel, or pass-through for scheme'd paths), then match by file_path.
+                  val sep = if (tableRoot.endsWith("/")) "" else "/"
+                  val touched: Set[String] = addFiles.map { af =>
+                    if (af.path.contains(":/")) af.path else tableRoot + sep + af.path
+                  }.toSet
+                  val matched = kernelTaskList.getTasksList.asScala
+                    .filter(t => touched.contains(t.getFilePath))
+                  if (touched.nonEmpty && matched.size == touched.size) {
+                    Some(
+                      DeltaScanTaskList
+                        .newBuilder()
+                        .setSnapshotVersion(kernelTaskList.getSnapshotVersion)
+                        .setTableRoot(kernelTaskList.getTableRoot)
+                        .addAllUnsupportedFeatures(kernelTaskList.getUnsupportedFeaturesList)
+                        .setPhysicalSchema(kernelTaskList.getPhysicalSchema)
+                        .setLogicalSchema(kernelTaskList.getLogicalSchema)
+                        .addAllTasks(matched.asJava)
+                        .build()
+                        .toByteArray)
+                  } else {
+                    None
+                  }
+                } catch {
+                  case scala.util.control.NonFatal(e) =>
+                    logWarning(
+                      s"CometDeltaNativeScan: DML kernel-enumerate for $tableRoot failed; " +
+                        s"using legacy AddFiles path",
+                      e)
+                    None
+                }
+              } else {
+                None
+              }
+            dmlSynthBytes match {
+              case Some(bytes) =>
+                synthesizeInWorker = true
+                bytes
+              case None =>
+                // #82: the legacy buildTaskListFromAddFiles + DeltaSyntheticColumnsExec path is
+                // retired. The only reads that reached it were CDC-family subset indexes (now read
+                // natively via kernel TableChanges -- CometDeltaCdfScanExec, #84 -- so they never
+                // become a marker here) and the rare DML declines that can't synthesize in-worker
+                // (CM-id materialised row_commit_version; OPTIMIZE file-not-found race). Decline the
+                // latter to vanilla Spark with the same withFallbackReason mechanism the
+                // reflection-failure branch below uses (proven to cleanly drop the Comet boundary).
+                import org.apache.comet.CometSparkSessionExtensions.withFallbackReason
+                withFallbackReason(
+                  scan,
+                  s"Native Delta scan declines a subset-file-index read that cannot synthesize " +
+                    s"in-worker (${relation.location.getClass.getName}); falling back to Spark.")
+                return None
+            }
+          case None =>
+            // Reflection failed; fall back conservatively.
+            import org.apache.comet.CometSparkSessionExtensions.withFallbackReason
+            withFallbackReason(
+              scan,
+              s"Native Delta scan could not extract AddFiles from " +
+                s"${relation.location.getClass.getName}; falling back.")
+            return None
+        }
+      } else {
+        // Regular reads (`PreparedDeltaFileIndex` / `TahoeLogFileIndex`): kernel enumerates the
+        // pinned snapshot (reproducing Delta's pruned active file set via the shipped data predicate)
+        // and ships its OWN per-file transform, so partition injection / column-mapping relabel /
+        // row-tracking come from kernel. DV-aware INTERNAL reads (Delta-PreprocessTableWithDVs with
+        // inverted row-index-filter semantics) are kept on vanilla upstream by
+        // `CometScanRule.scanBelowFallsBackForDvs`.
+        try {
+          // The driver's `with_schema` gets the ANALYSIS-TIME read schema (Delta JSON, carrying
+          // column-mapping physicalName/id) so kernel resolves the names the query was planned with
+          // -> correct under schema-change-since-analysis. Fall back to the live snapshot schema when
+          // there's no stashed reference schema; both carry the annotations kernel needs. Include the
+          // partition schema so kernel's transform INJECTS partition columns (max-kernel) rather than
+          // Comet appending them.
+          val annotated = scan.deltaMetadata.analyzedSchema.orElse(
+            DeltaReflection.extractSnapshotSchema(relation))
+          val projJson =
+            if (synthesizeInWorker) {
+              // Kernel read = data + partitions + row_index/row_id as kernel metadata columns; the
+              // executor synthesises is_row_deleted / row_commit_version / _metadata.* itself.
+              CometDeltaNativeScan.synthesizeReadSchemaJson(
+                annotated,
+                scan.requiredSchema,
+                relation.partitionSchema)
+            } else {
+              CometDeltaNativeScan.dataReadSchemaJson(
+                annotated,
+                scan.requiredSchema,
+                relation.partitionSchema,
+                rowTrackingActive = rowTrackingEnabled)
+            }
+          nativeLib.planDeltaScan(
+            tableRoot,
+            snapshotVersion,
+            storageOptions,
+            predicateBytes,
+            columnNames,
+            projJson)
+        } catch {
+          case scala.util.control.NonFatal(e) =>
+            logWarning(
+              s"CometDeltaNativeScan: delta-kernel-rs log replay failed for $tableRoot",
+              e)
+            return None
+        }
+      }
+    val taskList = DeltaScanTaskList.parseFrom(taskListBytes)
+    // Column mapping no longer needs any executor-side plumbing: the kernel-read path ships kernel's
+    // own `scan.physical_schema()` / `logical_schema()` (physical names + field-ids resolved at every
+    // nesting level), and kernel returns partition values already translated to logical names
+    // (driver-side, in `planDeltaScan` / `buildTaskListFromAddFiles`). The former re-derivation of a
+    // `column_mappings` tree here existed only to feed the removed `physicalise_field` schema rebuild.
+
+    // Phase 6 reader-feature gate. Kernel reports any Delta reader features that
+    // are currently in use in this snapshot and that Comet's native path does NOT
+    // correctly handle. Falling back is mandatory for correctness: reading through
+    // the native path would silently produce wrong results (e.g. returning rows
+    // that a deletion vector should have hidden). The gate becomes obsolete feature
+    // by feature as later phases ship:
+    //   deletionVectors -> Phase 3
+    //   columnMapping   -> Phase 4
+    //   typeWidening    -> future phase
+    //   rowTracking     -> future phase
+    val unsupportedFeatures = taskList.getUnsupportedFeaturesList.asScala.toSeq
+    if (unsupportedFeatures.nonEmpty &&
+      DeltaConf.COMET_DELTA_FALLBACK_ON_UNSUPPORTED_FEATURE.get(scan.conf)) {
+      logInfo(
+        s"CometDeltaNativeScan: falling back for table $tableRoot " +
+          s"due to unsupported reader features: ${unsupportedFeatures.mkString(", ")}")
+      import org.apache.comet.CometSparkSessionExtensions.withFallbackReason
+      withFallbackReason(
+        scan,
+        s"Native Delta scan does not yet support these features in use on this " +
+          s"snapshot: ${unsupportedFeatures.mkString(", ")}. Falling back to Spark's " +
+          s"Delta reader. Set ${DeltaConf.COMET_DELTA_FALLBACK_ON_UNSUPPORTED_FEATURE.key}=false " +
+          s"to bypass this check (NOT recommended - may produce incorrect results).")
+      return None
+    }
+
+    // Apply Spark's partition filters to the task list so that queries like
+    // `WHERE partition_col = X` don't drag in files from other partitions. Kernel
+    // is only shipped the data-filter predicate (`predicateBytes` above), not the
+    // partition filters, so we do that pruning in Scala by evaluating each task's
+    // partition-value map against Spark's `partitionFilters`. This is a single
+    // driver-side loop; filtered tasks never go over the wire to executors.
+    val filteredTasks0 =
+      prunePartitions(taskList.getTasksList.asScala.toSeq, scan, relation.partitionSchema)
+
+    // Split files larger than `maxSplitBytes` into byte-range chunks so a single
+    // big parquet file can be read across multiple Spark partitions, matching
+    // Spark's `FilePartition.splitFiles` semantics. This is what makes
+    // FILES_MAX_PARTITION_BYTES, files.openCostInBytes, and
+    // files.minPartitionNum take effect on Delta tables: without it every file
+    // is exactly one partition and the *.size assertions in
+    // DeletionVectorsSuite's PredicatePushdown tests fail (they configure
+    // FILES_MAX_PARTITION_BYTES=2MB on a multi-row-group fixture and assert
+    // exactly 2 splits).
+    val filteredTasks =
+      splitTasks(scan, filteredTasks0)
+
+    // --- 2. Build the common block ---
+    val commonBuilder = DeltaScanCommon.newBuilder()
+    commonBuilder.setSource(scan.simpleStringWithNodeId())
+    commonBuilder.setTableRoot(taskList.getTableRoot)
+    commonBuilder.setSnapshotVersion(taskList.getSnapshotVersion)
+    commonBuilder.setSessionTimezone(scan.conf.sessionLocalTimeZone)
+    commonBuilder.setCaseSensitive(scan.conf.getConf[Boolean](SQLConf.CASE_SENSITIVE))
+    commonBuilder.setIgnoreMissingFiles(ignoreMissingFiles)
+    commonBuilder.setDataFileConcurrencyLimit(
+      DeltaConf.COMET_DELTA_DATA_FILE_CONCURRENCY_LIMIT.get())
+
+    // `required_schema` on the wire is the SCAN's output schema -- the data columns the scan reads
+    // from parquet PLUS partition columns it materialises from PartitionedFile.partition_values.
+    // For non-partitioned tables `scan.requiredSchema` is already the whole output; for partitioned
+    // tables Spark gives us just the data half, so append the partition fields at the tail (the
+    // native side splits them back out by name).
+    val partitionFieldsForRequired: Array[StructField] = {
+      val haveLc = scan.requiredSchema.fields.map(_.name.toLowerCase(Locale.ROOT)).toSet
+      relation.partitionSchema.fields.filterNot(f =>
+        haveLc.contains(f.name.toLowerCase(Locale.ROOT)))
+    }
+    // Spark `_metadata.*` virtual columns plus Delta row-tracking synthetics that
+    // appear in scan.output but not scan.requiredSchema. They are synthesised natively
+    // below (via metadataColumnNamesEmitted) and must appear in the wrapped exec
+    // output schema for downstream attribute resolution.
+    val sparkMetadataNameSet = SparkFileMetadataNames
+    def isExtraSyntheticName(name: String): Boolean = {
+      val lc = name.toLowerCase(Locale.ROOT)
+      // NOTE: materialised row-tracking columns (`_row-id-col-*` /
+      // `_row-commit-version-col-*`) are deliberately NOT here -- they are real
+      // parquet columns read from the file (added to the data schema), not synthesised.
+      sparkMetadataNameSet.contains(lc) ||
+        lc == "base_row_id" ||
+        lc == "default_row_commit_version"
+    }
+    val extraMetadataFields: Array[StructField] = scan.output.toArray.collect {
+      case a if isExtraSyntheticName(a.name) &&
+        !scan.requiredSchema.fieldNames.exists(_.equalsIgnoreCase(a.name)) =>
+        StructField(a.name, a.dataType, a.nullable)
+    }
+    // Required schema for the proto wire: PURE-LOGICAL at every nesting level. The native
+    // kernel-read planner physicalises it via the recursive `column_mappings`; partition columns
+    // are appended at the tail and split back out by name on the native side.
+    val requiredSchemaLogicalFields =
+      scan.requiredSchema.fields ++ partitionFieldsForRequired ++ extraMetadataFields
+
+    // Column-mapping `id` mode: Delta stores the parquet field ID on every
+    // StructField (at every level of nesting) under
+    // `delta.columnMapping.id`. Spark's `ParquetUtils.hasFieldId` (used by
+    // `schema2Proto` and the StructType arm of `serializeDataType`) reads from
+    // `parquet.field.id`. Walk the schema tree and translate keys so the
+    // native side -- when `use_field_id=true` -- matches Spark schema fields
+    // to parquet file fields by ID instead of by name.
+    val cmModeIsId = DeltaReflection
+      .extractMetadataConfiguration(relation)
+      .flatMap(_.get("delta.columnMapping.mode"))
+      .exists(_.equalsIgnoreCase("id"))
+    // The general-purpose Parquet field-ID read path also drives `use_field_id`: if
+    // the user has enabled `spark.sql.parquet.fieldId.read.enabled` AND the required
+    // schema already carries Spark's `parquet.field.id` metadata, route through the
+    // same native machinery. CM-id mode is the common Delta case; this catches
+    // non-Delta-id tables that nevertheless want field-ID matching.
+    val sparkFieldIdReadEnabled = SQLConf.get.getConf(SQLConf.PARQUET_FIELD_ID_READ_ENABLED) &&
+      org.apache.spark.sql.execution.datasources.parquet.ParquetUtils.hasFieldIds(
+        scan.requiredSchema)
+    val useFieldIdActive = cmModeIsId || sparkFieldIdReadEnabled
+    val requiredSchemaForProto =
+      if (cmModeIsId) {
+        requiredSchemaLogicalFields.map(CometDeltaNativeScan.translateDeltaFieldIdToParquet)
+      } else requiredSchemaLogicalFields
+    val partitionSchemaForProto =
+      if (cmModeIsId) {
+        relation.partitionSchema.fields.map(
+          CometDeltaNativeScan.translateDeltaFieldIdToParquet)
+      } else relation.partitionSchema.fields
+
+    // Strip Delta synthetic columns from the proto schemas. They're not on disk so the
+    // native parquet reader must not look for them; `DeltaSyntheticColumnsExec` appends
+    // them back after the scan. Required precondition: synthetics must be a SUFFIX of
+    // scan.requiredSchema -- otherwise the appended order wouldn't match Spark's
+    // expected output. The standard Delta DV-rewrite path satisfies this; anything else
+    // falls back. If we detect the suffix doesn't hold, decline and let Spark's reader
+    // handle it (correctness over coverage).
+    val syntheticNames = Set(
+      DeltaReflection.RowIndexColumnName.toLowerCase(Locale.ROOT),
+      DeltaReflection.TmpMetadataRowIndexColumnName.toLowerCase(Locale.ROOT),
+      DeltaReflection.IsRowDeletedColumnName.toLowerCase(Locale.ROOT),
+      DeltaReflection.RowIdColumnName,
+      DeltaReflection.RowCommitVersionColumnName,
+      // Spark `_metadata.*` virtual columns synthesised natively per-task.
+      "file_path",
+      "file_name",
+      "file_size",
+      "file_block_start",
+      "file_block_length",
+      "file_modification_time",
+      // Delta row-tracking columns synthesised natively. Both are per-file constants
+      // from AddFile.baseRowId / AddFile.defaultRowCommitVersion; the materialised
+      // columns are null when the parquet file doesn't carry them. Must be kept in
+      // sync with `fixedMetadataNames` below and the proto setters in
+      // `buildTaskListFromAddFiles` so the native side actually emits these.
+      "base_row_id",
+      "default_row_commit_version")
+    val isSynthetic = (f: StructField) => {
+      // Materialised row-tracking columns are NOT synthetic -- they are read from parquet, so they
+      // must stay in the required schema.
+      syntheticNames.contains(f.name.toLowerCase(Locale.ROOT))
+    }
+    // metadataColumnNames includes the Spark `_metadata.*` virtual columns (file_path,
+    // file_name, file_size, file_block_start, file_block_length, file_modification_time)
+    // that Delta's strategies inject. These are synthesised per-task in
+    // `DeltaSyntheticColumnsExec`, so when any are required we need the synthetic-emit
+    // path even without emit_row_index/is_row_deleted/row_id/row_commit_version set.
+    val sparkMetadataNames = SparkFileMetadataNames
+    val requiredFieldNamesLower: Set[String] =
+      scan.requiredSchema.fields.map(_.name.toLowerCase(Locale.ROOT)).toSet
+    // Spark also appends `_metadata.*` columns to scan.output (not requiredSchema) when
+    // downstream operators (e.g. Delta's PreprocessTableWithDVs) bind to them by name.
+    // The wrapped exec's output schema must include them so attribute resolution works.
+    val outputFieldNamesLower: Set[String] =
+      scan.output.map(_.name.toLowerCase(Locale.ROOT)).toSet
+    // PerFileMetadataNames includes `default_row_commit_version` alongside `base_row_id`:
+    // dropping it makes the emit-name list short a column, so CDC / row-tracking reads see
+    // N-1 cols where Spark expected N (notably under coordinated-commits backfill).
+    val fixedMetadataNames = PerFileMetadataNames
+    // The wrapped exec output is `parquet projection ++ row_index/is_row_deleted/...
+    // ++ metadata_column_names` in the order metadata names are emitted. To make the
+    // post-synthesis layout match scan.output WITHOUT a final reorder Project, walk
+    // scan.output and pick out the metadata-style columns in the order they appear.
+    val metadataColumnNamesEmitted: Seq[String] = scan.output.flatMap { attr =>
+      val lc = attr.name.toLowerCase(Locale.ROOT)
+      // Materialised row-tracking columns are read from parquet, not synthesised, so
+      // they are excluded here.
+      if (fixedMetadataNames.contains(lc)) Some(lc) else None
+    }.distinct
+    val needsMetadataEmit = metadataColumnNamesEmitted.nonEmpty
+    val needsSyntheticEmit =
+      emitRowIndex || emitIsRowDeleted || emitRowId || emitRowCommitVersion || needsMetadataEmit
+    // When synthetics are NOT a contiguous suffix of required_schema, build a reorder
+    // map: for each original required-schema position, an index into the wrapped exec's
+    // output (parquet output cols followed by appended synthetics in canonical order
+    // row_index, is_row_deleted, row_id, row_commit_version). The native dispatcher
+    // applies a final ProjectionExec to reorder columns to match Spark's expected
+    // output layout. Empty when synthetics ARE a suffix -- already in the right order.
+    // In synthesize mode the executor assembles the full output BY NAME, so no positional reorder is
+    // needed and `required_schema` IS the full output (= scan.output) -- synthetics are NOT stripped.
+    val finalOutputIndices: Seq[Int] =
+      if (synthesizeInWorker) Seq.empty
+      else
+        computeFinalOutputIndices(
+          needsSyntheticEmit,
+          requiredSchemaForProto,
+          isSynthetic,
+          emitRowIndex,
+          emitIsRowDeleted,
+          emitRowId,
+          emitRowCommitVersion,
+          rowIndexColumnAlias,
+          metadataColumnNamesEmitted)
+    val requiredSchemaForProtoStripped =
+      if (synthesizeInWorker) {
+        // Full output the executor must emit (data + partitions + ALL synthetics) in scan.output
+        // order; the by-name assembler places each column. Logical names (no id-translation -- field
+        // ids ride on the kernel READ schema, not the output).
+        scan.output.map(a => StructField(a.name, a.dataType, a.nullable)).toArray
+      } else if (needsSyntheticEmit) {
+        requiredSchemaForProto.filterNot(isSynthetic)
+      } else {
+        requiredSchemaForProto
+      }
+
+    val requiredSchema = schema2Proto(requiredSchemaForProtoStripped)
+    val partitionSchema = schema2Proto(partitionSchemaForProto)
+    commonBuilder.addAllRequiredSchema(requiredSchema.toIterable.asJava)
+    commonBuilder.addAllPartitionSchema(partitionSchema.toIterable.asJava)
+    // Kernel-built projected schemas (`scan.physical_schema()` / `scan.logical_schema()`, Arrow
+    // IPC) -- correct physical names + field-ids at EVERY nesting level. The executor's kernel-read
+    // planner uses them verbatim. The kernel-driver `planDeltaScan` path returns them inline; the
+    // batch-file-index path (file list from AddFiles) fetches them via the schema-only
+    // `planDeltaReadSchemas`. (For a read with zero data columns there are none, and none are
+    // needed -- the executor drives the row count without a parquet read.)
+    val kernelSchemaSource: DeltaScanTaskList =
+      if (!taskList.getPhysicalSchema.isEmpty) {
+        taskList
+      } else {
+        // Analysis-time read schema (Delta JSON), falling back to the live snapshot schema. Empty =>
+        // zero data columns => no kernel schemas needed.
+        val projJson = CometDeltaNativeScan.dataReadSchemaJson(
+          scan.deltaMetadata.analyzedSchema.orElse(DeltaReflection.extractSnapshotSchema(relation)),
+          scan.requiredSchema,
+          rowTrackingActive = rowTrackingEnabled)
+        if (projJson.isEmpty) {
+          taskList
+        } else {
+          try {
+            DeltaScanTaskList.parseFrom(
+              nativeLib
+                .planDeltaReadSchemas(tableRoot, snapshotVersion, storageOptions, projJson))
+          } catch {
+            case scala.util.control.NonFatal(e) =>
+              // The kernel-read path has no Comet-side physicalisation fallback; if kernel can't
+              // build the read schemas, decline to native and let Spark's reader handle it.
+              import org.apache.comet.CometSparkSessionExtensions.withFallbackReason
+              withFallbackReason(
+                scan,
+                s"Native Delta scan could not build kernel read schemas for $tableRoot: $e")
+              return None
+          }
+        }
+      }
+    if (!kernelSchemaSource.getPhysicalSchema.isEmpty) {
+      commonBuilder.setKernelPhysicalSchema(kernelSchemaSource.getPhysicalSchema)
+      commonBuilder.setKernelLogicalSchema(kernelSchemaSource.getLogicalSchema)
+    }
+    commonBuilder.setUseFieldId(useFieldIdActive)
+    commonBuilder.setEmitRowIndex(emitRowIndex)
+    commonBuilder.setEmitIsRowDeleted(emitIsRowDeleted)
+    commonBuilder.setEmitRowId(emitRowId)
+    commonBuilder.setEmitRowCommitVersion(emitRowCommitVersion)
+    if (rowIndexColumnAlias.nonEmpty) {
+      commonBuilder.setRowIndexColumnAlias(rowIndexColumnAlias)
+    }
+    // Add the metadata column names we will synthesise natively (computed above as
+    // `metadataColumnNamesEmitted` by walking `scan.output` for `PerFileMetadataNames` members).
+    metadataColumnNamesEmitted.foreach(commonBuilder.addMetadataColumnNames)
+    commonBuilder.addAllFinalOutputIndices(
+      finalOutputIndices.map(i => Integer.valueOf(i)).asJava)
+
+
+
+    // Kernel-read is the only Delta read path: every file is read through delta-kernel-rs (read +
+    // transform + DV) by DeltaKernelScanExec, which produces all output columns in-worker, by name.
+    // The native side splits required_schema into data (read from parquet) + partition (injected)
+    // columns; column mapping (incl. nested, #47), partitions, row-tracking, _metadata columns, and
+    // zero-data-column reads (partition-only, e.g. groupBy(partition).agg(count("*")); the exec
+    // drives the row count from record_count / the parquet footer, #48) are all handled here.
+    commonBuilder.setSynthesizeInWorker(synthesizeInWorker)
+    // Delta's test mode prepends `spark.databricks.delta.testOnly.dvFileNamePrefix` to DV
+    // filenames; delta-kernel-rs doesn't honour that JVM-only conf, so the executor splices it
+    // back into kernel's resolved DV path. Empty in production (no-op).
+    commonBuilder.setDvFileNamePrefix(DeltaReflection.dvFileNamePrefix(scan.conf))
+
+    // (Data-filter pushdown belonged to the removed ParquetSource path; the kernel-read path does
+    // its own stats-based file pruning during log replay, so no pushed predicate is shipped.)
+
+    storageOptions.asScala.foreach { case (key, value) =>
+      commonBuilder.putObjectStoreOptions(key, value)
+    }
+
+    // (Column mapping is fully resolved by kernel: the executor reads with the shipped
+    // `kernel_physical_schema` / `kernel_logical_schema`, so no `column_mappings` tree is sent.)
+
+    // --- 3. Pack into a DeltaScan with COMMON ONLY (split-mode, Phase 5).
+    // Tasks are NOT included in the proto at planning time. They'll be
+    // serialized per-partition in CometDeltaNativeScanExec.serializedPartitionData
+    // at execution time, and merged via DeltaPlanDataInjector.
+    val deltaScanBuilder = DeltaScan.newBuilder()
+    deltaScanBuilder.setCommon(commonBuilder.build())
+    // table_root is also threaded into each per-partition DeltaScan in
+    // CometDeltaNativeScanExec.packTasks; set it here as well so the planning-time
+    // proto carries it for any consumer that reads the parent DeltaScan directly.
+    val plannedTableRoot = taskList.getTableRoot
+    if (plannedTableRoot != null && plannedTableRoot.nonEmpty) {
+      deltaScanBuilder.setTableRoot(plannedTableRoot)
+    }
+    // No addAllTasks: tasks stay in taskListBytes for the exec's lazy split.
+
+    // Stash the full task-list bytes for createExec to retrieve. The ThreadLocal
+    // bridges the convert() -> createExec() gap in CometExecRule.convertToComet.
+    // Build a modified taskList with ONLY the filtered tasks (partition-pruned).
+    val filteredTaskList = DeltaScanTaskList
+      .newBuilder()
+      .setSnapshotVersion(taskList.getSnapshotVersion)
+      .setTableRoot(taskList.getTableRoot)
+      .addAllTasks(filteredTasks.asJava)
+      .addAllUnsupportedFeatures(taskList.getUnsupportedFeaturesList)
+      .build()
+    lastTaskListBytes.set(filteredTaskList.toByteArray)
+
+    // Use the typed DeltaScan proto variant. Core's planner dispatches via the
+    // OpStruct::DeltaScan match arm under `#[cfg(feature = "contrib-delta")]`.
+    builder.clearChildren()
+    Some(builder.setDeltaScan(deltaScanBuilder.build()).build())
+  }
+
+  /**
+   * Serialize the scan's supported data filters into a single predicate proto for kernel's
+   * stats-based file pruning during log replay.
+   *
+   * All supported filters are combined into one AND conjunction. `BoundReference`s carry the
+   * column INDEX into `scan.output`; the native side resolves indices to column names via the
+   * `columnNames` array passed alongside. Returns an empty array when no filter serializes.
+   */
+  private def serializeSupportedDataFilters(scan: CometDeltaScanMarker): Array[Byte] = {
+    // Kernel prunes files during log replay by evaluating the predicate against DATA-column
+    // statistics, so a filter is shipped only if every column it references is a real data
+    // column. Two kinds of reference disqualify a filter:
+    //   - SYNTHETIC columns (`__delta_internal_is_row_deleted`, `_tmp_metadata_row_index`,
+    //     `_metadata.*`, ...): not table columns at all -- kernel errors "Predicate references
+    //     unknown column". These are Spark-level filters applied ABOVE the scan.
+    //   - PARTITION columns: partition pruning is done separately in `prunePartitions`, and a
+    //     partition predicate also hits kernel's stricter type checks (e.g. a generated
+    //     partition column compared against a literal -> "Timestamp < Int64").
+    // Leaving a filter out only forgoes data skipping for it (Spark still applies it), so
+    // correctness is never affected.
+    // Lower-cased so the membership tests below are case-insensitive.
+    val excludedPartitionNames: Set[String] =
+      scan.relation.partitionSchema.fields.map(_.name.toLowerCase(Locale.ROOT)).toSet
+    // True when `filter` references neither a synthetic nor a partition column.
+    def kernelPushable(filter: org.apache.spark.sql.catalyst.expressions.Expression): Boolean =
+      !filter.references.exists { attr =>
+        val lowered = attr.name.toLowerCase(Locale.ROOT)
+        SyntheticReadFieldNames.contains(lowered) || excludedPartitionNames.contains(lowered)
+      }
+
+    // Pushable filters that do not convert to proto are left out as well.
+    val serialized = new ListBuffer[Expr]()
+    scan.supportedDataFilters.foreach { candidate =>
+      if (kernelPushable(candidate)) {
+        exprToProto(candidate, scan.output).foreach(proto => serialized += proto)
+      }
+    }
+
+    // Combines a NON-EMPTY slice into a balanced AND tree (depth O(log N) instead of O(N)).
+    // A linear left-deep fold overflows protobuf's default 100-level recursion limit for
+    // plans with many ANDed conditions (Delta data skipping predicates routinely build deep
+    // stats expressions: e.g. DataSkippingDeltaTests "remove redundant stats column
+    // references"). Both the JVM serde (CometNativeColumnarToRowExec re-parses the plan for
+    // explain output) and the Rust prost decoder are subject to that limit, so balancing the
+    // tree fixes both sides.
+    def conjoin(exprs: IndexedSeq[Expr]): Expr =
+      if (exprs.size == 1) {
+        exprs.head
+      } else {
+        val (lowerHalf, upperHalf) = exprs.splitAt(exprs.size / 2)
+        val conjunction = ExprOuterClass.BinaryExpr
+          .newBuilder()
+          .setLeft(conjoin(lowerHalf))
+          .setRight(conjoin(upperHalf))
+          .build()
+        Expr.newBuilder().setAnd(conjunction).build()
+      }
+
+    // No filter -> empty array; one filter -> its own bytes; several -> a single conjunction.
+    serialized.size match {
+      case 0 => Array.emptyByteArray
+      case 1 => serialized.head.toByteArray
+      case _ => conjoin(serialized.toIndexedSeq).toByteArray
+    }
+  }
+
+  /**
+   * Resolve the cloud storage options handed to kernel's `DefaultEngine` and the native parquet
+   * reader. Kernel picks up `aws_*` / `azure_*` keys; anything else is ignored on the native side
+   * (for now).
+   *
+   * We key off the table root URI rather than `inputFiles.head` because data file names can
+   * contain characters that aren't URI-safe when Spark's test harness injects prefixes like
+   * `test%file%prefix-` (breaks `java.net.URI.create`). The table root string comes straight from
+   * `HadoopFsRelation.location.rootPaths.head.toUri` inside `DeltaReflection.extractTableRoot`, so
+   * it's already properly encoded. Storage options are bucket-level anyway -- any file under the
+   * same root resolves to the same config.
+   *
+   * For s3/s3a tables we resolve Hadoop's credential provider chain here so log replay
+   * authenticates under SimpleAWSCredentialsProvider / TemporaryAWSCredentialsProvider /
+   * AssumedRoleCredentialProvider / IAMInstanceCredentialsProvider just like the data path does.
+   * The contrib's native engine (delta-kernel-rs's DefaultEngine backed by object_store_kernel)
+   * doesn't run core's `build_credential_provider`, so we feed it resolved static keys instead.
+   * SNAPSHOT resolution: log replay completes in seconds, well within any reasonable credential
+   * TTL.
+   */
+  private def resolveStorageOptions(
+      scan: CometDeltaScanMarker,
+      tableRoot: String): java.util.Map[String, String] = {
+    // Hadoop conf built by `newHadoopConfWithOptions` from the relation's options.
+    val rel = scan.relation
+    val conf = rel.sparkSession.sessionState.newHadoopConfWithOptions(rel.options)
+    val rootUri = java.net.URI.create(tableRoot)
+    // Object-store options are extracted for the table root first; the result is then passed
+    // through the AWS credential augmentation and handed back as a Java map.
+    val storeOptions = NativeConfig.extractObjectStoreOptions(conf, rootUri)
+    CometDeltaNativeScan.augmentWithResolvedAwsCredentials(storeOptions, rootUri, conf).asJava
+  }
+
+  /**
+   * Compute the `final_output_indices` reorder map: for each `required_schema` position, the
+   * index into the wrapped exec's output (parquet output columns followed by appended synthetics
+   * in canonical emit order). The native dispatcher applies a final ProjectionExec to reorder
+   * columns to match Spark's expected layout. Returns `Seq.empty` when no reorder is needed --
+   * either no synthetics are emitted, or they already form a correctly-ordered contiguous suffix
+   * of `required_schema`.
+   */
+  private def computeFinalOutputIndices(
+      needsSyntheticEmit: Boolean,
+      requiredSchemaForProto: Array[StructField],
+      isSynthetic: StructField => Boolean,
+      emitRowIndex: Boolean,
+      emitIsRowDeleted: Boolean,
+      emitRowId: Boolean,
+      emitRowCommitVersion: Boolean,
+      rowIndexColumnAlias: String,
+      metadataColumnNamesEmitted: Seq[String]): Seq[Int] = {
+    // All name comparisons in here are done on lower-cased names.
+    def lowerCase(name: String): String = name.toLowerCase(Locale.ROOT)
+
+    if (!needsSyntheticEmit) {
+      // The wrapped exec appends nothing, so there is nothing to reorder.
+      Seq.empty
+    } else {
+      // Native synthetic emit order in build_output_schema (synthetic_columns.rs): row_index,
+      // is_row_deleted, row_id, row_commit_version, then any metadata_column_names in the order
+      // they were added. The row_index ALIAS name is used when set (e.g.
+      // `_tmp_metadata_row_index`) so the lookup matches what's in required_schema.
+      // This list is built ONCE and feeds both the "is the suffix already in order?" check and
+      // the index mapping below, so the two can never disagree about the emit order.
+      val rowIndexEmittedName =
+        if (rowIndexColumnAlias.nonEmpty) rowIndexColumnAlias
+        else DeltaReflection.RowIndexColumnName
+      val syntheticEmitOrder: Seq[String] = (Seq(
+        (emitRowIndex, rowIndexEmittedName),
+        (emitIsRowDeleted, DeltaReflection.IsRowDeletedColumnName),
+        (emitRowId, DeltaReflection.RowIdColumnName),
+        (emitRowCommitVersion, DeltaReflection.RowCommitVersionColumnName)).collect {
+        case (true, name) => lowerCase(name)
+      }) ++ metadataColumnNamesEmitted
+
+      // A contiguous synthetic suffix is necessary but NOT sufficient: the order of synthetics
+      // within the suffix must also match the emit order. When the upstream Filter / Project
+      // binds attributes by ordinal (Delta's PreprocessTableWithDVs adds
+      // `Filter(__delta_internal_is_row_deleted = 0)` directly above the scan), an order
+      // mismatch silently misreads one synthetic as another, so a reorder Projection is forced
+      // in that case. `firstSyntheticIdx` is -1 when no required-schema field is synthetic; the
+      // suffix is only inspected when one exists.
+      val firstSyntheticIdx = requiredSchemaForProto.indexWhere(isSynthetic)
+      val suffixAlreadyInEmitOrder = firstSyntheticIdx >= 0 && {
+        val suffix = requiredSchemaForProto.drop(firstSyntheticIdx)
+        suffix.forall(isSynthetic) &&
+        suffix.map(f => lowerCase(f.name)).toSeq == syntheticEmitOrder
+      }
+
+      if (suffixAlreadyInEmitOrder) {
+        Seq.empty
+      } else {
+        // Wrapped exec output layout: the non-synthetic columns in required-schema order,
+        // followed by the synthetics in emit order.
+        // NOTE(review): when no required-schema field is synthetic this branch returns the
+        // identity over the data columns, so appended synthetics are not referenced -- confirm
+        // that is the intended behaviour.
+        val nonSyntheticFields = requiredSchemaForProto.filterNot(isSynthetic)
+        val nonSyntheticIdxByName: Map[String, Int] =
+          nonSyntheticFields.zipWithIndex.map { case (f, i) =>
+            lowerCase(f.name) -> i
+          }.toMap
+        val syntheticTailStart = nonSyntheticFields.length
+        requiredSchemaForProto.map { f =>
+          val name = lowerCase(f.name)
+          if (isSynthetic(f)) {
+            val emitIdx = syntheticEmitOrder.indexOf(name)
+            // The emit flags are expected to be derived from the same required-schema field
+            // names, so any synthetic field here should have its emit flag on -- a miss would
+            // indicate a user column collided with a reserved synthetic name AND we missed it.
+            assert(
+              emitIdx >= 0,
+              s"synthetic column '$name' in required_schema but no emit flag is set " +
+                s"(emit order: $syntheticEmitOrder)")
+            syntheticTailStart + emitIdx
+          } else {
+            nonSyntheticIdxByName(name)
+          }
+        }.toSeq
+      }
+    }
+  }
+
+
+  /**
+   * Compute Spark's `maxSplitBytes` for a Delta scan so it splits files like a vanilla
+   * `FileSourceScanExec`. Follows the formula of Spark's `FilePartition.maxSplitBytes`; here
+   * the divisor falls back to `sparkContext.defaultParallelism` and is clamped to >= 1. Inputs are
+   * file sizes (bytes); other knobs come from session conf and the relation's spark session.
+   */
+  private def maxSplitBytes(scan: CometDeltaScanMarker, fileSizes: Seq[Long]): Long = {
+    val session = scan.relation.sparkSession
+    val sqlConf = session.sessionState.conf
+    val openCost = sqlConf.filesOpenCostInBytes
+    // Divisor: the configured minimum partition count, else the context default; at least 1.
+    val parallelism =
+      math.max(1, sqlConf.filesMinPartitionNum.getOrElse(session.sparkContext.defaultParallelism))
+    // Charge every file its size plus the open cost, then spread the total over the divisor.
+    val perCoreBytes = fileSizes.foldLeft(0L)((acc, size) => acc + size + openCost) / parallelism
+    math.min(sqlConf.filesMaxPartitionBytes, math.max(openCost, perCoreBytes))
+  }
+
+  /**
+   * Expand `tasks` so any task whose file is larger than `maxSplitBytes` is replaced by a
+   * sequence of byte-range chunks. Each chunk inherits the task's metadata (partition values, DV
+   * row indexes, row-tracking ids) but carries `byte_range_start` / `byte_range_end` so the
+   * native parquet reader only materialises row groups whose start offset falls in this range.
+   *
+   * Tasks that fit in one chunk are emitted unchanged (no range fields), which preserves the
+   * original whole-file semantics on the native side.
+   *
+   * Note on DV semantics: deletion-vector indexes on the proto are absolute row positions within
+   * the file. They are copied to every chunk; the native scan filters out rows whose absolute
+   * index is in the DV regardless of which chunk produced them, so duplicating the index list
+   * across chunks is correct (just slightly wasteful).
+   */
+  private def splitTasks(
+      scan: CometDeltaScanMarker,
+      tasks: Seq[OperatorOuterClass.DeltaScanTask]): Seq[OperatorOuterClass.DeltaScanTask] = {
+    // No chunking when there is nothing to split, or when the scan needs one task per partition
+    // (per-file `_metadata.file_path`): byte-range chunks would create multiple tasks for one
+    // file which, combined with packTasks, could end up with multiple FILES per partition and
+    // drop the 2nd+ files' rows.
+    if (tasks.isEmpty || scanNeedsOneTaskPerPartition(scan)) tasks
+    else {
+      val splitLimit = maxSplitBytes(scan, tasks.map(_.getFileSize))
+      if (splitLimit <= 0) tasks
+      else
+        tasks.flatMap { task =>
+          val fileSize = task.getFileSize
+          if (fileSize <= splitLimit) Seq(task)
+          else {
+            // Consecutive [start, end) ranges of at most `splitLimit` bytes covering the file.
+            Iterator.iterate(0L)(_ + splitLimit)
+              .takeWhile(_ < fileSize)
+              .map { start =>
+                task.toBuilder
+                  .setByteRangeStart(start)
+                  .setByteRangeEnd(math.min(start + splitLimit, fileSize))
+                  .build()
+              }
+              .toVector
+          }
+        }
+    }
+  }
+
+  private def prunePartitions(
+      tasks: Seq[OperatorOuterClass.DeltaScanTask],
+      scan: CometDeltaScanMarker,
+      partitionSchema: StructType): Seq[OperatorOuterClass.DeltaScanTask] = {
+    if (scan.partitionFilters.isEmpty || partitionSchema.isEmpty) return tasks
+    // DPP expressions (DynamicPruningExpression wrapping InSubqueryExec) aren't resolved at
+    // planning time, so only the static partition filters take part in file-level pruning.
+    val staticFilters = scan.partitionFilters.filterNot(
+      _.exists(_.isInstanceOf[org.apache.spark.sql.catalyst.expressions.PlanExpression[_]]))
+    if (staticFilters.isEmpty) return tasks
+
+    // Names are compared according to the session's case-sensitivity setting.
+    val caseSensitive = scan.conf.getConf[Boolean](SQLConf.CASE_SENSITIVE)
+    def sameName(a: String, b: String): Boolean =
+      if (caseSensitive) a == b else a.toLowerCase(Locale.ROOT) == b.toLowerCase(Locale.ROOT)
+    // -1 on a miss; `StructType.fieldIndex` throws instead, which would defeat the fallback.
+    def partitionIndexOf(name: String): Int =
+      partitionSchema.fields.indexWhere(f => sameName(f.name, name))
+
+    val combined = staticFilters.reduce(And)
+    val referencedNames = combined.collect {
+      case a: org.apache.spark.sql.catalyst.expressions.AttributeReference => a.name
+    }
+    // Can't resolve every attribute against the partition schema: skip pruning.
+    if (referencedNames.exists(name => partitionIndexOf(name) < 0)) return tasks
+    val bound = combined.transform {
+      case a: org.apache.spark.sql.catalyst.expressions.AttributeReference =>
+        val idx = partitionIndexOf(a.name)
+        BoundReference(idx, partitionSchema(idx).dataType, partitionSchema(idx).nullable)
+    }
+    val predicate = InterpretedPredicate(bound)
+    predicate.initialize(0)
+
+    val sessionZoneId = java.time.ZoneId.of(scan.conf.sessionLocalTimeZone)
+    tasks.filter { task =>
+      val partitionValues = task.getPartitionValuesList.asScala
+      val row = InternalRow.fromSeq(partitionSchema.fields.toSeq.map { field =>
+        val entry = partitionValues.find(v => sameName(v.getName, field.name))
+        val strValue = entry.filter(_.hasValue).map(_.getValue)
+        DeltaReflection.castPartitionString(strValue, field.dataType, sessionZoneId)
+      })
+      predicate.eval(row)
+    }
+  }
+
+
+  def createExec(nativeOp: Operator, op: CometDeltaScanMarker): CometNativeExec = {
+    val rootOrPlaceholder = DeltaReflection.extractTableRoot(op.relation).getOrElse("unknown")
+    // Take the task-list bytes out of the ThreadLocal (empty array when nothing was stashed);
+    // the slot is cleared whether or not the read succeeds.
+    val stashedTaskList =
+      try Option(lastTaskListBytes.get()).getOrElse(Array.emptyByteArray)
+      finally lastTaskListBytes.remove()
+    // One file per Spark partition is forced when the scan reads MATERIALISED row-tracking
+    // columns (`_row-id-col-*` / `_row-commit-version-col-*`). Those are real parquet columns
+    // that exist only in files rewritten by a row-id-preserving operation
+    // (OPTIMIZE/UPDATE/MERGE) and are ABSENT from freshly-appended/inserted files. If a single
+    // Spark partition packs several such files, `core_glue` emits one parquet file-group per
+    // file (needed for per-file row_index), and reading a column that is physically absent from
+    // some of those files across the concurrently-executed file-groups non-deterministically
+    // drops whole file-groups' rows. With one file per partition every native plan stays
+    // single-file-group, so the absent-column null-fill happens without cross-file-group
+    // concurrency. (Same mechanism used for per-file `_metadata.file_path`.) See
+    // CometDeltaRowTrackingMergeReproSuite.
+    val readsMaterializedRowTracking = op.requiredSchema.fields.exists { field =>
+      CometDeltaNativeScan.isMaterializedRowTrackingName(field.name)
+    }
+    val pinOneTaskPerPartition =
+      scanNeedsOneTaskPerPartition(op) || readsMaterializedRowTracking ||
+        CometDeltaNativeScan.needsPerFileGroups(op)
+
+    // Only the partition filters that contain a `PlanExpression` are handed to the exec.
+    val planExpressionFilters = op.partitionFilters.filter { filter =>
+      filter.exists(_.isInstanceOf[org.apache.spark.sql.catalyst.expressions.PlanExpression[_]])
+    }
+    val scanExec = CometDeltaNativeScanExec(
+      nativeOp,
+      op.output,
+      org.apache.spark.sql.comet.SerializedPlan(None),
+      op.wrapped,
+      rootOrPlaceholder,
+      stashedTaskList,
+      planExpressionFilters,
+      op.relation.partitionSchema,
+      oneTaskPerPartition = pinOneTaskPerPartition)
+    // `op.wrapped` (== exec.originalPlan) is the original, link-bearing scan (kept through
+    // DeltaScanRule's rebuild). CometExecRule's "set up logical links" pass keys off
+    // originalPlan.logicalLink and so sets the exec's link, satisfying AQE's
+    // setLogicalLinkForNewQueryStage assertion; it is set here as well for good measure.
+    op.wrapped.logicalLink.foreach(scanExec.setLogicalLink)
+    scanExec
+  }
+}
diff --git a/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/Native.scala b/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/Native.scala
new file mode 100644
index 0000000000..92fc644a38
--- /dev/null
+++ b/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/Native.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.delta
+
+import org.apache.comet.NativeBase
+
+/**
+ * Contrib-local JVM handle to the Delta-specific native entry points.
+ *
+ * Extends `NativeBase` so the libcomet load triggers on first use of any subclass -- the contrib
+ * doesn't reload the library itself (there is exactly one libcomet at runtime), but inheriting
+ * from `NativeBase` ensures the static initializer ordering works the same way as core's
+ * `org.apache.comet.Native`. The `@native` methods below bind to the
+ * `Java_org_apache_comet_contrib_delta_Native_*` symbols exported by the contrib's Rust crate
+ * (compiled INTO libcomet via the `contrib-delta` Cargo feature on `native/core`).
+ */
+class Native extends NativeBase {
+
+  /**
+   * Driver-side Delta log replay. Returns a prost-encoded `DeltaScanTaskList` proto (raw bytes)
+   * which the caller decodes via `DeltaScanTaskList.parseFrom(...)`.
+   *
+   * @param tableUrl
+   *   absolute URL or bare path of the Delta table root
+   * @param snapshotVersion
+   *   `-1` for the latest snapshot, otherwise an exact version
+   * @param storageOptions
+   *   cloud credentials / endpoint overrides (Hadoop-style keys)
+   * @param predicateBytes
+   *   prost-encoded Catalyst data filter for kernel-side stats-based file pruning, or an empty
+   *   array for no predicate
+   * @param columnNames
+   *   logical column names the caller requires (kernel uses this for column-mapping resolution
+   *   before stats-based file pruning).
+   * @param projectedSchemaJson
+   *   the query's data-read columns in pure-logical names at every nesting level (Spark
+   *   `requiredSchema` minus partition + synthetic columns) as a JSON string -- presumably the
+   *   Delta schema JSON described on [[planDeltaReadSchemas]] (TODO confirm). Drives
+   *   `scan.with_schema(...)` so the returned `DeltaScanTaskList` carries kernel's projected
+   *   `physical_schema` / `logical_schema`. Empty for no projection (no kernel schemas returned).
+   * @return
+   *   `byte[]` containing the encoded DeltaScanTaskList
+   */
+  @native def planDeltaScan(
+      tableUrl: String,
+      snapshotVersion: Long,
+      storageOptions: java.util.Map[String, String],
+      predicateBytes: Array[Byte],
+      columnNames: Array[String],
+      projectedSchemaJson: String): Array[Byte]
+
+  /**
+   * Schema-only companion to [[planDeltaScan]] for the batch-file-index read path (file list comes
+   * from Delta `AddFile`s, but the kernel-read executor still needs kernel's resolved
+   * physical/logical schemas). Returns a `DeltaScanTaskList` with only `physical_schema` /
+   * `logical_schema` set (Arrow IPC). `projectedSchemaJson` is the data-read schema as Delta schema
+   * JSON (`StructType.json`, carrying column-mapping physicalName/id from the analysis-time or
+   * snapshot schema); empty string => zero data columns, no schemas returned.
+   */
+  @native def planDeltaReadSchemas(
+      tableUrl: String,
+      snapshotVersion: Long,
+      storageOptions: java.util.Map[String, String],
+      projectedSchemaJson: String): Array[Byte]
+}
diff --git a/contrib/delta/src/main/scala/org/apache/spark/sql/comet/CometDeltaNativeScanExec.scala b/contrib/delta/src/main/scala/org/apache/spark/sql/comet/CometDeltaNativeScanExec.scala
new file mode 100644
index 0000000000..c5dd2a5280
--- /dev/null
+++ b/contrib/delta/src/main/scala/org/apache/spark/sql/comet/CometDeltaNativeScanExec.scala
@@ -0,0 +1,554 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.comet
+
+import java.util.Locale
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnknownPartitioning}
+import org.apache.spark.sql.execution.{FileSourceScanExec, InSubqueryExec, SparkPlan}
+import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.vectorized.ColumnarBatch
+import org.apache.spark.util.AccumulatorV2
+
+import com.google.common.base.Objects
+
+import org.apache.comet.serde.OperatorOuterClass
+import org.apache.comet.serde.OperatorOuterClass.Operator
+
+/**
+ * Native Delta Lake scan operator with split-mode serialization and DPP support.
+ *
+ * Common scan metadata (schemas, filters, projections, storage options, column mappings) is
+ * serialized once at planning time in `nativeOp`. Per-partition file lists are materialized
+ * lazily in `serializedPartitionData` at execution time so each Spark task receives only its own
+ * slice of the file list, reducing driver memory.
+ *
+ * DPP (Dynamic Partition Pruning) is supported by deferring partition pruning of DPP expressions
+ * to execution time. Static partition filters are applied at planning time in
+ * `CometDeltaNativeScan.prunePartitions`; DPP filters are resolved in `serializedPartitionData`.
+ */
+case class CometDeltaNativeScanExec(
+    override val nativeOp: Operator,
+    override val output: Seq[Attribute],
+    override val serializedPlanOpt: SerializedPlan,
+    @transient originalPlan: FileSourceScanExec,
+    tableRoot: String,
+    @transient taskListBytes: Array[Byte],
+    @transient dppFilters: Seq[Expression] = Seq.empty,
+    partitionSchema: StructType = new StructType(),
+    /**
+     * When true, `packTasks` emits one group (= one partition) per task so the native plan's
+     * per-file file-groups stay 1:1 with Spark partitions (Spark consumes a single DataFusion
+     * partition per Spark partition, so multiple files in one partition would drop the 2nd+
+     * files' rows). Set by `CometDeltaNativeScan.createExec` when the scan projects per-file
+     * `_metadata.file_path`, reads materialized row-tracking columns, or otherwise needs
+     * per-file groups.
+     */
+    oneTaskPerPartition: Boolean = false)
+    extends CometLeafExec
+    with org.apache.spark.sql.comet.CometScanWithPlanData {
+
+  override val supportsColumnar: Boolean = true
+
+  override val nodeName: String = s"CometDeltaNativeScan $tableRoot"
+
+  // DPP support. On a partitioned Delta scan the AQE DPP subquery first shows up as an
+  // unexecutable placeholder: CometExecRule wraps Spark's SubqueryAdaptiveBroadcastExec
+  // into CometSubqueryAdaptiveBroadcastExec, and CometPlanAdaptiveDynamicPruningFilters
+  // later rewrites it into an executable (Comet)SubqueryBroadcastExec with proper
+  // broadcast reuse. Such a rewrite would normally yield a copy of this scan, but that
+  // copy is dropped when the enclosing native block is rebuilt (TreeNode.makeCopy can't
+  // carry @transient fields, #3510). The rule therefore installs the rewrite IN PLACE
+  // through `withDynamicPruningFilters` (below): it updates this transient side-channel
+  // and returns `this`, so the executable subqueries land on the SAME instance that
+  // executes. The `dppFilters` case-class field is never touched, which keeps node
+  // equality/canonicalization unaffected; everything at execution time reads
+  // `effectiveDppFilters` instead.
+  // `@volatile`: written during query-stage optimization and read during execution
+  // (driver-thread-confined in practice, but volatile guards against AQE re-planning on
+  // a different thread). `null` means "no override installed"; it is also what a
+  // `@transient` field holds after Java deserialization, so the sentinel stays valid.
+  @transient @volatile private var dppFiltersOverride: Seq[Expression] = null
+
+  private def effectiveDppFilters: Seq[Expression] =
+    Option(dppFiltersOverride).getOrElse(dppFilters)
+
+  override def dynamicPruningFilters: Seq[Expression] = effectiveDppFilters
+
+  override def withDynamicPruningFilters(filters: Seq[Expression]): SparkPlan = {
+    dppFiltersOverride = filters
+    this
+  }
+
+  /**
+   * True for an adaptive-broadcast DPP placeholder (Spark's or Comet's wrapper), which throws
+   * from `doExecute()`; callers skip these and read all partitions rather than crash.
+   */
+  private def isUnexecutableDpp(plan: SparkPlan): Boolean = plan match {
+    case _: org.apache.spark.sql.execution.SubqueryAdaptiveBroadcastExec => true
+    case _: CometSubqueryAdaptiveBroadcastExec => true
+    case _ => false
+  }
+
+  override protected def doPrepare(): Unit = {
+    // Only `prepare()` is invoked here (never execute), which is safe for every DPP
+    // subquery plan, adaptive-broadcast placeholders included.
+    val dppSubqueryPlans = effectiveDppFilters.collect {
+      case DynamicPruningExpression(inSubquery: InSubqueryExec) => inSubquery.plan
+    }
+    // Prepare the subqueries first, then defer to the regular SparkPlan preparation.
+    dppSubqueryPlans.foreach(_.prepare())
+    super.doPrepare()
+  }
+
+  // Materialises the result of every DPP subquery that is executable and not resolved
+  // yet. Adaptive-broadcast PLACEHOLDERS (CometSubqueryAdaptiveBroadcastExec /
+  // SubqueryAdaptiveBroadcastExec) throw from doExecute() and are left alone. If the
+  // optimizer rule's in-place rewrite reached this instance, `effectiveDppFilters` holds
+  // the executable form and pruning applies; otherwise the scan reads all partitions
+  // (correct, the surrounding Filter/join still prunes). `applyDppFilters` skips likewise.
+  private def resolveExecutableDppSubqueries(): Unit = {
+    effectiveDppFilters.foreach {
+      case DynamicPruningExpression(dppSubquery: InSubqueryExec) =>
+        val executable = !isUnexecutableDpp(dppSubquery.plan)
+        if (executable && dppSubquery.values().isEmpty) dppSubquery.updateResult()
+      case _ =>
+    }
+  }
+
+  // Comet's native-scan subquery lifecycle hook (see CometLeafExec), taken when this scan
+  // is fused inside a parent native block (findAllPlanData path): prepare, then resolve.
+  override def ensureSubqueriesResolved(): Unit = {
+    prepare()
+    resolveExecutableDppSubqueries()
+  }
+
+  // Standard Spark lifecycle path (executeColumnar -> executeQuery -> waitForSubqueries),
+  // taken when this scan is a native-block ROOT executed directly (e.g. the child of a
+  // CometNativeColumnarToRowExec, as in a MERGE target read). The inherited implementation
+  // would execute EVERY collected subquery -- including an unconverted
+  // CometSubqueryAdaptiveBroadcastExec (the in-place DPP rewrite is lost whenever the plan
+  // is copied after the rule runs, since `dppFiltersOverride` is not a constructor field)
+  // -- and crash. This override resolves only the executable ones, mirroring
+  // `ensureSubqueriesResolved`. Skipping `super` is safe because the native scan has no
+  // subqueries other than its DPP partition filters.
+  override def waitForSubqueries(): Unit = resolveExecutableDppSubqueries()
+
+  // Serialized `common` block carried by the typed DeltaScan variant of OpStruct.
+  @transient private lazy val commonBytes: Array[Byte] =
+    nativeOp.getDeltaScan.getCommon.toByteArray
+
+  // All planned tasks, decoded once from the `DeltaScanTaskList` in `taskListBytes`.
+  // NOTE(review): unlike `metrics` / `stringArgs`, a null `taskListBytes` (as set by
+  // `doCanonicalize`) is not guarded here -- confirm no caller can reach this then.
+  @transient private lazy val allTasks: Seq[OperatorOuterClass.DeltaScanTask] = {
+    val taskList = OperatorOuterClass.DeltaScanTaskList.parseFrom(taskListBytes)
+    taskList.getTasksList.asScala.toSeq
+  }
+
+  /**
+   * Expose this scan's tasks as a synthetic `Seq[FilePartition]`: one `PartitionedFile` per
+   * task (partition values carried as an `InternalRow`), each in its own `FilePartition`.
+   * Delta tests (e.g. `DeltaSinkSuite`) inspect `executedPlan.collect[DataSourceScanExec]` and
+   * read `inputRDDs.head.asInstanceOf[FileScanRDD].filePartitions` to verify partition pruning;
+   * under Comet they find nothing because this exec replaces the scan. The test diff in
+   * `dev/diffs/delta/<version>.diff` patches the helper to fall back to this accessor, so the
+   * same partition-pruning assertions pass against Comet's scan.
+   */
+  def synthesizedFilePartitions: Seq[org.apache.spark.sql.execution.datasources.FilePartition] = {
+    if (allTasks.isEmpty) {
+      Nil
+    } else {
+      val zoneId = java.time.ZoneId.of(SQLConf.get.sessionLocalTimeZone)
+      allTasks.zipWithIndex.map { case (task, index) =>
+        val protoValues = task.getPartitionValuesList.asScala
+        // Partition values travel as strings; cast each one to its declared Spark type.
+        val partitionRow = InternalRow.fromSeq(partitionSchema.fields.toSeq.map { field =>
+          val raw = protoValues.find(_.getName == field.name).filter(_.hasValue).map(_.getValue)
+          org.apache.comet.contrib.delta.DeltaReflection
+            .castPartitionString(raw, field.dataType, zoneId)
+        })
+        // A task covers an explicit byte range when both bounds are set, else the whole file.
+        val ranged = task.hasByteRangeStart && task.hasByteRangeEnd
+        val file = org.apache.spark.sql.execution.datasources.PartitionedFile(
+          partitionValues = partitionRow,
+          filePath = org.apache.spark.paths.SparkPath.fromUrlString(task.getFilePath),
+          start = if (task.hasByteRangeStart) task.getByteRangeStart else 0L,
+          length =
+            if (ranged) task.getByteRangeEnd - task.getByteRangeStart else task.getFileSize,
+          modificationTime = 0L,
+          fileSize = task.getFileSize)
+        // One file per partition, numbered by task position.
+        org.apache.spark.sql.execution.datasources.FilePartition(index, Array(file))
+      }
+    }
+  }
+
+  /**
+   * Serialises one `DeltaScan` message per task group from the current DPP-pruned task
+   * list. DPP filters that are still `SubqueryAdaptiveBroadcastExec` placeholders at planning
+   * time materialise lazily once AQE runs the broadcast; because this is recomputed on each
+   * call (e.g. from `doExecuteColumnar`) instead of being memoised in a lazy val, it picks up
+   * the resolved values and actually skips pruned files rather than reading the full table.
+   */
+  private def buildPerPartitionBytes(): Array[Array[Byte]] = {
+    // ALL tasks are grouped once (`taskGroups`), which fixes the partition COUNT
+    // regardless of DPP -- Spark pins `numPartitions` at planning and the native RDD's
+    // partition count must not change at execution. DPP pruning therefore happens
+    // WITHIN each group: pruned-out tasks are dropped, and a group whose tasks are all
+    // pruned becomes an empty DeltaScan (0 rows) while its partition slot remains, so
+    // the count stays stable. This lets DPP prune even when the scan executes inside a
+    // parent native block (MERGE/join), where the parent reads `perPartitionData`
+    // rather than running the scan's own `doExecuteColumnar`.
+    val groups = taskGroups
+    if (groups.isEmpty) {
+      Array.empty[Array[Byte]]
+    } else {
+      // Gate on `effectiveDppFilters` (the rule's in-place rewrite), not the raw
+      // `dppFilters`, so pruning uses the executable converted form when present.
+      val dppActive = effectiveDppFilters.nonEmpty && partitionSchema.nonEmpty
+      val survivorPaths: Option[Set[String]] =
+        if (dppActive) Some(applyDppFilters(allTasks).map(_.getFilePath).toSet) else None
+      groups.map { group =>
+        val kept = survivorPaths.fold(group) { alive =>
+          group.filter(task => alive.contains(task.getFilePath))
+        }
+        val scan = OperatorOuterClass.DeltaScan.newBuilder()
+        // The table root travels to the executor because the executor-side DV decoder
+        // needs it (kernel `absolute_path` joins `_delta_log/deletion_vectors/...` onto
+        // it); setting it is harmless even when no task in this partition has a DV.
+        if (tableRoot != null && tableRoot.nonEmpty) scan.setTableRoot(tableRoot)
+        kept.foreach(scan.addTasks)
+        scan.build().toByteArray
+      }.toArray
+    }
+  }
+
+  // Groups tasks into partitions. With `oneTaskPerPartition` set (per-file
+  // `_metadata.file_path` / materialized row-tracking / per-file groups) packing is skipped
+  // and every task gets its own partition, keeping per-file file-groups 1:1 with partitions.
+  private def packTasks(
+      tasks: Seq[OperatorOuterClass.DeltaScanTask]): Seq[Seq[OperatorOuterClass.DeltaScanTask]] = {
+    type Task = OperatorOuterClass.DeltaScanTask
+    if (oneTaskPerPartition) {
+      tasks.map(task => Seq(task))
+    } else {
+      val session = originalPlan.relation.sparkSession
+      val sqlConf = session.sessionState.conf
+      val openCost = sqlConf.filesOpenCostInBytes
+      val minPartitions =
+        sqlConf.filesMinPartitionNum.getOrElse(session.sparkContext.defaultParallelism)
+      // A task weighs its byte range when both bounds are present, else the whole file.
+      def bytesOf(t: Task): Long = {
+        val ranged = t.hasByteRangeStart && t.hasByteRangeEnd
+        if (ranged) math.max(0L, t.getByteRangeEnd - t.getByteRangeStart) else t.getFileSize
+      }
+      // Target group size: per-core share of (bytes + open cost), bounded by the conf.
+      val perCore = tasks.map(t => bytesOf(t) + openCost).sum / math.max(1, minPartitions)
+      val targetBytes = math.min(sqlConf.filesMaxPartitionBytes, math.max(openCost, perCore))
+      val start = (Vector.empty[Seq[Task]], Vector.empty[Task], 0L)
+      // Next-fit: close the open group when adding the task would exceed the target.
+      val (closed, open, _) = tasks.foldLeft(start) { case ((done, cur, curBytes), task) =>
+        val size = bytesOf(task)
+        if (curBytes + size > targetBytes && cur.nonEmpty) {
+          (done :+ cur, Vector(task), size + openCost)
+        } else {
+          (done, cur :+ task, curBytes + size + openCost)
+        }
+      }
+      if (open.nonEmpty) closed :+ open else closed
+    }
+  }
+
+  // Stable task grouping = the partition layout. It is computed once from ALL tasks, so
+  // the partition count is fixed across planning and execution (DPP prunes tasks WITHIN
+  // groups and never changes the group count). `numPartitions` reads this directly,
+  // which means counting partitions never triggers DPP broadcast resolution.
+  //
+  // An empty scan (zero tasks -- e.g. a DELETE that matches nothing, or a DV-maintenance
+  // read pruned to zero files) still gets ONE empty group. `outputPartitioning` floors
+  // the partition count to `max(1, numPartitions)`, so the per-partition data MUST also
+  // have one (empty) entry; otherwise `NativeExecContext`'s "all per-partition arrays
+  // must have length numPartitions" check trips when this scan is fused into a parent
+  // native block.
+  // (Repro: CometDeltaDeleteWithDVReproSuite.)
+  @transient private lazy val taskGroups: Seq[Seq[OperatorOuterClass.DeltaScanTask]] =
+    if (allTasks.isEmpty) Seq(Seq.empty) else packTasks(allTasks)
+
+  private def applyDppFilters(
+      tasks: Seq[OperatorOuterClass.DeltaScanTask]): Seq[OperatorOuterClass.DeltaScanTask] = {
+    // Prune `tasks` by evaluating the resolved DPP filters against each task's partition
+    // values. By execution time the rule has normally installed executable
+    // (Comet)SubqueryBroadcastExec subqueries in place (see `withDynamicPruningFilters`).
+    // Pruning is best-effort: an unexecutable placeholder, a subquery that fails to
+    // resolve, or a filter column missing from `partitionSchema` all fall back to
+    // returning every task (correct, just unpruned) rather than crashing.
+    val dppSubqueries = effectiveDppFilters.collect {
+      case DynamicPruningExpression(inSub: InSubqueryExec) => inSub
+    }
+    if (dppSubqueries.exists(inSub => isUnexecutableDpp(inSub.plan))) return tasks
+    val resolvedFilters: Seq[Expression] =
+      try {
+        effectiveDppFilters.map {
+          case DynamicPruningExpression(inSub: InSubqueryExec) =>
+            if (inSub.values().isEmpty) inSub.updateResult()
+            inSub
+          case DynamicPruningExpression(e) => e
+          case other => other
+        }
+      } catch {
+        case scala.util.control.NonFatal(e) =>
+          logWarning(s"DPP subquery resolution failed for $tableRoot; reading unpruned", e)
+          return tasks
+      }
+    if (resolvedFilters.isEmpty) return tasks
+    val caseSensitive = SQLConf.get.getConf[Boolean](SQLConf.CASE_SENSITIVE)
+    def norm(name: String): String = if (caseSensitive) name else name.toLowerCase(Locale.ROOT)
+    def indexOf(name: String): Int = partitionSchema.fieldNames.indexWhere(norm(_) == norm(name))
+    val combined = resolvedFilters.reduce(And)
+    // Check up front so the `transform` closure needs no exception-based non-local `return`.
+    val referenced = combined.collect { case a: AttributeReference => a }
+    if (referenced.exists(a => indexOf(a.name) < 0)) return tasks
+    val bound = combined.transform { case a: AttributeReference =>
+      val idx = indexOf(a.name)
+      BoundReference(idx, partitionSchema(idx).dataType, partitionSchema(idx).nullable)
+    }
+    val predicate = InterpretedPredicate(bound)
+    predicate.initialize(0)
+
+    val sessionZoneId = java.time.ZoneId.of(SQLConf.get.sessionLocalTimeZone)
+    tasks.filter { task =>
+      val values = task.getPartitionValuesList.asScala
+      val row = InternalRow.fromSeq(partitionSchema.fields.toSeq.map { field =>
+        val strValue = values.find(_.getName == field.name).filter(_.hasValue).map(_.getValue)
+        org.apache.comet.contrib.delta.DeltaReflection
+          .castPartitionString(strValue, field.dataType, sessionZoneId)
+      })
+      predicate.eval(row)
+    }
+  }
+
+  // Serialized `common` block of this scan's DeltaScan operator.
+  def commonData: Array[Byte] = commonBytes
+  // Recomputed on every call (not memoised): when a parent native block reads this at
+  // execution, after AQE has materialised the DPP broadcast, the per-partition task lists
+  // reflect DPP pruning. `taskGroups` fixes the partition COUNT; only tasks get pruned.
+  def perPartitionData: Array[Array[Byte]] = buildPerPartitionBytes()
+
+  /**
+   * Key that matches this scan's common/per-partition data to its operator in the native
+   * plan. It must be distinct across multiple Delta scans in one plan tree -- e.g. a
+   * self-join reading two snapshot versions of the same table, where `tableRoot` alone is
+   * not unique. `DeltaPlanDataInjector.getKey` derives it identically from the serialized
+   * `DeltaScanCommon` proto so the driver-side map and the executor-side lookup agree.
+   * Mirrors the pattern used by `CometNativeScanExec.sourceKey`.
+   */
+  def sourceKey: String = CometDeltaNativeScanExec.computeSourceKey(nativeOp)
+
+  // One Spark partition per task group; does not depend on DPP.
+  def numPartitions: Int = taskGroups.length
+
+  // Never advertises fewer than one partition.
+  override lazy val outputPartitioning: Partitioning =
+    UnknownPartitioning(math.max(1, numPartitions))
+
+  override lazy val outputOrdering: Seq[SortOrder] = Nil
+
+  // SQLMetric whose `merge` and `reset` are no-ops, so a value assigned with `set` is
+  // not changed by accumulator merges or resets. Used below for planning-time numbers
+  // that are computed once on the driver.
+  private class ImmutableSQLMetric(metricType: String) extends SQLMetric(metricType, 0) {
+    override def merge(other: AccumulatorV2[Long, Long]): Unit = {}
+    override def reset(): Unit = {}
+  }
+
+  override lazy val metrics: Map[String, SQLMetric] = {
+    // Planning-time numbers exist only while `taskListBytes` is still present.
+    val plannedTasks = Option(taskListBytes)
+      .map(bytes => OperatorOuterClass.DeltaScanTaskList.parseFrom(bytes))
+
+    // The output-row metric is registered under two keys: `output_rows` is the
+    // Comet-native-side name (used by the metric collector on the native side), and
+    // `numOutputRows` is the name Spark's streaming ProgressReporter reads
+    // (`extractSourceToNumInputRows`) to populate `q.recentProgress.numInputRows`.
+    // Without the `numOutputRows` alias, streaming workloads that this scan feeds report
+    // 0 input rows per batch even when data flows correctly --
+    // DeltaSourceSuiteBase.CheckProgress then fails with
+    // "Execute: 0 did not equal N Expected batches don't match".
+    val outputRows = SQLMetrics.createMetric(sparkContext, "number of output rows")
+    val numSplits = SQLMetrics.createMetric(sparkContext, "number of file splits processed")
+    val runtimeMetrics = Map[String, SQLMetric](
+      "output_rows" -> outputRows,
+      "numOutputRows" -> outputRows,
+      "num_splits" -> numSplits)
+
+    val planningMetrics = plannedTasks.fold(Map.empty[String, SQLMetric]) { taskList =>
+      val fileCount = new ImmutableSQLMetric("sum")
+      fileCount.set(taskList.getTasksCount.toLong)
+      sparkContext.register(fileCount, "total files")
+
+      val dvFileCount = new ImmutableSQLMetric("sum")
+      dvFileCount.set(taskList.getTasksList.asScala.count(_.hasDv).toLong)
+      sparkContext.register(dvFileCount, "files with deletion vectors")
+
+      // The `numFiles` alias mirrors Spark's `FileSourceScanExec` metric name, so tests
+      // like DeltaSuite.scala "query with predicates should skip partitions" -- which
+      // read `metrics.get("numFiles")` to verify partition skipping -- find the same
+      // value on Comet's scan exec.
+      Map("total_files" -> fileCount, "numFiles" -> fileCount, "dv_files" -> dvFileCount)
+    }
+
+    runtimeMetrics ++ planningMetrics
+  }
+
+  override def doExecuteColumnar(): RDD[ColumnarBatch] = {
+    val nativeMetrics = CometMetricNode.fromCometPlan(this)
+    val serializedPlan = CometExec.serializeNativePlan(nativeOp)
+    // Per-partition task lists are built now, at execution time, rather than memoised at
+    // planning: DPP pruning then sees the broadcast results AQE has materialised. When no
+    // DPP filter applies, every task group is emitted unpruned.
+    val partitionBytes = buildPerPartitionBytes()
+    // Encryption wiring mirrors `CometNativeScanExec`: when parquet encryption is enabled
+    // on the table's hadoop conf, the conf is broadcast to executors and every input file
+    // path is gathered (so the parquet reader can decrypt per file).
+    val session = originalPlan.relation.sparkSession
+    val hadoopConf =
+      session.sessionState.newHadoopConfWithOptions(originalPlan.relation.options)
+    val encryptionOn = org.apache.comet.parquet.CometParquetUtils.encryptionEnabled(hadoopConf)
+    val (confBroadcastOpt, encryptedPaths) =
+      if (!encryptionOn) {
+        (None, Seq.empty[String])
+      } else {
+        val confBroadcast = session.sparkContext
+          .broadcast(new org.apache.spark.util.SerializableConfiguration(hadoopConf))
+        val scannedPaths = partitionBytes.flatMap { bytes =>
+          OperatorOuterClass.DeltaScan.parseFrom(bytes).getTasksList.asScala.map(_.getFilePath)
+        }.toSeq
+        (Some(confBroadcast), scannedPaths)
+      }
+    // Leaf scan: no input RDDs; common and per-partition data are keyed by `sourceKey`.
+    CometExecRDD(
+      sparkContext,
+      inputRDDs = Seq.empty,
+      commonByKey = Map(sourceKey -> commonData),
+      perPartitionByKey = Map(sourceKey -> partitionBytes),
+      serializedPlan = serializedPlan,
+      numPartitions = partitionBytes.length,
+      numOutputCols = output.length,
+      nativeMetrics = nativeMetrics,
+      subqueries = Seq.empty,
+      broadcastedHadoopConfForEncryption = confBroadcastOpt,
+      encryptedFilePaths = encryptedPaths)
+  }
+
+  override def convertBlock(): CometDeltaNativeScanExec = {
+    val newSerializedPlan =
+      if (serializedPlanOpt.isEmpty) SerializedPlan(Some(CometExec.serializeNativePlan(nativeOp)))
+      else serializedPlanOpt
+    // IMPORTANT: forward `oneTaskPerPartition` explicitly. It is the last constructor
+    // param and defaults to false, so omitting it would silently downgrade the flag,
+    // packing multiple files into one partition and dropping the 2nd+ files' rows for
+    // scans that emit per-file `_metadata.file_path` / materialized row-tracking columns.
+    val rebuilt = CometDeltaNativeScanExec(
+      nativeOp,
+      output,
+      newSerializedPlan,
+      originalPlan,
+      tableRoot,
+      taskListBytes,
+      dppFilters,
+      partitionSchema,
+      oneTaskPerPartition)
+    // Forward the in-place DPP rewrite too: `dppFiltersOverride` is not a constructor
+    // field, so without this the rebuilt exec would fall back to the raw `dppFilters`
+    // (possibly an unexecutable placeholder) and skip DPP pruning.
+    rebuilt.dppFiltersOverride = dppFiltersOverride
+    rebuilt
+  }
+
+  // Canonical form: output attributes normalized; serialized plan, original plan, task list
+  // and DPP filters cleared.
+  override protected def doCanonicalize(): CometDeltaNativeScanExec = copy(
+    output = output.map(QueryPlan.normalizeExpressions(_, output)),
+    serializedPlanOpt = SerializedPlan(None),
+    originalPlan = null,
+    taskListBytes = null,
+    dppFilters = Seq.empty)
+
+  // Arguments rendered in the plan string: the output attributes followed by
+  // "<tableRoot> (<n> files[, dpp=[...]])".
+  override def stringArgs: Iterator[Any] = {
+    // The file count is decoded from the planning-time task list and reported as 0 once
+    // `taskListBytes` is no longer available.
+    val fileCount = Option(taskListBytes)
+      .map(bytes => OperatorOuterClass.DeltaScanTaskList.parseFrom(bytes).getTasksCount)
+      .getOrElse(0)
+    // Rendered from the `dppFilters` constructor field rather than the in-place
+    // override.
+    val dppSuffix =
+      if (dppFilters.isEmpty) "" else s", dpp=${dppFilters.mkString("[", ", ", "]")}"
+    Iterator(output, s"$tableRoot ($fileCount files$dppSuffix)")
+  }
+
+  // Equality overrides the case-class default and compares only `tableRoot`, `output`,
+  // `serializedPlanOpt` and `sourceKey`. `sourceKey` is included so two scans of the same
+  // table at different snapshot versions are NOT equal; otherwise Spark's
+  // ReuseExchangeAndSubquery rule collapses a self-join across versions into a single
+  // exchange and reuses v0's shuffle output for both sides of the join.
+  // NOTE(review): task list and DPP filters are not compared; confirm that is intended.
+  override def equals(obj: Any): Boolean = obj match {
+    case that: CometDeltaNativeScanExec =>
+      val sameShape = tableRoot == that.tableRoot && output == that.output
+      sameShape && serializedPlanOpt == that.serializedPlanOpt && sourceKey == that.sourceKey
+    case _ => false
+  }
+
+  override def hashCode(): Int =
+    Objects.hashCode(tableRoot, output.asJava, serializedPlanOpt, sourceKey)
+}
+
+object CometDeltaNativeScanExec {
+
+  /**
+   * Derives a stable per-scan key from a `DeltaScan` operator proto. It must be deterministic
+   * and identical between the driver side (`CometDeltaNativeScanExec.sourceKey`) and the
+   * injector side (`DeltaPlanDataInjector.getKey`). `snapshot_version` is part of the key so
+   * two scans of the same table at different time-travel versions get distinct keys --
+   * otherwise `findAllPlanData` collapses their per-partition data into a single map entry and
+   * one scan inherits the other's file list. NOTE(review): only table root, snapshot version
+   * and required schema are hashed; confirm scans differing only in filters cannot collide.
+   */
+  def computeSourceKey(nativeOp: Operator): String = {
+    val scanCommon = nativeOp.getDeltaScan.getCommon
+    val fingerprint = Seq(
+      scanCommon.getTableRoot,
+      scanCommon.getSnapshotVersion.toString,
+      scanCommon.getRequiredSchemaList.toString).mkString("|").hashCode
+    s"${scanCommon.getSource}_$fingerprint"
+  }
+}
diff --git a/contrib/delta/src/main/scala/org/apache/spark/sql/comet/DeltaPlanDataInjector.scala b/contrib/delta/src/main/scala/org/apache/spark/sql/comet/DeltaPlanDataInjector.scala
new file mode 100644
index 0000000000..ad9bad0316
--- /dev/null
+++ b/contrib/delta/src/main/scala/org/apache/spark/sql/comet/DeltaPlanDataInjector.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.comet
+
+import org.apache.comet.serde.OperatorOuterClass
+import org.apache.comet.serde.OperatorOuterClass.Operator
+
+/**
+ * `PlanDataInjector` for the typed `OpStruct::DeltaScan` operator.
+ *
+ * To keep the closure shipped to every task small, the contrib serialises a Delta scan in
+ * two parts:
+ *   - At planning time `CometDeltaNativeScan.convert` emits a `DeltaScan` proto holding
+ *     the `common` block (schemas, table root, filters, ...) and NO tasks; it lands in the
+ *     `Operator` tree as the typed variant `OpStruct.delta_scan`.
+ *   - Per partition, `CometDeltaNativeScanExec` stores that partition's `DeltaScan`
+ *     (tasks-only) bytes in `perPartitionByKey` under a `sourceKey` derived from the
+ *     common block.
+ *
+ * Core's `PlanDataInjector.injectPlanData` finds this object through the reflective
+ * `Class.forName("org.apache.spark.sql.comet.DeltaPlanDataInjector")` lookup added to
+ * `PlanDataInjector.injectors`; default builds have no DeltaPlanDataInjector class on the
+ * classpath, which leaves the injector list unchanged.
+ *
+ * Without this injection the native side decodes a tasks-empty `DeltaScan` -> `EmptyExec`
+ * (0 rows) for every Delta scan.
+ */
+object DeltaPlanDataInjector extends PlanDataInjector {
+
+  override val opStructCase: Operator.OpStructCase = Operator.OpStructCase.DELTA_SCAN
+
+  override def canInject(op: Operator): Boolean = {
+    // Only a Delta scan that carries no tasks yet qualifies. The common-only proto built
+    // at planning time has zero tasks; once injected, the operator holds the partition's
+    // tasks and is skipped, which makes the check idempotent for regular reads.
+    //
+    // A CDF read always has zero tasks (it carries a version sub-range, not files), so it
+    // still qualifies after the CDF branch of `inject` ran. That is intentionally NOT
+    // guarded: `PlanDataInjector.injectPlanData` walks each operator exactly once per
+    // partition (CometExecRDD.compute -> one inject per op).
+    op.hasDeltaScan && op.getDeltaScan.getTasksCount == 0
+  }
+
+  override def getKey(op: Operator): Option[String] =
+    Some(CometDeltaNativeScanExec.computeSourceKey(op))
+
+  override def inject(
+      op: Operator,
+      commonBytes: Array[Byte],
+      partitionBytes: Array[Byte]): Operator = {
+    // `partitionBytes` is a serialised `DeltaScan` holding only this partition's tasks (no
+    // common block, so schemas are not duplicated per partition). Its tasks are spliced
+    // into the common-only envelope already present in `op`; `commonBytes` is not read.
+    val partScan = OperatorOuterClass.DeltaScan.parseFrom(partitionBytes)
+    val envelope = op.getDeltaScan
+    val merged = envelope.toBuilder.addAllTasks(partScan.getTasksList)
+    // CDF version-range split: a Change Data Feed read carries no tasks. Its per-partition
+    // DeltaScan instead packs this partition's inclusive cdf sub-range in a minimal common
+    // (marked by cdf_read). That [start, end] replaces the shared common's full range so
+    // each partition's native TableChanges read covers only its slice. Regular (non-CDF)
+    // per-partition bytes set no common, so only the task list is merged for them.
+    val cdfSlice =
+      if (partScan.hasCommon && partScan.getCommon.getCdfRead) Some(partScan.getCommon)
+      else None
+    cdfSlice.foreach { slice =>
+      val common = envelope.getCommon.toBuilder
+      common.setCdfStartVersion(slice.getCdfStartVersion)
+      if (slice.hasCdfEndVersion) common.setCdfEndVersion(slice.getCdfEndVersion)
+      else common.clearCdfEndVersion()
+      merged.setCommon(common.build())
+    }
+    op.toBuilder.setDeltaScan(merged.build()).build()
+  }
+}
diff --git a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaColumnMappingPhysicalNameReproSuite.scala b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaColumnMappingPhysicalNameReproSuite.scala
new file mode 100644
index 0000000000..045c6faec2
--- /dev/null
+++ b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaColumnMappingPhysicalNameReproSuite.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.delta
+
+// Deterministic mirror of DeltaColumnMappingSuite "column mapping batch scan should detect
+// physical name changes" (id mode). df2 is analyzed before the table is overwritten with new
+// physical names/field-ids; reading it afterward (schema-on-read check off) must yield NULLs.
+// Native-only fresh collect (no vanilla-first collect, which would cache the pinned snapshot
+// and mask the bug).
+class CometDeltaColumnMappingPhysicalNameReproSuite extends CometDeltaTestBase {
+
+  test("column mapping batch scan should detect physical name changes [id]") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withSQLConf("spark.databricks.delta.properties.defaults.columnMapping.mode" -> "id") {
+      withDeltaTable("cm_physical_name") { path =>
+        def idWriter = spark.range(10).toDF("id").write.format("delta")
+        idWriter.save(path)
+        // Analyze the reader now, before the overwrite below replaces the table's schema.
+        val pinned = spark.read.format("delta").load(path)
+        pinned.queryExecution.analyzed
+        val reuseKey = "spark.databricks.delta.columnMapping.reuseColumnMetadataDuringOverwrite"
+        withSQLConf(reuseKey -> "false") {
+          idWriter.option("overwriteSchema", "true").mode("overwrite").save(path)
+        }
+        withSQLConf("spark.databricks.delta.checkLatestSchemaOnRead" -> "false") {
+          val rows = pinned.collect()
+          val nonNull = rows.count(row => !row.isNullAt(0))
+          assert(
+            rows.length == 10 && nonNull == 0,
+            s"stale physical name should read NULL: ${rows.length} rows, $nonNull non-null " +
+              s"(sample=${rows.take(5).map(r => if (r.isNullAt(0)) "null" else r.getLong(0)).toSeq})")
+        }
+      }
+    }
+  }
+}
diff --git a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaColumnMappingSuite.scala b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaColumnMappingSuite.scala
new file mode 100644
index 0000000000..36b4968661
--- /dev/null
+++ b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaColumnMappingSuite.scala
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.delta
+
+import org.apache.spark.sql.comet.CometDeltaNativeScanExec
+
+/**
+ * Column mapping (name + id modes) and deletion-vector coverage. Ported from
+ * the pre-SPI `delta-kernel-phase-1` branch.
+ */
+class CometDeltaColumnMappingSuite extends CometDeltaTestBase {
+
+  test("deletion vectors: accelerates DV-in-use tables via native DV filter") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("dv_accel") { path =>
+      val session = spark
+      import session.implicits._
+
+      // Full-table read with row normalisation, used for the vanilla (Comet scan off) passes.
+      def readAll() = spark.read.format("delta").load(path).collect().toSeq.map(normalizeRow)
+
+      // 20 rows written from a single partition, with deletion vectors enabled at creation.
+      val seed = (0 until 20).map(i => (i.toLong, s"name_$i")).toDF("id", "name")
+      seed
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.enableDeletionVectors", "true")
+        .option("delta.minReaderVersion", "3")
+        .option("delta.minWriterVersion", "7")
+        .save(path)
+
+      assertDeltaNativeMatches(path, df => df)
+
+      spark.sql(s"DELETE FROM delta.`$path` WHERE id % 3 = 0")
+
+      // orderBy forces AQE wrapping so Comet's prep rules see the plan.
+      val sortedDf = spark.read.format("delta").load(path).orderBy("id")
+      val sortedPlan = sortedDf.queryExecution.executedPlan
+      val nativeScans = collect(sortedPlan) { case scan: CometDeltaNativeScanExec => scan }
+      assert(
+        nativeScans.nonEmpty,
+        s"expected Comet to accelerate a DV-in-use table:\n$sortedPlan")
+      val nativeRows = sortedDf.collect().toSeq.map(normalizeRow)
+      withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+        val vanillaRows = readAll()
+        assert(
+          nativeRows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+          s"native=$nativeRows\nvanilla=$vanillaRows")
+      }
+      // The first DELETE removes the 7 multiples of three in [0, 20), leaving 13 rows.
+      assert(nativeRows.size == 13, s"expected 13 rows after DELETE, got ${nativeRows.size}")
+
+      spark.sql(s"DELETE FROM delta.`$path` WHERE id >= 18")
+      val secondDf = spark.read.format("delta").load(path)
+      val rows2 = secondDf.collect().toSeq.map(normalizeRow)
+      // Compare with vanilla instead of a hardcoded row count: with this Spark 4.1 +
+      // Delta 4.0 combination, a second DELETE against the same parquet file that matches
+      // only a few new rows can end up reading the cached pre-DELETE snapshot within the
+      // same SparkSession. Mirroring vanilla makes the test gate on "native matches
+      // vanilla" rather than on Delta-version-specific transaction visibility
+      // semantics.
+      withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+        val vanillaPost2 = readAll()
+        assert(
+          rows2.sortBy(_.mkString("|")) == vanillaPost2.sortBy(_.mkString("|")),
+          s"after 2nd DELETE: native=$rows2 vanilla=$vanillaPost2")
+      }
+
+      // The read issued after the second DELETE must still be accelerated.
+      val plan2 = secondDf.queryExecution.executedPlan
+      assert(
+        collect(plan2) { case scan: CometDeltaNativeScanExec => scan }.nonEmpty,
+        s"expected Comet to still accelerate after second DELETE:\n$plan2")
+    }
+  }
+
+  test("column mapping: name mode read after rename") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("col_mapping_name") { path =>
+      val session = spark
+      import session.implicits._
+
+      val seed = (0 until 8).map(i => (i.toLong, s"name_$i", i * 1.5)).toDF("id", "name", "score")
+      // Name-mode column mapping, with the reader/writer protocol versions set explicitly.
+      seed.write
+        .format("delta")
+        .option("delta.columnMapping.mode", "name")
+        .option("delta.minReaderVersion", "2")
+        .option("delta.minWriterVersion", "5")
+        .save(path)
+
+      // Baseline read before the rename.
+      assertDeltaNativeMatches(path, df => df)
+      spark.sql(s"ALTER TABLE delta.`$path` RENAME COLUMN name TO full_name")
+
+      // After the rename: the full table, then a projection that uses the new name.
+      assertDeltaNativeMatches(path, df => df)
+      assertDeltaNativeMatches(path, df => df.select("id", "full_name"))
+    }
+  }
+
+  test("column mapping: id mode") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("col_mapping_id") { path =>
+      val session = spark
+      import session.implicits._
+
+      val seed = (0 until 6).map(i => (i.toLong, s"name_$i")).toDF("id", "name")
+      // Id-mode column mapping, with the reader/writer protocol versions set explicitly.
+      seed.write
+        .format("delta")
+        .option("delta.columnMapping.mode", "id")
+        .option("delta.minReaderVersion", "2")
+        .option("delta.minWriterVersion", "5")
+        .save(path)
+
+      // Unfiltered read first, then one with a data-column predicate.
+      assertDeltaNativeMatches(path, df => df)
+      assertDeltaNativeMatches(path, df => df.where("id > 2"))
+    }
+  }
+
+  test("column mapping + deletion vectors combined") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("col_map_dv") { path =>
+      val session = spark
+      import session.implicits._
+
+      // Full-table read with row normalisation, shared by the native and vanilla passes.
+      def readAll() = spark.read.format("delta").load(path).collect().toSeq.map(normalizeRow)
+
+      // 20 rows written from a single partition: name-mode column mapping plus DVs.
+      val seed = (0 until 20).map(i => (i.toLong, s"name_$i", i * 1.5)).toDF("id", "name", "score")
+      seed
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.columnMapping.mode", "name")
+        .option("delta.minReaderVersion", "3")
+        .option("delta.minWriterVersion", "7")
+        .option("delta.enableDeletionVectors", "true")
+        .save(path)
+
+      assertDeltaNativeMatches(path, df => df)
+
+      // Rename a column first so the DELETE below runs against a remapped schema.
+      spark.sql(s"ALTER TABLE delta.`$path` RENAME COLUMN name TO full_name")
+      // The DELETE and both reads run with useMetadataRowIndex switched off.
+      withSQLConf("spark.databricks.delta.deletionVectors.useMetadataRowIndex" -> "false") {
+        spark.sql(s"DELETE FROM delta.`$path` WHERE id % 4 = 0")
+        val nativeRows = readAll()
+        withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+          val vanillaRows = readAll()
+          assert(
+            nativeRows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+            s"col mapping + DV: native=$nativeRows\nvanilla=$vanillaRows")
+        }
+        // The DELETE removes the 5 multiples of four in [0, 20), leaving 15 rows.
+        assert(nativeRows.size == 15, s"expected 15 rows after DELETE, got ${nativeRows.size}")
+      }
+    }
+  }
+
+  test("column mapping + schema evolution combined") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("col_map_evolve") { path =>
+      val session = spark
+      import session.implicits._
+
+      // First write: two columns, name-mode column mapping.
+      val initial = (0 until 10).map(i => (i.toLong, s"name_$i")).toDF("id", "name")
+      initial.write
+        .format("delta")
+        .option("delta.columnMapping.mode", "name")
+        .option("delta.minReaderVersion", "2")
+        .option("delta.minWriterVersion", "5")
+        .save(path)
+
+      // Second write: appends rows carrying an extra `score` column, with mergeSchema on.
+      val widened =
+        (10 until 15).map(i => (i.toLong, s"name_$i", i * 2.0)).toDF("id", "name", "score")
+      widened.write
+        .format("delta")
+        .mode("append")
+        .option("mergeSchema", "true")
+        .save(path)
+
+      // The first write's rows were written without `score`; the second read filters on it.
+      assertDeltaNativeMatches(path, df => df)
+      assertDeltaNativeMatches(path, df => df.where("score IS NOT NULL"))
+    }
+  }
+}
diff --git a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaCoverageSuite.scala b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaCoverageSuite.scala
new file mode 100644
index 0000000000..085e837b3f
--- /dev/null
+++ b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaCoverageSuite.scala
@@ -0,0 +1,516 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.delta
+
+import org.apache.spark.sql.functions._
+
+/**
+ * Coverage matrix for `CometDeltaNativeScanExec`. Each test exercises one query
+ * pattern (projection, filter, sort, aggregate, join, set-op, window, subquery,
+ * nested-data access) and asserts via [[CometDeltaTestBase.assertDeltaNativeMatches]]
+ * that BOTH:
+ *   1. the executed plan contains `CometDeltaNativeScanExec` (the contrib actually
+ *      engaged -- a hard guard against the "inert bridge" class of regression
+ *      we fixed earlier this branch), AND
+ *   2. results equal vanilla Spark+Delta (set-equal, order-independent).
+ *
+ * Tests are grouped roughly by SQL surface area so adding new coverage stays
+ * pattern-local. Per-area tests use a single backing Delta table built once at
+ * the top of the test to keep wall-clock fast.
+ */
+class CometDeltaCoverageSuite extends CometDeltaTestBase {
+
+  // ---- Projection / SELECT --------------------------------------------------
+
+  test("projection: SELECT *") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_proj_star") { path =>
+      writeIntStrTable(path, 10)
+      assertDeltaNativeMatches(path, df => df)
+    }
+  }
+
+  test("projection: SELECT specific columns prunes data schema") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_proj_cols") { path =>
+      writeIntStrTable(path, 10)
+      // One single-column projection per column of the table.
+      Seq("id", "name").foreach(c => assertDeltaNativeMatches(path, _.select(c)))
+    }
+  }
+
+  test("projection: arithmetic + casts in SELECT") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_proj_arith") { path =>
+      writeIntStrTable(path, 10)
+      val exprs =
+        Seq("id", "id * 2 AS doubled", "CAST(id AS INT) AS id_int", "length(name) AS nlen")
+      assertDeltaNativeMatches(path, _.selectExpr(exprs: _*))
+    }
+  }
+
+  test("projection: LIMIT") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_proj_limit") { path =>
+      writeIntStrTable(path, 50)
+      // LIMIT alone is order-dependent; sorting by id first pins which five rows come back.
+      assertDeltaNativeMatches(path, df => df.orderBy("id").limit(5))
+    }
+  }
+
+  test("projection: DISTINCT") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_proj_distinct") { path =>
+      val session = spark
+      import session.implicits._
+      // Two of the three distinct (id, name) pairs appear twice, so DISTINCT has work to do.
+      val dupes = Seq((1L, "a"), (1L, "a"), (2L, "b"), (3L, "c"), (3L, "c"))
+      dupes.toDF("id", "name").write.format("delta").save(path)
+      assertDeltaNativeMatches(path, df => df.distinct())
+      assertDeltaNativeMatches(path, df => df.select("id").distinct())
+    }
+  }
+
+  // ---- Filters (WHERE) ------------------------------------------------------
+
+  test("filter: equality + inequality") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_filt_eq") { path =>
+      writeIntStrTable(path, 20)
+      // Each comparison operator is checked as its own single-predicate read, in the
+      // order listed.
+      val predicates = Seq("id = 5", "id != 5", "id > 10", "id <= 7")
+      predicates.foreach(p => assertDeltaNativeMatches(path, _.where(p)))
+    }
+  }
+
+  test("filter: IN / NOT IN") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_filt_in") { path =>
+      writeIntStrTable(path, 20)
+      Seq("id IN (1, 3, 5, 7)", "id NOT IN (0, 10, 19)")
+        .foreach(p => assertDeltaNativeMatches(path, _.where(p)))
+    }
+  }
+
+  test("filter: IS NULL / IS NOT NULL") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_filt_null") { path =>
+      val session = spark
+      import session.implicits._
+      // Names alternate between present and missing: two rows of each kind.
+      val rows = Seq((1L, Option("a")), (2L, None), (3L, Option("c")), (4L, None))
+      rows.toDF("id", "name").write.format("delta").save(path)
+      Seq("name IS NULL", "name IS NOT NULL")
+        .foreach(p => assertDeltaNativeMatches(path, _.where(p)))
+    }
+  }
+
+  test("filter: BETWEEN, LIKE, AND/OR/NOT") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_filt_combo") { path =>
+      writeIntStrTable(path, 20)
+      // Range and pattern predicates first, then the boolean connectives; every
+      // predicate is checked as its own read.
+      val rangeAndPattern = Seq("id BETWEEN 3 AND 8", "name LIKE 'name_1%'")
+      val connectives = Seq("id > 5 AND id < 15", "id < 3 OR id > 17", "NOT (id = 10)")
+      (rangeAndPattern ++ connectives).foreach(p => assertDeltaNativeMatches(path, _.where(p)))
+    }
+  }
+
+  // ---- Sorting --------------------------------------------------------------
+
+  test("sort: ORDER BY ASC / DESC, single + multi key") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_sort") { path =>
+      val session = spark
+      import session.implicits._
+      // grp repeats every three rows, so the multi-key sort relies on id within a group.
+      val rows = (0 until 20).map(i => (i.toLong, s"g_${i % 3}", i % 5))
+      rows.toDF("id", "grp", "v").write.format("delta").save(path)
+      assertDeltaNativeMatches(path, df => df.orderBy("id"))
+      assertDeltaNativeMatches(path, df => df.orderBy(desc("id")))
+      assertDeltaNativeMatches(path, df => df.orderBy(asc("grp"), desc("id")))
+    }
+  }
+
+  // ---- Aggregations ---------------------------------------------------------
+
+  test("aggregate: COUNT, SUM, AVG, MIN, MAX") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_agg_basic") { path =>
+      writeIntStrTable(path, 20)
+      // NOTE: `count(*)` is deliberately left out. Delta short-circuits it to a
+      // `LocalTableScan` fed by the snapshot's `numRecords` stat, so the scan never
+      // engages and `assertDeltaNativeMatches` would (correctly) fail. Aggregates that
+      // touch a column, such as `count(id)`, do have to read parquet and therefore
+      // exercise the scan path.
+      assertDeltaNativeMatches(path, df => df.agg(count("id").as("c")))
+      assertDeltaNativeMatches(path, df => df.agg(sum("id"), avg("id"), min("id"), max("id")))
+    }
+  }
+
+  test("aggregate: GROUP BY single + multi column, with HAVING") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_agg_group") { path =>
+      val session = spark
+      import session.implicits._
+      val rows = (0 until 30).map(i => (i.toLong, s"g_${i % 3}", i % 5))
+      rows.toDF("id", "grp", "v").write.format("delta").save(path)
+      val rowCount = count("*").as("c")
+      // Single grouping key with two aggregates, then a composite key.
+      assertDeltaNativeMatches(path, _.groupBy("grp").agg(rowCount, sum("id").as("s")))
+      assertDeltaNativeMatches(path, _.groupBy("grp", "v").agg(rowCount))
+      // HAVING, expressed as a filter on the aggregated column.
+      assertDeltaNativeMatches(path, _.groupBy("grp").agg(rowCount).where("c > 5"))
+    }
+  }
+
+  test("aggregate: COUNT DISTINCT") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_agg_cd") { path =>
+      val session = spark
+      import session.implicits._
+      // grp cycles through four values across the 30 rows.
+      val rows = (0 until 30).map(i => (i.toLong, s"g_${i % 4}"))
+      rows.toDF("id", "grp").write.format("delta").save(path)
+      assertDeltaNativeMatches(path, df => df.agg(countDistinct("grp").as("dg")))
+    }
+  }
+
+  // ---- Joins ----------------------------------------------------------------
+
+  test("join: self-join (inner)") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_join_self") { path =>
+      writeIntStrTable(path, 10)
+      val sameId = col("a.id") === col("b.id")
+      // Both join sides are the same DataFrame under two different aliases.
+      assertDeltaNativeMatches(path, t => t.as("a").join(t.as("b"), sameId).select(col("a.id")))
+    }
+  }
+
+  test("join: inner / left outer / left semi between two delta tables") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_join_lhs") { lhsPath =>
+      withDeltaTable("cov_join_rhs") { rhsPath =>
+        val ss = spark
+        import ss.implicits._
+        // lhs holds ids 0-9; rhs holds the odd ids 1-9 plus 11, which has no lhs match, so
+        // each join type below sees both matching and non-matching keys.
+        (0 until 10).map(i => (i.toLong, s"l_$i")).toDF("id", "l")
+          .write.format("delta").save(lhsPath)
+        Seq(1L, 3L, 5L, 7L, 9L, 11L).map(i => (i, s"r_$i")).toDF("id", "r")
+          .write.format("delta").save(rhsPath)
+        // `assertDeltaNativeMatches` takes one table path and checks for at least one
+        // CometDeltaNativeScanExec, so two-table queries go through
+        // `assertJoinAcceleratedAndMatches` instead: it re-reads both paths itself and
+        // requires at least two accelerated scans. Nothing is loaded here because the
+        // helper builds its own DataFrames; the eager loads that used to sit here were
+        // dead code kept alive only by an "unused" silencer.
+        Seq("inner", "left", "leftsemi", "leftanti").foreach { joinType =>
+          assertJoinAcceleratedAndMatches(lhsPath, rhsPath, joinType)
+        }
+      }
+    }
+  }
+
+  private def assertJoinAcceleratedAndMatches(
+      lhsPath: String,
+      rhsPath: String,
+      joinType: String): Unit = {
+    // Both tables are re-read on every call, so each pass is planned under the conf then active.
+    def load(path: String) = spark.read.format("delta").load(path)
+    def joined() = load(lhsPath).join(load(rhsPath), Seq("id"), joinType).orderBy("id")
+    def run(df: org.apache.spark.sql.DataFrame) = df.collect().toSeq.map(normalizeRow)
+    val nativeDf = joined()
+    val nativeRows = run(nativeDf)
+    val nativePlan = nativeDf.queryExecution.executedPlan
+    val scans = collect(nativePlan) {
+      case scan: org.apache.spark.sql.comet.CometDeltaNativeScanExec => scan
+    }
+    val scanMsg =
+      s"$joinType join: expected >= 2 CometDeltaNativeScanExec, got ${scans.size}\n$nativePlan"
+    assert(scans.size >= 2, scanMsg)
+    // Vanilla pass: the identical query with Comet's Delta scan switched off.
+    withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+      val vanillaRows = run(joined())
+      assert(
+        nativeRows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+        s"$joinType join: native != vanilla\nnative=$nativeRows\nvanilla=$vanillaRows")
+    }
+  }
+
+  // ---- Set operations -------------------------------------------------------
+
+  test("setop: UNION / UNION ALL / INTERSECT / EXCEPT") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_setop_a") { aPath =>
+      withDeltaTable("cov_setop_b") { bPath =>
+        val ss = spark
+        import ss.implicits._
+        // a holds ids 1-5 and b holds ids 4-8, so the set operations see an overlap on 4 and 5.
+        (1 to 5).map(i => (i.toLong, s"x_$i")).toDF("id", "v").write.format("delta").save(aPath)
+        (4 to 8).map(i => (i.toLong, s"x_$i")).toDF("id", "v").write.format("delta").save(bPath)
+        // Applies `op` to fresh reads of both tables, natively and then with Comet's Delta
+        // scan disabled; requires an accelerated scan in the native plan and set-equal rows.
+        def both(op: (org.apache.spark.sql.DataFrame, org.apache.spark.sql.DataFrame)
+            => org.apache.spark.sql.DataFrame): Unit = {
+          def read(path: String) = spark.read.format("delta").load(path)
+          def build() = op(read(aPath), read(bPath)).orderBy("id")
+          // Assert on the plan of the DataFrame that was executed, not on a second, unrun build.
+          val nativeDf = build()
+          val nativeRows = nativeDf.collect().toSeq.map(normalizeRow)
+          val plan = nativeDf.queryExecution.executedPlan
+          val deltaScans = collect(plan) {
+            case s: org.apache.spark.sql.comet.CometDeltaNativeScanExec => s
+          }
+          assert(deltaScans.nonEmpty, s"expected CometDeltaNativeScanExec in:\n$plan")
+          withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+            val vanillaRows = build().collect().toSeq.map(normalizeRow)
+            assert(
+              nativeRows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+              s"native=$nativeRows\nvanilla=$vanillaRows")
+          }
+        }
+        both((a, b) => a.union(b))
+        both((a, b) => a.unionAll(b))
+        both((a, b) => a.intersect(b))
+        both((a, b) => a.except(b))
+      }
+    }
+  }
+
+  // ---- Window functions -----------------------------------------------------
+
+  test("window: row_number / rank / lag / lead") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_window") { path =>
+      val session = spark
+      import session.implicits._
+      val rows = (0 until 20).map(i => (i.toLong, s"g_${i % 3}", i % 5))
+      rows.toDF("id", "grp", "v").write.format("delta").save(path)
+      // One window spec (per grp, ordered by id) shared by all four functions.
+      val byGroup = org.apache.spark.sql.expressions.Window.partitionBy("grp").orderBy("id")
+      val windowed = Seq(
+        "rn" -> row_number().over(byGroup),
+        "rk" -> rank().over(byGroup),
+        "lg" -> lag("id", 1).over(byGroup),
+        "ld" -> lead("id", 1).over(byGroup))
+      assertDeltaNativeMatches(
+        path,
+        df => windowed.foldLeft(df) { case (acc, (name, c)) => acc.withColumn(name, c) })
+    }
+  }
+
+  // ---- Subqueries -----------------------------------------------------------
+
+  test("subquery: scalar subquery in WHERE") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_sub_scalar") { tablePath =>
+      import org.apache.spark.sql.comet.CometDeltaNativeScanExec
+      writeIntStrTable(tablePath, 20)
+      def query(view: String) = s"SELECT * FROM $view WHERE id > (SELECT AVG(id) FROM $view)"
+      // Temp views are session-scoped; the finally drops both so none outlives this test.
+      try {
+        spark.read.format("delta").load(tablePath).createOrReplaceTempView("cov_sub_scalar")
+        val df = spark.sql(query("cov_sub_scalar"))
+        val rows = df.collect().toSeq.map(normalizeRow)
+        val plan = df.queryExecution.executedPlan
+        val deltaScans = collect(plan) { case s: CometDeltaNativeScanExec => s }
+        assert(deltaScans.nonEmpty, s"expected CometDeltaNativeScanExec:\n$plan")
+        withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+          spark.read.format("delta").load(tablePath).createOrReplaceTempView("cov_sub_scalar_v")
+          val vanillaRows = spark.sql(query("cov_sub_scalar_v")).collect().toSeq.map(normalizeRow)
+          assert(
+            rows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+            s"native=$rows\nvanilla=$vanillaRows")
+        }
+      } finally Seq("cov_sub_scalar", "cov_sub_scalar_v").foreach(spark.catalog.dropTempView)
+    }
+  }
+
+  test("subquery: IN subquery") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_sub_in_a") { aPath =>
+      withDeltaTable("cov_sub_in_b") { bPath =>
+        import org.apache.spark.sql.comet.CometDeltaNativeScanExec
+        writeIntStrTable(aPath, 20)
+        val ss = spark
+        import ss.implicits._
+        Seq(3L, 7L, 11L).toDF("k").write.format("delta").save(bPath)
+        val query = "SELECT * FROM cov_a WHERE id IN (SELECT k FROM cov_b)"
+        try { // temp views are session-scoped; the finally drops both on every exit path
+          spark.read.format("delta").load(aPath).createOrReplaceTempView("cov_a")
+          spark.read.format("delta").load(bPath).createOrReplaceTempView("cov_b")
+          val df = spark.sql(query)
+          val rows = df.collect().toSeq.map(normalizeRow)
+          val plan = df.queryExecution.executedPlan
+          val deltaScans = collect(plan) { case s: CometDeltaNativeScanExec => s }
+          assert(deltaScans.nonEmpty, s"expected CometDeltaNativeScanExec:\n$plan")
+          withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+            val vanillaRows = spark.sql(query).collect().toSeq.map(normalizeRow)
+            assert(rows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+              s"native=$rows\nvanilla=$vanillaRows")
+          }
+        } finally Seq("cov_a", "cov_b").foreach(spark.catalog.dropTempView)
+      }
+    }
+  }
+
+  // ---- CTEs -----------------------------------------------------------------
+
+  test("CTE: WITH ... SELECT chain") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_cte") { tablePath =>
+      writeIntStrTable(tablePath, 20)
+      val query =
+        "WITH odd AS (SELECT * FROM cov_cte WHERE id % 2 = 1) SELECT count(*) AS c FROM odd"
+      // Temp views are session-scoped; drop the view so it does not outlive this test's table.
+      try {
+        spark.read.format("delta").load(tablePath).createOrReplaceTempView("cov_cte")
+        val df = spark.sql(query)
+        val rows = df.collect().toSeq.map(normalizeRow)
+        val plan = df.queryExecution.executedPlan
+        val deltaScans = collect(plan) {
+          case s: org.apache.spark.sql.comet.CometDeltaNativeScanExec => s
+        }
+        assert(deltaScans.nonEmpty, s"expected CometDeltaNativeScanExec:\n$plan")
+        withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+          val vanillaRows = spark.sql(query).collect().toSeq.map(normalizeRow)
+          assert(rows == vanillaRows, s"native=$rows\nvanilla=$vanillaRows")
+        }
+      } finally spark.catalog.dropTempView("cov_cte")
+    }
+  }
+
+  // ---- Coverage with partitioned tables -------------------------------------
+
+  test("partitioned: filter + projection on partition column") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_part") { path =>
+      val session = spark
+      import session.implicits._
+      // 30 rows spread over the three partition values p_0, p_1 and p_2.
+      val rows = (0 until 30).map(i => (i.toLong, s"v_$i", s"p_${i % 3}"))
+      rows.toDF("id", "v", "p").write.format("delta").partitionBy("p").save(path)
+      assertDeltaNativeMatches(path, df => df.where("p = 'p_1'"))
+      assertDeltaNativeMatches(path, df => df.where("p = 'p_1' AND id > 10"))
+      assertDeltaNativeMatches(path, df => df.select("p", "id"))
+    }
+  }
+
+  // ---- Coverage with column-mapping enabled ---------------------------------
+
+  test("column mapping (name): filter + project + agg") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_cm_name") { path =>
+      val session = spark
+      import session.implicits._
+      val seed = (0 until 20).map(i => (i.toLong, s"name_$i", i * 1.0)).toDF("id", "name", "score")
+      // Name-mode column mapping, with the reader/writer protocol versions set explicitly.
+      seed.write
+        .format("delta")
+        .option("delta.columnMapping.mode", "name")
+        .option("delta.minReaderVersion", "2")
+        .option("delta.minWriterVersion", "5")
+        .save(path)
+      assertDeltaNativeMatches(path, df => df.where("id > 5").select("id", "name"))
+      assertDeltaNativeMatches(path, df => df.agg(sum("score").as("s")))
+    }
+  }
+
+  // ---- Coverage with deletion vectors ---------------------------------------
+
+  test("dv: projection + filter on DV-bearing table") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_dv") { path =>
+      val session = spark
+      import session.implicits._
+      val seed = (0 until 30).map(i => (i.toLong, s"n_$i")).toDF("id", "name")
+      // Written from a single partition, with deletion vectors enabled at creation.
+      seed
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.enableDeletionVectors", "true")
+        .option("delta.minReaderVersion", "3")
+        .option("delta.minWriterVersion", "7")
+        .save(path)
+      spark.sql(s"DELETE FROM delta.`$path` WHERE id % 5 = 0")
+      // The projection and the aggregates go through assertDeltaNativeMatches: vanilla
+      // matches native in this configuration.
+      assertDeltaNativeMatches(path, df => df.select("id"))
+      assertDeltaNativeMatches(path, df => df.agg(sum("id").as("s"), min("id"), max("id")))
+      // The filtered variant used to skip the vanilla comparison: Spark's in-session
+      // DeltaLog snapshot cache could hand the vanilla read a stale pre-DELETE snapshot
+      // (rows the DV should hide). That cache is process-global and keyed by path, so
+      // clearing it before each read makes both re-resolve the post-DELETE snapshot and
+      // restores a real correctness comparison.
+      // `filtered` builds the same filtered projection for the native and vanilla passes.
+      def filtered() = spark.read.format("delta").load(path).where("id > 10").select("id", "name")
+      org.apache.spark.sql.delta.DeltaLog.clearCache()
+      val nativeDf = filtered()
+      val nativeRows = nativeDf.collect().toSeq.map(normalizeRow)
+      val nativePlan = nativeDf.queryExecution.executedPlan
+      val scans = collect(nativePlan) {
+        case scan: org.apache.spark.sql.comet.CometDeltaNativeScanExec => scan
+      }
+      assert(
+        scans.nonEmpty,
+        s"expected CometDeltaNativeScanExec on DV-bearing filtered read:\n$nativePlan")
+      withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+        org.apache.spark.sql.delta.DeltaLog.clearCache()
+        val vanillaRows = filtered().collect().toSeq.map(normalizeRow)
+        assert(
+          nativeRows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+          s"DV filtered native=$nativeRows vanilla=$vanillaRows")
+      }
+    }
+  }
+
+  // ---- Nested data access ---------------------------------------------------
+
+  test("nested: struct field + array element + map value access") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("cov_nested") { path =>
+      val session = spark
+      import session.implicits._
+      // Each row carries a tuple (column s), an int sequence (arr) and a string-keyed
+      // map (m); the three reads below reach into one of them each.
+      val first = (1L, ("a", 1), Seq(10, 20, 30), Map("k1" -> 100, "k2" -> 200))
+      val second = (2L, ("b", 2), Seq(40, 50), Map("k1" -> 300))
+      Seq(first, second).toDF("id", "s", "arr", "m").write.format("delta").save(path)
+      assertDeltaNativeMatches(path, _.selectExpr("id", "s._1 AS s1", "s._2 AS s2"))
+      assertDeltaNativeMatches(path, _.selectExpr("id", "arr[0] AS a0", "size(arr) AS asz"))
+      assertDeltaNativeMatches(path, _.selectExpr("id", "m['k1'] AS mk1"))
+    }
+  }
+
+  // ---- helpers --------------------------------------------------------------
+
+  private def writeIntStrTable(tablePath: String, n: Int): Unit = {
+    val session = spark
+    import session.implicits._
+    // Rows are (i, "name_i") for i in [0, n), written as columns `id` and `name`.
+    val rows = (0 until n).map(i => (i.toLong, s"name_$i"))
+    rows.toDF("id", "name").write.format("delta").save(tablePath)
+  }
+}
diff --git a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaFeaturesSuite.scala b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaFeaturesSuite.scala
new file mode 100644
index 0000000000..81f0c9ceeb
--- /dev/null
+++ b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaFeaturesSuite.scala
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.delta
+
+import org.apache.spark.sql.comet.CometDeltaNativeScanExec
+import org.apache.spark.sql.functions._
+
+/**
+ * Coverage for the special features the contrib supports beyond plain reads.
+ * Tests assert that Comet's native plan engages AND that results are correct, so future
+ * silent-disengagement bugs are caught (the input_file_name() test checks results only).
+ *
+ * Mapped to the design-doc feature list:
+ *   - Deletion Vectors (native DeltaDvFilterExec path)
+ *   - Row tracking (synthesised + materialised cases)
+ *   - Synthetic columns (__delta_internal_row_index)
+ *   - input_file_name() and FileBlockHolder threading
+ *   - Complex types (struct, array, map)
+ *   - Joins and aggregations over Delta
+ *   - Time travel by timestamp
+ *   - Multi-append / multi-file scenarios
+ */
+class CometDeltaFeaturesSuite extends CometDeltaTestBase {
+
+  // ---- Deletion Vectors -----------------------------------------------------
+
+  test("DV: native scan engages on DV-bearing tables after DELETE") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("features_dv") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 20)
+        .map(i => (i.toLong, s"name_$i"))
+        .toDF("id", "name")
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.enableDeletionVectors", "true")
+        .option("delta.minReaderVersion", "3")
+        .option("delta.minWriterVersion", "7")
+        .save(tablePath)
+
+      spark.sql(s"DELETE FROM delta.`$tablePath` WHERE id % 3 = 0")
+
+      val df = spark.read.format("delta").load(tablePath)
+      val rows = df.collect()
+      val plan = df.queryExecution.executedPlan
+      assert(
+        collect(plan) { case s: CometDeltaNativeScanExec => s }.nonEmpty,
+        s"expected Comet native scan on DV-bearing table:\n$plan")
+      assert(rows.length === 13, s"expected 13 rows after DELETE, got ${rows.length}")
+    }
+  }
+
+  // ---- Row tracking (Phase-1 port) ------------------------------------------
+
+  test("row tracking: unmaterialised _metadata.row_id synthesised from baseRowId") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("features_rt_unmat") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 12)
+        .map(i => (i.toLong, s"name_$i"))
+        .toDF("id", "name")
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.enableRowTracking", "true")
+        .option("delta.minReaderVersion", "3")
+        .option("delta.minWriterVersion", "7")
+        .save(tablePath)
+
+      // orderBy forces a shuffle -> AQE wraps -> Comet's prep rules fire
+      val df = spark.read
+        .format("delta")
+        .load(tablePath)
+        .selectExpr("id", "_metadata.row_id AS rid")
+        .orderBy("id")
+      val rows = df.collect().toSeq
+      val plan = df.queryExecution.executedPlan
+      assert(
+        collect(plan) { case s: CometDeltaNativeScanExec => s }.nonEmpty,
+        s"expected Comet to accelerate rowTracking scan:\n$plan")
+
+      assert(rows.size == 12)
+      rows.zipWithIndex.foreach { case (row, idx) =>
+        assert(row.getLong(1) == idx.toLong, s"row $idx: rid mismatch")
+      }
+    }
+  }
+
+  // ---- Synthetic columns ----------------------------------------------------
+
+  test("synthetic: native scan engages when row tracking is enabled (provides _metadata.row_index)") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("features_synth") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 6)
+        .map(i => (i.toLong, s"n_$i"))
+        .toDF("id", "name")
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.enableRowTracking", "true")
+        .option("delta.minReaderVersion", "3")
+        .option("delta.minWriterVersion", "7")
+        .save(tablePath)
+
+      // orderBy forces AQE wrapping so Comet's prep rules see this plan.
+      val df = spark.read.format("delta").load(tablePath)
+        .selectExpr("id", "_metadata.row_index AS ri")
+        .orderBy("id")
+      val rows = df.collect()
+      val plan = df.queryExecution.executedPlan
+      assert(rows.length === 6, s"expected 6 rows, got ${rows.length}")
+      assert(
+        collect(plan) { case s: CometDeltaNativeScanExec => s }.nonEmpty,
+        s"expected Comet to engage when _metadata.row_index is consumed:\n$plan")
+    }
+  }
+
+  // ---- input_file_name() ----------------------------------------------------
+
+  test("input_file_name(): rows return the path of their source parquet file") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("features_ifn") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      // Two writes -> two files; each row's input_file_name should be one of them.
+      (0 until 5).map(i => (i.toLong, "a"))
+        .toDF("id", "src").repartition(1).write.format("delta").save(tablePath)
+      (5 until 10).map(i => (i.toLong, "b"))
+        .toDF("id", "src").repartition(1).write.format("delta").mode("append").save(tablePath)
+
+      // orderBy forces AQE wrapping for Comet's rules to fire.
+      val df = spark.read.format("delta").load(tablePath)
+        .withColumn("ifn", input_file_name())
+        .orderBy("id")
+      val rows = df.collect()
+      assert(rows.length === 10)
+      val distinctPaths = rows.map(_.getString(2)).toSet
+      assert(distinctPaths.size === 2, s"expected 2 source files, got $distinctPaths")
+      assert(distinctPaths.forall(_.contains("parquet")), s"non-parquet path: $distinctPaths")
+    }
+  }
+
+  // ---- Complex types --------------------------------------------------------
+
+  test("complex types: struct, array, map round-trip through native scan") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("features_complex") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      Seq(
+        (1L, ("a", 1), Seq(10, 20), Map("k1" -> 100)),
+        (2L, ("b", 2), Seq(30), Map("k2" -> 200, "k3" -> 300)))
+        .toDF("id", "s", "arr", "m")
+        .write.format("delta").save(tablePath)
+
+      // assertDeltaNativeMatches already asserts native plan presence + result parity.
+      assertDeltaNativeMatches(tablePath, identity)
+      // Reinforce: simple read explicitly verifies the contrib scan exec is present.
+      assertNativePlanContains(
+        spark.read.format("delta").load(tablePath),
+        "CometDeltaNativeScanExec")
+    }
+  }
+
+  // ---- Aggregations + joins over Delta --------------------------------------
+
+  // Checks both routing (native Delta scan in the plan) and the exact per-group count and sum.
+  test("aggregation: count/sum over Delta uses native scan") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("features_agg") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 100).map(i => (i.toLong, i % 5, (i * 1.5).toDouble)).toDF("id", "g", "v")
+        .write.format("delta").save(tablePath)
+      val df = spark.read.format("delta").load(tablePath)
+        .groupBy("g").agg(count("*").as("c"), sum("v").as("s"))
+      // Materialise before inspecting the plan so AQE has finalized it.
+      val rows = df.collect().toSeq
+        .map(r => (r.getInt(0), r.getLong(1), r.getDouble(2))).sortBy(_._1)
+      val plan = df.queryExecution.executedPlan
+      assert(
+        collect(plan) { case s: CometDeltaNativeScanExec => s }.nonEmpty,
+        s"expected Comet native scan in aggregation plan:\n$plan")
+      // Group g holds i = g, g+5, ..., g+95: count 20 and sum(v) = 30g + 1425 (exact in Double).
+      assert(rows === (0 until 5).map(g => (g, 20L, 30.0 * g + 1425.0)))
+    }
+  }
+
+  test("join: self-join over Delta uses native scan twice") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("features_join") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 20).map(i => (i.toLong, s"n_$i"))
+        .toDF("id", "name")
+        .write.format("delta").save(tablePath)
+
+      val df = spark.read.format("delta").load(tablePath).alias("a")
+        .join(
+          spark.read.format("delta").load(tablePath).alias("b"),
+          col("a.id") === col("b.id") + 1)
+      val plan = df.queryExecution.executedPlan
+      val scans = collect(plan) { case s: CometDeltaNativeScanExec => s }
+      assert(scans.size >= 1, s"expected at least 1 native Delta scan in join plan:\n$plan")
+      assert(df.count() === 19)
+    }
+  }
+
+  // ---- Time travel by timestamp ---------------------------------------------
+
+  test("time travel by timestamp reads the older snapshot") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("features_tt_ts") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 5).map(i => (i.toLong, s"v0_$i")).toDF("id", "name")
+        .write.format("delta").save(tablePath)
+      // Sleep so timestampAsOf can distinguish the two commits.
+      Thread.sleep(1500)
+      val midTimestamp = new java.sql.Timestamp(System.currentTimeMillis())
+      Thread.sleep(1500)
+      (5 until 10).map(i => (i.toLong, s"v1_$i")).toDF("id", "name")
+        .write.format("delta").mode("append").save(tablePath)
+
+      val df = spark.read
+        .format("delta")
+        .option("timestampAsOf", midTimestamp.toString)
+        .load(tablePath)
+      // Materialise before inspecting the plan so AQE has finalized it.
+      val nativeRows = df.collect().toSeq.map(normalizeRow)
+      val plan = df.queryExecution.executedPlan
+      assert(
+        collect(plan) { case s: CometDeltaNativeScanExec => s }.nonEmpty,
+        s"expected Comet native scan in timestamp time-travel plan:\n$plan")
+      assert(nativeRows.size === 5)
+      // Compare content against vanilla at the same pinned timestamp.
+      withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+        val vanillaRows = spark.read.format("delta")
+          .option("timestampAsOf", midTimestamp.toString)
+          .load(tablePath).collect().toSeq.map(normalizeRow)
+        assert(
+          nativeRows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+          s"timestamp time-travel native=$nativeRows vanilla=$vanillaRows")
+      }
+    }
+  }
+}
diff --git a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaMarkerSuite.scala b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaMarkerSuite.scala
index cab8890d26..70559f3775 100644
--- a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaMarkerSuite.scala
+++ b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaMarkerSuite.scala
@@ -23,50 +23,47 @@ import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.functions.{col, input_file_name}
 
 /**
- * Coverage for the contrib-delta CLAIM/DECLINE layer (`DeltaScanRule` + `CometDeltaScanMarker`)
- * that this unit introduces, independent of the native read path (the serde/exec land later).
- *
- * On this build there is no `CometDeltaNativeScan` serde, so `CometExecRule`'s `scanHandler`
- * lookup returns `None` and a planted `CometDeltaScanMarker` is left in the plan executing as a
- * vanilla Delta fallback. That makes the marker's PRESENCE the observable signal that the rule
- * claimed the scan, and its absence the signal that the rule declined -- exactly what these tests
- * assert. The native-read assertions live with the serde/exec unit.
+ * Coverage for the contrib-delta CLAIM/DECLINE path: `DeltaScanRule` plants a `CometDeltaScanMarker`,
+ * which -- now that the serde (`CometDeltaNativeScan`) is present in this unit -- `CometExecRule`
+ * CONVERTS into a `CometDeltaNativeScanExec` (a real native read). So a CLAIMED scan is observable as
+ * a `CometDeltaNativeScanExec` in the plan, and a DECLINED scan falls back to vanilla Spark (no native
+ * scan). (Before the serde landed, a claimed scan left the marker in the plan executing as a vanilla
+ * fallback; that earlier-unit behaviour is what changed here.)
  */
 class CometDeltaMarkerSuite extends CometDeltaTestBase {
 
-  test("DeltaScanRule plants the marker on a plain Delta read (claim path active)") {
+  test("DeltaScanRule claims a plain Delta read and it engages the native scan") {
     assume(deltaSparkAvailable, "io.delta.spark not on the test classpath")
-    withDeltaTable("marker-planted") { tablePath =>
+    withDeltaTable("claim-native") { tablePath =>
       spark.range(0, 100).toDF("id").write.format("delta").save(tablePath)
-      val df = spark.read.format("delta").load(tablePath)
-      // Red-green vs the A.2 build: with `DeltaScanRule$` absent (A.2 bridge only) no marker is
-      // planted; this unit supplies the rule, so the marker appears (then falls back to vanilla).
-      assertMarkerPlanted(df)
+      // The rule claims the scan (plants the marker); with the serde present, CometExecRule converts
+      // the marker to a CometDeltaNativeScanExec -- so the engaged-native check is the claim signal.
+      assertKernelReadEngaged(tablePath)
     }
   }
 
-  test("marker is planted on a filtered/projected read and the fallback stays result-correct") {
+  test("a filtered/projected claimed read goes native and matches vanilla Spark") {
     assume(deltaSparkAvailable, "io.delta.spark not on the test classpath")
-    withDeltaTable("marker-fallback-correct") { tablePath =>
+    withDeltaTable("claim-native-filtered") { tablePath =>
       spark.range(0, 100).selectExpr("id", "id * 2 as v").write.format("delta").save(tablePath)
-      val query = (df: DataFrame) => df.filter("id > 10").select("id", "v")
-      // Assert the rule actually CLAIMS this query shape (catches a claim-path regression, not just
-      // a result mismatch -- a disengaged claim path would still match rows since both sides run
-      // vanilla), AND that the marker's vanilla fallback returns identical rows.
-      assertMarkerPlanted(query(spark.read.format("delta").load(tablePath)))
-      assertResultsMatchVanilla(tablePath, query)
+      // Asserts the read engages `CometDeltaNativeScanExec` AND results match vanilla -- catches a
+      // claim-path regression (no native scan) and a correctness regression in one shot.
+      assertDeltaNativeMatches(tablePath, (df: DataFrame) => df.filter("id > 10").select("id", "v"))
     }
   }
 
-  test("DeltaScanRule declines an input_file_name() projection (no marker, vanilla read)") {
+  test("DeltaScanRule declines an input_file_name() projection (falls back to vanilla, no native scan)") {
     assume(deltaSparkAvailable, "io.delta.spark not on the test classpath")
     withDeltaTable("decline-input-file-name") { tablePath =>
       spark.range(0, 50).toDF("id").write.format("delta").save(tablePath)
-      // `input_file_name()` forces a fall back to vanilla (per-file provenance the native scan
-      // can't surface), so the rule declines and plants no marker.
-      val df = spark.read.format("delta").load(tablePath).select(col("id"), input_file_name())
-      assertNoMarker(df)
-      assert(df.count() == 50L, "declined read must still return all rows via vanilla Spark")
+      // `input_file_name()` forces a fall back to vanilla (per-file provenance the native scan can't
+      // surface), so the rule declines, plants no marker, and no CometDeltaNativeScanExec appears.
+      val query = (df: DataFrame) => df.select(col("id"), input_file_name())
+      assertDeltaFallback(tablePath, query)
+      assertNoMarker(query(spark.read.format("delta").load(tablePath)))
+      assert(
+        query(spark.read.format("delta").load(tablePath)).count() == 50L,
+        "declined read must still return all rows via vanilla Spark")
     }
   }
 }
diff --git a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaNativeSuite.scala b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaNativeSuite.scala
new file mode 100644
index 0000000000..d1e8770911
--- /dev/null
+++ b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaNativeSuite.scala
@@ -0,0 +1,490 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.contrib.delta
+
+import org.apache.spark.sql.functions._
+
+/**
+ * Core read tests for the native Delta Lake scan path. Covers basic reads,
+ * projections, filters, partitioning, schema evolution, time travel, complex
+ * types, and primitive type coverage.
+ *
+ * Kernel-read smoke tests for column mapping and deletion vectors are included here; the
+ * dedicated tests live in [[CometDeltaColumnMappingSuite]]. Joins, aggregations, DPP, metrics,
+ * and other advanced queries belong in a follow-up `CometDeltaAdvancedSuite`.
+ *
+ * Ported from the pre-SPI `delta-kernel-phase-1` branch with no semantic
+ * changes -- this is the same vertical-slice coverage Phase-1 had, exercising
+ * the current `CometDeltaNativeScanExec` plan-rewrite path via
+ * [[CometDeltaTestBase#assertDeltaNativeMatches]].
+ */
+class CometDeltaNativeSuite extends CometDeltaTestBase {
+
+  test("kernel-read path (Phase 1b): plain table reads correctly and engages DeltaKernelScanExec") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("kernel_read_smoke") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 10)
+        .map(i => (i.toLong, s"name_$i", i * 1.5))
+        .toDF("id", "name", "score")
+        .repartition(1)
+        .write
+        .format("delta")
+        .save(tablePath)
+
+      // Correctness: the kernel-read result matches vanilla Spark (and stays on the native
+      // CometDeltaNativeScanExec, i.e. no Spark-side fallback).
+      assertDeltaNativeMatches(tablePath, identity)
+      // Routing: kernel-read via DeltaKernelScanExec is the only path, so it engaged.
+      assertKernelReadEngaged(tablePath)
+    }
+  }
+
+  test("kernel-read path (Phase 1c #44): name-mode column-mapped table") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("kernel_read_cm") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 8)
+        .map(i => (i.toLong, s"name_$i", i * 1.5))
+        .toDF("id", "name", "score")
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.columnMapping.mode", "name")
+        .option("delta.minReaderVersion", "2")
+        .option("delta.minWriterVersion", "5")
+        .save(tablePath)
+      // Rename so a logical name diverges from its physical name (the real column-mapping case).
+      spark.sql(s"ALTER TABLE delta.`$tablePath` RENAME COLUMN name TO full_name")
+
+      // Force name-mode resolution: with parquet field-id read off, the kernel-read path reads by
+      // physical name and relabels to logical via the identity transform.
+      withSQLConf("spark.sql.parquet.fieldId.read.enabled" -> "false") {
+        assertDeltaNativeMatches(tablePath, identity)
+        assertKernelReadEngaged(tablePath)
+        // Projection of the renamed column also reads correctly.
+        assertDeltaNativeMatches(tablePath, _.select("id", "full_name"))
+      }
+    }
+  }
+
+  test("kernel-read path (#47): nested column-mapped table with a nested rename") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("kernel_read_cm_nested") { tablePath =>
+      spark.sql(
+        s"""CREATE TABLE delta.`$tablePath` (
+           |  id INT,
+           |  s STRUCT<a:INT, b:STRING>,
+           |  arr ARRAY<STRUCT<x:INT>>)
+           |USING delta
+           |TBLPROPERTIES (
+           |  'delta.columnMapping.mode' = 'name',
+           |  'delta.minReaderVersion' = '2',
+           |  'delta.minWriterVersion' = '5')""".stripMargin)
+      spark.sql(
+        s"""INSERT INTO delta.`$tablePath` VALUES
+           |(1, NAMED_STRUCT('a', 10, 'b', 'x'), ARRAY(NAMED_STRUCT('x', 100))),
+           |(2, NULL, ARRAY()),
+           |(3, NAMED_STRUCT('a', 30, 'b', 'z'), ARRAY(NAMED_STRUCT('x', 300), NAMED_STRUCT('x', 301)))
+           |""".stripMargin)
+      // Rename a NESTED field so its logical name diverges from its physical name -- the case that
+      // requires the kernel-read path to physicalise + relabel at every nesting level (#47).
+      spark.sql(s"ALTER TABLE delta.`$tablePath` RENAME COLUMN s.a TO renamed_a")
+
+      withSQLConf("spark.sql.parquet.fieldId.read.enabled" -> "false") {
+        assertDeltaNativeMatches(tablePath, _.orderBy("id"))
+        assertKernelReadEngaged(tablePath)
+        // Project into the renamed nested field (nested pruning + relabel on the kernel path).
+        assertDeltaNativeMatches(tablePath, _.select("id", "s.renamed_a").orderBy("id"))
+        assertDeltaNativeMatches(tablePath, _.select("id", "arr").orderBy("id"))
+      }
+    }
+  }
+
+  test("kernel-read path (#48): zero-data-column read (partition-only count)") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("kernel_read_partonly") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 12)
+        .map(i => (i.toLong, i % 3, s"v$i"))
+        .toDF("id", "grp", "v")
+        .write
+        .format("delta")
+        .partitionBy("grp")
+        .save(tablePath)
+      // Partition-only aggregate: no data column is read, so the row count is driven from
+      // record_count (the parquet footer as fallback) -- exercises the #48 zero-data-column path.
+      assertDeltaNativeMatches(
+        tablePath,
+        _.groupBy("grp").agg(count("*").as("c")).orderBy("grp"))
+      assertKernelReadEngaged(tablePath)
+      // A bare projection of the partition column also reads no data columns.
+      assertDeltaNativeMatches(tablePath, _.select("grp").orderBy("grp"))
+    }
+  }
+
+  test("kernel-read path (Phase 1c #44): id-mode column-mapped table") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("kernel_read_cm_id") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 8)
+        .map(i => (i.toLong, s"name_$i", i * 1.5))
+        .toDF("id", "name", "score")
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.columnMapping.mode", "id")
+        .option("delta.minReaderVersion", "2")
+        .option("delta.minWriterVersion", "5")
+        .save(tablePath)
+      // Rename so a logical name diverges from its physical name.
+      spark.sql(s"ALTER TABLE delta.`$tablePath` RENAME COLUMN name TO full_name")
+
+      // id-mode reads through the same rename-then-relabel kernel path; field ids ride along on
+      // the physical schema as a fallback matcher.
+      assertDeltaNativeMatches(tablePath, identity)
+      assertKernelReadEngaged(tablePath)
+      assertDeltaNativeMatches(tablePath, _.select("id", "full_name"))
+    }
+  }
+
+  test("kernel-read path (Phase 1c #45): partitioned table with projections + filters") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("kernel_read_part") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 12)
+        .map(i => (i.toLong, s"name_$i", i % 3))
+        .toDF("id", "name", "part")
+        .write
+        .format("delta")
+        .partitionBy("part")
+        .save(tablePath)
+
+      // The scan outputs data ++ partition; the kernel exec reproduces that, so projections
+      // and partition filters all work without special handling.
+      assertDeltaNativeMatches(tablePath, identity) // SELECT *
+      assertKernelReadEngaged(tablePath)
+      assertDeltaNativeMatches(tablePath, _.select("id")) // data-only projection
+      assertDeltaNativeMatches(tablePath, _.select("id", "part")) // data + partition
+      assertDeltaNativeMatches(tablePath, _.select("part", "name")) // reordered
+      assertDeltaNativeMatches(tablePath, _.where("part = 1")) // partition filter
+    }
+  }
+
+  test("kernel-read path (Phase 1c #46): _metadata columns + DELETE via deletion vectors") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("kernel_read_synth") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 20)
+        .map(i => (i.toLong, s"v_$i"))
+        .toDF("id", "v")
+        .repartition(1)
+        .write
+        .format("delta")
+        .option("delta.enableDeletionVectors", "true")
+        .save(tablePath)
+
+      // _metadata.* is synthesized in-worker by DeltaKernelScanExec.
+      assertDeltaNativeMatches(tablePath, _.select($"id", $"_metadata.file_path"))
+      // DELETE writes a deletion vector; the read applies it in-worker, and the surviving rows
+      // must match vanilla.
+      spark.sql(s"DELETE FROM delta.`$tablePath` WHERE id % 4 = 0")
+      assertDeltaNativeMatches(tablePath, identity)
+      assertKernelReadEngaged(tablePath)
+    }
+  }
+
+  test("read a tiny unpartitioned delta table via the native scan") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("smoke") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 10)
+        .map(i => (i.toLong, s"name_$i", i * 1.5))
+        .toDF("id", "name", "score")
+        .repartition(1)
+        .write
+        .format("delta")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, identity)
+      // Explicit accelerator-coverage assertion: the contrib's scan exec must be
+      // in the plan. Guards against silent disengagement bugs.
+      assertNativePlanContains(
+        spark.read.format("delta").load(tablePath),
+        "CometDeltaNativeScanExec")
+    }
+  }
+
+  test("multi-file delta table") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("multifile") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 30)
+        .map(i => (i.toLong, s"name_$i"))
+        .toDF("id", "name")
+        .repartition(3)
+        .write
+        .format("delta")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, identity)
+    }
+  }
+
+  test("projection pushdown reads only selected columns") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("projection") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 10)
+        .map(i => (i.toLong, s"name_$i", i * 1.5, i % 2 == 0))
+        .toDF("id", "name", "score", "active")
+        .repartition(1)
+        .write
+        .format("delta")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, _.select("id", "score"))
+    }
+  }
+
+  test("partitioned delta table surfaces partition column values") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("partitioned") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 12)
+        .map(i => (i.toLong, s"name_$i", if (i < 6) "a" else "b"))
+        .toDF("id", "name", "category")
+        .write
+        .partitionBy("category")
+        .format("delta")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, identity)
+    }
+  }
+
+  test("filter pushdown returns correct rows") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("filter") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 20)
+        .map(i => (i.toLong, s"name_$i", i * 1.5))
+        .toDF("id", "name", "score")
+        .repartition(2)
+        .write
+        .format("delta")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, _.where(col("id") >= 5 && col("id") < 15))
+    }
+  }
+
+  test("predicate variety: eq, lt, gt, is null, in, and/or") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("predicates") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 20)
+        .map(i => (i.toLong, if (i % 3 == 0) null else s"n_$i", i.toDouble))
+        .toDF("id", "name", "score")
+        .repartition(1)
+        .write
+        .format("delta")
+        .save(tablePath)
+
+      // eq
+      assertDeltaNativeMatches(tablePath, _.where(col("id") === 5))
+      // lt + gt
+      assertDeltaNativeMatches(tablePath, _.where(col("id") < 7 || col("id") > 15))
+      // is null
+      assertDeltaNativeMatches(tablePath, _.where(col("name").isNull))
+      // in
+      assertDeltaNativeMatches(tablePath, _.where(col("id").isin(1L, 4L, 9L, 16L)))
+      // mixed
+      assertDeltaNativeMatches(
+        tablePath,
+        _.where((col("id") > 5 && col("id") < 12) || col("name").isNull))
+    }
+  }
+
+  test("empty delta table") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("empty") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      Seq.empty[(Long, String)]
+        .toDF("id", "name")
+        .write
+        .format("delta")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, identity)
+    }
+  }
+
+  test("multiple appends produce many files, native scan reads them all") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("appends") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      for (batch <- 0 until 3) {
+        (0 until 10)
+          .map(i => ((batch * 10 + i).toLong, s"b${batch}_$i"))
+          .toDF("id", "name")
+          .repartition(1)
+          .write
+          .format("delta")
+          .mode("append")
+          .save(tablePath)
+      }
+
+      assertDeltaNativeMatches(tablePath, identity)
+    }
+  }
+
+  test("multi-column partitioning") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("multicol-part") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 16)
+        .map { i =>
+          (i.toLong, s"n_$i", if (i < 8) "a" else "b", i % 4) // p1 in {a, b}, p2 in 0..3
+        }
+        .toDF("id", "name", "p1", "p2")
+        .write
+        .partitionBy("p1", "p2")
+        .format("delta")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, identity)
+      // Filter on one partition column (p1)
+      assertDeltaNativeMatches(tablePath, _.where(col("p1") === "a"))
+      // Filter on both partition columns (p1 and p2)
+      assertDeltaNativeMatches(tablePath, _.where(col("p1") === "b" && col("p2") === 2))
+    }
+  }
+
+  test("typed partition columns: int, long, date") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("typed-partitions") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+      (0 until 6) // six rows, each with its own distinct partition values
+        .map { i =>
+          (
+            i.toLong,
+            s"n_$i",
+            i,                                 // int partition
+            (1000L + i),                       // long partition
+            java.sql.Date.valueOf(s"2024-01-${i + 1}") // date partition
+          )
+        }
+        .toDF("id", "name", "p_int", "p_long", "p_date")
+        .write
+        .partitionBy("p_int", "p_long", "p_date")
+        .format("delta")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, identity)
+      // Equality filter on the date partition column
+      assertDeltaNativeMatches(
+        tablePath,
+        _.where(col("p_date") === java.sql.Date.valueOf("2024-01-03")))
+    }
+  }
+
+  test("schema evolution: new column added in later commit") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("schema-evo") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+
+      // V0: two columns
+      (0 until 5)
+        .map(i => (i.toLong, s"n_$i"))
+        .toDF("id", "name")
+        .write
+        .format("delta")
+        .save(tablePath)
+
+      // V1: add the column explicitly via ALTER TABLE, then append rows that populate it
+      ss.sql(s"ALTER TABLE delta.`$tablePath` ADD COLUMNS (extra INT)")
+      (5 until 10)
+        .map(i => (i.toLong, s"n_$i", Some(i * 100)))
+        .toDF("id", "name", "extra")
+        .write
+        .format("delta")
+        .mode("append")
+        .save(tablePath)
+
+      assertDeltaNativeMatches(tablePath, identity) // V0 rows were written before `extra` existed
+    }
+  }
+
+  test("time travel by version reads the older snapshot") {
+    assume(deltaSparkAvailable, "delta-spark not on the test classpath; skipping")
+    withDeltaTable("tt-version") { tablePath =>
+      val ss = spark
+      import ss.implicits._
+
+      // V0: 3 rows
+      (0 until 3).map(i => (i.toLong, s"v0_$i")).toDF("id", "name")
+        .write.format("delta").save(tablePath)
+      // V1: append 3 more
+      (3 until 6).map(i => (i.toLong, s"v1_$i")).toDF("id", "name")
+        .write.format("delta").mode("append").save(tablePath)
+
+      // Read at version 0 -- should only see the original 3 rows.
+      val v0Native =
+        ss.read.format("delta").option("versionAsOf", "0").load(tablePath)
+      // Materialise BEFORE inspecting the plan so AQE's query-stage prep rules
+      // (incl. Comet's) have fired (see CometDeltaTestBase plan-ordering note).
+      val nativeRows = v0Native.collect().toSeq.map(normalizeRow)
+      val plan = v0Native.queryExecution.executedPlan
+      assert(
+        collect(plan) {
+          case s: org.apache.spark.sql.comet.CometDeltaNativeScanExec => s
+        }.nonEmpty,
+        s"expected CometDeltaNativeScanExec in time-travel v0 plan:\n$plan")
+      assert(nativeRows.size === 3) // V0 held 3 rows; V1's appended rows must not appear
+      // Compare CONTENT, not just count, against vanilla at the same pinned version,
+      // so a scan returning the right count from the wrong version is caught.
+      withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
+        val vanillaRows = ss.read.format("delta").option("versionAsOf", "0")
+          .load(tablePath).collect().toSeq.map(normalizeRow)
+        assert(
+          nativeRows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+          s"time-travel v0 native=$nativeRows vanilla=$vanillaRows")
+      }
+    }
+  }
+}
diff --git a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaTestBase.scala b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaTestBase.scala
index 68c123dd70..a222dcc8a7 100644
--- a/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaTestBase.scala
+++ b/contrib/delta/src/test/scala/org/apache/comet/contrib/delta/CometDeltaTestBase.scala
@@ -24,6 +24,7 @@ import java.nio.file.Files
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{DataFrame, Row, SparkSession}
 import org.apache.spark.sql.CometTestBase
+import org.apache.spark.sql.comet.CometDeltaNativeScanExec
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
 
 import org.apache.comet.CometSparkSessionExtensions
@@ -32,12 +33,11 @@ import org.apache.comet.CometSparkSessionExtensions
  * Base trait for unit-testing the contrib-delta JVM layer.
  *
  * Wires up Spark+Delta in local mode with the contrib enabled (Comet + Delta session
- * extensions, AQE forced on so Comet's query-stage-prep rules fire) and provides the
- * claim/decline assertions this unit needs: `assertMarkerPlanted` / `assertNoMarker`
- * (did `DeltaScanRule` claim the scan?) and `assertResultsMatchVanilla` (does the
- * marker's fallback / a decline stay result-correct?). The native-read assertions
- * (`assertDeltaNativeMatches` etc.) land with the serde/exec unit, since they need
- * `CometDeltaNativeScanExec`.
+ * extensions, AQE forced on so Comet's query-stage-prep rules fire) and provides both the
+ * native-read assertions (`assertDeltaNativeMatches` / `assertKernelReadEngaged` /
+ * `assertDeltaFallback` / `assertNativePlanContains` -- a claimed scan engages
+ * `CometDeltaNativeScanExec`) and the decline assertion (`assertNoMarker` -- a declined scan plants
+ * no `CometDeltaScanMarker` and runs on vanilla Spark).
  */
 trait CometDeltaTestBase extends CometTestBase with AdaptiveSparkPlanHelper {
 
@@ -139,20 +139,6 @@ trait CometDeltaTestBase extends CometTestBase with AdaptiveSparkPlanHelper {
     collect(df.queryExecution.executedPlan) { case p if p.getClass.getName == MarkerClass => p }
   }
 
-  /**
-   * Assert that `DeltaScanRule` CLAIMED the scan: the executed plan contains a `CometDeltaScanMarker`.
-   * On a build without the serde (this unit), `CometExecRule`'s `scanHandler` lookup returns `None`,
-   * so the marker is left in the plan and executes as a vanilla Delta fallback -- which is exactly the
-   * claim signal we assert here. (A build with only the A.2 bridge and no `DeltaScanRule$` would NOT
-   * plant the marker, so this is red there / green here.)
-   */
-  protected def assertMarkerPlanted(df: DataFrame): Unit = {
-    val markers = markersIn(df)
-    assert(
-      markers.nonEmpty,
-      s"expected a CometDeltaScanMarker in the plan, got:\n${df.queryExecution.executedPlan}")
-  }
-
   /**
    * Assert the rule DECLINED the scan (no `CometDeltaScanMarker` planted) -- the read runs as a
    * vanilla Spark Delta scan. Used for the decline-path cases (unsupported projection, encryption,
@@ -166,28 +152,77 @@ trait CometDeltaTestBase extends CometTestBase with AdaptiveSparkPlanHelper {
   }
 
   /**
-   * Assert the Delta read at `tablePath` returns the same rows whether the native claim path is on
-   * or off -- i.e. the marker's vanilla fallback (and any decline) is result-correct. Order-independent.
+   * Assert that `df`'s executed plan (after a forced `.collect()` so AQE materialises Comet's rules)
+   * contains an operator whose simple class name matches each name in `expectedExecs`. Uses the
+   * AQE-aware `collect` (from `AdaptiveSparkPlanHelper`) so it descends into the
+   * `AdaptiveSparkPlanExec` wrapper that every real exec lives inside under the AQE-forced-on config.
    */
-  protected def assertResultsMatchVanilla(
+  protected def assertNativePlanContains(df: DataFrame, expectedExecs: String*): Unit = {
+    df.collect()
+    val executed = df.queryExecution.executedPlan
+    val seen = collect(executed) { case node => node.getClass.getSimpleName }.toSet
+    val absent = expectedExecs.filterNot(seen)
+    assert(
+      absent.isEmpty,
+      s"expected execs missing from plan: ${absent.mkString(", ")}\n" +
+        s"present execs: ${seen.mkString(", ")}\nfull plan:\n$executed")
+  }
+
+  /**
+   * Run `query` on the Delta table at `tablePath` under the session's current conf, assert the
+   * executed plan contains a `CometDeltaNativeScanExec`, then re-run it with
+   * `spark.comet.scan.deltaNative.enabled=false` and assert the rows match (order-independent).
+   */
+  protected def assertDeltaNativeMatches(
       tablePath: String,
       query: DataFrame => DataFrame): Unit = {
-    val withClaim = query(spark.read.format("delta").load(tablePath))
-      .collect()
-      .toSeq
-      .map(normalizeRow)
+    val native = query(spark.read.format("delta").load(tablePath))
+    // Materialise first so AQE runs its query-stage prep rules (Comet's CometScanRule fires lazily
+    // when AQE materialises a stage); after collect, executedPlan reflects the finalized plan.
+    val nativeRows = native.collect().toSeq.map(normalizeRow)
+    val plan = native.queryExecution.executedPlan
+    val deltaScans = collect(plan) { case s: CometDeltaNativeScanExec => s }
+    assert(deltaScans.nonEmpty, s"expected CometDeltaNativeScanExec in plan, got:\n$plan")
+
     withSQLConf("spark.comet.scan.deltaNative.enabled" -> "false") {
-      val vanilla = query(spark.read.format("delta").load(tablePath))
+      val vanillaRows = query(spark.read.format("delta").load(tablePath))
         .collect()
         .toSeq
         .map(normalizeRow)
       assert(
-        withClaim.sortBy(_.mkString("|")) == vanilla.sortBy(_.mkString("|")),
-        s"claim-path result did not match vanilla Spark result\n" +
-          s"withClaim=$withClaim\nvanilla=$vanilla")
+        nativeRows.sortBy(_.mkString("|")) == vanillaRows.sortBy(_.mkString("|")),
+        s"native result did not match vanilla Spark result\nnative=$nativeRows\nvanilla=$vanillaRows")
     }
   }
 
+  /**
+   * Run `query` on the Delta table at `tablePath` and assert the read fell back: the executed plan
+   * holds no `CometDeltaNativeScanExec`. Only the plan is checked; rows are not compared.
+   */
+  protected def assertDeltaFallback(
+      tablePath: String,
+      query: DataFrame => DataFrame): Unit = {
+    val df = query(spark.read.format("delta").load(tablePath))
+    df.collect() // materialise first so the executed plan is final
+    val executed = df.queryExecution.executedPlan
+    val nativeScans = collect(executed) { case scan: CometDeltaNativeScanExec => scan }
+    assert(
+      nativeScans.isEmpty,
+      s"expected fallback (no CometDeltaNativeScanExec) but plan was:\n$executed")
+  }
+
+  /**
+   * Load the Delta table at `tablePath` unfiltered, execute it, and assert the executed plan holds
+   * at least one `CometDeltaNativeScanExec` (i.e. the read did not fall back to vanilla Spark).
+   */
+  protected def assertKernelReadEngaged(tablePath: String): Unit = {
+    val loaded = spark.read.format("delta").load(tablePath)
+    loaded.collect() // run the query first so AQE / Comet rules have finalized the plan
+    val executed = loaded.queryExecution.executedPlan
+    val hasNativeScan = collect(executed) { case scan: CometDeltaNativeScanExec => scan }.nonEmpty
+    assert(hasNativeScan, s"expected CometDeltaNativeScanExec in plan, got:\n$executed")
+  }
+
   protected def normalizeRow(row: Row): Seq[Any] =
     row.toSeq.map(normalizeValue)
 

From 0ee301f73c228c473aed3dd7c146112369e60570 Mon Sep 17 00:00:00 2001
From: Scott Schenkein <schenksj@yahoo.com>
Date: Mon, 29 Jun 2026 09:26:45 -0400
Subject: [PATCH 2/2] fix(contrib-delta): partition injection-slot ordering +
 _metadata collision guard [#30 + themeA, folded into A.4b]

---
 .../contrib/delta/CometDeltaNativeScan.scala  | 74 ++++++++++++++++---
 1 file changed, 63 insertions(+), 11 deletions(-)

diff --git a/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/CometDeltaNativeScan.scala b/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/CometDeltaNativeScan.scala
index db0fde87e9..899b007de7 100644
--- a/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/CometDeltaNativeScan.scala
+++ b/contrib/delta/src/main/scala/org/apache/comet/contrib/delta/CometDeltaNativeScan.scala
@@ -89,6 +89,25 @@ object CometDeltaNativeScan extends CometOperatorSerde[CometDeltaScanMarker] wit
   private[delta] val PerFileMetadataNames: Set[String] =
     SparkFileMetadataNames ++ PerFileRowTrackingNames
 
+  // Tells a genuine Spark `_metadata.*` virtual column apart from a user column of the same name.
+  // Spark tags a virtual column's StructField with `__file_source_metadata_col` (and
+  // `__metadata_col`); a REAL data column that merely shares a SparkFileMetadataNames entry (e.g.
+  // a user table's own `file_name` column) carries NEITHER. Mirrors `Attribute.isMetadataCol`.
+  // Only a tagged `file_*` field may be stripped from the kernel read schema: dropping a real one
+  // while the proto's required_schema keeps it yields "missing kernel data-column schemas".
+  private[delta] def isSparkFileMetadataField(f: StructField): Boolean =
+    Seq("__file_source_metadata_col", "__metadata_col").exists(f.metadata.contains)
+
+  // Decides whether `f` is a synthetic/virtual read column to STRIP from the kernel projection:
+  // its lower-cased name must be in `stripNames`, and -- when that name is one of the file-metadata
+  // names a real user column could also use -- the field must carry the Spark file-metadata marker.
+  private[delta] def isStrippableSynthetic(f: StructField, stripNames: Set[String]): Boolean = {
+    val lowered = f.name.toLowerCase(Locale.ROOT)
+    // A non-colliding strip name is always stripped; a colliding one only when actually tagged.
+    stripNames.contains(lowered) &&
+      (!SparkFileMetadataNames.contains(lowered) || isSparkFileMetadataField(f))
+  }
+
   /**
    * `kind` string for the `ContribOp` envelope this serde produces. The native side's
    * `comet-contrib-delta` rlib registers `DeltaScanPlanner` under this same kind via
@@ -148,6 +167,33 @@ object CometDeltaNativeScan extends CometOperatorSerde[CometDeltaScanMarker] wit
     }
   }
 
+  /**
+   * Insert the partition columns into a kernel read (logical) schema at the slot where kernel's
+   * per-file transform INJECTS them: directly after the last field that advances kernel's
+   * `last_physical_field`. Every read field advances it EXCEPT a RowId metadata column, which
+   * kernel resolves via `GenerateRowId` (coalesce(materialised, baseRowId+row_index)) and
+   * deliberately does NOT advance past; `RowIndex` / `RowCommitVersion` DO advance it, so RowId
+   * is the only special case. Kernel's expression evaluator (`evaluate_struct_patch_expression`)
+   * labels its output columns POSITIONALLY against this shipped logical schema, so the partition
+   * columns must sit in exactly that emission slot. Appending them last instead puts the Int32
+   * partition literal in the row_id slot and the Long row_id in the partition slot -- the #30
+   * column swap (visible as row_id == partition on a partitioned row-tracking table).
+   */
+  private def spliceKernelPartitions(
+      dataFields: Array[StructField],
+      partitionFields: Array[StructField]): Array[StructField] = {
+    def isKernelRowId(f: StructField): Boolean =
+      f.metadata.contains(KernelMetadataSpecKey) &&
+        f.metadata.getString(KernelMetadataSpecKey) == KernelRowIdSpec
+    if (partitionFields.nonEmpty) {
+      // One past the last non-RowId field; 0 when there is no non-RowId field at all.
+      val slot = dataFields.lastIndexWhere(f => !isKernelRowId(f)) + 1
+      dataFields.take(slot) ++ partitionFields ++ dataFields.drop(slot)
+    } else {
+      dataFields
+    }
+  }
+
   /**
    * Translate Delta's `delta.columnMapping.id` metadata key to Spark+parquet's standard
    * `parquet.field.id` key on every StructField at every level of nesting. Required for
@@ -223,8 +269,7 @@ object CometDeltaNativeScan extends CometOperatorSerde[CometDeltaScanMarker] wit
         SyntheticReadFieldNames -
           DeltaReflection.RowIdColumnName.toLowerCase(Locale.ROOT) -
           DeltaReflection.RowCommitVersionColumnName.toLowerCase(Locale.ROOT)
-    val dataFields = requiredSchema.fields.filterNot(f =>
-      stripNames.contains(f.name.toLowerCase(Locale.ROOT)))
+    val dataFields = requiredSchema.fields.filterNot(f => isStrippableSynthetic(f, stripNames))
     if (dataFields.isEmpty) {
       // Zero data columns (partition-only / synthetic-only reads): no kernel read schema; the
       // executor drives the row count without a parquet read and the partition columns are filled
@@ -241,7 +286,8 @@ object CometDeltaNativeScan extends CometOperatorSerde[CometDeltaScanMarker] wit
       // so column-mapping physical names / field-ids ride along. The AddFiles route passes an empty
       // `partitionSchema` (its identity transform can't inject partitions, so partitions stay
       // Comet-appended there until that route also moves to kernel enumeration).
-      val projected0 = dataFields.map(pick) ++ partitionSchema.fields.map(pick)
+      val data0 = dataFields.map(pick)
+      val parts = partitionSchema.fields.map(pick)
       // Materialised row-id columns (`_row-id-col-*`, added by OPTIMIZE/UPDATE/MERGE) are matched by
       // NAME and carry NO column-mapping annotation. Under ACTIVE column mapping kernel's logical
       // with_schema requires both physicalName AND id on every regular field, so shipping the
@@ -256,11 +302,14 @@ object CometDeltaNativeScan extends CometOperatorSerde[CometDeltaScanMarker] wit
       // no kernel metadata-column support (Error::unsupported), so `_row-commit-version-col-*` is left
       // as-is. See state_info.rs RowId handling + CometDeltaRowTrackingMaterializedSuite (M3).
       val columnMappingActive =
-        projected0.exists(_.metadata.contains(DeltaReflection.PhysicalNameMetadataKey))
-      val projected =
-        if (columnMappingActive) projected0.map(asKernelRowIdMetadataColumnIfMaterialized)
-        else projected0
-      StructType(projected).json
+        (data0 ++ parts).exists(_.metadata.contains(DeltaReflection.PhysicalNameMetadataKey))
+      val data =
+        if (columnMappingActive) data0.map(asKernelRowIdMetadataColumnIfMaterialized) else data0
+      // Splice partitions at kernel's injection slot (after the last non-RowId field), NOT appended
+      // last -- otherwise the positional output labeling swaps an Int32 partition with the Long
+      // row_id under active CM (the materialised row-id became a RowId metadata column above). See
+      // `spliceKernelPartitions` / #30.
+      StructType(spliceKernelPartitions(data, parts)).json
     }
   }
 
@@ -298,8 +347,7 @@ object CometDeltaNativeScan extends CometOperatorSerde[CometDeltaScanMarker] wit
       n.equalsIgnoreCase(DeltaReflection.RowIdColumnName) ||
         n.toLowerCase(Locale.ROOT).startsWith("_row-id-col-")
     val kept: Array[StructField] = requiredSchema.fields.flatMap { f =>
-      val lc = f.name.toLowerCase(Locale.ROOT)
-      if (workerOnly.contains(lc)) {
+      if (isStrippableSynthetic(f, workerOnly)) {
         None // worker-side constant; not read from kernel
       } else if (isRowIndex(f.name)) {
         Some(asKernelMetadataColumn(f.name, "row_index"))
@@ -312,7 +360,11 @@ object CometDeltaNativeScan extends CometOperatorSerde[CometDeltaScanMarker] wit
         Some(pick(f))
       }
     }
-    val all = kept ++ partitionSchema.fields.map(pick)
+    // Splice partitions at kernel's injection slot (after the last non-RowId field), NOT appended
+    // last: `_metadata.row_id` is shipped as a kernel RowId metadata column (in `kept`), and kernel
+    // injects the partition literal BEFORE it. Appending partitions last makes the executor's
+    // positional labeling swap the Int32 partition value with the Long row_id -- the #30 column swap.
+    val all = spliceKernelPartitions(kept, partitionSchema.fields.map(pick))
     if (all.isEmpty) "" else StructType(all).json
   }