From 3c37026ed8ba85ff02b4716e61854c3919984343 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Wed, 24 Jun 2026 20:17:45 +0800 Subject: [PATCH 1/2] Support multiply_ym_interval with YearMonth interval codegen dispatch --- docs/source/user-guide/latest/datatypes.md | 10 ++-- docs/source/user-guide/latest/expressions.md | 2 +- native/core/src/execution/planner.rs | 20 +++++-- native/core/src/execution/serde.rs | 3 +- native/proto/src/proto/types.proto | 1 + .../codegen/CometBatchKernelCodegen.scala | 2 + .../CometBatchKernelCodegenInput.scala | 18 ++++-- .../CometBatchKernelCodegenOutput.scala | 5 +- .../apache/comet/serde/QueryPlanSerde.scala | 5 +- .../org/apache/comet/serde/datetime.scala | 6 +- .../org/apache/comet/serde/literals.scala | 7 ++- .../udf/codegen/CometScalaUDFCodegen.scala | 8 +-- .../apache/spark/sql/comet/util/Utils.scala | 2 + .../datetime/multiply_ym_interval.sql | 55 +++++++++++++++++++ 14 files changed, 116 insertions(+), 28 deletions(-) create mode 100644 spark/src/test/resources/sql-tests/expressions/datetime/multiply_ym_interval.sql diff --git a/docs/source/user-guide/latest/datatypes.md b/docs/source/user-guide/latest/datatypes.md index 80465121da..43d4042a8a 100644 --- a/docs/source/user-guide/latest/datatypes.md +++ b/docs/source/user-guide/latest/datatypes.md @@ -83,11 +83,11 @@ the tables below and may be reconsidered based on demand: Interval types fall back to Spark today. Native acceleration is tracked by [#4540](https://github.com/apache/datafusion-comet/issues/4540). -| Type | Status | Notes | -| ----------------------- | ------ | ----------------- | -| `YearMonthIntervalType` | 🔜 | Tracked by #4540. | -| `DayTimeIntervalType` | 🔜 | Tracked by #4540. | -| `CalendarIntervalType` | 🔜 | Tracked by #4540. | +| Type | Status | Notes | +| ----------------------- | ------ | ---------------------------------------------------------------------- | +| `YearMonthIntervalType` | ✅ | Supported for `make_ym_interval` and YearMonth interval multiplication. | +| `DayTimeIntervalType` | 🔜 | Tracked by #4540. | +| `CalendarIntervalType` | 🔜 | Tracked by #4540. | ## Complex diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index b854e29d1f..b32eda6d3b 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -268,7 +268,7 @@ The type-name conversion functions (`bigint`, `binary`, `boolean`, `date`, `deci | `make_timestamp` | ✅ | | | `make_timestamp_ltz` | ✅ | 2-arg TIME form falls back | | `make_timestamp_ntz` | ✅ | 2-arg TIME form falls back | -| `make_ym_interval` | 🔜 | [#4541](https://github.com/apache/datafusion-comet/issues/4541) | +| `make_ym_interval` | ✅ | Routes through the JVM codegen dispatcher | | `minute` | ✅ | | | `month` | ✅ | | | `monthname` | ✅ | Abbreviated month name (Spark 4.0+) | diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index e89f0a8cf4..a1c2c108f6 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -35,7 +35,9 @@ use crate::execution::{ }; use crate::jvm_bridge::{jni_call, JVMClasses}; use arrow::compute::CastOptions; -use arrow::datatypes::{DataType, Field, FieldRef, Schema, TimeUnit, DECIMAL128_MAX_PRECISION}; +use arrow::datatypes::{ + DataType, Field, FieldRef, IntervalUnit, Schema, TimeUnit, DECIMAL128_MAX_PRECISION, +}; use arrow::ffi_stream::FFI_ArrowArrayStream; use datafusion::functions_aggregate::bit_and_or_xor::{bit_and_udaf, bit_or_udaf, bit_xor_udaf}; use datafusion::functions_aggregate::count::count_udaf; @@ -101,8 +103,8 @@ use datafusion::physical_expr::LexOrdering; use crate::parquet::parquet_exec::init_datasource_exec; use arrow::array::{ new_empty_array, Array, ArrayRef, BinaryBuilder, BooleanArray, Date32Array, Decimal128Array, - Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, - NullArray, StringBuilder, TimestampMicrosecondArray, + Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, + IntervalYearMonthArray, ListArray, NullArray, StringBuilder, TimestampMicrosecondArray, }; use arrow::buffer::{BooleanBuffer, NullBuffer, OffsetBuffer}; use arrow::row::{OwnedRow, RowConverter, SortField}; @@ -362,6 +364,9 @@ impl PhysicalPlanner { DataType::Time64(TimeUnit::Nanosecond) => { ScalarValue::Time64Nanosecond(None) } + DataType::Interval(IntervalUnit::YearMonth) => { + ScalarValue::IntervalYearMonth(None) + } dt => { return Err(GeneralError(format!("{dt:?} is not supported in Comet"))) } @@ -374,9 +379,12 @@ impl PhysicalPlanner { Value::IntVal(value) => match data_type { DataType::Int32 => ScalarValue::Int32(Some(*value)), DataType::Date32 => ScalarValue::Date32(Some(*value)), + DataType::Interval(IntervalUnit::YearMonth) => { + ScalarValue::IntervalYearMonth(Some(*value)) + } dt => { return Err(GeneralError(format!( - "Expected either 'Int32' or 'Date32' for IntVal, but found {dt:?}" + "Expected either 'Int32', 'Date32', or 'Interval(YearMonth)' for IntVal, but found {dt:?}" ))) } }, @@ -3635,6 +3643,10 @@ fn literal_to_array_ref( list_literal.int_values.into(), Some(nulls.clone().into()), ))), + DataType::Interval(IntervalUnit::YearMonth) => Ok(Arc::new(IntervalYearMonthArray::new( + list_literal.int_values.into(), + Some(nulls.clone().into()), + ))), DataType::Timestamp(TimeUnit::Microsecond, None) => { Ok(Arc::new(TimestampMicrosecondArray::new( list_literal.long_values.into(), diff --git a/native/core/src/execution/serde.rs b/native/core/src/execution/serde.rs index d6ec6be132..86f7f2c1e7 100644 --- a/native/core/src/execution/serde.rs +++ b/native/core/src/execution/serde.rs @@ -19,7 +19,7 @@ use super::operators::ExecutionError; use crate::errors::ExpressionError; -use arrow::datatypes::{DataType as ArrowDataType, TimeUnit}; +use arrow::datatypes::{DataType as ArrowDataType, IntervalUnit, TimeUnit}; use arrow::datatypes::{Field, Fields}; use datafusion_comet_proto::{ spark_config, spark_expression, @@ -97,6 +97,7 @@ pub fn to_arrow_datatype(dt_value: &DataType) -> ArrowDataType { DataTypeId::TimestampNtz => ArrowDataType::Timestamp(TimeUnit::Microsecond, None), DataTypeId::Date => ArrowDataType::Date32, DataTypeId::Time => ArrowDataType::Time64(TimeUnit::Nanosecond), + DataTypeId::YearMonthInterval => ArrowDataType::Interval(IntervalUnit::YearMonth), DataTypeId::Null => ArrowDataType::Null, DataTypeId::List => match dt_value .type_info diff --git a/native/proto/src/proto/types.proto b/native/proto/src/proto/types.proto index df0c0c5553..2d9b45bb49 100644 --- a/native/proto/src/proto/types.proto +++ b/native/proto/src/proto/types.proto @@ -60,6 +60,7 @@ message DataType { MAP = 15; STRUCT = 16; TIME = 17; + YEAR_MONTH_INTERVAL = 18; } DataTypeId type_id = 1; diff --git a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala index 1c990835bb..ff4ff71993 100644 --- a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala +++ b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegen.scala @@ -62,6 +62,7 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim { case "TinyIntVector" => classOf[TinyIntVector] case "SmallIntVector" => classOf[SmallIntVector] case "IntVector" => classOf[IntVector] + case "IntervalYearVector" => classOf[IntervalYearVector] case "BigIntVector" => classOf[BigIntVector] case "Float4Vector" => classOf[Float4Vector] case "Float8Vector" => classOf[Float8Vector] @@ -82,6 +83,7 @@ object CometBatchKernelCodegen extends Logging with CometExprTraitShim { case BooleanType | ByteType | ShortType | IntegerType | LongType => true case FloatType | DoubleType => true case _: DecimalType => true + case _: YearMonthIntervalType => true case _: StringType | _: BinaryType => true case DateType | TimestampType | TimestampNTZType => true case ArrayType(inner, _) => isSupportedDataType(inner) diff --git a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegenInput.scala b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegenInput.scala index 09bfc52bd4..d9adf67d51 100644 --- a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegenInput.scala +++ b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegenInput.scala @@ -57,6 +57,7 @@ private[codegen] object CometBatchKernelCodegenInput { classOf[TinyIntVector], classOf[SmallIntVector], classOf[IntVector], + classOf[IntervalYearVector], classOf[BigIntVector], classOf[Float4Vector], classOf[Float8Vector], @@ -127,7 +128,9 @@ private[codegen] object CometBatchKernelCodegenInput { } val intCases = withOrd.collect { case (ArrowColumnSpec(cls, _), ord) - if cls == classOf[IntVector] || cls == classOf[DateDayVector] => + if cls == classOf[IntVector] || + cls == classOf[DateDayVector] || + cls == classOf[IntervalYearVector] => s" case $ord: return this.col$ord.getInt(this.rowIdx);" } val longCases = withOrd.collect { @@ -590,7 +593,7 @@ private[codegen] object CometBatchKernelCodegenInput { case BooleanType => s"getBoolean($idx)" case ByteType => s"getByte($idx)" case ShortType => s"getShort($idx)" - case IntegerType | DateType => s"getInt($idx)" + case IntegerType | DateType | _: YearMonthIntervalType => s"getInt($idx)" case LongType | TimestampType | TimestampNTZType => s"getLong($idx)" case FloatType => s"getFloat($idx)" case DoubleType => s"getDouble($idx)" @@ -687,7 +690,7 @@ private[codegen] object CometBatchKernelCodegenInput { | public short getShort(int i) { | return $childField.getShort(startIndex + i); | }""".stripMargin - case IntegerType | DateType => + case IntegerType | DateType | _: YearMonthIntervalType => s""" @Override | public int getInt(int i) { | return $childField.getInt(startIndex + i); @@ -843,7 +846,7 @@ private[codegen] object CometBatchKernelCodegenInput { s" case $fi: return ${path}_f$fi.getByte(this.rowIdx);" case ShortType => s" case $fi: return ${path}_f$fi.getShort(this.rowIdx);" - case IntegerType | DateType => + case IntegerType | DateType | _: YearMonthIntervalType => s" case $fi: return ${path}_f$fi.getInt(this.rowIdx);" case LongType | TimestampType | TimestampNTZType => s" case $fi: return ${path}_f$fi.getLong(this.rowIdx);" @@ -891,8 +894,11 @@ private[codegen] object CometBatchKernelCodegenInput { fieldReadScalar(fi, ShortType, f.nullable) } val intCases = scalarOrd.collect { - case (f, fi) if f.sparkType == IntegerType || f.sparkType == DateType => - fieldReadScalar(fi, IntegerType, f.nullable) + case (f, fi) + if f.sparkType == IntegerType || + f.sparkType == DateType || + f.sparkType.isInstanceOf[YearMonthIntervalType] => + fieldReadScalar(fi, f.sparkType, f.nullable) } val longCases = scalarOrd.collect { case (f, fi) diff --git a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegenOutput.scala b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegenOutput.scala index a26e3d1796..0da27f7166 100644 --- a/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegenOutput.scala +++ b/spark/src/main/scala/org/apache/comet/codegen/CometBatchKernelCodegenOutput.scala @@ -162,6 +162,7 @@ private[codegen] object CometBatchKernelCodegenOutput { case ByteType => classOf[TinyIntVector].getName case ShortType => classOf[SmallIntVector].getName case IntegerType => classOf[IntVector].getName + case _: YearMonthIntervalType => classOf[IntervalYearVector].getName case LongType => classOf[BigIntVector].getName case FloatType => classOf[Float4Vector].getName case DoubleType => classOf[Float8Vector].getName @@ -208,7 +209,7 @@ private[codegen] object CometBatchKernelCodegenOutput { val set = if (nested) "setSafe" else "set" OutputEmit("", s"$targetVec.$set($idx, $source ? 1 : 0);") case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType | DateType | - TimestampType | TimestampNTZType => + TimestampType | TimestampNTZType | _: YearMonthIntervalType => // Spark codegen emits the matching primitive Java type; Arrow `set` overloads accept it. val set = if (nested) "setSafe" else "set" OutputEmit("", s"$targetVec.$set($idx, $source);") @@ -392,7 +393,7 @@ private[codegen] object CometBatchKernelCodegenOutput { case BooleanType => s"$target.getBoolean($idx)" case ByteType => s"$target.getByte($idx)" case ShortType => s"$target.getShort($idx)" - case IntegerType | DateType => s"$target.getInt($idx)" + case IntegerType | DateType | _: YearMonthIntervalType => s"$target.getInt($idx)" case LongType | TimestampType | TimestampNTZType => s"$target.getLong($idx)" case FloatType => s"$target.getFloat($idx)" case DoubleType => s"$target.getDouble($idx)" diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 143048fb44..9e759a036e 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -290,9 +290,11 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { classOf[Hour] -> CometHour, classOf[MakeDate] -> CometMakeDate, classOf[MakeTimestamp] -> CometMakeTimestamp, + classOf[MakeYMInterval] -> CometMakeYMInterval, classOf[MicrosToTimestamp] -> CometMicrosToTimestamp, classOf[MillisToTimestamp] -> CometMillisToTimestamp, classOf[MonthsBetween] -> CometMonthsBetween, + classOf[MultiplyYMInterval] -> CometMultiplyYMInterval, classOf[Minute] -> CometMinute, classOf[NextDay] -> CometNextDay, classOf[Second] -> CometSecond, @@ -478,7 +480,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { def supportedDataType(dt: DataType, allowComplex: Boolean = false): Boolean = dt match { case _: ByteType | _: ShortType | _: IntegerType | _: LongType | _: FloatType | _: DoubleType | _: StringType | _: BinaryType | _: TimestampType | _: TimestampNTZType | - _: DecimalType | _: DateType | _: BooleanType | _: NullType => + _: DecimalType | _: DateType | _: BooleanType | _: NullType | _: YearMonthIntervalType => true case dt if isTimeType(dt) => true @@ -517,6 +519,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim { case _: MapType => 15 case _: StructType => 16 case dt if isTimeType(dt) => 17 + case _: YearMonthIntervalType => 18 case dt => logWarning(s"Cannot serialize Spark data type: $dt") return None diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala b/spark/src/main/scala/org/apache/comet/serde/datetime.scala index bc6214a188..2402ca529b 100644 --- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala +++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala @@ -21,7 +21,7 @@ package org.apache.comet.serde import java.util.Locale -import org.apache.spark.sql.catalyst.expressions.{AddMonths, Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, GetTimestamp, Hour, Hours, LastDay, Literal, MakeDate, MakeTimestamp, MicrosToTimestamp, MillisToTimestamp, Minute, Month, MonthsBetween, NextDay, Quarter, Second, SecondsToTimestamp, ToUnixTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixMicros, UnixMillis, UnixSeconds, UnixTimestamp, WeekDay, WeekOfYear, Year} +import org.apache.spark.sql.catalyst.expressions.{AddMonths, Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, GetTimestamp, Hour, Hours, LastDay, Literal, MakeDate, MakeTimestamp, MakeYMInterval, MicrosToTimestamp, MillisToTimestamp, Minute, Month, MonthsBetween, MultiplyYMInterval, NextDay, Quarter, Second, SecondsToTimestamp, ToUnixTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixMicros, UnixMillis, UnixSeconds, UnixTimestamp, WeekDay, WeekOfYear, Year} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType} import org.apache.spark.unsafe.types.UTF8String @@ -852,6 +852,10 @@ object CometMonthsBetween extends CometCodegenDispatch[MonthsBetween] object CometMakeTimestamp extends CometCodegenDispatch[MakeTimestamp] +object CometMakeYMInterval extends CometCodegenDispatch[MakeYMInterval] + +object CometMultiplyYMInterval extends CometCodegenDispatch[MultiplyYMInterval] + object CometMicrosToTimestamp extends CometCodegenDispatch[MicrosToTimestamp] object CometMillisToTimestamp extends CometCodegenDispatch[MillisToTimestamp] diff --git a/spark/src/main/scala/org/apache/comet/serde/literals.scala b/spark/src/main/scala/org/apache/comet/serde/literals.scala index 4f2a5dfa5e..3c478b975f 100644 --- a/spark/src/main/scala/org/apache/comet/serde/literals.scala +++ b/spark/src/main/scala/org/apache/comet/serde/literals.scala @@ -24,7 +24,7 @@ import java.lang import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.{Attribute, Literal} import org.apache.spark.sql.catalyst.util.ArrayData -import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, DateType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, LongType, NullType, ShortType, StringType, TimestampNTZType, TimestampType} +import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, DateType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, LongType, NullType, ShortType, StringType, TimestampNTZType, TimestampType, YearMonthIntervalType} import org.apache.spark.unsafe.types.UTF8String import com.google.protobuf.ByteString @@ -77,7 +77,8 @@ object CometLiteral extends CometExpressionSerde[Literal] with Logging { case _: BooleanType => exprBuilder.setBoolVal(value.asInstanceOf[Boolean]) case _: ByteType => exprBuilder.setByteVal(value.asInstanceOf[Byte]) case _: ShortType => exprBuilder.setShortVal(value.asInstanceOf[Short]) - case _: IntegerType | _: DateType => exprBuilder.setIntVal(value.asInstanceOf[Int]) + case _: IntegerType | _: DateType | _: YearMonthIntervalType => + exprBuilder.setIntVal(value.asInstanceOf[Int]) case _: LongType | _: TimestampType | _: TimestampNTZType => exprBuilder.setLongVal(value.asInstanceOf[Long]) case dt if isTimeType(dt) => @@ -150,7 +151,7 @@ object CometLiteral extends CometExpressionSerde[Literal] with Logging { else null.asInstanceOf[Integer]) listLiteralBuilder.addNullMask(casted != null) }) - case IntegerType | DateType => + case IntegerType | DateType | _: YearMonthIntervalType => array.foreach(v => { val casted = v.asInstanceOf[Integer] listLiteralBuilder.addIntValues(casted) diff --git a/spark/src/main/scala/org/apache/comet/udf/codegen/CometScalaUDFCodegen.scala b/spark/src/main/scala/org/apache/comet/udf/codegen/CometScalaUDFCodegen.scala index f575dd5b53..ad7d3037ee 100644 --- a/spark/src/main/scala/org/apache/comet/udf/codegen/CometScalaUDFCodegen.scala +++ b/spark/src/main/scala/org/apache/comet/udf/codegen/CometScalaUDFCodegen.scala @@ -217,10 +217,10 @@ class CometScalaUDFCodegen extends CometUDF with Logging { child = specFor(childVec)) } StructColumnSpec(nullable = true, fieldSpecs) - case _: BitVector | _: TinyIntVector | _: SmallIntVector | _: IntVector | _: BigIntVector | - _: Float4Vector | _: Float8Vector | _: DecimalVector | _: VarCharVector | - _: VarBinaryVector | _: DateDayVector | _: TimeStampMicroVector | - _: TimeStampMicroTZVector => + case _: BitVector | _: TinyIntVector | _: SmallIntVector | _: IntVector | + _: IntervalYearVector | _: BigIntVector | _: Float4Vector | _: Float8Vector | + _: DecimalVector | _: VarCharVector | _: VarBinaryVector | _: DateDayVector | + _: TimeStampMicroVector | _: TimeStampMicroTZVector => ScalarColumnSpec(v.getClass.asInstanceOf[Class[_ <: ValueVector]], nullable = true) case other => throw new UnsupportedOperationException( diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala b/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala index 15e1e2c410..fdef21dad9 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala @@ -152,6 +152,8 @@ object Utils extends CometTypeShim with Logging { case NullType => ArrowType.Null.INSTANCE case dt if isTimeType(dt) => new ArrowType.Time(TimeUnit.NANOSECOND, 64) + case _: YearMonthIntervalType => + new ArrowType.Interval(IntervalUnit.YEAR_MONTH) case _ => throw new UnsupportedOperationException( s"Unsupported data type: [${dt.getClass.getName}] ${dt.catalogString}") diff --git a/spark/src/test/resources/sql-tests/expressions/datetime/multiply_ym_interval.sql b/spark/src/test/resources/sql-tests/expressions/datetime/multiply_ym_interval.sql new file mode 100644 index 0000000000..ef41ac3174 --- /dev/null +++ b/spark/src/test/resources/sql-tests/expressions/datetime/multiply_ym_interval.sql @@ -0,0 +1,55 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- Routes multiply_ym_interval through the codegen dispatcher; produces YearMonthIntervalType. +-- Config: spark.comet.exec.scalaUDF.codegen.enabled=true + +statement +CREATE TABLE test_multiply_ym_interval(y int, m int, i int, l long, f float, d double, dec decimal(10,2)) USING parquet + +statement +INSERT INTO test_multiply_ym_interval VALUES + (1, 2, 2, CAST(3 AS BIGINT), CAST(1.5 AS FLOAT), CAST(2.5 AS DOUBLE), CAST(2.50 AS DECIMAL(10, 2))), + (-1, 1, -2, CAST(-3 AS BIGINT), CAST(-1.5 AS FLOAT), CAST(-2.5 AS DOUBLE), CAST(-2.50 AS DECIMAL(10, 2))), + (0, 6, 0, CAST(0 AS BIGINT), CAST(0.5 AS FLOAT), CAST(0.5 AS DOUBLE), CAST(0.50 AS DECIMAL(10, 2))), + (2, -6, NULL, NULL, NULL, NULL, NULL) + +query +SELECT + make_ym_interval(y, m) * i, + make_ym_interval(y, m) * l, + make_ym_interval(y, m) * f, + make_ym_interval(y, m) * d, + make_ym_interval(y, m) * dec +FROM test_multiply_ym_interval + +-- literal interval input +query +SELECT INTERVAL '1-2' YEAR TO MONTH * i FROM test_multiply_ym_interval + +-- numeric on the left is normalized by Spark to multiply_ym_interval. +query +SELECT i * make_ym_interval(y, m), 2 * INTERVAL '1-2' YEAR TO MONTH +FROM test_multiply_ym_interval + +-- literal multipliers, including half-up rounding for fractional months. +query +SELECT + make_ym_interval(1, 2) * 2, + make_ym_interval(1, 2) * 1.5D, + make_ym_interval(1, 2) * CAST(1.50 AS DECIMAL(10, 2)), + make_ym_interval(-1, 1) * 1.5D From b96f54749f81da4ed00dcb282c374bea4b926876 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Wed, 24 Jun 2026 20:28:29 +0800 Subject: [PATCH 2/2] Fix markdown formatting for interval docs --- docs/source/user-guide/latest/datatypes.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/user-guide/latest/datatypes.md b/docs/source/user-guide/latest/datatypes.md index 43d4042a8a..15d58d7f37 100644 --- a/docs/source/user-guide/latest/datatypes.md +++ b/docs/source/user-guide/latest/datatypes.md @@ -80,14 +80,14 @@ the tables below and may be reconsidered based on demand: ## Interval -Interval types fall back to Spark today. Native acceleration is tracked by +Interval type support is incremental and tracked by [#4540](https://github.com/apache/datafusion-comet/issues/4540). -| Type | Status | Notes | -| ----------------------- | ------ | ---------------------------------------------------------------------- | +| Type | Status | Notes | +| ----------------------- | ------ | ----------------------------------------------------------------------- | | `YearMonthIntervalType` | ✅ | Supported for `make_ym_interval` and YearMonth interval multiplication. | -| `DayTimeIntervalType` | 🔜 | Tracked by #4540. | -| `CalendarIntervalType` | 🔜 | Tracked by #4540. | +| `DayTimeIntervalType` | 🔜 | Tracked by #4540. | +| `CalendarIntervalType` | 🔜 | Tracked by #4540. | ## Complex