diff --git a/docs/source/user-guide/latest/installation.md b/docs/source/user-guide/latest/installation.md index 442133facc..3bd1ec27ee 100644 --- a/docs/source/user-guide/latest/installation.md +++ b/docs/source/user-guide/latest/installation.md @@ -92,6 +92,7 @@ Here are the direct links for downloading the Comet $COMET_VERSION jar file. - [Comet plugin for Spark 3.5 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark3.5_2.13/$COMET_VERSION/comet-spark-spark3.5_2.13-$COMET_VERSION.jar) - [Comet plugin for Spark 4.0 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.0_2.13/$COMET_VERSION/comet-spark-spark4.0_2.13-$COMET_VERSION.jar) - [Comet plugin for Spark 4.1 / Scala 2.13](https://repo1.maven.org/maven2/org/apache/datafusion/comet-spark-spark4.1_2.13/$COMET_VERSION/comet-spark-spark4.1_2.13-$COMET_VERSION.jar) + ## Building from source diff --git a/native/spark-expr/src/array_funcs/array_insert.rs b/native/spark-expr/src/array_funcs/array_insert.rs index e638c440fd..e056c108e0 100644 --- a/native/spark-expr/src/array_funcs/array_insert.rs +++ b/native/spark-expr/src/array_funcs/array_insert.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{make_array, Array, ArrayRef, GenericListArray, Int32Array, OffsetSizeTrait}; +use arrow::array::{ + make_array, Array, ArrayRef, BooleanArray, GenericListArray, Int32Array, OffsetSizeTrait, +}; use arrow::datatypes::{DataType, Schema}; use arrow::{ array::{as_primitive_array, Capacities, MutableArrayData}, @@ -104,9 +106,32 @@ impl PhysicalExpr for ArrayInsert { } fn evaluate(&self, batch: &RecordBatch) -> DataFusionResult { + // Spark evaluates arguments left-to-right: + // 1. src + // 2. pos only when src is non-null + // 3. item only when src and pos are non-null + + // Check that src array is actually an array and get it's value type + let src_value = self + .src_array_expr + .evaluate(batch)? + .into_array(batch.num_rows())?; + + let src_element_type = match self.array_type(src_value.data_type())? { + DataType::List(field) => &field.data_type().clone(), + DataType::LargeList(field) => &field.data_type().clone(), + _ => unreachable!(), + }; + + let evaluate_pos = BooleanArray::from( + (0..batch.num_rows()) + .map(|row| src_value.is_valid(row)) + .collect::>(), + ); + let pos_value = self .pos_expr - .evaluate(batch)? + .evaluate_selection(batch, &evaluate_pos)? .into_array(batch.num_rows())?; // Spark supports only IntegerType (Int32): @@ -118,22 +143,16 @@ impl PhysicalExpr for ArrayInsert { ))); } - // Check that src array is actually an array and get it's value type - let src_value = self - .src_array_expr - .evaluate(batch)? - .into_array(batch.num_rows())?; - - let src_element_type = match self.array_type(src_value.data_type())? { - DataType::List(field) => &field.data_type().clone(), - DataType::LargeList(field) => &field.data_type().clone(), - _ => unreachable!(), - }; + let evaluate_item = BooleanArray::from( + (0..batch.num_rows()) + .map(|row| src_value.is_valid(row) && pos_value.is_valid(row)) + .collect::>(), + ); // Check that inserted value has the same type as an array let item_value = self .item_expr - .evaluate(batch)? + .evaluate_selection(batch, &evaluate_item)? .into_array(batch.num_rows())?; if item_value.data_type() != src_element_type { return Err(DataFusionError::Internal(format!( diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_insert.sql b/spark/src/test/resources/sql-tests/expressions/array/array_insert.sql index 06294bc318..d80cc16e1e 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/array_insert.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/array_insert.sql @@ -39,6 +39,39 @@ INSERT INTO test_array_insert VALUES query SELECT array_insert(arr, pos, val) FROM test_array_insert +statement +CREATE TABLE test_array_insert_short_circuit( + arr ARRAY, + idx INT +) USING parquet + +statement +INSERT INTO test_array_insert_short_circuit VALUES + (NULL, 0), + (array(1), 1) + +-- null source array short-circuits position evaluation +query +SELECT array_insert(arr, element_at(array(1), idx), 9) +FROM test_array_insert_short_circuit + +statement +CREATE TABLE test_array_insert_null_pos_short_circuit( + arr ARRAY, + idx INT +) USING parquet + +statement +INSERT INTO test_array_insert_null_pos_short_circuit VALUES + (array(1), 0), + (array(1), 1), + (array(1), 2) + +-- null position short-circuits item evaluation +query +SELECT array_insert(arr, CAST(NULL AS INT), element_at(array(1), idx)) +FROM test_array_insert_null_pos_short_circuit + -- ============================================================ -- Literal arguments (all-literal queries test native eval -- because CometSqlFileTestSuite disables constant folding) diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_prepend.sql b/spark/src/test/resources/sql-tests/expressions/array/array_prepend.sql index 0830550f14..e8b93d580f 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/array_prepend.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/array_prepend.sql @@ -25,7 +25,7 @@ statement CREATE TABLE test_array_prepend(arr array, val int) USING parquet statement -INSERT INTO test_array_prepend VALUES (array(1, 2, 3), 4), (array(), 1), (NULL, 1), (array(1, 2), NULL) +INSERT INTO test_array_prepend VALUES (array(1, 2, 3), 4), (array(), 1), (NULL, 1), (NULL, 0), (array(1, 2), NULL) -- column + column query @@ -135,3 +135,7 @@ INSERT INTO test_array_prepend_bin VALUES (array(X'0102', X'0304'), X'0506'), (a query SELECT array_prepend(arr, val) FROM test_array_prepend_bin + +query +SELECT array_prepend(arr, element_at(array(9), val)) +FROM test_array_prepend