Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 40 additions & 3 deletions sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -2198,25 +2198,62 @@ def pg_type_code_to_arrow(code: int) -> str:

def athena_to_feast_value_type(athena_type_as_str: str) -> ValueType:
    """Map an Athena column type string to the corresponding Feast ValueType.

    The lookup is case-insensitive and ignores surrounding whitespace. Type
    parameters (``varchar(10)``, ``decimal(10,2)``, ``map<...>``) are stripped
    before the scalar lookup, and ``array<...>`` / ``array[...]`` types are
    resolved recursively to the matching ``*_LIST`` value type.

    Type names from https://docs.aws.amazon.com/athena/latest/ug/data-types.html

    Args:
        athena_type_as_str: Athena type name, e.g. ``"bigint"``,
            ``"varchar(10)"`` or ``"array<int>"``.

    Returns:
        The matching ValueType. Unrecognised scalar types yield
        ``ValueType.UNKNOWN``; arrays whose element type cannot be parsed or
        has no dedicated list type yield ``ValueType.VALUE_LIST``.
    """
    athena_type = athena_type_as_str.lower().strip()

    if athena_type.startswith("array"):
        # Greedy match so that the outermost pair of brackets is used and the
        # full element type (including any nested parameters) is captured.
        inner_type_match = re.search(r"(?:<|\[)(.+)(?:>|\])", athena_type)
        if not inner_type_match:
            return ValueType.VALUE_LIST

        inner_type = inner_type_match.group(1).strip()
        inner_feast_type = athena_to_feast_value_type(inner_type)

        list_mapping = {
            ValueType.BYTES: ValueType.BYTES_LIST,
            ValueType.STRING: ValueType.STRING_LIST,
            ValueType.INT32: ValueType.INT32_LIST,
            ValueType.INT64: ValueType.INT64_LIST,
            ValueType.DOUBLE: ValueType.DOUBLE_LIST,
            ValueType.FLOAT: ValueType.FLOAT_LIST,
            ValueType.BOOL: ValueType.BOOL_LIST,
            ValueType.UNIX_TIMESTAMP: ValueType.UNIX_TIMESTAMP_LIST,
            ValueType.MAP: ValueType.MAP_LIST,
            ValueType.JSON: ValueType.JSON_LIST,
            ValueType.STRUCT: ValueType.STRUCT_LIST,
            ValueType.UUID: ValueType.UUID_LIST,
            ValueType.DECIMAL: ValueType.DECIMAL_LIST,
        }
        # NOTE(review): nested arrays such as array<array<int>> resolve their
        # element to a *_LIST type, which has no entry above, so they fall
        # back to the generic VALUE_LIST and the element type is not kept.
        return list_mapping.get(inner_feast_type, ValueType.VALUE_LIST)

    # Drop type parameters: "varchar(10)" -> "varchar", "map<int,string>" -> "map".
    # NOTE(review): this also discards map key/value types, so a map with
    # non-string keys is still reported as ValueType.MAP - confirm callers
    # can handle that.
    base_type = re.split(r"[(<\[]", athena_type)[0].strip()

    # Substring checks cover "date", "time", "timestamp" and their
    # "... with time zone" variants ("timestamp" already contains "time").
    if "time" in base_type or "date" in base_type:
        return ValueType.UNIX_TIMESTAMP

    type_map = {
        "null": ValueType.UNKNOWN,  # There is a null type, but this preserves backwards compat
        "boolean": ValueType.BOOL,
        "tinyint": ValueType.INT32,
        "smallint": ValueType.INT32,
        "int": ValueType.INT32,
        "integer": ValueType.INT32,
        "bigint": ValueType.INT64,
        "double": ValueType.DOUBLE,
        "float": ValueType.FLOAT,
        "real": ValueType.FLOAT,
        "decimal": ValueType.DECIMAL,
        "binary": ValueType.BYTES,
        "varbinary": ValueType.BYTES,
        "char": ValueType.STRING,
        "varchar": ValueType.STRING,
        "string": ValueType.STRING,
        "timestamp": ValueType.UNIX_TIMESTAMP,
        "json": ValueType.JSON,
        "struct": ValueType.STRUCT,
        "row": ValueType.STRUCT,
        "map": ValueType.MAP,
        "uuid": ValueType.UUID,
        "ipaddress": ValueType.STRING,
    }
    # Look up the normalised base type; unknown names map to UNKNOWN instead
    # of raising KeyError.
    return type_map.get(base_type, ValueType.UNKNOWN)


def pa_to_athena_value_type(pa_type: "pyarrow.DataType") -> str:
Expand Down