From 7de295467a3a4c4cda264528949ba736730e6d89 Mon Sep 17 00:00:00 2001 From: Charlie Tonneslan Date: Mon, 18 May 2026 15:41:06 -0400 Subject: [PATCH] parseYaml: drop the stray null when the stream starts with comments When a YAML stream begins with comment-only lines before the first `---` separator, the chunked YAMLReader handed those lines to the decoder as a separate "document" and they unmarshaled to nil. The result array then contained a leading null the user didn't write. Strip a comment-and-blank prefix that runs up to (and including) the first `---` marker, but only when the marker is followed by real content. Leaves the bare `---` case (an explicit single-null document) untouched. Fixes #660 Signed-off-by: Charlie Tonneslan --- builtins.go | 54 ++++++++++++++++++++++++++++++++++++++ testdata/parseYaml.golden | 8 ++++++ testdata/parseYaml.jsonnet | 10 +++++++ 3 files changed, 72 insertions(+) diff --git a/builtins.go b/builtins.go index 9cf0f8be..4859b93b 100644 --- a/builtins.go +++ b/builtins.go @@ -1588,6 +1588,11 @@ func builtinParseYAML(i *interpreter, str value) (value, error) { return nil, err } s := sval.getGoString() + // Strip a leading comment/blank-only prefix before the first + // explicit --- separator. Otherwise the YAML reader treats that + // prefix as a separate (empty) document and emits a stray null in + // the result array. + s = stripLeadingYAMLComments(s) elems := []interface{}{} d := NewYAMLToJSONDecoder(strings.NewReader(s)) @@ -1611,6 +1616,55 @@ func builtinParseYAML(i *interpreter, str value) (value, error) { return jsonToValue(i, elems[0]) } +// stripLeadingYAMLComments removes leading lines that are blank or +// pure comments up to (and including) the first --- document marker, +// but only when no real content appears before that marker AND +// content follows the marker. 
This avoids emitting a stray null +// element for streams whose first "document" is just commentary +// while leaving a bare `---` (an explicit single-null document) +// alone. +func stripLeadingYAMLComments(s string) string { + idx := 0 + hasComment := false + for idx < len(s) { + nl := strings.IndexByte(s[idx:], '\n') + var line string + var next int + if nl < 0 { + line = s[idx:] + next = len(s) + } else { + line = s[idx : idx+nl] + next = idx + nl + 1 + } + trimmed := strings.TrimSpace(line) + switch { + case len(trimmed) == 0: + // blank, keep scanning + case strings.HasPrefix(trimmed, "#"): + hasComment = true + case trimmed == "---": + if !hasComment { + return s + } + // Only strip if real content follows; otherwise the input is + // a comment-only-then-marker stream that should still decode + // to a single null doc, matching the bare `---` case. + rest := s[next:] + for _, c := range rest { + if c != '\n' && c != '\r' && c != ' ' && c != '\t' { + return rest + } + } + return s + default: + return s + } + idx = next + } + return s +} + func jsonEncode(v interface{}) (string, error) { buf := new(bytes.Buffer) enc := json.NewEncoder(buf) diff --git a/testdata/parseYaml.golden b/testdata/parseYaml.golden index a5eb13dd..707ead69 100644 --- a/testdata/parseYaml.golden +++ b/testdata/parseYaml.golden @@ -54,5 +54,13 @@ ], [ null + ], + [ + { + "foo": "bar" + }, + { + "baz": "cuux" + } ] ] diff --git a/testdata/parseYaml.jsonnet b/testdata/parseYaml.jsonnet index 9910f8f2..92b11cba 100644 --- a/testdata/parseYaml.jsonnet +++ b/testdata/parseYaml.jsonnet @@ -60,5 +60,15 @@ |||, "---", + + // Comment-only prefix before the first document separator + // shouldn't be exposed as a leading null. + ||| + # Test + --- + foo: bar + --- + baz: cuux + |||, ] ]