From d06cd9eb0347ec20439d94cef989f8b97837aeaf Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 4 Jun 2026 22:40:50 -0300 Subject: [PATCH 1/2] feat(schema): add ConfigType interface method + RegisteredConfigTypes accessor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Establishes the architectural foundation for the upcoming JSON Schema + commented YAML reference pipeline (consumed by nebari-docs). The actual generation logic lands in follow-up commits on this branch. What this commit changes: - pkg/provider/provider.go: adds `ConfigType() reflect.Type` to the Provider interface. One-line implementations on all 6 cluster providers (aws, gcp, azure, hetzner, local, existing). - pkg/dnsprovider/provider.go: same addition to DNSProvider, with the one-line implementation on cloudflare. - pkg/nic/config_types.go: adds `(*Client).RegisteredConfigTypes(ctx)` returning a ConfigTypes struct keyed by provider name. Walks the registry; no hard-coded provider list. - pkg/configschema/: skeleton package with Format constants, Options struct, and Generate signature. Body returns "not yet implemented" for now; the invopop/jsonschema + goccy CommentMap wiring lands in the next commit. - cmd/schemagen/: skeleton binary that wires up nic.Client → RegisteredConfigTypes → configschema.Generate. Currently does a dry-run discovery print so reviewers can see the registry-driven enumeration working end-to-end. Run: `go run ./cmd/schemagen`. - Makefile: `make schemas` target wraps the schemagen invocation. Design rationale lives in ADR-0005 (#360) and the architecture review that drove the registry-driven approach. Chuck's prior proposal in #40 arrived at the same architectural conclusion (registry-driven enumeration, new providers picked up automatically); the output format shifted to JSON Schema + commented YAML for the Docusaurus renderer. Refs #40. 
--- Makefile | 7 +- cmd/schemagen/main.go | 109 +++++++++++++++++++++++++ pkg/configschema/configschema.go | 81 ++++++++++++++++++ pkg/dnsprovider/cloudflare/provider.go | 6 ++ pkg/dnsprovider/provider.go | 12 ++- pkg/nic/config_types.go | 60 ++++++++++++++ pkg/provider/aws/provider.go | 8 ++ pkg/provider/azure/provider.go | 4 + pkg/provider/existing/provider.go | 6 ++ pkg/provider/gcp/provider.go | 6 ++ pkg/provider/hetzner/provider.go | 4 + pkg/provider/local/provider.go | 6 ++ pkg/provider/provider.go | 8 ++ 13 files changed, 315 insertions(+), 2 deletions(-) create mode 100644 cmd/schemagen/main.go create mode 100644 pkg/configschema/configschema.go create mode 100644 pkg/nic/config_types.go diff --git a/Makefile b/Makefile index 8982d8b3..c7b05f4b 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help build test test-unit test-integration test-coverage test-race clean fmt vet lint install pre-commit release-snapshot localkind-up localkind-down +.PHONY: help build test test-unit test-integration test-coverage test-race clean fmt vet lint install pre-commit release-snapshot localkind-up localkind-down schemas # Variables BINARY_NAME=nic @@ -166,6 +166,11 @@ deps: ## Download Go dependencies go mod verify @echo "Dependencies downloaded successfully" +schemas: ## Regenerate JSON Schema + commented YAML reference under schemas/ + @echo "Regenerating schemas/..." + go run ./cmd/schemagen -out ./schemas + @echo "Schemas regenerated. Run 'git diff schemas/' to review." + deps-update: ## Update Go dependencies @echo "Updating dependencies..." go get -u ./... diff --git a/cmd/schemagen/main.go b/cmd/schemagen/main.go new file mode 100644 index 00000000..91b9f0f8 --- /dev/null +++ b/cmd/schemagen/main.go @@ -0,0 +1,109 @@ +// schemagen emits JSON Schema + commented YAML reference documents for +// nebari-config.yaml and each registered provider's Config struct. It is +// an internal build/CI tool, not a user-facing subcommand of nic. 
+// +// Currently a skeleton: it enumerates the registered providers via +// pkg/nic.RegisteredConfigTypes and reports what it would generate, but +// the underlying configschema.Generate is not yet implemented. The +// actual generation + file writing lands in a follow-up commit on the +// same branch. +// +// Intended invocation (once complete): +// +// go run ./cmd/schemagen -out ./schemas +// +// Flags: +// +// -out output directory (default "./schemas") +// -providers comma-separated subset to regenerate (default: all registered) +package main + +import ( + "context" + "flag" + "fmt" + "log/slog" + "os" + "sort" + "strings" + + "github.com/nebari-dev/nebari-infrastructure-core/pkg/nic" +) + +func main() { + var ( + outDir string + providers string + ) + flag.StringVar(&outDir, "out", "./schemas", "output directory for generated schema files") + flag.StringVar(&providers, "providers", "", "comma-separated subset of providers to regenerate (default: all registered)") + flag.Parse() + + ctx := context.Background() + + client, err := nic.NewClient(ctx) + if err != nil { + slog.Error("build nic client", "error", err) + os.Exit(1) + } + + types := client.RegisteredConfigTypes(ctx) + + cluster := sortedKeys(types.Cluster) + dns := sortedKeys(types.DNS) + + filter := parseFilter(providers) + if len(filter) > 0 { + cluster = filterNames(cluster, filter) + dns = filterNames(dns, filter) + } + + fmt.Printf("schemagen — output directory: %s\n", outDir) + fmt.Printf("cluster providers (%d):\n", len(cluster)) + for _, name := range cluster { + fmt.Printf(" %-10s → %s\n", name, types.Cluster[name].String()) + } + fmt.Printf("dns providers (%d):\n", len(dns)) + for _, name := range dns { + fmt.Printf(" %-10s → %s\n", name, types.DNS[name].String()) + } + fmt.Println() + fmt.Println("schemagen is a skeleton. configschema.Generate is not yet") + fmt.Println("implemented; no schema files were written. 
See the follow-up") + fmt.Println("commit on the feat/config-schema-gen branch.") +} + +// sortedKeys returns the keys of m in deterministic order. The schema +// output must be reproducible for the CI drift gate to work. +func sortedKeys[V any](m map[string]V) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} + +func parseFilter(raw string) map[string]struct{} { + if raw == "" { + return nil + } + out := make(map[string]struct{}) + for name := range strings.SplitSeq(raw, ",") { + name = strings.TrimSpace(name) + if name != "" { + out[name] = struct{}{} + } + } + return out +} + +func filterNames(all []string, want map[string]struct{}) []string { + out := make([]string, 0, len(all)) + for _, name := range all { + if _, ok := want[name]; ok { + out = append(out, name) + } + } + return out +} diff --git a/pkg/configschema/configschema.go b/pkg/configschema/configschema.go new file mode 100644 index 00000000..05516b0d --- /dev/null +++ b/pkg/configschema/configschema.go @@ -0,0 +1,81 @@ +// Package configschema generates schema documents from Go config types. +// Two output formats are supported: JSON Schema (for editor LSPs and the +// docs-site renderer) and a fully-commented YAML reference (the Helm +// values.yaml analogue). Field descriptions come from godoc comments on +// the source struct, extracted at call time from the package source. +// +// Currently a skeleton — Generate is not yet implemented. The full +// implementation will wrap github.com/invopop/jsonschema for the JSON +// path and goccy/go-yaml's CommentMap for the YAML path. +package configschema + +import ( + "context" + "errors" + "reflect" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" +) + +// Format identifies which schema-document format Generate should produce. +type Format int + +const ( + // FormatJSON produces a JSON Schema document. 
+ FormatJSON Format = iota + + // FormatYAML produces a fully-commented YAML reference document with + // the same field structure as FormatJSON and godoc descriptions + // rendered as YAML comments above each field. + FormatYAML +) + +// String returns the format name for span attributes and error messages. +func (f Format) String() string { + switch f { + case FormatJSON: + return "json" + case FormatYAML: + return "yaml" + default: + return "unknown" + } +} + +// Options controls Generate's behavior. PackagePaths is the only required +// field: it lists the filesystem paths whose Go source should be parsed +// for field godoc. +type Options struct { + // Title set on the schema root (e.g. "AWS provider configuration"). + Title string + + // Description set on the schema root. + Description string + + // PackagePaths are filesystem paths to the Go packages whose source + // should be parsed for field godoc. Required: without at least one + // path, no field descriptions can be extracted. + PackagePaths []string +} + +// Generate renders the schema for the given type in the requested format. +// +// Not yet implemented. The intended behavior is documented in +// config-schema-plan.md and ADR-0005: +// - FormatJSON: invopop/jsonschema with Reflector.AddGoComments populated +// from Options.PackagePaths. +// - FormatYAML: a fully-commented YAML document constructed via +// goccy/go-yaml's CommentMap, reusing the same comment source. 
+func Generate(ctx context.Context, t reflect.Type, format Format, opts Options) ([]byte, error) { + tracer := otel.Tracer("nebari-infrastructure-core") + _, span := tracer.Start(ctx, "configschema.Generate") + defer span.End() + + span.SetAttributes( + attribute.String("format", format.String()), + attribute.String("type", t.String()), + ) + + return nil, errors.New("configschema.Generate: not yet implemented") +} diff --git a/pkg/dnsprovider/cloudflare/provider.go b/pkg/dnsprovider/cloudflare/provider.go index 83d4b3e6..2bd03722 100644 --- a/pkg/dnsprovider/cloudflare/provider.go +++ b/pkg/dnsprovider/cloudflare/provider.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "os" + "reflect" "strings" "go.opentelemetry.io/otel" @@ -41,6 +42,11 @@ func (p *Provider) Name() string { return "cloudflare" } +// ConfigType returns the reflect.Type of this DNS provider's configuration struct. +func (p *Provider) ConfigType() reflect.Type { + return reflect.TypeFor[Config]() +} + // ProvisionRecords creates or updates DNS records for the deployment. // It creates a root domain record and wildcard record pointing to the // load balancer endpoint. The record type (A or CNAME) is determined diff --git a/pkg/dnsprovider/provider.go b/pkg/dnsprovider/provider.go index c86e2ae5..b0d6b5bf 100644 --- a/pkg/dnsprovider/provider.go +++ b/pkg/dnsprovider/provider.go @@ -1,6 +1,9 @@ package dnsprovider -import "context" +import ( + "context" + "reflect" +) // DNSProvider defines the interface that all DNS providers must implement. // Providers are stateless - domain and DNS config are passed to each call. @@ -18,4 +21,11 @@ type DNSProvider interface { // This is called before infrastructure destruction to clean up stale records. // Idempotent - succeeds even if records are already gone. DestroyRecords(ctx context.Context, domain string, dnsConfig map[string]any) error + + // ConfigType returns the reflect.Type of this DNS provider's configuration + // struct. 
Used by schema-generation tooling to enumerate provider + // configurations via the registry, without taking by-name imports on + // concrete provider packages. Implementations are one-liners: + // func (*Provider) ConfigType() reflect.Type { return reflect.TypeFor[Config]() } + ConfigType() reflect.Type } diff --git a/pkg/nic/config_types.go b/pkg/nic/config_types.go new file mode 100644 index 00000000..757a360b --- /dev/null +++ b/pkg/nic/config_types.go @@ -0,0 +1,60 @@ +package nic + +import ( + "context" + "reflect" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" +) + +// ConfigTypes is the set of reflect.Type values for each registered provider's +// configuration struct, grouped by category. Returned by +// (*Client).RegisteredConfigTypes for use by schema-generation tooling that +// needs to reflect on provider config types without taking by-name imports on +// concrete provider packages. +type ConfigTypes struct { + Cluster map[string]reflect.Type + DNS map[string]reflect.Type +} + +// RegisteredConfigTypes returns the Config Go types associated with each +// registered cluster and DNS provider. The returned maps are keyed by +// provider name (as registered) and contain the reflect.Type of each +// provider's configuration struct. +// +// Intended for build-time schema-generation tooling (e.g. cmd/schemagen) +// that enumerates provider configurations via the registry rather than +// hard-coding the provider list. The registry remains the single source +// of truth for which providers ship in this build. 
+func (c *Client) RegisteredConfigTypes(ctx context.Context) *ConfigTypes { + tracer := otel.Tracer("nebari-infrastructure-core") + ctx, span := tracer.Start(ctx, "nic.RegisteredConfigTypes") + defer span.End() + + cluster := make(map[string]reflect.Type) + for _, name := range c.registry.ClusterProviders.List(ctx) { + p, err := c.registry.ClusterProviders.Get(ctx, name) + if err != nil { + // Unreachable in practice: List and Get share the same backing map. + continue + } + cluster[name] = p.ConfigType() + } + + dns := make(map[string]reflect.Type) + for _, name := range c.registry.DNSProviders.List(ctx) { + p, err := c.registry.DNSProviders.Get(ctx, name) + if err != nil { + continue + } + dns[name] = p.ConfigType() + } + + span.SetAttributes( + attribute.Int("cluster.count", len(cluster)), + attribute.Int("dns.count", len(dns)), + ) + + return &ConfigTypes{Cluster: cluster, DNS: dns} +} diff --git a/pkg/provider/aws/provider.go b/pkg/provider/aws/provider.go index 70e3e761..dd492d89 100644 --- a/pkg/provider/aws/provider.go +++ b/pkg/provider/aws/provider.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "reflect" "sync" "time" @@ -62,6 +63,13 @@ func (p *Provider) Name() string { return ProviderName } +// ConfigType returns the reflect.Type of this provider's configuration struct. +// Used by schema-generation tooling to enumerate provider configurations via +// the registry without taking by-name imports on concrete provider packages. 
+func (p *Provider) ConfigType() reflect.Type { + return reflect.TypeFor[Config]() +} + // contains checks if a string slice contains a string func contains(slice []string, str string) bool { for _, s := range slice { diff --git a/pkg/provider/azure/provider.go b/pkg/provider/azure/provider.go index dace0500..7431a123 100644 --- a/pkg/provider/azure/provider.go +++ b/pkg/provider/azure/provider.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "os" + "reflect" "strconv" "github.com/hashicorp/terraform-exec/tfexec" @@ -32,6 +33,9 @@ func NewProvider() *Provider { // Name returns the provider name used in cluster.azure: dispatch. func (p *Provider) Name() string { return providerName } +// ConfigType returns the reflect.Type of this provider's configuration struct. +func (p *Provider) ConfigType() reflect.Type { return reflect.TypeFor[Config]() } + func (p *Provider) parseConfig(ctx context.Context, clusterConfig *config.ClusterConfig) (*Config, error) { raw := clusterConfig.ProviderConfig() if raw == nil { diff --git a/pkg/provider/existing/provider.go b/pkg/provider/existing/provider.go index 6b2d4ae2..ada5257b 100644 --- a/pkg/provider/existing/provider.go +++ b/pkg/provider/existing/provider.go @@ -3,6 +3,7 @@ package existing import ( "context" "fmt" + "reflect" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -31,6 +32,11 @@ func (p *Provider) Name() string { return ProviderName } +// ConfigType returns the reflect.Type of this provider's configuration struct. +func (p *Provider) ConfigType() reflect.Type { + return reflect.TypeFor[Config]() +} + // extractConfig converts the generic provider config to the existing-cluster Config type. 
func extractConfig(ctx context.Context, clusterConfig *config.ClusterConfig) (*Config, error) { tracer := otel.Tracer("nebari-infrastructure-core") diff --git a/pkg/provider/gcp/provider.go b/pkg/provider/gcp/provider.go index 2263f941..8f83b5e0 100644 --- a/pkg/provider/gcp/provider.go +++ b/pkg/provider/gcp/provider.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "reflect" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -26,6 +27,11 @@ func (p *Provider) Name() string { return "gcp" } +// ConfigType returns the reflect.Type of this provider's configuration struct. +func (p *Provider) ConfigType() reflect.Type { + return reflect.TypeFor[Config]() +} + // Validate validates the GCP configuration (stub implementation) func (p *Provider) Validate(ctx context.Context, projectName string, _ *config.ClusterConfig) error { tracer := otel.Tracer("nebari-infrastructure-core") diff --git a/pkg/provider/hetzner/provider.go b/pkg/provider/hetzner/provider.go index 36677b32..795a4716 100644 --- a/pkg/provider/hetzner/provider.go +++ b/pkg/provider/hetzner/provider.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "reflect" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -27,6 +28,9 @@ func NewProvider() *Provider { func (p *Provider) Name() string { return providerName } +// ConfigType returns the reflect.Type of this provider's configuration struct. +func (p *Provider) ConfigType() reflect.Type { return reflect.TypeFor[Config]() } + // parseConfig extracts and validates the Hetzner config from ClusterConfig. 
func (p *Provider) parseConfig(ctx context.Context, clusterConfig *config.ClusterConfig) (*Config, error) { var hCfg Config diff --git a/pkg/provider/local/provider.go b/pkg/provider/local/provider.go index 9ceb0621..66417f09 100644 --- a/pkg/provider/local/provider.go +++ b/pkg/provider/local/provider.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "os" + "reflect" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -38,6 +39,11 @@ func (p *Provider) Name() string { return ProviderName } +// ConfigType returns the reflect.Type of this provider's configuration struct. +func (p *Provider) ConfigType() reflect.Type { + return reflect.TypeFor[Config]() +} + // Validate validates the local configuration func (p *Provider) Validate(ctx context.Context, projectName string, clusterConfig *config.ClusterConfig) error { tracer := otel.Tracer("nebari-infrastructure-core") diff --git a/pkg/provider/provider.go b/pkg/provider/provider.go index b30e58a1..349599b9 100644 --- a/pkg/provider/provider.go +++ b/pkg/provider/provider.go @@ -2,6 +2,7 @@ package provider import ( "context" + "reflect" "time" "github.com/nebari-dev/nebari-infrastructure-core/pkg/config" @@ -112,4 +113,11 @@ type Provider interface { // CLI commands and the ArgoCD writer use these to configure templates // without importing provider packages or switching on provider names. InfraSettings(clusterConfig *config.ClusterConfig) InfraSettings + + // ConfigType returns the reflect.Type of this provider's configuration + // struct. Used by schema-generation tooling to enumerate provider + // configurations via the registry, without taking by-name imports on + // concrete provider packages. 
Implementations are one-liners: + // func (*Provider) ConfigType() reflect.Type { return reflect.TypeFor[Config]() } + ConfigType() reflect.Type } From 0301a6e627c58e95023d3e769c8de657ffcf8707 Mon Sep 17 00:00:00 2001 From: viniciusdc Date: Thu, 4 Jun 2026 23:06:27 -0300 Subject: [PATCH 2/2] feat(schema): implement JSON Schema generation + drift-check workflow Wires up the schema-gen pipeline end-to-end: - pkg/configschema.Generate (FormatJSON) now produces real JSON Schema using invopop/jsonschema with godoc descriptions extracted via Reflector.AddGoComments. Configured to read yaml tags (not json), package-qualify $defs keys to avoid collisions across providers (e.g. aws.Config vs longhorn.Config), and emit additionalProperties:false to match the validator's strict-on-unknown contract. - cmd/schemagen now writes schemas/manifest.json, schemas/nebari-config.json, and schemas/providers/<name>.json. Walks pkg/ to collect package paths for godoc extraction. Supports -version, -providers, -pkg-root flags. - schemas/ committed in-tree. 6 cluster providers (aws, gcp, azure, hetzner, local, existing) + 1 DNS provider (cloudflare) + top-level nebari-config + manifest. Total ~28KB of JSON. - .github/workflows/schemas.yml drift-check on PRs and main pushes, paths-filtered to config/provider/schema-gen files. Authors run `make schemas` locally and commit; CI fails on divergence (kubebuilder CRD-regen pattern, no auto-commit). YAML reference output (FormatYAML) still stubbed; lands in a follow-up commit on this branch.
--- .github/workflows/schemas.yml | 71 +++++++++ cmd/schemagen/main.go | 211 ++++++++++++++++++++------ go.mod | 5 + go.sum | 10 ++ pkg/configschema/configschema.go | 105 ++++++++++--- schemas/manifest.json | 14 ++ schemas/nebari-config.json | 136 +++++++++++++++++ schemas/providers/aws.json | 244 ++++++++++++++++++++++++++++++ schemas/providers/azure.json | 141 +++++++++++++++++ schemas/providers/cloudflare.json | 21 +++ schemas/providers/existing.json | 62 ++++++++ schemas/providers/gcp.json | 151 ++++++++++++++++++ schemas/providers/hetzner.json | 156 +++++++++++++++++++ schemas/providers/local.json | 50 ++++++ 14 files changed, 1314 insertions(+), 63 deletions(-) create mode 100644 .github/workflows/schemas.yml create mode 100644 schemas/manifest.json create mode 100644 schemas/nebari-config.json create mode 100644 schemas/providers/aws.json create mode 100644 schemas/providers/azure.json create mode 100644 schemas/providers/cloudflare.json create mode 100644 schemas/providers/existing.json create mode 100644 schemas/providers/gcp.json create mode 100644 schemas/providers/hetzner.json create mode 100644 schemas/providers/local.json diff --git a/.github/workflows/schemas.yml b/.github/workflows/schemas.yml new file mode 100644 index 00000000..02880c07 --- /dev/null +++ b/.github/workflows/schemas.yml @@ -0,0 +1,71 @@ +name: Schemas + +# Drift check for the generated schemas/ artifacts (committed in-tree, +# consumed by nebari-docs). Runs schemagen and fails if the working tree +# diverges from what was committed. Authors keep schemas/ in sync by +# running `make schemas` locally before pushing. +# +# Paths-filtered to avoid running on PRs that can't affect schema output +# (e.g. docs-only PRs, CI tweaks). 
+ +on: + push: + branches: [main] + paths: + - 'pkg/config/**' + - 'pkg/provider/**' + - 'pkg/dnsprovider/**' + - 'pkg/nic/registry.go' + - 'pkg/nic/config_types.go' + - 'pkg/storage/longhorn/**' + - 'pkg/git/**' + - 'pkg/configschema/**' + - 'cmd/schemagen/**' + - 'schemas/**' + - 'go.mod' + - 'go.sum' + - 'Makefile' + - '.github/workflows/schemas.yml' + pull_request: + branches: [main] + paths: + - 'pkg/config/**' + - 'pkg/provider/**' + - 'pkg/dnsprovider/**' + - 'pkg/nic/registry.go' + - 'pkg/nic/config_types.go' + - 'pkg/storage/longhorn/**' + - 'pkg/git/**' + - 'pkg/configschema/**' + - 'cmd/schemagen/**' + - 'schemas/**' + - 'go.mod' + - 'go.sum' + - 'Makefile' + - '.github/workflows/schemas.yml' + +permissions: + contents: read + +jobs: + drift-check: + name: schemas/ drift check + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Regenerate schemas + run: make schemas + + - name: Fail on drift + run: | + if ! git diff --exit-code schemas/; then + echo "::error::schemas/ is out of sync with the code. Run 'make schemas' locally and commit the result." + exit 1 + fi diff --git a/cmd/schemagen/main.go b/cmd/schemagen/main.go index 91b9f0f8..7c16e1ed 100644 --- a/cmd/schemagen/main.go +++ b/cmd/schemagen/main.go @@ -1,32 +1,38 @@ -// schemagen emits JSON Schema + commented YAML reference documents for -// nebari-config.yaml and each registered provider's Config struct. It is -// an internal build/CI tool, not a user-facing subcommand of nic. +// schemagen emits JSON Schema documents for nebari-config.yaml and each +// registered provider's Config struct. It is an internal build/CI tool, +// not a user-facing subcommand of nic. // -// Currently a skeleton: it enumerates the registered providers via -// pkg/nic.RegisteredConfigTypes and reports what it would generate, but -// the underlying configschema.Generate is not yet implemented. 
The -// actual generation + file writing lands in a follow-up commit on the -// same branch. +// Output layout (default `-out ./schemas`): // -// Intended invocation (once complete): +// schemas/ +// manifest.json +// nebari-config.json +// providers/ +// <name>.json (one per registered cluster + DNS provider) // -// go run ./cmd/schemagen -out ./schemas +// The provider list is sourced from the nic registry (pkg/nic/registry.go) +// via (*nic.Client).RegisteredConfigTypes; there is no parallel hard-coded +// list. Adding a new provider to the registry automatically extends the +// schemagen output on the next CI run. // -// Flags: -// -// -out output directory (default "./schemas") -// -providers comma-separated subset to regenerate (default: all registered) +// Invocation: `make schemas` or `go run ./cmd/schemagen -out ./schemas`. package main import ( "context" + "encoding/json" "flag" "fmt" + "io/fs" "log/slog" "os" + "path/filepath" + "reflect" "sort" "strings" + "github.com/nebari-dev/nebari-infrastructure-core/pkg/config" + "github.com/nebari-dev/nebari-infrastructure-core/pkg/configschema" "github.com/nebari-dev/nebari-infrastructure-core/pkg/nic" ) @@ -34,47 +40,166 @@ func main() { var ( outDir string providers string + pkgRoot string + version string ) flag.StringVar(&outDir, "out", "./schemas", "output directory for generated schema files") - flag.StringVar(&providers, "providers", "", "comma-separated subset of providers to regenerate (default: all registered)") + flag.StringVar(&providers, "providers", "", "comma-separated subset to regenerate (default: all registered)") + flag.StringVar(&pkgRoot, "pkg-root", "./pkg", "root directory whose Go packages are scanned for field godoc") + flag.StringVar(&version, "version", "", "version string stamped into manifest.json (default: empty)") flag.Parse() ctx := context.Background() + if err := run(ctx, outDir, providers, pkgRoot, version); err != nil { + slog.Error("schemagen failed", "error", err) + os.Exit(1) + }
+} - client, err := nic.NewClient(ctx) +func run(ctx context.Context, outDir, providersFlag, pkgRoot, version string) error { + if err := os.MkdirAll(filepath.Join(outDir, "providers"), 0o755); err != nil { + return fmt.Errorf("mkdir %s: %w", outDir, err) + } + + pkgPaths, err := collectPackagePaths(pkgRoot) if err != nil { - slog.Error("build nic client", "error", err) - os.Exit(1) + return fmt.Errorf("collect package paths under %s: %w", pkgRoot, err) } + client, err := nic.NewClient(ctx) + if err != nil { + return fmt.Errorf("build nic client: %w", err) + } types := client.RegisteredConfigTypes(ctx) - cluster := sortedKeys(types.Cluster) - dns := sortedKeys(types.DNS) + filter := parseFilter(providersFlag) + emitTopLevel := len(filter) == 0 + + clusterNames := sortedKeys(types.Cluster) + dnsNames := sortedKeys(types.DNS) + + if emitTopLevel { + if err := writeSchema(ctx, outDir, "nebari-config.json", + reflect.TypeFor[config.NebariConfig](), + "Nebari config", pkgPaths); err != nil { + return err + } + } + + for _, name := range clusterNames { + if !accepts(filter, name) { + continue + } + if err := writeSchema(ctx, outDir, filepath.Join("providers", name+".json"), + types.Cluster[name], + fmt.Sprintf("%s cluster provider configuration", name), pkgPaths); err != nil { + return err + } + } + + for _, name := range dnsNames { + if !accepts(filter, name) { + continue + } + if err := writeSchema(ctx, outDir, filepath.Join("providers", name+".json"), + types.DNS[name], + fmt.Sprintf("%s DNS provider configuration", name), pkgPaths); err != nil { + return err + } + } + + if emitTopLevel { + if err := writeManifest(outDir, version, clusterNames, dnsNames); err != nil { + return err + } + } + + fmt.Printf("schemagen wrote schemas under %s\n", outDir) + fmt.Printf(" cluster providers: %v\n", clusterNames) + fmt.Printf(" dns providers: %v\n", dnsNames) + return nil +} - filter := parseFilter(providers) - if len(filter) > 0 { - cluster = filterNames(cluster, filter) - dns 
= filterNames(dns, filter) +func writeSchema(ctx context.Context, outDir, relPath string, t reflect.Type, title string, pkgPaths []string) error { + data, err := configschema.Generate(ctx, t, configschema.FormatJSON, configschema.Options{ + Title: title, + PackagePaths: pkgPaths, + }) + if err != nil { + return fmt.Errorf("generate %s: %w", relPath, err) } + full := filepath.Join(outDir, relPath) + if err := os.WriteFile(full, data, 0o644); err != nil { + return fmt.Errorf("write %s: %w", full, err) + } + return nil +} + +// manifest is the shape of schemas/manifest.json. The docs site fetches +// this first to discover what schemas exist, then fetches each referenced +// file. Adding a new provider extends Providers/DNS automatically. +type manifest struct { + Version string `json:"version,omitempty"` + Providers []string `json:"providers"` + DNS []string `json:"dns"` + TopLevel string `json:"top_level"` +} - fmt.Printf("schemagen — output directory: %s\n", outDir) - fmt.Printf("cluster providers (%d):\n", len(cluster)) - for _, name := range cluster { - fmt.Printf(" %-10s → %s\n", name, types.Cluster[name].String()) +func writeManifest(outDir, version string, cluster, dns []string) error { + m := manifest{ + Version: version, + Providers: cluster, + DNS: dns, + TopLevel: "nebari-config.json", } - fmt.Printf("dns providers (%d):\n", len(dns)) - for _, name := range dns { - fmt.Printf(" %-10s → %s\n", name, types.DNS[name].String()) + data, err := json.MarshalIndent(m, "", " ") + if err != nil { + return fmt.Errorf("marshal manifest: %w", err) } - fmt.Println() - fmt.Println("schemagen is a skeleton. configschema.Generate is not yet") - fmt.Println("implemented; no schema files were written. 
See the follow-up") - fmt.Println("commit on the feat/config-schema-gen branch.") + data = append(data, '\n') + return os.WriteFile(filepath.Join(outDir, "manifest.json"), data, 0o644) +} + +// collectPackagePaths walks root and returns every subdirectory that +// contains at least one non-test .go file. These paths are passed to +// configschema.Generate as Options.PackagePaths so invopop/jsonschema +// can pick up godoc comments wherever the type tree leads. +func collectPackagePaths(root string) ([]string, error) { + var paths []string + err := filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if !d.IsDir() { + return nil + } + name := d.Name() + if strings.HasPrefix(name, ".") || name == "vendor" || name == "testdata" { + return fs.SkipDir + } + entries, err := os.ReadDir(path) + if err != nil { + return err + } + for _, e := range entries { + if e.IsDir() { + continue + } + n := e.Name() + if strings.HasSuffix(n, ".go") && !strings.HasSuffix(n, "_test.go") { + paths = append(paths, path) + return nil + } + } + return nil + }) + if err != nil { + return nil, err + } + sort.Strings(paths) + return paths, nil } -// sortedKeys returns the keys of m in deterministic order. The schema -// output must be reproducible for the CI drift gate to work. 
func sortedKeys[V any](m map[string]V) []string { keys := make([]string, 0, len(m)) for k := range m { @@ -98,12 +223,10 @@ func parseFilter(raw string) map[string]struct{} { return out } -func filterNames(all []string, want map[string]struct{}) []string { - out := make([]string, 0, len(all)) - for _, name := range all { - if _, ok := want[name]; ok { - out = append(out, name) - } +func accepts(filter map[string]struct{}, name string) bool { + if filter == nil { + return true } - return out + _, ok := filter[name] + return ok } diff --git a/go.mod b/go.mod index aa5a9e86..d5dd16f5 100644 --- a/go.mod +++ b/go.mod @@ -69,7 +69,9 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 // indirect + github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/blang/semver/v4 v4.0.0 // indirect + github.com/buger/jsonparser v1.1.2 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect github.com/cloudflare/circl v1.6.1 // indirect @@ -112,6 +114,7 @@ require ( github.com/hashicorp/terraform-json v0.27.1 // indirect github.com/huandu/xstrings v1.5.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/invopop/jsonschema v0.14.0 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/jmoiron/sqlx v1.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -139,6 +142,7 @@ require ( github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect + github.com/pb33f/ordered-map/v2 v2.3.1 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect @@ 
-167,6 +171,7 @@ require ( go.opentelemetry.io/proto/otlp v1.7.1 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect + go.yaml.in/yaml/v4 v4.0.0-rc.2 // indirect golang.org/x/net v0.53.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sync v0.20.0 // indirect diff --git a/go.sum b/go.sum index 8fd4191f..e46fb69d 100644 --- a/go.sum +++ b/go.sum @@ -108,12 +108,16 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 h1:HK5ON3KmQV2HcAunnx4sKLB9aPf3 github.com/aws/aws-sdk-go-v2/service/sts v1.40.2/go.mod h1:E19xDjpzPZC7LS2knI9E6BaRFDK43Eul7vd6rSq2HWk= github.com/aws/smithy-go v1.25.1 h1:J8ERsGSU7d+aCmdQur5Txg6bVoYelvQJgtZehD12GkI= github.com/aws/smithy-go v1.25.1/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= +github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= +github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/bshuster-repo/logrus-logstash-hook v1.0.0 h1:e+C0SB5R1pu//O4MQ3f9cFuPGoOVeF2fE4Og9otCc70= github.com/bshuster-repo/logrus-logstash-hook v1.0.0/go.mod h1:zsTqEiSzDgAa/8GZR7E1qaXrhYNDKBYy5/dWPTIflbk= +github.com/buger/jsonparser v1.1.2 h1:frqHqw7otoVbk5M8LlE/L7HTnIq2v9RX6EJ48i9AxJk= +github.com/buger/jsonparser v1.1.2/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= @@ -273,6 +277,8 @@ 
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/invopop/jsonschema v0.14.0 h1:MHQqLhvpNUZfw+hM3AZDYK7jxO8FZoQeQM77g8iyZjg= +github.com/invopop/jsonschema v0.14.0/go.mod h1:ygm6C2EaVNMBDPpaPlnOA2pFAxBnxGjFlMZABxm9n2I= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= @@ -357,6 +363,8 @@ github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJw github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/opentofu/tofudl v0.0.1 h1:r2uD4nxMnq0Qkzhh/C9Ldxjt+piTJi0R0C40Kf4d+a8= github.com/opentofu/tofudl v0.0.1/go.mod h1:HeIabsnOzo0WMnIRqI13Ho6hEi6tu2nrQpzSddWL/9w= +github.com/pb33f/ordered-map/v2 v2.3.1 h1:5319HDO0aw4DA4gzi+zv4FXU9UlSs3xGZ40wcP1nBjY= +github.com/pb33f/ordered-map/v2 v2.3.1/go.mod h1:qxFQgd0PkVUtOMCkTapqotNgzRhMPL7VvaHKbd1HnmQ= github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI= @@ -500,6 +508,8 @@ go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 
v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +go.yaml.in/yaml/v4 v4.0.0-rc.2 h1:/FrI8D64VSr4HtGIlUtlFMGsm7H7pWTbj6vOLVZcA6s= +go.yaml.in/yaml/v4 v4.0.0-rc.2/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= diff --git a/pkg/configschema/configschema.go b/pkg/configschema/configschema.go index 05516b0d..3287c6ae 100644 --- a/pkg/configschema/configschema.go +++ b/pkg/configschema/configschema.go @@ -2,22 +2,27 @@ // Two output formats are supported: JSON Schema (for editor LSPs and the // docs-site renderer) and a fully-commented YAML reference (the Helm // values.yaml analogue). Field descriptions come from godoc comments on -// the source struct, extracted at call time from the package source. -// -// Currently a skeleton — Generate is not yet implemented. The full -// implementation will wrap github.com/invopop/jsonschema for the JSON -// path and goccy/go-yaml's CommentMap for the YAML path. +// the source struct, extracted at call time from the package source via +// invopop/jsonschema's AddGoComments. package configschema import ( "context" + "encoding/json" "errors" + "fmt" "reflect" + "github.com/invopop/jsonschema" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" ) +// modulePath is the base import path passed to Reflector.AddGoComments. +// It must match the module path in go.mod for invopop/jsonschema to +// associate parsed comments with the right Go types. +const modulePath = "github.com/nebari-dev/nebari-infrastructure-core" + // Format identifies which schema-document format Generate should produce. 
type Format int @@ -43,30 +48,30 @@ func (f Format) String() string { } } -// Options controls Generate's behavior. PackagePaths is the only required -// field: it lists the filesystem paths whose Go source should be parsed -// for field godoc. +// Options controls Generate's behavior. PackagePaths is required: without +// at least one path, no field descriptions can be extracted from godoc. type Options struct { // Title set on the schema root (e.g. "AWS provider configuration"). + // Optional; the type's own godoc becomes the description automatically. Title string - // Description set on the schema root. + // Description set on the schema root, overriding the type's godoc. + // Optional. Description string - // PackagePaths are filesystem paths to the Go packages whose source - // should be parsed for field godoc. Required: without at least one - // path, no field descriptions can be extracted. + // PackagePaths are filesystem paths to Go packages whose source + // should be parsed for field godoc. Each path is passed through + // to invopop/jsonschema's Reflector.AddGoComments. At least one + // path is required for descriptions to land in the output. PackagePaths []string } // Generate renders the schema for the given type in the requested format. // -// Not yet implemented. The intended behavior is documented in -// config-schema-plan.md and ADR-0005: -// - FormatJSON: invopop/jsonschema with Reflector.AddGoComments populated -// from Options.PackagePaths. -// - FormatYAML: a fully-commented YAML document constructed via -// goccy/go-yaml's CommentMap, reusing the same comment source. +// For FormatJSON, the output is a JSON Schema document produced by +// invopop/jsonschema with godoc descriptions extracted from the packages +// in opts.PackagePaths. For FormatYAML, the output is not yet implemented +// and the function returns an error. 
func Generate(ctx context.Context, t reflect.Type, format Format, opts Options) ([]byte, error) { tracer := otel.Tracer("nebari-infrastructure-core") _, span := tracer.Start(ctx, "configschema.Generate") @@ -75,7 +80,69 @@ func Generate(ctx context.Context, t reflect.Type, format Format, opts Options) span.SetAttributes( attribute.String("format", format.String()), attribute.String("type", t.String()), + attribute.Int("package_paths", len(opts.PackagePaths)), ) - return nil, errors.New("configschema.Generate: not yet implemented") + r := newReflector() + for _, path := range opts.PackagePaths { + if err := r.AddGoComments(modulePath, path); err != nil { + span.RecordError(err) + return nil, fmt.Errorf("AddGoComments(%s): %w", path, err) + } + } + + schema := r.ReflectFromType(t) + if opts.Title != "" { + schema.Title = opts.Title + } + if opts.Description != "" { + schema.Description = opts.Description + } + + switch format { + case FormatJSON: + out, err := json.MarshalIndent(schema, "", " ") + if err != nil { + span.RecordError(err) + return nil, fmt.Errorf("marshal JSON Schema: %w", err) + } + // json.MarshalIndent does not append a trailing newline; add one so + // the committed file is POSIX-friendly and `git diff` is clean. + return append(out, '\n'), nil + case FormatYAML: + err := errors.New("FormatYAML not yet implemented") + span.RecordError(err) + return nil, err + default: + err := fmt.Errorf("unknown format: %v", format) + span.RecordError(err) + return nil, err + } +} + +// newReflector constructs the Reflector with options tuned for nebari-config. +// Centralized so JSON and future YAML paths share identical settings. +func newReflector() *jsonschema.Reflector { + return &jsonschema.Reflector{ + // Read yaml tags (not the json default) — the source-of-truth tags + // on every Config field are yaml: ones, including the `,omitempty` + // hints used for required-field inference. 
+ FieldNameTag: "yaml", + // Omit the auto-generated $id so the emitted schemas are anonymous. + Anonymous: true, + // nebari-config does not accept unknown fields at any level; + // the validator surfaces them as errors. Reflect that in the schema. + AllowAdditionalProperties: false, + // Package-qualify $defs keys for named struct types so collisions + // across packages (e.g. aws.Config + longhorn.Config) don't merge + // into a single entry. Composite types (slices, maps) fall back + // to invopop's default by returning "" — they get inlined rather + // than landing in $defs as "map[string]string" etc. + Namer: func(t reflect.Type) string { + if t.Kind() == reflect.Struct && t.Name() != "" { + return t.String() + } + return "" + }, + } } diff --git a/schemas/manifest.json b/schemas/manifest.json new file mode 100644 index 00000000..b8807f39 --- /dev/null +++ b/schemas/manifest.json @@ -0,0 +1,14 @@ +{ + "providers": [ + "aws", + "azure", + "existing", + "gcp", + "hetzner", + "local" + ], + "dns": [ + "cloudflare" + ], + "top_level": "nebari-config.json" +} diff --git a/schemas/nebari-config.json b/schemas/nebari-config.json new file mode 100644 index 00000000..7d2ed9fa --- /dev/null +++ b/schemas/nebari-config.json @@ -0,0 +1,136 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/config.NebariConfig", + "$defs": { + "config.ACMEConfig": { + "properties": { + "email": { + "type": "string", + "description": "Email is the email address for Let's Encrypt registration" + }, + "server": { + "type": "string", + "description": "Server is the ACME server URL (defaults to Let's Encrypt production)\nUse \"https://acme-staging-v02.api.letsencrypt.org/directory\" for testing" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "email" + ], + "description": "ACMEConfig holds ACME (Let's Encrypt) configuration" + }, + "config.CertificateConfig": { + "properties": { + "type": { + "type": "string", + "description":
"Type is the certificate type: \"selfsigned\" or \"letsencrypt\"" + }, + "acme": { + "$ref": "#/$defs/config.ACMEConfig", + "description": "ACME configuration for Let's Encrypt" + } + }, + "additionalProperties": false, + "type": "object", + "description": "CertificateConfig holds TLS certificate configuration" + }, + "config.ClusterConfig": { + "properties": {}, + "additionalProperties": false, + "type": "object", + "description": "ClusterConfig holds typed cloud provider configuration." + }, + "config.DNSConfig": { + "properties": {}, + "additionalProperties": false, + "type": "object", + "description": "DNSConfig holds typed DNS provider configuration." + }, + "config.NebariConfig": { + "properties": { + "project_name": { + "type": "string" + }, + "domain": { + "type": "string" + }, + "cluster": { + "$ref": "#/$defs/config.ClusterConfig", + "description": "Cluster Provider configuration.\nOnly one provider can be configured at a time." + }, + "dns": { + "$ref": "#/$defs/config.DNSConfig", + "description": "DNS provider configuration (optional).\nOnly one provider can be configured at a time." 
+ }, + "git_repository": { + "$ref": "#/$defs/git.Config", + "description": "GitRepository configures the GitOps repository for ArgoCD bootstrap (optional)" + }, + "certificate": { + "$ref": "#/$defs/config.CertificateConfig", + "description": "Certificate configuration (optional)" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "project_name" + ], + "description": "NebariConfig represents the parsed nebari-config.yaml structure" + }, + "git.AuthConfig": { + "properties": { + "ssh_key_env": { + "type": "string", + "description": "SSHKeyEnv is the name of the environment variable containing the SSH private key\nThe key should be in PEM format (e.g., contents of ~/.ssh/id_ed25519)" + }, + "token_env": { + "type": "string", + "description": "TokenEnv is the name of the environment variable containing the personal access token\nUsed for HTTPS authentication" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "ssh_key_env", + "token_env" + ], + "description": "AuthConfig specifies authentication credentials for git operations." 
+ }, + "git.Config": { + "properties": { + "url": { + "type": "string", + "description": "URL is the repository URL (SSH or HTTPS format)\nExamples: \"git@github.com:org/repo.git\" or \"https://github.com/org/repo.git\"" + }, + "branch": { + "type": "string", + "description": "Branch is the git branch to use (default: \"main\")" + }, + "path": { + "type": "string", + "description": "Path is an optional subdirectory within the repository\nIf specified, all operations are scoped to this path" + }, + "auth": { + "$ref": "#/$defs/git.AuthConfig", + "description": "Auth specifies credentials for NIC to push to the repository (requires write access)" + }, + "argocd_auth": { + "$ref": "#/$defs/git.AuthConfig", + "description": "ArgoCDAuth specifies optional separate credentials for ArgoCD (read-only access)\nIf not specified, falls back to Auth" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "url", + "branch", + "path", + "auth" + ], + "description": "Config represents git repository configuration for GitOps bootstrap." 
+ } + }, + "title": "Nebari config" +} diff --git a/schemas/providers/aws.json b/schemas/providers/aws.json new file mode 100644 index 00000000..9e70b0e8 --- /dev/null +++ b/schemas/providers/aws.json @@ -0,0 +1,244 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/aws.Config", + "$defs": { + "aws.AWSLoadBalancerControllerConfig": { + "properties": { + "enabled": { + "type": "boolean" + }, + "chart_version": { + "type": "string" + }, + "destroy_timeout": { + "type": "integer" + } + }, + "additionalProperties": false, + "type": "object" + }, + "aws.Config": { + "properties": { + "region": { + "type": "string" + }, + "state_bucket": { + "type": "string" + }, + "availability_zones": { + "items": { + "type": "string" + }, + "type": "array" + }, + "vpc_cidr_block": { + "type": "string" + }, + "existing_vpc_id": { + "type": "string" + }, + "existing_private_subnet_ids": { + "items": { + "type": "string" + }, + "type": "array" + }, + "existing_security_group_id": { + "type": "string" + }, + "kubernetes_version": { + "type": "string" + }, + "endpoint_private_access": { + "type": "boolean" + }, + "endpoint_public_access": { + "type": "boolean" + }, + "eks_kms_arn": { + "type": "string" + }, + "enabled_log_types": { + "items": { + "type": "string" + }, + "type": "array" + }, + "existing_cluster_role_arn": { + "type": "string" + }, + "existing_node_role_arn": { + "type": "string" + }, + "permissions_boundary": { + "type": "string" + }, + "node_groups": { + "additionalProperties": { + "$ref": "#/$defs/aws.NodeGroup" + }, + "type": "object" + }, + "tags": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "efs": { + "$ref": "#/$defs/aws.EFSConfig" + }, + "longhorn": { + "$ref": "#/$defs/longhorn.Config" + }, + "aws_load_balancer_controller": { + "$ref": "#/$defs/aws.AWSLoadBalancerControllerConfig" + }, + "load_balancer_scheme": { + "type": "string" + }, + "trust_bundle": { + "$ref": 
"#/$defs/aws.TrustBundleConfig", + "description": "TrustBundle, when set, installs the given PEM bundle into the OS trust\nstore of every EKS worker node before kubelet starts. Required when nodes\nmust reach the EKS control plane, ECR, or pull container images through a\nTLS-inspecting egress proxy. Will likely move to a top-level NebariConfig\nfield once trust-manager (the in-pod half of nebari-dev/nebari-infrastructure-core#307)\nlands; keeping it provider-scoped here matches the current Provider interface." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "region", + "kubernetes_version", + "node_groups" + ] + }, + "aws.EFSConfig": { + "properties": { + "enabled": { + "type": "boolean" + }, + "performance_mode": { + "type": "string", + "description": "default: generalPurpose" + }, + "throughput_mode": { + "type": "string", + "description": "default: bursting" + }, + "provisioned_throughput_mibps": { + "type": "integer" + }, + "encrypted": { + "type": "boolean", + "description": "default: true" + }, + "kms_key_arn": { + "type": "string" + }, + "storage_class_name": { + "type": "string", + "description": "default: efs-sc" + } + }, + "additionalProperties": false, + "type": "object" + }, + "aws.NodeGroup": { + "properties": { + "instance": { + "type": "string" + }, + "min_nodes": { + "type": "integer" + }, + "max_nodes": { + "type": "integer" + }, + "gpu": { + "type": "boolean" + }, + "ami_type": { + "type": "string" + }, + "spot": { + "type": "boolean" + }, + "disk_size": { + "type": "integer" + }, + "labels": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "taints": { + "items": { + "$ref": "#/$defs/aws.Taint" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "instance" + ] + }, + "aws.Taint": { + "properties": { + "key": { + "type": "string" + }, + "value": { + "type": "string" + }, + "effect": { + "type": "string", + "description": "NO_SCHEDULE, 
NO_EXECUTE, PREFER_NO_SCHEDULE" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "key", + "value", + "effect" + ] + }, + "aws.TrustBundleConfig": { + "properties": { + "path": { + "type": "string" + }, + "inline": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object", + "description": "TrustBundleConfig specifies the source of an extra CA bundle." + }, + "longhorn.Config": { + "properties": { + "enabled": { + "type": "boolean" + }, + "replica_count": { + "type": "integer" + }, + "dedicated_nodes": { + "type": "boolean" + }, + "node_selector": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + } + }, + "additionalProperties": false, + "type": "object", + "description": "Config carries the user-tunable Longhorn settings shared across providers." + } + }, + "title": "aws cluster provider configuration" +} diff --git a/schemas/providers/azure.json b/schemas/providers/azure.json new file mode 100644 index 00000000..a4f5a801 --- /dev/null +++ b/schemas/providers/azure.json @@ -0,0 +1,141 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/azure.Config", + "$defs": { + "azure.Config": { + "properties": { + "region": { + "type": "string" + }, + "resource_group_name": { + "type": "string" + }, + "create_resource_group": { + "type": "boolean", + "description": "CreateResourceGroup is tri-state: nil = infer (true unless ResourceGroupName\nis set), \u0026true = always create, \u0026false = never create (must supply ResourceGroupName)." 
+ }, + "kubernetes_version": { + "type": "string" + }, + "sku_tier": { + "type": "string" + }, + "private_cluster_enabled": { + "type": "boolean" + }, + "authorized_ip_ranges": { + "items": { + "type": "string" + }, + "type": "array" + }, + "network": { + "$ref": "#/$defs/azure.NetworkConfig" + }, + "node_groups": { + "additionalProperties": { + "$ref": "#/$defs/azure.NodeGroup" + }, + "type": "object" + }, + "tags": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "node_provisioning_mode": { + "type": "string", + "description": "NodeProvisioningMode enables AKS Node Auto Provisioning (Karpenter) when\nset to \"Auto\". Defaults to \"Manual\". \"Auto\" requires the cilium dataplane\n(network.dataplane: cilium)." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "region", + "node_groups" + ], + "description": "Config is the user-facing Azure cluster configuration as parsed from the `cluster.azure:` block of NIC YAML." + }, + "azure.NetworkConfig": { + "properties": { + "vnet_cidr_block": { + "type": "string" + }, + "node_subnet_cidr_block": { + "type": "string" + }, + "pod_cidr": { + "type": "string" + }, + "service_cidr": { + "type": "string" + }, + "dns_service_ip": { + "type": "string" + }, + "dataplane": { + "type": "string", + "description": "DataPlane selects the AKS network dataplane: \"azure\" (default) or\n\"cilium\" (Azure CNI Powered by Cilium)." + }, + "existing_vnet_id": { + "type": "string" + }, + "existing_node_subnet_id": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object", + "description": "NetworkConfig groups all VNet/subnet/CIDR knobs." 
+ }, + "azure.NodeGroup": { + "properties": { + "instance": { + "type": "string" + }, + "min_nodes": { + "type": "integer" + }, + "max_nodes": { + "type": "integer" + }, + "mode": { + "type": "string", + "description": "\"System\" | \"User\"; defaults to \"User\"" + }, + "os_disk_size_gb": { + "type": "integer" + }, + "labels": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "taints": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Taints in \"key=value:Effect\" form, e.g. \"dedicated=gpu:NoSchedule\"." + }, + "zones": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "instance", + "min_nodes", + "max_nodes" + ], + "description": "NodeGroup describes one AKS node pool." + } + }, + "title": "azure cluster provider configuration" +} diff --git a/schemas/providers/cloudflare.json b/schemas/providers/cloudflare.json new file mode 100644 index 00000000..afa0095e --- /dev/null +++ b/schemas/providers/cloudflare.json @@ -0,0 +1,21 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/cloudflare.Config", + "$defs": { + "cloudflare.Config": { + "properties": { + "zone_name": { + "type": "string", + "description": "Domain zone (e.g., example.com)" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "zone_name" + ], + "description": "Config represents Cloudflare-specific DNS configuration Secrets like API tokens are read from environment variables, not config" + } + }, + "title": "cloudflare DNS provider configuration" +} diff --git a/schemas/providers/existing.json b/schemas/providers/existing.json new file mode 100644 index 00000000..8a9aadb0 --- /dev/null +++ b/schemas/providers/existing.json @@ -0,0 +1,62 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/existing.Config", + "$defs": { + "existing.Config": { + "properties": { + 
"kubeconfig": { + "type": "string", + "description": "Kubeconfig is the path to the kubeconfig file.\nDefaults to KUBECONFIG env or ~/.kube/config when empty." + }, + "context": { + "type": "string", + "description": "Context is the name of the context entry in the kubeconfig file.\nRequired — must be explicitly set to avoid accidentally deploying\nto the wrong cluster." + }, + "storage_class": { + "type": "string", + "description": "StorageClass is the default Kubernetes StorageClass for persistent volumes.\nDefaults to \"standard\" when empty, or to \"longhorn\" when Longhorn is\nenabled below and StorageClass is left unset." + }, + "load_balancer_annotations": { + "additionalProperties": { + "type": "string" + }, + "type": "object", + "description": "LoadBalancerAnnotations are added to the Gateway's LoadBalancer Service.\nUse this to pass cloud-specific annotations the Cloud Controller Manager may require for\nprovisioning LoadBalancers (e.g., \"load-balancer.hetzner.cloud/location: ash\")." + }, + "longhorn": { + "$ref": "#/$defs/longhorn.Config", + "description": "Longhorn opts the existing-cluster provider into installing Longhorn for\ndistributed/replicated block + RWX storage. The block is required to\nopt-in (nil means \"do not install\"). Use this on bare-metal / hetzner-k3s\nclusters that lack a managed RWX StorageClass — without it, charts that\nneed RWX (e.g. jupyterhub shared-storage for group dirs) fall back to\nthe in-cluster NFS-on-RWO workaround." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "context" + ], + "description": "Config represents configuration for connecting to a pre-existing Kubernetes cluster." 
+ }, + "longhorn.Config": { + "properties": { + "enabled": { + "type": "boolean" + }, + "replica_count": { + "type": "integer" + }, + "dedicated_nodes": { + "type": "boolean" + }, + "node_selector": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + } + }, + "additionalProperties": false, + "type": "object", + "description": "Config carries the user-tunable Longhorn settings shared across providers." + } + }, + "title": "existing cluster provider configuration" +} diff --git a/schemas/providers/gcp.json b/schemas/providers/gcp.json new file mode 100644 index 00000000..34f05abd --- /dev/null +++ b/schemas/providers/gcp.json @@ -0,0 +1,151 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/gcp.Config", + "$defs": { + "gcp.Config": { + "properties": { + "project": { + "type": "string" + }, + "region": { + "type": "string" + }, + "kubernetes_version": { + "type": "string" + }, + "availability_zones": { + "items": { + "type": "string" + }, + "type": "array" + }, + "release_channel": { + "type": "string" + }, + "node_groups": { + "additionalProperties": { + "$ref": "#/$defs/gcp.NodeGroup" + }, + "type": "object" + }, + "tags": { + "items": { + "type": "string" + }, + "type": "array" + }, + "networking_mode": { + "type": "string" + }, + "network": { + "type": "string" + }, + "subnetwork": { + "type": "string" + }, + "ip_allocation_policy": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "master_authorized_networks_config": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "private_cluster_config": { + "type": "object" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "project", + "region", + "kubernetes_version" + ], + "description": "Config represents GCP-specific configuration" + }, + "gcp.GuestAccelerator": { + "properties": { + "name": { + "type": "string" + }, + "count": { + "type": "integer" + } + }, + 
"additionalProperties": false, + "type": "object", + "required": [ + "name" + ], + "description": "GuestAccelerator represents a GCP GPU configuration" + }, + "gcp.NodeGroup": { + "properties": { + "instance": { + "type": "string" + }, + "min_nodes": { + "type": "integer" + }, + "max_nodes": { + "type": "integer" + }, + "taints": { + "items": { + "$ref": "#/$defs/gcp.Taint" + }, + "type": "array" + }, + "preemptible": { + "type": "boolean" + }, + "labels": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "guest_accelerators": { + "items": { + "$ref": "#/$defs/gcp.GuestAccelerator" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "instance" + ], + "description": "NodeGroup represents GCP-specific node group configuration" + }, + "gcp.Taint": { + "properties": { + "key": { + "type": "string" + }, + "value": { + "type": "string" + }, + "effect": { + "type": "string", + "description": "NoSchedule, PreferNoSchedule, NoExecute" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "key", + "value", + "effect" + ], + "description": "Taint represents a Kubernetes taint" + } + }, + "title": "gcp cluster provider configuration" +} diff --git a/schemas/providers/hetzner.json b/schemas/providers/hetzner.json new file mode 100644 index 00000000..6c886561 --- /dev/null +++ b/schemas/providers/hetzner.json @@ -0,0 +1,156 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/hetzner.Config", + "$defs": { + "hetzner.Autoscaling": { + "properties": { + "enabled": { + "type": "boolean" + }, + "min_instances": { + "type": "integer" + }, + "max_instances": { + "type": "integer" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "enabled", + "min_instances", + "max_instances" + ], + "description": "Autoscaling configures automatic node pool scaling." 
+ }, + "hetzner.Config": { + "properties": { + "location": { + "type": "string" + }, + "kubernetes_version": { + "type": "string" + }, + "node_groups": { + "additionalProperties": { + "$ref": "#/$defs/hetzner.NodeGroup" + }, + "type": "object" + }, + "schedule_workloads_on_masters": { + "type": "boolean", + "description": "ScheduleWorkloadsOnMasters controls whether application pods can be\nscheduled on control-plane nodes. Defaults to true, which enables\nsingle-node clusters and makes better use of small Hetzner instances.\nSet to false for production clusters where you want dedicated masters\nthat only run etcd and the Kubernetes control plane. When false, at\nleast one non-master node group is required." + }, + "persist_data": { + "type": "boolean", + "description": "PersistData controls whether CSI volumes survive cluster destruction.\nWhen true, volumes are labeled persist=true during deploy, and destroy\nskips them. When false (the default), destroy deletes all CSI volumes\nthat are not attached to a running server." + }, + "ssh": { + "$ref": "#/$defs/hetzner.SSHConfig" + }, + "network": { + "$ref": "#/$defs/hetzner.NetworkConfig" + }, + "longhorn": { + "$ref": "#/$defs/longhorn.Config", + "description": "Longhorn configures the Longhorn distributed block storage install.\nHetzner's hcloud-volumes CSI is RWO-only; charts that need RWX (e.g.\njupyterhub shared-storage for group dirs) require Longhorn — or another\nRWX provider — to avoid the in-cluster NFS-on-RWO workaround.\nDefaults to enabled when the block is omitted; set `enabled: false` to\nopt out." + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "location", + "kubernetes_version", + "node_groups" + ], + "description": "Config holds Hetzner-specific provider configuration." 
+ }, + "hetzner.NetworkConfig": { + "properties": { + "ssh_allowed_cidrs": { + "items": { + "type": "string" + }, + "type": "array" + }, + "api_allowed_cidrs": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object", + "description": "NetworkConfig controls firewall rules for SSH and Kubernetes API access." + }, + "hetzner.NodeGroup": { + "properties": { + "instance_type": { + "type": "string" + }, + "count": { + "type": "integer" + }, + "master": { + "type": "boolean", + "description": "Master marks this node group as the k3s control plane. Exactly one\nnode group must have this set to true. Master nodes run etcd and the\nKubernetes API server. Whether they also run application workloads is\ncontrolled by Config.ScheduleWorkloadsOnMasters." + }, + "location": { + "type": "string", + "description": "Location overrides the top-level location for this node group.\nOnly valid for worker (non-master) node groups." + }, + "autoscaling": { + "$ref": "#/$defs/hetzner.Autoscaling" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "instance_type", + "count" + ], + "description": "NodeGroup defines a pool of Hetzner Cloud instances." + }, + "hetzner.SSHConfig": { + "properties": { + "public_key_path": { + "type": "string" + }, + "private_key_path": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "public_key_path", + "private_key_path" + ], + "description": "SSHConfig allows users to provide their own SSH keys instead of auto-generated ones." 
+ }, + "longhorn.Config": { + "properties": { + "enabled": { + "type": "boolean" + }, + "replica_count": { + "type": "integer" + }, + "dedicated_nodes": { + "type": "boolean" + }, + "node_selector": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + } + }, + "additionalProperties": false, + "type": "object", + "description": "Config carries the user-tunable Longhorn settings shared across providers." + } + }, + "title": "hetzner cluster provider configuration" +} diff --git a/schemas/providers/local.json b/schemas/providers/local.json new file mode 100644 index 00000000..7945fd1e --- /dev/null +++ b/schemas/providers/local.json @@ -0,0 +1,50 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/local.Config", + "$defs": { + "local.Config": { + "properties": { + "kube_context": { + "type": "string" + }, + "node_selectors": { + "additionalProperties": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "object" + }, + "storage_class": { + "type": "string" + }, + "https_port": { + "type": "integer" + }, + "metallb": { + "$ref": "#/$defs/local.MetalLBConfig" + } + }, + "additionalProperties": false, + "type": "object", + "description": "Config represents local provider configuration" + }, + "local.MetalLBConfig": { + "properties": { + "enabled": { + "type": "boolean", + "description": "Enabled controls whether MetalLB is deployed. Default: true.\nUse a pointer to distinguish \"not set\" (default true) from \"explicitly false\"." + }, + "address_pool": { + "type": "string", + "description": "AddressPool is the IP range for MetalLB's IPAddressPool.\nDefault: \"192.168.1.100-192.168.1.110\"" + } + }, + "additionalProperties": false, + "type": "object", + "description": "MetalLBConfig holds MetalLB-specific settings for the local provider." + } + }, + "title": "local cluster provider configuration" +}