From 6f247dc2a4b762215ac618918d2e07ef1cd07f12 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 15:18:28 +0200 Subject: [PATCH 1/5] Add local-env constraint fetch with offline cache Third in the stacked local-env series. constraints.go fetches the per-environment pyproject.toml for a resolved env key and parses out requires-python, the databricks-connect pin, and the [tool.uv] constraint-dependencies. It caches each fetch on disk and classifies failures: a 404 is E_ENV_UNSUPPORTED (a resolvable target with no published environment, no cache fallback), while a transport or non-404 HTTP failure is E_FETCH and falls back to the last-good cached copy. The fetched body is validated by parseConstraints before it is written to the cache, so a malformed 2xx response cannot poison the cache and break a later transport-failure run that would otherwise serve the bad copy. Depends on the foundation PR for NewError and the E_FETCH / E_ENV_UNSUPPORTED codes. Still dormant. Co-authored-by: Isaac --- libs/localenv/constraints.go | 166 ++++++++++++++++++++++++++++++ libs/localenv/constraints_test.go | 90 ++++++++++++++++ 2 files changed, 256 insertions(+) create mode 100644 libs/localenv/constraints.go create mode 100644 libs/localenv/constraints_test.go diff --git a/libs/localenv/constraints.go b/libs/localenv/constraints.go new file mode 100644 index 0000000000..d4925d4ac2 --- /dev/null +++ b/libs/localenv/constraints.go @@ -0,0 +1,166 @@ +package localenv + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + + "github.com/BurntSushi/toml" + "github.com/databricks/cli/libs/log" +) + +// errEnvKeyNotFound is returned by fetchURL when the constraint artifact does +// not exist for the requested env key (HTTP 404). It is distinct from a +// transport failure so FetchConstraints can classify it as E_ENV_UNSUPPORTED +// (a resolvable target with no published environment) rather than E_FETCH. +var errEnvKeyNotFound = errors.New("environment key not found") + +// Constraints holds the parsed contents of a per-environment pyproject.toml. +type Constraints struct { + // EnvKey is the environment key used to look up the constraints. + EnvKey string + // SourceURL is the URL from which the constraints were fetched. + SourceURL string + // FromCache is true when the data came from the on-disk cache rather than a live fetch. + FromCache bool + // RequiresPython is the PEP 440 python version specifier from [project].requires-python. + RequiresPython string + // DatabricksConnect is the full dependency string for databricks-connect from [dependency-groups].dev. + DatabricksConnect string + // ConstraintDeps is the list of entries from [tool.uv].constraint-dependencies. + ConstraintDeps []string +} + +// sanitizeEnvKey replaces path separators with double-underscores to produce a flat filename. +func sanitizeEnvKey(envKey string) string { + return strings.ReplaceAll(envKey, "/", "__") +} + +// FetchConstraints fetches the pyproject.toml for envKey from baseURL and caches it in +// cacheDir. On a transport or non-404 HTTP failure it falls back to the cached copy if one +// exists (E_FETCH otherwise). A 404 means the env key is not published (E_ENV_UNSUPPORTED) +// and does not fall back to cache — a resolvable target with no environment is a distinct, +// non-transient condition. +// +// Constraint files are hosted at: +// https://github.com/rugpanov/databricks-environments +func FetchConstraints(ctx context.Context, baseURL, envKey, cacheDir string) (*Constraints, error) { + url := baseURL + "/" + envKey + "/pyproject.toml" + cachePath := filepath.Join(cacheDir, sanitizeEnvKey(envKey)+".toml") + + data, fetchErr := fetchURL(ctx, url) + if fetchErr == nil { + // Parse before caching: a malformed 2xx body must not overwrite a valid + // cached copy, or a later transport-failure run would serve the poisoned + // cache and fail to parse instead of falling back to the last-good file. + rp, dbc, deps, err := parseConstraints(data) + if err != nil { + return nil, fmt.Errorf("parse constraints for %s: %w", envKey, err) + } + // Write the cache copy; non-fatal so a read-only cacheDir doesn't break the command. + if err := os.WriteFile(cachePath, data, 0o600); err != nil { + log.Debugf(ctx, "failed to write constraint cache %s: %v", filepath.ToSlash(cachePath), err) + } + return &Constraints{ + EnvKey: envKey, + SourceURL: url, + FromCache: false, + RequiresPython: rp, + DatabricksConnect: dbc, + ConstraintDeps: deps, + }, nil + } + + // A missing env key (404) is not a transport failure and has no useful cache + // fallback: the target resolved to an environment that isn't published. + if errors.Is(fetchErr, errEnvKeyNotFound) { + return nil, NewError(ErrEnvUnsupported, fetchErr, + "no published environment for %q. If this is a new runtime, try the latest LTS target (e.g. --serverless v4 or a supported --cluster DBR)", envKey) + } + + // Network or HTTP failure: attempt to serve from cache. + cached, readErr := os.ReadFile(cachePath) + if readErr != nil { + return nil, NewError(ErrFetch, fetchErr, "fetch constraints for %s", envKey) + } + + log.Warnf(ctx, "constraint fetch failed, using cached copy: %v", fetchErr) + rp, dbc, deps, err := parseConstraints(cached) + if err != nil { + return nil, fmt.Errorf("parse cached constraints for %s: %w", envKey, err) + } + return &Constraints{ + EnvKey: envKey, + SourceURL: url, + FromCache: true, + RequiresPython: rp, + DatabricksConnect: dbc, + ConstraintDeps: deps, + }, nil +} + +// fetchURL performs an HTTP GET and returns the body bytes, or an error on non-2xx or transport failure. +func fetchURL(ctx context.Context, url string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("build request for %s: %w", url, err) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("GET %s: %w", url, err) + } + defer resp.Body.Close() + if resp.StatusCode == http.StatusNotFound { + return nil, fmt.Errorf("GET %s: %w", url, errEnvKeyNotFound) + } + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status) + } + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("read body from %s: %w", url, err) + } + return data, nil +} + +// pyprojectTOML mirrors the pyproject.toml fields we care about. +type pyprojectTOML struct { + Project struct { + RequiresPython string `toml:"requires-python"` + } `toml:"project"` + DependencyGroups struct { + Dev []string `toml:"dev"` + } `toml:"dependency-groups"` + Tool struct { + UV struct { + ConstraintDependencies []string `toml:"constraint-dependencies"` + } `toml:"uv"` + } `toml:"tool"` +} + +// parseConstraints parses a pyproject.toml byte slice and extracts requires-python, +// the databricks-connect entry from dependency-groups.dev, and constraint-dependencies. +func parseConstraints(data []byte) (requiresPython, dbconnect string, deps []string, err error) { + var p pyprojectTOML + if err = toml.Unmarshal(data, &p); err != nil { + return "", "", nil, fmt.Errorf("unmarshal pyproject.toml: %w", err) + } + + requiresPython = p.Project.RequiresPython + + for _, entry := range p.DependencyGroups.Dev { + // Despace before matching so whitespace variants like "databricks-connect ~=17" also match. + if strings.HasPrefix(strings.ReplaceAll(entry, " ", ""), "databricks-connect") { + dbconnect = entry + break + } + } + + deps = p.Tool.UV.ConstraintDependencies + return requiresPython, dbconnect, deps, nil +} diff --git a/libs/localenv/constraints_test.go b/libs/localenv/constraints_test.go new file mode 100644 index 0000000000..bced735bc4 --- /dev/null +++ b/libs/localenv/constraints_test.go @@ -0,0 +1,90 @@ +package localenv + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const sampleToml = `[project] +requires-python = "==3.12.*" + +[dependency-groups] +dev = [ + "databricks-connect~=17.2.0", + "pytest~=8.0", +] + +[tool.uv] +constraint-dependencies = [ + "pydantic~=2.10.6", + "anyio~=4.6.2", +] +` + +func TestParseConstraints(t *testing.T) { + rp, dbc, deps, err := parseConstraints([]byte(sampleToml)) + require.NoError(t, err) + assert.Equal(t, "==3.12.*", rp) + assert.Equal(t, "databricks-connect~=17.2.0", dbc) + assert.Equal(t, []string{"pydantic~=2.10.6", "anyio~=4.6.2"}, deps) +} + +func TestFetchConstraintsHTTP(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/serverless/serverless-v4/pyproject.toml", r.URL.Path) + _, _ = w.Write([]byte(sampleToml)) + })) + defer srv.Close() + + c, err := FetchConstraints(t.Context(), srv.URL, "serverless/serverless-v4", t.TempDir()) + require.NoError(t, err) + assert.False(t, c.FromCache) + assert.Equal(t, "databricks-connect~=17.2.0", c.DatabricksConnect) + assert.Len(t, c.ConstraintDeps, 2) +} + +func TestFetchConstraintsEnvKeyNotFound(t *testing.T) { + // A 404 for a resolved env key means the environment is not published; this + // must classify as E_ENV_UNSUPPORTED, not E_FETCH, and not fall back to cache. + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "not found", http.StatusNotFound) + })) + defer srv.Close() + + _, err := FetchConstraints(t.Context(), srv.URL, "dbr/99.9.x-scala2.12", t.TempDir()) + var pe *PipelineError + require.ErrorAs(t, err, &pe) + assert.Equal(t, ErrEnvUnsupported, pe.Code) +} + +func TestFetchConstraintsTransportFailureNoCache(t *testing.T) { + // A transport failure with no cache classifies as E_FETCH. + down := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})) + url := down.URL + down.Close() + + _, err := FetchConstraints(t.Context(), url, "serverless/serverless-v4", t.TempDir()) + var pe *PipelineError + require.ErrorAs(t, err, &pe) + assert.Equal(t, ErrFetch, pe.Code) +} + +func TestFetchConstraintsFallsBackToCache(t *testing.T) { + cacheDir := t.TempDir() + // First, a successful fetch populates the cache. + good := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(sampleToml)) + })) + _, err := FetchConstraints(t.Context(), good.URL, "serverless/serverless-v4", cacheDir) + require.NoError(t, err) + good.Close() + + // Now the server is down; fetch must serve the cache. + c, err := FetchConstraints(t.Context(), good.URL, "serverless/serverless-v4", cacheDir) + require.NoError(t, err) + assert.True(t, c.FromCache) +} From a154bce2be7806d93891a5d411a4e167af8ae4e8 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 17:41:45 +0200 Subject: [PATCH 2/5] Harden constraint fetch: validate artifact, create+atomically write cache, exact dep match Review of the constraint-fetch layer surfaced several defects, all fixed here: - parseConstraints accepted valid-but-empty TOML: a 200 body with no [project].requires-python returned an empty result that would be cached and only fail confusingly later. It now errors when requires-python is absent. - The cache write assumed cacheDir already existed. On a fresh machine the first fetch succeeded but os.WriteFile failed (no such directory), so the cache never populated and offline runs got E_FETCH. writeCacheAtomic now MkdirAll's the parent. - The cache write was non-atomic (os.WriteFile truncates in place), so a concurrent transport-failure reader could observe a partial file. writeCacheAtomic writes a temp file and renames it into place. - databricks-connect detection used a bare string prefix, so a sibling like "databricks-connectors==1.0" matched first and was returned instead of the real pin. isDatabricksConnectDep now requires a package-name boundary. - sanitizeEnvKey collapsed only "/", leaving a Windows "\" to be treated as a separator by filepath.Join; it now collapses both. Co-authored-by: Isaac --- libs/localenv/constraints.go | 84 ++++++++++++++++++++++++++++--- libs/localenv/constraints_test.go | 43 ++++++++++++++++ 2 files changed, 121 insertions(+), 6 deletions(-) diff --git a/libs/localenv/constraints.go b/libs/localenv/constraints.go index d4925d4ac2..ffd93140b3 100644 --- a/libs/localenv/constraints.go +++ b/libs/localenv/constraints.go @@ -36,9 +36,48 @@ type Constraints struct { ConstraintDeps []string } -// sanitizeEnvKey replaces path separators with double-underscores to produce a flat filename. +// sanitizeEnvKey flattens an env key to a single filename component by replacing +// every path separator (both "/" and the OS separator, e.g. "\\" on Windows) with +// double-underscores. Collapsing both keeps the cache file inside cacheDir even on +// Windows, where filepath.Join would otherwise treat a backslash as a separator. func sanitizeEnvKey(envKey string) string { - return strings.ReplaceAll(envKey, "/", "__") + s := strings.ReplaceAll(envKey, "/", "__") + s = strings.ReplaceAll(s, "\\", "__") + return s +} + +// writeCacheAtomic writes data to path via a temp file and rename, creating the +// parent directory first. The rename is atomic on the same filesystem, so a +// concurrent reader never observes a truncated or partial cache file (os.WriteFile +// truncates in place, which a fallback reader could catch mid-write). +func writeCacheAtomic(path string, data []byte) error { + dir := filepath.Dir(path) + if err := os.MkdirAll(dir, 0o755); err != nil { + return err + } + tmp, err := os.CreateTemp(dir, ".constraints-*.tmp") + if err != nil { + return err + } + tmpName := tmp.Name() + if _, err := tmp.Write(data); err != nil { + tmp.Close() + os.Remove(tmpName) + return err + } + if err := tmp.Close(); err != nil { + os.Remove(tmpName) + return err + } + if err := os.Chmod(tmpName, 0o600); err != nil { + os.Remove(tmpName) + return err + } + if err := os.Rename(tmpName, path); err != nil { + os.Remove(tmpName) + return err + } + return nil } // FetchConstraints fetches the pyproject.toml for envKey from baseURL and caches it in @@ -62,8 +101,9 @@ func FetchConstraints(ctx context.Context, baseURL, envKey, cacheDir string) (*C if err != nil { return nil, fmt.Errorf("parse constraints for %s: %w", envKey, err) } - // Write the cache copy; non-fatal so a read-only cacheDir doesn't break the command. - if err := os.WriteFile(cachePath, data, 0o600); err != nil { + // Write the cache copy (creating cacheDir if needed, atomically); non-fatal + // so a read-only cacheDir doesn't break the command. + if err := writeCacheAtomic(cachePath, data); err != nil { log.Debugf(ctx, "failed to write constraint cache %s: %v", filepath.ToSlash(cachePath), err) } return &Constraints{ @@ -145,6 +185,9 @@ type pyprojectTOML struct { // parseConstraints parses a pyproject.toml byte slice and extracts requires-python, // the databricks-connect entry from dependency-groups.dev, and constraint-dependencies. +// A body that is valid TOML but carries no requires-python is rejected: it is not a +// usable constraint artifact, and silently accepting it would cache an empty result +// and only surface a confusing failure later in the pipeline. func parseConstraints(data []byte) (requiresPython, dbconnect string, deps []string, err error) { var p pyprojectTOML if err = toml.Unmarshal(data, &p); err != nil { @@ -152,10 +195,12 @@ func parseConstraints(data []byte) (requiresPython, dbconnect string, deps []str } requiresPython = p.Project.RequiresPython + if strings.TrimSpace(requiresPython) == "" { + return "", "", nil, errors.New("constraint artifact has no [project].requires-python") + } for _, entry := range p.DependencyGroups.Dev { - // Despace before matching so whitespace variants like "databricks-connect ~=17" also match. - if strings.HasPrefix(strings.ReplaceAll(entry, " ", ""), "databricks-connect") { + if isDatabricksConnectDep(entry) { dbconnect = entry break } @@ -164,3 +209,30 @@ func parseConstraints(data []byte) (requiresPython, dbconnect string, deps []str deps = p.Tool.UV.ConstraintDependencies return requiresPython, dbconnect, deps, nil } + +// isDatabricksConnectDep reports whether a dependency-group entry is the +// databricks-connect requirement. It matches on a package-name boundary rather +// than a bare prefix so a sibling package such as "databricks-connectors" (whose +// name merely starts with "databricks-connect") is not mistaken for it. The next +// character after the name must be a PEP 508 version/extra/marker delimiter or the +// end of the string. +func isDatabricksConnectDep(entry string) bool { + const name = "databricks-connect" + // Despace so whitespace variants like "databricks-connect ~=17" also match. + s := strings.ReplaceAll(entry, " ", "") + rest, ok := strings.CutPrefix(s, name) + if !ok { + return false + } + if rest == "" { + return true + } + // A real requirement continues with a version specifier, extra, marker, or + // separator — never an identifier character (which would mean a longer name). + switch rest[0] { + case '=', '<', '>', '!', '~', '[', ';', '@', ',', '(': + return true + default: + return false + } +} diff --git a/libs/localenv/constraints_test.go b/libs/localenv/constraints_test.go index bced735bc4..c5b54a5678 100644 --- a/libs/localenv/constraints_test.go +++ b/libs/localenv/constraints_test.go @@ -3,6 +3,8 @@ package localenv import ( "net/http" "net/http/httptest" + "os" + "path/filepath" "testing" "github.com/stretchr/testify/assert" @@ -33,6 +35,47 @@ func TestParseConstraints(t *testing.T) { assert.Equal(t, []string{"pydantic~=2.10.6", "anyio~=4.6.2"}, deps) } +func TestParseConstraintsRejectsMissingRequiresPython(t *testing.T) { + // Valid TOML but no requires-python is not a usable artifact; it must error + // rather than return an empty result that would be cached and fail later. + _, _, _, err := parseConstraints([]byte("[project]\nname = \"x\"\n")) + require.Error(t, err) +} + +func TestParseConstraintsDatabricksConnectNameBoundary(t *testing.T) { + // A sibling package whose name merely starts with "databricks-connect" must + // not be mistaken for the databricks-connect requirement. + toml := `[project] +requires-python = ">=3.10" + +[dependency-groups] +dev = [ + "databricks-connectors==1.0", + "databricks-connect~=17.2.0", +] +` + _, dbc, _, err := parseConstraints([]byte(toml)) + require.NoError(t, err) + assert.Equal(t, "databricks-connect~=17.2.0", dbc) +} + +func TestFetchConstraintsCreatesCacheDir(t *testing.T) { + // The cache directory may not exist yet on a fresh machine; the fetch must + // create it so the cache actually populates (and offline fallback works). + cacheDir := filepath.Join(t.TempDir(), "does", "not", "exist", "yet") + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(sampleToml)) + })) + defer srv.Close() + + _, err := FetchConstraints(t.Context(), srv.URL, "serverless/serverless-v4", cacheDir) + require.NoError(t, err) + // The cache file was written into the freshly created directory. + written, err := os.ReadFile(filepath.Join(cacheDir, "serverless__serverless-v4.toml")) + require.NoError(t, err) + assert.Equal(t, sampleToml, string(written)) +} + func TestFetchConstraintsHTTP(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, "/serverless/serverless-v4/pyproject.toml", r.URL.Path) From dddb4f1dc393b0c134a4f25f181edde795a5d332 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 20:14:19 +0200 Subject: [PATCH 3/5] Make the constraint cache filename collision-free Round-2 review noted that mapping an env key to a cache filename by replacing "/" with "__" was not injective: distinct keys like "a/b" and "a__b" collided on the same file, so a cached copy for one environment could be served for another on a transport-failure fallback. cacheFileName now appends a short sha256 of the raw env key to the readable slug, guaranteeing distinct keys get distinct files (env keys are internally generated and never actually collide today, but the cache should not depend on that). Co-authored-by: Isaac --- libs/localenv/constraints.go | 23 ++++++++++++++--------- libs/localenv/constraints_test.go | 11 ++++++++++- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/libs/localenv/constraints.go b/libs/localenv/constraints.go index ffd93140b3..06d84e64b0 100644 --- a/libs/localenv/constraints.go +++ b/libs/localenv/constraints.go @@ -2,6 +2,8 @@ package localenv import ( "context" + "crypto/sha256" + "encoding/hex" "errors" "fmt" "io" @@ -36,14 +38,17 @@ type Constraints struct { ConstraintDeps []string } -// sanitizeEnvKey flattens an env key to a single filename component by replacing -// every path separator (both "/" and the OS separator, e.g. "\\" on Windows) with -// double-underscores. Collapsing both keeps the cache file inside cacheDir even on -// Windows, where filepath.Join would otherwise treat a backslash as a separator. -func sanitizeEnvKey(envKey string) string { - s := strings.ReplaceAll(envKey, "/", "__") - s = strings.ReplaceAll(s, "\\", "__") - return s +// cacheFileName maps an env key to a single, collision-free cache filename. +// It keeps a readable slug (path separators flattened to double-underscores so +// the file stays inside cacheDir on every OS) and appends a short hash of the +// raw env key. The hash guarantees injectivity: distinct env keys that would +// otherwise flatten to the same slug (e.g. "a/b" and "a__b") get distinct +// filenames, so a cache hit can never serve another environment's constraints. +func cacheFileName(envKey string) string { + slug := strings.ReplaceAll(envKey, "/", "__") + slug = strings.ReplaceAll(slug, "\\", "__") + sum := sha256.Sum256([]byte(envKey)) + return fmt.Sprintf("%s-%s.toml", slug, hex.EncodeToString(sum[:8])) } // writeCacheAtomic writes data to path via a temp file and rename, creating the @@ -90,7 +95,7 @@ func writeCacheAtomic(path string, data []byte) error { // https://github.com/rugpanov/databricks-environments func FetchConstraints(ctx context.Context, baseURL, envKey, cacheDir string) (*Constraints, error) { url := baseURL + "/" + envKey + "/pyproject.toml" - cachePath := filepath.Join(cacheDir, sanitizeEnvKey(envKey)+".toml") + cachePath := filepath.Join(cacheDir, cacheFileName(envKey)) data, fetchErr := fetchURL(ctx, url) if fetchErr == nil { diff --git a/libs/localenv/constraints_test.go b/libs/localenv/constraints_test.go index c5b54a5678..dfe86968d1 100644 --- a/libs/localenv/constraints_test.go +++ b/libs/localenv/constraints_test.go @@ -71,11 +71,20 @@ func TestFetchConstraintsCreatesCacheDir(t *testing.T) { _, err := FetchConstraints(t.Context(), srv.URL, "serverless/serverless-v4", cacheDir) require.NoError(t, err) // The cache file was written into the freshly created directory. - written, err := os.ReadFile(filepath.Join(cacheDir, "serverless__serverless-v4.toml")) + written, err := os.ReadFile(filepath.Join(cacheDir, cacheFileName("serverless/serverless-v4"))) require.NoError(t, err) assert.Equal(t, sampleToml, string(written)) } +func TestCacheFileNameInjective(t *testing.T) { + // Distinct env keys that flatten to the same slug must not collide, so a + // cache hit can never serve another environment's constraints. + assert.NotEqual(t, cacheFileName("a/b"), cacheFileName("a__b")) + // The filename stays inside cacheDir (no separators leak through). + assert.NotContains(t, cacheFileName("a/b"), "/") + assert.NotContains(t, cacheFileName("a\\b"), "\\") +} + func TestFetchConstraintsHTTP(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, "/serverless/serverless-v4/pyproject.toml", r.URL.Path) From 3cbeb2fd9850ba84ea30ddda4cc63d5e22597db8 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 20:26:20 +0200 Subject: [PATCH 4/5] Match databricks-connect case-insensitively Round-3 review noted isDatabricksConnectDep was case-sensitive, but Python package names are case-insensitive (PEP 503): a valid entry like "Databricks-Connect==16.4.0" went undetected, leaving the pin empty. The match now lowercases the entry before comparing; the caller still stores the original casing. Co-authored-by: Isaac --- libs/localenv/constraints.go | 6 ++++-- libs/localenv/constraints_test.go | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/libs/localenv/constraints.go b/libs/localenv/constraints.go index 06d84e64b0..a75813fdff 100644 --- a/libs/localenv/constraints.go +++ b/libs/localenv/constraints.go @@ -223,8 +223,10 @@ func parseConstraints(data []byte) (requiresPython, dbconnect string, deps []str // end of the string. func isDatabricksConnectDep(entry string) bool { const name = "databricks-connect" - // Despace so whitespace variants like "databricks-connect ~=17" also match. - s := strings.ReplaceAll(entry, " ", "") + // Despace so whitespace variants like "databricks-connect ~=17" also match, + // and lowercase because Python package names are case-insensitive (PEP 503), + // so "Databricks-Connect==16.4.0" is the same requirement. + s := strings.ToLower(strings.ReplaceAll(entry, " ", "")) rest, ok := strings.CutPrefix(s, name) if !ok { return false diff --git a/libs/localenv/constraints_test.go b/libs/localenv/constraints_test.go index dfe86968d1..df61427f29 100644 --- a/libs/localenv/constraints_test.go +++ b/libs/localenv/constraints_test.go @@ -59,6 +59,20 @@ dev = [ assert.Equal(t, "databricks-connect~=17.2.0", dbc) } +func TestParseConstraintsDatabricksConnectCaseInsensitive(t *testing.T) { + // Python package names are case-insensitive (PEP 503), so a differently-cased + // entry must still be detected; the original casing is preserved in the result. + toml := `[project] +requires-python = ">=3.10" + +[dependency-groups] +dev = ["Databricks-Connect==16.4.0"] +` + _, dbc, _, err := parseConstraints([]byte(toml)) + require.NoError(t, err) + assert.Equal(t, "Databricks-Connect==16.4.0", dbc) +} + func TestFetchConstraintsCreatesCacheDir(t *testing.T) { // The cache directory may not exist yet on a fresh machine; the fetch must // create it so the cache actually populates (and offline fallback works). From 79890ca4c43cef3062ba230134fae4625c0ddf21 Mon Sep 17 00:00:00 2001 From: Grigory Panov Date: Fri, 3 Jul 2026 21:13:53 +0200 Subject: [PATCH 5/5] Normalize databricks-connect dependency name per PEP 503 Round-4 review found isDatabricksConnectDep matched only case, missing PEP 503 name equivalence: "databricks_connect" and "databricks.connect" (and other whitespace) were not recognized. The check now extracts the leading package name up to the first PEP 508 delimiter and compares it under PEP 503 normalization (lowercase, collapse runs of -, _, . to a single -), so all spellings match while a distinct package like databricks-connectors does not. Co-authored-by: Isaac --- libs/localenv/constraints.go | 47 +++++++++++++++---------------- libs/localenv/constraints_test.go | 28 +++++++++++------- 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/libs/localenv/constraints.go b/libs/localenv/constraints.go index a75813fdff..9673d5ece8 100644 --- a/libs/localenv/constraints.go +++ b/libs/localenv/constraints.go @@ -10,6 +10,7 @@ import ( "net/http" "os" "path/filepath" + "regexp" "strings" "github.com/BurntSushi/toml" @@ -215,31 +216,29 @@ func parseConstraints(data []byte) (requiresPython, dbconnect string, deps []str return requiresPython, dbconnect, deps, nil } +// depNameSepRe matches the first PEP 508 delimiter that ends a requirement's +// package name: a version specifier, extra, marker, url, or list separator. +var depNameSepRe = regexp.MustCompile(`[<>=!~;,@\[( \t]`) + // isDatabricksConnectDep reports whether a dependency-group entry is the -// databricks-connect requirement. It matches on a package-name boundary rather -// than a bare prefix so a sibling package such as "databricks-connectors" (whose -// name merely starts with "databricks-connect") is not mistaken for it. The next -// character after the name must be a PEP 508 version/extra/marker delimiter or the -// end of the string. +// databricks-connect requirement. It extracts the leading package name (up to +// the first PEP 508 delimiter) and compares it under PEP 503 normalization, so +// case, and runs of "-", "_", or "." are all treated as equivalent: +// "Databricks-Connect", "databricks_connect", and "databricks.connect" all match, +// while a distinct package like "databricks-connectors" does not. func isDatabricksConnectDep(entry string) bool { - const name = "databricks-connect" - // Despace so whitespace variants like "databricks-connect ~=17" also match, - // and lowercase because Python package names are case-insensitive (PEP 503), - // so "Databricks-Connect==16.4.0" is the same requirement. - s := strings.ToLower(strings.ReplaceAll(entry, " ", "")) - rest, ok := strings.CutPrefix(s, name) - if !ok { - return false - } - if rest == "" { - return true - } - // A real requirement continues with a version specifier, extra, marker, or - // separator — never an identifier character (which would mean a longer name). - switch rest[0] { - case '=', '<', '>', '!', '~', '[', ';', '@', ',', '(': - return true - default: - return false + name := strings.TrimSpace(entry) + if i := depNameSepRe.FindStringIndex(name); i != nil { + name = name[:i[0]] } + return normalizePackageName(name) == "databricks-connect" +} + +// pep503SepRe matches runs of "-", "_", or "." for PEP 503 name normalization. +var pep503SepRe = regexp.MustCompile(`[-_.]+`) + +// normalizePackageName applies PEP 503 normalization: lowercase and collapse any +// run of "-", "_", or "." to a single "-". +func normalizePackageName(name string) string { + return pep503SepRe.ReplaceAllString(strings.ToLower(strings.TrimSpace(name)), "-") } diff --git a/libs/localenv/constraints_test.go b/libs/localenv/constraints_test.go index df61427f29..9af68c9786 100644 --- a/libs/localenv/constraints_test.go +++ b/libs/localenv/constraints_test.go @@ -59,18 +59,26 @@ dev = [ assert.Equal(t, "databricks-connect~=17.2.0", dbc) } -func TestParseConstraintsDatabricksConnectCaseInsensitive(t *testing.T) { - // Python package names are case-insensitive (PEP 503), so a differently-cased - // entry must still be detected; the original casing is preserved in the result. - toml := `[project] -requires-python = ">=3.10" - -[dependency-groups] -dev = ["Databricks-Connect==16.4.0"] -` +func TestParseConstraintsDatabricksConnectPEP503(t *testing.T) { + // PEP 503: package names are case-insensitive and runs of -, _, . are + // equivalent. Every spelling of databricks-connect must be detected, with the + // original entry preserved verbatim in the result. + for _, entry := range []string{ + "Databricks-Connect==16.4.0", + "databricks_connect==16.4.0", + "databricks.connect==16.4.0", + "databricks-connect ~= 17.2", + } { + toml := "[project]\nrequires-python = \">=3.10\"\n\n[dependency-groups]\ndev = [\"" + entry + "\"]\n" + _, dbc, _, err := parseConstraints([]byte(toml)) + require.NoError(t, err, entry) + assert.Equal(t, entry, dbc, "entry %q", entry) + } + // A distinct sibling package must NOT match. + toml := "[project]\nrequires-python = \">=3.10\"\n\n[dependency-groups]\ndev = [\"databricks-connectors==1.0\"]\n" _, dbc, _, err := parseConstraints([]byte(toml)) require.NoError(t, err) - assert.Equal(t, "Databricks-Connect==16.4.0", dbc) + assert.Empty(t, dbc) } func TestFetchConstraintsCreatesCacheDir(t *testing.T) {