From 0877d01a07debf6a183bf3acfd936ebc8cd2a6de Mon Sep 17 00:00:00 2001 From: Jake Peterson <5532776+jpetey75@users.noreply.github.com> Date: Fri, 26 Jun 2026 17:51:03 -0500 Subject: [PATCH] fix: preserve AND/OR precedence in composite filters (#23) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Composite filters were flat: nested boolean expressions collapsed into a single aggregation, and `.filter()` after an OR group appended into that OR instead of AND-ing. Both silently changed the query population a user wrote. - `CompositeFilter` is now a tree — `filters` may hold nested groups. A shared `_combine()` flattens a child only when it already uses the same aggregation, so `a & (b | c)` keeps `(b | c)` nested. - `to_dict()` serializes nested groups and projects dimension vs table-calc filters into their separate top-level groups. The one shape the API cannot represent — OR across the two field types — raises a clear error instead of serializing to the wrong thing. - `Query.filter()` always ANDs the new condition with everything so far, keeping any existing OR group nested: `.filter(a | b).filter(c)` => `(a OR b) AND c`. Verified live: `(month==Jan | month==Mar).filter(month != Jan)` now returns only March (previously every month). The API confirmed to apply nested groups. Co-Authored-By: Claude Opus 4.8 --- docs/SDK_GUIDE.md | 19 +++- lightdash/filter.py | 138 +++++++++++++++-------- lightdash/query.py | 28 ++--- tests/test_composite_filter_semantics.py | 136 ++++++++++++++++++++++ 4 files changed, 250 insertions(+), 71 deletions(-) create mode 100644 tests/test_composite_filter_semantics.py diff --git a/docs/SDK_GUIDE.md b/docs/SDK_GUIDE.md index b8ca672..e1c18b2 100644 --- a/docs/SDK_GUIDE.md +++ b/docs/SDK_GUIDE.md @@ -154,23 +154,34 @@ f = (model.dimensions.country == "USA") & (model.dimensions.amount > 1000) # OR: Either condition must be true f = (model.dimensions.status == "active") | (model.dimensions.status == "pending") -# Complex combinations +# Complex combinations — nesting and precedence are preserved f = ( (model.dimensions.country == "USA") & ((model.dimensions.amount > 1000) | (model.dimensions.priority == "high")) ) +# serializes as: country = USA AND (amount > 1000 OR priority = high) ``` -Multiple `.filter()` calls on a query are combined with AND logic: +The boolean structure you write is the boolean structure that runs: +`a & (b | c)` keeps the `(b | c)` group nested rather than flattening to +`a & b & c`. + +Multiple `.filter()` calls on a query are combined with AND logic — each call +AND-s the new condition with everything so far, **as a unit**: ```python query = ( model.query() - .filter(model.dimensions.country == "USA") - .filter(model.dimensions.amount > 1000) # AND-ed with above + .filter((model.dimensions.status == "active") | (model.dimensions.status == "pending")) + .filter(model.dimensions.status != "deleted") ) +# (status = active OR status = pending) AND status != deleted ``` +> **Note:** dimension and table-calculation filters live in separate groups that +> the API always ANDs together, so they can be combined with `&` but **not** OR-ed +> with each other (`(dimension == x) | (calc > y)` raises a clear error). + ### Multiple Filters on the Same Field You can apply more than one condition to the same field—for example, to constrain diff --git a/lightdash/filter.py b/lightdash/filter.py index 4f8f68d..8b55aa6 100644 --- a/lightdash/filter.py +++ b/lightdash/filter.py @@ -58,20 +58,33 @@ ) +def _combine(left, right, aggregation: str) -> "CompositeFilter": + """Combine two filters/groups under ``aggregation``. + + A child group is flattened in only when it already uses the *same* + aggregation; otherwise it is kept as a nested item. This preserves the + precedence of mixed AND/OR expressions — e.g. ``a & (b | c)`` keeps the + ``(b | c)`` group nested rather than collapsing to ``a & b & c``. + """ + items = [] + for operand in (left, right): + if isinstance(operand, CompositeFilter) and operand.aggregation == aggregation: + items.extend(operand.filters) + else: + items.append(operand) + return CompositeFilter(filters=items, aggregation=aggregation) + + class _FieldFilterMixin: """Shared ``&`` / ``|`` combination behavior for single-field filters.""" def __and__(self, other: Union["FieldFilter", "CompositeFilter"]) -> "CompositeFilter": """Combine filters with AND: filter1 & filter2""" - if isinstance(other, CompositeFilter): - return CompositeFilter(filters=[self] + list(other.filters), aggregation="and") - return CompositeFilter(filters=[self, other], aggregation="and") + return _combine(self, other, "and") def __or__(self, other: Union["FieldFilter", "CompositeFilter"]) -> "CompositeFilter": """Combine filters with OR: filter1 | filter2""" - if isinstance(other, CompositeFilter): - return CompositeFilter(filters=[self] + list(other.filters), aggregation="or") - return CompositeFilter(filters=[self, other], aggregation="or") + return _combine(self, other, "or") @dataclass @@ -150,15 +163,38 @@ def to_dict(self) -> Dict[str, Union[str, List[str]]]: FieldFilter = Union[DimensionFilter, TableCalculationFilter] +def _target_keys(node) -> set: + """Payload keys ('dimensions' / 'tableCalculations') used by the leaf rules + within ``node`` (a filter rule or a CompositeFilter).""" + if isinstance(node, CompositeFilter): + keys = set() + for item in node.filters: + keys |= _target_keys(item) + return keys + if isinstance(node, TableCalculationFilter): + return {"tableCalculations"} + return {"dimensions"} + + +def _serialize_node(node) -> Dict: + """Serialize a single-field-type node into either a rule dict or a nested + ``{and|or: [...]}`` group dict.""" + if isinstance(node, CompositeFilter): + return {node.aggregation: [_serialize_node(item) for item in node.filters]} + return node.to_dict() + + @dataclass class CompositeFilter: - """ - Filters are a list of field filters (on dimensions and table calculations) - that are applied to a query. - Later this will also represent complex filters with AND, OR, NOT, etc. + """A boolean group of filters applied to a query. + + ``filters`` may contain individual filter rules *and* other + ``CompositeFilter`` groups, so nested boolean expressions such as + ``a & (b | c)`` are represented (and serialized) with their precedence + preserved. """ - filters: List[FieldFilter] = field(default_factory=list) + filters: List[Union[FieldFilter, "CompositeFilter"]] = field(default_factory=list) aggregation: str = "and" def __post_init__(self): @@ -168,51 +204,53 @@ def __post_init__(self): ) def to_dict(self): + keys = _target_keys(self) + + # Dimension-only (or empty) — serialize the tree under "dimensions". + if "tableCalculations" not in keys: + return {"dimensions": _serialize_node(self)} + + # Table-calculation-only. + if "dimensions" not in keys: + return { + "dimensions": {self.aggregation: []}, + "tableCalculations": _serialize_node(self), + } + + # Mixed dimension + table-calculation filters. The API keeps these in + # separate top-level groups that are implicitly AND-ed, so they can only + # be split when the outer combinator is AND. + if self.aggregation != "and": + raise ValueError( + "Cannot combine dimension and table calculation filters with OR: " + "the Lightdash API stores them in separate groups that are AND-ed " + "together. Use AND between the two field types, or run separate " + "queries." + ) + dimensions = [] table_calculations = [] - for f in self.filters: - # Check that the filter is not a composite filter - if not hasattr(f, "field"): - raise TypeError("Multi-level filter composites not supported yet") - # Multiple filters may target the same field, e.g. a date range - # expressed as (dim >= start) & (dim <= end). - if isinstance(f, TableCalculationFilter): - table_calculations.append(f.to_dict()) + for item in self.filters: + item_keys = _target_keys(item) + if "tableCalculations" not in item_keys: + dimensions.append(_serialize_node(item)) + elif "dimensions" not in item_keys: + table_calculations.append(_serialize_node(item)) else: - dimensions.append(f.to_dict()) - out = {"dimensions": {self.aggregation: dimensions}} - # Only include the tableCalculations group when present, so existing - # dimension-only payloads are unchanged. - if table_calculations: - out["tableCalculations"] = {self.aggregation: table_calculations} - return out + raise ValueError( + "A filter group mixes dimension and table calculation filters " + "and cannot be represented. Keep each field type in its own " + "group, combined with AND." + ) + return { + "dimensions": {"and": dimensions}, + "tableCalculations": {"and": table_calculations}, + } def __and__(self, other: Union[FieldFilter, "CompositeFilter"]) -> "CompositeFilter": """Combine with another filter using AND: composite & filter""" - if isinstance(other, CompositeFilter): - # Flatten if both are AND composites - if self.aggregation == "and" and other.aggregation == "and": - return CompositeFilter( - filters=list(self.filters) + list(other.filters), - aggregation="and" - ) - # Otherwise wrap as nested (not fully supported yet, but preserve structure) - return CompositeFilter(filters=list(self.filters) + list(other.filters), aggregation="and") - # Add single filter to existing composite - if self.aggregation == "and": - return CompositeFilter(filters=list(self.filters) + [other], aggregation="and") - return CompositeFilter(filters=list(self.filters) + [other], aggregation="and") + return _combine(self, other, "and") def __or__(self, other: Union[FieldFilter, "CompositeFilter"]) -> "CompositeFilter": """Combine with another filter using OR: composite | filter""" - if isinstance(other, CompositeFilter): - # Flatten if both are OR composites - if self.aggregation == "or" and other.aggregation == "or": - return CompositeFilter( - filters=list(self.filters) + list(other.filters), - aggregation="or" - ) - return CompositeFilter(filters=list(self.filters) + list(other.filters), aggregation="or") - if self.aggregation == "or": - return CompositeFilter(filters=list(self.filters) + [other], aggregation="or") - return CompositeFilter(filters=list(self.filters) + [other], aggregation="or") + return _combine(self, other, "or") diff --git a/lightdash/query.py b/lightdash/query.py index c0da285..5ee8522 100644 --- a/lightdash/query.py +++ b/lightdash/query.py @@ -511,27 +511,21 @@ def filter(self, filter: Union[DimensionFilter, TableCalculationFilter, Composit .filter(model.dimensions.country == 'USA') .filter(model.dimensions.status == 'active') ) + + Each call AND-s the new condition with everything added so far, even + when an earlier filter is an OR group — so + ``.filter(a | b).filter(c)`` means ``(a OR b) AND c``. """ if self._filters is None: - # First filter - if isinstance(filter, (DimensionFilter, TableCalculationFilter)): - new_filters = CompositeFilter(filters=[filter]) - else: + # First filter — a bare rule becomes a single-item group. + if isinstance(filter, CompositeFilter): new_filters = filter - else: - # Combine with existing filters using AND - if isinstance(filter, (DimensionFilter, TableCalculationFilter)): - # Add to existing CompositeFilter's list - new_filters = CompositeFilter( - filters=list(self._filters.filters) + [filter], - aggregation=self._filters.aggregation - ) else: - # Both are CompositeFilters - combine them - new_filters = CompositeFilter( - filters=list(self._filters.filters) + list(filter.filters), - aggregation="and" - ) + new_filters = CompositeFilter(filters=[filter]) + else: + # AND the new condition with everything so far. _combine keeps any + # existing OR group nested rather than absorbing the new rule into it. + new_filters = self._filters & filter return self._clone(filters=new_filters) def sort(self, *sorts: Sort) -> "Query": diff --git a/tests/test_composite_filter_semantics.py b/tests/test_composite_filter_semantics.py new file mode 100644 index 0000000..3c30edc --- /dev/null +++ b/tests/test_composite_filter_semantics.py @@ -0,0 +1,136 @@ +""" +Tests for composite filter boolean semantics (issue #23). + +Covers nested AND/OR precedence, the `.filter()`-always-ANDs rule, flattening of +same-operator chains, and the clear error for the one shape the Lightdash API +cannot represent (OR across dimension and table-calculation filters). +""" + +import pytest +from lightdash.dimensions import Dimension +from lightdash.filter import CompositeFilter +from lightdash.models import Model +from lightdash.table_calculations import TableCalculation + + +@pytest.fixture +def model(): + return Model(name="m", type="default", database_name="db", schema_name="s") + + +def dim(name): + return Dimension(name=name, model_name="m") + + +def _ids(rules): + return [r["target"]["fieldId"] for r in rules] + + +class TestNestedPrecedence: + def test_and_of_or_keeps_group_nested(self): + # a & (b | c) -> AND[ a, OR[b, c] ] + f = (dim("a") == 1) & ((dim("b") == 2) | (dim("c") == 3)) + d = f.to_dict()["dimensions"] + assert list(d) == ["and"] + assert d["and"][0]["target"]["fieldId"] == "m_a" + assert d["and"][1] == { + "or": [ + {"target": {"fieldId": "m_b"}, "operator": "equals", "values": [2]}, + {"target": {"fieldId": "m_c"}, "operator": "equals", "values": [3]}, + ] + } + + def test_or_of_and_keeps_group_nested(self): + # a | (b & c) -> OR[ a, AND[b, c] ] + f = (dim("a") == 1) | ((dim("b") == 2) & (dim("c") == 3)) + d = f.to_dict()["dimensions"] + assert list(d) == ["or"] + assert d["or"][0]["target"]["fieldId"] == "m_a" + assert "and" in d["or"][1] + assert _ids(d["or"][1]["and"]) == ["m_b", "m_c"] + + def test_documented_complex_example(self): + # country == "USA" & ((amount > 1000) | (priority == "high")) + f = (dim("country") == "USA") & ( + (dim("amount") > 1000) | (dim("priority") == "high") + ) + d = f.to_dict()["dimensions"] + assert d["and"][0]["target"]["fieldId"] == "m_country" + nested = d["and"][1]["or"] + assert _ids(nested) == ["m_amount", "m_priority"] + + +class TestFlatteningPreserved: + def test_chained_and_flattens(self): + f = (dim("a") == 1) & (dim("b") == 2) & (dim("c") == 3) + rules = f.to_dict()["dimensions"]["and"] + assert _ids(rules) == ["m_a", "m_b", "m_c"] + assert all("target" in r for r in rules) # flat, no nested groups + + def test_chained_or_flattens(self): + d = dim("a") + f = (d == 1) | (d == 2) | (d == 3) + assert len(f.to_dict()["dimensions"]["or"]) == 3 + + def test_same_field_range_still_flat(self): + # The #20 behaviour: (d >= x) & (d <= y) stays a flat AND of two rules. + d = dim("order_date") + f = (d >= "2026-01-01") & (d <= "2026-01-31") + rules = f.to_dict()["dimensions"]["and"] + assert [r["operator"] for r in rules] == ["greaterThanOrEqual", "lessThanOrEqual"] + + +class TestQueryFilterAlwaysAnds: + def test_filter_after_or_ands_as_a_unit(self, model): + """(a | b).filter(c) -> (a OR b) AND c — the core #23 bug.""" + status = dim("status") + q = ( + model.query() + .filter((status == "active") | (status == "pending")) + .filter(dim("region") == "west") + ) + d = q._build_payload()["filters"]["dimensions"] + assert list(d) == ["and"] + assert "or" in d["and"][0] + assert len(d["and"][0]["or"]) == 2 + assert d["and"][1]["target"]["fieldId"] == "m_region" + + def test_two_simple_filters_and_flat(self, model): + q = model.query().filter(dim("a") == 1).filter(dim("b") == 2) + rules = q._build_payload()["filters"]["dimensions"]["and"] + assert _ids(rules) == ["m_a", "m_b"] + + def test_filter_composite_then_composite(self, model): + q = ( + model.query() + .filter((dim("a") == 1) | (dim("b") == 2)) + .filter((dim("c") == 3) | (dim("d") == 4)) + ) + d = q._build_payload()["filters"]["dimensions"] + assert list(d) == ["and"] + assert all("or" in item for item in d["and"]) + + +class TestCrossFieldTypeHandling: + def test_and_across_dimension_and_calc_splits(self): + calc = TableCalculation(name="ratio", sql="1") + f = (dim("country") == "USA") & (calc > 0.2) + d = f.to_dict() + assert d["dimensions"]["and"][0]["target"]["fieldId"] == "m_country" + assert d["tableCalculations"]["and"][0]["target"]["fieldId"] == "ratio" + + def test_and_with_nested_calc_or(self): + calc_a = TableCalculation(name="ca", sql="1") + calc_b = TableCalculation(name="cb", sql="2") + f = (dim("country") == "USA") & ((calc_a > 1) | (calc_b > 2)) + d = f.to_dict() + assert _ids(d["dimensions"]["and"]) == ["m_country"] + calc_group = d["tableCalculations"]["and"] + assert len(calc_group) == 1 and "or" in calc_group[0] + assert _ids(calc_group[0]["or"]) == ["ca", "cb"] + + def test_or_across_dimension_and_calc_raises(self): + calc = TableCalculation(name="ratio", sql="1") + f = (dim("country") == "USA") | (calc > 0.2) + with pytest.raises(ValueError, match="Cannot combine dimension and table calculation"): + f.to_dict()