Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions docs/SDK_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,23 +154,34 @@ f = (model.dimensions.country == "USA") & (model.dimensions.amount > 1000)
# OR: Either condition must be true
f = (model.dimensions.status == "active") | (model.dimensions.status == "pending")

# Complex combinations
# Complex combinations — nesting and precedence are preserved
f = (
(model.dimensions.country == "USA") &
((model.dimensions.amount > 1000) | (model.dimensions.priority == "high"))
)
# serializes as: country = USA AND (amount > 1000 OR priority = high)
```

Multiple `.filter()` calls on a query are combined with AND logic:
The boolean structure you write is the boolean structure that runs:
`a & (b | c)` keeps the `(b | c)` group nested rather than flattening to
`a & b & c`.

Multiple `.filter()` calls on a query are combined with AND logic — each call
AND-s the new condition with everything so far, **as a unit**:

```python
query = (
model.query()
.filter(model.dimensions.country == "USA")
.filter(model.dimensions.amount > 1000) # AND-ed with above
.filter((model.dimensions.status == "active") | (model.dimensions.status == "pending"))
.filter(model.dimensions.status != "deleted")
)
# (status = active OR status = pending) AND status != deleted
```

> **Note:** dimension and table-calculation filters live in separate groups that
> the API always ANDs together, so they can be combined with `&` but **not** OR-ed
> with each other (`(dimension == x) | (calc > y)` raises a clear error).

### Multiple Filters on the Same Field

You can apply more than one condition to the same field—for example, to constrain
Expand Down
138 changes: 88 additions & 50 deletions lightdash/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,20 +58,33 @@
)


def _combine(left, right, aggregation: str) -> "CompositeFilter":
"""Combine two filters/groups under ``aggregation``.

A child group is flattened in only when it already uses the *same*
aggregation; otherwise it is kept as a nested item. This preserves the
precedence of mixed AND/OR expressions — e.g. ``a & (b | c)`` keeps the
``(b | c)`` group nested rather than collapsing to ``a & b & c``.
"""
items = []
for operand in (left, right):
if isinstance(operand, CompositeFilter) and operand.aggregation == aggregation:
items.extend(operand.filters)
else:
items.append(operand)
return CompositeFilter(filters=items, aggregation=aggregation)


class _FieldFilterMixin:
"""Shared ``&`` / ``|`` combination behavior for single-field filters."""

def __and__(self, other: Union["FieldFilter", "CompositeFilter"]) -> "CompositeFilter":
"""Combine filters with AND: filter1 & filter2"""
if isinstance(other, CompositeFilter):
return CompositeFilter(filters=[self] + list(other.filters), aggregation="and")
return CompositeFilter(filters=[self, other], aggregation="and")
return _combine(self, other, "and")

def __or__(self, other: Union["FieldFilter", "CompositeFilter"]) -> "CompositeFilter":
"""Combine filters with OR: filter1 | filter2"""
if isinstance(other, CompositeFilter):
return CompositeFilter(filters=[self] + list(other.filters), aggregation="or")
return CompositeFilter(filters=[self, other], aggregation="or")
return _combine(self, other, "or")


@dataclass
Expand Down Expand Up @@ -150,15 +163,38 @@ def to_dict(self) -> Dict[str, Union[str, List[str]]]:
FieldFilter = Union[DimensionFilter, TableCalculationFilter]


def _target_keys(node) -> set:
"""Payload keys ('dimensions' / 'tableCalculations') used by the leaf rules
within ``node`` (a filter rule or a CompositeFilter)."""
if isinstance(node, CompositeFilter):
keys = set()
for item in node.filters:
keys |= _target_keys(item)
return keys
if isinstance(node, TableCalculationFilter):
return {"tableCalculations"}
return {"dimensions"}


def _serialize_node(node) -> Dict:
"""Serialize a single-field-type node into either a rule dict or a nested
``{and|or: [...]}`` group dict."""
if isinstance(node, CompositeFilter):
return {node.aggregation: [_serialize_node(item) for item in node.filters]}
return node.to_dict()


@dataclass
class CompositeFilter:
"""
Filters are a list of field filters (on dimensions and table calculations)
that are applied to a query.
Later this will also represent complex filters with AND, OR, NOT, etc.
"""A boolean group of filters applied to a query.

``filters`` may contain individual filter rules *and* other
``CompositeFilter`` groups, so nested boolean expressions such as
``a & (b | c)`` are represented (and serialized) with their precedence
preserved.
"""

filters: List[FieldFilter] = field(default_factory=list)
filters: List[Union[FieldFilter, "CompositeFilter"]] = field(default_factory=list)
aggregation: str = "and"

def __post_init__(self):
Expand All @@ -168,51 +204,53 @@ def __post_init__(self):
)

def to_dict(self):
keys = _target_keys(self)

# Dimension-only (or empty) — serialize the tree under "dimensions".
if "tableCalculations" not in keys:
return {"dimensions": _serialize_node(self)}

# Table-calculation-only.
if "dimensions" not in keys:
return {
"dimensions": {self.aggregation: []},
"tableCalculations": _serialize_node(self),
}

# Mixed dimension + table-calculation filters. The API keeps these in
# separate top-level groups that are implicitly AND-ed, so they can only
# be split when the outer combinator is AND.
if self.aggregation != "and":
raise ValueError(
"Cannot combine dimension and table calculation filters with OR: "
"the Lightdash API stores them in separate groups that are AND-ed "
"together. Use AND between the two field types, or run separate "
"queries."
)

dimensions = []
table_calculations = []
for f in self.filters:
# Check that the filter is not a composite filter
if not hasattr(f, "field"):
raise TypeError("Multi-level filter composites not supported yet")
# Multiple filters may target the same field, e.g. a date range
# expressed as (dim >= start) & (dim <= end).
if isinstance(f, TableCalculationFilter):
table_calculations.append(f.to_dict())
for item in self.filters:
item_keys = _target_keys(item)
if "tableCalculations" not in item_keys:
dimensions.append(_serialize_node(item))
elif "dimensions" not in item_keys:
table_calculations.append(_serialize_node(item))
else:
dimensions.append(f.to_dict())
out = {"dimensions": {self.aggregation: dimensions}}
# Only include the tableCalculations group when present, so existing
# dimension-only payloads are unchanged.
if table_calculations:
out["tableCalculations"] = {self.aggregation: table_calculations}
return out
raise ValueError(
"A filter group mixes dimension and table calculation filters "
"and cannot be represented. Keep each field type in its own "
"group, combined with AND."
)
return {
"dimensions": {"and": dimensions},
"tableCalculations": {"and": table_calculations},
}

def __and__(self, other: Union[FieldFilter, "CompositeFilter"]) -> "CompositeFilter":
"""Combine with another filter using AND: composite & filter"""
if isinstance(other, CompositeFilter):
# Flatten if both are AND composites
if self.aggregation == "and" and other.aggregation == "and":
return CompositeFilter(
filters=list(self.filters) + list(other.filters),
aggregation="and"
)
# Otherwise wrap as nested (not fully supported yet, but preserve structure)
return CompositeFilter(filters=list(self.filters) + list(other.filters), aggregation="and")
# Add single filter to existing composite
if self.aggregation == "and":
return CompositeFilter(filters=list(self.filters) + [other], aggregation="and")
return CompositeFilter(filters=list(self.filters) + [other], aggregation="and")
return _combine(self, other, "and")

def __or__(self, other: Union[FieldFilter, "CompositeFilter"]) -> "CompositeFilter":
"""Combine with another filter using OR: composite | filter"""
if isinstance(other, CompositeFilter):
# Flatten if both are OR composites
if self.aggregation == "or" and other.aggregation == "or":
return CompositeFilter(
filters=list(self.filters) + list(other.filters),
aggregation="or"
)
return CompositeFilter(filters=list(self.filters) + list(other.filters), aggregation="or")
if self.aggregation == "or":
return CompositeFilter(filters=list(self.filters) + [other], aggregation="or")
return CompositeFilter(filters=list(self.filters) + [other], aggregation="or")
return _combine(self, other, "or")
28 changes: 11 additions & 17 deletions lightdash/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,27 +511,21 @@ def filter(self, filter: Union[DimensionFilter, TableCalculationFilter, Composit
.filter(model.dimensions.country == 'USA')
.filter(model.dimensions.status == 'active')
)

Each call AND-s the new condition with everything added so far, even
when an earlier filter is an OR group — so
``.filter(a | b).filter(c)`` means ``(a OR b) AND c``.
"""
if self._filters is None:
# First filter
if isinstance(filter, (DimensionFilter, TableCalculationFilter)):
new_filters = CompositeFilter(filters=[filter])
else:
# First filter — a bare rule becomes a single-item group.
if isinstance(filter, CompositeFilter):
new_filters = filter
else:
# Combine with existing filters using AND
if isinstance(filter, (DimensionFilter, TableCalculationFilter)):
# Add to existing CompositeFilter's list
new_filters = CompositeFilter(
filters=list(self._filters.filters) + [filter],
aggregation=self._filters.aggregation
)
else:
# Both are CompositeFilters - combine them
new_filters = CompositeFilter(
filters=list(self._filters.filters) + list(filter.filters),
aggregation="and"
)
new_filters = CompositeFilter(filters=[filter])
else:
# AND the new condition with everything so far. _combine keeps any
# existing OR group nested rather than absorbing the new rule into it.
new_filters = self._filters & filter
return self._clone(filters=new_filters)

def sort(self, *sorts: Sort) -> "Query":
Expand Down
136 changes: 136 additions & 0 deletions tests/test_composite_filter_semantics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
"""
Tests for composite filter boolean semantics (issue #23).

Covers nested AND/OR precedence, the `.filter()`-always-ANDs rule, flattening of
same-operator chains, and the clear error for the one shape the Lightdash API
cannot represent (OR across dimension and table-calculation filters).
"""

import pytest
from lightdash.dimensions import Dimension
from lightdash.filter import CompositeFilter
from lightdash.models import Model
from lightdash.table_calculations import TableCalculation


@pytest.fixture
def model():
return Model(name="m", type="default", database_name="db", schema_name="s")


def dim(name):
return Dimension(name=name, model_name="m")


def _ids(rules):
return [r["target"]["fieldId"] for r in rules]


class TestNestedPrecedence:
def test_and_of_or_keeps_group_nested(self):
# a & (b | c) -> AND[ a, OR[b, c] ]
f = (dim("a") == 1) & ((dim("b") == 2) | (dim("c") == 3))
d = f.to_dict()["dimensions"]
assert list(d) == ["and"]
assert d["and"][0]["target"]["fieldId"] == "m_a"
assert d["and"][1] == {
"or": [
{"target": {"fieldId": "m_b"}, "operator": "equals", "values": [2]},
{"target": {"fieldId": "m_c"}, "operator": "equals", "values": [3]},
]
}

def test_or_of_and_keeps_group_nested(self):
# a | (b & c) -> OR[ a, AND[b, c] ]
f = (dim("a") == 1) | ((dim("b") == 2) & (dim("c") == 3))
d = f.to_dict()["dimensions"]
assert list(d) == ["or"]
assert d["or"][0]["target"]["fieldId"] == "m_a"
assert "and" in d["or"][1]
assert _ids(d["or"][1]["and"]) == ["m_b", "m_c"]

def test_documented_complex_example(self):
# country == "USA" & ((amount > 1000) | (priority == "high"))
f = (dim("country") == "USA") & (
(dim("amount") > 1000) | (dim("priority") == "high")
)
d = f.to_dict()["dimensions"]
assert d["and"][0]["target"]["fieldId"] == "m_country"
nested = d["and"][1]["or"]
assert _ids(nested) == ["m_amount", "m_priority"]


class TestFlatteningPreserved:
def test_chained_and_flattens(self):
f = (dim("a") == 1) & (dim("b") == 2) & (dim("c") == 3)
rules = f.to_dict()["dimensions"]["and"]
assert _ids(rules) == ["m_a", "m_b", "m_c"]
assert all("target" in r for r in rules) # flat, no nested groups

def test_chained_or_flattens(self):
d = dim("a")
f = (d == 1) | (d == 2) | (d == 3)
assert len(f.to_dict()["dimensions"]["or"]) == 3

def test_same_field_range_still_flat(self):
# The #20 behaviour: (d >= x) & (d <= y) stays a flat AND of two rules.
d = dim("order_date")
f = (d >= "2026-01-01") & (d <= "2026-01-31")
rules = f.to_dict()["dimensions"]["and"]
assert [r["operator"] for r in rules] == ["greaterThanOrEqual", "lessThanOrEqual"]


class TestQueryFilterAlwaysAnds:
def test_filter_after_or_ands_as_a_unit(self, model):
"""(a | b).filter(c) -> (a OR b) AND c — the core #23 bug."""
status = dim("status")
q = (
model.query()
.filter((status == "active") | (status == "pending"))
.filter(dim("region") == "west")
)
d = q._build_payload()["filters"]["dimensions"]
assert list(d) == ["and"]
assert "or" in d["and"][0]
assert len(d["and"][0]["or"]) == 2
assert d["and"][1]["target"]["fieldId"] == "m_region"

def test_two_simple_filters_and_flat(self, model):
q = model.query().filter(dim("a") == 1).filter(dim("b") == 2)
rules = q._build_payload()["filters"]["dimensions"]["and"]
assert _ids(rules) == ["m_a", "m_b"]

def test_filter_composite_then_composite(self, model):
q = (
model.query()
.filter((dim("a") == 1) | (dim("b") == 2))
.filter((dim("c") == 3) | (dim("d") == 4))
)
d = q._build_payload()["filters"]["dimensions"]
assert list(d) == ["and"]
assert all("or" in item for item in d["and"])


class TestCrossFieldTypeHandling:
def test_and_across_dimension_and_calc_splits(self):
calc = TableCalculation(name="ratio", sql="1")
f = (dim("country") == "USA") & (calc > 0.2)
d = f.to_dict()
assert d["dimensions"]["and"][0]["target"]["fieldId"] == "m_country"
assert d["tableCalculations"]["and"][0]["target"]["fieldId"] == "ratio"

def test_and_with_nested_calc_or(self):
calc_a = TableCalculation(name="ca", sql="1")
calc_b = TableCalculation(name="cb", sql="2")
f = (dim("country") == "USA") & ((calc_a > 1) | (calc_b > 2))
d = f.to_dict()
assert _ids(d["dimensions"]["and"]) == ["m_country"]
calc_group = d["tableCalculations"]["and"]
assert len(calc_group) == 1 and "or" in calc_group[0]
assert _ids(calc_group[0]["or"]) == ["ca", "cb"]

def test_or_across_dimension_and_calc_raises(self):
calc = TableCalculation(name="ratio", sql="1")
f = (dim("country") == "USA") | (calc > 0.2)
with pytest.raises(ValueError, match="Cannot combine dimension and table calculation"):
f.to_dict()