pytorch/torch/utils/_ordered_set.py
eellison 5f2c80d16d Add inductor OrderedSet (#130003)
Implemented by extending `collections.abc.MutableSet` and backing it with a dictionary, which is ordered. From collections.abc.MutableSet:

```
    A mutable set is a finite, iterable container.

    This class provides concrete generic implementations of all
    methods except for __contains__, __iter__, __len__,
    add(), and discard().
```

In addition to implementing those methods I also had to define some methods of python's set which were not implemented in MutableSet.

I reused the set tests that ship in my Python's standard library. A few of those tests did not pass because they exercise edge-case behavior that is not necessary to reimplement:
- support self-referencing repr
- erroring when a member's `__eq__` function would modify the set itself
- MutableSet supports Iterables as inputs, but not sequences (pretty rare..)
- Some specifics of exact equivalent type errors being thrown
- [The protocol for automatic conversion to immutable](https://docs.python.org/2/library/sets.html#protocol-for-automatic-conversion-to-immutable)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130003
Approved by: https://github.com/aorenste
2024-07-26 18:16:57 +00:00

180 lines
5.7 KiB
Python

from __future__ import annotations
from collections.abc import MutableSet, Set as AbstractSet
from typing import (
Any,
cast,
Dict,
Generic,
Iterable,
Iterator,
List,
Optional,
Tuple,
Type,
TypeVar,
)
# Element type stored in an OrderedSet.
T = TypeVar("T")
# Covariant element type used to annotate the right-hand operand of the
# binary set operators below.
T_co = TypeVar("T_co", covariant=True)

__all__ = ["OrderedSet"]


# Using Generic[T] bc py38 does not support type parameterized MutableSet
class OrderedSet(Generic[T], MutableSet):
    """
    Insertion ordered set, similar to OrderedDict.

    Elements are stored as the keys of an internal ``dict`` whose values are
    always ``None``; iteration therefore follows the dict's key order.
    ``MutableSet`` supplies the generic operator implementations, and this
    class adds the named ``set`` methods (``union``, ``difference``, ...)
    that ``MutableSet`` does not provide.
    """

    __slots__ = ("_dict",)

    def __init__(self, iterable: Optional[Iterable[T]] = None):
        """Build the set from ``iterable``, or an empty set when it is None.

        Duplicate elements collapse onto their first occurrence.
        """
        self._dict = dict.fromkeys(iterable, None) if iterable is not None else {}

    @staticmethod
    def _from_dict(dict_inp: Dict[T, None]) -> OrderedSet[T]:
        """Wrap ``dict_inp`` in a new OrderedSet without copying it.

        The returned set uses ``dict_inp`` itself as its backing store, so
        callers must not keep mutating the dict afterwards.
        """
        s: OrderedSet[T] = OrderedSet()
        s._dict = dict_inp
        return s

    #
    # Required overridden abstract methods
    #
    def __contains__(self, elem: object) -> bool:
        """Return True when ``elem`` is a member of the set."""
        return elem in self._dict

    def __iter__(self) -> Iterator[T]:
        """Iterate over the elements in the backing dict's key order."""
        return iter(self._dict)

    def __len__(self) -> int:
        """Return the number of elements."""
        return len(self._dict)

    def add(self, elem: T) -> None:
        """Insert ``elem``; an element that is already present is left as is."""
        self._dict[elem] = None

    def discard(self, elem: T) -> None:
        """Remove ``elem`` if present; do nothing otherwise."""
        self._dict.pop(elem, None)

    def clear(self) -> None:
        """Remove every element."""
        # overridden because MutableSet impl is slow
        self._dict.clear()

    # Unimplemented set() methods in _collections_abc.MutableSet
    @classmethod
    def _wrap_iter_in_set(cls, other: Any) -> Any:
        """
        Wrap non-Set Iterables in OrderedSets

        Some of the magic methods are more strict on input types than
        the public apis, so we need to wrap inputs in sets.

        Anything that is already a ``Set``, or that is not iterable at all,
        is returned unchanged.
        """
        if not isinstance(other, AbstractSet) and isinstance(other, Iterable):
            return cls(other)
        else:
            return other

    def pop(self) -> T:
        """Remove and return one element via ``dict.popitem()``.

        Raises:
            KeyError: if the set is empty.
        """
        if not self:
            raise KeyError("pop from an empty set")
        return self._dict.popitem()[0]

    def copy(self) -> OrderedSet[T]:
        """Return a shallow copy; always a plain ``OrderedSet``."""
        return OrderedSet._from_dict(self._dict.copy())

    def difference(self, *others: Iterable[T]) -> OrderedSet[T]:
        """Return a new set with the elements of every ``others`` removed."""
        res = self.copy()
        res.difference_update(*others)
        return res

    def difference_update(self, *others: Iterable[T]) -> None:
        """Remove, in place, every element found in any of ``others``."""
        for other in others:
            self -= other  # type: ignore[operator, arg-type]

    def update(self, *others: Iterable[T]) -> None:
        """Add, in place, the elements of each of ``others`` in turn."""
        for other in others:
            self |= other  # type: ignore[operator, arg-type]

    def intersection(self, *others: Iterable[T]) -> OrderedSet[T]:
        """Return a new set of the elements common to self and all ``others``."""
        res = self.copy()
        for other in others:
            # Intersecting with ourselves cannot remove anything; skip it.
            if other is not self:
                res &= other  # type: ignore[operator, arg-type]
        return res

    def intersection_update(self, *others: Iterable[T]) -> None:
        """Keep, in place, only the elements also found in all ``others``."""
        for other in others:
            self &= other  # type: ignore[operator, arg-type]

    def issubset(self, other: Iterable[T]) -> bool:
        """Return True when every element of self is in ``other``."""
        return self <= self._wrap_iter_in_set(other)

    def issuperset(self, other: Iterable[T]) -> bool:
        """Return True when every element of ``other`` is in self."""
        return self >= self._wrap_iter_in_set(other)

    def symmetric_difference(self, other: Iterable[T]) -> OrderedSet[T]:
        """Return a new set of the elements in exactly one of self and ``other``."""
        return self ^ other  # type: ignore[operator, arg-type]

    def symmetric_difference_update(self, other: Iterable[T]) -> None:
        """Update self, in place, to the symmetric difference with ``other``."""
        self ^= other  # type: ignore[operator, arg-type]

    def union(self, *others: Iterable[T]) -> OrderedSet[T]:
        """Return a new set with self's elements followed by any new ones."""
        res = self.copy()
        for other in others:
            # A union with ourselves adds nothing; skip it.
            if other is self:
                continue
            res |= other  # type: ignore[operator, arg-type]
        return res

    # Specify here for correct type inference, otherwise would
    # return AbstractSet[T]
    def __sub__(self, other: AbstractSet[T_co]) -> OrderedSet[T]:
        """Return the elements of self that are not in ``other``."""
        # following cpython set impl optimization
        if isinstance(other, OrderedSet) and (len(self) * 4) > len(other):
            # Copy self and discard other's elements from the copy.
            out = self.copy()
            out -= other
            return out
        return cast(OrderedSet[T], super().__sub__(other))

    def __ior__(self, other: Iterable[T]) -> OrderedSet[T]:  # type: ignore[misc, override]  # noqa: PYI034
        """In-place union; merges the backing dicts when ``other`` is an OrderedSet."""
        if isinstance(other, OrderedSet):
            self._dict.update(other._dict)
            return self
        return super().__ior__(other)  # type: ignore[arg-type]

    def __eq__(self, other: AbstractSet[T]) -> bool:  # type: ignore[misc, override]
        """Compare by membership.

        Two OrderedSets are compared through their backing dicts; anything
        else is delegated to the inherited comparison.
        """
        if isinstance(other, OrderedSet):
            return self._dict == other._dict
        return super().__eq__(other)  # type: ignore[arg-type]

    def __ne__(self, other: AbstractSet[T]) -> bool:  # type: ignore[misc, override]
        """Inverse of ``__eq__``, with the same OrderedSet fast path."""
        if isinstance(other, OrderedSet):
            return self._dict != other._dict
        return super().__ne__(other)  # type: ignore[arg-type]

    def __or__(self, other: AbstractSet[T_co]) -> OrderedSet[T]:
        """Return the union of self and ``other``."""
        return cast(OrderedSet[T], super().__or__(other))

    def __and__(self, other: AbstractSet[T_co]) -> OrderedSet[T]:
        """Return the intersection of self and ``other``.

        NOTE: when both operands are OrderedSets and self is the smaller one,
        the operands are swapped before delegating, so the order of the
        result depends on the relative sizes of the two sets.
        """
        # MutableSet impl will iterate over other, iter over smaller of two sets
        if isinstance(other, OrderedSet) and len(self) < len(other):
            return other & self
        return cast(OrderedSet[T], super().__and__(other))

    def __xor__(self, other: AbstractSet[T_co]) -> OrderedSet[T]:
        """Return the symmetric difference of self and ``other``."""
        return cast(OrderedSet[T], super().__xor__(other))

    def __repr__(self) -> str:
        """Return ``ClassName([elem, ...])`` using the runtime class name."""
        return f"{self.__class__.__name__}({list(self)})"

    def __getstate__(self) -> List[T]:
        """Return the elements as a list, in iteration order."""
        return list(self._dict)

    def __setstate__(self, state: List[T]) -> None:
        """Rebuild the backing dict from a list of elements."""
        self._dict = dict.fromkeys(state, None)

    def __reduce__(self) -> Tuple[Type[OrderedSet[T]], Tuple[List[T]]]:
        """Describe the set as ``OrderedSet(list_of_elements)`` for pickle/copy."""
        return (OrderedSet, (list(self),))