1 """Classes to represent arbitrary sets (including sets of sets).
2 
3 This module implements sets using dictionaries whose values are
4 ignored.  The usual operations (union, intersection, deletion, etc.)
5 are provided as both methods and operators.
6 
7 Important: sets are not sequences!  While they support 'x in s',
8 'len(s)', and 'for x in s', none of those operations are unique for
9 sequences; for example, mappings support all three as well.  The
10 characteristic operation for sequences is subscripting with small
11 integers: s[i], for i in range(len(s)).  Sets don't support
12 subscripting at all.  Also, sequences allow multiple occurrences and
13 their elements have a definite order; sets on the other hand don't
14 record multiple occurrences and don't remember the order of element
15 insertion (which is why they don't support s[i]).
16 
17 The following classes are provided:
18 
19 BaseSet -- All the operations common to both mutable and immutable
20     sets. This is an abstract class, not meant to be directly
21     instantiated.
22 
23 Set -- Mutable sets, subclass of BaseSet; not hashable.
24 
25 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
26     An iterable argument is mandatory to create an ImmutableSet.
27 
28 _TemporarilyImmutableSet -- A wrapper around a Set, hashable,
29     giving the same hash value as the immutable set equivalent
30     would have.  Do not use this class directly.
31 
32 Only hashable objects can be added to a Set. In particular, you cannot
33 really add a Set as an element to another Set; if you try, what is
34 actually added is an ImmutableSet built from it (it compares equal to
35 the one you tried adding).
36 
37 When you ask if `x in y' where x is a Set and y is a Set or
38 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
39 what's tested is actually `z in y'.
40 
41 """
42 
43 # Code history:
44 #
45 # - Greg V. Wilson wrote the first version, using a different approach
46 #   to the mutable/immutable problem, and inheriting from dict.
47 #
48 # - Alex Martelli modified Greg's version to implement the current
49 #   Set/ImmutableSet approach, and make the data an attribute.
50 #
51 # - Guido van Rossum rewrote much of the code, made some API changes,
52 #   and cleaned up the docstrings.
53 #
54 # - Raymond Hettinger added a number of speedups and other
55 #   improvements.
56 
57 from itertools import ifilter, ifilterfalse
58 
59 __all__ = ['BaseSet', 'Set', 'ImmutableSet']
60 
61 import warnings
62 warnings.warn("the sets module is deprecated", DeprecationWarning,
63                 stacklevel=2)
64 
65 class BaseSet(object):
66     """Common base class for mutable and immutable sets."""
67 
68     __slots__ = ['_data']
69 
70     # Constructor
71 
72     def __init__(self):
73         """This is an abstract class."""
74         # Don't call this from a concrete subclass!
75         if self.__class__ is BaseSet:
76             raise TypeError, ("BaseSet is an abstract class.  "
77                               "Use Set or ImmutableSet.")
78 
79     # Standard protocols: __len__, __repr__, __str__, __iter__
80 
81     def __len__(self):
82         """Return the number of elements of a set."""
83         return len(self._data)
84 
85     def __repr__(self):
86         """Return string representation of a set.
87 
88         This looks like 'Set([<list of elements>])'.
89         """
90         return self._repr()
91 
92     # __str__ is the same as __repr__
93     __str__ = __repr__
94 
95     def _repr(self, sorted=False):
96         elements = self._data.keys()
97         if sorted:
98             elements.sort()
99         return '%s(%r)' % (self.__class__.__name__, elements)
100 
101     def __iter__(self):
102         """Return an iterator over the elements or a set.
103 
104         This is the keys iterator for the underlying dict.
105         """
106         return self._data.iterkeys()
107 
108     # Three-way comparison is not supported.  However, because __eq__ is
109     # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
110     # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
111     # case).
112 
113     def __cmp__(self, other):
114         raise TypeError, "can't compare sets using cmp()"
115 
116     # Equality comparisons using the underlying dicts.  Mixed-type comparisons
117     # are allowed here, where Set == z for non-Set z always returns False,
118     # and Set != z always True.  This allows expressions like "x in y" to
119     # give the expected result when y is a sequence of mixed types, not
120     # raising a pointless TypeError just because y contains a Set, or x is
121     # a Set and y contain's a non-set ("in" invokes only __eq__).
122     # Subtle:  it would be nicer if __eq__ and __ne__ could return
123     # NotImplemented instead of True or False.  Then the other comparand
124     # would get a chance to determine the result, and if the other comparand
125     # also returned NotImplemented then it would fall back to object address
126     # comparison (which would always return False for __eq__ and always
127     # True for __ne__).  However, that doesn't work, because this type
128     # *also* implements __cmp__:  if, e.g., __eq__ returns NotImplemented,
129     # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.
130 
131     def __eq__(self, other):
132         if isinstance(other, BaseSet):
133             return self._data == other._data
134         else:
135             return False
136 
137     def __ne__(self, other):
138         if isinstance(other, BaseSet):
139             return self._data != other._data
140         else:
141             return True
142 
143     # Copying operations
144 
145     def copy(self):
146         """Return a shallow copy of a set."""
147         result = self.__class__()
148         result._data.update(self._data)
149         return result
150 
151     __copy__ = copy # For the copy module
152 
153     def __deepcopy__(self, memo):
154         """Return a deep copy of a set; used by copy module."""
155         # This pre-creates the result and inserts it in the memo
156         # early, in case the deep copy recurses into another reference
157         # to this same set.  A set can't be an element of itself, but
158         # it can certainly contain an object that has a reference to
159         # itself.
160         from copy import deepcopy
161         result = self.__class__()
162         memo[id(self)] = result
163         data = result._data
164         value = True
165         for elt in self:
166             data[deepcopy(elt, memo)] = value
167         return result
168 
169     # Standard set operations: union, intersection, both differences.
170     # Each has an operator version (e.g. __or__, invoked with |) and a
171     # method version (e.g. union).
172     # Subtle:  Each pair requires distinct code so that the outcome is
173     # correct when the type of other isn't suitable.  For example, if
174     # we did "union = __or__" instead, then Set().union(3) would return
175     # NotImplemented instead of raising TypeError (albeit that *why* it
176     # raises TypeError as-is is also a bit subtle).
177 
178     def __or__(self, other):
179         """Return the union of two sets as a new set.
180 
181         (I.e. all elements that are in either set.)
182         """
183         if not isinstance(other, BaseSet):
184             return NotImplemented
185         return self.union(other)
186 
187     def union(self, other):
188         """Return the union of two sets as a new set.
189 
190         (I.e. all elements that are in either set.)
191         """
192         result = self.__class__(self)
193         result._update(other)
194         return result
195 
196     def __and__(self, other):
197         """Return the intersection of two sets as a new set.
198 
199         (I.e. all elements that are in both sets.)
200         """
201         if not isinstance(other, BaseSet):
202             return NotImplemented
203         return self.intersection(other)
204 
205     def intersection(self, other):
206         """Return the intersection of two sets as a new set.
207 
208         (I.e. all elements that are in both sets.)
209         """
210         if not isinstance(other, BaseSet):
211             other = Set(other)
212         if len(self) <= len(other):
213             little, big = self, other
214         else:
215             little, big = other, self
216         common = ifilter(big._data.__contains__, little)
217         return self.__class__(common)
218 
219     def __xor__(self, other):
220         """Return the symmetric difference of two sets as a new set.
221 
222         (I.e. all elements that are in exactly one of the sets.)
223         """
224         if not isinstance(other, BaseSet):
225             return NotImplemented
226         return self.symmetric_difference(other)
227 
228     def symmetric_difference(self, other):
229         """Return the symmetric difference of two sets as a new set.
230 
231         (I.e. all elements that are in exactly one of the sets.)
232         """
233         result = self.__class__()
234         data = result._data
235         value = True
236         selfdata = self._data
237         try:
238             otherdata = other._data
239         except AttributeError:
240             otherdata = Set(other)._data
241         for elt in ifilterfalse(otherdata.__contains__, selfdata):
242             data[elt] = value
243         for elt in ifilterfalse(selfdata.__contains__, otherdata):
244             data[elt] = value
245         return result
246 
247     def  __sub__(self, other):
248         """Return the difference of two sets as a new Set.
249 
250         (I.e. all elements that are in this set and not in the other.)
251         """
252         if not isinstance(other, BaseSet):
253             return NotImplemented
254         return self.difference(other)
255 
256     def difference(self, other):
257         """Return the difference of two sets as a new Set.
258 
259         (I.e. all elements that are in this set and not in the other.)
260         """
261         result = self.__class__()
262         data = result._data
263         try:
264             otherdata = other._data
265         except AttributeError:
266             otherdata = Set(other)._data
267         value = True
268         for elt in ifilterfalse(otherdata.__contains__, self):
269             data[elt] = value
270         return result
271 
272     # Membership test
273 
274     def __contains__(self, element):
275         """Report whether an element is a member of a set.
276 
277         (Called in response to the expression `element in self'.)
278         """
279         try:
280             return element in self._data
281         except TypeError:
282             transform = getattr(element, "__as_temporarily_immutable__", None)
283             if transform is None:
284                 raise # re-raise the TypeError exception we caught
285             return transform() in self._data
286 
287     # Subset and superset test
288 
289     def issubset(self, other):
290         """Report whether another set contains this set."""
291         self._binary_sanity_check(other)
292         if len(self) > len(other):  # Fast check for obvious cases
293             return False
294         for elt in ifilterfalse(other._data.__contains__, self):
295             return False
296         return True
297 
298     def issuperset(self, other):
299         """Report whether this set contains another set."""
300         self._binary_sanity_check(other)
301         if len(self) < len(other):  # Fast check for obvious cases
302             return False
303         for elt in ifilterfalse(self._data.__contains__, other):
304             return False
305         return True
306 
307     # Inequality comparisons using the is-subset relation.
308     __le__ = issubset
309     __ge__ = issuperset
310 
311     def __lt__(self, other):
312         self._binary_sanity_check(other)
313         return len(self) < len(other) and self.issubset(other)
314 
315     def __gt__(self, other):
316         self._binary_sanity_check(other)
317         return len(self) > len(other) and self.issuperset(other)
318 
319     # We inherit object.__hash__, so we must deny this explicitly
320     __hash__ = None
321 
322     # Assorted helpers
323 
324     def _binary_sanity_check(self, other):
325         # Check that the other argument to a binary operation is also
326         # a set, raising a TypeError otherwise.
327         if not isinstance(other, BaseSet):
328             raise TypeError, "Binary operation only permitted between sets"
329 
330     def _compute_hash(self):
331         # Calculate hash code for a set by xor'ing the hash codes of
332         # the elements.  This ensures that the hash code does not depend
333         # on the order in which elements are added to the set.  This is
334         # not called __hash__ because a BaseSet should not be hashable;
335         # only an ImmutableSet is hashable.
336         result = 0
337         for elt in self:
338             result ^= hash(elt)
339         return result
340 
341     def _update(self, iterable):
342         # The main loop for update() and the subclass __init__() methods.
343         data = self._data
344 
345         # Use the fast update() method when a dictionary is available.
346         if isinstance(iterable, BaseSet):
347             data.update(iterable._data)
348             return
349 
350         value = True
351 
352         if type(iterable) in (list, tuple, xrange):
353             # Optimized: we know that __iter__() and next() can't
354             # raise TypeError, so we can move 'try:' out of the loop.
355             it = iter(iterable)
356             while True:
357                 try:
358                     for element in it:
359                         data[element] = value
360                     return
361                 except TypeError:
362                     transform = getattr(element, "__as_immutable__", None)
363                     if transform is None:
364                         raise # re-raise the TypeError exception we caught
365                     data[transform()] = value
366         else:
367             # Safe: only catch TypeError where intended
368             for element in iterable:
369                 try:
370                     data[element] = value
371                 except TypeError:
372                     transform = getattr(element, "__as_immutable__", None)
373                     if transform is None:
374                         raise # re-raise the TypeError exception we caught
375                     data[transform()] = value
376 
377 
378 class ImmutableSet(BaseSet):
379     """Immutable set class."""
380 
381     __slots__ = ['_hashcode']
382 
383     # BaseSet + hashing
384 
385     def __init__(self, iterable=None):
386         """Construct an immutable set from an optional iterable."""
387         self._hashcode = None
388         self._data = {}
389         if iterable is not None:
390             self._update(iterable)
391 
392     def __hash__(self):
393         if self._hashcode is None:
394             self._hashcode = self._compute_hash()
395         return self._hashcode
396 
397     def __getstate__(self):
398         return self._data, self._hashcode
399 
400     def __setstate__(self, state):
401         self._data, self._hashcode = state
402 
403 class Set(BaseSet):
404     """ Mutable set class."""
405 
406     __slots__ = []
407 
408     # BaseSet + operations requiring mutability; no hashing
409 
410     def __init__(self, iterable=None):
411         """Construct a set from an optional iterable."""
412         self._data = {}
413         if iterable is not None:
414             self._update(iterable)
415 
416     def __getstate__(self):
417         # getstate's results are ignored if it is not
418         return self._data,
419 
420     def __setstate__(self, data):
421         self._data, = data
422 
423     # In-place union, intersection, differences.
424     # Subtle:  The xyz_update() functions deliberately return None,
425     # as do all mutating operations on built-in container types.
426     # The __xyz__ spellings have to return self, though.
427 
428     def __ior__(self, other):
429         """Update a set with the union of itself and another."""
430         self._binary_sanity_check(other)
431         self._data.update(other._data)
432         return self
433 
434     def union_update(self, other):
435         """Update a set with the union of itself and another."""
436         self._update(other)
437 
438     def __iand__(self, other):
439         """Update a set with the intersection of itself and another."""
440         self._binary_sanity_check(other)
441         self._data = (self & other)._data
442         return self
443 
444     def intersection_update(self, other):
445         """Update a set with the intersection of itself and another."""
446         if isinstance(other, BaseSet):
447             self &= other
448         else:
449             self._data = (self.intersection(other))._data
450 
451     def __ixor__(self, other):
452         """Update a set with the symmetric difference of itself and another."""
453         self._binary_sanity_check(other)
454         self.symmetric_difference_update(other)
455         return self
456 
457     def symmetric_difference_update(self, other):
458         """Update a set with the symmetric difference of itself and another."""
459         data = self._data
460         value = True
461         if not isinstance(other, BaseSet):
462             other = Set(other)
463         if self is other:
464             self.clear()
465         for elt in other:
466             if elt in data:
467                 del data[elt]
468             else:
469                 data[elt] = value
470 
471     def __isub__(self, other):
472         """Remove all elements of another set from this set."""
473         self._binary_sanity_check(other)
474         self.difference_update(other)
475         return self
476 
477     def difference_update(self, other):
478         """Remove all elements of another set from this set."""
479         data = self._data
480         if not isinstance(other, BaseSet):
481             other = Set(other)
482         if self is other:
483             self.clear()
484         for elt in ifilter(data.__contains__, other):
485             del data[elt]
486 
487     # Python dict-like mass mutations: update, clear
488 
489     def update(self, iterable):
490         """Add all values from an iterable (such as a list or file)."""
491         self._update(iterable)
492 
493     def clear(self):
494         """Remove all elements from this set."""
495         self._data.clear()
496 
497     # Single-element mutations: add, remove, discard
498 
499     def add(self, element):
500         """Add an element to a set.
501 
502         This has no effect if the element is already present.
503         """
504         try:
505             self._data[element] = True
506         except TypeError:
507             transform = getattr(element, "__as_immutable__", None)
508             if transform is None:
509                 raise # re-raise the TypeError exception we caught
510             self._data[transform()] = True
511 
512     def remove(self, element):
513         """Remove an element from a set; it must be a member.
514 
515         If the element is not a member, raise a KeyError.
516         """
517         try:
518             del self._data[element]
519         except TypeError:
520             transform = getattr(element, "__as_temporarily_immutable__", None)
521             if transform is None:
522                 raise # re-raise the TypeError exception we caught
523             del self._data[transform()]
524 
525     def discard(self, element):
526         """Remove an element from a set if it is a member.
527 
528         If the element is not a member, do nothing.
529         """
530         try:
531             self.remove(element)
532         except KeyError:
533             pass
534 
535     def pop(self):
536         """Remove and return an arbitrary set element."""
537         return self._data.popitem()[0]
538 
539     def __as_immutable__(self):
540         # Return a copy of self as an immutable set
541         return ImmutableSet(self)
542 
543     def __as_temporarily_immutable__(self):
544         # Return self wrapped in a temporarily immutable set
545         return _TemporarilyImmutableSet(self)
546 
547 
548 class _TemporarilyImmutableSet(BaseSet):
549     # Wrap a mutable set as if it was temporarily immutable.
550     # This only supplies hashing and equality comparisons.
551 
552     def __init__(self, set):
553         self._set = set
554         self._data = set._data  # Needed by ImmutableSet.__eq__()
555 
556     def __hash__(self):
557         return self._set._compute_hash()
558