Find sets of disjoint sets from a list of tuples or sets in python
Here is the problem: I have a list of tuples (they could be sets as well, if needed). For instance:
a = [(1, 5), (4, 2), (4, 3), (5, 4), (6, 3), (7, 6)]
What I want to find is a list
r = [(1, 5, 4, 2, 3, 6, 7)]
because the tuples overlap transitively: each one shares at least one element with another, so once they are all put together they merge into a single group.
For the example
a = [(1, 5), (4, 2), (4, 3), (5, 4), (6, 3), (7, 6), (8, 9)]
the result should be
r = [(1, 5, 4, 2, 3, 6, 7), (8, 9)]
I hope the problem is clear. What is the most elegant way to do this in Python, if there is one?
Cheers
These are the connected components of a graph, and they can be found using a graph library such as networkx. For your second example:
>>> edges = [(1, 5), (4, 2), (4, 3), (5, 4), (6, 3), (7, 6), (8, 9)]
>>> graph = nx.Graph(edges)
>>> [tuple(c) for c in nx.connected_components(graph)]
[(1, 2, 3, 4, 5, 6, 7), (8, 9)]
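Take a look at this implementation. It is a disjoint-set structure in which every member maps directly to its group's leader, so finding a member's group is a single dictionary lookup, and a merge always folds the smaller group into the larger one. Note that it does not use path compression, and that `add` copies the whole state first so that the last call can be undone, which costs time proportional to the number of members: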
class DisjointSet(object):
    """Disjoint-set structure with a one-step undo.

    Every member maps directly to the leader of its group (``leader``), and
    every leader maps to the set of members in its group (``group``).  When
    two groups are merged, the smaller one is folded into the larger one.

    ``None`` cannot be used as a member: a ``None`` leader is how the lookups
    in ``add`` and ``connected`` recognise an unknown member.

    Attributes:
        leader: dict mapping a member to its group's leader.
        group: dict mapping a group leader to the set of its members.
        oldleader: snapshot of ``leader`` taken at the start of the last
            ``add`` call.
        oldgroup: snapshot of ``group`` taken at the start of the last
            ``add`` call.
    """

    def __init__(self, size=None):
        """Create an empty structure, or ``size`` singleton groups 0..size-1.

        Args:
            size: if given, the members ``0 .. size - 1`` are pre-registered,
                each as the leader of its own one-element group.
        """
        if size is None:
            self.leader = {}  # maps a member to the group's leader
            self.group = {}  # maps a group leader to the group (a set)
            self.oldgroup = {}
            self.oldleader = {}
        else:
            self.group = {i: {i} for i in range(size)}
            self.leader = {i: i for i in range(size)}
            # The snapshots get their own set objects so that they never
            # alias the live groups.
            self.oldgroup = {i: {i} for i in range(size)}
            self.oldleader = {i: i for i in range(size)}

    def add(self, a, b):
        """Record that ``a`` and ``b`` belong to the same group.

        Unknown members are registered on the fly.  The state before the
        call is saved first, so that ``undo`` can revert exactly this call.

        Args:
            a: first member (hashable, not ``None``).
            b: second member (hashable, not ``None``).
        """
        # The member sets are mutated in place below, so the snapshot must
        # copy each set; a plain ``dict.copy()`` would share them and
        # ``undo`` would then restore already-modified groups.
        self.oldgroup = {
            leader: set(members) for leader, members in self.group.items()
        }
        self.oldleader = self.leader.copy()

        leadera = self.leader.get(a)
        leaderb = self.leader.get(b)

        if leadera is None and leaderb is None:
            # Neither member is known: start a new group led by ``a``.
            self.leader[a] = self.leader[b] = a
            self.group[a] = {a, b}
            return

        if leaderb is None:
            # Only ``a`` is known: ``b`` joins a's group.
            self.group[leadera].add(b)
            self.leader[b] = leadera
            return

        if leadera is None:
            # Only ``b`` is known: ``a`` joins b's group.
            self.group[leaderb].add(a)
            self.leader[a] = leaderb
            return

        if leadera == leaderb:
            return  # already in the same group, nothing to do

        groupa = self.group[leadera]
        groupb = self.group[leaderb]
        if len(groupa) < len(groupb):
            # Always fold the smaller group into the larger one so that
            # fewer leader entries have to be rewritten.
            leadera, groupa, leaderb, groupb = (
                leaderb, groupb, leadera, groupa)
        groupa |= groupb
        del self.group[leaderb]
        for member in groupb:
            self.leader[member] = leadera

    def connected(self, a, b):
        """Return True if ``a`` and ``b`` are both known and share a group.

        Args:
            a: first member.
            b: second member.

        Returns:
            bool: False if either member is unknown or they are in
            different groups, True otherwise.
        """
        leadera = self.leader.get(a)
        leaderb = self.leader.get(b)
        return leadera is not None and leadera == leaderb

    def undo(self):
        """Revert the most recent ``add`` call.

        Only one level of history is kept; calling ``undo`` again without
        an intervening ``add`` restores the same snapshot again.
        """
        # Copy the sets again so the restored state does not alias the
        # snapshot it was restored from.
        self.group = {
            leader: set(members) for leader, members in self.oldgroup.items()
        }
        self.leader = self.oldleader.copy()
def test():
    """Small demo: build two groups, undo the last ``add``, print the state.

    After the ``undo`` the (3, 4) group is gone again, so only the group
    containing 0, 1 and 2 is printed.
    """
    x = DisjointSet()
    x.add(0, 1)
    x.add(0, 2)
    x.add(3, 4)
    x.undo()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(x.leader)
    print(x.group)


if __name__ == "__main__":
    test()
You can also undo the last add. In your case you can do the following:
# Import the class itself: a plain ``import DisjointSet`` binds the module,
# and calling a module raises TypeError.
from DisjointSet import DisjointSet

a = [(1, 5), (4, 2), (4, 3), (5, 4), (6, 3), (7, 6)]
d = DisjointSet()
for e in a:
    # Each tuple links its two values into the same group.
    d.add(*e)
# ``group`` maps each leader to its members; ``leader`` maps each member
# back to its leader.
print(d.group)
print(d.leader)