How to make built-in containers (sets, dicts, lists) thread safe?
Solution 1:
You can use Python's metaprogramming facilities to accomplish this. (Note: written quickly and not thoroughly tested.) I prefer to use a class decorator.
I also think you may need to lock more than add
and remove
to make a set thread-safe, but I'm not sure. I'll ignore that problem and just concentrate on your question.
Also consider whether delegation (proxying) is a better fit than subclassing. Wrapping objects is the usual approach in Python.
Finally, there is no "magic wand" of metaprogramming that will magically add fine-grained locking to any mutable Python collection. The safest thing to do is to lock any method or attribute access using RLock
, but this is very coarse-grained and slow and probably still not a guarantee that your object will be thread-safe in all cases. (For example, you may have a collection that manipulates another non-threadsafe object accessible to other threads.) You really do need to examine each and every data structure and think about what operations are atomic or require locks and which methods might call other methods using the same lock (i.e., deadlock itself).
That said, here are some techniques at your disposal in increasing order of abstraction:
Delegation
class LockProxy(object):
def __init__(self, obj):
self.__obj = obj
self.__lock = RLock()
# RLock because object methods may call own methods
def __getattr__(self, name):
def wrapped(*a, **k):
with self.__lock:
getattr(self.__obj, name)(*a, **k)
return wrapped
lockedset = LockProxy(set([1,2,3]))
Context manager
class LockedSet(set):
"""A set where add(), remove(), and 'in' operator are thread-safe"""
def __init__(self, *args, **kwargs):
self._lock = Lock()
super(LockedSet, self).__init__(*args, **kwargs)
def add(self, elem):
with self._lock:
super(LockedSet, self).add(elem)
def remove(self, elem):
with self._lock:
super(LockedSet, self).remove(elem)
def __contains__(self, elem):
with self._lock:
super(LockedSet, self).__contains__(elem)
Decorator
def locked_method(method):
"""Method decorator. Requires a lock object at self._lock"""
def newmethod(self, *args, **kwargs):
with self._lock:
return method(self, *args, **kwargs)
return newmethod
class DecoratorLockedSet(set):
def __init__(self, *args, **kwargs):
self._lock = Lock()
super(DecoratorLockedSet, self).__init__(*args, **kwargs)
@locked_method
def add(self, *args, **kwargs):
return super(DecoratorLockedSet, self).add(elem)
@locked_method
def remove(self, *args, **kwargs):
return super(DecoratorLockedSet, self).remove(elem)
Class Decorator
I think this is the cleanest and easiest-to-understand of the abstract methods, so I've expanded it to allow one to specify the methods to lock and a lock object factory.
def lock_class(methodnames, lockfactory):
return lambda cls: make_threadsafe(cls, methodnames, lockfactory)
def lock_method(method):
if getattr(method, '__is_locked', False):
raise TypeError("Method %r is already locked!" % method)
def locked_method(self, *arg, **kwarg):
with self._lock:
return method(self, *arg, **kwarg)
locked_method.__name__ = '%s(%s)' % ('lock_method', method.__name__)
locked_method.__is_locked = True
return locked_method
def make_threadsafe(cls, methodnames, lockfactory):
init = cls.__init__
def newinit(self, *arg, **kwarg):
init(self, *arg, **kwarg)
self._lock = lockfactory()
cls.__init__ = newinit
for methodname in methodnames:
oldmethod = getattr(cls, methodname)
newmethod = lock_method(oldmethod)
setattr(cls, methodname, newmethod)
return cls
@lock_class(['add','remove'], Lock)
class ClassDecoratorLockedSet(set):
@lock_method # if you double-lock a method, a TypeError is raised
def frobnify(self):
pass
Override Attribute access with __getattribute__
class AttrLockedSet(set):
def __init__(self, *args, **kwargs):
self._lock = Lock()
super(AttrLockedSet, self).__init__(*args, **kwargs)
def __getattribute__(self, name):
if name in ['add','remove']:
# note: makes a new callable object "lockedmethod" on every call
# best to add a layer of memoization
lock = self._lock
def lockedmethod(*args, **kwargs):
with lock:
return super(AttrLockedSet, self).__getattribute__(name)(*args, **kwargs)
return lockedmethod
else:
return super(AttrLockedSet, self).__getattribute__(name)
Dynamically-added wrapper methods with __new__
class NewLockedSet(set):
def __new__(cls, *args, **kwargs):
# modify the class by adding new unbound methods
# you could also attach a single __getattribute__ like above
for membername in ['add', 'remove']:
def scoper(membername=membername):
# You can also return the function or use a class
def lockedmethod(self, *args, **kwargs):
with self._lock:
m = getattr(super(NewLockedSet, self), membername)
return m(*args, **kwargs)
lockedmethod.__name__ = membername
setattr(cls, membername, lockedmethod)
self = super(NewLockedSet, cls).__new__(cls, *args, **kwargs)
self._lock = Lock()
return self
Dynamically-added wrapper methods with __metaclass__
def _lockname(classname):
return '_%s__%s' % (classname, 'lock')
class LockedClass(type):
def __new__(mcls, name, bases, dict_):
# we'll bind these after we add the methods
cls = None
def lockmethodfactory(methodname, lockattr):
def lockedmethod(self, *args, **kwargs):
with getattr(self, lockattr):
m = getattr(super(cls, self), methodname)
return m(*args,**kwargs)
lockedmethod.__name__ = methodname
return lockedmethod
lockattr = _lockname(name)
for methodname in ['add','remove']:
dict_[methodname] = lockmethodfactory(methodname, lockattr)
cls = type.__new__(mcls, name, bases, dict_)
return cls
def __call__(self, *args, **kwargs):
#self is a class--i.e. an "instance" of the LockedClass type
instance = super(LockedClass, self).__call__(*args, **kwargs)
setattr(instance, _lockname(self.__name__), Lock())
return instance
class MetaLockedSet(set):
__metaclass__ = LockedClass
Dynamically-created Metaclasses
def LockedClassMetaFactory(wrapmethods):
class LockedClass(type):
def __new__(mcls, name, bases, dict_):
# we'll bind these after we add the methods
cls = None
def lockmethodfactory(methodname, lockattr):
def lockedmethod(self, *args, **kwargs):
with getattr(self, lockattr):
m = getattr(super(cls, self), methodname)
return m(*args,**kwargs)
lockedmethod.__name__ = methodname
return lockedmethod
lockattr = _lockname(name)
for methodname in wrapmethods:
dict_[methodname] = lockmethodfactory(methodname, lockattr)
cls = type.__new__(mcls, name, bases, dict_)
return cls
def __call__(self, *args, **kwargs):
#self is a class--i.e. an "instance" of the LockedClass type
instance = super(LockedClass, self).__call__(*args, **kwargs)
setattr(instance, _lockname(self.__name__), Lock())
return instance
return LockedClass
class MetaFactoryLockedSet(set):
__metaclass__ = LockedClassMetaFactory(['add','remove'])
I'll bet using a simple, explicit try...finally
doesn't look so bad now, right?
Exercise for the reader: let the caller pass in their own Lock()
object (dependency injection) using any of these methods.
Solution 2:
This is my first attempt to play with decorators (although my code doesn't actually use the @decorate syntax), and I don't have much experience with multi-threading/multiprocessing. With that disclaimer, though, here's an attempt I made:
from multiprocessing import Lock
def decorate_all(obj):
lock = Lock()
#you'll want to make this more robust:
fnc_names = [fnctn for fnctn in dir(obj) if '__' not in fnctn]
for name in fnc_names:
print 'decorating ' + name
fnc = getattr(obj, name)
setattr(obj, name, decorate(fnc, lock))
return obj
def decorate(fnctn, lock):
def decorated(*args):
print 'acquiring lock'
lock.acquire()
try:
print 'calling decorated function'
return fnctn(*args)
finally:
print 'releasing lock'
lock.release()
return decorated
def thread_safe(superclass):
lock = Lock()
class Thread_Safe(superclass):
def __init__(self, *args, **kwargs):
super(Thread_Safe, self).__init__(*args, **kwargs)
return decorate_all(Thread_Safe)
>>> thread_safe_set = thread_safe(set)
decorating add
decorating clear
decorating copy
decorating difference
decorating difference_update
decorating discard
decorating intersection
decorating intersection_update
decorating isdisjoint
decorating issubset
decorating issuperset
decorating pop
decorating remove
decorating symmetric_difference
decorating symmetric_difference_update
decorating union
decorating update
>>> s = thread_safe_set()
>>> s.add(1)
acquiring lock
calling decorated function
releasing lock
>>> s.add(4)
acquiring lock
calling decorated function
releasing lock
>>> s.pop()
acquiring lock
calling decorated function
releasing lock
1
>>> s.pop()
acquiring lock
calling decorated function
releasing lock
4
>>>
Solution 3:
[Indeed, see the comments, it is not true]
If you are running CPython you can see from the set source code that it doesn't release the GIL (http://hg.python.org/cpython/file/db20367b20de/Objects/setobject.c) so all its operations should be atomic.
If it is all what you need and you are sure to run your code on CPython you can just use it directly.