Choose Python function to call based on a regex

Is it possible to put a function in a data structure, without first giving it a name with def?

# This is the behaviour I want. Prints "hi".
def myprint(msg):
    print msg
f_list = [ myprint ]
f_list[0]('hi')
# The word "myprint" is never used again. Why litter the namespace with it?

The body of a lambda function is severely limited, so I can't use them.

Edit: For reference, this is more like the real-life code where I encountered the problem.

def handle_message( msg ):
    print msg
def handle_warning( msg ):
    global num_warnings, num_fatals
    num_warnings += 1
    if ( is_fatal( msg ) ):
        num_fatals += 1
handlers = (
    ( re.compile( '^<\w+> (.*)' ), handle_message ),
    ( re.compile( '^\*{3} (.*)' ), handle_warning ),
)
# There are really 10 or so handlers, of similar length.
# The regexps are uncomfortably separated from the handler bodies,
# and the code is unnecessarily long.

for line in open( "log" ):
    for ( regex, handler ) in handlers:
        m = regex.search( line )
        if ( m ): handler( m.group(1) )

Solution 1:

This is based on Udi's nice answer.

I think that the difficulty of creating anonymous functions is a bit of a red herring. What you really want to do is to keep related code together, and make the code neat. So I think decorators may work for you.

import re

# List of pairs (regexp, handler)
handlers = []

def handler_for(regexp):
    """Declare a function as handler for a regular expression."""
    def gethandler(f):
        handlers.append((re.compile(regexp), f))
        return f
    return gethandler

@handler_for(r'^<\w+> (.*)')
def handle_message(msg):
    print msg

@handler_for(r'^\*{3} (.*)')
def handle_warning(msg):
    global num_warnings, num_fatals
    num_warnings += 1
    if is_fatal(msg):
        num_fatals += 1

Solution 2:

Nicer DRY way to solve your actual problem:

def message(msg):
    print msg
message.re = '^<\w+> (.*)'

def warning(msg):
    global num_warnings, num_fatals
    num_warnings += 1
    if ( is_fatal( msg ) ):
        num_fatals += 1
warning.re = '^\*{3} (.*)'

handlers = [(re.compile(x.re), x) for x in [
        message,
        warning,
        foo,
        bar,
        baz,
    ]]

Solution 3:

Continuing Gareth's clean approach with a modular self contained solution:

import re

# in util.py
class GenericLogProcessor(object):

    def __init__(self):
      self.handlers = [] # List of pairs (regexp, handler)

    def register(self, regexp):
        """Declare a function as handler for a regular expression."""
        def gethandler(f):
            self.handlers.append((re.compile(regexp), f))
            return f
        return gethandler

    def process(self, file):
        """Process a file line by line and execute all handlers by registered regular expressions"""
        for line in file:
            for regex, handler in self.handlers:
                m = regex.search(line)
                if (m):
                  handler(m.group(1))      

# in log_processor.py
log_processor = GenericLogProcessor()

@log_processor.register(r'^<\w+> (.*)')
def handle_message(msg):
    print msg

@log_processor.register(r'^\*{3} (.*)')
def handle_warning(msg):
    global num_warnings, num_fatals
    num_warnings += 1
    if is_fatal(msg):
        num_fatals += 1

# in your code
with open("1.log") as f:
  log_processor.process(f)

Solution 4:

If you want to keep a clean namespace, use del:

def myprint(msg):
    print msg
f_list = [ myprint ]
del myprint
f_list[0]('hi')