improving python code in Monte Carlo simulation
I've written a Monte Carlo simulation for a "2d active ising model" and I'm trying to improve the runtime.
What my code does: I create a matrix for the number of particles (r) and one for the magnetisation for each spot (rgrid and mgrid). the spins of the particles can be either -1/1 so the magnetisation ranges from [-r, r] in steps of 2.
Then a random spot and a random particle is chosen(+1 or -1). Since the probability depends on the number of positive/negative particles at each place I create 2 arrays and zip them so I can get the fitting number of positive particles, i.e. [(-3, 0), (-1, 1), (1, 2), (3, 3)]. With 3 particles I can have a magnetisation of (-3, -1, 1, 3) which has (0, 1, 2, 3) +1 particles.
After that I calculate the probabilities for the spot and choose an action: spinflip, jump up/down, jump left/right, do nothing. Now I have to move the particle (or not) and change the magnet./density for the 2 spots (and check for periodic boundary conditions).
Here is my code:
from __future__ import print_function
from __future__ import division
from datetime import datetime
import numpy as np
import math
import matplotlib.pyplot as plt
import cProfile
pr = cProfile.Profile()
pr.enable()
m = 10 # zeilen, spalten
j = 1000 # finale zeit
r = 3 # platzdichte
b = 1.6 # beta
e = 0.9 # epsilon
M = m * m # platzanzahl
N = M * r # teilchenanzahl
dt = 1 / (4 * np.exp(b)) # delta-t
i = 0
rgrid = r * np.ones((m, m)).astype(int) # dichte-matrix, rho = n(+) + n(-)
magrange = np.arange(-r, r + 1, 2) # mögliche magnetisierungen [a, b), schrittweite
mgrid = np.random.choice(magrange, (m, m)) # magnetisierungs-matrix m = n(+) - (n-)
def flip():
mgrid[math.trunc(x / m), x % m] -= 2 * spin
def up():
y = x - m
if y < 0: # periodische randbedingung hoch
y += m * m
x1 = math.trunc(x / m)
x2 = x % m
y1 = math.trunc(y / m)
y2 = y % m
rgrid[x1, x2] -= 1 # [zeile, spalte] masse -1
rgrid[y1, y2] += 1 # [zeile, spalte] masse +1
mgrid[x1, x2] -= spin # [zeile, spalte] spinänderung alter platz
mgrid[y1, y2] += spin # [zeile, spalte] spinänderung neuer platz
def down():
y = x + m
if y >= m * m: # periodische randbedingung unten
y -= m * m
x1 = math.trunc(x / m)
x2 = x % m
y1 = math.trunc(y / m)
y2 = y % m
rgrid[x1, x2] -= 1
rgrid[y1, y2] += 1
mgrid[x1, x2] -= spin
mgrid[y1, y2] += spin
def left():
y = x - 1
if math.trunc(y / m) < math.trunc(x / m): # periodische randbedingung links
y += m
x1 = math.trunc(x / m)
x2 = x % m
y1 = math.trunc(y / m)
y2 = y % m
rgrid[x1, x2] -= 1
rgrid[y1, y2] += 1
mgrid[x1, x2] -= spin
mgrid[y1, y2] += spin
def right():
y = x + 1
if math.trunc(y / m) > math.trunc(x / m): # periodische randbedingung rechts
y -= m
x1 = math.trunc(x / m)
x2 = x % m
y1 = math.trunc(y / m)
y2 = y % m
rgrid[x1, x2] -= 1
rgrid[y1, y2] += 1
mgrid[x1, x2] -= spin
mgrid[y1, y2] += spin
while i < j:
# 1. platz aussuchen
x = np.random.randint(M) # wähle zufälligen platz aus
if rgrid.item(x) != 0:
i += dt / N
# 2. teilchen aussuchen
li1 = np.arange(-abs(rgrid.item(x)), abs(rgrid.item(x)) + 1, 2)
li2 = np.arange(0, abs(rgrid.item(x)) + 1)
li3 = zip(li1, li2) # list1 und list2 als tupel in list3
results = [item[1] for item in li3 if item[0] == mgrid.item(x)] # gebe 2. element von tupel aus für passende magnetisierung
num = int(''.join(map(str, results))) # wandle listeneintrag in int um
spin = 1.0 if np.random.random() < num / rgrid.item(x) else -1.0
# 3. ereignis aussuchen
p = np.random.random()
p1 = np.exp(- spin * b * mgrid.item(x) / rgrid.item(x)) * dt # flip
p2 = dt # hoch
p3 = dt # runter
p4 = (1 - spin * e) * dt # links
p5 = (1 + spin * e) * dt # rechts
p6 = 1 - (4 + p1) * dt # nichts
if p < p6:
continue
elif p < p6 + p1:
flip()
continue
elif p < p6 + p1 + p2:
up()
continue
elif p < p6 + p1 + p2 + p3:
down()
continue
elif p < p6 + p1 + p2 + p3 + p4:
left()
continue
else:
right()
continue
pr.disable()
pr.print_stats(sort='cumtime')
what i already did to speed things up:
- used cProfile to see that the random.choice i was using was terribly slow and changed that to the if-conditions for the spin and the probabilities p
- installed cython and used
import pyximport; pyximport.install()
to create a compiled cython file. this gave no improvement and after checking thecython -a script.py
i see that i need more static variables to gain some improvements.
running cProfile
now shows me this:
188939207 function calls in 151.834 seconds
Ordered by: cumulative time
ncalls tottime percall cumtime percall filename:lineno(function)
5943639 10.582 0.000 40.678 0.000 {method 'join' of 'str' objects}
5943639 32.543 0.000 37.503 0.000 script.py:107(<listcomp>)
5943639 4.722 0.000 30.096 0.000 numeric.py:1905(array_str)
8276949 25.852 0.000 25.852 0.000 {method 'randint' of 'mtrand.RandomState' objects}
5943639 11.855 0.000 25.374 0.000 arrayprint.py:381(wrapper)
11887279 14.403 0.000 14.403 0.000 {built-in method numpy.core.multiarray.arange}
80651998 13.559 0.000 13.559 0.000 {method 'item' of 'numpy.ndarray' objects}
5943639 8.427 0.000 9.364 0.000 arrayprint.py:399(array2string)
11887278 8.817 0.000 8.817 0.000 {method 'random_sample' of 'mtrand.RandomState' objects}
579016 7.351 0.000 7.866 0.000 script.py:79(right)
300021 3.669 0.000 3.840 0.000 script.py:40(up)
152838 1.950 0.000 2.086 0.000 script.py:66(left)
17830917 1.910 0.000 1.910 0.000 {built-in method builtins.abs}
176346 1.147 0.000 1.217 0.000 script.py:37(flip)
5943639 1.131 0.000 1.131 0.000 {method 'discard' of 'set' objects}
5943639 1.054 0.000 1.054 0.000 {built-in method _thread.get_ident}
5943639 1.010 0.000 1.010 0.000 {method 'add' of 'set' objects}
5943639 0.961 0.000 0.961 0.000 {built-in method builtins.id}
3703804 0.892 0.000 0.892 0.000 {built-in method math.trunc}
I use join
to get the integer value of the number of +1 particles on that spot since I need that for my probabilities.
If I want to run something serious like m=400
, r=3
, j=300000
(j: final time) I'm looking at about 4 years of runtime with my current speed.
Any help is greatly appreciated.
Monte Carlo simulation
At first I get rid of the lists, afterwards I used a just in time compiler (numba). Without compilation a get 196s (your version), with compilation I get 0.44s. So there is a improvement of a factor 435 by using a jit-compiler and a few easy optimizations here.
A main advantage is also, that the GIL (global interpreter lock) is released here also. If the code is processor limited and not limited by the memory bandwith, the random numbers can be calculated in another thread while running the simulation in the other (more than one core can be used). This could also improve performance a bit further and would work as follows:
- Calculate the first chunk of random numbers (Small enough that the whole problem fits easily in the processor cache (at least L3 cache).
- Start an iteration with that random numbers. While running the iteration another chunk of random numbers is calculated.
- Repeat (2) until you are done.
Code
import numba as nb
import numpy as np
def calc (m,j,e,r,dt,b,rgrid,mgrid):
M=m*m
N = M * r
i=0
while i < j:
# 1. platz aussuchen
x = np.random.randint(M) # wähle zufälligen platz aus
if rgrid[x] != 0:
i += dt / N
########
# 2. teilchen aussuchen
#li1 = np.arange(-abs(rgrid[x]), abs(rgrid[x]) + 1, 2)
#li2 = np.arange(0, abs(rgrid[x]) + 1)
#li3 = zip(li1, li2) # list1 und list2 als tupel in list3
#results = [item[1] for item in li3 if item[0] == mgrid[x]] # gebe 2. element von tupel aus für passende magnetisierung
#num = int(''.join(map(str, results))) # wandle listeneintrag in int um
#######
# This should be equivalent
jj=0 #li2 starts with 0 and has a increment of 1
for ii in range(-abs(rgrid[x]),abs(rgrid[x])+1, 2):
if (ii==mgrid[x]):
num=jj
break
jj+=1
spin = 1.0 if np.random.random() < num / rgrid[x] else -1.0
# 3. ereignis aussuchen
p = np.random.random()
p1 = np.exp(- spin * b * mgrid[x] / rgrid[x]) * dt # flip
p2 = dt # hoch
p3 = dt # runter
p4 = (1 - spin * e) * dt # links
p5 = (1 + spin * e) * dt # rechts
p6 = 1 - (4 + p1) * dt # nichts
if p < p6:
continue
elif p < p6 + p1:
#flip()
mgrid[x] -= 2 * spin
continue
elif p < p6 + p1 + p2:
#up()
y = x - m
if y < 0: # periodische randbedingung hoch
y += m * m
rgrid[x] -= 1 # [zeile, spalte] masse -1
rgrid[y] += 1 # [zeile, spalte] masse +1
mgrid[x] -= spin # [zeile, spalte] spinänderung alter platz
mgrid[y] += spin # [zeile, spalte] spinänderung neuer platz
continue
elif p < p6 + p1 + p2:
#down()
y = x + m
if y >= m * m: # periodische randbedingung unten
y -= m * m
rgrid[x] -= 1
rgrid[y] += 1
mgrid[x] -= spin
mgrid[y] += spin
continue
elif p < p6 + p2 + p3:
#left()
y = x - 1
if (y // m) < (x // m): # periodische randbedingung links
y += m
rgrid[x] -= 1
rgrid[y] += 1
mgrid[x] -= spin
mgrid[y] += spin
continue
else:
#right()
y = x + 1
if (y // m) > (x // m): # periodische randbedingung rechts
y -= m
rgrid[x] -= 1
rgrid[y] += 1
mgrid[x] -= spin
mgrid[y] += spin
continue
return (mgrid,rgrid)
And now the main function for testing:
def main():
m = 10 # zeilen, spalten
j = 1000 # finale zeit
r = 3 # platzdichte
b = 1.6 # beta
e = 0.9 # epsilon
M = m * m # platzanzahl
N = M * r # teilchenanzahl
dt = 1 / (4 * np.exp(b)) # delta-t
i = 0
rgrid = r * np.ones((m, m),dtype=np.int) #don't convert the array build it up with the right datatype # dichte-matrix, rho = n(+) + n(-)
magrange = np.arange(-r, r + 1, 2) # mögliche magnetisierungen [a, b), schrittweite
mgrid = np.random.choice(magrange, (m, m)) # magnetisierungs-matrix m = n(+) - (n-)
#Compile the function
nb_calc = nb.njit(nb.types.Tuple((nb.int32[:], nb.int32[:]))(nb.int32, nb.int32,nb.float32,nb.int32,nb.float32,nb.float32,nb.int32[:], nb.int32[:]),nogil=True)(calc)
Results=nb_calc(m,j,e,r,dt,b,rgrid.flatten(),mgrid.flatten())
#Get the results
mgrid_new=Results[0].reshape(mgrid.shape)
rgrid_new=Results[1].reshape(rgrid.shape)
Edit I have rewritten the code "2.Teilchen aussuchen" and reworked the code so that it works with scalar indices. This gives an additional speed up by a factor of 4.