matplotlib: Group boxplots
How about using colors to differentiate between "apples" and "oranges" and spacing to separate "A", "B" and "C"?
Something like this:
from pylab import plot, show, savefig, xlim, figure, \
hold, ylim, legend, boxplot, setp, axes
# function for setting the colors of the box plots pairs
def setBoxColors(bp):
setp(bp['boxes'][0], color='blue')
setp(bp['caps'][0], color='blue')
setp(bp['caps'][1], color='blue')
setp(bp['whiskers'][0], color='blue')
setp(bp['whiskers'][1], color='blue')
setp(bp['fliers'][0], color='blue')
setp(bp['fliers'][1], color='blue')
setp(bp['medians'][0], color='blue')
setp(bp['boxes'][1], color='red')
setp(bp['caps'][2], color='red')
setp(bp['caps'][3], color='red')
setp(bp['whiskers'][2], color='red')
setp(bp['whiskers'][3], color='red')
setp(bp['fliers'][2], color='red')
setp(bp['fliers'][3], color='red')
setp(bp['medians'][1], color='red')
# Some fake data to plot
A= [[1, 2, 5,], [7, 2]]
B = [[5, 7, 2, 2, 5], [7, 2, 5]]
C = [[3,2,5,7], [6, 7, 3]]
fig = figure()
ax = axes()
hold(True)
# first boxplot pair
bp = boxplot(A, positions = [1, 2], widths = 0.6)
setBoxColors(bp)
# second boxplot pair
bp = boxplot(B, positions = [4, 5], widths = 0.6)
setBoxColors(bp)
# thrid boxplot pair
bp = boxplot(C, positions = [7, 8], widths = 0.6)
setBoxColors(bp)
# set axes limits and labels
xlim(0,9)
ylim(0,9)
ax.set_xticklabels(['A', 'B', 'C'])
ax.set_xticks([1.5, 4.5, 7.5])
# draw temporary red and blue lines and use them to create a legend
hB, = plot([1,1],'b-')
hR, = plot([1,1],'r-')
legend((hB, hR),('Apples', 'Oranges'))
hB.set_visible(False)
hR.set_visible(False)
savefig('boxcompare.png')
show()
Here is my version. It stores data based on categories.
import matplotlib.pyplot as plt
import numpy as np
data_a = [[1,2,5], [5,7,2,2,5], [7,2,5]]
data_b = [[6,4,2], [1,2,5,3,2], [2,3,5,1]]
ticks = ['A', 'B', 'C']
def set_box_color(bp, color):
plt.setp(bp['boxes'], color=color)
plt.setp(bp['whiskers'], color=color)
plt.setp(bp['caps'], color=color)
plt.setp(bp['medians'], color=color)
plt.figure()
bpl = plt.boxplot(data_a, positions=np.array(xrange(len(data_a)))*2.0-0.4, sym='', widths=0.6)
bpr = plt.boxplot(data_b, positions=np.array(xrange(len(data_b)))*2.0+0.4, sym='', widths=0.6)
set_box_color(bpl, '#D7191C') # colors are from http://colorbrewer2.org/
set_box_color(bpr, '#2C7BB6')
# draw temporary red and blue lines and use them to create a legend
plt.plot([], c='#D7191C', label='Apples')
plt.plot([], c='#2C7BB6', label='Oranges')
plt.legend()
plt.xticks(xrange(0, len(ticks) * 2, 2), ticks)
plt.xlim(-2, len(ticks)*2)
plt.ylim(0, 8)
plt.tight_layout()
plt.savefig('boxcompare.png')
I am short of reputation so I cannot post an image to here. You can run it and see the result. Basically it's very similar to what Molly did.
Note that, depending on the version of python you are using, you may need to replace xrange
with range
A simple way would be to use pandas. I adapted an example from the plotting documentation:
In [1]: import pandas as pd, numpy as np
In [2]: df = pd.DataFrame(np.random.rand(12,2), columns=['Apples', 'Oranges'] )
In [3]: df['Categories'] = pd.Series(list('AAAABBBBCCCC'))
In [4]: pd.options.display.mpl_style = 'default'
In [5]: df.boxplot(by='Categories')
Out[5]:
array([<matplotlib.axes.AxesSubplot object at 0x51a5190>,
<matplotlib.axes.AxesSubplot object at 0x53fddd0>], dtype=object)
Mock data:
df = pd.DataFrame({'Group':['A','A','A','B','C','B','B','C','A','C'],\
'Apple':np.random.rand(10),'Orange':np.random.rand(10)})
df = df[['Group','Apple','Orange']]
Group Apple Orange
0 A 0.465636 0.537723
1 A 0.560537 0.727238
2 A 0.268154 0.648927
3 B 0.722644 0.115550
4 C 0.586346 0.042896
5 B 0.562881 0.369686
6 B 0.395236 0.672477
7 C 0.577949 0.358801
8 A 0.764069 0.642724
9 C 0.731076 0.302369
You can use the Seaborn library for these plots. First melt
the dataframe to format data and then create the boxplot of your choice.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
dd=pd.melt(df,id_vars=['Group'],value_vars=['Apple','Orange'],var_name='fruits')
sns.boxplot(x='Group',y='value',data=dd,hue='fruits')
The accepted answer uses pylab and works for 2 groups. What if we have more?
Here is the flexible generic solution with matplotlib
# --- Your data, e.g. results per algorithm:
data1 = [5,5,4,3,3,5]
data2 = [6,6,4,6,8,5]
data3 = [7,8,4,5,8,2]
data4 = [6,9,3,6,8,4]
data6 = [17,8,4,5,8,1]
data7 = [6,19,3,6,1,1]
# --- Combining your data:
data_group1 = [data1, data2, data6]
data_group2 = [data3, data4, data7]
data_group3 = [data1, data1, data1]
data_group4 = [data2, data2, data2]
data_group5 = [data2, data2, data2]
data_groups = [data_group1, data_group2, data_group3] #, data_group4] #, data_group5]
# --- Labels for your data:
labels_list = ['a','b', 'c']
width = 0.3
xlocations = [ x*((1+ len(data_groups))*width) for x in range(len(data_group1)) ]
symbol = 'r+'
ymin = min ( [ val for dg in data_groups for data in dg for val in data ] )
ymax = max ( [ val for dg in data_groups for data in dg for val in data ])
ax = pl.gca()
ax.set_ylim(ymin,ymax)
ax.grid(True, linestyle='dotted')
ax.set_axisbelow(True)
pl.xlabel('X axis label')
pl.ylabel('Y axis label')
pl.title('title')
space = len(data_groups)/2
offset = len(data_groups)/2
ax.set_xticks( xlocations )
ax.set_xticklabels( labels_list, rotation=0 )
# --- Offset the positions per group:
group_positions = []
for num, dg in enumerate(data_groups):
_off = (0 - space + (0.5+num))
print(_off)
group_positions.append([x-_off*(width+0.01) for x in xlocations])
for dg, pos in zip(data_groups, group_positions):
pl.boxplot(dg,
sym=symbol,
# labels=['']*len(labels_list),
labels=['']*len(labels_list),
positions=pos,
widths=width,
# notch=False,
# vert=True,
# whis=1.5,
# bootstrap=None,
# usermedians=None,
# conf_intervals=None,
# patch_artist=False,
)
pl.show()