2D and 3D Scatter Histograms from arrays in Python
Do you have any idea how I can bin three arrays into a histogram? My arrays look like this:
Temperature = [4, 3, 1, 4, 6, 7, 8, 3, 1]
Radius = [0, 2, 3, 4, 0, 1, 2, 10, 7]
Density = [1, 10, 2, 24, 7, 10, 21, 102, 203]
And the 1D plot should look like this:
Density
| X
10^2-| X
| X
10^1-|
| X
10^0-|
|___|___|___|___|___ Radius
0 3.3 6.6 10
And the 2D plot should (qualitatively) look like this:
Density
| 2 | |
10^2-| 11249 | |
| 233 | | Radius
10^1-| 12 | |
| 1 | |
10^0-|
|___|___|___|___|___ Temperature
0 3 5 8
So I want to bin one or two fields with python/numpy and then plot them to analyse their correspondence.
Solution 1:
Here are two functions, hist2d_bubble and hist3d_bubble, that may fit your purpose:
import numpy as np
import matplotlib.pyplot as pyplot
from mpl_toolkits.mplot3d import Axes3D
def hist2d_bubble(x_data, y_data, bins=10):
    """Draw a 2D histogram of two data series as a bubble chart.

    The data are binned with ``np.histogram2d`` and every bin is drawn as a
    black circle positioned at the bin's lower (left) edges, with a marker
    area of ``128 * count``. Axis ticks are placed on the bin edges.

    Parameters
    ----------
    x_data, y_data : array_like
        Values to histogram along the horizontal and vertical axis.
    bins : int or sequence, optional
        Bin specification, passed unchanged to ``np.histogram2d``.

    Returns
    -------
    points : numpy.ndarray
        One row per bin, ordered row-major over the count matrix:
        ``(x_edge, y_edge, count)``.
    sub : object
        Whatever ``pyplot.scatter`` returned; its ``.axes`` attribute gives
        access to the axes for further labelling.
    """
    counts, xs, ys = np.histogram2d(x_data, y_data, bins=bins)

    # Build the (x_edge, y_edge, count) table in the same row-major order in
    # which the count matrix is laid out. Only the lower edge of each bin is
    # used, so the last entry of each edge array is left out.
    n_x, n_y = counts.shape
    points = np.column_stack((
        np.repeat(xs[:n_x], n_y),
        np.tile(ys[:n_y], n_x),
        counts.ravel(),
    ))

    pyplot.figure()
    sub = pyplot.scatter(points[:, 0], points[:, 1],
                         color='black', marker='o', s=128 * points[:, 2])
    axes = sub.axes
    axes.set_xticks(xs)
    axes.set_yticks(ys)
    # Interactive mode, so show() does not block and the caller can keep
    # decorating the returned plot afterwards.
    pyplot.ion()
    pyplot.grid()
    pyplot.show()
    return points, sub
def hist3d_bubble(x_data, y_data, z_data, bins=10):
    """Draw a 3D histogram of three data series as a bubble chart.

    The three series are binned jointly with ``np.histogramdd``, so every
    count is the exact number of samples falling into that (x, y, z) bin.
    (Reconstructing the 3D counts from the three pairwise 2D histograms is
    ambiguous and can put counts into bins that contain no data.)

    Every bin is drawn as a black circle positioned at the bin's lower edges,
    with a marker area of ``128 * count``. Axis ticks are placed on the bin
    edges.

    Parameters
    ----------
    x_data, y_data, z_data : array_like
        One-dimensional series of equal length; element ``n`` of each series
        together forms the n-th sample.
    bins : int or sequence, optional
        Bin specification, passed unchanged to ``np.histogramdd``.

    Returns
    -------
    points : numpy.ndarray
        One row per bin, ordered row-major over the count array:
        ``(x_edge, y_edge, z_edge, count)``.
    sub : object
        The 3D axes created with ``projection='3d'``.
    """
    # An explicit (N, 3) sample matrix: one row per observation.
    sample = np.column_stack((x_data, y_data, z_data))
    counts, (xs, ys, zs) = np.histogramdd(sample, bins=bins)

    # Lower edges of every bin, expanded to the full grid in the same
    # row-major order as counts.ravel().
    grid_x, grid_y, grid_z = np.meshgrid(xs[:-1], ys[:-1], zs[:-1],
                                         indexing='ij')
    points = np.column_stack((
        grid_x.ravel(),
        grid_y.ravel(),
        grid_z.ravel(),
        counts.ravel(),
    ))

    fig = pyplot.figure()
    sub = fig.add_subplot(111, projection='3d')
    sub.scatter(points[:, 0], points[:, 1], points[:, 2],
                color='black', marker='o', s=128 * points[:, 3])
    sub.axes.set_xticks(xs)
    sub.axes.set_yticks(ys)
    sub.axes.set_zticks(zs)
    # Interactive mode, so show() does not block and the caller can keep
    # decorating the returned axes afterwards.
    pyplot.ion()
    pyplot.grid()
    pyplot.show()
    return points, sub
The two figures above were created using:
# Example input: three measurement series of equal length.
radius = [0, 2, 3, 4, 0, 1, 2, 10, 7]
density = [1, 10, 2, 24, 7, 10, 21, 102, 203]
temperature = [4, 3, 1, 4, 6, 7, 8, 3, 1]

import matplotlib

# Use a larger font for tick and axis labels.
matplotlib.rcParams['font.size'] = 14

# 2D bubble histogram: density against radius.
points, sub = hist2d_bubble(radius, density, bins=4)
sub.axes.set(xlabel='radius', ylabel='density')

# 3D bubble histogram: temperature, density and radius.
points, sub = hist3d_bubble(temperature, density, radius, bins=4)
sub.axes.set(xlabel='temperature', ylabel='density', zlabel='radius')
Related:
Howto bin series of float values into histogram in Python?
How to correctly generate a 3d histogram using numpy or matplotlib built in functions in python?
2D histogram with Python
Solution 2:
Here's a bare-bones 2D version of Castro's code above. It simply plots the mean value at each x,y coordinate. This could be plotted using imshow, but Castro's approach makes for a much neater scatter plot.
from matplotlib import pyplot as plt
import numpy as np
# Sample x, y positions with a z value attached to each one. Samples that
# share an (x, y) position are averaged before plotting.
x = [1,1,1,2,2,2,2,3,4,4,4,4]
y = [1,1,1,2,2,2,2,3,4,4,4,4]
z = [1,1,1,2,2,3,3,4,4,4,5,5]

# One unit-wide bin per integer coordinate, covering 1..max on each axis.
# NOTE: this binning assumes the coordinates are integers starting at 1.
xbins, ybins = int(max(x)), int(max(y))
rng = [[1, xbins + 1], [1, ybins + 1]]
bins = [xbins, ybins]

# Sum of z per bin and number of samples per bin; their ratio is the mean.
H, xs, ys = np.histogram2d(x, y, weights=z, bins=bins, range=rng)
count, _, _ = np.histogram2d(x, y, bins=bins, range=rng)

# Only bins that received at least one sample have a defined mean. Emptiness
# is decided by the sample count, not by the z sum, so that an occupied bin
# whose z values average to exactly zero is still kept.
occupied = count > 0
means = np.divide(H, count, out=np.zeros_like(H), where=occupied)
H = np.ma.masked_array(means, mask=~occupied)

# Flatten the occupied bins into (x, y, mean z) rows.
rows, cols = np.nonzero(occupied)
points = np.column_stack((xs[rows], ys[cols], means[rows, cols]))

# Plot the data. pyplot.get_cmap is used because matplotlib.cm.get_cmap is
# no longer available in recent Matplotlib releases.
plt.figure()
cm = plt.get_cmap('hot')
p = plt.scatter(points[:, 0], points[:, 1], c=points[:, 2], cmap=cm)
plt.colorbar(p).set_label('avg. z value')
plt.grid()
plt.show()
All the duplicated x,y points are now reduced to a unique set and their z values have been averaged: