2D and 3D Scatter Histograms from arrays in Python

Do you have any idea how I can bin three arrays into a histogram? My arrays look like this:

Temperature = [4,   3,   1,   4,   6,   7,   8,   3,   1]
Radius      = [0,   2,   3,   4,   0,   1,   2,  10,   7]
Density     = [1,  10,   2,  24,   7,  10,  21, 102, 203]

And the 1D plot should look like this:

Density

     |           X
10^2-|               X
     |       X
10^1-|   
     |   X
10^0-|
     |___|___|___|___|___   Radius
         0  3.3 6.6  10

And the 2D plot should (qualitatively) look like this:

Density

     |           2      | |
10^2-|      11249       | |
     |     233          | | Radius
10^1-|    12            | |
     |   1              | |
10^0-|
     |___|___|___|___|___   Temperature
         0   3   5   8

So I want to bin one or two fields with Python/NumPy and then plot them to analyse how they relate to each other.


Solution 1:

Here are two functions, hist2d_bubble and hist3d_bubble, that may fit your purpose:

enter image description here

import numpy as np
import matplotlib.pyplot as pyplot
from mpl_toolkits.mplot3d import Axes3D


def hist2d_bubble(x_data, y_data, bins=10):
    """Show a 2D histogram of two samples as a bubble (scatter) plot.

    The samples are binned with ``numpy.histogram2d``.  Every bin is drawn
    as a black circle whose area is 128 times the bin count, so empty bins
    are invisible.  Each circle sits at the lower edge of its bin on both
    axes, and the axis ticks are placed on the bin edges.

    Parameters
    ----------
    x_data, y_data : array_like
        Samples to histogram along the x and y axes.
    bins : optional
        Bin specification forwarded to ``numpy.histogram2d``.

    Returns
    -------
    points : numpy.ndarray
        One row ``(x_edge, y_edge, count)`` per bin.
    sub
        The object returned by ``pyplot.scatter``; its ``axes`` attribute
        gives access to the axes for further labelling.
    """
    counts, xs, ys = np.histogram2d(x_data, y_data, bins=bins)

    # One row per bin, walking the count matrix in row-major order.
    points = np.array([
        (xs[row], ys[col], count)
        for (row, col), count in np.ndenumerate(counts)
    ])

    pyplot.figure()
    bubbles = pyplot.scatter(
        points[:, 0],
        points[:, 1],
        s=128*points[:, 2],
        marker='o',
        color='black',
    )

    # Put the ticks on the bin edges so the grid outlines the bins.
    axes = bubbles.axes
    axes.set_xticks(xs)
    axes.set_yticks(ys)

    pyplot.ion()
    pyplot.grid()
    pyplot.show()
    return points, bubbles


def hist3d_bubble(x_data, y_data, z_data, bins=10):
    """Show the joint 3D histogram of three samples as a bubble plot.

    The three sequences are binned together with ``numpy.histogramdd``, so
    every count is the exact number of observations that fall into that
    (x, y, z) bin.  (Pairwise 2D histograms are not enough for this: the
    same three marginals can come from different joint distributions.)

    Every bin is drawn as a black circle whose area is 128 times the bin
    count, so empty bins are invisible.  Each circle sits at the lower edge
    of its bin along every axis, and the axis ticks are placed on the bin
    edges.

    Parameters
    ----------
    x_data, y_data, z_data : array_like
        One-dimensional samples of equal length; element ``n`` of each
        sequence belongs to the same observation.
    bins : int or sequence, optional
        Bin specification forwarded to ``numpy.histogramdd``.  An int gives
        that many equal-width bins along every axis.

    Returns
    -------
    points : numpy.ndarray
        One row ``(x_edge, y_edge, z_edge, count)`` per bin, in row-major
        order of the (x, y, z) bin indices.
    sub
        The 3D axes the bubbles were drawn on.
    """
    # Stack the samples into an (N, 3) array: one row per observation.
    sample = np.column_stack((x_data, y_data, z_data))
    counts, (xs, ys, zs) = np.histogramdd(sample, bins=bins)

    points = np.array([
        (xs[i], ys[j], zs[k], count)
        for (i, j, k), count in np.ndenumerate(counts)
    ])

    fig = pyplot.figure()
    sub = fig.add_subplot(111, projection='3d')
    sub.scatter(points[:, 0], points[:, 1], points[:, 2],
                color='black', marker='o', s=128*points[:, 3])

    # Put the ticks on the bin edges so the grid outlines the bins.
    sub.set_xticks(xs)
    sub.set_yticks(ys)
    sub.set_zticks(zs)

    pyplot.ion()
    pyplot.grid()
    pyplot.show()
    return points, sub

The two figures above were created using:

# Sample data from the question: index n of each list is one observation.
temperature = [4, 3, 1, 4, 6, 7, 8, 3, 1]
radius = [0, 2, 3, 4, 0, 1, 2, 10, 7]
density = [1, 10, 2, 24, 7, 10, 21, 102, 203]

import matplotlib
# Larger font so the tick and axis labels stay readable in the figures.
matplotlib.rcParams['font.size'] = 14

# 2D bubble histogram: density against radius.
points, sub = hist2d_bubble(radius, density, bins=4)
sub.axes.set(xlabel='radius', ylabel='density')

# 3D bubble histogram: temperature, density and radius together.
points, sub = hist3d_bubble(temperature, density, radius, bins=4)
sub.axes.set(xlabel='temperature', ylabel='density', zlabel='radius')

Related:

Howto bin series of float values into histogram in Python?

How to correctly generate a 3d histogram using numpy or matplotlib built in functions in python?

2D histogram with Python

Solution 2:

Here's a bare-bones 2D version of Castro's code above. It simply plots the mean z value at each (x, y) coordinate. This could also be plotted using imshow, but Castro's approach makes for a much neater scatter plot.

from matplotlib import pyplot as plt
import numpy as np

# Sample data: (x, y) points, many of them duplicated, each with a z value
# that should be averaged per unique (x, y) coordinate and plotted.
x = [1,1,1,2,2,2,2,3,4,4,4,4]
y = [1,1,1,2,2,2,2,3,4,4,4,4]
z = [1,1,1,2,2,3,3,4,4,4,5,5]

# One unit-wide bin per integer coordinate, starting at 1, so the lower
# edge of every bin is the coordinate itself.
xbins, ybins = int(max(x)), int(max(y))
rng = [[1, xbins+1], [1, ybins+1]]
bins = [xbins, ybins]

# Sum of z per bin and number of points per bin; their ratio is the mean.
H, xs, ys = np.histogram2d(x, y, weights=z, bins=bins, range=rng)
count, _, _ = np.histogram2d(x, y, bins=bins, range=rng)

# Mask empty bins so the division below skips them instead of dividing by 0.
count = np.ma.masked_where(count==0, count)
H = np.ma.masked_where(H==0, H)
H /= count

# Flatten the matrix of means into (x, y, mean z) rows.
# NOTE: bins whose value is exactly 0 are dropped together with empty bins.
points = []
for (i, j), v in np.ndenumerate(H):
    if v:
        points.append((xs[i], ys[j], v))
points = np.array(points)

# Plot the averaged points, coloured by their mean z value.
fig = plt.figure()
# plt.cm.get_cmap() was removed in matplotlib 3.9; pyplot.get_cmap() is the
# lookup that works on both old and new releases.
cm = plt.get_cmap('hot')
p = plt.scatter(points[:, 0], points[:, 1], c=points[:, 2], cmap=cm)
plt.colorbar(p).set_label('avg. z value')
plt.grid()
plt.show()

All the duplicated x,y points are now reduced to a unique set and their z values have been averaged:

averaged z value of duplicated x,y coordinates