Convert 2:1 equirectangular panorama to cube map

If you want to do it server side there are many options. ImageMagick has a bunch of command-line tools which could slice your image into pieces. You could put the command to do this into a script and just run that each time you have a new image.

It's hard to tell quite what algorithm is used in the program. We can try and reverse engineer quite what is happening by feeding a square grid into the program. I've used a grid from Wikipedia:

64 by 64 grid

Which gives:

Projected grid

This gives us a clue as to how the box is constructed.

Imaging a sphere with lines of latitude and longitude on it, and a cube surrounding it. Now projecting from the point at center of the sphere produces a distorted grid on the cube.

Mathematically, take polar coordinates r, θ, ø, for the sphere r=1, 0 < θ < π, -π/4 < ø < 7π/4

  • x= r sin θ cos ø
  • y= r sin θ sin ø
  • z= r cos θ

centrally project these to the cube. First we divide into four regions by the latitude -π/4 < ø < π/4, π/4 < ø < 3π/4, 3π/4 < ø < 5π/4, 5π/4 < ø < 7π/4. These will either project to one of the four sides the top or the bottom.

Assume we are in the first side -π/4 < ø < π/4. The central projection of (sin θ cos ø, sin θ sin ø, cos θ) will be (a sin θ cos ø, a sin θ sin ø, a cos θ) which hits the x=1 plane when

  • a sin θ cos ø = 1

so

  • a = 1 / (sin θ cos ø)

and the projected point is

  • (1, tan ø, cot θ / cos ø)

If | cot θ / cos ø | < 1, this will be on the front face. Otherwise, it will be projected on the top or bottom and you will need a different projection for that. A better test for the top uses the fact that the minimum value of cos ø will be cos π/4 = 1/√2, so the projected point is always on the top if cot θ / (1/√2) > 1 or tan θ < 1/√2. This works out as θ < 35º or 0.615 radians.

Put this together in Python:

import sys
from PIL import Image
from math import pi,sin,cos,tan

def cot(angle):
    return 1/tan(angle)

# Project polar coordinates onto a surrounding cube
# assume ranges theta is [0,pi] with 0 the north poll, pi south poll
# phi is in range [0,2pi]
def projection(theta,phi):
        if theta<0.615:
            return projectTop(theta,phi)
        elif theta>2.527:
            return projectBottom(theta,phi)
        elif phi <= pi/4 or phi > 7*pi/4:
            return projectLeft(theta,phi)
        elif phi > pi/4 and phi <= 3*pi/4:
            return projectFront(theta,phi)
        elif phi > 3*pi/4 and phi <= 5*pi/4:
            return projectRight(theta,phi)
        elif phi > 5*pi/4 and phi <= 7*pi/4:
            return projectBack(theta,phi)

def projectLeft(theta,phi):
        x = 1
        y = tan(phi)
        z = cot(theta) / cos(phi)
        if z < -1:
            return projectBottom(theta,phi)
        if z > 1:
            return projectTop(theta,phi)
        return ("Left",x,y,z)

def projectFront(theta,phi):
        x = tan(phi-pi/2)
        y = 1
        z = cot(theta) / cos(phi-pi/2)
        if z < -1:
            return projectBottom(theta,phi)
        if z > 1:
            return projectTop(theta,phi)
        return ("Front",x,y,z)

def projectRight(theta,phi):
        x = -1
        y = tan(phi)
        z = -cot(theta) / cos(phi)
        if z < -1:
            return projectBottom(theta,phi)
        if z > 1:
            return projectTop(theta,phi)
        return ("Right",x,-y,z)

def projectBack(theta,phi):
        x = tan(phi-3*pi/2)
        y = -1
        z = cot(theta) / cos(phi-3*pi/2)
        if z < -1:
            return projectBottom(theta,phi)
        if z > 1:
            return projectTop(theta,phi)
        return ("Back",-x,y,z)

def projectTop(theta,phi):
        # (a sin θ cos ø, a sin θ sin ø, a cos θ) = (x,y,1)
        a = 1 / cos(theta)
        x = tan(theta) * cos(phi)
        y = tan(theta) * sin(phi)
        z = 1
        return ("Top",x,y,z)

def projectBottom(theta,phi):
        # (a sin θ cos ø, a sin θ sin ø, a cos θ) = (x,y,-1)
        a = -1 / cos(theta)
        x = -tan(theta) * cos(phi)
        y = -tan(theta) * sin(phi)
        z = -1
        return ("Bottom",x,y,z)

# Convert coords in cube to image coords
# coords is a tuple with the side and x,y,z coords
# edge is the length of an edge of the cube in pixels
def cubeToImg(coords,edge):
    if coords[0]=="Left":
        (x,y) = (int(edge*(coords[2]+1)/2), int(edge*(3-coords[3])/2) )
    elif coords[0]=="Front":
        (x,y) = (int(edge*(coords[1]+3)/2), int(edge*(3-coords[3])/2) )
    elif coords[0]=="Right":
        (x,y) = (int(edge*(5-coords[2])/2), int(edge*(3-coords[3])/2) )
    elif coords[0]=="Back":
        (x,y) = (int(edge*(7-coords[1])/2), int(edge*(3-coords[3])/2) )
    elif coords[0]=="Top":
        (x,y) = (int(edge*(3-coords[1])/2), int(edge*(1+coords[2])/2) )
    elif coords[0]=="Bottom":
        (x,y) = (int(edge*(3-coords[1])/2), int(edge*(5-coords[2])/2) )
    return (x,y)

# convert the in image to out image
def convert(imgIn,imgOut):
    inSize = imgIn.size
    outSize = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()
    edge = inSize[0]/4   # the length of each edge in pixels
    for i in xrange(inSize[0]):
        for j in xrange(inSize[1]):
            pixel = inPix[i,j]
            phi = i * 2 * pi / inSize[0]
            theta = j * pi / inSize[1]
            res = projection(theta,phi)
            (x,y) = cubeToImg(res,edge)
            #if i % 100 == 0 and j % 100 == 0:
            #    print i,j,phi,theta,res,x,y
            if x >= outSize[0]:
                #print "x out of range ",x,res
                x=outSize[0]-1
            if y >= outSize[1]:
                #print "y out of range ",y,res
                y=outSize[1]-1
            outPix[x,y] = pixel

imgIn = Image.open(sys.argv[1])
inSize = imgIn.size
imgOut = Image.new("RGB",(inSize[0],inSize[0]*3/4),"black")
convert(imgIn,imgOut)
imgOut.show()

The projection function takes the theta and phi values and returns coordinates in a cube from -1 to 1 in each direction. The cubeToImg takes the (x,y,z) coordinates and translates them to the output image coordinates.

The above algorithm seems to get the geometry right using an image of buckingham palace. We get:

Cube map of buckingham palace

This seems to get most of the lines in the paving right.

We are getting a few image artefacts. This is due to not having a one-to-one map of pixels. We need to do is use an inverse transformation. Rather than a loop through each pixel in the source and finding the corresponding pixel in the target, we loop through the target images and find the closest corresponding source pixel.

import sys
from PIL import Image
from math import pi,sin,cos,tan,atan2,hypot,floor
from numpy import clip

# get x,y,z coords from out image pixels coords
# i,j are pixel coords
# face is face number
# edge is edge length
def outImgToXYZ(i,j,face,edge):
    a = 2.0*float(i)/edge
    b = 2.0*float(j)/edge
    if face==0: # back
        (x,y,z) = (-1.0, 1.0-a, 3.0 - b)
    elif face==1: # left
        (x,y,z) = (a-3.0, -1.0, 3.0 - b)
    elif face==2: # front
        (x,y,z) = (1.0, a - 5.0, 3.0 - b)
    elif face==3: # right
        (x,y,z) = (7.0-a, 1.0, 3.0 - b)
    elif face==4: # top
        (x,y,z) = (b-1.0, a -5.0, 1.0)
    elif face==5: # bottom
        (x,y,z) = (5.0-b, a-5.0, -1.0)
    return (x,y,z)

# convert using an inverse transformation
def convertBack(imgIn,imgOut):
    inSize = imgIn.size
    outSize = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()
    edge = inSize[0]/4   # the length of each edge in pixels
    for i in xrange(outSize[0]):
        face = int(i/edge) # 0 - back, 1 - left 2 - front, 3 - right
        if face==2:
            rng = xrange(0,edge*3)
        else:
            rng = xrange(edge,edge*2)

        for j in rng:
            if j<edge:
                face2 = 4 # top
            elif j>=2*edge:
                face2 = 5 # bottom
            else:
                face2 = face

            (x,y,z) = outImgToXYZ(i,j,face2,edge)
            theta = atan2(y,x) # range -pi to pi
            r = hypot(x,y)
            phi = atan2(z,r) # range -pi/2 to pi/2
            # source img coords
            uf = ( 2.0*edge*(theta + pi)/pi )
            vf = ( 2.0*edge * (pi/2 - phi)/pi)
            # Use bilinear interpolation between the four surrounding pixels
            ui = floor(uf)  # coord of pixel to bottom left
            vi = floor(vf)
            u2 = ui+1       # coords of pixel to top right
            v2 = vi+1
            mu = uf-ui      # fraction of way across pixel
            nu = vf-vi
            # Pixel values of four corners
            A = inPix[ui % inSize[0],clip(vi,0,inSize[1]-1)]
            B = inPix[u2 % inSize[0],clip(vi,0,inSize[1]-1)]
            C = inPix[ui % inSize[0],clip(v2,0,inSize[1]-1)]
            D = inPix[u2 % inSize[0],clip(v2,0,inSize[1]-1)]
            # interpolate
            (r,g,b) = (
              A[0]*(1-mu)*(1-nu) + B[0]*(mu)*(1-nu) + C[0]*(1-mu)*nu+D[0]*mu*nu,
              A[1]*(1-mu)*(1-nu) + B[1]*(mu)*(1-nu) + C[1]*(1-mu)*nu+D[1]*mu*nu,
              A[2]*(1-mu)*(1-nu) + B[2]*(mu)*(1-nu) + C[2]*(1-mu)*nu+D[2]*mu*nu )

            outPix[i,j] = (int(round(r)),int(round(g)),int(round(b)))

imgIn = Image.open(sys.argv[1])
inSize = imgIn.size
imgOut = Image.new("RGB",(inSize[0],inSize[0]*3/4),"black")
convertBack(imgIn,imgOut)
imgOut.save(sys.argv[1].split('.')[0]+"Out2.png")
imgOut.show()

The results of this are:

Using the inverse transformation

If anyone want to go in the reverse, see this JS Fiddle page.


Given the excellent accepted answer, I wanted to add my corresponding C++ implementation, based on OpenCV.

For those not familiar with OpenCV, think of Mat as an image. We first construct two maps that remap from the equirectangular image to our corresponding cubemap face. Then, we do the heavy lifting (i.e., remapping with interpolation) using OpenCV.

The code can be made more compact, if readability is not of concern.

// Define our six cube faces.
// 0 - 3 are side faces, clockwise order
// 4 and 5 are top and bottom, respectively
float faceTransform[6][2] =
{
    {0, 0},
    {M_PI / 2, 0},
    {M_PI, 0},
    {-M_PI / 2, 0},
    {0, -M_PI / 2},
    {0, M_PI / 2}
};

// Map a part of the equirectangular panorama (in) to a cube face
// (face). The ID of the face is given by faceId. The desired
// width and height are given by width and height.
inline void createCubeMapFace(const Mat &in, Mat &face,
        int faceId = 0, const int width = -1,
        const int height = -1) {

    float inWidth = in.cols;
    float inHeight = in.rows;

    // Allocate map
    Mat mapx(height, width, CV_32F);
    Mat mapy(height, width, CV_32F);

    // Calculate adjacent (ak) and opposite (an) of the
    // triangle that is spanned from the sphere center
    //to our cube face.
    const float an = sin(M_PI / 4);
    const float ak = cos(M_PI / 4);

    const float ftu = faceTransform[faceId][0];
    const float ftv = faceTransform[faceId][1];

    // For each point in the target image,
    // calculate the corresponding source coordinates.
    for(int y = 0; y < height; y++) {
        for(int x = 0; x < width; x++) {

            // Map face pixel coordinates to [-1, 1] on plane
            float nx = (float)y / (float)height - 0.5f;
            float ny = (float)x / (float)width - 0.5f;

            nx *= 2;
            ny *= 2;

            // Map [-1, 1] plane coords to [-an, an]
            // thats the coordinates in respect to a unit sphere
            // that contains our box.
            nx *= an;
            ny *= an;

            float u, v;

            // Project from plane to sphere surface.
            if(ftv == 0) {
                // Center faces
                u = atan2(nx, ak);
                v = atan2(ny * cos(u), ak);
                u += ftu;
            } else if(ftv > 0) {
                // Bottom face
                float d = sqrt(nx * nx + ny * ny);
                v = M_PI / 2 - atan2(d, ak);
                u = atan2(ny, nx);
            } else {
                // Top face
                float d = sqrt(nx * nx + ny * ny);
                v = -M_PI / 2 + atan2(d, ak);
                u = atan2(-ny, nx);
            }

            // Map from angular coordinates to [-1, 1], respectively.
            u = u / (M_PI);
            v = v / (M_PI / 2);

            // Warp around, if our coordinates are out of bounds.
            while (v < -1) {
                v += 2;
                u += 1;
            }
            while (v > 1) {
                v -= 2;
                u += 1;
            }

            while(u < -1) {
                u += 2;
            }
            while(u > 1) {
                u -= 2;
            }

            // Map from [-1, 1] to in texture space
            u = u / 2.0f + 0.5f;
            v = v / 2.0f + 0.5f;

            u = u * (inWidth - 1);
            v = v * (inHeight - 1);

            // Save the result for this pixel in map
            mapx.at<float>(x, y) = u;
            mapy.at<float>(x, y) = v;
        }
    }

    // Recreate output image if it has wrong size or type.
    if(face.cols != width || face.rows != height ||
        face.type() != in.type()) {
        face = Mat(width, height, in.type());
    }

    // Do actual resampling using OpenCV's remap
    remap(in, face, mapx, mapy,
         CV_INTER_LINEAR, BORDER_CONSTANT, Scalar(0, 0, 0));
}

Given the following input:

Enter image description here

The following faces are generated:

Enter image description here

Image courtesy of Optonaut.


UPDATE 2: It looks like someone else had already built a far superior web application than my own. Their conversion runs client-side, so there aren't any uploads and downloads to worry about.

I suppose if you hate JavaScript for some reason, or are trying to do this on your mobile, then my web application below is okay.

UPDATE: I've published a simple web application where you can upload a panorama and have it return the six skybox images in a ZIP file.

The source is a cleaned up reimplementation of what's below, and is available on GitHub.

The application is presently running on a single free-tier Heroku dyno, but please don't attempt to use it as an API. If you want automation, make your own deployment; single click Deploy to Heroku available.

Original: Here's a (naively) modified version of Salix Alba's absolutely fantastic answer that converts one face at a time, spits out six different images and preserves the original image's file type.

Aside from the fact most use cases probably expect six separate images, the main advantage of converting one face at a time is that it makes working with huge images a lot less memory intensive.

#!/usr/bin/env python
import sys
from PIL import Image
from math import pi, sin, cos, tan, atan2, hypot, floor
from numpy import clip

# get x,y,z coords from out image pixels coords
# i,j are pixel coords
# faceIdx is face number
# faceSize is edge length
def outImgToXYZ(i, j, faceIdx, faceSize):
    a = 2.0 * float(i) / faceSize
    b = 2.0 * float(j) / faceSize

    if faceIdx == 0: # back
        (x,y,z) = (-1.0, 1.0 - a, 1.0 - b)
    elif faceIdx == 1: # left
        (x,y,z) = (a - 1.0, -1.0, 1.0 - b)
    elif faceIdx == 2: # front
        (x,y,z) = (1.0, a - 1.0, 1.0 - b)
    elif faceIdx == 3: # right
        (x,y,z) = (1.0 - a, 1.0, 1.0 - b)
    elif faceIdx == 4: # top
        (x,y,z) = (b - 1.0, a - 1.0, 1.0)
    elif faceIdx == 5: # bottom
        (x,y,z) = (1.0 - b, a - 1.0, -1.0)

    return (x, y, z)

# convert using an inverse transformation
def convertFace(imgIn, imgOut, faceIdx):
    inSize = imgIn.size
    outSize = imgOut.size
    inPix = imgIn.load()
    outPix = imgOut.load()
    faceSize = outSize[0]

    for xOut in xrange(faceSize):
        for yOut in xrange(faceSize):
            (x,y,z) = outImgToXYZ(xOut, yOut, faceIdx, faceSize)
            theta = atan2(y,x) # range -pi to pi
            r = hypot(x,y)
            phi = atan2(z,r) # range -pi/2 to pi/2

            # source img coords
            uf = 0.5 * inSize[0] * (theta + pi) / pi
            vf = 0.5 * inSize[0] * (pi/2 - phi) / pi

            # Use bilinear interpolation between the four surrounding pixels
            ui = floor(uf)  # coord of pixel to bottom left
            vi = floor(vf)
            u2 = ui+1       # coords of pixel to top right
            v2 = vi+1
            mu = uf-ui      # fraction of way across pixel
            nu = vf-vi

            # Pixel values of four corners
            A = inPix[ui % inSize[0], clip(vi, 0, inSize[1]-1)]
            B = inPix[u2 % inSize[0], clip(vi, 0, inSize[1]-1)]
            C = inPix[ui % inSize[0], clip(v2, 0, inSize[1]-1)]
            D = inPix[u2 % inSize[0], clip(v2, 0, inSize[1]-1)]

            # interpolate
            (r,g,b) = (
              A[0]*(1-mu)*(1-nu) + B[0]*(mu)*(1-nu) + C[0]*(1-mu)*nu+D[0]*mu*nu,
              A[1]*(1-mu)*(1-nu) + B[1]*(mu)*(1-nu) + C[1]*(1-mu)*nu+D[1]*mu*nu,
              A[2]*(1-mu)*(1-nu) + B[2]*(mu)*(1-nu) + C[2]*(1-mu)*nu+D[2]*mu*nu )

            outPix[xOut, yOut] = (int(round(r)), int(round(g)), int(round(b)))

imgIn = Image.open(sys.argv[1])
inSize = imgIn.size
faceSize = inSize[0] / 4
components = sys.argv[1].rsplit('.', 2)

FACE_NAMES = {
  0: 'back',
  1: 'left',
  2: 'front',
  3: 'right',
  4: 'top',
  5: 'bottom'
}

for face in xrange(6):
  imgOut = Image.new("RGB", (faceSize, faceSize), "black")
  convertFace(imgIn, imgOut, face)
  imgOut.save(components[0] + "_" + FACE_NAMES[face] + "." + components[1])