Is there a way to tell crossfilter to treat the elements of an array as separate records instead of treating the whole array as a single key?

I have a data set where some of the field values are arrays, and I'd like to use crossfilter and d3.js or dc.js to display a histogram of how many times each of those values is present in the dataset.

Here's an example:

// Sample records; each record's "tags" field is an array of tag names.
var data = [
    {"key":"KEY-1","tags":["tag1", "tag2"]},
    {"key":"KEY-2","tags":["tag2"]},
    {"key":"KEY-3","tags":["tag3", "tag1"]}];

var cf = crossfilter(data);

// The accessor returns the whole tags array, so the dimension is keyed on the
// array as a whole rather than on the individual tags inside it.
var tags = cf.dimension(function(d){ return d.tags;});
var tagsGroup = tags.group();


// Row chart in the "#chart" element, driven by the tags dimension and group.
dc.rowChart("#chart")
    .renderLabel(true)
    .dimension(tags)
    .group(tagsGroup)
    .xAxis().ticks(3);

dc.renderAll();

And JSFiddle http://jsfiddle.net/uhXf5/2/

When I run that code, it produces a graph like this:

graph1

But what I want is something like this:

enter image description here

To make things even more complicated it would be awesome to be able to click on any of the rows and filter dataset by the tag that was clicked.

Does anyone have any ideas on how to achieve that?

Thanks, Kostya


Solution 1:

Solved it myself, here's fiddle with working code http://jsfiddle.net/uhXf5/6/

Here's the code in case someone comes across a similar problem:

// Reduce "add" step: bumps the running count of every tag on the record.
// p is the accumulator (tag name -> count) and is mutated in place;
// v is the record being added, whose `tags` array is read.
// Returns the same accumulator object that was passed in.
function reduceAdd(p, v) {
  return v.tags.reduce(function(counts, tag) {
    var previous = counts[tag] || 0; // a tag not seen before starts at 0
    counts[tag] = previous + 1;
    return counts;
  }, p);
}

// Reduce "remove" step: lowers the running count of every tag on the record.
// p is the accumulator (tag name -> count) and is mutated in place;
// v is the record being removed, whose `tags` array is read.
// Returns the same accumulator object that was passed in.
function reduceRemove(p, v) {
  return v.tags.reduce(function(counts, tag) {
    var previous = counts[tag] || 0; // a tag not seen before starts at 0
    counts[tag] = previous - 1;
    return counts;
  }, p);
}

// Reduce "initial" step: supplies a fresh, empty tag-count accumulator.
function reduceInitial() {
  var emptyCounts = {};
  return emptyCounts;
}


// Sample records; each has a key, an array of tags and a date.
// Dates are built from numeric parts (year, zero-based month, day) instead of
// being parsed from "MM/DD/YYYY" strings, because parsing of non-ISO date
// strings is implementation-dependent.
var data = [
    {"key":"KEY-1","tags":["tag1", "tag2"], "date":new Date(2012, 9, 2)},
    {"key":"KEY-2","tags":["tag2"], "date": new Date(2012, 9, 5)},
    {"key":"KEY-3","tags":["tag3", "tag1"], "date":new Date(2012, 9, 8)}];

var cf = crossfilter(data);

// Dimension keyed on each record's whole tags array.
var tags = cf.dimension(function(d){ return d.tags;});
// groupAll() reduces the records into a single object of per-tag counts
// (built by reduceAdd / reduceRemove / reduceInitial); .value() retrieves
// that object, so tagsGroup is the counts object itself, not a group.
var tagsGroup = tags.groupAll().reduce(reduceAdd, reduceRemove, reduceInitial).value();
// hack to make dc.js charts work: gives the counts object an all() method
// that returns its entries as an array of {key, value} objects.
tagsGroup.all = function() {
  var counts = this;
  // Object.keys lists own enumerable properties only. Function-valued
  // properties are skipped so that methods patched onto this object (all
  // itself, or any other helper added later) are never reported as tags.
  return Object.keys(counts)
    .filter(function(key) {
      return typeof counts[key] !== "function";
    })
    .map(function(key) {
      return { key: key, value: counts[key] };
    });
};


// Second dimension, keyed on each record's date, plus its default grouping.
var dates = cf.dimension(function(record) {
  return record.date;
});
var datesGroup = dates.group();


// Row chart of the per-tag counts, rendered into "#chart".
var chart = dc.rowChart("#chart");
chart
    .renderLabel(true)
    .dimension(tags)
    .group(tagsGroup)
    .filterHandler(function(dimension, filter) {
        // perform filtering: a record passes when no filter is set on the
        // chart, or when its tags array contains the chart's current filter.
        dimension.filter(function(recordTags) {
            if (chart.filter() == null) {
                return true;
            }
            return recordTags.indexOf(chart.filter()) >= 0;
        });
        // return the actual filter value
        return filter;
    })
    .xAxis().ticks(3);

// Bar chart rendered into "#chart2", driven by the dates dimension and group.
var chart2 = dc.barChart("#chart2");
    chart2  
    .width(500)
    .transitionDuration(800)
    .margins({top: 10, right: 50, bottom: 30, left: 40})
    .dimension(dates)
    .group(datesGroup)
    .elasticY(true)
    .elasticX(true)
    // NOTE(review): d3.time.* looks like the d3 v3 time API — confirm the d3 version in use.
    .round(d3.time.day.round)    
    .x(d3.time.scale())    
    .xUnits(d3.time.days)
    .centerBar(true)
    .renderHorizontalGridLines(true)       
    .brushOn(true);    


// Render the charts configured above.
dc.renderAll();

Solution 2:

The example above is a great approach. You can take it one step further though. In the solution above, it will only filter based on the first selection you make. Any subsequent selections are ignored.

If you want it to respond to all selections, you would create a filterHandler as follows:

 // Filter handler that honours every selected value: a record passes when
 // at least one element of its array value is among the selected filters.
 barChart.filterHandler(function (dimension, filters) {
     dimension.filter(null);
     if (filters.length !== 0) {
         dimension.filterFunction(function (values) {
             var idx = 0;
             while (idx < values.length) {
                 if (filters.indexOf(values[idx]) !== -1) {
                     return true;
                 }
                 idx += 1;
             }
             return false;
         });
     } else {
         // Nothing selected: leave the dimension unfiltered.
         dimension.filter(null);
     }
     return filters;
 });

Working sample here: http://jsfiddle.net/jeffsteinmetz/cwShL/

Solution 3:

I'd like to try to provide some context for the approach listed by Jeff and Kostya.

You'll notice that the tagsGroup uses groupAll unlike the typical group method. Crossfilter tells us that "The returned object is similar to a standard grouping, except it has no top or order methods. Instead, use value to retrieve the reduce value for all matching records." Kostya called the ".value()" method to retrieve the single object that represents the entire group.

// .value() returns the single reduced object for the whole group, so
// tagsGroup holds that object rather than a group.
var tagsGroup = tags.groupAll().reduce(reduceAdd, reduceRemove, reduceInitial).value();

This object won't work well with dc.js because dc.js expects the group object to have an all method. Kostya patched that object to have an "all" method like so:

// hack to make dc.js charts work: gives the counts object an all() method
// that returns its entries as an array of {key, value} objects.
tagsGroup.all = function() {
  var counts = this;
  // Object.keys lists own enumerable properties only. Function-valued
  // properties are skipped so that methods patched onto this object (all
  // itself, or a patched top) are never reported as tags.
  return Object.keys(counts)
    .filter(function(key) {
      return typeof counts[key] !== "function";
    })
    .map(function(key) {
      return { key: key, value: counts[key] };
    });
};

This will work with a simple dc.js chart, but you won't be able to use all dc.js functionality since not all of the group functions are present. For example, you won't be able to use the "cap" method on your chart because the cap method expects the group object to have a "top" method. You could also patch the top method like so:

// Patched top(): returns the first `count` entries of all(), ordered from the
// largest value to the smallest.
topicsGroup.top = function(count) {
    function byValueDescending(left, right) {
        return right.value - left.value;
    }
    return this.all().sort(byValueDescending).slice(0, count);
};

This will enable your chart to use the cap method:

// Chart configuration that uses cap() on the patched topicsGroup.
barChart
    .renderLabel(true)
    .height(200)
    .dimension(topicsDim)
    .group(topicsGroup)
    // cap() expects the group object to have a top() method.
    .cap(2)
    // Orders by the negated value — presumably so larger values come first; confirm against the dc.js ordering docs.
    .ordering(function(d){return -d.value;})
    .xAxis().ticks(3);

An updated example is available at http://jsfiddle.net/djmartin_umich/m7V89/#base

Solution 4:

Jeff's answer does work, but there is no need to keep track of a "found" variable or to continue the loop once an item has been found. If X is in [X,Y,Z], returning as soon as X matches already cuts the number of iterations to one third.

else
    dimension.filterFunction(function (d) {
        // d is the dimension value being tested (an array here); return as
        // soon as any of its elements is among the selected filters.
        for (var i=0; i < d.length; i++) {
            if (filters.indexOf(d[i]) >= 0) return true;
        }
        // No element matched any selected filter.
        return false; 
    });

Alternatively, you could patch the filterFunction method in dc.js, and that would handle all cases.