MongoDB aggregate within daily grouping [duplicate]
Solution 1:
MongoDB 2.8 RC2 (the release line that shipped as 3.0) introduced a new aggregation operator, $dateToString, which can be used to group by day and simply have a "YYYY-MM-DD" string in the result:
Example from the documentation:
// Single-stage pipeline: render each document's `date` field as two strings,
// one holding the calendar day and one holding the time of day.
var pipeline = [
  {
    $project: {
      // Calendar day, e.g. "2014-01-01".
      yearMonthDay: { $dateToString: { format: "%Y-%m-%d", date: "$date" } },
      // Time of day down to milliseconds, e.g. "08:15:39:736".
      time: { $dateToString: { format: "%H:%M:%S:%L", date: "$date" } }
    }
  }
];
db.sales.aggregate(pipeline)
will result in:
{ "_id" : 1, "yearMonthDay" : "2014-01-01", "time" : "08:15:39:736" }
Solution 2:
UPDATE: This updated answer is based on the date features in 3.6, and it also shows how to include dates in the range that had no sales (which wasn't covered in any of the original answers, including mine).
Sample data:
db.inventory.find()
{ "_id" : ObjectId("5aca30eefa1585de22d7095f"), "make" : "Nissan", "saleDate" : ISODate("2013-04-10T12:39:50.676Z") }
{ "_id" : ObjectId("5aca30eefa1585de22d70960"), "make" : "Nissan" }
{ "_id" : ObjectId("5aca30effa1585de22d70961"), "make" : "Nissan", "saleDate" : ISODate("2013-04-10T11:39:50.676Z") }
{ "_id" : ObjectId("5aca30effa1585de22d70962"), "make" : "Toyota", "saleDate" : ISODate("2013-04-09T11:39:50.676Z") }
{ "_id" : ObjectId("5aca30effa1585de22d70963"), "make" : "Toyota", "saleDate" : ISODate("2013-04-10T11:38:50.676Z") }
{ "_id" : ObjectId("5aca30effa1585de22d70964"), "make" : "Toyota", "saleDate" : ISODate("2013-04-10T11:37:50.676Z") }
{ "_id" : ObjectId("5aca30effa1585de22d70965"), "make" : "Toyota", "saleDate" : ISODate("2013-04-10T11:36:50.676Z") }
{ "_id" : ObjectId("5aca30effa1585de22d70966"), "make" : "Toyota", "saleDate" : ISODate("2013-04-10T11:35:50.676Z") }
{ "_id" : ObjectId("5aca30f9fa1585de22d70967"), "make" : "Toyota", "saleDate" : ISODate("2013-04-11T11:35:50.676Z") }
{ "_id" : ObjectId("5aca30fffa1585de22d70968"), "make" : "Toyota", "saleDate" : ISODate("2013-04-13T11:35:50.676Z") }
{ "_id" : ObjectId("5aca3921fa1585de22d70969"), "make" : "Honda", "saleDate" : ISODate("2013-04-13T00:00:00Z") }
Define startDate and endDate as variables and use them in the aggregation:
// Reporting window [startDate, endDate).
startDate = ISODate("2013-04-08T00:00:00Z");
endDate = ISODate("2013-04-15T00:00:00Z");

// Number of days in the window, computed client-side.
// $range only accepts values that fit in a 32-bit integer, so feeding it
// millisecond offsets ({$subtract:[endDate,startDate]}) fails for windows
// longer than ~24.8 days. Generating day indexes and scaling them by
// msPerDay yields the same dates without that limit.
msPerDay = 1000*60*60*24;
dayCount = Math.ceil((endDate - startDate) / msPerDay);

db.inventory.aggregate([
  // Keep only sales that fall inside the reporting window.
  {$match: {"saleDate": {$gte: startDate, $lt: endDate}}},
  {$addFields: {
    // Truncate the sale timestamp to the start of its day.
    saleDate: {$dateFromParts: {
      year: {$year: "$saleDate"},
      month: {$month: "$saleDate"},
      day: {$dayOfMonth: "$saleDate"}
    }},
    // Attach every day of the window to every document, so that days
    // without any sale still produce a group after $unwind.
    dateRange: {$map: {
      input: {$range: [0, dayCount]},
      in: {$add: [startDate, {$multiply: ["$$this", msPerDay]}]}
    }}
  }},
  {$unwind: "$dateRange"},
  // One group per day of the window. A document counts as a sale only for
  // the day it was actually sold on; for every other day it pushes {count:0}.
  {$group: {
    _id: "$dateRange",
    sales: {$push: {$cond: [
      {$eq: ["$dateRange", "$saleDate"]},
      {make: "$make", count: 1},
      {count: 0}
    ]}}
  }},
  {$sort: {_id: 1}},
  {$project: {
    _id: 0,
    saleDate: "$_id",
    totalSold: {$sum: "$sales.count"},
    // Build {make: count}: start from [{k: make, v: 0}] for each distinct
    // make sold that day, then fold every real sale into its make's counter.
    byBrand: {$arrayToObject: {$reduce: {
      input: {$filter: {input: "$sales", cond: "$$this.count"}},
      initialValue: {$map: {input: {$setUnion: ["$sales.make"]}, in: {k: "$$this", v: 0}}},
      in: {$let: {
        // Capture the current sale and the accumulator, because "$$this"
        // is rebound inside the inner $map.
        vars: {t: "$$this", v: "$$value"},
        in: {$map: {
          input: "$$v",
          in: {
            k: "$$this.k",
            v: {$cond: [
              {$eq: ["$$this.k", "$$t.make"]},
              {$add: ["$$this.v", "$$t.count"]},
              "$$this.v"
            ]}
          }
        }}
      }}
    }}}
  }}
])
On sample data this gives results:
{ "saleDate" : ISODate("2013-04-08T00:00:00Z"), "totalSold" : 0, "byBrand" : { } }
{ "saleDate" : ISODate("2013-04-09T00:00:00Z"), "totalSold" : 1, "byBrand" : { "Toyota" : 1 } }
{ "saleDate" : ISODate("2013-04-10T00:00:00Z"), "totalSold" : 6, "byBrand" : { "Nissan" : 2, "Toyota" : 4 } }
{ "saleDate" : ISODate("2013-04-11T00:00:00Z"), "totalSold" : 1, "byBrand" : { "Toyota" : 1 } }
{ "saleDate" : ISODate("2013-04-12T00:00:00Z"), "totalSold" : 0, "byBrand" : { } }
{ "saleDate" : ISODate("2013-04-13T00:00:00Z"), "totalSold" : 2, "byBrand" : { "Honda" : 1, "Toyota" : 1 } }
{ "saleDate" : ISODate("2013-04-14T00:00:00Z"), "totalSold" : 0, "byBrand" : { } }
This aggregation can also be done with two $group stages and a simple $project, instead of a single $group and a complex $project. Here it is:
// Number of days in the [startDate, endDate) window, computed client-side.
// $range only accepts values that fit in a 32-bit integer, so generating
// millisecond offsets inside the pipeline fails for windows longer than
// ~24.8 days; day indexes scaled by msPerDay do not have that limit.
msPerDay = 1000*60*60*24;
dayCount = Math.ceil((endDate - startDate) / msPerDay);

db.inventory.aggregate([
  // Keep only sales that fall inside the reporting window.
  {$match: {"saleDate": {$gte: startDate, $lt: endDate}}},
  {$addFields: {
    // Truncate the sale timestamp to the start of its day.
    saleDate: {$dateFromParts: {
      year: {$year: "$saleDate"},
      month: {$month: "$saleDate"},
      day: {$dayOfMonth: "$saleDate"}
    }},
    // Attach every day of the window to every document, so that days
    // without any sale still produce a group after $unwind.
    dateRange: {$map: {
      input: {$range: [0, dayCount]},
      in: {$add: [startDate, {$multiply: ["$$this", msPerDay]}]}
    }}
  }},
  {$unwind: "$dateRange"},
  // First pass: per (day, make), count the documents actually sold that day.
  {$group: {
    _id: {date: "$dateRange", make: "$make"},
    count: {$sum: {$cond: [{$eq: ["$dateRange", "$saleDate"]}, 1, 0]}}
  }},
  // Second pass: per day, total the counts and collect {k: make, v: count}
  // pairs for $arrayToObject.
  {$group: {
    _id: "$_id.date",
    total: {$sum: "$count"},
    byBrand: {$push: {k: "$_id.make", v: {$sum: "$count"}}}
  }},
  {$sort: {_id: 1}},
  {$project: {
    _id: 0,
    saleDate: "$_id",
    totalSold: "$total",
    // Drop the makes with a zero count before turning the pairs into an object.
    byBrand: {$arrayToObject: {$filter: {input: "$byBrand", cond: "$$this.v"}}}
  }}
])
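Results (same dates and totals as above; the byBrand objects in this listing also include brands with zero sales):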
{ "saleDate" : ISODate("2013-04-08T00:00:00Z"), "totalSold" : 0, "byBrand" : { "Honda" : 0, "Toyota" : 0, "Nissan" : 0 } }
{ "saleDate" : ISODate("2013-04-09T00:00:00Z"), "totalSold" : 1, "byBrand" : { "Honda" : 0, "Nissan" : 0, "Toyota" : 1 } }
{ "saleDate" : ISODate("2013-04-10T00:00:00Z"), "totalSold" : 6, "byBrand" : { "Honda" : 0, "Toyota" : 4, "Nissan" : 2 } }
{ "saleDate" : ISODate("2013-04-11T00:00:00Z"), "totalSold" : 1, "byBrand" : { "Toyota" : 1, "Honda" : 0, "Nissan" : 0 } }
{ "saleDate" : ISODate("2013-04-12T00:00:00Z"), "totalSold" : 0, "byBrand" : { "Toyota" : 0, "Nissan" : 0, "Honda" : 0 } }
{ "saleDate" : ISODate("2013-04-13T00:00:00Z"), "totalSold" : 2, "byBrand" : { "Honda" : 1, "Toyota" : 1, "Nissan" : 0 } }
{ "saleDate" : ISODate("2013-04-14T00:00:00Z"), "totalSold" : 0, "byBrand" : { "Toyota" : 0, "Honda" : 0, "Nissan" : 0 } }
Original Answer based on 2.6:
You might want to take a look at my blog entry about how to deal with various date manipulations in Aggregation Framework here.
What you can do is use a $project phase to truncate your dates to daily resolution, and then run the aggregation over the whole data set (or just part of it), aggregating by date and make.
With your sample data, say you want to know how many vehicles you sold by make, by date this year:
match={"$match" : {
"saleDate" : { "$gt" : new Date(2013,0,1) }
}
};
proj1={"$project" : {
"_id" : 0,
"saleDate" : 1,
"make" : 1,
"h" : {
"$hour" : "$saleDate"
},
"m" : {
"$minute" : "$saleDate"
},
"s" : {
"$second" : "$saleDate"
},
"ml" : {
"$millisecond" : "$saleDate"
}
}
};
proj2={"$project" : {
"_id" : 0,
"make" : 1,
"saleDate" : {
"$subtract" : [
"$saleDate",
{
"$add" : [
"$ml",
{
"$multiply" : [
"$s",
1000
]
},
{
"$multiply" : [
"$m",
60,
1000
]
},
{
"$multiply" : [
"$h",
60,
60,
1000
]
}
]
}
]
}
}
};
group={"$group" : {
"_id" : {
"m" : "$make",
"d" : "$saleDate"
},
"count" : {
"$sum" : 1
}
}
};
Now running the aggregation gives you:
// Pass the stages as one array: the array form is the documented pipeline
// argument, whereas separate stage arguments are a legacy form that leaves
// no room for an options document.
db.inventory.aggregate([match, proj1, proj2, group])
{
"result" : [
{
"_id" : {
"m" : "Toyota",
"d" : ISODate("2013-04-10T00:00:00Z")
},
"count" : 4
},
{
"_id" : {
"m" : "Toyota",
"d" : ISODate("2013-04-09T00:00:00Z")
},
"count" : 1
},
{
"_id" : {
"m" : "Nissan",
"d" : ISODate("2013-04-10T00:00:00Z")
},
"count" : 2
}
],
"ok" : 1
}
You can add another {$project} phase to pretty up the output and you can add a {$sort} step, but basically for each date, for each make you get a count of how many were sold.
Solution 3:
I like user1083621's answer, but that method limits what you can do with the field afterwards, because you cannot use it as a date field in (for instance) later aggregation pipeline stages. You can neither compare it as a date nor apply any date aggregation operators to it, and after the aggregation you are left with strings(!). All of that can be solved by also projecting your original date field, but in that case you will have some difficulty retaining it through the grouping stage. And after all, sometimes you just want to work with the beginning of the day rather than an arbitrary time of day. So here's my method:
{'$project': {
'start_of_day': {'$subtract': [
'$date',
{'$add': [
{'$multiply': [{'$hour': '$date'}, 3600000]},
{'$multiply': [{'$minute': '$date'}, 60000]},
{'$multiply': [{'$second': '$date'}, 1000]},
{'$millisecond': '$date'}
]}
]},
}}
It gives you this:
{
"start_of_day" : ISODate("2015-12-03T00:00:00.000Z")
},
{
"start_of_day" : ISODate("2015-12-04T00:00:00.000Z")
}
I can't say whether it is any faster than user1083621's method.