Hierarchical queries with Mongo using $graphLookup
I have an employee collection with half a million records. Each record will have the following details.
The mongo document is as follows.
{
"_id": "234463456453643563456",
"name": "Mike",
"empId": "10",
"managerId": "8",
"projects" : [ "123", "456", "789"]
}
- When i give any empId, it should return the complete hierarchies from that manager to the bottom level along with the following filter.
a. filter on location
b. filter on projects
The result should be like,
10 ->>> Manager
/\
/ \
8 6 ---->> 8 & 6 reporting to manager 10
/\ /\
/ \ / \
4 5 2 1 ---->> 4 & 5 reporting to manager 8 ...
Any help will be appreciated for getting the hierarchical results with level?
I am not able to get the result as expected.
Sample Data :-
db.getCollection("employees").insert({"_id":"10","empId": "10","name":"Employee10","managerId":"15" });
db.getCollection("employees").insert({"_id":"8","empId": "8","name":"Employee8","managerId":"10" });
db.getCollection("employees").insert({"_id":"6","empId": "6","name":"Employee6","managerId":"10" });
db.getCollection("employees").insert({"_id":"4","empId": "4","name":"Employee4","managerId":"8" });
db.getCollection("employees").insert({"_id":"5","empId": "5","name":"Employee5","managerId":"8" });
db.getCollection("employees").insert({"_id":"2","empId": "2","name":"Employee2","managerId":"6" });
db.getCollection("employees").insert({"_id":"1","empId": "1","name":"Employee1","managerId":"6" });
Query :-
db.getCollection('employees').aggregate([
{
$match: {
empId : "10"
}
},
{
$graphLookup: {
from: "employees",
startWith: "$empId",
connectFromField: "empId",
connectToField: "managerId",
as: "reportees",
maxDepth: 4,
depthField: "level"
}
},
{
$project: {
"empId":1,
"managerId":1,
"reportees.empId":1,
"reportees.name":1,
"reportees.managerId":1,
"reportees.level":1
}
}
]);
Actual Result :-
{
"_id" : "10",
"empId" : "10",
"managerId" : "15",
"reportees" : [
{
"empId" : "1",
"name" : "Employee1",
"managerId" : "6",
"level" : NumberLong(1)
},
{
"empId" : "4",
"name" : "Employee4",
"managerId" : "8",
"level" : NumberLong(1)
},
{
"empId" : "2",
"name" : "Employee2",
"managerId" : "6",
"level" : NumberLong(1)
},
{
"empId" : "5",
"name" : "Employee5",
"managerId" : "8",
"level" : NumberLong(1)
},
{
"empId" : "6",
"name" : "Employee6",
"managerId" : "10",
"level" : NumberLong(0)
},
{
"empId" : "8",
"name" : "Employee8",
"managerId" : "10",
"level" : NumberLong(0)
}
]
}
Expected Result :-
{
"_id" : "10",
"empId" : "10",
"managerId" : "15",
"reportees" : [
{
"empId" : "6",
"name" : "Employee6",
"managerId" : "10",
"level" : NumberLong(0),
"reportees" : [
{
"empId" : "1",
"name" : "Employee1",
"managerId" : "6",
"level" : NumberLong(1)
},
{
"empId" : "2",
"name" : "Employee2",
"managerId" : "6",
"level" : NumberLong(1)
}
]
},
{
"empId" : "8",
"name" : "Employee8",
"managerId" : "10",
"level" : NumberLong(0),
"reportees" : [
{
"empId" : "5",
"name" : "Employee5",
"managerId" : "8",
"level" : NumberLong(1)
},
{
"empId" : "4",
"name" : "Employee4",
"managerId" : "8",
"level" : NumberLong(1)
}
]
}
]
}
Questions :-
- Is it possible to get the expected output with $graphLookup?
- Also, Is it possible to get the count at the top level and also for each sub level?
- How to apply projection at all level?
- How to apply filter on top of this?
I belive that having level field we can build hierarchical structure from an array using $reduce. To achieve that we need to get reportees
ordered by level descending after $graphLookup
. Unfortunately the only way to do it currently is to use $unwind + $sort + $group which makes the aggregation quite long.
Then we can process that ordered array using $reduce
. In each step we just have to add an employee to the result set including his reportees
from previous level. Additionally we need to detect when level
changes during our processing and rearrange helper arrays in that case.
$addFields simply replaces existing reportees
field in this case. $concatArrays allows us to append current employee ($$this
) to the result. Using $filter we can get reportees
from lower level.
db.getCollection('employees').aggregate([
{
$match: {
empId : "10"
}
},
{
$graphLookup: {
from: "employees",
startWith: "$empId",
connectFromField: "empId",
connectToField: "managerId",
as: "reportees",
maxDepth: 4,
depthField: "level"
}
},
{
$project: {
"empId":1,
"managerId":1,
"reportees.empId":1,
"reportees.name":1,
"reportees.managerId":1,
"reportees.level":1
}
},
{
$unwind: "$reportees"
},
{
$sort: { "reportees.level": -1 }
},
{
$group: {
_id: "$_id",
empId: { $first: "$empId" },
managerId: { $first: "$managerId" },
reportees: { $push: "$reportees" }
}
},
{
$addFields: {
reportees: {
$reduce: {
input: "$reportees",
initialValue: {
currentLevel: -1,
currentLevelEmployees: [],
previousLevelEmployees: []
},
in: {
$let: {
vars: {
prev: {
$cond: [
{ $eq: [ "$$value.currentLevel", "$$this.level" ] },
"$$value.previousLevelEmployees",
"$$value.currentLevelEmployees"
]
},
current: {
$cond: [
{ $eq: [ "$$value.currentLevel", "$$this.level" ] },
"$$value.currentLevelEmployees",
[]
]
}
},
in: {
currentLevel: "$$this.level",
previousLevelEmployees: "$$prev",
currentLevelEmployees: {
$concatArrays: [
"$$current",
[
{ $mergeObjects: [
"$$this",
{ reportees: { $filter: { input: "$$prev", as: "e", cond: { $eq: [ "$$e.managerId", "$$this.empId" ] } } } }
] }
]
]
}
}
}
}
}
}
}
},
{
$addFields: { reportees: "$reportees.currentLevelEmployees" }
}
]).pretty()
Above solution should work for multiple levels. Outputs:
{
"_id" : "10",
"empId" : "10",
"managerId" : "15",
"reportees" : [
{
"empId" : "6",
"name" : "Employee6",
"managerId" : "10",
"level" : NumberLong(0),
"reportees" : [
{
"empId" : "1",
"name" : "Employee1",
"managerId" : "6",
"level" : NumberLong(1),
"reportees" : [ ]
},
{
"empId" : "2",
"name" : "Employee2",
"managerId" : "6",
"level" : NumberLong(1),
"reportees" : [ ]
}
]
},
{
"empId" : "8",
"name" : "Employee8",
"managerId" : "10",
"level" : NumberLong(0),
"reportees" : [
{
"empId" : "5",
"name" : "Employee5",
"managerId" : "8",
"level" : NumberLong(1),
"reportees" : [ ]
},
{
"empId" : "4",
"name" : "Employee4",
"managerId" : "8",
"level" : NumberLong(1),
"reportees" : [ ]
}
]
}
]
}
That's precicsely what you would $graphLookup for (the traversal bit at least). For the filtering part you could simply use $filter or $match depending on how exactly you want to filter.
Have a look at the results of this query:
db.employees.aggregate({
$graphLookup: {
from: "employees",
startWith: "$managerId",
connectFromField: "managerId",
connectToField: "empId",
as: "managers",
}
})
UPDATE 1 based on your clarification:
In order to get the hierarchical structure that you'd like to get you could do the following. However, I wouldn't call this a pretty solution since it requires you statically define the number of levels you want to go down and also to repeat sections but it does the job for your example. Not sure, if/how easily this can be extended to more levels, either. Personally, I think a client side loop solution would be more suitable for this kind of job:
db.employees.aggregate([
{
$match: {
empId : "10"
}
},
// level 0
{
$graphLookup: {
from: "employees",
startWith: "$empId",
connectFromField: "empId",
connectToField: "managerId",
as: "reportees",
maxDepth: 0
}
},
{
$unwind: "$reportees" // flatten
},
{
$addFields: {
"reportees.level": 0 // add level field
}
},
// level 1
{
$graphLookup: {
from: "employees",
startWith: "$reportees.empId",
connectFromField: "reportees.empId",
connectToField: "managerId",
as: "reportees.reportees",
maxDepth: 0
}
},
{
$group: { // group previously flattened documents back together
_id: "$_id",
empId: { $first: "$empId" },
name: { $first: "$name" },
managerId: { $first: "$managerId" },
reportees: { $push: "$reportees" },
}
},
{
$addFields: {
"reportees.reportees.level": 1 // add level field
}
}
])
UPDATE 2:
The following query gets you to where you want to be from an output structure point of view (I omitted the level
field but it should be easy to add). It is, however, not particularly pretty and, again, requires you to define a maximum organisational depth upfront.
db.employees.aggregate([
{
$match: {
empId : "10"
}
},
{
$graphLookup: { // get the relevant documents out of our universe of employees
from: "employees",
startWith: "$empId",
connectFromField: "empId",
connectToField: "managerId",
as: "reportees"
}
},
{
$project: { // add the employee we are interested in into the array of employees we're looking at
_id: 0,
reportees: { $concatArrays: [ "$reportees", [ { _id: "$_id", empId: "$empId", name: "$name", managerId: "$managerId" } ] ] }
}
},
{
$project: {
reportees: {
$let: {
vars: {
managers: {
$filter: { // remove employees with no reportess so keep managers only
input: {
$map: {
input: "$reportees",
as: "this",
in: {
$mergeObjects: [
"$$this",
{
reportees: {
$filter: { // extract reportees from list of employees
input: "$reportees",
as: "that",
cond: {
$eq: [ "$$this._id", "$$that.managerId" ]
}
}
}
}
]
}
}
},
as: "this",
cond: { $ne: [ "$$this.reportees", [] ] }
}
}
},
in: {
$cond: [ // this is to break the processing once we have reached a top level manager
{ $eq: [ "$$managers", [] ] },
"$reportees",
"$$managers"
]
}
}
}
}
},
// second level: exactly identical to the previous stage
// third level: exactly identical to the previous stage
// basically, from here onwards you would need to repeat an exact copy of the previous stage to go one level deeper
]);