Traverse all the Nodes of a JSON Object Tree with JavaScript
I'd like to traverse a JSON object tree, but cannot find any library for that. It doesn't seem difficult but it feels like reinventing the wheel.
In XML there are so many tutorials showing how to traverse an XML tree with DOM :(
If you think jQuery is kind of overkill for such a primitive task, you could do something like that:
//your object
var o = {
foo:"bar",
arr:[1,2,3],
subo: {
foo2:"bar2"
}
};
//called with every property and its value
function process(key,value) {
console.log(key + " : "+value);
}
function traverse(o,func) {
for (var i in o) {
func.apply(this,[i,o[i]]);
if (o[i] !== null && typeof(o[i])=="object") {
//going one step down in the object tree!!
traverse(o[i],func);
}
}
}
//that's all... no magic, no bloated framework
traverse(o,process);
A JSON object is simply a Javascript object. That's actually what JSON stands for: JavaScript Object Notation. So you'd traverse a JSON object however you'd choose to "traverse" a Javascript object in general.
In ES2017 you would do:
Object.entries(jsonObj).forEach(([key, value]) => {
// do something with key and val
});
You can always write a function to recursively descend into the object:
function traverse(jsonObj) {
if( jsonObj !== null && typeof jsonObj == "object" ) {
Object.entries(jsonObj).forEach(([key, value]) => {
// key is either an array index or object key
traverse(value);
});
}
else {
// jsonObj is a number or string
}
}
This should be a good starting point. I highly recommend using modern javascript methods for such things, since they make writing such code much easier.
function traverse(o) {
for (var i in o) {
if (!!o[i] && typeof(o[i])=="object") {
console.log(i, o[i]);
traverse(o[i]);
} else {
console.log(i, o[i]);
}
}
}
There's a new library for traversing JSON data with JavaScript that supports many different use cases.
https://npmjs.org/package/traverse
https://github.com/substack/js-traverse
It works with all kinds of JavaScript objects. It even detects cycles.
It provides the path of each node, too.
Original Simplified Answer
For a newer way to do it if you don't mind dropping IE and mainly supporting more current browsers (check kangax's es6 table for compatibility). You can use es2015 generators for this. I've updated @TheHippo's answer accordingly. Of course if you really want IE support you can use the babel JavaScript transpiler.
// Implementation of Traverse
function* traverse(o, path=[]) {
for (var i in o) {
const itemPath = path.concat(i);
yield [i,o[i],itemPath,o];
if (o[i] !== null && typeof(o[i])=="object") {
//going one step down in the object tree!!
yield* traverse(o[i], itemPath);
}
}
}
// Traverse usage:
//that's all... no magic, no bloated framework
for(var [key, value, path, parent] of traverse({
foo:"bar",
arr:[1,2,3],
subo: {
foo2:"bar2"
}
})) {
// do something here with each key and value
console.log(key, value, path, parent);
}
If you want only own enumerable properties (basically non-prototype chain properties) you can change it to iterate using Object.keys
and a for...of
loop instead:
function* traverse(o,path=[]) {
for (var i of Object.keys(o)) {
const itemPath = path.concat(i);
yield [i,o[i],itemPath,o];
if (o[i] !== null && typeof(o[i])=="object") {
//going one step down in the object tree!!
yield* traverse(o[i],itemPath);
}
}
}
//that's all... no magic, no bloated framework
for(var [key, value, path, parent] of traverse({
foo:"bar",
arr:[1,2,3],
subo: {
foo2:"bar2"
}
})) {
// do something here with each key and value
console.log(key, value, path, parent);
}
EDIT: This edited answer solves infinite looping traversals.
Stopping Pesky Infinite Object Traversals
This edited answer still provides one of the added benefits of my original answer which allows you to use the provided generator function in order to use a cleaner and simple iterable interface (think using for of
loops as in for(var a of b)
where b
is an iterable and a
is an element of the iterable). By using the generator function along with being a simpler api it also helps with code reuse by making it so you don't have to repeat the iteration logic everywhere you want to iterate deeply on an object's properties and it also makes it possible to break
out of the loop if you would like to stop iteration earlier.
One thing that I notice that has not been addressed and that isn't in my original answer is that you should be careful traversing arbitrary (i.e. any "random" set of) objects, because JavaScript objects can be self referencing. This creates the opportunity to have infinite looping traversals. Unmodified JSON data however cannot be self referencing, so if you are using this particular subset of JS objects you don't have to worry about infinite looping traversals and you can refer to my original answer or other answers. Here is an example of a non-ending traversal (note it is not a runnable piece of code, because otherwise it would crash your browser tab).
Also in the generator object in my edited example I opted to use Object.keys
instead of for in
which iterates only non-prototype keys on the object. You can swap this out yourself if you want the prototype keys included. See my original answer section below for both implementations with Object.keys
and for in
.
Worse - This will infinite loop on self-referential objects:
function* traverse(o, path=[]) {
for (var i of Object.keys(o)) {
const itemPath = path.concat(i);
yield [i,o[i],itemPath, o];
if (o[i] !== null && typeof(o[i])=="object") {
//going one step down in the object tree!!
yield* traverse(o[i], itemPath);
}
}
}
//your object
var o = {
foo:"bar",
arr:[1,2,3],
subo: {
foo2:"bar2"
}
};
// this self-referential property assignment is the only real logical difference
// from the above original example which ends up making this naive traversal
// non-terminating (i.e. it makes it infinite loop)
o.o = o;
//that's all... no magic, no bloated framework
for(var [key, value, path, parent] of traverse(o)) {
// do something here with each key and value
console.log(key, value, path, parent);
}
To save yourself from this you can add a set within a closure, so that when the function is first called it starts to build a memory of the objects it has seen and does not continue iteration once it comes across an already seen object. The below code snippet does that and thus handles infinite looping cases.
Better - This will not infinite loop on self-referential objects:
function* traverse(o) {
const memory = new Set();
function * innerTraversal (o, path=[]) {
if(memory.has(o)) {
// we've seen this object before don't iterate it
return;
}
// add the new object to our memory.
memory.add(o);
for (var i of Object.keys(o)) {
const itemPath = path.concat(i);
yield [i,o[i],itemPath, o];
if (o[i] !== null && typeof(o[i])=="object") {
//going one step down in the object tree!!
yield* innerTraversal(o[i], itemPath);
}
}
}
yield* innerTraversal(o);
}
//your object
var o = {
foo:"bar",
arr:[1,2,3],
subo: {
foo2:"bar2"
}
};
/// this self-referential property assignment is the only real logical difference
// from the above original example which makes more naive traversals
// non-terminating (i.e. it makes it infinite loop)
o.o = o;
console.log(o);
//that's all... no magic, no bloated framework
for(var [key, value, path, parent] of traverse(o)) {
// do something here with each key and value
console.log(key, value, path, parent);
}
EDIT: All above examples in this answer have been edited to include a new path variable yielded from the iterator as per @supersan's request. The path variable is an array of strings where each string in the array represents each key that was accessed to get to the resulting iterated value from the original source object. The path variable can be fed into lodash's get function/method. Or you could write your own version of lodash's get which handles only arrays like so:
function get (object, path) {
return path.reduce((obj, pathItem) => obj ? obj[pathItem] : undefined, object);
}
const example = {a: [1,2,3], b: 4, c: { d: ["foo"] }};
// these paths exist on the object
console.log(get(example, ["a", "0"]));
console.log(get(example, ["c", "d", "0"]));
console.log(get(example, ["b"]));
// these paths do not exist on the object
console.log(get(example, ["e", "f", "g"]));
console.log(get(example, ["b", "f", "g"]));
You could also make a set function like so:
function set (object, path, value) {
const obj = path.slice(0,-1).reduce((obj, pathItem) => obj ? obj[pathItem] : undefined, object)
if(obj && obj[path[path.length - 1]]) {
obj[path[path.length - 1]] = value;
}
return object;
}
const example = {a: [1,2,3], b: 4, c: { d: ["foo"] }};
// these paths exist on the object
console.log(set(example, ["a", "0"], 2));
console.log(set(example, ["c", "d", "0"], "qux"));
console.log(set(example, ["b"], 12));
// these paths do not exist on the object
console.log(set(example, ["e", "f", "g"], false));
console.log(set(example, ["b", "f", "g"], null));
EDIT Sep. 2020: I added a parent for quicker access of the previous object. This could allow you to more quickly build a reverse traverser. Also you could always modify the traversal algorithm to do breadth first search instead of depth first which is actually probably more predictable in fact here's a TypeScript version with Breadth First Search. Since this is a JavaScript question I'll put the JS version here:
var TraverseFilter;
(function (TraverseFilter) {
/** prevents the children from being iterated. */
TraverseFilter["reject"] = "reject";
})(TraverseFilter || (TraverseFilter = {}));
function* traverse(o) {
const memory = new Set();
function* innerTraversal(root) {
const queue = [];
queue.push([root, []]);
while (queue.length > 0) {
const [o, path] = queue.shift();
if (memory.has(o)) {
// we've seen this object before don't iterate it
continue;
}
// add the new object to our memory.
memory.add(o);
for (var i of Object.keys(o)) {
const item = o[i];
const itemPath = path.concat([i]);
const filter = yield [i, item, itemPath, o];
if (filter === TraverseFilter.reject)
continue;
if (item !== null && typeof item === "object") {
//going one step down in the object tree!!
queue.push([item, itemPath]);
}
}
}
}
yield* innerTraversal(o);
}
//your object
var o = {
foo: "bar",
arr: [1, 2, 3],
subo: {
foo2: "bar2"
}
};
/// this self-referential property assignment is the only real logical difference
// from the above original example which makes more naive traversals
// non-terminating (i.e. it makes it infinite loop)
o.o = o;
//that's all... no magic, no bloated framework
for (const [key, value, path, parent] of traverse(o)) {
// do something here with each key and value
console.log(key, value, path, parent);
}