How to efficiently build a tree from a flat structure?

117,517

Solution 1

Store IDs of the objects in a hash table mapping to the specific object. Enumerate through all the objects and find their parent if it exists and update its parent pointer accordingly.

class MyObject
{ // The actual object
    public int ParentID { get; set; }
    public int ID { get; set; }
}

class Node
{
    public List<Node> Children = new List<Node>();
    public Node Parent { get; set; }
    public MyObject AssociatedObject { get; set; }
}

IEnumerable<Node> BuildTreeAndGetRoots(List<MyObject> actualObjects)
{
    Dictionary<int, Node> lookup = new Dictionary<int, Node>();
    actualObjects.ForEach(x => lookup.Add(x.ID, new Node { AssociatedObject = x }));
    foreach (var item in lookup.Values) {
        Node proposedParent;
        if (lookup.TryGetValue(item.AssociatedObject.ParentID, out proposedParent)) {
            item.Parent = proposedParent;
            proposedParent.Children.Add(item);
        }
    }
    return lookup.Values.Where(x => x.Parent == null);
}

Solution 2

Here is a simple JavaScript algorithm to parse a flat table into a parent/child tree structure that runs in N time:

var table = [
    {parent_id: 0, id: 1, children: []},
    {parent_id: 0, id: 2, children: []},
    {parent_id: 0, id: 3, children: []},
    {parent_id: 1, id: 4, children: []},
    {parent_id: 1, id: 5, children: []},
    {parent_id: 1, id: 6, children: []},
    {parent_id: 2, id: 7, children: []},
    {parent_id: 7, id: 8, children: []},
    {parent_id: 8, id: 9, children: []},
    {parent_id: 3, id: 10, children: []}
];

var root = {id:0, parent_id: null, children: []};
var node_list = { 0 : root};

for (var i = 0; i < table.length; i++) {
    node_list[table[i].id] = table[i];
    node_list[table[i].parent_id].children.push(node_list[table[i].id]);
}

console.log(root);

Solution 3

Based on the answer of Mehrdad Afshari and the comment of Andrew Hanlon for a speedup, here is my take.

Important difference to the original task: A root node has ID==parentID.

class MyObject
{   // The actual object
    public int ParentID { get; set; }
    public int ID { get; set; }
}

class Node
{
    public List<Node> Children = new List<Node>();
    public Node Parent { get; set; }
    public MyObject Source { get; set; }
}

List<Node> BuildTreeAndGetRoots(List<MyObject> actualObjects)
{
    var lookup = new Dictionary<int, Node>();
    var rootNodes = new List<Node>();

    foreach (var item in actualObjects)
    {
        // add us to lookup
        Node ourNode;
        if (lookup.TryGetValue(item.ID, out ourNode))
        {   // was already found as a parent - register the actual object
            ourNode.Source = item;
        }
        else
        {
            ourNode = new Node() { Source = item };
            lookup.Add(item.ID, ourNode);
        }

        // hook into parent
        if (item.ParentID == item.ID)
        {   // is a root node
            rootNodes.Add(ourNode);
        }
        else
        {   // is a child row - so we have a parent
            Node parentNode;
            if (!lookup.TryGetValue(item.ParentID, out parentNode))
            {   // unknown parent, construct preliminary parent
                parentNode = new Node();
                lookup.Add(item.ParentID, parentNode);
            }
            parentNode.Children.Add(ourNode);
            ourNode.Parent = parentNode;
        }
    }

    return rootNodes;
}

Solution 4

Python solution

    def subtree(node, relationships):
        return {
            v: subtree(v, relationships) 
            for v in [x[0] for x in relationships if x[1] == node]
        }

For example:

    # (child, parent) pairs where -1 means no parent    
    flat_tree = [
         (1, -1),
         (4, 1),
         (10, 4),
         (11, 4),
         (16, 11),
         (17, 11),
         (24, 17),
         (25, 17),
         (5, 1),
         (8, 5),
         (9, 5),
         (7, 9),
         (12, 9),
         (22, 12),
         (23, 12),
         (2, 23),
         (26, 23),
         (27, 23),
         (20, 9),
         (21, 9)
        ]
    
    subtree(-1, flat_tree)

Produces:

    {
        "1": {
            "4": {
                "10": {}, 
                "11": {
                    "16": {}, 
                    "17": {
                        "24": {}, 
                        "25": {}
                    }
                }
            }, 
            "5": {
                "8": {}, 
                "9": {
                    "20": {}, 
                    "12": {
                        "22": {}, 
                        "23": {
                            "2": {}, 
                            "27": {}, 
                            "26": {}
                        }
                    }, 
                    "21": {}, 
                    "7": {}
                }
            }
        }
    }

Solution 5

JS version that returns one root or an array of roots each of which will have a Children array property containing the related children. Does not depend on ordered input, decently compact, and does not use recursion. Enjoy!

// creates a tree from a flat set of hierarchically related data
var MiracleGrow = function(treeData, key, parentKey)
{
    var keys = [];
    treeData.map(function(x){
        x.Children = [];
        keys.push(x[key]);
    });
    var roots = treeData.filter(function(x){return keys.indexOf(x[parentKey])==-1});
    var nodes = [];
    roots.map(function(x){nodes.push(x)});
    while(nodes.length > 0)
    {

        var node = nodes.pop();
        var children =  treeData.filter(function(x){return x[parentKey] == node[key]});
        children.map(function(x){
            node.Children.push(x);
            nodes.push(x)
        });
    }
    if (roots.length==1) return roots[0];
    return roots;
}


// demo/test data
var treeData = [

    {id:9, name:'Led Zep', parent:null},
    {id:10, name:'Jimmy', parent:9},
    {id:11, name:'Robert', parent:9},
    {id:12, name:'John', parent:9},

    {id:8, name:'Elec Gtr Strings', parent:5},
    {id:1, name:'Rush', parent:null},
    {id:2, name:'Alex', parent:1},
    {id:3, name:'Geddy', parent:1},
    {id:4, name:'Neil', parent:1},
    {id:5, name:'Gibson Les Paul', parent:2},
    {id:6, name:'Pearl Kit', parent:4},
    {id:7, name:'Rickenbacker', parent:3},

    {id:100, name:'Santa', parent:99},
    {id:101, name:'Elf', parent:100},

];
var root = MiracleGrow(treeData, "id", "parent")
console.log(root)
Share:
117,517

Related videos on Youtube

Costo
Author by

Costo

I'm a Web developer using mainly Microsoft technologies (C# / SQL Server)

Updated on April 20, 2022

Comments

  • Costo
    Costo about 2 years

    I have a bunch of objects in a flat structure. These objects have an ID and a ParentID property so they can be arranged in trees. They are in no particular order. Each ParentID property does not necessarily matches with an ID in the structure. Therefore their could be several trees emerging from these objects.

    How would you process these objects to create the resulting trees ?

    I'm not so far from a solution but I'm sure it is far from optimal...

    I need to create these trees to then insert Data into a database, in proper order.

    There are no circular references. A Node is a RootNode when ParentID == null or when ParentID can't be found in the other objects

    • D'Arcy Rittich
      D'Arcy Rittich over 15 years
      What do you mean by "create"? Render in a UI? Store in a hierarchical fashion in XML or a database?
    • nes1983
      nes1983 over 15 years
      I find this question quite cool.
    • ebram khalil
      ebram khalil almost 7 years
  • Jason S
    Jason S over 15 years
    which language is that? (I take it C#)
  • Andrew Hanlon
    Andrew Hanlon over 9 years
    This algo is (in informal notation) O(3N), where as an O(1N) solution is easily achievable by either instantiating partial Nodes for non 'traversed' parents OR by keeping a secondary look-up tables for children of non-instantiated parents. Likely does not matter for most real-world uses, but it could be significant on large data sets.
  • Ced
    Ced almost 9 years
    @AndrewHanlon maybe you should post the sol for 0(1N)
  • Andrew Hanlon
    Andrew Hanlon almost 9 years
    Nice, this is basically the approach I was alluding to. I would however just use a pseudo root node (with ID = 0 and null Parent) and remove the self reference requirement.
  • Andrew Hanlon
    Andrew Hanlon almost 9 years
    @Ced Martin Schmidt's answer below is very close to how I would implement it. As can be seen, it uses a single loop and the rest are hashtable operations.
  • HuBeZa
    HuBeZa about 8 years
    Down voter, please comment. I'll be happy to know what I did wrong.
  • JakeWilson801
    JakeWilson801 almost 8 years
    O(3N) is just O(N) ;)
  • plauriola
    plauriola almost 8 years
    The only thing missing in this example is assigning the Parent field into every children node. To do so, we only need to set the Parent field after adding the children to it's Parent Collection. Like so: parentNode.Children.Add(ourNode); ourNode.Parent = parentNode;
  • Martin Schmidt
    Martin Schmidt over 7 years
    @plauriola True, thanks, I added this. An alternative would be to just remove the Parent property, it's not necessary for the core algorithm.
  • Jordi Nebot
    Jordi Nebot over 7 years
    This question is 7 years old and already has a voted and accepted answer. If you think you have a better solution, it'd be great to add some explanation to your code.
  • hakan
    hakan about 7 years
    trying to convert this approach to C#.
  • hakan
    hakan about 7 years
    realized that if id starts from something big like 1001 then we get index out of bound exception...
  • Philip Stanislaus
    Philip Stanislaus about 7 years
    Since I could not find a npm module which implements a O(n) solution, I created the following one (unit tested, 100% code coverage, only 0.5 kb in size and includes typings. Maybe it helps someone: npmjs.com/package/performant-array-to-tree
  • simple guy
    simple guy over 5 years
    Hi. How do I add another attribute in the output? ie. name, parent_id
  • Ziad Akiki
    Ziad Akiki over 5 years
    You should explain a bit what's your idea behind the code.
  • Vimal Bhatt
    Vimal Bhatt over 5 years
    It is just Java translation of the earlier answer
  • aloisdg
    aloisdg over 5 years
    Tip: use console.log(JSON.stringify(root, null, 2)); to pretty print the output.
  • Ed Randall
    Ed Randall over 5 years
    For every parentId it's looping the whole model - is this not O(N^2) ?
  • Silly Volley
    Silly Volley almost 5 years
    how could I make use of this to generate something like this github.com/jakezatecky/react-checkbox-tree/blob/master/examp‌​les/…
  • Cody C
    Cody C over 4 years
    This approach works well for this unordered type of data.
  • Ullauri
    Ullauri about 4 years
    I believe the check for null needs to be for undefined
  • Hirurg103
    Hirurg103 about 4 years
    @Ullauri null == undefined => true in NodeJS
  • ccpizza
    ccpizza about 4 years
    by far the most elegant!
  • ccpizza
    ccpizza about 4 years
    @simpleguy: the list comprehension can be unfolded in case you need more control, e.g.: def recurse(id, pages): for row in rows: if row['id'] == id: print(f'''{row['id']}:{row['parent_id']} {row['path']} {row['title']}''') recurse(row['id'], rows)
  • Marcelo Scofano Diniz
    Marcelo Scofano Diniz almost 4 years
    It is too old but the List Item 8 new Node { parent_id = 7, id = 9 }, prevents node_list.Add(item.id, item); to complete because Key cannot repeat; it's a typo; so, instead of id = 9, type id = 8
  • Joel Malone
    Joel Malone almost 4 years
    Fixed. Thanks @MarceloScofano!
  • Disappointed
    Disappointed over 2 years
    Looks it will fail for random nodes order. (for example,when root node will be in the end)
  • AaA
    AaA over 2 years
    This will fail if nodes are not sorted by parent id