How to create a trie in c# [closed]
This is my own code, pulled from my answer to How to find a word from arrays of characters? :
public class Trie
{
public struct Letter
{
public const string Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
public static implicit operator Letter(char c)
{
return new Letter() { Index = Chars.IndexOf(c) };
}
public int Index;
public char ToChar()
{
return Chars[Index];
}
public override string ToString()
{
return Chars[Index].ToString();
}
}
public class Node
{
public string Word;
public bool IsTerminal { get { return Word != null; } }
public Dictionary<Letter, Node> Edges = new Dictionary<Letter, Node>();
}
public Node Root = new Node();
public Trie(string[] words)
{
for (int w = 0; w < words.Length; w++)
{
var word = words[w];
var node = Root;
for (int len = 1; len <= word.Length; len++)
{
var letter = word[len - 1];
Node next;
if (!node.Edges.TryGetValue(letter, out next))
{
next = new Node();
if (len == word.Length)
{
next.Word = word;
}
node.Edges.Add(letter, next);
}
node = next;
}
}
}
Take a look at this codeplex project:
https://github.com/gmamaladze/trienet
It is a library containing several different variants of well tested generic c# trie classes including patricia trie and parallel trie.
-
Trie
– the simple trie, allows only prefix search, like.Where(s => s.StartsWith(searchString))
-
SuffixTrie
- allows also infix search, like.Where(s => s.Contains(searchString))
-
PatriciaTrie
– compressed trie, more compact, a bit more efficient during look-up, but a quite slower durig build-up. -
SuffixPatriciaTrie
– the same asPatriciaTrie
, also enabling infix search. -
ParallelTrie
– very primitively implemented parallel data structure which allows adding data and retriving results from different threads simultaneusly.
A simple Trie implementation.
http://github.com/bharathkumarms/AlgorithmsMadeEasy/blob/master/AlgorithmsMadeEasy/Tries.cs
using System;
using System.Collections.Generic;
using System.Linq;
namespace AlgorithmsMadeEasy
{
class Tries
{
TrieNode root;
public void CreateRoot()
{
root = new TrieNode();
}
public void Add(char[] chars)
{
TrieNode tempRoot = root;
int total = chars.Count() - 1;
for (int i = 0; i < chars.Count(); i++)
{
TrieNode newTrie;
if (tempRoot.children.Keys.Contains(chars[i]))
{
tempRoot = tempRoot.children[chars[i]];
}
else
{
newTrie = new TrieNode();
if (total == i)
{
newTrie.endOfWord = true;
}
tempRoot.children.Add(chars[i], newTrie);
tempRoot = newTrie;
}
}
}
public bool FindPrefix(char[] chars)
{
TrieNode tempRoot = root;
for (int i = 0; i < chars.Count(); i++)
{
if (tempRoot.children.Keys.Contains(chars[i]))
{
tempRoot = tempRoot.children[chars[i]];
}
else
{
return false;
}
}
return true;
}
public bool FindWord(char[] chars)
{
TrieNode tempRoot = root;
int total = chars.Count() - 1;
for (int i = 0; i < chars.Count(); i++)
{
if (tempRoot.children.Keys.Contains(chars[i]))
{
tempRoot = tempRoot.children[chars[i]];
if (total == i)
{
if (tempRoot.endOfWord == true)
{
return true;
}
}
}
else
{
return false;
}
}
return false;
}
}
public class TrieNode
{
public Dictionary<char, TrieNode> children = new Dictionary<char, TrieNode>();
public bool endOfWord;
}
}
/*
Calling Code:
Tries t = new Tries();
t.CreateRoot();
t.Add("abc".ToCharArray());
t.Add("abgl".ToCharArray());
t.Add("cdf".ToCharArray());
t.Add("abcd".ToCharArray());
t.Add("lmn".ToCharArray());
bool findPrefix1 = t.FindPrefix("ab".ToCharArray());
bool findPrefix2 = t.FindPrefix("lo".ToCharArray());
bool findWord1 = t.FindWord("lmn".ToCharArray());
bool findWord2 = t.FindWord("ab".ToCharArray());
bool findWord3 = t.FindWord("cdf".ToCharArray());
bool findWord4 = t.FindWord("ghi".ToCharArray());
*/
Quick google results:
Taken from: Trie Generic
By Glenn Slayden
attributed to Kerry D. Wong
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
public class Trie<TValue> : System.Collections.IEnumerable, IEnumerable<Trie<TValue>.TrieNodeBase>
{
public abstract class TrieNodeBase
{
protected TValue m_value = default(TValue);
public TValue Value
{
get { return m_value; }
set { m_value = value; }
}
public bool HasValue { get { return !Object.Equals(m_value, default(TValue)); } }
public abstract bool IsLeaf { get; }
public abstract TrieNodeBase this[char c] { get; }
public abstract TrieNodeBase[] Nodes { get; }
public abstract void SetLeaf();
public abstract int ChildCount { get; }
public abstract bool ShouldOptimize { get; }
public abstract KeyValuePair<Char, TrieNodeBase>[] CharNodePairs();
public abstract TrieNodeBase AddChild(char c, ref int node_count);
/// <summary>
/// Includes current node value
/// </summary>
/// <returns></returns>
public IEnumerable<TValue> SubsumedValues()
{
if (Value != null)
yield return Value;
if (Nodes != null)
foreach (TrieNodeBase child in Nodes)
if (child != null)
foreach (TValue t in child.SubsumedValues())
yield return t;
}
/// <summary>
/// Includes current node
/// </summary>
/// <returns></returns>
public IEnumerable<TrieNodeBase> SubsumedNodes()
{
yield return this;
if (Nodes != null)
foreach (TrieNodeBase child in Nodes)
if (child != null)
foreach (TrieNodeBase n in child.SubsumedNodes())
yield return n;
}
/// <summary>
/// Doesn't include current node
/// </summary>
/// <returns></returns>
public IEnumerable<TrieNodeBase> SubsumedNodesExceptThis()
{
if (Nodes != null)
foreach (TrieNodeBase child in Nodes)
if (child != null)
foreach (TrieNodeBase n in child.SubsumedNodes())
yield return n;
}
/// <summary>
/// Note: doesn't de-optimize optimized nodes if re-run later
/// </summary>
public void OptimizeChildNodes()
{
if (Nodes != null)
foreach (var q in CharNodePairs())
{
TrieNodeBase n_old = q.Value;
if (n_old.ShouldOptimize)
{
TrieNodeBase n_new = new SparseTrieNode(n_old.CharNodePairs());
n_new.m_value = n_old.m_value;
Trie<TValue>.c_sparse_nodes++;
ReplaceChild(q.Key, n_new);
}
n_old.OptimizeChildNodes();
}
}
public abstract void ReplaceChild(Char c, TrieNodeBase n);
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
/// Sparse Trie Node
///
/// currently, this one's "nodes" value is never null, because we leave leaf nodes as the non-sparse type,
/// (with nodes==null) and they currently never get converted back. Consequently, IsLeaf should always be 'false'.
/// However, we're gonna do the check anyway.
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public class SparseTrieNode : TrieNodeBase
{
Dictionary<Char, TrieNodeBase> d;
public SparseTrieNode(IEnumerable<KeyValuePair<Char, TrieNodeBase>> ie)
{
d = new Dictionary<char, TrieNodeBase>();
foreach (var kvp in ie)
d.Add(kvp.Key, kvp.Value);
}
public override TrieNodeBase this[Char c]
{
get
{
TrieNodeBase node;
return d.TryGetValue(c, out node) ? node : null;
}
}
public override TrieNodeBase[] Nodes { get { return d.Values.ToArray(); } }
/// <summary>
/// do not use in current form. This means, run OptimizeSparseNodes *after* any pruning
/// </summary>
public override void SetLeaf() { d = null; }
public override int ChildCount { get { return d.Count; } }
public override KeyValuePair<Char, TrieNodeBase>[] CharNodePairs()
{
return d.ToArray();
}
public override TrieNodeBase AddChild(char c, ref int node_count)
{
TrieNodeBase node;
if (!d.TryGetValue(c, out node))
{
node = new TrieNode();
node_count++;
d.Add(c, node);
}
return node;
}
public override void ReplaceChild(Char c, TrieNodeBase n)
{
d[c] = n;
}
public override bool ShouldOptimize { get { return false; } }
public override bool IsLeaf { get { return d == null; } }
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
/// Non-sparse Trie Node
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public class TrieNode : TrieNodeBase
{
private TrieNodeBase[] nodes = null;
private Char m_base;
public override int ChildCount { get { return (nodes != null) ? nodes.Count(e => e != null) : 0; } }
public int AllocatedChildCount { get { return (nodes != null) ? nodes.Length : 0; } }
public override TrieNodeBase[] Nodes { get { return nodes; } }
public override void SetLeaf() { nodes = null; }
public override KeyValuePair<Char, TrieNodeBase>[] CharNodePairs()
{
KeyValuePair<Char, TrieNodeBase>[] rg = new KeyValuePair<char, TrieNodeBase>[ChildCount];
Char ch = m_base;
int i = 0;
foreach (TrieNodeBase child in nodes)
{
if (child != null)
rg[i++] = new KeyValuePair<char, TrieNodeBase>(ch, child);
ch++;
}
return rg;
}
public override TrieNodeBase this[char c]
{
get
{
if (nodes != null && m_base <= c && c < m_base + nodes.Length)
return nodes[c - m_base];
return null;
}
}
public override TrieNodeBase AddChild(char c, ref int node_count)
{
if (nodes == null)
{
m_base = c;
nodes = new TrieNodeBase[1];
}
else if (c >= m_base + nodes.Length)
{
Array.Resize(ref nodes, c - m_base + 1);
}
else if (c < m_base)
{
Char c_new = (Char)(m_base - c);
TrieNodeBase[] tmp = new TrieNodeBase[nodes.Length + c_new];
nodes.CopyTo(tmp, c_new);
m_base = c;
nodes = tmp;
}
TrieNodeBase node = nodes[c - m_base];
if (node == null)
{
node = new TrieNode();
node_count++;
nodes[c - m_base] = node;
}
return node;
}
public override void ReplaceChild(Char c, TrieNodeBase n)
{
if (nodes == null || c >= m_base + nodes.Length || c < m_base)
throw new Exception();
nodes[c - m_base] = n;
}
public override bool ShouldOptimize
{
get
{
if (nodes == null)
return false;
return (ChildCount * 9 < nodes.Length); // empirically determined optimal value (space & time)
}
}
public override bool IsLeaf { get { return nodes == null; } }
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
/// Trie proper begins here
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
private TrieNodeBase _root = new TrieNode();
public int c_nodes = 0;
public static int c_sparse_nodes = 0;
// in combination with Add(...), enables C# 3.0 initialization syntax, even though it never seems to call it
public System.Collections.IEnumerator GetEnumerator()
{
return _root.SubsumedNodes().GetEnumerator();
}
IEnumerator<TrieNodeBase> IEnumerable<TrieNodeBase>.GetEnumerator()
{
return _root.SubsumedNodes().GetEnumerator();
}
public IEnumerable<TValue> Values { get { return _root.SubsumedValues(); } }
public void OptimizeSparseNodes()
{
if (_root.ShouldOptimize)
{
_root = new SparseTrieNode(_root.CharNodePairs());
c_sparse_nodes++;
}
_root.OptimizeChildNodes();
}
public TrieNodeBase Root { get { return _root; } }
public TrieNodeBase Add(String s, TValue v)
{
TrieNodeBase node = _root;
foreach (Char c in s)
node = node.AddChild(c,ref c_nodes);
node.Value = v;
return node;
}
public bool Contains(String s)
{
TrieNodeBase node = _root;
foreach (Char c in s)
{
node = node[c];
if (node == null)
return false;
}
return node.HasValue;
}
/// <summary>
/// Debug only; this is hideously inefficient
/// </summary>
public String GetKey(TrieNodeBase seek)
{
String sofar = String.Empty;
GetKeyHelper fn = null;
fn = (TrieNodeBase cur) =>
{
sofar += " "; // placeholder
foreach (var kvp in cur.CharNodePairs())
{
Util.SetStringChar(ref sofar, sofar.Length - 1, kvp.Key);
if (kvp.Value == seek)
return true;
if (kvp.Value.Nodes != null && fn(kvp.Value))
return true;
}
sofar = sofar.Substring(0, sofar.Length - 1);
return false;
};
if (fn(_root))
return sofar;
return null;
}
/// <summary>
/// Debug only; this is hideously inefficient
/// </summary>
delegate bool GetKeyHelper(TrieNodeBase cur);
public String GetKey(TValue seek)
{
String sofar = String.Empty;
GetKeyHelper fn = null;
fn = (TrieNodeBase cur) =>
{
sofar += " "; // placeholder
foreach (var kvp in cur.CharNodePairs())
{
Util.SetStringChar(ref sofar, sofar.Length - 1, kvp.Key);
if (kvp.Value.Value != null && kvp.Value.Value.Equals(seek))
return true;
if (kvp.Value.Nodes != null && fn(kvp.Value))
return true;
}
sofar = sofar.Substring(0, sofar.Length - 1);
return false;
};
if (fn(_root))
return sofar;
return null;
}
public TrieNodeBase FindNode(String s_in)
{
TrieNodeBase node = _root;
foreach (Char c in s_in)
if ((node = node[c]) == null)
return null;
return node;
}
/// <summary>
/// If continuation from the terminal node is possible with a different input string, then that node is not
/// returned as a 'last' node for the given input. In other words, 'last' nodes must be leaf nodes, where
/// continuation possibility is truly unknown. The presense of a nodes array that we couldn't match to
/// means the search fails; it is not the design of the 'OrLast' feature to provide 'closest' or 'best'
/// matching but rather to enable truncated tails still in the context of exact prefix matching.
/// </summary>
public TrieNodeBase FindNodeOrLast(String s_in, out bool f_exact)
{
TrieNodeBase node = _root;
foreach (Char c in s_in)
{
if (node.IsLeaf)
{
f_exact = false;
return node;
}
if ((node = node[c]) == null)
{
f_exact = false;
return null;
}
}
f_exact = true;
return node;
}
// even though I found some articles that attest that using a foreach enumerator with arrays (and Lists)
// returns a value type, thus avoiding spurious garbage, I had already changed the code to not use enumerator.
public unsafe TValue Find(String s_in)
{
TrieNodeBase node = _root;
fixed (Char* pin_s = s_in)
{
Char* p = pin_s;
Char* p_end = p + s_in.Length;
while (p < p_end)
{
if ((node = node[*p]) == null)
return default(TValue);
p++;
}
return node.Value;
}
}
public unsafe TValue Find(Char* p_tag, int cb_ctag)
{
TrieNodeBase node = _root;
Char* p_end = p_tag + cb_ctag;
while (p_tag < p_end)
{
if ((node = node[*p_tag]) == null)
return default(TValue);
p_tag++;
}
return node.Value;
}
public IEnumerable<TValue> FindAll(String s_in)
{
TrieNodeBase node = _root;
foreach (Char c in s_in)
{
if ((node = node[c]) == null)
break;
if (node.Value != null)
yield return node.Value;
}
}
public IEnumerable<TValue> SubsumedValues(String s)
{
TrieNodeBase node = FindNode(s);
if (node == null)
return Enumerable.Empty<TValue>();
return node.SubsumedValues();
}
public IEnumerable<TrieNodeBase> SubsumedNodes(String s)
{
TrieNodeBase node = FindNode(s);
if (node == null)
return Enumerable.Empty<TrieNodeBase>();
return node.SubsumedNodes();
}
public IEnumerable<TValue> AllSubstringValues(String s)
{
int i_cur = 0;
while (i_cur < s.Length)
{
TrieNodeBase node = _root;
int i = i_cur;
while (i < s.Length)
{
node = node[s[i]];
if (node == null)
break;
if (node.Value != null)
yield return node.Value;
i++;
}
i_cur++;
}
}
/// <summary>
/// note: only returns nodes with non-null values
/// </summary>
public void DepthFirstTraverse(Action<String,TrieNodeBase> callback)
{
Char[] rgch = new Char[100];
int depth = 0;
Action<TrieNodeBase> fn = null;
fn = (TrieNodeBase cur) =>
{
if (depth >= rgch.Length)
{
Char[] tmp = new Char[rgch.Length * 2];
Buffer.BlockCopy(rgch, 0, tmp, 0, rgch.Length * sizeof(Char));
rgch = tmp;
}
foreach (var kvp in cur.CharNodePairs())
{
rgch[depth] = kvp.Key;
TrieNodeBase n = kvp.Value;
if (n.Nodes != null)
{
depth++;
fn(n);
depth--;
}
else if (n.Value == null) // leaf nodes should always have a value
throw new Exception();
if (n.Value != null)
callback(new String(rgch, 0, depth+1), n);
}
};
fn(_root);
}
/// <summary>
/// note: only returns nodes with non-null values
/// </summary>
public void EnumerateLeafPaths(Action<String,IEnumerable<TrieNodeBase>> callback)
{
Stack<TrieNodeBase> stk = new Stack<TrieNodeBase>();
Char[] rgch = new Char[100];
Action<TrieNodeBase> fn = null;
fn = (TrieNodeBase cur) =>
{
if (stk.Count >= rgch.Length)
{
Char[] tmp = new Char[rgch.Length * 2];
Buffer.BlockCopy(rgch, 0, tmp, 0, rgch.Length * sizeof(Char));
rgch = tmp;
}
foreach (var kvp in cur.CharNodePairs())
{
rgch[stk.Count] = kvp.Key;
TrieNodeBase n = kvp.Value;
stk.Push(n);
if (n.Nodes != null)
fn(n);
else
{
if (n.Value == null) // leaf nodes should always have a value
throw new Exception();
callback(new String(rgch, 0, stk.Count), stk);
}
stk.Pop();
}
};
fn(_root);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
/// Convert a trie with one value type to another
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public Trie<TNew> ToTrie<TNew>(Func<TValue, TNew> value_converter)
{
Trie<TNew> t = new Trie<TNew>();
DepthFirstTraverse((s,n)=>{
t.Add(s,value_converter(n.Value));
});
return t;
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///
///
///
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public static class TrieExtension
{
public static Trie<TValue> ToTrie<TValue>(this IEnumerable<String> src, Func<String, int, TValue> selector)
{
Trie<TValue> t = new Trie<TValue>();
int idx = 0;
foreach (String s in src)
t.Add(s,selector(s,idx++));
return t;
}
public static Trie<TValue> ToTrie<TValue>(this Dictionary<String, TValue> src)
{
Trie<TValue> t = new Trie<TValue>();
foreach (var kvp in src)
t.Add(kvp.Key, kvp.Value);
return t;
}
public static IEnumerable<TValue> AllSubstringValues<TValue>(this String s, Trie<TValue> trie)
{
return trie.AllSubstringValues(s);
}
public static void AddToValueHashset<TKey, TValue>(this Dictionary<TKey, HashSet<TValue>> d, TKey k, TValue v)
{
HashSet<TValue> hs;
if (d.TryGetValue(k, out hs))
hs.Add(v);
else
d.Add(k, new HashSet<TValue> { v });
}
};