Performance Benchmarking of Contains, Exists and Any

I have been searching for a performance benchmarking between Contains, Exists and Any methods available in the List<T>. I wanted to find this out just out of curiosity as I was always confused among these. Many questions on SO described definitions of these methods such as:

  1. LINQ Ring: Any() vs Contains() for Huge Collections
  2. Linq .Any VS .Exists - Whats the difference?
  3. LINQ extension methods - Any() vs. Where() vs. Exists()

So I decided to do it myself. I am adding it as an answer. Any more insight on the results is most welcomed. I also did this benchmarking for arrays to see the results


The fastest way is to use a HashSet. The Contains for a HashSet is O(1).

I took your code and added a benchmark for HashSet<int> The performance cost of HashSet<int> set = new HashSet<int>(list); is nearly zero.

Code

void Main()
{
    ContainsExistsAnyVeryShort();
    
    ContainsExistsAnyShort();

    ContainsExistsAny();
}

private static void ContainsExistsAny()
{
    Console.WriteLine("***************************************");
    Console.WriteLine("********* ContainsExistsAny ***********");
    Console.WriteLine("***************************************");

    List<int> list = new List<int>(6000000);
    Random random = new Random();
    for (int i = 0; i < 6000000; i++)
    {
        list.Add(random.Next(6000000));
    }
    int[] arr = list.ToArray();
    HashSet<int> set = new HashSet<int>(list);

    find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");

}

private static void ContainsExistsAnyShort()
{
    Console.WriteLine("***************************************");
    Console.WriteLine("***** ContainsExistsAnyShortRange *****");
    Console.WriteLine("***************************************");

    List<int> list = new List<int>(2000);
    Random random = new Random();
    for (int i = 0; i < 2000; i++)
    {
        list.Add(random.Next(6000000));
    }
    int[] arr = list.ToArray();
    HashSet<int> set = new HashSet<int>(list);

    find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");

}

private static void ContainsExistsAnyVeryShort()
{
    Console.WriteLine("*******************************************");
    Console.WriteLine("***** ContainsExistsAnyVeryShortRange *****");
    Console.WriteLine("*******************************************");

    List<int> list = new List<int>(10);
    Random random = new Random();
    for (int i = 0; i < 10; i++)
    {
        list.Add(random.Next(6000000));
    }
    int[] arr = list.ToArray();
    HashSet<int> set = new HashSet<int>(list);

    find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedTicks} ticks");

}

private static void find(List<int> list, int[] arr, HashSet<int> set, Func<string, Stopwatch, string> format)
{
    Random random = new Random();
    int[] find = new int[10000];
    for (int i = 0; i < 10000; i++)
    {
        find[i] = random.Next(6000000);
    }

    Stopwatch watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        list.Contains(find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("List/Contains", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        list.Exists(a => a == find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("List/Exists", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        list.Any(a => a == find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("List/Any", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        arr.Contains(find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("Array/Contains", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        Array.Exists(arr, a => a == find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("Array/Exists", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        Array.IndexOf(arr, find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("Array/IndexOf", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        arr.Any(a => a == find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("Array/Any", watch));

    watch = Stopwatch.StartNew();
    for (int rpt = 0; rpt < 10000; rpt++)
    {
        set.Contains(find[rpt]);
    }
    watch.Stop();
    Console.WriteLine(format("HashSet/Contains", watch));
}

RESULTS

*******************************************
***** ContainsExistsAnyVeryShortRange *****
*******************************************
List/Contains: 1067 ticks
List/Exists: 2884 ticks
List/Any: 10520 ticks
Array/Contains: 1880 ticks
Array/Exists: 5526 ticks
Array/IndexOf: 601 ticks
Array/Any: 13295 ticks
HashSet/Contains: 6629 ticks
***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 4ms
List/Exists: 28ms
List/Any: 138ms
Array/Contains: 6ms
Array/Exists: 34ms
Array/IndexOf: 3ms
Array/Any: 96ms
HashSet/Contains: 0ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 11504ms
List/Exists: 57084ms
List/Any: 257659ms
Array/Contains: 11643ms
Array/Exists: 52477ms
Array/IndexOf: 11741ms
Array/Any: 194184ms
HashSet/Contains: 3ms

Edit (2021-08-25)

I added a comparison for very short collections (10 items) and also added Array.Contains and Array.IndexOf. You can see that Array.IndexOf is the fastest for such small ranges. That is, because as @lucky-brian said n is so small here, that a for-loop performs better than a somewhat complex search algorithm. However I still advice to use HashSet<T> whenever possible, as it better reflects the intend of only having unique values and the difference for small collections is below 1ms.


According to documentation:

List.Exists (Object method)

Determines whether the List(T) contains elements that match the conditions defined by the specified predicate.

IEnumerable.Any (Extension method)

Determines whether any element of a sequence satisfies a condition.

List.Contains (Object Method)

Determines whether an element is in the List.

Benchmarking:

CODE:

    static void Main(string[] args)
    {
        ContainsExistsAnyShort();

        ContainsExistsAny();
    }
    
    private static void ContainsExistsAny()
    {
        Console.WriteLine("***************************************");
        Console.WriteLine("********* ContainsExistsAny ***********");
        Console.WriteLine("***************************************");

        List<int> list = new List<int>(6000000);
        Random random = new Random();
        for (int i = 0; i < 6000000; i++)
        {
            list.Add(random.Next(6000000));
        }
        int[] arr = list.ToArray();

        find(list, arr);
    }

    private static void ContainsExistsAnyShort()
    {
        Console.WriteLine("***************************************");
        Console.WriteLine("***** ContainsExistsAnyShortRange *****");
        Console.WriteLine("***************************************");

        List<int> list = new List<int>(2000);
        Random random = new Random();
        for (int i = 0; i < 2000; i++)
        {
            list.Add(random.Next(6000000));
        }
        int[] arr = list.ToArray();

        find(list, arr);
    }

    private static void find(List<int> list, int[] arr)
    {
        Random random = new Random();
        int[] find = new int[10000];
        for (int i = 0; i < 10000; i++)
        {
            find[i] = random.Next(6000000);
        }

        Stopwatch watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            list.Contains(find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("List/Contains: {0:N0}ms", watch.ElapsedMilliseconds);

        watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            list.Exists(a => a == find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("List/Exists: {0:N0}ms", watch.ElapsedMilliseconds);

        watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            list.Any(a => a == find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("List/Any: {0:N0}ms", watch.ElapsedMilliseconds);

        watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            arr.Contains(find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("Array/Contains: {0:N0}ms", watch.ElapsedMilliseconds);

        Console.WriteLine("Arrays do not have Exists");

        watch = Stopwatch.StartNew();
        for (int rpt = 0; rpt < 10000; rpt++)
        {
            arr.Any(a => a == find[rpt]);
        }
        watch.Stop();
        Console.WriteLine("Array/Any: {0:N0}ms", watch.ElapsedMilliseconds);
    }

RESULTS

***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 96ms
List/Exists: 146ms
List/Any: 381ms
Array/Contains: 34ms
Arrays do not have Exists
Array/Any: 410ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 257,996ms
List/Exists: 379,951ms
List/Any: 884,853ms
Array/Contains: 72,486ms
Arrays do not have Exists
Array/Any: 1,013,303ms

It is worth mentioning that this comparison is a bit unfair, since the Array class doesn't own the Contains() method. It uses an extension method for IEnumerable<T> via a sequential Enumerator, hence it is not optimized for Array instances. On the other side, HashSet<T> has its own implementation fully optimized for all sizes.

To compare fairly you could use the static method int Array.IndexOf() which is implemented for Array instances, even though it uses a for loop slightly more efficient that an Enumerator.

Using a fair comparison algorithm, the performance for small sets of up to 5 elements of HashSet<T>.Contains() is similar to the Array.IndexOf() but it is much more efficient for larger sets.