c# Fastest way to remove extra white spaces

What is the fastest way to replace extra white spaces to one white space?
e.g.

from

foo      bar 

to

foo bar

The fastest way? Iterate over the string and build a second copy in a StringBuilder character by character, only copying one space for each group of spaces.

The easier to type Replace variants will create a bucket load of extra strings (or waste time building the regex DFA).

Edit with comparison results:

Using http://ideone.com/NV6EzU, with n=50 (had to reduce it on ideone because it took so long they had to kill my process), I get:

Regex: 7771ms.

Stringbuilder: 894ms.

Which is indeed as expected, Regex is horribly inefficient for something this simple.


You can use a regex:

static readonly Regex trimmer = new Regex(@"\s\s+");

s = trimmer.Replace(s, " ");

For added performance, pass RegexOptions.Compiled.


A bit late, but I have done some benchmarking to get the fastest way to remove extra whitespaces. If there are any faster answers, I would love to add them.

Results:

  1. NormalizeWhiteSpaceForLoop: 156 ms (by Me - From my answer on removing all whitespace)
  2. NormalizeWhiteSpace: 267 ms (by Alex K.)
  3. RegexCompiled: 1950 ms (by SLaks)
  4. Regex: 2261 ms (by SLaks)

Code:

public class RemoveExtraWhitespaces
{
    public static string WithRegex(string text)
    {
        return Regex.Replace(text, @"\s+", " ");
    }

    public static string WithRegexCompiled(Regex compiledRegex, string text)
    {
        return compiledRegex.Replace(text, " ");
    }

    public static string NormalizeWhiteSpace(string input)
    {
        if (string.IsNullOrEmpty(input))
            return string.Empty;

        int current = 0;
        char[] output = new char[input.Length];
        bool skipped = false;

        foreach (char c in input.ToCharArray())
        {
            if (char.IsWhiteSpace(c))
            {
                if (!skipped)
                {
                    if (current > 0)
                        output[current++] = ' ';

                    skipped = true;
                }
            }
            else
            {
                skipped = false;
                output[current++] = c;
            }
        }

        return new string(output, 0, current);
    }

    public static string NormalizeWhiteSpaceForLoop(string input)
    {
        int len = input.Length,
            index = 0,
            i = 0;
        var src = input.ToCharArray();
        bool skip = false;
        char ch;
        for (; i < len; i++)
        {
            ch = src[i];
            switch (ch)
            {
                case '\u0020':
                case '\u00A0':
                case '\u1680':
                case '\u2000':
                case '\u2001':
                case '\u2002':
                case '\u2003':
                case '\u2004':
                case '\u2005':
                case '\u2006':
                case '\u2007':
                case '\u2008':
                case '\u2009':
                case '\u200A':
                case '\u202F':
                case '\u205F':
                case '\u3000':
                case '\u2028':
                case '\u2029':
                case '\u0009':
                case '\u000A':
                case '\u000B':
                case '\u000C':
                case '\u000D':
                case '\u0085':
                    if (skip) continue;
                    src[index++] = ch;
                    skip = true;
                    continue;
                default:
                    skip = false;
                    src[index++] = ch;
                continue;
            }
        }

        return new string(src, 0, index);
    }
}

Tests:

[TestFixture]
public class RemoveExtraWhitespacesTest
{
    private const string _text = "foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo foo                  bar                  foobar                     moo ";
    private const string _expected = "foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo foo bar foobar moo ";

    private const int _iterations = 10000;

    [Test]
    public void Regex()
    {
        var result = TimeAction("Regex", () => RemoveExtraWhitespaces.WithRegex(_text));
        Assert.AreEqual(_expected, result);
    }

    [Test]
    public void RegexCompiled()
    {
        var compiledRegex = new Regex(@"\s+", RegexOptions.Compiled);
        var result = TimeAction("RegexCompiled", () => RemoveExtraWhitespaces.WithRegexCompiled(compiledRegex, _text));
        Assert.AreEqual(_expected, result);
    }

    [Test]
    public void NormalizeWhiteSpace()
    {
        var result = TimeAction("NormalizeWhiteSpace", () => RemoveExtraWhitespaces.NormalizeWhiteSpace(_text));
        Assert.AreEqual(_expected, result);
    }

    [Test]
    public void NormalizeWhiteSpaceForLoop()
    {
        var result = TimeAction("NormalizeWhiteSpaceForLoop", () => RemoveExtraWhitespaces.NormalizeWhiteSpaceForLoop(_text));
        Assert.AreEqual(_expected, result);
    }

    public string TimeAction(string name, Func<string> func)
    {
        var timer = Stopwatch.StartNew();
        string result = string.Empty; ;
        for (int i = 0; i < _iterations; i++)
        {
            result = func();
        }

        timer.Stop();
        Console.WriteLine(string.Format("{0}: {1} ms", name, timer.ElapsedMilliseconds));
        return result;
    }
}

I use below methods - they handle all whitespace chars not only spaces, trim both leading and trailing whitespaces, remove extra whitespaces, and all whitespaces are replaced to space char (so we have uniform space separator). And these methods are fast.

public static String CompactWhitespaces( String s )
{
    StringBuilder sb = new StringBuilder( s );

    CompactWhitespaces( sb );

    return sb.ToString();
}

public static void CompactWhitespaces( StringBuilder sb )
{
    if( sb.Length == 0 )
        return;

    // set [start] to first not-whitespace char or to sb.Length

    int start = 0;

    while( start < sb.Length )
    {
        if( Char.IsWhiteSpace( sb[ start ] ) )
            start++;
        else 
            break;
    }

    // if [sb] has only whitespaces, then return empty string

    if( start == sb.Length )
    {
        sb.Length = 0;
        return;
    }

    // set [end] to last not-whitespace char

    int end = sb.Length - 1;

    while( end >= 0 )
    {
        if( Char.IsWhiteSpace( sb[ end ] ) )
            end--;
        else 
            break;
    }

    // compact string

    int dest = 0;
    bool previousIsWhitespace = false;

    for( int i = start; i <= end; i++ )
    {
        if( Char.IsWhiteSpace( sb[ i ] ) )
        {
            if( !previousIsWhitespace )
            {
                previousIsWhitespace = true;
                sb[ dest ] = ' ';
                dest++;
            }
        }
        else
        {
            previousIsWhitespace = false;
            sb[ dest ] = sb[ i ];
            dest++;
        }
    }

    sb.Length = dest;
}

string q = " Hello     how are   you           doing?";
string a = String.Join(" ", q.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries));