T-SQL strip all non-alpha and non-numeric characters

Is there a smarter way to remove all special characters rather than having a series of about 15 nested replace statements?

The following works, but only handles three characters (ampersand, blank and period).

select CustomerID, CustomerName, 
   Replace(Replace(Replace(CustomerName,'&',''),' ',''),'.','') as CustomerNameStripped
from Customer 

Solution 1:

One flexible-ish way;

CREATE FUNCTION [dbo].[fnRemovePatternFromString](@BUFFER VARCHAR(MAX), @PATTERN VARCHAR(128)) RETURNS VARCHAR(MAX) AS
BEGIN
    DECLARE @POS INT = PATINDEX(@PATTERN, @BUFFER)
    WHILE @POS > 0 BEGIN
        SET @BUFFER = STUFF(@BUFFER, @POS, 1, '')
        SET @POS = PATINDEX(@PATTERN, @BUFFER)
    END
    RETURN @BUFFER
END

select dbo.fnRemovePatternFromString('cake & beer $3.99!?c', '%[$&.!?]%')

(No column name)
cake  beer 399c

Solution 2:

Create a function:

CREATE FUNCTION dbo.StripNonAlphaNumerics
(
  @s VARCHAR(255)
)
RETURNS VARCHAR(255)
AS
BEGIN
  DECLARE @p INT = 1, @n VARCHAR(255) = '';
  WHILE @p <= LEN(@s)
  BEGIN
    IF SUBSTRING(@s, @p, 1) LIKE '[A-Za-z0-9]'
    BEGIN
      SET @n += SUBSTRING(@s, @p, 1);
    END 
    SET @p += 1;
  END
  RETURN(@n);
END
GO

Then:

SELECT Result = dbo.StripNonAlphaNumerics
('My Customer''s dog & #1 friend are dope, yo!');

Results:

Result
------
MyCustomersdog1friendaredopeyo

To make it more flexible, you could pass in the pattern you want to allow:

CREATE FUNCTION dbo.StripNonAlphaNumerics
(
  @s VARCHAR(255),
  @pattern VARCHAR(255)
)
RETURNS VARCHAR(255)
AS
BEGIN
  DECLARE @p INT = 1, @n VARCHAR(255) = '';
  WHILE @p <= LEN(@s)
  BEGIN
    IF SUBSTRING(@s, @p, 1) LIKE @pattern
    BEGIN
      SET @n += SUBSTRING(@s, @p, 1);
    END 
    SET @p += 1;
  END
  RETURN(@n);
END
GO

Then:

SELECT r = dbo.StripNonAlphaNumerics
('Bob''s dog & #1 friend are dope, yo!', '[A-Za-z0-9]');

Results:

r
------
Bobsdog1friendaredopeyo

Solution 3:

I faced this problem several years ago, so I wrote a SQL function to do the trick. Here is the original article (was used to scrape text out of HTML). I have since updated the function, as follows:

IF (object_id('dbo.fn_CleanString') IS NOT NULL)
BEGIN
  PRINT 'Dropping: dbo.fn_CleanString'
  DROP function dbo.fn_CleanString
END
GO
PRINT 'Creating: dbo.fn_CleanString'
GO
CREATE FUNCTION dbo.fn_CleanString 
(
  @string varchar(8000)
) 
returns varchar(8000)
AS
BEGIN
---------------------------------------------------------------------------------------------------
-- Title:        CleanString
-- Date Created: March 26, 2011
-- Author:       William McEvoy
--               
-- Description:  This function removes special ascii characters from a string.
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


declare @char        char(1),
        @len         int,
        @count       int,
        @newstring   varchar(8000),
        @replacement char(1)

select  @count       = 1,
        @len         = 0,
        @newstring   = '',
        @replacement = ' '



---------------------------------------------------------------------------------------------------
-- M A I N   P R O C E S S I N G
---------------------------------------------------------------------------------------------------


-- Remove Backspace characters
select @string = replace(@string,char(8),@replacement)

-- Remove Tabs
select @string = replace(@string,char(9),@replacement)

-- Remove line feed
select @string = replace(@string,char(10),@replacement)

-- Remove carriage return
select @string = replace(@string,char(13),@replacement)


-- Condense multiple spaces into a single space
-- This works by changing all double spaces to be OX where O = a space, and X = a special character
-- then all occurrences of XO are changed to O,
-- then all occurrences of X  are changed to nothing, leaving just the O which is actually a single space
select @string = replace(replace(replace(ltrim(rtrim(@string)),'  ', ' ' + char(7)),char(7)+' ',''),char(7),'')


--  Parse each character, remove non alpha-numeric

select @len = len(@string)

WHILE (@count <= @len)
BEGIN

  -- Examine the character
  select @char = substring(@string,@count,1)


  IF (@char like '[a-z]') or (@char like '[A-Z]') or (@char like '[0-9]')
    select @newstring = @newstring + @char
  ELSE
    select @newstring = @newstring + @replacement

  select @count = @count + 1

END


return @newstring
END

GO
IF (object_id('dbo.fn_CleanString') IS NOT NULL)
  PRINT 'Function created.'
ELSE
  PRINT 'Function NOT created.'
GO