Removing a string that includes CRLF characters from body of e-mail

My full diagnostic routine

The subroutine InvestigateEmails() will output to either the Immediate Window or a file on the desktop. The Immediate Window is usually the more convenient but has a limit of about 200 lines. So if the output is likely to be over 200 lines, output must be to a file. If output is less than 200 lines, the choice is yours.

For output to the Immediate Window, review subroutine OutSomeProperties. Add any properties that you wish to see but are missing. Consider removing any properties not currently required. Check that #Const Selected = True.

For output to a file, review subroutine OutAllProperties. More correctly this should be “all properties of which I am aware and have ever been interested in.” You may wish to check that all the properties of interest to you are included. I recommend not removing any existing properties. Check that #Const Selected = False.

Select the emails whose properties you wish to see. Run subroutine InvestigateEmails()

This code uses conditional compiling which will be confusing to a programmer not familiar with this technique. Either research conditional compiling or accept that it is doing something useful that you do not need to understand.

Option Explicit

' This code requires references to:
'    "Microsoft Scripting Runtime"
'   "Microsoft ActiveX Data Objects n.n Library".  Tested with version 6.1.

Public Sub InvestigateEmails()

  ' Outputs all or selected properties of one or more emails.

  ' ========================================================================
  ' "Selected = True" to output a small number of properties for
  ' a small number of emails to the Immediate Window.
  ' "Selected = False" to output all properties for any number of emails
  ' to desktop file "InvestigateEmails.txt".
  #Const Selected = True
  ' ========================================================================

  ' Technique for locating desktop from answer by Kyle:
  '                     http://stackoverflow.com/a/17551579/973283

  Dim Exp As Explorer
  Dim ItemCrnt As MailItem

  #If Not Selected Then
    Dim FileBody As String
    Dim Fso As FileSystemObject
    Dim Path As String

    Path = CreateObject("WScript.Shell").specialfolders("Desktop")
  #End If

  Set Exp = Outlook.Application.ActiveExplorer

  If Exp.Selection.Count = 0 Then
    Call MsgBox("Please select one or more emails then try again", vbOKOnly)
    Exit Sub
  Else
    For Each ItemCrnt In Exp.Selection
      If ItemCrnt.Class = olMail Then
        #If Selected Then
          Call OutSomeProperties(ItemCrnt)
        #Else
          Call OutAllProperties(ItemCrnt, FileBody)
        #End If
      End If
    Next
  End If

  #If Not Selected Then
    Call PutTextFileUtf8NoBom(Path & "\InvestigateEmails.txt", FileBody)
  #End If

End Sub
Public Sub OutSomeProperties(ItemCrnt As Outlook.MailItem)

  ' Outputs selected properties of a MailItem to the Immediate Window.

  ' The Immediate Window can only display about 200 rows before the older
  ' rows start scrolling off the top.  This means this routine is only
  ' suitable for displaying a small number of simple properties.  Add or
  ' remove properties as necessary to meet the current requirement.

  Dim InxA As Long
  Dim InxR As Long

  Debug.Print "=============================================="
  With ItemCrnt
    Debug.Print "  EntryId: " & .EntryID
    Debug.Print "  Created: " & .CreationTime
    Debug.Print " Receiver: " & .ReceivedByName
    Debug.Print " Received: " & .ReceivedTime
    For InxR = 1 To .Recipients.Count
      Debug.Print "Recipient: " & .Recipients(InxR)
    Next
    Debug.Print "   Sender: " & .Sender
    Debug.Print " SenderEA: " & .SenderEmailAddress
    Debug.Print " SenderNm: " & .SenderName
    Debug.Print "   SentOn: " & .SentOn
    Debug.Print "  Subject: " & .Subject
    Debug.Print "       To: " & .To
    If .Attachments.Count > 0 Then
      Debug.Print "Attachments:"
      For InxA = 1 To .Attachments.Count
        Debug.Print "    " & InxA & ": " & .Attachments(InxA).DisplayName
      Next
    End If
  End With

End Sub
Sub OutAllProperties(ItemCrnt As Outlook.MailItem, ByRef FileBody As String)

  ' Adds all properties of a MailItem to FileBody.

  ' The phrase "all properties" should more correctly be "all properties
  ' that I know of and have ever been interested in".

  ' Source of PropertyAccessor information:
  '   https://www.slipstick.com/developer/read-mapi-properties-exposed-outlooks-object-model/

  Dim InxA As Long
  Dim InxR As Long
  Dim PropAccess As Outlook.propertyAccessor

  If FileBody <> "" Then
    FileBody = FileBody & String(80, "=") & vbLf
  End If

  With ItemCrnt
    FileBody = FileBody & "EntryId: " & .EntryID
    FileBody = FileBody & "From (Sender): " & .Sender
    FileBody = FileBody & vbLf & "From (Sender name): " & .SenderName
    FileBody = FileBody & vbLf & "From (Sender email address): " & _
                                                     .SenderEmailAddress
    FileBody = FileBody & vbLf & "Subject: " & CStr(.Subject)
    FileBody = FileBody & vbLf & "Received: " & Format(.ReceivedTime, "dmmmyy hh:mm:ss")
    FileBody = FileBody & vbLf & "To: " & .To
    FileBody = FileBody & vbLf & "CC: " & .CC
    FileBody = FileBody & vbLf & "BCC: " & .BCC
    If .Attachments.Count = 0 Then
      FileBody = FileBody & vbLf & "No attachments"
    Else
      FileBody = FileBody & vbLf & "Attachments:"
      FileBody = FileBody & vbLf & "No.|Type|Path|Filename|DisplayName|"
      For InxR = 1 To .Recipients.Count
        FileBody = FileBody & vbLf & "Recipient" & InxR & ": " & .Recipients(InxR)
      Next
      For InxA = 1 To .Attachments.Count
        With .Attachments(InxA)
          FileBody = FileBody & vbLf & InxA & "|"
          Select Case .Type
            Case olByValue
              FileBody = FileBody & "Val"
            Case olEmbeddeditem
              FileBody = FileBody & "Ebd"
            Case olByReference
              FileBody = FileBody & "Ref"
            Case olOLE
              FileBody = FileBody & "OLE"
            Case Else
              FileBody = FileBody & "Unk"
          End Select
          ' Not all types have all properties.  This code handles
          ' those missing properties of which I am aware.  However,
          ' I have never found an attachment of type Reference or OLE.
          ' Additional code may be required for them.
          Select Case .Type
            Case olEmbeddeditem
              FileBody = FileBody & "|"
            Case Else
              FileBody = FileBody & "|" & .Pathname
          End Select
          FileBody = FileBody & "|" & .FileName
          FileBody = FileBody & "|" & .DisplayName & "|"
        End With
      Next
    End If  ' .Attachments.Count = 0
    Call OutLongTextRtn(FileBody, "Text: ", .Body)
    Call OutLongTextRtn(FileBody, "Html: ", .HtmlBody)

    Set PropAccess = .propertyAccessor

    FileBody = FileBody & vbLf & "PR_RECEIVED_BY_NAME: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0040001E")
    FileBody = FileBody & vbLf & "PR_SENT_REPRESENTING_NAME: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0042001E")
    FileBody = FileBody & vbLf & "PR_REPLY_RECIPIENT_NAMES: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0050001E")
    FileBody = FileBody & vbLf & "PR_SENT_REPRESENTING_EMAIL_ADDRESS: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0065001E")
    FileBody = FileBody & vbLf & "PR_RECEIVED_BY_EMAIL_ADDRESS: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0076001E")
    FileBody = FileBody & vbLf & "PR_TRANSPORT_MESSAGE_HEADERS:" & vbLf & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x007D001E")
    FileBody = FileBody & vbLf & "PR_SENDER_NAME: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0C1A001E")
    FileBody = FileBody & vbLf & "PR_SENDER_EMAIL_ADDRESS: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0C1F001E")
    FileBody = FileBody & vbLf & "PR_DISPLAY_BCC: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0E02001E")
    FileBody = FileBody & vbLf & "PR_DISPLAY_CC: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0E03001E")
    FileBody = FileBody & vbLf & "PR_DISPLAY_TO: " & _
                           PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0E04001E")
    FileBody = FileBody & vbLf

    Set PropAccess = Nothing

  End With

End Sub
Sub OutLongTextRtn(ByRef TextOut As String, ByVal Head As String, _
                          ByVal TextIn As String)

  ' * Break TextIn into lines of not more than 100 characters
  '   and append to TextOut.
  ' * The output is arranged so:
  '     xxxx|sssssssssssssss|
  '         |sssssssssssssss|
  '         |ssssssssss|
  '   where "xxxx" is the value of Head and "ssss..." are characters from
  '         TextIn.  The third line in the example could be shorter because:
  '           * it contains the last few characters of TextIn
  '           * there a linefeed in TextIn
  '           * a <xxx> string recording whitespace would have been split
  '             across two lines.

  If TextIn = "" Then
    ' Nothing to do
    Exit Sub
  End If

  Const LenLineMax As Long = 100

  Dim PosBrktEnd As Long     ' Last > before PosEnd
  Dim PosBrktStart As Long   ' Last < before PosEnd
  Dim PosNext As Long        ' Start of block to be output after current block
  Dim PosStart As Long       ' First character of TextIn not yet output

  TextIn = TidyTextForDspl(TextIn)
  TextIn = Replace(TextIn, "lf›", "lf›" & vbLf)

  PosStart = 1
  Do While True
    PosNext = InStr(PosStart, TextIn, vbLf)
    If PosNext = 0 Then
      ' No LF in [Remaining] TextIn
      'Debug.Assert False
      PosNext = Len(TextIn) + 1
    End If
    If PosNext - PosStart > LenLineMax Then
      PosNext = PosStart + LenLineMax
    End If
    ' Check for <xxx> being split across lines
    PosBrktStart = InStrRev(TextIn, "‹", PosNext - 1)
    PosBrktEnd = InStrRev(TextIn, "›", PosNext - 1)
    If PosBrktStart < PosStart And PosBrktEnd < PosStart Then
      ' No <xxx> within text to be displayed
      ' No change to PosNext
      'Debug.Assert False
    ElseIf PosBrktStart > 0 And PosBrktEnd > 0 And PosBrktEnd > PosBrktStart Then
      ' Last or only <xxx> totally within text to be displayed
      ' No change to PosNext
      'Debug.Assert False
    ElseIf PosBrktStart > 0 And _
           (PosBrktEnd = 0 Or (PosBrktEnd > 0 And PosBrktEnd < PosBrktStart)) Then
      ' Last or only <xxx> will be split across rows
      'Debug.Assert False
      PosNext = PosBrktStart
    Else
      ' Are there other combinations?
      Debug.Assert False
    End If

    'Debug.Assert Right$(Mid$(TextIn, PosStart, PosNext - PosStart), 1) <> "‹"

    If TextOut <> "" Then
      TextOut = TextOut & vbLf
    End If
    If PosStart = 1 Then
      TextOut = TextOut & Head & "|"
    Else
      TextOut = TextOut & Space(Len(Head)) & "|"
    End If
    TextOut = TextOut & Mid$(TextIn, PosStart, PosNext - PosStart) & "|"
    PosStart = PosNext
    If Mid$(TextIn, PosStart, 1) = vbLf Then
      PosStart = PosStart + 1
    End If
    If PosStart > Len(TextIn) Then
      Exit Do
    End If
  Loop

End Sub
Sub PutTextFileUtf8NoBom(ByVal PathFileName As String, ByVal FileBody As String)

  ' Outputs FileBody as a text file named PathFileName using
  ' UTF-8 encoding without leading BOM

  '  1Nov16  Copied from http://stackoverflow.com/a/4461250/973283
  '          but replaced literals with parameters.
  ' 15Aug17  Discovered routine was adding an LF to the end of the file.
  '          Added code to discard that LF.
  ' 11Oct17  Posted to StackOverflow
  '  9Aug18  Comment from rellampec suggested removal of adWriteLine from
  '          WriteTest statement would avoid adding LF.
  ' 30Sep18  Amended routine to remove adWriteLine from WriteTest statement
  '          and code to remove LF from file. Successfully tested new version.

  ' References: http://stackoverflow.com/a/4461250/973283
  '             https://www.w3schools.com/asp/ado_ref_stream.asp

  Dim BinaryStream As Object
  Dim UTFStream As Object

  Set UTFStream = CreateObject("adodb.stream")

  UTFStream.Type = adTypeText
  UTFStream.Mode = adModeReadWrite
  UTFStream.Charset = "UTF-8"
  UTFStream.Open
  UTFStream.WriteText FileBody

  UTFStream.Position = 3 'skip BOM

  Set BinaryStream = CreateObject("adodb.stream")
  BinaryStream.Type = adTypeBinary
  BinaryStream.Mode = adModeReadWrite
  BinaryStream.Open

  UTFStream.CopyTo BinaryStream

  UTFStream.Flush
  UTFStream.Close
  Set UTFStream = Nothing

  BinaryStream.SaveToFile PathFileName, adSaveCreateOverWrite
  BinaryStream.Flush
  BinaryStream.Close
  Set BinaryStream = Nothing

End Sub

I will answer your question in parts as I have the spare time. Someone else may get to the important bit before I do.

I have edited your question. I did not understand a couple of sentences so I looked at the source and found my suspicion was correct, you had included less than characters. Stack Overflow permits a limited number of Html tags. Anything else that looks like an Html tag is ignored. I replaced each "<" with "&lt;" so readers could see your Html. I can add an explanation if you do not understand why this works.

You have:

NewBody = Replace(obj.HTMLBody, strDelete01, "")
NewBody = Replace(obj.HTMLBody, strDelete02, "")
NewBody = Replace(obj.HTMLBody, strDelete03, "")
NewBody = Replace(obj.HTMLBody, strDelete04, "")
If NewBody <> "" Then

Each Replace (except the first) overwrites the value of NewBody created by the previous Replace. You seem to think that if strDelete04 is not found, NewBody will be empty. No, if strDelete04 is not found, NewBody will be a copy of obj.HTMLBody.

You need something like:

NewBody = Replace(obj.HTMLBody, strDelete01, "")
NewBody = Replace(NewBody, strDelete02, "")
NewBody = Replace(NewBody, strDelete03, "")
NewBody = Replace(NewBody, strDelete04, "")
If NewBody <> obj.HTMLBody Then
  ' One or more delete strings found and removed

You say that the CRLFs are not in fixed positions. If so, no simple modification of your code will have the effect you seek. I will show you how to achieve the effect you seek but first I will have to create some emails containing your text so I can test my code.

Part 2

Having looked at your image of the Html more closely, I believe there is a simple solution. The two CRLFs in the text replace spaces. Providing this is always what happens, you can use:

NewBody = Replace(obj.HTMLBody, vbCr & vbLf, " ")

This would remove any CRLF present wherever it appeared within the Html. It would not matter if there were extra CRLFs because any string of whitespace characters (which includes CR and LF) in an Html document is replaced by a single space when the document is displayed.

You finish the removal of the unwanted text with:

Dim strDelete = "Diese E-Mail kommt von Personen außerhalb " & _
                "der Stadtverwaltung. Klicken Sie nur auf " & _
                "Links oder Dateianhänge, wenn Sie die Personen " & _
                "für vertrauenswürdig halten."

NewBody = Replace(NewBody, strDelete, "")

If the above does not work, you need a more convenient diagnostic technique. Saving the entire email as Html may be easy but you cannot be quite sure how the result differs from what a VBA macro would see. You wonder if Outlook stores emails in a format other than Html. I cannot imagine why Outlook would convert the incoming SMTP message to some secret format and then convert it back when the user wishes to view it. If Outlook does have a secret format, it is totally hidden from the VBA programmer.

The following is a simple version of the diagnostic tool I use. If you need something more advanced, I can provide it but let us try this first.

Copy the code below to an Outlook module. Select one of these emails and then run macro DsplHtmlBodyFromSelectedEmails. The entire Html body of the email will be output to the Immediate Window in a readable format. I believe I have included all the subroutines called by the macro. I apologise in advance if I have not. If you get a message about an undefined routine, let me know and I will add it to the answer.

Sub DsplHtmlBodyFromSelectedEmails()

  ' Select one or emails then run this macro.  For each selected email, the Received Time, the Subject and the Html body are output to the Immediate Window.  Note: the Immediate Window can only display about 200 lines before
The older lines are lost.

  Dim Exp As Explorer
  Dim Html As String
  Dim ItemCrnt As MailItem

  Set Exp = Outlook.Application.ActiveExplorer

  If Exp.Selection.Count = 0 Then
    Call MsgBox("Please select one or more emails then try again", vbOKOnly)
    Exit Sub
  Else
    For Each ItemCrnt In Exp.Selection
      With ItemCrnt
        If .Class = olMail Then
          Debug.Print .ReceivedTime & " " & .Subject
          Call OutLongTextRtn(Html, "Html", .HtmlBody)
          Debug.Print Html
        End If
      End With
    Next
  End If

End Sub
Sub OutLongTextRtn(ByRef TextOut As String, ByVal Head As String, _
                          ByVal TextIn As String)

  ' * Break TextIn into lines of not more than 100 characters
  '   and append to TextOut.
  ' * The output is arranged so:
  '     xxxx|sssssssssssssss|
  '         |sssssssssssssss|
  '         |ssssssssss|
  '   where "xxxx" is the value of Head and "ssss..." are characters from
  '         TextIn.  The third line in the example could be shorter because:
  '           * it contains the last few characters of TextIn
  '           * there a linefeed in TextIn
  '           * a <xxx> string recording whitespace would have been split
  '             across two lines.

  If TextIn = "" Then
    ' Nothing to do
    Exit Sub
  End If

  Const LenLineMax As Long = 100

  Dim PosBrktEnd As Long     ' Last > before PosEnd
  Dim PosBrktStart As Long   ' Last < before PosEnd
  Dim PosNext As Long        ' Start of block to be output after current block
  Dim PosStart As Long       ' First character of TextIn not yet output

  TextIn = TidyTextForDspl(TextIn)
  TextIn = Replace(TextIn, "lf›", "lf›" & vbLf)

  PosStart = 1
  Do While True
    PosNext = InStr(PosStart, TextIn, vbLf)
    If PosNext = 0 Then
      ' No LF in [Remaining] TextIn
      'Debug.Assert False
      PosNext = Len(TextIn) + 1
    End If
    If PosNext - PosStart > LenLineMax Then
      PosNext = PosStart + LenLineMax
    End If
    ' Check for <xxx> being split across lines
    PosBrktStart = InStrRev(TextIn, "‹", PosNext - 1)
    PosBrktEnd = InStrRev(TextIn, "›", PosNext - 1)
    If PosBrktStart < PosStart And PosBrktEnd < PosStart Then
      ' No <xxx> within text to be displayed
      ' No change to PosNext
      'Debug.Assert False
    ElseIf PosBrktStart > 0 And PosBrktEnd > 0 And PosBrktEnd > PosBrktStart Then
      ' Last or only <xxx> totally within text to be displayed
      ' No change to PosNext
      'Debug.Assert False
    ElseIf PosBrktStart > 0 And _
           (PosBrktEnd = 0 Or (PosBrktEnd > 0 And PosBrktEnd < PosBrktStart)) Then
      ' Last or only <xxx> will be split across rows
      'Debug.Assert False
      PosNext = PosBrktStart
    Else
      ' Are there other combinations?
      Debug.Assert False
    End If

    'Debug.Assert Right$(Mid$(TextIn, PosStart, PosNext - PosStart), 1) <> "‹"

    If TextOut <> "" Then
      TextOut = TextOut & vbLf
    End If
    If PosStart = 1 Then
      TextOut = TextOut & Head & "|"
    Else
      TextOut = TextOut & Space(Len(Head)) & "|"
    End If
    TextOut = TextOut & Mid$(TextIn, PosStart, PosNext - PosStart) & "|"
    PosStart = PosNext
    If Mid$(TextIn, PosStart, 1) = vbLf Then
      PosStart = PosStart + 1
    End If
    If PosStart > Len(TextIn) Then
      Exit Do
    End If
  Loop

End Sub
Function TidyTextForDspl(ByVal Text As String) As String

  ' Tidy Text for display by replacing white space with visible strings:
  '   Leave single space unchanged
  '   Replace single LF by                 ‹lf›
  '   Replace single CR by                 ‹cr›
  '   Replace single TB by                 ‹tb›
  '   Replace single non-break space by    ‹nbs›
  '   Replace single CRLF by               ‹crlf›
  '   Replace multiple spaces by           ‹n s›       where n is number of repeats
  '   Replace multiple LFs by              ‹n lf›      of white space character
  '   Replace multiple CRs by ‹cr› or      ‹n cr›
  '   Replace multiple TBs by              ‹n tb›
  '   Replace multiple non-break spaces by ‹n nbs›
  '   Replace multiple CRLFs by            ‹n crlf›

  Dim InsStr As String
  Dim InxWsChar As Long
  Dim NumWsChar As Long
  Dim PosWsChar As Long
  Dim RetnVal As String
  Dim WsCharCrnt As Variant
  Dim WsCharValue As Variant
  Dim WsCharDspl As Variant

  WsCharValue = VBA.Array(" ", vbCr & vbLf, vbLf, vbCr, vbTab, Chr(160))
  WsCharDspl = VBA.Array("s", "crlf", "lf", "cr", "tb", "nbs")

  RetnVal = Text

  ' Replace each whitespace individually
  For InxWsChar = 0 To UBound(WsCharValue)
    RetnVal = Replace(RetnVal, WsCharValue(InxWsChar), "‹" & WsCharDspl(InxWsChar) & "›")
  Next

  ' Look for repeats. If found replace <x> by <n x>
  For InxWsChar = 0 To UBound(WsCharValue)
    'Debug.Assert InxWsChar <> 1
    PosWsChar = 1
    Do While True
      InsStr = "‹" & WsCharDspl(InxWsChar) & "›"
      PosWsChar = InStr(PosWsChar, RetnVal, InsStr & InsStr)
      If PosWsChar = 0 Then
        ' No [more] repeats of this <x>
        Exit Do
      End If
      ' Have <x><x>.  Count number of extra <x>s
      NumWsChar = 2
      Do While Mid(RetnVal, PosWsChar + NumWsChar * Len(InsStr), Len(InsStr)) = InsStr
        NumWsChar = NumWsChar + 1
      Loop
      RetnVal = Mid(RetnVal, 1, PosWsChar - 1) & _
                "‹" & NumWsChar & " " & WsCharDspl(InxWsChar) & "›" & _
                Mid(RetnVal, PosWsChar + NumWsChar * Len(InsStr))
      PosWsChar = PosWsChar + Len(InsStr) + Len(NumWsChar)

    Loop
  Next

  ' Restore any single spaces
  RetnVal = Replace(RetnVal, "‹" & WsCharDspl(0) & "›", " ")

  TidyTextForDspl = RetnVal

End Function