Removing a string that includes CRLF characters from body of e-mail
My full diagnostic routine
The subroutine InvestigateEmails()
will output to either the Immediate Window or a file on the desktop. The Immediate Window is usually the more convenient but has a limit of about 200 lines. So if the output is likely to be over 200 lines, output must be to a file. If output is less than 200 lines, the choice is yours.
For output to the Immediate Window, review subroutine OutSomeProperties
. Add any properties that you wish to see but are missing. Consider removing any properties not currently required. Check that #Const Selected = True
.
For output to a file, review subroutine OutAllProperties
. More correctly this should be “all properties of which I am aware and have ever been interested in.” You may wish to check that all the properties of interest to you are included. I recommend not removing any existing properties. Check that #Const Selected = False
.
Select the emails whose properties you wish to see. Run subroutine InvestigateEmails()
This code uses conditional compiling which will be confusing to a programmer not familiar with this technique. Either research conditional compiling or accept that it is doing something useful that you do not need to understand.
Option Explicit
' This code requires references to:
' "Microsoft Scripting Runtime"
' "Microsoft ActiveX Data Objects n.n Library". Tested with version 6.1.
Public Sub InvestigateEmails()
' Outputs all or selected properties of one or more emails.
' ========================================================================
' "Selected = True" to output a small number of properties for
' a small number of emails to the Immediate Window.
' "Selected = False" to output all properties for any number of emails
' to desktop file "InvestigateEmails.txt".
#Const Selected = True
' ========================================================================
' Technique for locating desktop from answer by Kyle:
' http://stackoverflow.com/a/17551579/973283
Dim Exp As Explorer
Dim ItemCrnt As MailItem
#If Not Selected Then
Dim FileBody As String
Dim Fso As FileSystemObject
Dim Path As String
Path = CreateObject("WScript.Shell").specialfolders("Desktop")
#End If
Set Exp = Outlook.Application.ActiveExplorer
If Exp.Selection.Count = 0 Then
Call MsgBox("Please select one or more emails then try again", vbOKOnly)
Exit Sub
Else
For Each ItemCrnt In Exp.Selection
If ItemCrnt.Class = olMail Then
#If Selected Then
Call OutSomeProperties(ItemCrnt)
#Else
Call OutAllProperties(ItemCrnt, FileBody)
#End If
End If
Next
End If
#If Not Selected Then
Call PutTextFileUtf8NoBom(Path & "\InvestigateEmails.txt", FileBody)
#End If
End Sub
Public Sub OutSomeProperties(ItemCrnt As Outlook.MailItem)
' Outputs selected properties of a MailItem to the Immediate Window.
' The Immediate Window can only display about 200 rows before the older
' rows start scrolling off the top. This means this routine is only
' suitable for displaying a small number of simple properties. Add or
' remove properties as necessary to meet the current requirement.
Dim InxA As Long
Dim InxR As Long
Debug.Print "=============================================="
With ItemCrnt
Debug.Print " EntryId: " & .EntryID
Debug.Print " Created: " & .CreationTime
Debug.Print " Receiver: " & .ReceivedByName
Debug.Print " Received: " & .ReceivedTime
For InxR = 1 To .Recipients.Count
Debug.Print "Recipient: " & .Recipients(InxR)
Next
Debug.Print " Sender: " & .Sender
Debug.Print " SenderEA: " & .SenderEmailAddress
Debug.Print " SenderNm: " & .SenderName
Debug.Print " SentOn: " & .SentOn
Debug.Print " Subject: " & .Subject
Debug.Print " To: " & .To
If .Attachments.Count > 0 Then
Debug.Print "Attachments:"
For InxA = 1 To .Attachments.Count
Debug.Print " " & InxA & ": " & .Attachments(InxA).DisplayName
Next
End If
End With
End Sub
Sub OutAllProperties(ItemCrnt As Outlook.MailItem, ByRef FileBody As String)
' Adds all properties of a MailItem to FileBody.
' The phrase "all properties" should more correctly be "all properties
' that I know of and have ever been interested in".
' Source of PropertyAccessor information:
' https://www.slipstick.com/developer/read-mapi-properties-exposed-outlooks-object-model/
Dim InxA As Long
Dim InxR As Long
Dim PropAccess As Outlook.propertyAccessor
If FileBody <> "" Then
FileBody = FileBody & String(80, "=") & vbLf
End If
With ItemCrnt
FileBody = FileBody & "EntryId: " & .EntryID
FileBody = FileBody & "From (Sender): " & .Sender
FileBody = FileBody & vbLf & "From (Sender name): " & .SenderName
FileBody = FileBody & vbLf & "From (Sender email address): " & _
.SenderEmailAddress
FileBody = FileBody & vbLf & "Subject: " & CStr(.Subject)
FileBody = FileBody & vbLf & "Received: " & Format(.ReceivedTime, "dmmmyy hh:mm:ss")
FileBody = FileBody & vbLf & "To: " & .To
FileBody = FileBody & vbLf & "CC: " & .CC
FileBody = FileBody & vbLf & "BCC: " & .BCC
If .Attachments.Count = 0 Then
FileBody = FileBody & vbLf & "No attachments"
Else
FileBody = FileBody & vbLf & "Attachments:"
FileBody = FileBody & vbLf & "No.|Type|Path|Filename|DisplayName|"
For InxR = 1 To .Recipients.Count
FileBody = FileBody & vbLf & "Recipient" & InxR & ": " & .Recipients(InxR)
Next
For InxA = 1 To .Attachments.Count
With .Attachments(InxA)
FileBody = FileBody & vbLf & InxA & "|"
Select Case .Type
Case olByValue
FileBody = FileBody & "Val"
Case olEmbeddeditem
FileBody = FileBody & "Ebd"
Case olByReference
FileBody = FileBody & "Ref"
Case olOLE
FileBody = FileBody & "OLE"
Case Else
FileBody = FileBody & "Unk"
End Select
' Not all types have all properties. This code handles
' those missing properties of which I am aware. However,
' I have never found an attachment of type Reference or OLE.
' Additional code may be required for them.
Select Case .Type
Case olEmbeddeditem
FileBody = FileBody & "|"
Case Else
FileBody = FileBody & "|" & .Pathname
End Select
FileBody = FileBody & "|" & .FileName
FileBody = FileBody & "|" & .DisplayName & "|"
End With
Next
End If ' .Attachments.Count = 0
Call OutLongTextRtn(FileBody, "Text: ", .Body)
Call OutLongTextRtn(FileBody, "Html: ", .HtmlBody)
Set PropAccess = .propertyAccessor
FileBody = FileBody & vbLf & "PR_RECEIVED_BY_NAME: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0040001E")
FileBody = FileBody & vbLf & "PR_SENT_REPRESENTING_NAME: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0042001E")
FileBody = FileBody & vbLf & "PR_REPLY_RECIPIENT_NAMES: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0050001E")
FileBody = FileBody & vbLf & "PR_SENT_REPRESENTING_EMAIL_ADDRESS: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0065001E")
FileBody = FileBody & vbLf & "PR_RECEIVED_BY_EMAIL_ADDRESS: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0076001E")
FileBody = FileBody & vbLf & "PR_TRANSPORT_MESSAGE_HEADERS:" & vbLf & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x007D001E")
FileBody = FileBody & vbLf & "PR_SENDER_NAME: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0C1A001E")
FileBody = FileBody & vbLf & "PR_SENDER_EMAIL_ADDRESS: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0C1F001E")
FileBody = FileBody & vbLf & "PR_DISPLAY_BCC: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0E02001E")
FileBody = FileBody & vbLf & "PR_DISPLAY_CC: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0E03001E")
FileBody = FileBody & vbLf & "PR_DISPLAY_TO: " & _
PropAccess.GetProperty("http://schemas.microsoft.com/mapi/proptag/0x0E04001E")
FileBody = FileBody & vbLf
Set PropAccess = Nothing
End With
End Sub
Sub OutLongTextRtn(ByRef TextOut As String, ByVal Head As String, _
ByVal TextIn As String)
' * Break TextIn into lines of not more than 100 characters
' and append to TextOut.
' * The output is arranged so:
' xxxx|sssssssssssssss|
' |sssssssssssssss|
' |ssssssssss|
' where "xxxx" is the value of Head and "ssss..." are characters from
' TextIn. The third line in the example could be shorter because:
' * it contains the last few characters of TextIn
' * there a linefeed in TextIn
' * a <xxx> string recording whitespace would have been split
' across two lines.
If TextIn = "" Then
' Nothing to do
Exit Sub
End If
Const LenLineMax As Long = 100
Dim PosBrktEnd As Long ' Last > before PosEnd
Dim PosBrktStart As Long ' Last < before PosEnd
Dim PosNext As Long ' Start of block to be output after current block
Dim PosStart As Long ' First character of TextIn not yet output
TextIn = TidyTextForDspl(TextIn)
TextIn = Replace(TextIn, "lf›", "lf›" & vbLf)
PosStart = 1
Do While True
PosNext = InStr(PosStart, TextIn, vbLf)
If PosNext = 0 Then
' No LF in [Remaining] TextIn
'Debug.Assert False
PosNext = Len(TextIn) + 1
End If
If PosNext - PosStart > LenLineMax Then
PosNext = PosStart + LenLineMax
End If
' Check for <xxx> being split across lines
PosBrktStart = InStrRev(TextIn, "‹", PosNext - 1)
PosBrktEnd = InStrRev(TextIn, "›", PosNext - 1)
If PosBrktStart < PosStart And PosBrktEnd < PosStart Then
' No <xxx> within text to be displayed
' No change to PosNext
'Debug.Assert False
ElseIf PosBrktStart > 0 And PosBrktEnd > 0 And PosBrktEnd > PosBrktStart Then
' Last or only <xxx> totally within text to be displayed
' No change to PosNext
'Debug.Assert False
ElseIf PosBrktStart > 0 And _
(PosBrktEnd = 0 Or (PosBrktEnd > 0 And PosBrktEnd < PosBrktStart)) Then
' Last or only <xxx> will be split across rows
'Debug.Assert False
PosNext = PosBrktStart
Else
' Are there other combinations?
Debug.Assert False
End If
'Debug.Assert Right$(Mid$(TextIn, PosStart, PosNext - PosStart), 1) <> "‹"
If TextOut <> "" Then
TextOut = TextOut & vbLf
End If
If PosStart = 1 Then
TextOut = TextOut & Head & "|"
Else
TextOut = TextOut & Space(Len(Head)) & "|"
End If
TextOut = TextOut & Mid$(TextIn, PosStart, PosNext - PosStart) & "|"
PosStart = PosNext
If Mid$(TextIn, PosStart, 1) = vbLf Then
PosStart = PosStart + 1
End If
If PosStart > Len(TextIn) Then
Exit Do
End If
Loop
End Sub
Sub PutTextFileUtf8NoBom(ByVal PathFileName As String, ByVal FileBody As String)
' Outputs FileBody as a text file named PathFileName using
' UTF-8 encoding without leading BOM
' 1Nov16 Copied from http://stackoverflow.com/a/4461250/973283
' but replaced literals with parameters.
' 15Aug17 Discovered routine was adding an LF to the end of the file.
' Added code to discard that LF.
' 11Oct17 Posted to StackOverflow
' 9Aug18 Comment from rellampec suggested removal of adWriteLine from
' WriteTest statement would avoid adding LF.
' 30Sep18 Amended routine to remove adWriteLine from WriteTest statement
' and code to remove LF from file. Successfully tested new version.
' References: http://stackoverflow.com/a/4461250/973283
' https://www.w3schools.com/asp/ado_ref_stream.asp
Dim BinaryStream As Object
Dim UTFStream As Object
Set UTFStream = CreateObject("adodb.stream")
UTFStream.Type = adTypeText
UTFStream.Mode = adModeReadWrite
UTFStream.Charset = "UTF-8"
UTFStream.Open
UTFStream.WriteText FileBody
UTFStream.Position = 3 'skip BOM
Set BinaryStream = CreateObject("adodb.stream")
BinaryStream.Type = adTypeBinary
BinaryStream.Mode = adModeReadWrite
BinaryStream.Open
UTFStream.CopyTo BinaryStream
UTFStream.Flush
UTFStream.Close
Set UTFStream = Nothing
BinaryStream.SaveToFile PathFileName, adSaveCreateOverWrite
BinaryStream.Flush
BinaryStream.Close
Set BinaryStream = Nothing
End Sub
I will answer your question in parts as I have the spare time. Someone else may get to the important bit before I do.
I have edited your question. I did not understand a couple of sentences so I looked at the source and found my suspicion was correct, you had included less than characters. Stack Overflow permits a limited number of Html tags. Anything else that looks like an Html tag is ignored. I replaced each "<" with "<" so readers could see your Html. I can add an explanation if you do not understand why this works.
You have:
NewBody = Replace(obj.HTMLBody, strDelete01, "")
NewBody = Replace(obj.HTMLBody, strDelete02, "")
NewBody = Replace(obj.HTMLBody, strDelete03, "")
NewBody = Replace(obj.HTMLBody, strDelete04, "")
If NewBody <> "" Then
Each Replace
(except the first) overwrites the value of NewBody
created by the previous Replace
. You seem to think that if strDelete04
is not found, NewBody will be empty. No, if strDelete04
is not found, NewBody
will be a copy of obj.HTMLBody
.
You need something like:
NewBody = Replace(obj.HTMLBody, strDelete01, "")
NewBody = Replace(NewBody, strDelete02, "")
NewBody = Replace(NewBody, strDelete03, "")
NewBody = Replace(NewBody, strDelete04, "")
If NewBody <> obj.HTMLBody Then
' One or more delete strings found and removed
You say that the CRLFs are not in fixed positions. If so, no simple modification of your code will have the effect you seek. I will show you how to achieve the effect you seek but first I will have to create some emails containing your text so I can test my code.
Part 2
Having looked at your image of the Html more closely, I believe there is a simple solution. The two CRLFs in the text replace spaces. Providing this is always what happens, you can use:
NewBody = Replace(obj.HTMLBody, vbCr & vbLf, " ")
This would remove any CRLF present wherever it appeared within the Html. It would not matter if there were extra CRLFs because any string of whitespace characters (which includes CR and LF) in an Html document is replaced by a single space when the document is displayed.
You finish the removal of the unwanted text with:
Dim strDelete = "Diese E-Mail kommt von Personen außerhalb " & _
"der Stadtverwaltung. Klicken Sie nur auf " & _
"Links oder Dateianhänge, wenn Sie die Personen " & _
"für vertrauenswürdig halten."
NewBody = Replace(NewBody, strDelete, "")
If the above does not work, you need a more convenient diagnostic technique. Saving the entire email as Html may be easy but you cannot be quite sure how the result differs from what a VBA macro would see. You wonder if Outlook stores emails in a format other than Html. I cannot imagine why Outlook would convert the incoming SMTP message to some secret format and then convert it back when the user wishes to view it. If Outlook does have a secret format, it is totally hidden from the VBA programmer.
The following is a simple version of the diagnostic tool I use. If you need something more advanced, I can provide it but let us try this first.
Copy the code below to an Outlook module. Select one of these emails and then run macro DsplHtmlBodyFromSelectedEmails
. The entire Html body of the email will be output to the Immediate Window in a readable format. I believe I have included all the subroutines called by the macro. I apologise in advance if I have not. If you get a message about an undefined routine, let me know and I will add it to the answer.
Sub DsplHtmlBodyFromSelectedEmails()
' Select one or emails then run this macro. For each selected email, the Received Time, the Subject and the Html body are output to the Immediate Window. Note: the Immediate Window can only display about 200 lines before
The older lines are lost.
Dim Exp As Explorer
Dim Html As String
Dim ItemCrnt As MailItem
Set Exp = Outlook.Application.ActiveExplorer
If Exp.Selection.Count = 0 Then
Call MsgBox("Please select one or more emails then try again", vbOKOnly)
Exit Sub
Else
For Each ItemCrnt In Exp.Selection
With ItemCrnt
If .Class = olMail Then
Debug.Print .ReceivedTime & " " & .Subject
Call OutLongTextRtn(Html, "Html", .HtmlBody)
Debug.Print Html
End If
End With
Next
End If
End Sub
Sub OutLongTextRtn(ByRef TextOut As String, ByVal Head As String, _
ByVal TextIn As String)
' * Break TextIn into lines of not more than 100 characters
' and append to TextOut.
' * The output is arranged so:
' xxxx|sssssssssssssss|
' |sssssssssssssss|
' |ssssssssss|
' where "xxxx" is the value of Head and "ssss..." are characters from
' TextIn. The third line in the example could be shorter because:
' * it contains the last few characters of TextIn
' * there a linefeed in TextIn
' * a <xxx> string recording whitespace would have been split
' across two lines.
If TextIn = "" Then
' Nothing to do
Exit Sub
End If
Const LenLineMax As Long = 100
Dim PosBrktEnd As Long ' Last > before PosEnd
Dim PosBrktStart As Long ' Last < before PosEnd
Dim PosNext As Long ' Start of block to be output after current block
Dim PosStart As Long ' First character of TextIn not yet output
TextIn = TidyTextForDspl(TextIn)
TextIn = Replace(TextIn, "lf›", "lf›" & vbLf)
PosStart = 1
Do While True
PosNext = InStr(PosStart, TextIn, vbLf)
If PosNext = 0 Then
' No LF in [Remaining] TextIn
'Debug.Assert False
PosNext = Len(TextIn) + 1
End If
If PosNext - PosStart > LenLineMax Then
PosNext = PosStart + LenLineMax
End If
' Check for <xxx> being split across lines
PosBrktStart = InStrRev(TextIn, "‹", PosNext - 1)
PosBrktEnd = InStrRev(TextIn, "›", PosNext - 1)
If PosBrktStart < PosStart And PosBrktEnd < PosStart Then
' No <xxx> within text to be displayed
' No change to PosNext
'Debug.Assert False
ElseIf PosBrktStart > 0 And PosBrktEnd > 0 And PosBrktEnd > PosBrktStart Then
' Last or only <xxx> totally within text to be displayed
' No change to PosNext
'Debug.Assert False
ElseIf PosBrktStart > 0 And _
(PosBrktEnd = 0 Or (PosBrktEnd > 0 And PosBrktEnd < PosBrktStart)) Then
' Last or only <xxx> will be split across rows
'Debug.Assert False
PosNext = PosBrktStart
Else
' Are there other combinations?
Debug.Assert False
End If
'Debug.Assert Right$(Mid$(TextIn, PosStart, PosNext - PosStart), 1) <> "‹"
If TextOut <> "" Then
TextOut = TextOut & vbLf
End If
If PosStart = 1 Then
TextOut = TextOut & Head & "|"
Else
TextOut = TextOut & Space(Len(Head)) & "|"
End If
TextOut = TextOut & Mid$(TextIn, PosStart, PosNext - PosStart) & "|"
PosStart = PosNext
If Mid$(TextIn, PosStart, 1) = vbLf Then
PosStart = PosStart + 1
End If
If PosStart > Len(TextIn) Then
Exit Do
End If
Loop
End Sub
Function TidyTextForDspl(ByVal Text As String) As String
' Tidy Text for display by replacing white space with visible strings:
' Leave single space unchanged
' Replace single LF by ‹lf›
' Replace single CR by ‹cr›
' Replace single TB by ‹tb›
' Replace single non-break space by ‹nbs›
' Replace single CRLF by ‹crlf›
' Replace multiple spaces by ‹n s› where n is number of repeats
' Replace multiple LFs by ‹n lf› of white space character
' Replace multiple CRs by ‹cr› or ‹n cr›
' Replace multiple TBs by ‹n tb›
' Replace multiple non-break spaces by ‹n nbs›
' Replace multiple CRLFs by ‹n crlf›
Dim InsStr As String
Dim InxWsChar As Long
Dim NumWsChar As Long
Dim PosWsChar As Long
Dim RetnVal As String
Dim WsCharCrnt As Variant
Dim WsCharValue As Variant
Dim WsCharDspl As Variant
WsCharValue = VBA.Array(" ", vbCr & vbLf, vbLf, vbCr, vbTab, Chr(160))
WsCharDspl = VBA.Array("s", "crlf", "lf", "cr", "tb", "nbs")
RetnVal = Text
' Replace each whitespace individually
For InxWsChar = 0 To UBound(WsCharValue)
RetnVal = Replace(RetnVal, WsCharValue(InxWsChar), "‹" & WsCharDspl(InxWsChar) & "›")
Next
' Look for repeats. If found replace <x> by <n x>
For InxWsChar = 0 To UBound(WsCharValue)
'Debug.Assert InxWsChar <> 1
PosWsChar = 1
Do While True
InsStr = "‹" & WsCharDspl(InxWsChar) & "›"
PosWsChar = InStr(PosWsChar, RetnVal, InsStr & InsStr)
If PosWsChar = 0 Then
' No [more] repeats of this <x>
Exit Do
End If
' Have <x><x>. Count number of extra <x>s
NumWsChar = 2
Do While Mid(RetnVal, PosWsChar + NumWsChar * Len(InsStr), Len(InsStr)) = InsStr
NumWsChar = NumWsChar + 1
Loop
RetnVal = Mid(RetnVal, 1, PosWsChar - 1) & _
"‹" & NumWsChar & " " & WsCharDspl(InxWsChar) & "›" & _
Mid(RetnVal, PosWsChar + NumWsChar * Len(InsStr))
PosWsChar = PosWsChar + Len(InsStr) + Len(NumWsChar)
Loop
Next
' Restore any single spaces
RetnVal = Replace(RetnVal, "‹" & WsCharDspl(0) & "›", " ")
TidyTextForDspl = RetnVal
End Function