How can I improve this sorting algorithm?
What I need:
I often need to rearrange multidimensional arrays, especially with timestamps. For that I need a routine, that results in a permanent sort order. Since the data can be huge, it has to be performant as possible.
I would like to have some feedback to my current efforts. I'm trying to understand sorting arrays practical. I'm not a programmer, if possible be patient. :)
I'll appreciate every help/tips! I'm going to learn some new things maybe.
What my efforts are so far:
For the beginning I took the bubble sort algorithm. It does what is needed, BUT its performance is very low. It needs more than 20 seconds for sorting a column within 582 rows and 114 columns.
The code works with single- and multi-column-arrays. I use regular expressions, so keep in mind the little function a the end of the code.
I've commented my code step by step, I hope its still readable.
I know QuickSort would be much faster, but I haven't understand to make this algorithm permanent/stable yet. I've found this solution Sorting a multidimensionnal array in VBA, but as said, its not permanent.
Especially for Excel I know the way to copy an array to a worksheet and to sort it there. My goal is to avoid this solution. :)
'++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
'+
'+ BubbleSort_Array
'+
'+ Sort algorithm: BubbleSort
'+ Sorts by: 1. numbers, 2. dates, 3. Zeichenketten (also with consecutive number, e.g. "Book1,Book2,Book3..."; Capital letters before small letters)
'+ Parameter "Data": Requires an array (VARIANT) with one or more columns and rows, by reference
'+ Paramater "Column" is a LONG, follows the counting by "Option Base 0" (first column = 0)
'+ Parameter "Direction" is an EXCEL-based constant, that determines the sortdirection (ascending/descending)
'+
'++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Public Sub BubbleSort_Array( _
ByRef Data() As Variant, _
Optional Column As Long = -1, _
Optional Direction As XlSortOrder = 1 _
)
Dim InnerIndex As Long 'common variable, for the inner loop
Dim OuterIndex As Long 'common variable, for the outer loop
Dim SwapItem As Variant 'variable to temporarily save content, that could be swapped with another item
Dim SwapItem2 As Variant 'variable to temporarily save content, that could be swapped with another item
Dim ErrNum As Long 'variable for error number of the ERR-object
Dim lngRow As Long 'common variable for the rows of an array
Dim lngColumn As Long 'common variable for the column of an array
Dim colNumber As New Collection 'variable to save a part of digits from an entry
Dim colText As New Collection 'variable to save a part of text from an entry
Dim colDates As New Collection 'variable to save dates from an entry
Dim SortIndex() As Variant 'array for sorting and mapping the specified COLUMN
Dim CopyData() As Variant 'array for the original data, but sorted
'Check, whether the given array is a one- or multi-column array
On Error Resume Next
ErrNum = UBound(Data, 2)
ErrNum = Err.Number
On Error GoTo 0
'If there is an error and the parameter COLUMN is still -1 the parameter DATA is an one-column-array
If ErrNum > 0 And Column = -1 Then
'Outer loop
For OuterIndex = LBound(Data) To UBound(Data)
'Inner loop
For InnerIndex = LBound(Data) To UBound(Data)
'Execute the following statement as long the current index is not the last one (it would throw an error 9 by trying to access the next item)
If InnerIndex < UBound(Data) Then
'To differentiate between the values
'Check, whether the value and the next value are dates
If VBA.IsDate(Data(InnerIndex)) And VBA.IsDate(Data(InnerIndex + 1)) Then
'Save the dates in a temporary collection
colDates.Add VBA.CDate(Data(InnerIndex)), "date1"
colDates.Add VBA.CDate(Data(InnerIndex + 1)), "date2"
Else
'If both values are not dates, split the value in case it is a STRING with an number at the end
'like "Paper1", "Paper2" etc.
colNumber.Add RegEx_Replace(Data(InnerIndex), ".*(\d+$)", "$1"), "current"
colNumber.Add RegEx_Replace(Data(InnerIndex + 1), ".*(\d+$)", "$1"), "next"
colText.Add RegEx_Replace(Data(InnerIndex), "(.*)\d+$", "$1"), "current"
colText.Add RegEx_Replace(Data(InnerIndex + 1), "(.*)\d+$", "$1"), "next"
End If
'Check, whether the sortdirection is ascending
If Direction = xlAscending Then
'Sort by date
If VBA.IsDate(Data(InnerIndex)) And VBA.IsDate(Data(InnerIndex + 1)) Then
'Check the items depending from the sortdirection
If VBA.CDbl(colDates("date1")) > VBA.CDbl(colDates("date2")) Then
'In case the first item is bigger then the second, swap the items
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
'Sort by strings with consecutive number
ElseIf VBA.IsNumeric(colNumber("current")) And VBA.IsNumeric(colNumber("next")) _
And (colText("current") = colText("next")) Then
'In case the first item is bigger then the second, swap the items
If colNumber("current") > colNumber("next") Then
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
Else
'Sort by strings
'In case the first item is bigger then the second, swap the items
If Data(InnerIndex) > Data(InnerIndex + 1) Then
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
End If
'Sort descending
Else
'Sort descending
'Sort by date
If VBA.IsDate(Data(InnerIndex)) And VBA.IsDate(Data(InnerIndex + 1)) Then
If VBA.CDbl(colDates("date1")) < VBA.CDbl(colDates("date2")) Then
'In case the first item is smaller then the second, swap the items
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
'Sort by strings with consecutive number
ElseIf VBA.IsNumeric(colNumber("current")) And VBA.IsNumeric(colNumber("next")) And _
(colText("current") = colText("next")) Then
'In case the first item is smaller then the second, swap the items
If colNumber("current") < colNumber("next") Then
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
Else
'Sort by strings
'In case the first item is smaller then the second, swap the items
If Data(InnerIndex) < Data(InnerIndex + 1) Then
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
End If
End If
End If
Set colNumber = Nothing
Set colText = Nothing
Set colDates = Nothing
Next
Next
Else
'Resize the array SortIndex for sorting the specified COLUMN
'Needs two columns: One for the index of the original data, and one for the values to be sorted
ReDim SortIndex(UBound(Data, 1), 1)
For InnerIndex = LBound(Data, 1) To UBound(Data, 1)
'Save index of the original data
SortIndex(InnerIndex, 0) = InnerIndex
'Save values of the specified COLUMN
SortIndex(InnerIndex, 1) = Data(InnerIndex, Column)
Next
'Outer loop
For OuterIndex = LBound(SortIndex, 1) To UBound(SortIndex, 1)
'Inner loop
For InnerIndex = LBound(SortIndex, 1) To UBound(SortIndex, 1)
'Execute the following statement as long the current index is not the last one (it would throw an error 9 by trying to access the next item)
If InnerIndex < UBound(SortIndex, 1) Then
'To differentiate between the values
'Check, whether the value and the next value are dates
If VBA.IsDate(SortIndex(InnerIndex, 1)) And VBA.IsDate(SortIndex(InnerIndex + 1, 1)) Then
'Save the dates in a temporary collection
colDates.Add VBA.CDate(SortIndex(InnerIndex, 1)), "date1"
colDates.Add VBA.CDate(SortIndex(InnerIndex + 1, 1)), "date2"
Else
'If both values are not dates, split the value in case it is a STRING with an number at the end
'like "Paper1", "Paper2" etc.
colNumber.Add RegEx_Replace(SortIndex(InnerIndex, 1), ".*(\d+$)", "$1"), "current"
colNumber.Add RegEx_Replace(SortIndex(InnerIndex + 1, 1), ".*(\d+$)", "$1"), "next"
colText.Add RegEx_Replace(SortIndex(InnerIndex, 1), "(.*)\d+$", "$1"), "current"
colText.Add RegEx_Replace(SortIndex(InnerIndex + 1, 1), "(.*)\d+$", "$1"), "next"
End If
'Check the sortdirection
If Direction = xlAscending Then
'Sort by date
If VBA.IsDate(SortIndex(InnerIndex, 1)) And VBA.IsDate(SortIndex(InnerIndex + 1, 1)) Then
If VBA.CDbl(colDates("date1")) > VBA.CDbl(colDates("date2")) Then
'In case the first item is bigger then the second, swap the items
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
'Sort by strings with consecutive numbers
ElseIf VBA.IsNumeric(colNumber("current")) And VBA.IsNumeric(colNumber("next")) _
And (colText("current") = colText("next")) Then
'In case the first item is bigger then the second, swap the items
If colNumber("current") > colNumber("next") Then
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
Else
'Sort by strings
'In case the first item is bigger then the second, swap the items
If SortIndex(InnerIndex, 1) > SortIndex(InnerIndex + 1, 1) Then
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
End If
Else
'Sort descending
'Sort by dates
If VBA.IsDate(SortIndex(InnerIndex, 1)) And VBA.IsDate(SortIndex(InnerIndex + 1, 1)) Then
'In case the first item is smaller then the second, swap the items
If VBA.CDbl(colDates("date1")) < VBA.CDbl(colDates("date2")) Then
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
'Sort by strings with consecutive numbers
ElseIf VBA.IsNumeric(colNumber("current")) And VBA.IsNumeric(colNumber("next")) And _
(colText("current") = colText("next")) Then
'In case the first item is smaller then the second, swap the items
If colNumber("current") < colNumber("next") Then
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
Else
'Sort by strings
If SortIndex(InnerIndex, 1) < SortIndex(InnerIndex + 1, 1) Then
'In case the first item is smaller then the second, swap the items
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
End If
End If
End If
Set colNumber = Nothing
Set colText = Nothing
Set colDates = Nothing
Next
Next
'Resize a new array with the same size like the original DATA
ReDim CopyData(UBound(Data, 1), UBound(Data, 2))
'Write the data according to the array SortIndex (= sorts the whole original data)
For lngRow = LBound(Data, 1) To UBound(Data, 1)
For lngColumn = LBound(Data, 2) To UBound(Data, 2)
CopyData(lngRow, lngColumn) = Data(SortIndex(lngRow, 0), lngColumn)
Next
Next
'Overwrite the original data with the sorted data
For lngRow = LBound(Data, 1) To UBound(Data, 1)
For lngColumn = LBound(Data, 2) To UBound(Data, 2)
Data(lngRow, lngColumn) = CopyData(lngRow, lngColumn)
Next
Next
End If
End Sub
'+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
'+
'+ RegEx_Replace
'+
'+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Public Function RegEx_Replace( _
varString As Variant, _
strSearchPattern As String, _
strReplaceString As String, _
Optional blnCase_Insensitive As Boolean = True, _
Optional blnGlobalSearch As Boolean = True, _
Optional blnMultiLine As Boolean = False _
) As String
Dim RegEx As Object
Set RegEx = CreateObject("vbscript.regexp")
With RegEx
.IgnoreCase = blnCase_Insensitive
.Global = blnGlobalSearch
.MultiLine = blnMultiLine
.Pattern = strSearchPattern
End With
RegEx_Replace = RegEx.Replace(varString, strReplaceString)
End Function
Here's a slightly different approach - broken out some of the functionality into separate methods but the main Sub has a similar signature to yours (with one additional parameter)
'run some tests
Sub Tester()
Dim arr
BubbleSort_Array Array(), 1 'empty array: does nothing
arr = Array(5, 4, 1, 3, 2)
BubbleSort_Array arr, 1
[P1].Resize(1, UBound(arr) + 1).Value = arr
'1-dimensional array
arr = Array("1 Title", "2 Title", "10 Title", "33 Title", "16 Title", "blah")
BubbleSort_Array arr, 1 'sort raw values
[P2].Resize(1, UBound(arr) + 1).Value = arr
arr = Array("1 Title", "2 Title", "10 Title", "33 Title", "16 Title", "blah")
BubbleSort_Array arr, 1, "SortOnVal" 'sort on Val() transformation
[P3].Resize(1, UBound(arr) + 1).Value = arr
arr = Array("1 Title", "2 Title", "10 Title", "33 Title", "16 Title", "blah")
BubbleSort_Array arr, 1, "SortOnVal", xlDescending 'sort on Val() transformation, descending
[P4].Resize(1, UBound(arr) + 1).Value = arr
'2-dimensional array (from A1:N22)
arr = [A1].CurrentRegion.Value
BubbleSort_Array arr, 3 'sort 2D array on third column ("Val1", "Val2",...."Val22")
[A25].Resize(UBound(arr, 1), UBound(arr, 2)).Value = arr 'sort is "ascibetical"
arr = [A1].CurrentRegion.Value
BubbleSort_Array arr, 3, "NumberOnly" 'sort 2D array on third column, after extracting a number where present
[A49].Resize(UBound(arr, 1), UBound(arr, 2)).Value = arr 'sort looks correct
End Sub
'Sort array `data` in-place, using optional column position if 2D array
'Optional `ParseFunction` parameter is the name of a single-input function to transform values prior to sorting
Sub BubbleSort_Array(ByRef data As Variant, Optional Column As Long = -1, _
Optional ParseFunction As String = "", _
Optional Direction As XlSortOrder = 1)
Dim dims As Long, lbr As Long, lbc As Long, ubr As Long, ubc As Long, i As Long, j As Long
Dim arrSort, tmp, tmp2, swap As Boolean, arrOut
dims = Dimensions(data) 'check input array dimensions
Debug.Print "dims", dims
If dims < 1 Or dims > 2 Then Exit Sub
lbr = LBound(data, 1)
ubr = UBound(data, 1)
If dims = 1 Then data = Make2D(data) 'normalize input to 2D array (single column)
lbc = LBound(data, 2)
ubc = UBound(data, 2)
If Column = -1 Then Column = lbc 'sort defaults to first column
'make an array for sorting: first column is values to sort on, second is row indexes from `data`
' advantage is you're shuffling fewer items when sorting, and expensive transformations only run once
ReDim arrSort(lbr To ubr, 1 To 2)
For i = lbr To ubr
tmp = data(i, Column) 'value to sort on
If Len(ParseFunction) > 0 Then tmp = Application.Run(ParseFunction, tmp) 'custom transformation?
arrSort(i, 1) = tmp
arrSort(i, 2) = i
Next i
'now sort the array...
For i = lbr To ubr - 1
For j = i + 1 To ubr
swap = IIf(Direction = xlAscending, arrSort(i, 1) > arrSort(j, 1), _
arrSort(i, 1) < arrSort(j, 1))
If swap Then
tmp = arrSort(j, 1) 'swap positions in the "comparison" array
tmp2 = arrSort(j, 2)
arrSort(j, 1) = arrSort(i, 1)
arrSort(j, 2) = arrSort(i, 2)
arrSort(i, 1) = tmp
arrSort(i, 2) = tmp2
End If
Next j
Next i
ReDim arrOut(lbr To ubr, lbc To ubc) 'size the output array
'using the sorted array, copy data from the original array
For i = lbr To ubr
For j = lbc To ubc
arrOut(i, j) = data(arrSort(i, 2), j)
Next j
Next i
If dims = 1 Then arrOut = Make1D(arrOut) 'switch back to 1D if input was 1D
data = arrOut 'replace the input array in-place
End Sub
'return result of Val()
Function SortOnVal(v)
SortOnVal = Val(v)
End Function
'extract the first *whole* number from string `v`
Function NumberOnly(v) As Long
Dim rv, i, c
For i = 1 To Len(v)
c = Mid(v, i, 1)
If IsNumeric(c) Then
rv = rv & c
Else
If Len(rv) > 0 Then Exit For
End If
Next i
If Len(rv) = 0 Then rv = 0
NumberOnly = CLng(rv)
End Function
'----Helper functions
'find the dimension of an array
Function Dimensions(data As Variant)
Dim d As Long, ub
d = 1
Do
ub = Empty
On Error Resume Next
'Debug.Print d, LBound(data, d), UBound(data, d)
ub = UBound(data, d)
On Error GoTo 0
If ub = -1 Or IsEmpty(ub) Then Exit Do 'also checking for undimensioned case...
d = d + 1
Loop
Dimensions = d - 1
End Function
'transform a 1-D array into a 2D array (single-column)
Function Make2D(arr)
Dim i As Long, arrOut
ReDim arrOut(LBound(arr) To UBound(arr), 1 To 1)
For i = LBound(arr) To UBound(arr)
arrOut(i, 1) = arr(i)
Next i
Make2D = arrOut
End Function
'transform a single-column 2-D array into a 1D array
Function Make1D(arr)
Dim i As Long, arrOut
ReDim arrOut(LBound(arr) To UBound(arr))
For i = LBound(arr) To UBound(arr)
arrOut(i) = arr(i, 1)
Next i
Make1D = arrOut
End Function
Weird, I thought I uploaded this screenshot yesterday:
As you see, you can check the "Data" ribbon, "Filter&Sort" choice, and off you go.