Swift: what is the right way to split up a [String] resulting in a [[String]] with a given subarray size?

Starting with a large [String] and a given subarray size, what is the best way I could go about splitting up this array into smaller arrays? (The last array may be smaller than the given subarray size.)

Concrete example:

Split up ["1","2","3","4","5","6","7"] with max split size 2

The code would produce [["1","2"],["3","4"],["5","6"],["7"]]

Obviously I could do this a little more manually, but I feel like in Swift something like map() or reduce() may do what I want really beautifully.


In Swift 3/4 this would look like the following:

let numbers = ["1","2","3","4","5","6","7"]
let chunkSize = 2
// Each value produced by the stride is the index of a chunk's first element.
let chunks = stride(from: 0, to: numbers.count, by: chunkSize).map { start -> [String] in
    // Clamp the upper bound so the last chunk stops at the end of the array.
    let end = min(start + chunkSize, numbers.count)
    return Array(numbers[start..<end])
}
// chunks prints as [["1", "2"], ["3", "4"], ["5", "6"], ["7"]]

As an extension to Array:

extension Array {
    /// Splits the array into consecutive sub-arrays of `chunkSize` elements.
    ///
    /// The final sub-array holds whatever elements remain and may be shorter.
    /// - Parameter chunkSize: The number of elements per chunk.
    /// - Returns: The chunks, in their original order.
    func chunked(by chunkSize: Int) -> [[Element]] {
        let chunkStarts = stride(from: 0, to: count, by: chunkSize)
        return chunkStarts.map { start in
            // `Swift.min` is spelled out because `min` alone resolves to the Array member.
            let end = Swift.min(start + chunkSize, count)
            return Array(self[start..<end])
        }
    }
}

Or the slightly more verbose, yet more general:

let numbers = ["1","2","3","4","5","6","7"]
let chunkSize = 2
let chunks: [[String]] = stride(from: 0, to: numbers.count, by: chunkSize).map { chunkStart in
    let limit = numbers.endIndex
    // index(_:offsetBy:limitedBy:) returns nil when the offset would move past the limit,
    // which only happens for the final, shorter chunk.
    guard let chunkEnd = numbers.index(chunkStart, offsetBy: chunkSize, limitedBy: limit) else {
        return Array(numbers[chunkStart..<limit])
    }
    return Array(numbers[chunkStart..<chunkEnd])
}

This is more general because I am making fewer assumptions about the type of the index into the collection. In the previous implementation I assumed that the indices could be compared and added.

Note that in Swift 3 the functionality of advancing indices has been transferred from the indices themselves to the collection.


With Swift 5, according to your needs, you can choose one of the five following ways in order to solve your problem.


1. Using AnyIterator in a Collection extension method

AnyIterator is a good candidate to iterate over the indices of an object that conforms to Collection protocol in order to return subsequences of this object. In a Collection protocol extension, you can declare a chunked(by:) method with the following implementation:

extension Collection {

    /// Splits the collection into consecutive arrays of at most `distance` elements.
    ///
    /// - Parameter distance: The number of elements per chunk; must be greater than 0.
    /// - Returns: The chunks in order. Only the last one may hold fewer than `distance` elements.
    func chunked(by distance: Int) -> [[Element]] {
        precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop

        // Start of the next chunk; the iterator closure captures and advances it on every call.
        var lowerBound = startIndex
        let chunks = AnyIterator<[Element]> {
            guard lowerBound != self.endIndex else { return nil }
            // A nil result means the offset would run past the end, so clamp to endIndex.
            let upperBound = self.index(lowerBound, offsetBy: distance, limitedBy: self.endIndex) ?? self.endIndex
            let chunk = Array(self[lowerBound ..< upperBound])
            lowerBound = upperBound
            return chunk
        }

        return Array(chunks)
    }

}

Usage:

// Split nine elements into chunks of two; the last chunk holds the single leftover element.
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = array.chunked(by: 2)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

2. Using stride(from:to:by:) function in an Array extension method

Array indices are of type Int and conform to Strideable protocol. Therefore, you can use stride(from:to:by:) and advanced(by:) with them. In an Array extension, you can declare a chunked(by:) method with the following implementation:

extension Array {

    /// Splits the array into consecutive sub-arrays of `distance` elements.
    ///
    /// - Parameter distance: The number of elements per chunk.
    /// - Returns: The chunks in order. The last one holds the remaining elements and may be shorter.
    func chunked(by distance: Int) -> [[Element]] {
        return stride(from: startIndex, to: endIndex, by: distance).map { chunkStart in
            // Clamp so the final chunk stops at the end of the array.
            // `index(chunkStart, offsetBy: distance, limitedBy: endIndex) ?? endIndex` also works.
            let chunkEnd = Swift.min(chunkStart.advanced(by: distance), endIndex)
            return Array(self[chunkStart ..< chunkEnd])
        }
    }

}

Usage:

// Split nine elements into chunks of two; the last chunk holds the single leftover element.
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = array.chunked(by: 2)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

3. Using a recursive approach in an Array extension method

Based on Nate Cook's recursive code, you can declare a chunked(by:) method in an Array extension with the following implementation:

extension Array {

    /// Recursively splits the array into consecutive sub-arrays of `distance` elements.
    ///
    /// - Parameter distance: The number of elements per chunk; must be greater than 0.
    /// - Returns: The chunks in order. The last one may be shorter. An empty array
    ///   produces no chunks at all (`[]`).
    func chunked(by distance: Int) -> [[Element]] {
        precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop

        // Without this guard the base case below would wrap the empty array and return `[[]]`.
        guard !isEmpty else { return [] }

        if count <= distance {
            // Everything that is left fits into a single (possibly short) chunk.
            return [self]
        } else {
            let head = [Array(self[0 ..< distance])]
            let tail = Array(self[distance ..< count])
            return head + tail.chunked(by: distance)
        }
    }

}

Usage:

// Split nine elements into chunks of two; the last chunk holds the single leftover element.
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = array.chunked(by: 2)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

4. Using a for loop and batches in a Collection extension method

Chris Eidhof and Florian Kugler show in Swift Talk #33 - Sequence & Iterator (Collections #2) video how to use a simple for loop to fill batches of sequence elements and append them on completion to an array. In a Collection extension, you can declare a chunked(by:) method with the following implementation:

extension Collection {

    /// Splits the collection into consecutive arrays of `distance` elements by
    /// filling one batch at a time.
    ///
    /// - Parameter distance: The number of elements per chunk; must be greater than 0.
    /// - Returns: The chunks in order. The last one holds the remaining elements and may be shorter.
    func chunked(by distance: Int) -> [[Element]] {
        // With a non-positive distance the batch could never reach the target size,
        // and the whole collection would silently come back as a single chunk.
        precondition(distance > 0, "distance must be greater than 0")

        var result: [[Element]] = []
        var batch: [Element] = []

        for element in self {
            batch.append(element)

            // A full batch is flushed immediately, so `batch` never exceeds `distance`.
            if batch.count == distance {
                result.append(batch)
                batch = []
            }
        }

        // Whatever is left over forms the final, shorter chunk.
        if !batch.isEmpty {
            result.append(batch)
        }

        return result
    }

}

Usage:

// Split nine elements into chunks of two; the last chunk holds the single leftover element.
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let newArray = array.chunked(by: 2)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

5. Using a custom struct that conforms to Sequence and IteratorProtocol protocols

If you don't want to create extensions of Sequence, Collection or Array, you can create a custom struct that conforms to Sequence and IteratorProtocol protocols. This struct should have the following implementation:

/// A single-pass sequence that hands out the elements of an array in consecutive
/// batches of `distance` elements; the final batch may be shorter.
///
/// The struct is its own iterator, so iterating a `var` instance consumes it.
struct BatchSequence<T>: Sequence, IteratorProtocol {

    private let array: [T]
    private let distance: Int
    /// Position of the first element of the next batch.
    private var index = 0

    /// - Parameters:
    ///   - array: The elements to split up.
    ///   - distance: The batch size; must be greater than 0.
    init(array: [T], distance: Int) {
        precondition(distance > 0, "distance must be greater than 0") // prevents infinite loop
        self.array = array
        self.distance = distance
    }

    /// Returns the next batch, or `nil` once every element has been handed out.
    mutating func next() -> [T]? {
        if index >= array.endIndex { return nil }

        let lowerBound = index
        // Clamp to the end of the array so the last batch holds only what is left.
        let upperBound = Swift.min(lowerBound.advanced(by: distance), array.endIndex)
        index = upperBound
        return Array(array[lowerBound ..< upperBound])
    }

}

Usage:

// Wrap the array in a BatchSequence of size two, then collect its batches into an array.
let array = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
let batchSequence = BatchSequence(array: array, distance: 2)
let newArray = Array(batchSequence)
print(newArray) // prints: [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"], ["9"]]

I wouldn't call it beautiful, but here's a method using map:

let numbers = ["1","2","3","4","5","6","7"]
let splitSize = 2
// Swift 3+ spelling of the Swift 2 calls `startIndex.stride(to:by:)` and `advancedBy(_:limit:)`:
// the stride yields each chunk's first index, and the limited offset caps the last chunk
// at the end of the array. Each element of `chunks` is a slice of `numbers`, not a copy.
let chunks = stride(from: numbers.startIndex, to: numbers.count, by: splitSize).map {
  numbers[$0 ..< (numbers.index($0, offsetBy: splitSize, limitedBy: numbers.endIndex) ?? numbers.endIndex)]
}

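The stride gives you the index of the first element of each chunk, so you can map each of those indices to a slice of the source array by advancing the index by the chunk size, capped at the end of the array (advancedBy(_:limit:) in Swift 2, index(_:offsetBy:limitedBy:) in Swift 3 and later).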

A more "functional" approach would simply be to recurse over the array, like so:

/// Recursively splits `s` into consecutive sub-arrays of `splitSize` elements.
///
/// Both arguments are unlabeled at the call site: `chunkArray(values, 2)`.
/// - Parameters:
///   - s: The array to split.
///   - splitSize: The number of elements per chunk; must be greater than 0.
/// - Returns: The chunks in order. The last one may be shorter. An empty input produces `[]`.
func chunkArray<T>(_ s: [T], _ splitSize: Int) -> [[T]] {
    // A size of zero would recurse forever on the unchanged remainder.
    precondition(splitSize > 0, "splitSize must be greater than 0")

    // Without this guard the base case below would return `[[]]` for an empty array.
    guard !s.isEmpty else { return [] }

    if s.count <= splitSize {
        return [s]
    } else {
        return [Array(s[0..<splitSize])] + chunkArray(Array(s[splitSize..<s.count]), splitSize)
    }
}

I like Nate Cook's answer, but it looks like Swift has moved on since it was written. Here's my take on this as an extension to Array:

extension Array {
    /// Splits the array into consecutive sub-arrays of `chunkSize` elements.
    ///
    /// The argument is unlabeled at the call site: `values.chunk(2)`.
    /// - Parameter chunkSize: The number of elements per chunk. A negative value
    ///   produces an empty stride and therefore `[]`; zero is rejected by `stride`
    ///   with a runtime error.
    /// - Returns: The chunks in order. The last one may be shorter.
    func chunk(_ chunkSize: Int) -> [[Element]] {
        return stride(from: 0, to: count, by: chunkSize)
            .map { Array(self[$0..<Swift.min($0 + chunkSize, count)]) }
    }
}

Note that, as written above, it returns [] for a negative chunk size and results in a fatal error for a chunk size of zero. You'll have to put a guard in if you want to prevent that.

func testChunkByTwo() {
    // Seven elements split in pairs leave a one-element chunk at the end.
    let chunks = [1, 2, 3, 4, 5, 6, 7].chunk(2)
    XCTAssertEqual([[1, 2], [3, 4], [5, 6], [7]], chunks)
}

func testByOne() {
    // A chunk size of one wraps every element in its own single-element array.
    let values = [1, 2, 3, 4, 5, 6, 7]
    let wrapped = values.map { [$0] }
    XCTAssertEqual(wrapped, values.chunk(1))
}

func testNegative() {
    // A negative chunk size is expected to produce no chunks at all.
    let input = [1,2,3,4,5,6,7]
    let output = input.chunk(-2)
    // An empty literal has no inferable element type, so the annotation is required.
    let expectedOutput: [[Int]] = []
    XCTAssertEqual(expectedOutput, output)
}

I don't think you'll want to use map or reduce. Map is for applying a function to each individual element in an array, while reduce is for combining an array's elements into a single value. What you want to do is slice the array into subarrays of a certain size. This snippet uses slices.

let arr = ["1","2","3","4","5","6","7"]
let splitSize = 2
// A non-positive size would never advance `i` (or would form an invalid range).
precondition(splitSize > 0, "splitSize must be greater than 0")

var newArr = [[String]]()
var i = 0
while i < arr.count {
    // Clamp the upper bound so the final slice stops at the end of the array.
    let slice = arr[i ..< min(i + splitSize, arr.count)]
    newArr.append(Array(slice))
    i += slice.count
}
print(newArr)