Swift Merge audio and video files into one video

Improved code (of Govind's answer) with some additional features:

  1. Merge audio of the video + external audio (the initial answer was dropping the sound of the video)
  2. Flip video horizontally if needed (I personally use it when user captures using frontal camera, btw instagram flips it too)
  3. Apply preferredTransform correctly which solves the issue when video was saved rotated (video is external: captured by other device/generated by other app)
  4. Removed some unused code with VideoComposition.
  5. Added a completion handler to the method so that it can be called from a different class.
  6. Update to Swift 4.

Step 1.

import UIKit
import AVFoundation
import AVKit
import AssetsLibrary

Step 2.

/// Merges video and sound while keeping sound of the video too
///
/// - Parameters:
///   - videoUrl: URL to video file
///   - audioUrl: URL to audio file
///   - shouldFlipHorizontally: pass True if video was recorded using frontal camera otherwise pass False
///   - completion: completion of saving: error or url with final video
func mergeVideoAndAudio(videoUrl: URL,
                        audioUrl: URL,
                        shouldFlipHorizontally: Bool = false,
                        completion: @escaping (_ error: Error?, _ url: URL?) -> Void) {

    let mixComposition = AVMutableComposition()
    var mutableCompositionVideoTrack = [AVMutableCompositionTrack]()
    var mutableCompositionAudioTrack = [AVMutableCompositionTrack]()
    var mutableCompositionAudioOfVideoTrack = [AVMutableCompositionTrack]()

    //start merge

    let aVideoAsset = AVAsset(url: videoUrl)
    let aAudioAsset = AVAsset(url: audioUrl)

    let compositionAddVideo = mixComposition.addMutableTrack(withMediaType: AVMediaTypeVideo,
                                                                   preferredTrackID: kCMPersistentTrackID_Invalid)

    let compositionAddAudio = mixComposition.addMutableTrack(withMediaType: AVMediaTypeAudio,
                                                                 preferredTrackID: kCMPersistentTrackID_Invalid)

    let compositionAddAudioOfVideo = mixComposition.addMutableTrack(withMediaType: AVMediaTypeAudio,
                                                                        preferredTrackID: kCMPersistentTrackID_Invalid)

    let aVideoAssetTrack: AVAssetTrack = aVideoAsset.tracks(withMediaType: AVMediaTypeVideo)[0]
    let aAudioOfVideoAssetTrack: AVAssetTrack? = aVideoAsset.tracks(withMediaType: AVMediaTypeAudio).first
    let aAudioAssetTrack: AVAssetTrack = aAudioAsset.tracks(withMediaType: AVMediaTypeAudio)[0]

    // Default must have tranformation
    compositionAddVideo.preferredTransform = aVideoAssetTrack.preferredTransform

    if shouldFlipHorizontally {
        // Flip video horizontally
        var frontalTransform: CGAffineTransform = CGAffineTransform(scaleX: -1.0, y: 1.0)
        frontalTransform = frontalTransform.translatedBy(x: -aVideoAssetTrack.naturalSize.width, y: 0.0)
        frontalTransform = frontalTransform.translatedBy(x: 0.0, y: -aVideoAssetTrack.naturalSize.width)
        compositionAddVideo.preferredTransform = frontalTransform
    }

    mutableCompositionVideoTrack.append(compositionAddVideo)
    mutableCompositionAudioTrack.append(compositionAddAudio)
    mutableCompositionAudioOfVideoTrack.append(compositionAddAudioOfVideo)

    do {
        try mutableCompositionVideoTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero,
                                                                            aVideoAssetTrack.timeRange.duration),
                                                            of: aVideoAssetTrack,
                                                            at: kCMTimeZero)

        //In my case my audio file is longer then video file so i took videoAsset duration
        //instead of audioAsset duration
        try mutableCompositionAudioTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero,
                                                                            aVideoAssetTrack.timeRange.duration),
                                                            of: aAudioAssetTrack,
                                                            at: kCMTimeZero)

        // adding audio (of the video if exists) asset to the final composition
        if let aAudioOfVideoAssetTrack = aAudioOfVideoAssetTrack {
            try mutableCompositionAudioOfVideoTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero,
                                                                                       aVideoAssetTrack.timeRange.duration),
                                                                       of: aAudioOfVideoAssetTrack,
                                                                       at: kCMTimeZero)
        }
    } catch {
        print(error.localizedDescription)
    }

    // Exporting
    let savePathUrl: URL = URL(fileURLWithPath: NSHomeDirectory() + "/Documents/newVideo.mp4")
    do { // delete old video
        try FileManager.default.removeItem(at: savePathUrl)
    } catch { print(error.localizedDescription) }

    let assetExport: AVAssetExportSession = AVAssetExportSession(asset: mixComposition, presetName: AVAssetExportPresetHighestQuality)!
    assetExport.outputFileType = AVFileTypeMPEG4
    assetExport.outputURL = savePathUrl
    assetExport.shouldOptimizeForNetworkUse = true

    assetExport.exportAsynchronously { () -> Void in
        switch assetExport.status {
        case AVAssetExportSessionStatus.completed:
            print("success")
            completion(nil, savePathUrl)
        case AVAssetExportSessionStatus.failed:
            print("failed \(assetExport.error?.localizedDescription ?? "error nil")")
            completion(assetExport.error, nil)
        case AVAssetExportSessionStatus.cancelled:
            print("cancelled \(assetExport.error?.localizedDescription ?? "error nil")")
            completion(assetExport.error, nil)
        default:
            print("complete")
            completion(assetExport.error, nil)
        }
    }

}

Again thanks to @Govind's answer! It helped me a lot!

Hope this update helps someone too:)


In Above question same error I found due to wrong savePathUrl, destination URL should be like below code including new video name.

I was looking for the code to Merge audio and video files into one video but couldn't find anywhere so after spending hours while reading apple docs I wrote this code.

NOTE : This is tested and 100% working code for me.

Stap : 1 Import this modules in your viewController.

import UIKit
import AVFoundation
import AVKit
import AssetsLibrary

step 2: Add this function in your code

func mergeFilesWithUrl(videoUrl:NSURL, audioUrl:NSURL)
{
    let mixComposition : AVMutableComposition = AVMutableComposition()
    var mutableCompositionVideoTrack : [AVMutableCompositionTrack] = []
    var mutableCompositionAudioTrack : [AVMutableCompositionTrack] = []
    let totalVideoCompositionInstruction : AVMutableVideoCompositionInstruction = AVMutableVideoCompositionInstruction()


    //start merge

    let aVideoAsset : AVAsset = AVAsset(URL: videoUrl)
    let aAudioAsset : AVAsset = AVAsset(URL: audioUrl)

    mutableCompositionVideoTrack.append(mixComposition.addMutableTrackWithMediaType(AVMediaTypeVideo, preferredTrackID: kCMPersistentTrackID_Invalid))
    mutableCompositionAudioTrack.append( mixComposition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: kCMPersistentTrackID_Invalid))

    let aVideoAssetTrack : AVAssetTrack = aVideoAsset.tracksWithMediaType(AVMediaTypeVideo)[0]
    let aAudioAssetTrack : AVAssetTrack = aAudioAsset.tracksWithMediaType(AVMediaTypeAudio)[0]



    do{
        try mutableCompositionVideoTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), ofTrack: aVideoAssetTrack, atTime: kCMTimeZero)

        //In my case my audio file is longer then video file so i took videoAsset duration
        //instead of audioAsset duration

        try mutableCompositionAudioTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), ofTrack: aAudioAssetTrack, atTime: kCMTimeZero)

        //Use this instead above line if your audiofile and video file's playing durations are same

        //            try mutableCompositionAudioTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), ofTrack: aAudioAssetTrack, atTime: kCMTimeZero)

    }catch{

    }

    totalVideoCompositionInstruction.timeRange = CMTimeRangeMake(kCMTimeZero,aVideoAssetTrack.timeRange.duration )

    let mutableVideoComposition : AVMutableVideoComposition = AVMutableVideoComposition()
    mutableVideoComposition.frameDuration = CMTimeMake(1, 30)

    mutableVideoComposition.renderSize = CGSizeMake(1280,720)

    //        playerItem = AVPlayerItem(asset: mixComposition)
    //        player = AVPlayer(playerItem: playerItem!)
    //
    //
    //        AVPlayerVC.player = player



    //find your video on this URl
    let savePathUrl : NSURL = NSURL(fileURLWithPath: NSHomeDirectory() + "/Documents/newVideo.mp4")

    let assetExport: AVAssetExportSession = AVAssetExportSession(asset: mixComposition, presetName: AVAssetExportPresetHighestQuality)!
    assetExport.outputFileType = AVFileTypeMPEG4
    assetExport.outputURL = savePathUrl
    assetExport.shouldOptimizeForNetworkUse = true

    assetExport.exportAsynchronouslyWithCompletionHandler { () -> Void in
        switch assetExport.status {

        case AVAssetExportSessionStatus.Completed:

            //Uncomment this if u want to store your video in asset

            //let assetsLib = ALAssetsLibrary()
            //assetsLib.writeVideoAtPathToSavedPhotosAlbum(savePathUrl, completionBlock: nil)

            print("success")
        case  AVAssetExportSessionStatus.Failed:
            print("failed \(assetExport.error)")
        case AVAssetExportSessionStatus.Cancelled:
            print("cancelled \(assetExport.error)")
        default:
            print("complete")
        }
    }


}

Step 3: Call function where u want like this

let videoUrl : NSURL =  NSURL(fileURLWithPath: NSBundle.mainBundle().pathForResource("SampleVideo", ofType: "mp4")!)
let audioUrl : NSURL = NSURL(fileURLWithPath: NSBundle.mainBundle().pathForResource("SampleAudio", ofType: "mp3")!)

mergeFilesWithUrl(videoUrl, audioUrl: audioUrl)

hope this will help you and will save your time.


Swift 4.2 / 5

func mergeVideoWithAudio(videoUrl: URL, audioUrl: URL, success: @escaping ((URL) -> Void), failure: @escaping ((Error?) -> Void)) {


    let mixComposition: AVMutableComposition = AVMutableComposition()
    var mutableCompositionVideoTrack: [AVMutableCompositionTrack] = []
    var mutableCompositionAudioTrack: [AVMutableCompositionTrack] = []
    let totalVideoCompositionInstruction : AVMutableVideoCompositionInstruction = AVMutableVideoCompositionInstruction()

    let aVideoAsset: AVAsset = AVAsset(url: videoUrl)
    let aAudioAsset: AVAsset = AVAsset(url: audioUrl)

    if let videoTrack = mixComposition.addMutableTrack(withMediaType: .video, preferredTrackID: kCMPersistentTrackID_Invalid), let audioTrack = mixComposition.addMutableTrack(withMediaType: .audio, preferredTrackID: kCMPersistentTrackID_Invalid) {
        mutableCompositionVideoTrack.append(videoTrack)
        mutableCompositionAudioTrack.append(audioTrack)

    if let aVideoAssetTrack: AVAssetTrack = aVideoAsset.tracks(withMediaType: .video).first, let aAudioAssetTrack: AVAssetTrack = aAudioAsset.tracks(withMediaType: .audio).first {
        do {
            try mutableCompositionVideoTrack.first?.insertTimeRange(CMTimeRangeMake(start: CMTime.zero, duration: aVideoAssetTrack.timeRange.duration), of: aVideoAssetTrack, at: CMTime.zero)
            try mutableCompositionAudioTrack.first?.insertTimeRange(CMTimeRangeMake(start: CMTime.zero, duration: aVideoAssetTrack.timeRange.duration), of: aAudioAssetTrack, at: CMTime.zero)
               videoTrack.preferredTransform = aVideoAssetTrack.preferredTransform 

        } catch{
            print(error)
        }


       totalVideoCompositionInstruction.timeRange = CMTimeRangeMake(start: CMTime.zero,duration: aVideoAssetTrack.timeRange.duration)
    }
    }

    let mutableVideoComposition: AVMutableVideoComposition = AVMutableVideoComposition()
    mutableVideoComposition.frameDuration = CMTimeMake(value: 1, timescale: 30)
    mutableVideoComposition.renderSize = CGSize(width: 480, height: 640)

    if let documentsPath = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true).first {
        let outputURL = URL(fileURLWithPath: documentsPath).appendingPathComponent("\("fileName").m4v")

        do {
            if FileManager.default.fileExists(atPath: outputURL.path) {

                try FileManager.default.removeItem(at: outputURL)
            }
        } catch { }

        if let exportSession = AVAssetExportSession(asset: mixComposition, presetName: AVAssetExportPresetHighestQuality) {
            exportSession.outputURL = outputURL
            exportSession.outputFileType = AVFileType.mp4
            exportSession.shouldOptimizeForNetworkUse = true

            /// try to export the file and handle the status cases
            exportSession.exportAsynchronously(completionHandler: {
                switch exportSession.status {
                case .failed:
                    if let _error = exportSession.error {
                        failure(_error)
                    }

                case .cancelled:
                    if let _error = exportSession.error {
                        failure(_error)
                    }

                default:
                    print("finished")
                    success(outputURL)
                }
            })
        } else {
            failure(nil)
        }
    }
}

Version Swift3 with URL and new syntax.

func mergeFilesWithUrl(videoUrl:URL, audioUrl:URL)
{
    let mixComposition : AVMutableComposition = AVMutableComposition()
    var mutableCompositionVideoTrack : [AVMutableCompositionTrack] = []
    var mutableCompositionAudioTrack : [AVMutableCompositionTrack] = []
    let totalVideoCompositionInstruction : AVMutableVideoCompositionInstruction = AVMutableVideoCompositionInstruction()


    //start merge

    let aVideoAsset : AVAsset = AVAsset(url: videoUrl)
    let aAudioAsset : AVAsset = AVAsset(url: audioUrl)

    mutableCompositionVideoTrack.append(mixComposition.addMutableTrack(withMediaType: AVMediaTypeVideo, preferredTrackID: kCMPersistentTrackID_Invalid))
    mutableCompositionAudioTrack.append( mixComposition.addMutableTrack(withMediaType: AVMediaTypeAudio, preferredTrackID: kCMPersistentTrackID_Invalid))

    let aVideoAssetTrack : AVAssetTrack = aVideoAsset.tracks(withMediaType: AVMediaTypeVideo)[0]
    let aAudioAssetTrack : AVAssetTrack = aAudioAsset.tracks(withMediaType: AVMediaTypeAudio)[0]



    do{
        try mutableCompositionVideoTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), of: aVideoAssetTrack, at: kCMTimeZero)

        //In my case my audio file is longer then video file so i took videoAsset duration
        //instead of audioAsset duration

        try mutableCompositionAudioTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), of: aAudioAssetTrack, at: kCMTimeZero)

        //Use this instead above line if your audiofile and video file's playing durations are same

        //            try mutableCompositionAudioTrack[0].insertTimeRange(CMTimeRangeMake(kCMTimeZero, aVideoAssetTrack.timeRange.duration), ofTrack: aAudioAssetTrack, atTime: kCMTimeZero)

    }catch{

    }

    totalVideoCompositionInstruction.timeRange = CMTimeRangeMake(kCMTimeZero,aVideoAssetTrack.timeRange.duration )

    let mutableVideoComposition : AVMutableVideoComposition = AVMutableVideoComposition()
    mutableVideoComposition.frameDuration = CMTimeMake(1, 30)

    mutableVideoComposition.renderSize = CGSize(width: 1280, height: 720)
    //        playerItem = AVPlayerItem(asset: mixComposition)
    //        player = AVPlayer(playerItem: playerItem!)
    //
    //
    //        AVPlayerVC.player = player

    //find your video on this URl
    let savePathUrl : URL = URL(fileURLWithPath: NSHomeDirectory() + "/Documents/newVideo.mp4")

    let assetExport: AVAssetExportSession = AVAssetExportSession(asset: mixComposition, presetName: AVAssetExportPresetHighestQuality)!
    assetExport.outputFileType = AVFileTypeMPEG4
    assetExport.outputURL = savePathUrl
    assetExport.shouldOptimizeForNetworkUse = true

    assetExport.exportAsynchronously { () -> Void in
        switch assetExport.status {

        case AVAssetExportSessionStatus.completed:

            //Uncomment this if u want to store your video in asset

            //let assetsLib = ALAssetsLibrary()
            //assetsLib.writeVideoAtPathToSavedPhotosAlbum(savePathUrl, completionBlock: nil)

            print("success")
        case  AVAssetExportSessionStatus.failed:
            print("failed \(assetExport.error)")
        case AVAssetExportSessionStatus.cancelled:
            print("cancelled \(assetExport.error)")
        default:
            print("complete")
        }
    }
}