iphone - iOS: Audio format for recording voice encoding for network transfer -
I am looking for an audio format to save voice recordings locally and to transport them over the network. The requirements are:
- Decent quality. These clips, when received, will be listened to many times.
- The workflow should support trimming and fading before transport.
- decent file size
This is my current approach to recording:
// See IMA4 vs M4A:
// http://stackoverflow.com/questions/3509921/recorder-works-on-iphone-3gs-but-not-on-iphone-3g
//
// NOTE(review): this snippet was lower-cased and collapsed onto one line by
// extraction. Framework identifiers are restored to their SDK spelling; the
// project-side names (audioRecorder, recordingFile) are assumed to be
// camelCase -- confirm against the class declaration.

// Recorder settings: 11025 Hz sample rate, one channel, non-float linear PCM.
NSDictionary *recordSettings = [[NSDictionary alloc] initWithObjectsAndKeys:
    [NSNumber numberWithFloat:11025], AVSampleRateKey,
    [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey,
    [NSNumber numberWithInt:1], AVNumberOfChannelsKey,
    [NSNumber numberWithBool:NO], AVLinearPCMIsFloatKey,
    [NSNumber numberWithInt:AVAudioQualityMax], AVEncoderAudioQualityKey,
    nil];

NSError *error = nil;
self.audioRecorder = [[AVAudioRecorder alloc] initWithURL:self.recordingFile
                                                 settings:recordSettings
                                                    error:&error];
// Check the returned object (not the error pointer) to detect failure.
if (self.audioRecorder == nil) {
    NSLog(@"AVAudioRecorder init failed: %@", error);
}
and approach encoding:
// Trims the recording to the spoken range, applies a fade-in, and exports it
// through AVAssetExportSession, blocking until the export finishes.
// Returns the output path on success; returns nil when the asset has no audio
// track or no export session could be created.
//
// NOTE(review): this snippet was lower-cased and collapsed by extraction.
// Framework identifiers are restored to their SDK spelling; project-side names
// (filePath, recordingFile, speakingBeginTime, RECORDING_INTERVAL) are assumed
// to be camelCase / an upper-case macro -- confirm against their declarations.
// Existing string literals are kept exactly as they appear in the source text.

// NOTE(review): the file name ends in ".caf" while the export below writes
// AVFileTypeAppleM4A -- confirm the extension is intended.
NSString *file = [NSString stringWithFormat:@"recordingconverted%x.caf", arc4random()];
self.filePath = [NSTemporaryDirectory() stringByAppendingPathComponent:file];

// Remove any stale file at the output path before exporting.
NSFileManager *fileManager = [NSFileManager defaultManager];
if ([fileManager fileExistsAtPath:self.filePath]) {
    NSError *error = nil;
    if (![fileManager removeItemAtPath:self.filePath error:&error]) {
        NSLog(@"removeitematpath %@ error:%@", self.filePath, error);
    }
}
NSLog(@"in: %@", self.recordingFile);
NSLog(@"out: %@", self.filePath);

AVAsset *avAsset = [AVAsset assetWithURL:self.recordingFile];

// Get the first audio track.
NSArray *tracks = [avAsset tracksWithMediaType:AVMediaTypeAudio];
if ([tracks count] == 0) return nil;
AVAssetTrack *track = [tracks objectAtIndex:0];

// Create the export session.
// NOTE(review): the original comment said no retain is needed because the
// completion handler references the session, but the handler below only
// references the semaphore. The local variable is still in scope for the whole
// export because this code waits for completion before continuing.
AVAssetExportSession *exportSession =
    [AVAssetExportSession exportSessionWithAsset:avAsset
                                      presetName:AVAssetExportPresetAppleM4A];
if (nil == exportSession) return nil;

// Create the trim time range (timescale 44100).
CMTime startTime = CMTimeMake(self.speakingBeginTime*44100, 44100);
CMTime stopTime = CMTimeMake((self.speakingBeginTime+[self.duration doubleValue])*44100, 44100);
CMTimeRange exportTimeRange = CMTimeRangeFromTimeToTime(startTime, stopTime);

// Create the fade-in time range.
// NOTE(review): the 1.5 factor scales the whole end time (begin + interval),
// not just the fade length -- confirm this is intended.
CMTime startFadeInTime = startTime;
CMTime endFadeInTime = CMTimeMake((self.speakingBeginTime+RECORDING_INTERVAL)*1.5*44100, 44100);
CMTimeRange fadeInTimeRange = CMTimeRangeFromTimeToTime(startFadeInTime, endFadeInTime);

// Set up the audio mix: ramp the volume from 0.0 to 1.0 over the fade-in range.
AVMutableAudioMix *exportAudioMix = [AVMutableAudioMix audioMix];
AVMutableAudioMixInputParameters *exportAudioMixInputParameters =
    [AVMutableAudioMixInputParameters audioMixInputParametersWithTrack:track];
[exportAudioMixInputParameters setVolumeRampFromStartVolume:0.0
                                                toEndVolume:1.0
                                                  timeRange:fadeInTimeRange];
exportAudioMix.inputParameters = [NSArray arrayWithObject:exportAudioMixInputParameters];

// Configure the export session output with all our parameters.
exportSession.outputURL = [NSURL fileURLWithPath:self.filePath]; // output path
exportSession.outputFileType = AVFileTypeAppleM4A;               // output file type
exportSession.timeRange = exportTimeRange;                       // trim time range
exportSession.audioMix = exportAudioMix;                         // fade-in audio mix

// Make the export synchronous: wait on a semaphore that the completion
// handler signals. This blocks the calling thread until the export ends.
dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
[exportSession exportAsynchronouslyWithCompletionHandler:^{
    dispatch_semaphore_signal(semaphore);
}];
dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
dispatch_release(semaphore);

if (AVAssetExportSessionStatusCompleted == exportSession.status) {
    return self.filePath;
    //NSLog(@"AVAssetExportSessionStatusCompleted");
} else if (AVAssetExportSessionStatusFailed == exportSession.status) {
    // A failure may happen because of an event out of our control, for
    // example an interruption such as a phone call coming in. Make sure to
    // handle this case appropriately.
    NSLog(@"avassetexportsessionstatusfailed %@", exportSession.error.localizedDescription);
} else {
    // status is an NSInteger-backed enum, so print it as long rather than %d.
    NSLog(@"export session status: %ld", (long)exportSession.status);
}
currently, performance on 3 second audio clip is: 62,228 bytes pcm , 36,654 bytes encoded. seems better.
I have found the guide here:
http://gamua.com/blog/2010/06/sound-on-ios-best-practices/
to be helpful in picking sound formats (especially the comments).
There are also examples here:
How do I record audio on iPhone with AVAudioRecorder?
especially the example of the various formats to export to, and this answer:
https://stackoverflow.com/a/3870385/2214106
which reduced the file size significantly.
Comments
Post a Comment