where to start with audio synthesis on iPhone - iphone

I'd like to build a synthesizer for the iPhone. I understand that it's possible to use custom audio units for the iPhone. At first glance, this sounds promising, since there's lots and lots of Audio Unit programming resources available. However, using custom audio units on the iPhone seems a bit tricky ( see: http://lists.apple.com/archives/Coreaudio-api/2008/Nov/msg00262.html)
This seems like the sort of thing that loads of people must be doing, but a simple google search for "iphone audio synthesis" doesn't turn up anything along the lines of a nice and easy tutorial or recommended tool kit.
So, anyone here have experience synthesizing sound on the iPhone? Are custom audio units the way to go, or is there another, simpler approach I should consider?

I'm also investigating this. I think the AudioQueue API is probably the way to go.
Here's as far as I got, seems to work okay.
File: BleepMachine.h
//
// BleepMachine.h
// WgHeroPrototype
//
// Created by Andy Buchanan on 05/01/2010.
// Copyright 2010 Andy Buchanan. All rights reserved.
//
#include <AudioToolbox/AudioToolbox.h>
// Class to implement sound playback using the AudioQueue API's
// Currently just supports playing two sine wave tones, one per
// stereo channel. The sound data is liitle-endian signed 16-bit # 44.1KHz
//
class BleepMachine
{
static void staticQueueCallback( void* userData, AudioQueueRef outAQ, AudioQueueBufferRef outBuffer )
{
BleepMachine* pThis = reinterpret_cast<BleepMachine*> ( userData );
pThis->queueCallback( outAQ, outBuffer );
}
void queueCallback( AudioQueueRef outAQ, AudioQueueBufferRef outBuffer );
AudioStreamBasicDescription m_outFormat;
AudioQueueRef m_outAQ;
enum
{
kBufferSizeInFrames = 512,
kNumBuffers = 4,
kSampleRate = 44100,
};
AudioQueueBufferRef m_buffers[kNumBuffers];
bool m_isInitialised;
struct Wave
{
Wave(): volume(1.f), phase(0.f), frequency(0.f), fStep(0.f) {}
float volume;
float phase;
float frequency;
float fStep;
};
enum
{
kLeftWave = 0,
kRightWave = 1,
kNumWaves,
};
Wave m_waves[kNumWaves];
public:
BleepMachine();
~BleepMachine();
bool Initialise();
void Shutdown();
bool Start();
bool Stop();
bool SetWave( int id, float frequency, float volume );
};
// Notes by name. Integer value is number of semitones above A.
enum Note
{
A = 0,
Asharp,
B,
C,
Csharp,
D,
Dsharp,
E,
F,
Fsharp,
G,
Gsharp,
Bflat = Asharp,
Dflat = Csharp,
Eflat = Dsharp,
Gflat = Fsharp,
Aflat = Gsharp,
};
// Helper function calculates fundamental frequency for a given note
float CalculateFrequencyFromNote( SInt32 semiTones, SInt32 octave=4 );
float CalculateFrequencyFromMIDINote( SInt32 midiNoteNumber );
File:BleepMachine.mm
//
// BleepMachine.mm
// WgHeroPrototype
//
// Created by Andy Buchanan on 05/01/2010.
// Copyright 2010 Andy Buchanan. All rights reserved.
//
#include "BleepMachine.h"
void BleepMachine::queueCallback( AudioQueueRef outAQ, AudioQueueBufferRef outBuffer )
{
// Render the wave
// AudioQueueBufferRef is considered "opaque", but it's a reference to
// an AudioQueueBuffer which is not.
// All the samples manipulate this, so I'm not quite sure what they mean by opaque
// saying....
SInt16* coreAudioBuffer = (SInt16*)outBuffer->mAudioData;
// Specify how many bytes we're providing
outBuffer->mAudioDataByteSize = kBufferSizeInFrames * m_outFormat.mBytesPerFrame;
// Generate the sine waves to Signed 16-Bit Stero interleaved ( Little Endian )
float volumeL = m_waves[kLeftWave].volume;
float volumeR = m_waves[kRightWave].volume;
float phaseL = m_waves[kLeftWave].phase;
float phaseR = m_waves[kRightWave].phase;
float fStepL = m_waves[kLeftWave].fStep;
float fStepR = m_waves[kRightWave].fStep;
for( int s=0; s<kBufferSizeInFrames*2; s+=2 )
{
float sampleL = ( volumeL * sinf( phaseL ) );
float sampleR = ( volumeR * sinf( phaseR ) );
short sampleIL = (int)(sampleL * 32767.0);
short sampleIR = (int)(sampleR * 32767.0);
coreAudioBuffer[s] = sampleIL;
coreAudioBuffer[s+1] = sampleIR;
phaseL += fStepL;
phaseR += fStepR;
}
m_waves[kLeftWave].phase = fmodf( phaseL, 2 * M_PI ); // Take modulus to preserve precision
m_waves[kRightWave].phase = fmodf( phaseR, 2 * M_PI );
// Enqueue the buffer
AudioQueueEnqueueBuffer( m_outAQ, outBuffer, 0, NULL );
}
bool BleepMachine::SetWave( int id, float frequency, float volume )
{
if ( ( id < kLeftWave ) || ( id >= kNumWaves ) ) return false;
Wave& wave = m_waves[ id ];
wave.volume = volume;
wave.frequency = frequency;
wave.fStep = 2 * M_PI * frequency / kSampleRate;
return true;
}
bool BleepMachine::Initialise()
{
m_outFormat.mSampleRate = kSampleRate;
m_outFormat.mFormatID = kAudioFormatLinearPCM;
m_outFormat.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
m_outFormat.mFramesPerPacket = 1;
m_outFormat.mChannelsPerFrame = 2;
m_outFormat.mBytesPerPacket = m_outFormat.mBytesPerFrame = sizeof(UInt16) * 2;
m_outFormat.mBitsPerChannel = 16;
m_outFormat.mReserved = 0;
OSStatus result = AudioQueueNewOutput(
&m_outFormat,
BleepMachine::staticQueueCallback,
this,
NULL,
NULL,
0,
&m_outAQ
);
if ( result < 0 )
{
printf( "ERROR: %d\n", (int)result );
return false;
}
// Allocate buffers for the audio
UInt32 bufferSizeBytes = kBufferSizeInFrames * m_outFormat.mBytesPerFrame;
for ( int buf=0; buf<kNumBuffers; buf++ )
{
OSStatus result = AudioQueueAllocateBuffer( m_outAQ, bufferSizeBytes, &m_buffers[ buf ] );
if ( result )
{
printf( "ERROR: %d\n", (int)result );
return false;
}
// Prime the buffers
queueCallback( m_outAQ, m_buffers[ buf ] );
}
m_isInitialised = true;
return true;
}
void BleepMachine::Shutdown()
{
Stop();
if ( m_outAQ )
{
// AudioQueueDispose also chucks any audio buffers it has
AudioQueueDispose( m_outAQ, true );
}
m_isInitialised = false;
}
BleepMachine::BleepMachine()
: m_isInitialised(false), m_outAQ(0)
{
for ( int buf=0; buf<kNumBuffers; buf++ )
{
m_buffers[ buf ] = NULL;
}
}
BleepMachine::~BleepMachine()
{
Shutdown();
}
bool BleepMachine::Start()
{
OSStatus result = AudioQueueSetParameter( m_outAQ, kAudioQueueParam_Volume, 1.0 );
if ( result ) printf( "ERROR: %d\n", (int)result );
// Start the queue
result = AudioQueueStart( m_outAQ, NULL );
if ( result ) printf( "ERROR: %d\n", (int)result );
return true;
}
bool BleepMachine::Stop()
{
OSStatus result = AudioQueueStop( m_outAQ, true );
if ( result ) printf( "ERROR: %d\n", (int)result );
return true;
}
// A (A4=440)
// A# f(n)=2^(n/12) * r
// B where n = number of semitones
// C and r is the root frequency e.g. 440
// C#
// D frq -> MIDI note number
// D# p = 69 + 12 x log2(f/440)
// E
// F
// F#
// G
// G#
//
// MIDI Note ref: http://www.phys.unsw.edu.au/jw/notes.html
//
// MIDI Node numbers:
// A3 57
// A#3 58
// B3 59
// C4 60 <--
// C#4 61
// D4 62
// D#4 63
// E4 64
// F4 65
// F#4 66
// G4 67
// G#4 68
// A4 69 <--
// A#4 70
// B4 71
// C5 72
float CalculateFrequencyFromNote( SInt32 semiTones, SInt32 octave )
{
semiTones += ( 12 * (octave-4) );
float root = 440.f;
float fn = powf( 2.f, (float)semiTones/12.f ) * root;
return fn;
}
float CalculateFrequencyFromMIDINote( SInt32 midiNoteNumber )
{
SInt32 semiTones = midiNoteNumber - 69;
return CalculateFrequencyFromNote( semiTones, 4 );
}
//for ( SInt32 midiNote=21; midiNote<=108; ++midiNote )
//{
// printf( "MIDI Note %d: %f Hz \n",(int)midiNote,CalculateFrequencyFromMIDINote( midiNote ) );
//}
Update: Basic usage info
Initialise. Somehere near the start, I'm using initFromNib: in my code
m_bleepMachine = new BleepMachine;
m_bleepMachine->Initialise();
m_bleepMachine->Start();
Now the sound playback is running, but generating silence.
In your code, call this when you want to change the tone generation
m_bleepMachine->SetWave( ch, frq, vol );
where ch is the channel ( 0 or 1 )
where frq is the frequency to set in Hz
where vol is the volume ( 0=-Inf db, 1=-0db )
At program termination
delete m_bleepMachine;

Since my original post almost a year ago, I've come a long way. After a pretty exhaustive search, I came up with very few high-level synthesis tools suitable for iOS development. There are many which are GPL licensed, but the GPL license is too restrictive for me to feel comfortable using it. LibPD works great, and is what rjdj uses, but I found myself really frustrated by the graphical programming paradigm. JSyn's c-based engine, csyn, is an option, but it requires licensing, and I'm really used to programming with open-source tools. It does look worth a close look though.
In the end, I'm using STK as my basic framework. STK is a very low-level tool, and requires extensive buffer-level programming to get working. This is in contrast to something higher level like PD or SuperCollider, which allows you to simply plug unit generators together and not worry about handling the raw audio data.
Working this way with STK is certainly a bit slower than with a high level tool, but I'm becoming comfortable with it. Especially now that I'm becoming more comfortable with C/C++ programming in general.
There's a new project under way to create a patching-style add on to Open Frameworks. It's called Cleo I think, out of the University of Vancouver. It hasn't been released yet, but it looks like a very nice mix of patching-style connection of unit generators in C++ rather than requiring the use of another language. And it's tightly integrated with Open Frameworks, which may be appealing or not, depending.
So, to answer my original question, first you need to learn how to write to the output buffer. Here's some good sample code for that:
http://atastypixel.com/blog/using-remoteio-audio-unit/
Then you need to do some synthesis to generate the audio data. If you like patching, I wouldn't hesitate to recommend libpd. It seems to work great, and you can work the way you're accustomed to. If you hate graphical patching (like me), your best starting place for now is probably STK. If STK and low-level audio programming seems a bit over your head (like it was for me), just roll up your sleeves, pack a tent, and set up on a bit of a long hike up the learning curve. You'll be a much better programmer for it in the end.
Another bit of advice I wish I could have given myself a year ago: join Apple's Core Audio mailing list.
============== 2014 Edit ===========
I'm now using (and actively contributing to) the Tonic audio synthesis library. It's awesome, if I don't say so myself.

With the enormous caveat that I have yet to get past all the documentation or finishing browsing some classes / sample code, it looks like the fine folks from CCRMA over at Stanford may have put some nice toolkits together for our audio hacking pleasure. No guarantees these will do exactly what you want, but based on what I know about the original STK, they should do the trick. I'm about to embark on an audio synth app myself and the more code I can reuse, the better.
Links / descriptions from their site...
MoMu : MoMu is a light-weight software toolkit for creating musical instruments and experiences on mobile device, and currently supports the iPhone platform (iPhone, iPad, iPod Touches). MoMu provides API's for real-time full-duplex audio, accelerometer, location, multi-touch, networking (via OpenSoundControl), graphics, and utilities. (yada yada)
• and •
MoMu STK : The MoMu release of the Synthesis Toolkit (STK, originally by Perry R. Cook and Gary P. Scavone) is a lightly modified version of STK 4.4.2, and currently supports the iPhone platform (iPhone, iPad, iPod Touches).

I'm just getting into Audio Unit programming for iPhone to build a synth-like app as well. The Apple guide "Audio Unit Hosting Guide for iOS" seems like a good reference:
http://developer.apple.com/library/ios/#documentation/MusicAudio/Conceptual/AudioUnitHostingGuide_iOS/AudioUnitHostingFundamentals/AudioUnitHostingFundamentals.html#//apple_ref/doc/uid/TP40009492-CH3-SW11
The guide includes links to a couple sample projects. Audio Mixer (MixerHost) and aurioTouch:
http://developer.apple.com/library/ios/samplecode/MixerHost/Introduction/Intro.html#//apple_ref/doc/uid/DTS40010210
http://developer.apple.com/library/ios/samplecode/aurioTouch/Introduction/Intro.html#//apple_ref/doc/uid/DTS40007770

I'm one of the other contributors to Tonic along with morgancodes. For wrangling CoreAudio in a higher-level framework, I can't give enough praise to The Amazing Audio Engine.
We've both used it in tandem with Tonic in a number of projects. It takes so much of the pain out of dealing with CoreAudio directly, letting you focus on the actual content and synthesis instead of the hardware abstraction layer.

Lately I've been using AudioKit
It's a fresh and well designed wrapper over CSound which has been around for ages
I was using tonic with openframeworks and I was finding myself missing programming in swift.
Although tonic and openframeworks are both powerful tools,
I've chosen to get in bed with swift

PD has a version that runs on the iphone, used by RjDj. If you are OK with using someone else's app rather than writing your own, you can do quite a bit in an RjDj scene, and there is a set of objects that let you patch it out and test it on a regular PD on your own computer.
I should mention: PD is a visual dataflow programming language, that is to say, it is turing complete, and can be used to develop graphical applications - but if you are going to do anything interesting I would definitely look into best practices for patching.

Last time I checked you couldn't use custom AUs on iOS in a way that would allow all installed apps to use it (like on MacOS X).
You could theoretically use a custom AU from inside your iOS app by loading it from the app's bundle and calling the AU's render function directly, but then you could as well add the code directly to your app. Also, I'm pretty sure that loading and calling code that sits in a dynamic library would go against the AppStore policies.
So you will either have to do the processing in your remote IO callback or use the Apple AUs that are preinstalled, within an AUGraph.

Related

Packing Speex with Ogg on iOS

I'm using libogg and libogg, I've succeeded to add those libraries to my iPhone xCode project and encode my voice with Speex. The problem is that I cannot figure out how to pack those audio packet with ogg. Does someone know how a packet of that kind should look like or have a reference code I can use.
I know in Java it's pretty simple (you have a dedicated function for that) but not on iOS. Please help.
UPD 10.09.2013: Please, see the demo project, which basically takes pcm audiodata from wave container, encodes it with speex codec and pack everything into ogg container. Maybe later I'll create a full-fledged library/framework for all that speex routines on IOS.
UPD 16.02.2015: The demo project is republished on GitHub.
I also have been experimenting with Speex on iOS recently, with varied success, but here is something I found. Basically, if you want to pack some speex-encoded voice into an ogg file, you need to do follow three steps (assuming libogg and libspeex are already compiled and added to the project).
1) Add the first ogg page with Speex header; libspeex provides built-in tools for that (the code below is from my project, not optimal, just for the sake of example):
// create speex header
SpeexHeader spxHeader;
SpeexMode spxMode = speex_wb_mode;
int spxRate = 16000;
int spxNumberOfChannels = 1;
speex_init_header(&spxHeader, spxRate, spxNumberOfChannels, &spxMode);
// set audio and ogg packing parameters
spxHeader.vbr = 0;
spxHeader.bitrate = 16;
spxHeader.frame_size = 320;
spxHeader.frames_per_packet = 1;
// wrap speex header in ogg packet
int oggPacketSize;
_oggPacket.packet = (unsigned char *)speex_header_to_packet(&spxHeader, &oggPacketSize);
_oggPacket.bytes = oggPacketSize;
_oggPacket.b_o_s = 1;
_oggPacket.e_o_s = 0;
_oggPacket.granulepos = 0;
_oggPacket.packetno = 0;
// submit the packet to the ogg streaming layer
ogg_stream_packetin(&_oggStreamState, &_oggPacket);
free(_oggPacket.packet);
// form an ogg page
ogg_stream_flush(&_oggStreamState, &_oggPage);
// write the page to file
[_oggFile appendBytes:&_oggStreamState.header length:_oggStreamState.header_fill];
[_oggFile appendBytes:_oggStreamState.body_data length:_oggStreamState.body_fill];
2) Add the second ogg page with Vorbis comment:
// form any comment you like (I use custom struct with all fields)
vorbisCommentStruct *vorbisComment = calloc(sizeof(vorbisCommentStruct), sizeof(char));
...
// wrap Vorbis comment in ogg packet
_oggPacket.packet = (unsigned char *)vorbisComment;
_oggPacket.bytes = vorbisCommentLength;
_oggPacket.b_o_s = 0;
_oggPacket.e_o_s = 0;
_oggPacket.granulepos = 0;
_oggPacket.packetno = _oggStreamState.packetno;
// the rest should be same as in previous step
...
3) Add subsequent ogg pages with your speex-encoded audio in the similar manner.
First of all decide how many frames with audio data you want to have on every ogg page (0-255; I choose 79 quite arbitrarily):
_framesPerOggPage = 79;
Then for each frame:
// calculate current granule position of audio data within ogg file
int curGranulePos = _spxSamplesPerFrame * _oggTotalFramesCount;
// wrap audio data in ogg packet
oggPacket.packet = (unsigned char *)spxFrame;
oggPacket.bytes = spxFrameLength;
oggPacket.granulepos = curGranulePos;
oggPacket.packetno = _oggStreamState.packetno;
oggPacket.b_o_s = 0;
oggPacket.e_o_s = 0;
// submit packets to streaming layer until their number reaches _framesPerOggPage
...
// if we've reached this limit, we're ready to create another ogg page
ogg_stream_flush(&_oggStreamState, &_oggPage);
[_oggFile appendBytes:&_oggStreamState.header length:_oggStreamState.header_fill];
[_oggFile appendBytes:_oggStreamState.body_data length:_oggStreamState.body_fill];
// finally, if this is the last frame, flush all remaining packets,
// which have been created but not packed into a page, to the last page
// (don't forget to set oggPacket.e_o_s to 1 for this frame)
That's it. Hope it will help. Any corrections or questions are welcome.

OpenSL ES can not play audio on Android emulator

I decode amrnb to PCM, then put right pcm buffer to Enqueue buffer (I'm sure PCM data is right), but no sound is heard. And when feeding buffer, log outputs:
/AudioTrack(14857): obtainBuffer timed out (is the CPU pegged?)
My code is below, and my questions are:
Is there something wrong when I use the OpenSL ES?
Is it true that OpenSL ES only works on the real device?
Sample code:
void AudioTest()
{
StartAudioPlay();
while(1)
{
//decode AMR to PCM
/* Convert to little endian and write to wav */
//write buffer to buffer queue
AudioBufferWrite(littleendian, 320);
}
}
void bqPlayerCallback(SLAndroidSimpleBufferQueueItf bq, void *context)
{
//do nothing
}
void AudioBufferWrite(const void* buffer, int size)
{
(*gBQBufferQueue)->Enqueue(gBQBufferQueue, buffer, size );
}
// create buffer queue audio player
void SlesCreateBQPlayer(/*AudioCallBackSL funCallback, void *soundMix,*/ int rate, int nChannel, int bitsPerSample )
{
SLresult result;
// configure audio source
SLDataLocator_AndroidSimpleBufferQueue loc_bufq = {SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE, 2};
SLDataFormat_PCM format_pcm = {SL_DATAFORMAT_PCM, 1, SL_SAMPLINGRATE_8,
SL_PCMSAMPLEFORMAT_FIXED_16, SL_PCMSAMPLEFORMAT_FIXED_16,
SL_SPEAKER_FRONT_CENTER, SL_BYTEORDER_LITTLEENDIAN};
SLDataSource audioSrc = {&loc_bufq, &format_pcm};
// configure audio sink
SLDataLocator_OutputMix loc_outmix = {SL_DATALOCATOR_OUTPUTMIX, gOutputMixObject};
SLDataSink audioSnk = {&loc_outmix, NULL};
// create audio player
const SLInterfaceID ids[3] = {SL_IID_BUFFERQUEUE, SL_IID_EFFECTSEND, SL_IID_VOLUME};
const SLboolean req[3] = {SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE, SL_BOOLEAN_TRUE};
result = (*gEngineEngine)->CreateAudioPlayer(gEngineEngine, &gBQObject, &audioSrc, &audioSnk,
3, ids, req);
// realize the player
result = (*gBQObject)->Realize(gBQObject, SL_BOOLEAN_FALSE);
// get the play interface
result = (*gBQObject)->GetInterface(gBQObject, SL_IID_PLAY, &gBQPlay);
// get the buffer queue interface
result = (*gBQObject)->GetInterface(gBQObject, SL_IID_BUFFERQUEUE,
&gBQBufferQueue);
// register callback on the buffer queue
result = (*gBQBufferQueue)->RegisterCallback(gBQBufferQueue, bqPlayerCallback, NULL/*soundMix*/);
// get the effect send interface
result = (*gBQObject)->GetInterface(gBQObject, SL_IID_EFFECTSEND,
&gBQEffectSend);
// set the player's state to playing
result = (*gBQPlay)->SetPlayState(gBQPlay, SL_PLAYSTATE_PLAYING );
}
I'm not entirely sure, but I think you're correct in that the emulator's OpenSL ES support doesn't actually work. I've never gotten it to work in practice, while it works on any device I've tried.
In my application I have to support Android 2.2 as well, so I have a fallback to use JNI to access the Java AudioTrack APIs. I added a special case to my app to always use the AudioTrack interface when the emulator is detected.

Photo manipulation/filter library techniques for iPhone

Just wondering how difficult it would be to convert the following library to Objective-C to be used on the iPhone?
I guess I'm after some similar image processing libraries that would lead me in the right direction? I'm aware that it's not easy to apply the same filters as existing applications like Instragram, Path and Hipstamatic.
However, I'd like to be able to do something similar.
Here is the JavaScript library:
https://github.com/alexmic/filtrr/blob/master/filtrr.js
A demo of its functionality can be found here:
http://alexmic.net/demos/filtrr
I've started a bit of converting, here is a sample. Now of course, fully converting it would a lot of time, too much for me to do it. But just see how I've done it. I'm hoping you have prior experience with Obj-C?
Also, perhaps you could look at some existing libraries.
http://code.google.com/p/simple-iphone-image-processing/
http://mattgemmell.com/2010/07/05/mgimageutilities/
http://developer.apple.com/library/ios/#samplecode/GLImageProcessing/Introduction/Intro.html
Also, dont forget that XCode can compile C++ into your project so also investigate C or C++ libraries.
NSObject canvas;
int w;
int h;
int ctx;
NSData imageData;
#implementation filtr
{
-(id) initWithCanvas:(id)_canvas
{
if (!_canvas) {
throw "Canvas supplied to filtr was null or undefined.";
}
canvas = _canvas;
w = canvas.width;
h = canvas.height;
ctx = canvas.getContext("2d");
imageData = ctx.getImageData(0, 0, w, h);
}
/**
* Clamps the intensity level between 0 - 255.
*
* #param i The intensity level.
*/
-(int)safe:(int)i
{
return MIN(255, MAX(0, i));
}

How do I program a stereo-capable graphics card to display stereo images?

I'd like to write my own stereo image viewer, because there are certain features I need which are missing from the one bundled with my NVidia/EVGA GTX 580.
I can't figure out how to program the card to enter "shutterglass" mode where every other frame (at 120 HZ) alternates left and right.
I've looked at the OpenGL, Direct3D, and XNA APIs, as well as information from NVIDIA, and can't figure out how to get started. How do I set separate left and right images, how do I tell the screen to display it, and how to I tell the driver to activate the shutterglass transmitter?
(Another disconcerting thing is that whenever I use the bundled software to view stereo images and video in shutterglass mode, it's in fullscreen, and the screen blinks when entering that mode--even though I run the screen at 120Hz in 2D. Is there a way to have a 3D surface in a window without upsetting the rest of the screen on the NVidia "gamer" cards that are 3D capable (570, 580)?
I'm a bit late to this, but I just got the stereoscopic 3D to work using nothing but a GTX 580 and OpenGL. No need for a quadro card or DirectX.
I have the nVidia 3D Vision driver and IR emitter and simply set the emitter to "Always on" in the nVidia control panel.
In my game engine, I switched to a full screen mode with 120Hz and render the scene twice with a slight frustum offset (as per nVidia's own documentation PDF on the manual implementation "2010_GTC2010.pdf").
No quad buffers or any other tricks needed, it works great. Plus, I am in control of all the settings, like convergence etc.
For the NVidia 3Dvision with the GEForce range you need to write a full screen directX surface twice the width of the display with the left image on the left,right on the right (duh).
Then you need to write a magic value into the bottom left of the image which the NVision driver picks up and turns on the glasses, you don't need the nvapi.dll
With the Nvidia pro glasses and a Quadra card you can use the regular OpenGL stereo API.
ps.I did find some sample code that manages to do this with a normal window.
Edit - it was a low level USB code talking to the xmitter that I could never get to build, I think it eventually became this http://sourceforge.net/projects/libnvstusb/
Here is some sample code for full screen with the NVision glasses.
I'm not a DirectX expert so some of this might be less than optimal.
My app is also based on Qt, there might be some Qt bits left in the code
-----------------------------------------------------------------
// header
void create3D();
void set3D();
IDirect3D9 *_d3d;
IDirect3DDevice9 *_d3ddev;
QSize _size; // full screen size
IDirect3DSurface9 *_imageBuf; //Source stereo image
IDirect3DSurface9 *_backBuf;
--------------------------------------------------------
// the code
#include <windows.h>
#include <windowsx.h>
#include <d3d9.h>
#include <d3dx9.h>
#include <strsafe.h>
#pragma comment (lib, "d3d9.lib")
#define NVSTEREO_IMAGE_SIGNATURE 0x4433564e //NV3D
typedef struct _Nv_Stereo_Image_Header
{
unsigned int dwSignature;
unsigned int dwWidth;
unsigned int dwHeight;
unsigned int dwBPP;
unsigned int dwFlags;
} NVSTEREOIMAGEHEADER, *LPNVSTEREOIMAGEHEADER;
// ORedflags in the dwFlagsfielsof the _Nv_Stereo_Image_Headerstructure above
#define SIH_SWAP_EYES 0x00000001
#define SIH_SCALE_TO_FIT 0x00000002
// call at start to set things up
void DisplayWidget::create3D()
{
_size = QSize(1680,1050); //resolution of my Samsung 2233z
_d3d = Direct3DCreate9(D3D_SDK_VERSION); // create the Direct3D interface
D3DPRESENT_PARAMETERS d3dpp; // create a struct to hold various device information
ZeroMemory(&d3dpp, sizeof(d3dpp)); // clear out the struct for use
d3dpp.Windowed = FALSE; // program fullscreen
d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD; // discard old frames
d3dpp.hDeviceWindow = winId(); // set the window to be used by Direct3D
d3dpp.BackBufferFormat = D3DFMT_A8R8G8B8; // set the back buffer format to 32 bit // or D3DFMT_R8G8B8
d3dpp.BackBufferWidth = _size.width();
d3dpp.BackBufferHeight = _size.height();
d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_ONE;
d3dpp.BackBufferCount = 1;
// create a device class using this information and information from the d3dpp stuct
_d3d->CreateDevice(D3DADAPTER_DEFAULT,
D3DDEVTYPE_HAL,
winId(),
D3DCREATE_SOFTWARE_VERTEXPROCESSING,
&d3dpp,
&_d3ddev);
//3D VISION uses a single surface 2x images wide and image high
// create the surface
_d3ddev->CreateOffscreenPlainSurface(_size.width()*2, _size.height(), D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &_imageBuf, NULL);
set3D();
}
// call to put 3d signature in image
void DisplayWidget::set3D()
{
// Lock the stereo image
D3DLOCKED_RECT lock;
_imageBuf->LockRect(&lock,NULL,0);
// write stereo signature in the last raw of the stereo image
LPNVSTEREOIMAGEHEADER pSIH = (LPNVSTEREOIMAGEHEADER)(((unsigned char *) lock.pBits) + (lock.Pitch * (_size.height()-1)));
// Update the signature header values
pSIH->dwSignature = NVSTEREO_IMAGE_SIGNATURE;
pSIH->dwBPP = 32;
//pSIH->dwFlags = SIH_SWAP_EYES; // Src image has left on left and right on right, thats why this flag is not needed.
pSIH->dwFlags = SIH_SCALE_TO_FIT;
pSIH->dwWidth = _size.width() *2;
pSIH->dwHeight = _size.height();
// Unlock surface
_imageBuf->UnlockRect();
}
// call in display loop
void DisplayWidget::paintEvent()
{
// clear the window to a deep blue
//_d3ddev->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 40, 100), 1.0f, 0);
_d3ddev->BeginScene(); // begins the 3D scene
// do 3D rendering on the back buffer here
RECT destRect;
destRect.left = 0;
destRect.top = 0;
destRect.bottom = _size.height();
destRect.right = _size.width();
// Get the Backbuffer then Stretch the Surface on it.
_d3ddev->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &_backBuf);
_d3ddev->StretchRect(_imageBuf, NULL, _backBuf, &destRect, D3DTEXF_NONE);
_backBuf->Release();
_d3ddev->EndScene(); // ends the 3D scene
_d3ddev->Present(NULL, NULL, NULL, NULL); // displays the created frame
}
// my images come from a camera
// _left and _right are QImages but it should be obvious what the functions do
void DisplayWidget::getImages()
{
RECT srcRect;
srcRect.left = 0;
srcRect.top = 0;
srcRect.bottom = _size.height();
srcRect.right = _size.width();
RECT destRect;
destRect.top = 0;
destRect.bottom = _size.height();
if ( isOdd() ) {
destRect.left = _size.width();
destRect.right = _size.width()*2;
// get camera data for _left here, code not shown
D3DXLoadSurfaceFromMemory(_imageBuf, NULL, &destRect,_right.bits(),D3DFMT_A8R8G8B8,_right.bytesPerLine(),NULL,&srcRect,D3DX_DEFAULT,0);
} else {
destRect.left = 0;
destRect.right = _size.width();
// get camera data for _right here, code not shown
D3DXLoadSurfaceFromMemory(_imageBuf, NULL, &destRect,_left.bits(),D3DFMT_A8R8G8B8,_left.bytesPerLine(),NULL,&srcRect,D3DX_DEFAULT,0);
}
set3D(); // add NVidia signature
}
DisplayWidget::~DisplayWidget()
{
_d3ddev->Release(); // close and release the 3D device
_d3d->Release(); // close and release Direct3D
}

AudioQueue screws up output after modification

I am currently working on an audio DSP App development. The project requires direct access and modification of audio data. Right now I can successfully access and modify the raw audio data using AudioQueue but encounters error during playback. The output audio after any modification turns out be noise.
In short, the code is something like this:
(Modified from Speakhere sample code. The rest remains unchanged.)
void AQPlayer::AQBufferCallback(void * inUserData,
AudioQueueRef inAQ,
AudioQueueBufferRef inCompleteAQBuffer)
{
AQPlayer *THIS = (AQPlayer *)inUserData;
if (THIS->mIsDone) return;
UInt32 numBytes;
UInt32 nPackets = THIS->GetNumPacketsToRead();
OSStatus result = AudioFileReadPackets(THIS->GetAudioFileID(),
false,
&numBytes,
inCompleteAQBuffer->mPacketDescriptions,
THIS->GetCurrentPacket(),
&nPackets,
inCompleteAQBuffer->mAudioData);
if (result)
printf("AudioFileReadPackets failed: %d", (int)result);
if (nPackets > 0) {
inCompleteAQBuffer->mAudioDataByteSize = numBytes;
inCompleteAQBuffer->mPacketDescriptionCount = nPackets;
//My modification starts from here
//Modifying audio data
SInt16 *testBuffer = (SInt16*)inCompleteAQBuffer->mAudioData;
for (int i = 0; i < (inCompleteAQBuffer->mAudioDataByteSize)/sizeof(SInt16); i++)
{
//printf("before modification %d", (int)*testBuffer);
*testBuffer = (SInt16) *testBuffer/2; //Say some simple modification
//printf("after modification %d", (int)*testBuffer);
testBuffer++;
}
AudioQueueEnqueueBuffer(inAQ, inCompleteAQBuffer, 0, NULL);
}
During debugging, the data in buffer is displayed as expected, but the actual output is nothing but noise.
Here are some other strange behaviors of the code that makes both the whole team crazy:
If there is no change to the data (add/sub by 0, multiply by 1) or the whole buffer is assigned to a constant (say 0, then the audio will be muted), the playback behaves normally (Of course!) But if I perform anything more than it, it still turns out to be noise.
In the case I hardcode a single tone as test audio, the output noise spreads into another channel also.
So where is the bug in this code? Or if I am on the wrong track, what is the correct approach to modify the audio data and perform playback CORRECTLY? Any insight will be sincerely appreciated.
Thank you very much :-)
Cheers,
Manca
are you SURE the sample format is SInt16? And how many channels are there? You seem to treat the audio as a single channel short stream, but suppose the format is actually dual channel Float32 or so, and you do the modifications there, than the effect would be exactly as you describe, including the noise on other channels.