Memory Leak in a Simple Metal Program - objective-c++

I am trying to learn Metal for scientific programming. I tried creating a simple kernel that did morphological dilation. The issue that I am facing is that the memory seems to be increasing by a couple of KBs every time I call dilate on an image.
I verified the memory leak by running the dilate method in a for loop for 10000 iterations and watching the allocated memory in Xcode's debug navigator grow from 16 MB to 17 MB.
Is there anything that you see in my code that would contribute to the memory leak? I have also pushed the project to Github in case that helps.
/// Metal-backed Morphology implementation for single-channel 8-bit images of a
/// fixed size. All long-lived Metal objects (device, queue, pipeline, textures)
/// are created once in the constructor; dilate() only creates per-call objects.
class MorphologyIOS : public Morphology
{
public:
    /// @param kernel  Structuring-element size passed to the compute kernel via buffer 0.
    /// @param width   Width in pixels of every image passed to dilate().
    /// @param height  Height in pixels of every image passed to dilate().
    MorphologyIOS(
        const uint kernel,
        const uint width,
        const uint height
    ) {
        device_ = MTLCreateSystemDefaultDevice();
        kernelSize_ = kernel;
        buffer_ = [device_ newBufferWithBytes:&kernelSize_
                                       length:sizeof(kernelSize_)
                                      options:MTLResourceStorageModeShared];
        library_ = [device_ newDefaultLibrary];
        commandQueue_ = [device_ newCommandQueue];
        identityFunction_ = [library_ newFunctionWithName:@"identity"];

        MTLTextureDescriptor *readDesc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR8Uint
                                                               width:width
                                                              height:height
                                                           mipmapped:NO];
        MTLTextureDescriptor *writeDesc =
            [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatR8Uint
                                                               width:width
                                                              height:height
                                                           mipmapped:NO];
        [writeDesc setUsage:MTLTextureUsageShaderWrite];
        inTexture_ = [device_ newTextureWithDescriptor:readDesc];
        outTexture_ = [device_ newTextureWithDescriptor:writeDesc];
        entireImage_ = MTLRegionMake2D(0, 0, width, height);
        // NOTE(review): the creation error is discarded; pipelineState_ is nil if
        // the function could not be found or compiled.
        pipelineState_ = [device_ newComputePipelineStateWithFunction:identityFunction_ error:nil];
    }

    virtual ~MorphologyIOS() override {}

    /// Uploads inImage, runs the compute pipeline once and returns the output
    /// texture's bytes (width * height, one byte per pixel).
    /// @param inImage Tightly packed width * height bytes.
    /// @return Newly allocated result owned by the shared_ptr, or an empty
    ///         pointer when the result buffer could not be allocated.
    virtual std::shared_ptr<unsigned char> dilate(
        const std::shared_ptr<unsigned char>& inImage
    ) override {
        void *result = malloc(outTexture_.width * outTexture_.height);
        if (result == NULL) {
            return std::shared_ptr<unsigned char>();
        }
        // The buffer comes from malloc(), so it must be released with free();
        // shared_ptr's default deleter would call delete on it.
        std::shared_ptr<unsigned char> outImage(static_cast<unsigned char *>(result), free);

        @autoreleasepool
        {
            // Per-call objects are locals so nothing created here outlives the pool.
            id<MTLCommandBuffer> commandBuffer = [commandQueue_ commandBuffer];
            id<MTLComputeCommandEncoder> commandEncoder = [commandBuffer computeCommandEncoder];
            [commandEncoder setComputePipelineState:pipelineState_];

            [inTexture_ replaceRegion:entireImage_
                          mipmapLevel:0
                            withBytes:inImage.get()
                          bytesPerRow:inTexture_.width];
            [commandEncoder setTexture:inTexture_ atIndex:0];
            [commandEncoder setTexture:outTexture_ atIndex:1];
            [commandEncoder setBuffer:buffer_ offset:0 atIndex:0];

            // NOTE(review): integer division drops the remainder, so when the image
            // size is not a multiple of 10 the right/bottom edge is not dispatched.
            // Rounding up requires the kernel to bounds-check its thread position.
            MTLSize threadGroupCount = MTLSizeMake(10, 10, 1);
            MTLSize threadGroups = MTLSizeMake(inTexture_.width / threadGroupCount.width,
                                               inTexture_.height / threadGroupCount.height,
                                               1);
            [commandEncoder dispatchThreadgroups:threadGroups threadsPerThreadgroup:threadGroupCount];
            [commandEncoder endEncoding];

            [commandBuffer commit];
            [commandBuffer waitUntilCompleted];
            [outTexture_ getBytes:result
                      bytesPerRow:outTexture_.width
                       fromRegion:entireImage_
                      mipmapLevel:0];
        }
        return outImage;
    }

private:
    id<MTLDevice> device_;
    uint kernelSize_;
    id<MTLBuffer> buffer_;
    id<MTLLibrary> library_;
    id<MTLComputePipelineState> pipelineState_;
    id<MTLCommandQueue> commandQueue_;
    id<MTLFunction> identityFunction_;
    id<MTLTexture> inTexture_;
    id<MTLTexture> outTexture_;
    MTLRegion entireImage_;
};
And my kernel looks like this:
// Morphological dilation: each output texel is the maximum of the input
// texels inside a square window of roughly kernelSize[0] x kernelSize[0]
// centred on the thread position, clipped to the texture bounds.
kernel void dilation(
    texture2d<uint, access::read> inTexture [[texture(0)]],
    texture2d<uint, access::write> outTexture [[texture(1)]],
    device uint *kernelSize [[buffer(0)]],
    uint2 gid [[thread_position_in_grid]]
) {
    const uint width = inTexture.get_width();
    const uint height = inTexture.get_height();

    // Threads outside the image (possible when the dispatch is rounded up)
    // have nothing to do; this also guarantees width >= 1 and height >= 1 below.
    if (gid.x >= width || gid.y >= height)
        return;

    const uint halfKernel = kernelSize[0] / 2;
    const uint minX = gid.x >= halfKernel ? gid.x - halfKernel : 0;
    const uint minY = gid.y >= halfKernel ? gid.y - halfKernel : 0;
    // The loops below are inclusive, so the upper bound is the last valid
    // coordinate (size - 1), not the size itself.
    const uint maxX = min(gid.x + halfKernel, width - 1);
    const uint maxY = min(gid.y + halfKernel, height - 1);

    uint maxValue = 0;
    for (uint i = minX; i <= maxX; i++)
    {
        for (uint j = minY; j <= maxY; j++)
        {
            uint4 value = inTexture.read(uint2(i, j));
            if (maxValue < value[0])
                maxValue = value[0];
        }
    }
    outTexture.write(maxValue, gid);
}

This isn't so much a bug as it is an artifact of the capture/validation layer doing some bookkeeping on your behalf. Since it won't occur in real-world usage, it's probably not something to worry about.

Related

babel-loader can't deal with multiline string?

Running babel-loader from create-react-app, seeing the following error from the @react-three/drei module:
Failed to compile.
./node_modules/#react-three/drei/core/softShadows.js 11:40
Module parse failed: Unexpected token (11:40)
File was processed with these loaders:
* ./node_modules/babel-loader/lib/index.js
You may need an additional loader to handle the result of these loaders.
| rings = 11
| } = {}) => `#define LIGHT_WORLD_SIZE ${size}
> #define LIGHT_FRUSTUM_WIDTH ${frustrum ?? frustum}
| #define LIGHT_SIZE_UV (LIGHT_WORLD_SIZE / LIGHT_FRUSTUM_WIDTH)
| #define NEAR_PLANE ${near}
That just looks like a multiline string to me, so I'm not sure why the loader would be choking on it. Here's a more complete look at the module that's failing to be loaded:
// node_modules\#react-three\drei\core> cat .\softShadows.js
import * as THREE from 'three';
/**
 * Builds a GLSL source string by interpolating the given options into the
 * leading `#define` lines of a fixed shader snippet (Poisson-disk sampling,
 * blocker search, PCF filter and the PCSS entry point).
 *
 * Options (all optional; defaults are the values in the destructuring below):
 *   frustrum - used for LIGHT_FRUSTUM_WIDTH when it is neither null nor undefined
 *   frustum  - used for LIGHT_FRUSTUM_WIDTH otherwise
 *   size     - LIGHT_WORLD_SIZE
 *   near     - NEAR_PLANE
 *   samples  - NUM_SAMPLES
 *   rings    - NUM_RINGS
 *
 * NOTE(review): the `??` (nullish coalescing) operator on the LIGHT_FRUSTUM_WIDTH
 * line is presumably what a parser without support for that syntax rejects;
 * confirm against the toolchain in use.
 */
const pcss = ({
frustrum,
frustum = 3.75,
size = 0.005,
near = 9.5,
samples = 17,
rings = 11
} = {}) => `#define LIGHT_WORLD_SIZE ${size}
#define LIGHT_FRUSTUM_WIDTH ${frustrum ?? frustum}
#define LIGHT_SIZE_UV (LIGHT_WORLD_SIZE / LIGHT_FRUSTUM_WIDTH)
#define NEAR_PLANE ${near}
#define NUM_SAMPLES ${samples}
#define NUM_RINGS ${rings}
#define BLOCKER_SEARCH_NUM_SAMPLES NUM_SAMPLES
#define PCF_NUM_SAMPLES NUM_SAMPLES
vec2 poissonDisk[NUM_SAMPLES];
void initPoissonSamples(const in vec2 randomSeed) {
float ANGLE_STEP = PI2 * float(NUM_RINGS) / float(NUM_SAMPLES);
float INV_NUM_SAMPLES = 1.0 / float(NUM_SAMPLES);
float angle = rand(randomSeed) * PI2;
float radius = INV_NUM_SAMPLES;
float radiusStep = radius;
for (int i = 0; i < NUM_SAMPLES; i++) {
poissonDisk[i] = vec2(cos(angle), sin(angle)) * pow(radius, 0.75);
radius += radiusStep;
angle += ANGLE_STEP;
}
}
float penumbraSize(const in float zReceiver, const in float zBlocker) { // Parallel plane estimation
return (zReceiver - zBlocker) / zBlocker;
}
float findBlocker(sampler2D shadowMap, const in vec2 uv, const in float zReceiver) {
float searchRadius = LIGHT_SIZE_UV * (zReceiver - NEAR_PLANE) / zReceiver;
float blockerDepthSum = 0.0;
int numBlockers = 0;
for (int i = 0; i < BLOCKER_SEARCH_NUM_SAMPLES; i++) {
float shadowMapDepth = unpackRGBAToDepth(texture2D(shadowMap, uv + poissonDisk[i] * searchRadius));
if (shadowMapDepth < zReceiver) {
blockerDepthSum += shadowMapDepth;
numBlockers++;
}
}
if (numBlockers == 0) return -1.0;
return blockerDepthSum / float(numBlockers);
}
float PCF_Filter(sampler2D shadowMap, vec2 uv, float zReceiver, float filterRadius) {
float sum = 0.0;
for (int i = 0; i < PCF_NUM_SAMPLES; i++) {
float depth = unpackRGBAToDepth(texture2D(shadowMap, uv + poissonDisk[ i ] * filterRadius));
if (zReceiver <= depth) sum += 1.0;
}
for (int i = 0; i < PCF_NUM_SAMPLES; i++) {
float depth = unpackRGBAToDepth(texture2D(shadowMap, uv + -poissonDisk[ i ].yx * filterRadius));
if (zReceiver <= depth) sum += 1.0;
}
return sum / (2.0 * float(PCF_NUM_SAMPLES));
}
float PCSS(sampler2D shadowMap, vec4 coords) {
vec2 uv = coords.xy;
float zReceiver = coords.z; // Assumed to be eye-space z in this code
initPoissonSamples(uv);
float avgBlockerDepth = findBlocker(shadowMap, uv, zReceiver);
if (avgBlockerDepth == -1.0) return 1.0;
float penumbraRatio = penumbraSize(zReceiver, avgBlockerDepth);
float filterRadius = penumbraRatio * LIGHT_SIZE_UV * NEAR_PLANE / zReceiver;
return PCF_Filter(shadowMap, uv, zReceiver, filterRadius);
}`;
I fixed this issue by downgrading drei to version 4; it seems to happen on all versions after 5.0.0.
The particular version I downgraded to is 4.1.2.

arduino unity serial communication delay

I wanted to rotate an object based on an MPU-6050 sensor, so I wrote this code for the Arduino.
#include <Wire.h>
const int MPU_addr = 0x68;
int16_t AcX, AcY, AcZ, Tmp, GyX, GyY, GyZ;
// One-time start-up: configure the sensor, open the serial port, calibrate,
// then record the starting timestamp used by calcDT().
void setup() {
initMPU6050(); // initial configuration of the MPU-6050 sensor
Serial.begin(115200); // start serial communication
calibAccelGyro(); // calibrate the sensor
initDT(); // initialise the time step -> store the current time
// i.e. measuring starts from the moment the drone is powered on
}
// Main loop: read the sensor, update the time step and the accelerometer
// angles, and send the angles over serial on every second pass.
void loop() {
  // Alternates false/true so that data is sent on passes 2, 4, 6, ...
  // A toggle is used instead of an ever-growing counter: a signed int counter
  // would eventually overflow (int is 16 bits wide on AVR-based boards).
  static bool sendThisPass = false;

  readAccelGyro(); // read the accelerometer and gyroscope values
  calcDT();        // compute the time elapsed since the previous pass
  calcAccelYPR();

  if (sendThisPass)
    SendDataToProcessing();
  sendThisPass = !sendThisPass;
}
void initMPU6050(){
Wire.begin(); //I2C 통신 시작 아림
Wire.beginTransmission(MPU_addr); //0x68번지 값을 가지는 MPU-6050과 I2C 통신
Wire.write(0x6B);
Wire.write(0); //잠자는 MPU-6050을 깨우고 있다.
Wire.endTransmission(true); //I2C 버스 제어권에서 손 놓음
}
void readAccelGyro(){
Wire.beginTransmission(MPU_addr); //0x68번지 값을 가지는 MPU-6050과 I2C 통신 시작
Wire.write(0x3B); //0x3B번지에 저장
Wire.endTransmission(false); //데이터 전송 후 재시작 메새지 전송(연결은 계속 지속)
Wire.requestFrom(MPU_addr, 14, true); //0x68 번지에 0x3B 부터 48까지 총 14바이트 저장
AcX = Wire.read() << 8 | Wire.read();
AcY = Wire.read() << 8 | Wire.read();
AcZ = Wire.read() << 8 | Wire.read();
Tmp = Wire.read() << 8 | Wire.read();
GyX = Wire.read() << 8 | Wire.read();
GyY = Wire.read() << 8 | Wire.read();
GyZ = Wire.read() << 8 | Wire.read();
}
// Seconds elapsed between the two most recent calcDT() calls.
float dt;
float accel_angle_x, accel_angle_y, accel_angle_z;
float gyro_angle_x, gyro_angle_y, gyro_angle_z;
float filtered_angle_x, filtered_angle_y, filtered_angle_z;
float baseAcX, baseAcY, baseAcZ; // averaged accelerometer readings (set by calibAccelGyro)
float baseGyX, baseGyY, baseGyZ; // averaged gyroscope readings (set by calibAccelGyro)
void SendDataToProcessing(){
Serial.print(accel_angle_x, 2);
Serial.print(F(","));
Serial.print(accel_angle_y, 2);
Serial.print(F(","));
Serial.print(accel_angle_z, 2);
Serial.println(F(""));
}
// Averages ten sensor readings taken 100 ms apart and stores the averages in
// baseAc* / baseGy*. One extra reading is taken first and not included.
void calibAccelGyro(){
  const int sampleCount = 10;
  float accelSum[3] = { 0, 0, 0 };
  float gyroSum[3] = { 0, 0, 0 };

  readAccelGyro(); // initial read, not part of the average

  for (int n = 0; n < sampleCount; n++){
    readAccelGyro();
    accelSum[0] += AcX;
    accelSum[1] += AcY;
    accelSum[2] += AcZ;
    gyroSum[0] += GyX;
    gyroSum[1] += GyY;
    gyroSum[2] += GyZ;
    delay(100);
  }

  baseAcX = accelSum[0] / sampleCount;
  baseAcY = accelSum[1] / sampleCount;
  baseAcZ = accelSum[2] / sampleCount;
  baseGyX = gyroSum[0] / sampleCount;
  baseGyY = gyroSum[1] / sampleCount;
  baseGyZ = gyroSum[2] / sampleCount;
}
unsigned long t_now; // timestamp (millis) of the current measurement
unsigned long t_prev; // timestamp (millis) of the previous measurement
// Records the current millis() value as the starting point for calcDT().
void initDT(){
t_prev = millis();
}
// Updates dt with the time in seconds since the previous call (or since
// initDT()), then remembers the current timestamp for the next call.
void calcDT(){
  const unsigned long nowMs = millis();
  dt = (nowMs - t_prev) / 1000.0; // millis() counts milliseconds
  t_now = nowMs;
  t_prev = nowMs;
}
// Derives tilt angles in degrees from the latest accelerometer reading,
// relative to the calibration averages. accel_angle_z is always set to 0.
void calcAccelYPR(){
  const float RADIANS_TO_DEGREES = 180/3.14159;

  // Calibrated readings: current value minus the calibration average.
  // NOTE(review): 16384 is presumably the raw count for 1 g; confirm against the sensor range.
  const float ax = AcX - baseAcX;
  const float ay = AcY - baseAcY;
  const float az = AcZ + (16384 - baseAcZ);

  const float magnitudeYZ = sqrt(pow(ay, 2) + pow(az, 2));
  const float magnitudeXZ = sqrt(pow(ax, 2) + pow(az, 2));

  accel_angle_y = atan(-ax / magnitudeYZ)*RADIANS_TO_DEGREES;
  accel_angle_x = atan(ay / magnitudeXZ)*RADIANS_TO_DEGREES;
  accel_angle_z = 0;
}
and this code for unity.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.IO.Ports;
/// <summary>
/// Rotates the GameObject from "x,y,z" angle lines received over a serial port.
/// Every physics step the whole receive buffer is drained and only the newest
/// complete line is applied, so unread data cannot pile up and cause a growing lag.
/// </summary>
public class rotate : MonoBehaviour
{
    float x;
    float y;
    float z;
    SerialPort ardu = new SerialPort("COM3", 115200);

    // Characters received so far that are not yet terminated by a newline.
    string pending = "";

    // Start is called before the first frame update
    void Start()
    {
        ardu.ReadTimeout = 10;
        ardu.Open();
    }

    // Called once per fixed physics step
    void FixedUpdate()
    {
        if (!ardu.IsOpen)
        {
            Debug.Log("ohnonono");
            return;
        }

        // Take everything that has arrived; a frame may still be incomplete.
        pending += ardu.ReadExisting();
        int lastNewline = pending.LastIndexOf('\n');
        if (lastNewline < 0)
        {
            return; // no complete line yet
        }

        // Keep the unfinished tail for the next step and use the newest full line.
        string complete = pending.Substring(0, lastNewline);
        pending = pending.Substring(lastNewline + 1);
        string phrase = complete.Substring(complete.LastIndexOf('\n') + 1).Trim();

        string[] words = phrase.Split(',');
        if (words.Length != 3)
        {
            return; // malformed or truncated frame
        }

        // The sender always uses '.' as the decimal separator, so parse
        // independently of the machine's regional settings.
        var culture = System.Globalization.CultureInfo.InvariantCulture;
        var style = System.Globalization.NumberStyles.Float;
        float newX, newY, newZ;
        if (!float.TryParse(words[0], style, culture, out newX) ||
            !float.TryParse(words[1], style, culture, out newY) ||
            !float.TryParse(words[2], style, culture, out newZ))
        {
            return;
        }

        x = newX;
        y = newY;
        z = newZ;
        Debug.Log(y);
        transform.eulerAngles = new Vector3(x, z, y * -1);
    }

    // Release the serial port when the object goes away.
    void OnDestroy()
    {
        if (ardu.IsOpen)
        {
            ardu.Close();
        }
    }
}
It works, but there is a delay, and the lag between the sensor and the object keeps growing over time.
I think the problem is that the Arduino's loop and Unity's update run at different rates, but I don't know how to solve this.
There seems to be a bit missing after
string phrase = ardu.re
as there is no semicolon, and SerialPort does not have a .re member.
I believe you are right in your suspicions, arduino can easily complete the request thousands of times per second, and unless you read all the results, they will just accumulate in SerialPorts input buffer.
Other than delaying the read on the arduino, you should read the serial buffer in full every time you read it, for example using ReadExisting(), or repeating ReadLine within a while loop, for as long as the returned string is not empty.
keep in mind that next thing you'll face is that some frames will not be fully accumulated in the read buffer when you check (there is a possibility that you'll catch a frame in the middle of it transmitting) so do a validity check (a simple length check after .Split) should do it, to handle such case

Alternative to System.Drawing.Bitmap for Xamarin Forms

I need to use Bitmap class from System.Drawing.Bitmap, this is a function that works fine on windows Platform. But after I tried to run on Xamarin Forms, and installed nuget package System.Drawing, the program compiles correctly without errors.
But when running program I receive an error. Somehow seems to point to System.Drawing from windows, not the System.Drawing from the nuget package.
What I need to do is, get Photo from Camera and print it.
Below is the code to print. Problem is with "Bitmap" converter.
Tried several nuget packages, none worked:
System.Drawing.Common
Fast-Bitmap
Bitmap.Net
/// <summary>
/// Converts an encoded photo into a printer command stream that prints the
/// image as 24-dot bit-image stripes.
/// </summary>
/// <param name="PHOTO">Encoded image bytes accepted by the Bitmap constructor.</param>
/// <returns>The command bytes to send to the printer.</returns>
public byte[] PrintImage(byte[] PHOTO)
{
    const byte Esc = 0x1B;
    const byte LineFeed = 0x0A;
    const int DotsPerStripe = 24;

    BitmapData data;
    using (var ms = new MemoryStream(PHOTO))
    {
        // The stream must stay open for as long as the Bitmap is in use, so the
        // pixel data is extracted before leaving this scope.
        // GetBitmapData disposes the bitmap it is given.
        data = GetBitmapData(new Bitmap(ms));
    }

    BitArray dots = data.Dots;
    // Width as two explicit bytes, low byte first, independent of host endianness.
    byte widthLow = (byte)(data.Width & 0xFF);
    byte widthHigh = (byte)((data.Width >> 8) & 0xFF);

    using (var stream = new MemoryStream())
    using (var bw = new BinaryWriter(stream))
    {
        // Every command byte is written as a byte: BinaryWriter.Write(int)
        // would emit four bytes per value.

        // center command
        bw.Write(Esc);
        bw.Write((byte)'a');
        bw.Write((byte)1);

        // ESC @ (initialize).
        // NOTE(review): initializing after the center command may reset the
        // alignment on some printers; confirm the intended order.
        bw.Write(Esc);
        bw.Write((byte)'@');

        // Set the line spacing to 24 dots so stripes join without gaps.
        bw.Write(Esc);
        bw.Write((byte)'3');
        bw.Write((byte)DotsPerStripe);

        for (int offset = 0; offset < data.Height; offset += DotsPerStripe)
        {
            bw.Write(Esc);
            bw.Write((byte)'*');  // bit-image mode
            bw.Write((byte)33);   // 24-dot double-density
            bw.Write(widthLow);   // width low byte
            bw.Write(widthHigh);  // width high byte

            for (int x = 0; x < data.Width; ++x)
            {
                // Each column of a stripe is three bytes (3 x 8 dots), top first.
                for (int k = 0; k < 3; ++k)
                {
                    byte slice = 0;
                    for (int b = 0; b < 8; ++b)
                    {
                        int y = offset + (k * 8) + b;
                        int i = (y * data.Width) + x;
                        // Rows past the end of the image stay blank.
                        if (i < dots.Length && dots[i])
                        {
                            slice |= (byte)(1 << (7 - b));
                        }
                    }
                    bw.Write(slice);
                }
            }
            bw.Write(LineFeed);
        }

        // Restore the line spacing to the default of 30 dots.
        bw.Write(Esc);
        bw.Write((byte)'3');
        bw.Write((byte)30);

        bw.Flush();
        return stream.ToArray();
    }
}
/// <summary>
/// Scales the bitmap to a width of about 570 pixels using nearest-neighbour
/// sampling and thresholds each pixel's luminance into a one-bit dot
/// (true when the luminance is below 127).
/// The bitmap passed in is disposed before this method returns.
/// </summary>
/// <param name="bmp">Source bitmap; disposed by this call.</param>
/// <returns>Row-major dots plus the scaled width and height.</returns>
public BitmapData GetBitmapData(Bitmap bmp)
{
    using (var source = bmp)
    {
        const int threshold = 127;
        const double targetWidth = 570; // this depends on your printer

        double scale = targetWidth / (double)source.Width;
        int scaledHeight = (int)(source.Height * scale);
        int scaledWidth = (int)(source.Width * scale);
        var dots = new BitArray(scaledWidth * scaledHeight);

        for (int row = 0; row < scaledHeight; row++)
        {
            int sourceY = (int)(row / scale);
            for (int col = 0; col < scaledWidth; col++)
            {
                int sourceX = (int)(col / scale);
                var pixel = source.GetPixel(sourceX, sourceY);
                int luminance = (int)(pixel.R * 0.3 + pixel.G * 0.59 + pixel.B * 0.11);
                dots[(row * scaledWidth) + col] = luminance < threshold;
            }
        }

        var result = new BitmapData();
        result.Dots = dots;
        result.Height = scaledHeight;
        result.Width = scaledWidth;
        return result;
    }
}
/// <summary>
/// One-bit image: <see cref="Dots"/> holds Width * Height flags in row-major order.
/// </summary>
public class BitmapData
{
    /// <summary>Pixel flags, row by row.</summary>
    public BitArray Dots { get; set; }

    /// <summary>Number of rows.</summary>
    public int Height { get; set; }

    /// <summary>Number of columns.</summary>
    public int Width { get; set; }
}
Error occurs when function is called as:
byte[] _buffer = PrintImage(FOTO);
The error:
"Could not resolve type with token 01000119 from typeref (expected class 'System.Drawing.Bitmap' in assembly 'System.Drawing.Common, Version=4.0.1.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51')"

Source for a good, simple, soft modem library [closed]

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations.
Closed 6 years ago.
Improve this question
I am doing a weird project, and looking to convert some short, simple datagrams to audio - send them over a (physical) radio - then to receive and decode them on another device (think - embedded devices with audio out jack and GSM/GPRS-type radios).
(I have to use a physical, existing external radio).
Does anyone know of a good, simple software modem library good for such a project? I'm not so concerned about data rate, and would prefer simplicity over functionality. Even something akin to a basic 1200 baud modem would be fantastic.
Looking at this more of a learning experience and potential building block, rather than anything horribly practical.
As an exercise I've implemented a simple V.23-like modem using FSK modulation and supporting a data rate of 1200 bits/second (960 bits/second effective because of the start and stop bits).
I'm curious to see if it works with your radio. Noise, signal reflection and imperfect demodulation can all affect the performance of the modem.
Prior to trying to integrate this code into your project, first see if it works with audio recorded from your radio.
Code:
#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef M_PI
#define M_PI 3.14159265358979324
#endif
// Short aliases for the integer types used by the modem code.
typedef unsigned char uchar, uint8;
typedef signed char schar, int8;
typedef unsigned short ushort, uint16;
typedef short int16;
typedef unsigned int uint;
typedef unsigned long ulong;
// The 32-bit aliases use int when unsigned int can hold 0xFFFFFFFF, long otherwise.
#if UINT_MAX >= 0xFFFFFFFF
typedef int int32;
typedef unsigned int uint32;
#else
typedef long int32;
typedef unsigned long uint32;
#endif
typedef long long int64;
typedef unsigned long long uint64;
// Complex number: x is the real part, y the imaginary part.
typedef struct
{
    double x;
    double y;
} tComplex;

// Returns the sum *a + *b.
tComplex complexAdd(const tComplex* a, const tComplex* b)
{
    tComplex sum = { a->x + b->x, a->y + b->y };
    return sum;
}

// Returns the product *a * *b.
tComplex complexMul(const tComplex* a, const tComplex* b)
{
    tComplex product;
    const double real = a->x * b->x - a->y * b->y;
    const double imag = a->x * b->y + a->y * b->x;
    product.x = real;
    product.y = imag;
    return product;
}
// Direct O(n^2) discrete Fourier transform with both the input and the output
// index shifted by n / 2, so index n / 2 corresponds to zero on each side.
// direction selects the sign of the exponent (callers in this file pass -1).
// out and in must each hold n elements and must not overlap.
void dft(tComplex out[], const tComplex in[], size_t n, int direction)
{
    size_t outIdx, inIdx;
    for (outIdx = 0; outIdx < n; outIdx++)
    {
        // Part of the phase that depends only on the output index.
        const double rowFactor = -2 * direction * M_PI / n * ((double)outIdx - n / 2);
        tComplex acc = { 0, 0 };
        for (inIdx = 0; inIdx < n; inIdx++)
        {
            const double theta = rowFactor * ((double)inIdx - n / 2);
            tComplex twiddle;
            twiddle.x = cos(theta);
            twiddle.y = sin(theta);
            twiddle = complexMul(&twiddle, &in[inIdx]);
            acc = complexAdd(&acc, &twiddle);
        }
        out[outIdx] = acc;
    }
}
#define FILTER_LENGTH 64
// Transmitter state: an FSK tone generator driven by a data callback.
typedef struct tTx
{
enum
{
stSendingOnes, // initial phase: only 1's are sent
stSendingData // bytes from the callback (or idle 1's) are sent
} State;
uint SampleRate; // output samples per second
uint OnesFreq; // tone frequency used for a 1 bit
uint ZeroesFreq; // tone frequency used for a 0 bit
uint BitRate; // bits per second
uint32 SampleCnt; // number of samples generated so far
uint BitSampleCnt; // grows by BitRate per sample; a new bit starts once it reaches SampleRate
uint Data; // remaining bits of the current frame, least significant bit first
uint DataLeft; // number of bits of Data still to send
double Phase; // current oscillator phase in radians
double PhaseIncrement; // phase step per sample for the current tone
// Stores the next byte to send through the pointer and returns non-zero,
// or returns 0 when there is no data.
uint (*pTxGetDataCallBack)(struct tTx*, uint8*);
} tTx;
// Resets *pTx to its initial state: sending 1's at 1300 Hz, 0's at 2100 Hz,
// 1200 bits per second, with the first data bit due as soon as the
// transmitter switches to sending data.
void TxInit(tTx* pTx,
            uint SampleRate,
            uint (*pTxGetDataCallBack)(tTx*, uint8*))
{
    // Zero everything first; integer counters (SampleCnt, Data, DataLeft)
    // therefore start at 0 without being assigned individually.
    memset(pTx, 0, sizeof(*pTx));

    pTx->State = stSendingOnes;
    pTx->pTxGetDataCallBack = pTxGetDataCallBack;

    pTx->SampleRate = SampleRate;
    pTx->BitRate = 1200;
    pTx->OnesFreq = 1300;
    pTx->ZeroesFreq = 2100;

    // Already "full", so the first bit is fetched on the first data sample.
    pTx->BitSampleCnt = pTx->SampleRate;

    pTx->Phase = 0.0;
    pTx->PhaseIncrement = 2 * M_PI * pTx->OnesFreq / pTx->SampleRate;
}
// Produces the next output sample (amplitude 16000) and advances the
// transmitter by one sample period.
int16 TxGetSample(tTx* pTx)
{
    int16 out;

    // After one second's worth of 1's, start sending data.
    if (pTx->State == stSendingOnes && pTx->SampleCnt >= pTx->SampleRate)
    {
        pTx->State = stSendingData;
    }

    // Time for the next data bit?
    if (pTx->State == stSendingData && pTx->BitSampleCnt >= pTx->SampleRate)
    {
        uint bit;
        uint toneFreq;

        pTx->BitSampleCnt -= pTx->SampleRate;

        if (pTx->DataLeft == 0)
        {
            // Current frame exhausted: build the next 10-bit frame.
            uint8 byte;
            if (pTx->pTxGetDataCallBack(pTx, &byte) != 0)
            {
                // start bit (0) | 8 data bits | stop bit (1), sent LSB first
                pTx->Data = (((uint)byte & 0xFF) | (1 << 8)) << 1;
            }
            else
            {
                // Nothing to send: ten 1's.
                pTx->Data = 0x3FF;
            }
            pTx->DataLeft = 10;
        }

        // Take the next bit off the frame.
        bit = pTx->Data & 1;
        pTx->Data >>= 1;
        pTx->DataLeft--;

        // Select the tone for this bit.
        toneFreq = bit ? pTx->OnesFreq : pTx->ZeroesFreq;
        pTx->PhaseIncrement = 2 * M_PI * toneFreq / pTx->SampleRate;
    }

    // Generate the sample, then advance and wrap the oscillator phase.
    out = (int16)(16000 * cos(pTx->Phase));
    pTx->Phase += pTx->PhaseIncrement;
    if (pTx->Phase >= 2 * M_PI)
    {
        pTx->Phase -= 2 * M_PI;
    }

    if (pTx->State == stSendingData)
    {
        pTx->BitSampleCnt += pTx->BitRate;
    }
    pTx->SampleCnt++;

    return out;
}
// Receiver state: carrier detection plus FSK demodulation of 10-bit frames.
typedef struct tRx
{
enum
{
stCarrierLost, // looking for a steady tone of 1's
stCarrierDetected, // carrier found, no byte received yet
stReceivingData // at least one byte has been received
} State;
uint SampleRate; // input samples per second
uint OnesFreq; // tone frequency of a 1 bit
uint ZeroesFreq; // tone frequency of a 0 bit
uint MidFreq; // frequency the signal is shifted down by before measuring phase
uint BitRate; // bits per second
uint32 SampleCnt; // number of samples processed so far
uint BitSampleCnt; // grows by BitRate per sample; a bit is complete once it reaches SampleRate
uint Data; // shift register holding the most recent 10 received bits
double Phase; // phase of the down-shifting oscillator in radians
double PhaseIncrement; // phase step per sample of that oscillator
tComplex Filter[FILTER_LENGTH]; // filter coefficients computed in RxInit
double Delay[FILTER_LENGTH]; // the most recent FILTER_LENGTH input samples
double LastAngle; // phase angle (degrees) of the previous sample
int LastDelta; // phase change (degrees) of the previous sample
int32 Deltas; // phase changes accumulated over the current bit
int32 CarrierAngle; // phase changes accumulated while detecting the carrier
int32 CarrierCnt; // number of samples accumulated in CarrierAngle
double LongAvgPower; // slowly decaying sum of signal power
double ShortAvgPower; // quickly decaying sum of signal power
// Called with every received byte.
void (*pRxGetDataCallBack)(struct tRx*, uint8);
} tRx;
// Resets *pRx to the "carrier lost" state for 1300 Hz / 2100 Hz tones at
// 1200 bits per second and computes the receive filter: a frequency mask that
// is non-zero only for positive-frequency bins between roughly 875 Hz and
// 2350 Hz, transformed with dft().
void RxInit(tRx* pRx,
            uint SampleRate,
            void (*pRxGetDataCallBack)(struct tRx*, uint8))
{
    tComplex mask[FILTER_LENGTH];
    uint bin;

    memset(pRx, 0, sizeof(*pRx));

    pRx->State = stCarrierLost;
    pRx->pRxGetDataCallBack = pRxGetDataCallBack;

    pRx->SampleRate = SampleRate;
    pRx->BitRate = 1200;
    pRx->OnesFreq = 1300;
    pRx->ZeroesFreq = 2100;
    pRx->MidFreq = (pRx->OnesFreq + pRx->ZeroesFreq) / 2;

    pRx->SampleCnt = 0;
    pRx->BitSampleCnt = 0;
    pRx->Data = 0x3FF;

    pRx->Phase = 0.0;
    pRx->PhaseIncrement = 2 * M_PI * pRx->MidFreq / pRx->SampleRate;

    pRx->LastAngle = 0.0;
    pRx->LastDelta = 0;
    pRx->Deltas = 0;
    pRx->CarrierAngle = 0;
    pRx->CarrierCnt = 0;
    pRx->LongAvgPower = 0.0;
    pRx->ShortAvgPower = 0.0;

    for (bin = 0; bin < FILTER_LENGTH; bin++)
    {
        pRx->Delay[bin] = 0.0;
    }

    // Bins at or below FILTER_LENGTH / 2 (w <= 0) are always zero. Bins above
    // it (w > 0) get -1 in the imaginary part unless they fall outside the
    // pass band, which suppresses channel noise.
    for (bin = 0; bin < FILTER_LENGTH; bin++)
    {
        double imag = 0;
        if (bin > FILTER_LENGTH / 2)
        {
            const int belowBand =
                bin - FILTER_LENGTH / 2 < 875UL * FILTER_LENGTH / pRx->SampleRate;
            const int aboveBand = !belowBand &&
                bin - FILTER_LENGTH / 2 > (2350UL * FILTER_LENGTH + pRx->SampleRate - 1) / pRx->SampleRate;
            if (!belowBand && !aboveBand)
            {
                imag = -1;
            }
        }
        pRx->Filter[bin].x = 0;
        pRx->Filter[bin].y = imag;
    }

    memcpy(mask, pRx->Filter, sizeof(mask));
    dft(pRx->Filter, mask, FILTER_LENGTH, -1);
}
#define RX_VERBOSE 0
// Feeds one input sample into the receiver.
// The sample is filtered, shifted down by MidFreq, and the change of its
// phase angle since the previous sample is computed. While the carrier is
// lost, the phase changes are used to look for a steady tone of 1's;
// otherwise they are accumulated per bit, bits are shifted into a 10-bit
// frame, and pRxGetDataCallBack is called for each frame that has a valid
// start and stop bit.
void RxGetSample(tRx* pRx, int16 Sample)
{
tComplex s = { 0.0, 0.0 }, ss;
double angle;
uint i;
int delta;
double pwr;
// Insert the sample into the delay line
memmove(&pRx->Delay[0], &pRx->Delay[1], sizeof(pRx->Delay) - sizeof(pRx->Delay[0]));
pRx->Delay[FILTER_LENGTH - 1] = Sample;
// Get the next analytic signal sample by applying Hilbert transform/filter
for (i = 0; i < FILTER_LENGTH; i++)
{
s.x += pRx->Delay[i] * pRx->Filter[FILTER_LENGTH - 1 - i].x;
s.y += pRx->Delay[i] * pRx->Filter[FILTER_LENGTH - 1 - i].y;
}
// Frequency shift by MidFreq down
ss.x = cos(-pRx->Phase);
ss.y = sin(-pRx->Phase);
s = complexMul(&s, &ss);
pRx->Phase += pRx->PhaseIncrement;
if (pRx->Phase >= 2 * M_PI)
{
pRx->Phase -= 2 * M_PI;
}
// Calculate signal power
// Both averages are decaying sums; the long one decays 8 times more slowly.
pwr = (s.x * s.x + s.y * s.y) / 32768 / 32768;
pRx->LongAvgPower *= 1 - pRx->BitRate / (pRx->SampleRate * 8.0 * 8);
pRx->LongAvgPower += pwr;
pRx->ShortAvgPower *= 1 - pRx->BitRate / (pRx->SampleRate * 8.0);
pRx->ShortAvgPower += pwr;
#if 0
printf("LongAvgPower:%f ShortAvgPower:%f\n", pRx->LongAvgPower, pRx->ShortAvgPower);
#endif
// Disconnect if the signal power changes abruptly.
if (pRx->State != stCarrierLost &&
pRx->LongAvgPower > pRx->ShortAvgPower * 8 * 8)
{
// N.B. The receiver may have received a few extra (garbage) bytes
// while demodulating the abruptly changed signal.
// Prefixing data with its size or using a more advanced protocol
// may be a good solution to this little problem.
pRx->State = stCarrierLost;
pRx->BitSampleCnt = 0;
pRx->Data = 0x3FF;
pRx->Phase = 0.0;
pRx->LastAngle = 0.0;
pRx->LastDelta = 0;
pRx->Deltas = 0;
pRx->CarrierAngle = 0;
pRx->CarrierCnt = 0;
}
// Get the phase angle from the analytic signal sample
// (in degrees; 0 when both components are exactly zero)
angle = (fpclassify(s.x) == FP_ZERO && fpclassify(s.y) == FP_ZERO) ?
0.0 : 180 / M_PI * atan2(s.y, s.x);
// Calculate the phase angle change and force it to the -PI to +PI range
// (expressed in degrees: the result lies in -179..180)
delta = (int)(360.5 + angle - pRx->LastAngle) % 360;
if (delta > 180) delta -= 360;
if (pRx->State == stCarrierLost)
{
// Accumulate the phase angle change to see if we're receiving 1's
pRx->CarrierAngle += delta;
pRx->CarrierCnt++;
// Check whether or not the phase corresponds to 1's
if (delta < 0)
{
if (pRx->CarrierCnt >= pRx->SampleRate / pRx->OnesFreq * 8)
{
double ph = (double)pRx->CarrierAngle / pRx->CarrierCnt;
#if RX_VERBOSE
printf("ca:%5ld, cc:%4ld, ca/cc:%4ld\n",
(long)pRx->CarrierAngle,
(long)pRx->CarrierCnt,
(long)(pRx->CarrierAngle / pRx->CarrierCnt));
#endif
// Frequency tolerance is +/-16 Hz per the V.23 spec
if (ph < (pRx->OnesFreq - 17.0 - pRx->MidFreq) * 360.0 / pRx->SampleRate ||
ph > (pRx->OnesFreq + 17.0 - pRx->MidFreq) * 360.0 / pRx->SampleRate)
{
goto BadCarrier;
}
}
}
else
{
// Also reached by the goto above when the average frequency is out of tolerance.
BadCarrier:
// Phase doesn't correspond to 1's
pRx->CarrierAngle = 0.0;
pRx->CarrierCnt = 0;
}
if (pRx->CarrierCnt >= pRx->SampleRate / 2 + pRx->SampleRate / 4)
{
// 0.75 seconds worth of 1's have been detected, ready to receive data
// Adjust MidFreq to compensate for the DAC and ADC sample rate difference
double f1 = (double)pRx->CarrierAngle / pRx->CarrierCnt / 360 * pRx->SampleRate + pRx->MidFreq;
pRx->MidFreq = (uint)(pRx->MidFreq * f1 / pRx->OnesFreq);
pRx->PhaseIncrement = 2 * M_PI * pRx->MidFreq / pRx->SampleRate;
#if RX_VERBOSE
printf("f1:%u, new MidFreq:%u\n", (uint)f1, pRx->MidFreq);
#endif
pRx->State = stCarrierDetected;
}
}
else
{
// Detect frequency changes (transitions between 0's and 1's)
// i.e. the sign of the phase change flipped, or it dropped to zero
int freqChange = ((int32)pRx->LastDelta * delta < 0 || pRx->LastDelta && !delta);
int reAddDelta = 0;
#if RX_VERBOSE
printf("%6lu: delta:%4d freqChange:%d BitSampleCnt:%u\n",
(ulong)pRx->SampleCnt,
delta,
freqChange,
pRx->BitSampleCnt);
#endif
// Synchronize with 1<->0 transitions
if (freqChange)
{
if (pRx->BitSampleCnt >= pRx->SampleRate / 2)
{
// Past the middle of the bit: finish it now and carry this delta
// over to the next bit instead.
pRx->BitSampleCnt = pRx->SampleRate;
pRx->Deltas -= delta;
reAddDelta = 1;
}
else
{
// Before the middle of the bit: restart the bit from here.
pRx->BitSampleCnt = 0;
pRx->Deltas = 0;
}
}
// Accumulate analytic signal phase angle changes
// (positive for 0, negative for 1)
pRx->Deltas += delta;
if (pRx->BitSampleCnt >= pRx->SampleRate)
{
// Another data bit has accumulated
pRx->BitSampleCnt -= pRx->SampleRate;
#if RX_VERBOSE
printf("bit: %u\n", pRx->Deltas < 0);
#endif
// Shift the new bit in at position 9; the oldest bit ends up at position 0.
pRx->Data >>= 1;
pRx->Data |= (pRx->Deltas < 0) << 9;
pRx->Deltas = delta * reAddDelta;
if ((pRx->Data & 0x201) == 0x200)
{
// Start and stop bits have been detected
if (pRx->State == stCarrierDetected)
{
pRx->State = stReceivingData;
}
pRx->Data = (pRx->Data >> 1) & 0xFF;
pRx->pRxGetDataCallBack(pRx, (uint8)pRx->Data);
#if RX_VERBOSE
printf("byte: 0x%02X ('%c')\n",
pRx->Data,
(pRx->Data >= 0x20 && pRx->Data <= 0x7F) ? pRx->Data : '?');
#endif
pRx->Data = 0x3FF;
}
}
pRx->BitSampleCnt += pRx->BitRate;
}
pRx->LastAngle = angle;
pRx->LastDelta = delta;
pRx->SampleCnt++;
}
// Transmit test context. Tx is the first member because TxGetDataCallBack
// casts the tTx* it receives back to a tTxTest*.
typedef struct
{
tTx Tx;
FILE* DataFile; // source of the bytes to transmit
int CountDown; // remaining "no data" callbacks after the file is exhausted
} tTxTest;
// Data source for the transmit test: supplies the next byte of the data file.
// Returns 1 and stores the byte in *pTxData while the file has data. Once the
// file is exhausted it starts a countdown of 20 and returns 0; while the
// countdown is non-zero each call decrements it and returns 0.
uint TxGetDataCallBack(tTx* pTx, uint8* pTxData)
{
    tTxTest* ctx = (tTxTest*)pTx;
    uchar byte;

    if (ctx->CountDown != 0)
    {
        ctx->CountDown--;
        return 0;
    }

    if (fread(&byte, 1, 1, ctx->DataFile) == 1)
    {
        *pTxData = byte;
        return 1;
    }

    ctx->CountDown = 20;
    return 0;
}
// Modulates the contents of DataFileName into 16-bit PCM samples (host byte
// order) written to AudioFileName, adding uniform noise scaled by NoiseLevel.
// Returns EXIT_SUCCESS, or EXIT_FAILURE if a file cannot be opened or written.
int testTx(uint SampleRate,
           double NoiseLevel,
           const char* DataFileName,
           const char* AudioFileName)
{
    FILE *fData = NULL, *fAudio = NULL;
    int err = EXIT_FAILURE;
    tTxTest txTest;

    if ((fData = fopen(DataFileName, "rb")) == NULL)
    {
        printf("Can't open file \"%s\"\n", DataFileName);
        goto Exit;
    }
    if ((fAudio = fopen(AudioFileName, "wb")) == NULL)
    {
        printf("Can't create file \"%s\"\n", AudioFileName);
        goto Exit;
    }

    txTest.DataFile = fData;
    txTest.CountDown = 0;
    TxInit(&txTest.Tx,
           SampleRate,
           &TxGetDataCallBack);

    do
    {
        int16 sample;
        // Mix in double precision so the sum can be clamped before it is
        // narrowed; converting an out-of-range value to int16 is not defined.
        double noisy = TxGetSample(&txTest.Tx);

        if (txTest.CountDown > 1 && txTest.CountDown <= 10)
        {
#if 0 // Enable this code to test disconnecting.
            // Finish with silence.
            noisy = 0;
#endif
        }

        noisy += (rand() - (int)RAND_MAX / 2) * NoiseLevel * 16000 / (RAND_MAX / 2);
        if (!(noisy <= 32767.0))
        {
            noisy = 32767.0; // also catches NaN
        }
        else if (noisy < -32768.0)
        {
            noisy = -32768.0;
        }
        sample = (int16)noisy;

        if (fwrite(&sample, sizeof(sample), 1, fAudio) != 1)
        {
            printf("Can't write to file \"%s\"\n", AudioFileName);
            goto Exit;
        }
    } while (txTest.CountDown != 1); // Drain all data-containing samples

    err = EXIT_SUCCESS;

Exit:
    if (fData != NULL) fclose(fData);
    if (fAudio != NULL) fclose(fAudio);
    return err;
}
// Receive test context. Rx is the first member because RxGetDataCallBack
// casts the tRx* it receives back to a tRxTest*.
typedef struct
{
tRx Rx;
FILE* DataFile; // destination for the received bytes
} tRxTest;
// Data sink for the receive test: appends the received byte to the data file.
void RxGetDataCallBack(tRx* pRx, uint8 RxData)
{
    const uchar byte = RxData;
    FILE* sink = ((tRxTest*)pRx)->DataFile;
    fwrite(&byte, 1, 1, sink);
}
// Demodulates 16-bit PCM samples from AudioFileName and writes the received
// bytes to DataFileName. Prints "CONNECT <bit rate>" when a carrier is
// detected and "NO CARRIER" when it is lost or the audio ends while connected.
// Returns EXIT_SUCCESS, or EXIT_FAILURE if a file cannot be opened.
int testRx(uint SampleRate,
           const char* AudioFileName,
           const char* DataFileName)
{
    FILE *fAudio = NULL, *fData = NULL;
    int err = EXIT_FAILURE;
    tRxTest rxTest;

    fAudio = fopen(AudioFileName, "rb");
    if (fAudio == NULL)
    {
        printf("Can't open file \"%s\"\n", AudioFileName);
        goto Exit;
    }
    fData = fopen(DataFileName, "wb");
    if (fData == NULL)
    {
        printf("Can't create file \"%s\"\n", DataFileName);
        goto Exit;
    }

    rxTest.DataFile = fData;
    RxInit(&rxTest.Rx,
           SampleRate,
           &RxGetDataCallBack);

    for (;;)
    {
        int16 sample;
        int carrierDropped;

        if (fread(&sample, 1, sizeof(sample), fAudio) != sizeof(sample))
        {
            // End of audio: report a drop only if a carrier was present.
            carrierDropped = (rxTest.Rx.State != stCarrierLost);
        }
        else
        {
            const uint previousState = rxTest.Rx.State;
            int stateChanged;

            RxGetSample(&rxTest.Rx, sample);
            stateChanged = (rxTest.Rx.State != previousState);

            if (stateChanged && rxTest.Rx.State == stCarrierDetected)
            {
                printf("\nCONNECT %u\n\n", rxTest.Rx.BitRate);
            }

            carrierDropped = (stateChanged && rxTest.Rx.State == stCarrierLost);
            if (!carrierDropped)
            {
                continue;
            }
        }

        if (carrierDropped)
        {
            printf("\n\nNO CARRIER\n");
        }
        break;
    }

    err = EXIT_SUCCESS;

Exit:
    if (fAudio != NULL) fclose(fAudio);
    if (fData != NULL) fclose(fData);
    return err;
}
// Case-insensitive string equality; a portable replacement for the
// non-standard stricmp(). Returns non-zero when the strings match.
static int equalsIgnoreCase(const char* a, const char* b)
{
    while (*a != '\0' && *b != '\0')
    {
        if (tolower((unsigned char)*a) != tolower((unsigned char)*b))
        {
            return 0;
        }
        a++;
        b++;
    }
    return *a == *b;
}

// Prints the command-line synopsis.
static void printUsage(const char* programName)
{
    printf("Usage:\n\n"
           "  %s tx <sample rate> <noise level> <data input file> <PCM output file>\n"
           "  %s rx <sample rate> <PCM input file> <data output file>\n",
           programName,
           programName);
}

// Entry point: runs the transmit or the receive test depending on the first
// argument. Anything else (including -help, /help, -?, /? or malformed
// arguments) prints the usage text and returns 0.
int main(int argc, char* argv[])
{
    uint sampleRate;
    double noiseLevel;

    // A sample rate of 0 is rejected: the modem divides by the sample rate.
    if (argc == 6 &&
        equalsIgnoreCase(argv[1], "tx") &&
        sscanf(argv[2], "%u", &sampleRate) == 1 &&
        sampleRate > 0 &&
        sscanf(argv[3], "%lf", &noiseLevel) == 1)
    {
        return testTx(sampleRate, noiseLevel, argv[4], argv[5]);
    }

    if (argc == 5 &&
        equalsIgnoreCase(argv[1], "rx") &&
        sscanf(argv[2], "%u", &sampleRate) == 1 &&
        sampleRate > 0)
    {
        return testRx(sampleRate, argv[3], argv[4]);
    }

    printUsage(argv[0]);
    return 0;
}
Typical usage:
modem.exe tx 8000 0.2 testin.txt test8000.pcm
modem.exe rx 8000 test8000.pcm testout.txt
The resulting testout.txt should be identical to testin.txt.
A web search will turn up lots of amateur radio BPSK and RTTY/FSK solutions. Much of this code was written for older slower CPUs, so should run just fine on an iPhone. You can use the Audio Queue API or the RemoteIO Audio Unit for iOS audio IO to the codec.
If you're still looking for a soft modem, you might consider libquiet or Quiet.js. These offer a low power GMSK mode which is fairly capable as well as higher bitrate modes if you're using an audio cable. Quiet uses an existing SDR library to perform its modulation so you'll get something pretty full-featured.

How can i adapt speex echo canceller to process a float samples?

Good day! How can I use float samples for echo cancellation processing? I tried to change the interface and body of the central function:
from
void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *rec, const spx_int16_t *play, spx_int16_t *out);
to
void float_speex_echo_cancellation(SpeexEchoState *st, const float rec[], const float play[], float out[]);
and from
...
for (i=0;i<st->frame_size;i++)
{
spx_word32_t tmp_out;
tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
{
if (st->saturated == 0)
st->saturated = 1;
}
**out[i*C+chan] = (spx_int16_t)WORD2INT(tmp_out);**
st->memE[chan] = tmp_out;
}
...
to
...
for (i=0;i<st->frame_size;i++)
{
spx_word32_t tmp_out;
tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
{
if (st->saturated == 0)
st->saturated = 1;
}
**out[i*C+chan] = /*(spx_int16_t)WORD2INT(*/tmp_out*/)*/;**
st->memE[chan] = tmp_out;
}
...
and from
/* Filters len samples read from in[0], in[stride], in[2*stride], ... into
 * out[0..len-1], carrying two values of filter state in mem[0] and mem[1]
 * across calls. radius sets the filter coefficients (den2 is derived from it).
 * NOTE(review): the exact arithmetic depends on the Speex SHL32 / EXTEND32 /
 * MULT16_32_Q15 / PSHR32 / SATURATE32 macros, which are not visible here. */
static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
{
int i;
spx_word16_t den2;
den2 = (spx_word16_t)(radius*radius + .7f*(1.f-radius)*(1.f-radius));
for (i=0;i<len;i++)
{
spx_int16_t vin = in[i*stride];
spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
mem[0] = mem[1] + 2*(-vin + radius*vout);
mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
/* the output is saturated to 32767 before being stored */
out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
}
}
to
/* Variant of filter_dc_notch16 whose input is spx_word16_t instead of
 * spx_int16_t and whose output is stored without the SATURATE32 clamp.
 * The commented-out fragments show what was removed from the original.
 * NOTE(review): the exact arithmetic depends on the Speex SHL32 / EXTEND32 /
 * MULT16_32_Q15 / PSHR32 macros, which are not visible here. */
static inline void float_filter_dc_notch16(const /*spx_int16_t*/spx_word16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
{
int i;
spx_word16_t den2;
den2 = /*(spx_word16_t)*/(radius*radius + .7f*(1.f-radius)*(1.f-radius));
for (i=0;i<len;i++)
{
/*spx_int16_t*/spx_word16_t vin = in[i*stride];
spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
mem[0] = mem[1] + 2*(-vin + radius*vout);
mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
out[i] = /*SATURATE32(*/PSHR32(MULT16_32_Q15(radius,vout),15)/*,32767)*/;
}
}
So I prevented the conversion of the float output to short int, but now I get a warning:
speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
This points to the st->screwed_up parameter being increased by 50, which results in all output samples being set to zero:
...
if (!(Syy>=0 && Sxx>=0 && See >= 0)
|| !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
)
{ st->screwed_up += 50; for (i=0;i<st->frame_size*C;i++) out[i] = 0; }
...
What can i do?
enter code here
Why do you want to use float samples?
Standard linear PCM audio is represented as integer samples according to the chosen bitrate - 8 bit, 16 bit and so on.
Where do you get that input from?
If I were you I would just convert whatever you got to shorts and provide it to Speex so it can work with it.