read bmp from sd card faster - sd-card

i am working on Cy8CKIT -050 cypress kit, flash chip W25Q128FV ,SD card, and TFT display https://www.buydisplay.com/default/lcd-5-ssd1963-tft-module-touch-screen-display-800x480-mcu-arduino
i have interfaced display, flash chip using SPIFFS and sd card using FATFS with Cy8CKIT - 050
but the speed of BMP image loading from the sd card is very slow, right now size of image is 150*90 and its taking 635ms
how can i improve this?
also i want write BMP image from the sd card to Flash chip and load image from flash chip to display, how can i do that
i am attaching code for reference
`void DrawImage(UG_S16 x1, UG_S16 y1, UG_S16 x2, UG_S16 y2, const TCHAR* namefile)
{
FATFS fatFs;
FIL fileO;
uint16_t buffimg,buffimg1;
uint16_t buffer[1];
UINT br,br1;
FRESULT pt;
char read[50];
uint32_t bmpWidth = 0;
uint32_t bmpHeight = 0;
uint16_t bmpImageoffset;
uint16_t rowSize;
uint8_t rdBuf[60];
uint8_t header_buff[60];
uint16_t position = 0;
uint8_t numberOfPixels = 0;
uint32_t commpression = 0;
uint32_t imageSize = 0;
unsigned int byte_read;
uint16_t bmpSig;
uint8_t row_buff[rowSize]; // only read 1 row at a time
uint8_t cc,R,G,B;
uint16_t pixel;
uint16_t color;
uint32 i,j, count=0;
uint8_t pColorData[DISPLAY_WIDTH*3];
uint8_t pColorData_flash[DISPLAY_WIDTH*3];
uint8_t pColorData_flash1[DISPLAY_WIDTH*3];
uint32_t pos;
int row =0 ;
if((x1 < 0) ||(x1 >= DISPLAY_WIDTH) || (y1 < 0) || (y1 >= DISPLAY_HEIGHT)) return;
if((x2 < 0) ||(x2 >= DISPLAY_WIDTH) || (y2 < 0) || (y2 >= DISPLAY_HEIGHT)) return;
/* Mount drive */
pt = f_mount(&fatFs, "", 1);
if(pt != FR_OK) return;
/* open file */
pt = f_open(&fileO, namefile, FA_READ);
if(pt != FR_OK) return;
/* read bmp header */
pt = f_read(&fileO, header_buff, sizeof header_buff, &byte_read);
if(pt != FR_OK){
return;
}
//my_spiffs_mount();
//spiffs_file fd = SPIFFS_open(&fs, "bmpFile", SPIFFS_CREAT | SPIFFS_TRUNC | SPIFFS_RDWR, 0);
//if (SPIFFS_write(&fs, fd,header_buff, sizeof(header_buff)) < 0) sprintf(bufferOut,"wr errno %i\n", SPIFFS_errno(&fs));
//SPIFFS_close(&fs, fd);
//UG_PutString(1,200,bufferOut);
//fd = SPIFFS_open(&fs, "bmpFile", SPIFFS_RDWR, 0);
//if (SPIFFS_read(&fs, fd, rdBuf, sizeof(rdBuf)) < 0) sprintf(bufferOut,"rd errno %i\n", SPIFFS_errno(&fs));
//SPIFFS_close(&fs, fd);
//UG_PutString(1,220,bufferOut);
bmpSig = (header_buff[1] << 8) | header_buff [0];
sprintf(bufferOut,"Signature:\t0x%X\n", bmpSig);
UG_PutString(1,160,bufferOut);
if(bmpSig != 0x4D42){
UG_PutString(1,180,"not a bmp");
return; // not a bmp
}
bmpImageoffset = header_buff [7] * 256 + header_buff [6];
bmpHeight = (header_buff [25] << 24) | (header_buff [24] << 16) | (header_buff [23] << 8) | header_buff [22];
bmpWidth = (header_buff [21] << 24) | (header_buff [20] << 16) | (header_buff [19] << 8) | header_buff [18];
position = header_buff [10]; // starting point in file
numberOfPixels = header_buff [29] * 256 + header_buff [28];
commpression = (header_buff [30] << 24) | (header_buff [31] << 16) | (header_buff [32] << 8) | header_buff [33];
imageSize = (header_buff [37] << 24) | (header_buff [36] << 16) | (header_buff [35] << 8) | header_buff [34];
rowSize = ((numberOfPixels * bmpWidth + 31) /32 ) * 4;
sprintf(read ,"Offset:\t%d\n", bmpImageoffset);
UG_PutString(1,1,read);
sprintf(read ,"Height:\t%u\n", bmpHeight);
UG_PutString(1,20,read);
sprintf(read ,"Width:\t\t%u\n", bmpWidth);
UG_PutString(1,40,read);
sprintf(read ,"Row Size:\t%d\n", rowSize);
UG_PutString(1,60,read);
sprintf(read ,"Start Index:\t%d\n", position);
UG_PutString(1,80,read);
sprintf(read ,"# of Pixels:\t%d\n", numberOfPixels);
UG_PutString(1,100,read);
sprintf(read ,"Compression:\t%u\n", commpression);
UG_PutString(1,120,read);
sprintf(read ,"Image Size:\t%u\n", imageSize);
UG_PutString(1,140,read);
Display_WindowSet(x1, x2, y1, y2);
/* Write to LCD-GRAM */
Display_WriteCommand(0x2c);
/* get current millisecond count */
unsigned long nCurrentMillis = nmillis;
for(i=0;i<bmpHeight; i++)
{
for(j=0; j<rowSize; j++)
{
count++;
LED3_Write(1);
f_read(&fileO,pColorData+j,1,&byte_read);
//fd = SPIFFS_open(&fs, "bmpFile", SPIFFS_RDWR , 0);
//if (SPIFFS_write(&fs, fd,pColorData+j, 1) < 0) sprintf(bufferOut,"wr errno %i\n", SPIFFS_errno(&fs));
//SPIFFS_close(&fs, fd);
//fd = SPIFFS_open(&fs, "bmpFile", SPIFFS_RDWR, 0);
//if (SPIFFS_read(&fs, fd, pColorData_flash+j, 1) < 0) sprintf(bufferOut,"rd errno %i\n", SPIFFS_errno(&fs));
//SPIFFS_close(&fs, fd);
//sprintf(bufferOut," row %lu",j);
//UG_PutString(1,360,bufferOut);
//CyDelay(1);
LED3_Write(0);
}
for(j=0;j<bmpWidth;j++)
{
//fd = SPIFFS_open(&fs, "bmpFile", SPIFFS_RDWR, 0);
//if (SPIFFS_read(&fs, fd, pColorData_flash,rowSize) < 0) sprintf(bufferOut,"rd errno %i\n", SPIFFS_errno(&fs));
//SPIFFS_close(&fs, fd);
uint32_t k = j*3;
R = pColorData[k+2];
G = pColorData[k+1];
B = pColorData[k];
color = color565(R,G,B);
Display_WriteData (color);
//sprintf(bufferOut," width %lu",j);
//UG_PutString(1,380,bufferOut);
}
//sprintf(bufferOut," height %lu",i);
//UG_PutString(1,400,bufferOut);
}
unsigned long nTime = nmillis-nCurrentMillis;
sprintf(bufferOut,"time taken for load : %lu ms",nTime);
UG_PutString(1,320,bufferOut);
sprintf(bufferOut,"%lu",count);
UG_PutString(1,340,bufferOut);
f_close(&fileO);
}
`

Related

CUDA sha256 produce difference hash compared to OpenSSL

iam trying to port my sha256 hash function from CPU code to CUDA. after googling, i found few working example for cuda sha256. However when tested, the hash result of cuda sha256 is difference from OpenSSL.
My input is "hello world" which is declared as const char*. result are as below;
Constant Char* Input : hello world
Hash on CPU : b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9
Hash on GPU : c1114db6b517b4db8d360a9e14f5c2a57de95d955ec20cbd4cb73facb2b13e5f
I need help to fix my GPU code for sha256 so that it will produce same hash as given by CPU (OpenSSL).
Here my code for CPU Hash
#pragma warning(disable : 4996) //disable compiler error
#include <iostream>
#include <openssl/sha.h>
unsigned char hash[SHA256_DIGEST_LENGTH];
void SHA256(const char* input, size_t input_size){
SHA256_CTX sha256;
SHA256_Init(&sha256);
SHA256_Update(&sha256, input, input_size);
SHA256_Final(hash, &sha256);
}
void CPU() {
const char* input = "hello world";
size_t input_size = strlen(input);
SHA256(input, input_size);
for (size_t i = 0; i < 32; i++) {
printf("%02x", hash[i]);
}
printf("\n");
}
and Here my code for GPU hash
#include <stdio.h>
#include <string.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#define BLOCK_SIZE 256
__constant__ unsigned int k[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
__device__ unsigned int Ch(unsigned int x, unsigned int y, unsigned int z) {
return (x & y) ^ (~x & z);
}
__device__ unsigned int Maj(unsigned int x, unsigned int y, unsigned int z) {
return (x & y) ^ (x & z) ^ (y & z);
}
__device__ unsigned int Sigma0(unsigned int x) {
return (x >> 2u) | (x << 30u);
}
__device__ unsigned int Sigma1(unsigned int x) {
return (x >> 6u) | (x << 26u);
}
__device__ unsigned int sigma0(unsigned int x) {
return (x >> 7u) | (x << 25u);
}
__device__ unsigned int sigma1(unsigned int x) {
return (x >> 17u) | (x << 15u);
}
//solve using 256 thread in 1 block
__global__ void sha256_kernel(const char* input, size_t input_size, unsigned char* output) {
size_t i = blockIdx.x * blockDim.x + threadIdx.x;
size_t grid_size = blockDim.x * gridDim.x;
for (; i < input_size; i += grid_size) {
unsigned int h[8] = {
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};
unsigned int w[64];
for (size_t j = 0; j < input_size; j += 64) {
for (size_t t = 0; t < 16; t++) {
w[t] = ((unsigned int)input[j + t * 4 + 0] << 24u) | ((unsigned int)input[j + t * 4 + 1] << 16u) |
((unsigned int)input[j + t * 4 + 2] << 8u) | ((unsigned int)input[j + t * 4 + 3] << 0u);
}
for (size_t t = 16; t < 64; t++) {
w[t] = sigma1(w[t - 2]) + w[t - 7] + sigma0(w[t - 15]) + w[t - 16];
}
unsigned int a = h[0];
unsigned int b = h[1];
unsigned int c = h[2];
unsigned int d = h[3];
unsigned int e = h[4];
unsigned int f = h[5];
unsigned int g = h[6];
unsigned int hh = h[7];
for (size_t t = 0; t < 64; t++) {
unsigned int t1 = hh + Sigma1(e) + Ch(e, f, g) + k[t] + w[t];
unsigned int t2 = Sigma0(a) + Maj(a, b, c);
hh = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
h[0] += a;
h[1] += b;
h[2] += c;
h[3] += d;
h[4] += e;
h[5] += f;
h[6] += g;
h[7] += hh;
}
for (size_t t = 0; t < 8; t++) {
output[i + t * 4 + 0] = (unsigned char)(h[t] >> 24u);
output[i + t * 4 + 1] = (unsigned char)(h[t] >> 16u);
output[i + t * 4 + 2] = (unsigned char)(h[t] >> 8u);
output[i + t * 4 + 3] = (unsigned char)(h[t] >> 0u);
}
}
}
void GPU() {
const char* input = "hello world";
size_t input_size = strlen(input);
size_t output_size = 32;
unsigned char* output;
char* input_device;
cudaMalloc((void**)&output, output_size);
cudaMalloc((void**)&input_device, input_size);
cudaMemcpy(input_device, input, input_size, cudaMemcpyHostToDevice);
//solve using 256 thread and 1 block
sha256_kernel << < ((input_size + BLOCK_SIZE - 1) / BLOCK_SIZE), BLOCK_SIZE >> > (input_device, input_size, output);
unsigned char* output_host = (unsigned char*)malloc(output_size);
cudaMemcpy(output_host, output, output_size, cudaMemcpyDeviceToHost);
for (size_t i = 0; i < output_size; i++) {
printf("%02x", output_host[i]);
}
printf("\n");
free(output_host);
cudaFree(output);
cudaFree(input_device);}
Thanks in advance.

Problem writing FDC1004 registers with ESP32 in freeRTOS

I'm trying to read some capative sensors using FDC1004 connected to a ESP32 in freeRTOS. i'm having problems writing FDC1004 registers of 16bits. I programmed a test code reading the ID and writing the configuration registers. The read task is well, i can read correctly the ID of the FDC1004(0x1004), but when i tried to write the register i only read correctly the 8 LSBs. Here is my i2c functions:
esp_err_t I2C_write(i2c_port_t i2c_num, uint8_t slave_address, uint8_t data_address, uint8_t *data_wr ,size_t size)
{
i2c_cmd_handle_t cmd = i2c_cmd_link_create();
i2c_master_start(cmd);
i2c_master_write_byte(cmd, (slave_address << 1) | WRITE_BIT, ACK_CHECK_EN);
i2c_master_write_byte(cmd, data_address, ACK_CHECK_EN);
i2c_master_write(cmd, data_wr, size, ACK_CHECK_EN);
//i2c_master_write(cmd, data_addres, 1, ACK_CHECK_EN);
//i2c_master_write(cmd, data_wr, size, ACK_CHECK_EN);
//i2c_master_write_byte(cmd, data_wr + size - 1, ACK_CHECK_EN);
i2c_master_stop(cmd);
esp_err_t ret = i2c_master_cmd_begin(i2c_num, cmd, 1000/portTICK_RATE_MS);
i2c_cmd_link_delete(cmd);
return ret;
}
esp_err_t I2C_read(i2c_port_t i2c_num, uint8_t slave_address, uint8_t *data_addres, uint8_t *data_rd , size_t size){
if (size == 0) {
return ESP_OK;
}
i2c_cmd_handle_t cmd = i2c_cmd_link_create();
i2c_master_start(cmd);
i2c_master_write_byte(cmd, (slave_address << 1) | WRITE_BIT, ACK_CHECK_EN);
i2c_master_write(cmd, data_addres, size, ACK_CHECK_EN);
i2c_master_stop(cmd);
esp_err_t ret = i2c_master_cmd_begin(i2c_num, cmd, 1000 / portTICK_RATE_MS);
i2c_cmd_link_delete(cmd);
cmd = i2c_cmd_link_create();
i2c_master_start(cmd);
i2c_master_write_byte(cmd, (slave_address << 1) | READ_BIT, ACK_CHECK_EN);
if (size > 1) {
i2c_master_read(cmd, data_rd, size - 1, ACK_VAL);
}
i2c_master_read_byte(cmd, data_rd + size - 1, NACK_VAL);
i2c_master_stop(cmd);
ret = i2c_master_cmd_begin(i2c_num, cmd, 1000 / portTICK_RATE_MS);
i2c_cmd_link_delete(cmd);
return ret;
}
void FDC1004_writeReg(i2c_port_t i2c_num, uint8_t reg_address, uint16_t data){
uint8_t tx_buff[2];
tx_buff[0] = data >> 8;
tx_buff[1] = data & 0x00FF;
ESP_ERROR_CHECK(I2C_write( i2c_num, FDC_ADDR, reg_address, tx_buff, 2));
vTaskDelay(20 / portTICK_PERIOD_MS);
}
uint16_t FDC01004_readReg(i2c_port_t i2c_num, uint8_t reg_address){
uint8_t read_buff[2];
uint16_t data;
ESP_ERROR_CHECK(I2C_read( i2c_num, FDC_ADDR, &reg_address, read_buff, 2));
data = ((uint16_t)(read_buff[0] << 8) | read_buff[1]);
return data;
}
The results when i tried to write 0x10C0, 0x30C0, 0x50C0, 0x70C0 in the registers 0x08, 0x09, 0x0A and 0x0B respectively, are: 0xFFC0, 0xFFC0, 0xFFC0 and 0xFFC0, respectively.
I hope you can help me.
Regards
Finally, after some test with the code i change the read function as follow:
esp_err_t I2C_read(i2c_port_t i2c_num, uint8_t slave_address, uint8_t data_addres, uint8_t *data_rd , size_t size){
if (size == 0) {
return ESP_OK;
}
i2c_cmd_handle_t cmd = i2c_cmd_link_create();
i2c_master_start(cmd);
i2c_master_write_byte(cmd, (slave_address << 1) | WRITE_BIT, ACK_CHECK_EN);
i2c_master_write_byte(cmd, data_addres, ACK_CHECK_EN);
i2c_master_stop(cmd);
esp_err_t ret = i2c_master_cmd_begin(i2c_num, cmd, 1000 / portTICK_RATE_MS);
i2c_cmd_link_delete(cmd);
cmd = i2c_cmd_link_create();
i2c_master_start(cmd);
i2c_master_write_byte(cmd, (slave_address << 1) | READ_BIT, ACK_CHECK_EN);
if (size > 1) {
i2c_master_read(cmd, data_rd, size - 1, ACK_VAL);
}
i2c_master_read_byte(cmd, data_rd + size - 1, NACK_VAL);
i2c_master_stop(cmd);
ret = i2c_master_cmd_begin(i2c_num, cmd, 1000 / portTICK_RATE_MS);
i2c_cmd_link_delete(cmd);
return ret;
}
The wrong thing in the read function was that it make a write of the reg address a size times, and only once was needed. This way works for me.

Fatal error: Not enough bits to represent the passed value

Trying to use Mikrotik API library written in Swift:
https://wiki.mikrotik.com/wiki/API_in_Swift
It works well, when I'm sending small commands
However, If I will try to send large script string, I'm getting error:
Fatal error: Not enough bits to represent the passed value
The code that crashes:
private func writeLen(_ command : String) -> Data {
let data = command.data(using: String.Encoding.utf8)
var len = data?.count ?? 0
var dat = Data()
if len < 0x80 {
dat.append([UInt8(len)], count: 1)
}else if len < 0x4000 {
len = len | 0x8000;
dat.append(Data(bytes: [UInt8(len >> 8)]))
dat.append(Data(bytes: [UInt8(len)]))
}else if len < 0x20000 {
len = len | 0xC00000;
dat.append(Data(bytes: [UInt8(len >> 16)]))
dat.append(Data(bytes: [UInt8(len >> 8)]))
dat.append(Data(bytes: [UInt8(len)]))
}
else if len < 0x10000000 {
len = len | 0xE0000000;
dat.append(Data(bytes: [UInt8(len >> 24)]))
dat.append(Data(bytes: [UInt8(len >> 16)]))
dat.append(Data(bytes: [UInt8(len >> 8)]))
dat.append(Data(bytes: [UInt8(len)]))
}else{
dat.append(Data(bytes: [0xF0]))
dat.append(Data(bytes: [UInt8(len >> 24)]))
dat.append(Data(bytes: [UInt8(len >> 16)]))
dat.append(Data(bytes: [UInt8(len >> 8)]))
dat.append(Data(bytes: [UInt8(len)]))
}
return dat
}
The fatal error appears in this part:
else if len < 0x4000 {
len = len | 0x8000;
dat.append(Data(bytes: [UInt8(len >> 8)]))
dat.append(Data(bytes: [UInt8(len)]))
}
at line:
dat.append(Data(bytes: [UInt8(len)]))
Data size at this moment is 1072 bytes and len equals to 33840, UInt8 cannot be initiated with that len value.
How can I edit the code to avoid the error?
I'm using Swift 4.2
EDIT:
Here is an example of the same logic but written in JavaScript
module.exports.encodeString = function encodeString(s) {
var data = null;
var len = Buffer.byteLength(s);
var offset = 0;
if (len < 0x80) {
data = new Buffer(len + 1);
data[offset++] = len;
} else if (len < 0x4000) {
data = new Buffer(len + 2);
len |= 0x8000;
data[offset++] = (len >> 8) & 0xff;
data[offset++] = len & 0xff;
} else if (len < 0x200000) {
data = new Buffer(len + 3);
len |= 0xC00000;
data[offset++] = (len >> 16) & 0xff;
data[offset++] = (len >> 8) & 0xff;
data[offset++] = len & 0xff;
} else if (len < 0x10000000) {
data = new Buffer(len + 4);
len |= 0xE0000000;
data[offset++] = (len >> 24) & 0xff;
data[offset++] = (len >> 16) & 0xff;
data[offset++] = (len >> 8) & 0xff;
data[offset++] = len & 0xff;
} else {
data = new Buffer(len + 5);
data[offset++] = 0xF0;
data[offset++] = (len >> 24) & 0xff;
data[offset++] = (len >> 16) & 0xff;
data[offset++] = (len >> 8) & 0xff;
data[offset++] = len & 0xff;
}
data.utf8Write(s, offset);
return data;
};
Maybe someone sees the difference
Thanks for the JavaScript translation. It clearly shows the problem, since the Swift version does not resemble it.
Let's take this stretch of the JavaScript, as it is the part you are stumbling over in Swift:
} else if (len < 0x4000) {
data = new Buffer(len + 2);
len |= 0x8000;
data[offset++] = (len >> 8) & 0xff;
data[offset++] = len & 0xff;
}
That is "translated" in Swift like this:
} else if len < 0x4000 {
len = len | 0x8000;
dat.append(Data(bytes: [UInt8(len >> 8)]))
dat.append(Data(bytes: [UInt8(len)]))
}
Well, you can see at once that they are not at all the same. In the last line, the Swift version has forgotten the & 0xff.
If you put that in, everything starts working. And we can make it look a lot more like the JavaScript original too:
} else if len < 0x4000 {
len |= 0x8000;
dat.append(Data(bytes: [UInt8(len >> 8)]))
dat.append(Data(bytes: [UInt8(len & 0xff)]))
}
So I'd say, yes, use the JavaScript as a guide and you'll be fine. If that last line doesn't feel "swifty" enough to you, then write it like this:
dat.append(Data(bytes: [UInt8(truncatingIfNeeded: len)]))
It's exactly the same result.
I don't guarantee that everything will work perfectly after you make those changes (the Swift code you showed still does not look to me like it does the same thing as the JavaScript), but at least the part where we write the length bytes into the start of the Data will work correctly.

Saving Nef polyhedron as Polyhedron_3 or Surface_mesh gives different results

I wanted to save a Nef polyhedron into an OFF file for visualizing it. As written in the CGAL Nef polyhedra user manual (see paragraphs 5.4 and 5.5), a Nef polyhedron can be converted both to a Polyhedron_3 or a Surface_mesh.
However, I noticed that when converting to those structures and then saving it into an OFF file, the results are different.
Here I report the code for a minimal example:
#include <list>
#include <iostream>
#include <fstream>
#include <CGAL/Exact_predicates_exact_constructions_kernel.h>
#include <CGAL/Polyhedron_3.h>
#include <CGAL/Nef_polyhedron_3.h>
#include <CGAL/IO/Nef_polyhedron_iostream_3.h>
#include <CGAL/Surface_mesh.h>
#include <CGAL/boost/graph/convert_nef_polyhedron_to_polygon_mesh.h>
typedef CGAL::Exact_predicates_exact_constructions_kernel Kernel;
typedef Kernel::Point_3 Point_3;
typedef CGAL::Surface_mesh<Point_3> Mesh;
typedef CGAL::Polyhedron_3<Kernel> Polyhedron_3;
typedef CGAL::Nef_polyhedron_3<Kernel> Nef_polyhedron;
typedef Kernel::Vector_3 Vector_3;
typedef Kernel::Aff_transformation_3 Aff_transformation_3;
int convertStlToOff(const char* inputFilename, const char* outputFilename)
{
//read 80 bytes and put in std::cerr
std::ifstream obj(inputFilename, std::ios::in | std::ios::binary);
for (int i = 0; i < 80; i++) {
boost::uint8_t c;
obj.read(reinterpret_cast<char*>(&c), sizeof(c));
std::cerr << c;
}
std::cerr << std::endl;
//read 4 bytes and initialize number of triangles
boost::uint32_t N32;
obj.read(reinterpret_cast<char*>(&N32), sizeof(N32));
unsigned int N = N32;
std::cerr << N << " triangles" << std::endl;
//reserve space for N faces
std::vector<Point_3> points;
std::map<Point_3, int> pmap;
typedef boost::tuple<int, int, int> Face;
std::vector<Face> faces;
faces.reserve(N);
//read all faces
int number_of_points = 0;
int number_of_snapped_points = 0;
for (int i = 0; i < N; i++)
{
//read face normal (it is ignored)
float normal[3];
obj.read(reinterpret_cast<char*>(&normal[0]), sizeof(normal[0]));
obj.read(reinterpret_cast<char*>(&normal[1]), sizeof(normal[1]));
obj.read(reinterpret_cast<char*>(&normal[2]), sizeof(normal[2]));
//read coordinates of all 3 points
int index[3];
for (int j = 0; j < 3; j++)
{
float x, y, z;
obj.read(reinterpret_cast<char*>(&x), sizeof(x));
obj.read(reinterpret_cast<char*>(&y), sizeof(y));
obj.read(reinterpret_cast<char*>(&z), sizeof(z));
Point_3 p(x, y, z);
if (pmap.find(p) == pmap.end())
{
// check brute force if there is a close point
bool found_close_point = false;
/*for (int k = 0; k < points.size(); k++)
{
if (sqrt(CGAL::squared_distance(p, points[k])) < 0.00001)
{
index[j] = k;
found_close_point = true;
number_of_snapped_points++;
}
}*/
if (!found_close_point)
{
points.push_back(p);
index[j] = number_of_points;
pmap[p] = number_of_points++;
}
}
else {
index[j] = pmap[p];
}
}
faces.push_back(boost::make_tuple(index[0], index[1], index[2]));
//read two additional bytes, and ignore them
char c;
obj.read(reinterpret_cast<char*>(&c), sizeof(c));
obj.read(reinterpret_cast<char*>(&c), sizeof(c));
}
std::cerr << number_of_snapped_points << " snapped points" << std::endl;
std::ofstream outputFile(outputFilename);
outputFile.precision(20);
outputFile << "OFF\n" << points.size() << " " << faces.size() << " 0" << std::endl;
for (int i = 0; i < points.size(); i++)
{
outputFile << points[i] << std::endl;
}
for (int i = 0; i < faces.size(); i++)
{
outputFile << "3 " << boost::get<0>(faces[i]) << " " << boost::get<1>(faces[i]) << " " << boost::get<2>(faces[i]) << std::endl;
}
return 0;
}
void fill_cube_1(Polyhedron_3 & poly)
{
std::string input =
"OFF\n\
8 12 0\n\
-1 -1 -1\n\
-1 1 -1\n\
1 1 -1\n\
1 -1 -1\n\
-1 -1 1\n\
-1 1 1\n\
1 1 1\n\
1 -1 1\n\
3 0 1 3\n\
3 3 1 2\n\
3 0 4 1\n\
3 1 4 5\n\
3 3 2 7\n\
3 7 2 6\n\
3 4 0 3\n\
3 7 4 3\n\
3 6 4 7\n\
3 6 5 4\n\
3 1 5 6\n\
3 2 1 6";
std::stringstream ss;
ss << input;
ss >> poly;
}
enum savingModality
{
SAVE_AS_POLYHEDRON_3 = 0,
SAVE_AS_SURFACE_MESH = 1,
};
int saveNefObjectInOffFile(Nef_polyhedron offObject, const char* filename, savingModality modality)
{
if (!offObject.is_simple())
{
printf("Object is not simple. Cannot convert to mesh or polyhedron\n");
return 1;
}
std::ofstream outStream;
outStream.open(filename);
if (modality == SAVE_AS_POLYHEDRON_3)
{
Polyhedron_3 outputPolyhedron;
offObject.convert_to_Polyhedron(outputPolyhedron);
outStream << outputPolyhedron;
}
else if (modality == SAVE_AS_SURFACE_MESH)
{
Mesh outputMesh;
CGAL::convert_nef_polyhedron_to_polygon_mesh(offObject, outputMesh);
outStream << outputMesh;
}
outStream.close();
return 0;
}
int main()
{
int ret;
//construct nef object #1
Polyhedron_3 cube1;
fill_cube_1(cube1);
Nef_polyhedron nefObject1(cube1);
//construct nef object #2
Nef_polyhedron nefObject2(cube1);
Aff_transformation_3 scale2(1, 0, 0,
0, 1, 0,
0, 0, 1,
2);
nefObject2.transform(scale2);
Aff_transformation_3 translation2(CGAL::TRANSLATION, Vector_3(-0.5, -0.5, -0.5));
nefObject2.transform(translation2);
//construct nef object #3
Nef_polyhedron nefObject3;
nefObject3 = nefObject1 - nefObject2;
//save results into .off file
ret = saveNefObjectInOffFile(nefObject3, "out1.off", SAVE_AS_POLYHEDRON_3);
ret = saveNefObjectInOffFile(nefObject3, "out2.off", SAVE_AS_SURFACE_MESH);
return 0;
}
and the screenshots of the visualization of the two files: saving as Polyhedron_3 and saving as Surface_mesh. As you can see, it seems like if some faces were missing.
My question is: "Why the results are visualized different?"
The output to Polyhedron_3 is triangulated while the output to Surface_mesh is not. There is a bug in meshlab to display non convex faces I guess.
Look at the doc you'll see that there is a Boolean parameter to trigger or not the triangulation.

CUFFT: trying to implement row by row fft of a matrix

I am trying to replicate matlab fft functionality, where it does a row by row (or column by column) fft of a matrix. Each row would be one of the batches in the cufft plan.
I can get it working using cufftExecC2C (the commented out part in the code below works), but not cufftExecR2C. My code is using cufftPlan1d, but ideally I want to implement it using cufftPlanMany.
I am wondering what I'm doing wrong, and if there is a better way of doing this. Thank you.
// linker -> input -> additional dependencies -> add 'cufft.lib'
// VC++ Directories -> include directories - > add 'C:\ProgramData\NVIDIA Corporation\CUDA Samples\v6.0\common\inc'
#include <stdio.h>
#include <stdlib.h>
#include <cufft.h>
#include <cuda_runtime.h>
#include <iostream>
#define NX 6
#define NY 5
void printArray(float *my_array);
void printComplexArray(float2 *my_array);
int main(){
/************************************************************ C2C ************************************************************/
/*
float2 *initial_array = (float2 *)malloc(sizeof(float2) * NX * NY);
for (int h = 0; h < NX; h++){
for (int w = 0; w < NY; w++){
initial_array[NY * h + w].x = 0;
initial_array[NY * h + w].y = 0;
}
}
initial_array[NY*3 + 0].x = 1;
initial_array[NY*5 + 0].x = 1;
printComplexArray(initial_array);
float2 *transformed_array= (float2 *)malloc(sizeof(float2) * NX * NY);
cufftComplex *gpu_initial_array;
cufftComplex *gpu_transformed_array;
cudaMalloc((void **)&gpu_initial_array, NX*NY*sizeof(cufftComplex));
cudaMalloc((void **)&gpu_transformed_array, NX*NY*sizeof(cufftComplex));
cudaMemcpy(gpu_initial_array, initial_array, NX*NY*sizeof(float2), cudaMemcpyHostToDevice);
cufftHandle plan;
cufftPlan1d(&plan, NY, CUFFT_C2C, NX);
cufftExecC2C(plan, gpu_initial_array, gpu_transformed_array, CUFFT_FORWARD);
cudaMemcpy(transformed_array, gpu_transformed_array, NX*NY*sizeof(cufftComplex), cudaMemcpyDeviceToHost);
printComplexArray(transformed_array);
*/
/************************************************************ C2C ************************************************************/
/************************************************************ R2C ************************************************************/
float *initial_array = (float *)malloc(sizeof(float) * NX * NY);
for (int h = 0; h < NX; h++){
for (int w = 0; w < NY; w++)
initial_array[NY * h + w] = 0;
}
initial_array[NY*3 + 0] = 1;
printArray(initial_array);
float2 *transformed_array= (float2 *)malloc(sizeof(float2) * (NY/2+1) * NX);
cufftReal *gpu_initial_array;
cufftComplex *gpu_transformed_array;
cudaMalloc((void **)&gpu_initial_array, NX*NY*sizeof(cufftReal));
cudaMalloc((void **)&gpu_transformed_array, (NY/2+1)*NX*sizeof(cufftComplex));
cudaMemcpy(gpu_initial_array, initial_array, NX*NY*sizeof(float), cudaMemcpyHostToDevice);
cufftHandle plan;
cufftPlan1d(&plan, NY, CUFFT_R2C, NX);
// ***** cufftPlanMany *****
//int n[2] = {NX, NY};
//cufftPlanMany(&plan,1,n,NULL,1,0,NULL,1,0,CUFFT_R2C,NX);
cufftExecR2C(plan, gpu_initial_array, gpu_transformed_array);
cudaMemcpy(transformed_array, gpu_transformed_array, NX*(NY/2+1)*sizeof(cufftComplex), cudaMemcpyDeviceToHost);
printComplexArray(transformed_array);
/************************************************************ R2C ************************************************************/
cufftDestroy(plan);
free(initial_array);
free(transformed_array);
cudaFree(gpu_initial_array);
cudaFree(gpu_transformed_array);
std::system("pause");
return 0;
}
void printArray(float *my_array){
for (int h = 0; h < NX; h++){
for (int w = 0; w < NY; w++)
std::cout << my_array[NY * h + w] << " | ";
std::cout << std::endl;
}
std::cout << std::endl;
}
void printComplexArray(float2 *my_array){
for (int h = 0; h < NX; h++){
for (int w = 0; w < NY; w++)
std::cout << my_array[NY * h + w].x << " + " << my_array[NY * h + w].y << " | ";
std::cout << std::endl;
}
std::cout << std::endl;
}
It seems that your issue resides in the way you print out the result. You cannot use the same routine to print for the two cases of CUFFT_R2C and CUFFT_C2C. In the former case, you have a (NY/2+1)*NX sized output, while the the latter case you have a NY*NX sized output. The fixed code below should work.
Also, it would be also good to add proper CUDA error check and CUFFT error check, which I have also added to the code below.
#include <stdio.h>
#include <stdlib.h>
#include <cufft.h>
#include <cuda_runtime.h>
#include <assert.h>
#include <iostream>
#define NX 6
#define NY 5
void printArray(float *my_array);
void printComplexSymmetricArray(float2 *my_array);
/********************/
/* CUDA ERROR CHECK */
/********************/
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
/*********************/
/* CUFFT ERROR CHECK */
/*********************/
static const char *_cudaGetErrorEnum(cufftResult error)
{
switch (error)
{
case CUFFT_SUCCESS:
return "CUFFT_SUCCESS";
case CUFFT_INVALID_PLAN:
return "CUFFT_INVALID_PLAN";
case CUFFT_ALLOC_FAILED:
return "CUFFT_ALLOC_FAILED";
case CUFFT_INVALID_TYPE:
return "CUFFT_INVALID_TYPE";
case CUFFT_INVALID_VALUE:
return "CUFFT_INVALID_VALUE";
case CUFFT_INTERNAL_ERROR:
return "CUFFT_INTERNAL_ERROR";
case CUFFT_EXEC_FAILED:
return "CUFFT_EXEC_FAILED";
case CUFFT_SETUP_FAILED:
return "CUFFT_SETUP_FAILED";
case CUFFT_INVALID_SIZE:
return "CUFFT_INVALID_SIZE";
case CUFFT_UNALIGNED_DATA:
return "CUFFT_UNALIGNED_DATA";
}
return "<unknown>";
}
#define cufftSafeCall(err) __cufftSafeCall(err, __FILE__, __LINE__)
inline void __cufftSafeCall(cufftResult err, const char *file, const int line)
{
if( CUFFT_SUCCESS != err) {
fprintf(stderr, "CUFFT error in file '%s', line %d\n %s\nerror %d: %s\nterminating!\n",__FILE__, __LINE__,err, \
_cudaGetErrorEnum(err)); \
cudaDeviceReset(); assert(0); \
}
}
/********/
/* MAIN */
/********/
int main(){
float *initial_array = (float *)malloc(sizeof(float) * NX * NY);
for (int h = 0; h < NX; h++){
for (int w = 0; w < NY; w++)
initial_array[NY * h + w] = 0;
}
initial_array[NY*3 + 0] = 1;
printArray(initial_array);
float2 *transformed_array= (float2 *)malloc(sizeof(float2) * (NY/2+1) * NX);
cufftReal *gpu_initial_array;
cufftComplex *gpu_transformed_array;
gpuErrchk(cudaMalloc((void **)&gpu_initial_array, NX*NY*sizeof(cufftReal)));
gpuErrchk(cudaMalloc((void **)&gpu_transformed_array, (NY/2+1)*NX*sizeof(cufftComplex)));
gpuErrchk(cudaMemcpy(gpu_initial_array, initial_array, NX*NY*sizeof(float), cudaMemcpyHostToDevice));
cufftHandle plan;
cufftSafeCall(cufftPlan1d(&plan, NY, CUFFT_R2C, NX));
cufftSafeCall(cufftExecR2C(plan, (cufftReal*)gpu_initial_array, (cufftComplex*)gpu_transformed_array));
gpuErrchk(cudaMemcpy(transformed_array, gpu_transformed_array, NX*(NY/2+1)*sizeof(cufftComplex), cudaMemcpyDeviceToHost));
printComplexSymmetricArray(transformed_array);
cufftSafeCall(cufftDestroy(plan));
free(initial_array);
free(transformed_array);
gpuErrchk(cudaFree(gpu_initial_array));
gpuErrchk(cudaFree(gpu_transformed_array));
std::system("pause");
return 0;
}
/***********************/
/* PRINTOUT REAL ARRAY */
/***********************/
void printArray(float *my_array){
for (int h = 0; h < NX; h++){
for (int w = 0; w < NY; w++)
std::cout << my_array[NY * h + w] << " | ";
std::cout << std::endl;
}
std::cout << std::endl;
}
/************************************/
/* PRINTOUT COMPLEX SYMMETRIC ARRAY */
/************************************/
void printComplexSymmetricArray(float2 *my_array){
for (int h = 0; h < NX; h++){
for (int w = 0; w < NY/2+1; w++)
std::cout << my_array[(NY/2+1) * h + w].x << " + " << my_array[(NY/2+1) * h + w].y << " | ";
std::cout << std::endl;
}
std::cout << std::endl;
}