CUDA fft 2d different results from MATLAB fft on 2d - matlab

I have tried to do a simple fft and compare the results between MATLAB and CUDA on 2d arrays.
MATLAB:
array of 9 numbers 1-9
I = [1 2 3
4 5 6
7 8 9];
and use this code:
fft(I)
gives the results:
12.0000 + 0.0000i 15.0000 + 0.0000i 18.0000 + 0.0000i
-4.5000 + 2.5981i -4.5000 + 2.5981i -4.5000 + 2.5981i
-4.5000 - 2.5981i -4.5000 - 2.5981i -4.5000 - 2.5981i
And CUDA code:
int FFT_Test_Function() {
int width = 3;
int height = 3;
int n = width * height;
double in[width][height];
Complex out[width][height];
for (int i = 0; i<width; i++)
{
for (int j = 0; j < height; j++)
{
in[i][j] = (i * width) + j + 1;
}
}
// Allocate the buffer
cufftDoubleReal *d_in;
cufftDoubleComplex *d_out;
unsigned int out_mem_size = sizeof(cufftDoubleComplex)*n;
unsigned int in_mem_size = sizeof(cufftDoubleReal)*n;
cudaMalloc((void **)&d_in, in_mem_size);
cudaMalloc((void **)&d_out, out_mem_size);
// Save time stamp
milliseconds timeStart = getCurrentTimeStamp();
cufftHandle plan;
cufftResult res = cufftPlan2d(&plan, width, height, CUFFT_D2Z);
if (res != CUFFT_SUCCESS) { cout << "cufft plan error: " << res << endl; return 1; }
cudaCheckErrors("cuda malloc fail");
for (int i = 0; i < width; i++)
{
cudaMemcpy(d_in + (i * width), &in[i], height * sizeof(double), cudaMemcpyHostToDevice);
cudaCheckErrors("cuda memcpy H2D fail");
}
cudaCheckErrors("cuda memcpy H2D fail");
res = cufftExecD2Z(plan, d_in, d_out);
if (res != CUFFT_SUCCESS) { cout << "cufft exec error: " << res << endl; return 1; }
for (int i = 0; i < width; i++)
{
cudaMemcpy(&out[i], d_out + (i * width), height * sizeof(Complex), cudaMemcpyDeviceToHost);
cudaCheckErrors("cuda memcpy H2D fail");
}
cudaCheckErrors("cuda memcpy D2H fail");
milliseconds timeEnd = getCurrentTimeStamp();
milliseconds totalTime = timeEnd - timeStart;
std::cout << "Total time: " << totalTime.count() << std::endl;
return 0;
}
In this CUDA code i got the result:
You can see that CUDA gives different results.
What am I missing?
Thank you very much for your attention!

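The cuFFT result looks correct, but your MATLAB code is wrong: fft applied to a matrix computes a separate 1-D FFT of each column (note that the first output row above is just the column sums 12, 15, 18), whereas cufftPlan2d performs a full 2-D transform. The MATLAB/Octave equivalent is fft2: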
octave:1> I = [ 1 2 3; 4 5 6; 7 8 9 ]
I =
1 2 3
4 5 6
7 8 9
octave:2> fft2(I)
ans =
45.00000 + 0.00000i -4.50000 + 2.59808i -4.50000 - 2.59808i
-13.50000 + 7.79423i 0.00000 + 0.00000i 0.00000 + 0.00000i
-13.50000 - 7.79423i 0.00000 - 0.00000i 0.00000 - 0.00000i
Note the use of fft2.

Related

Neural Net with batch size 1 and one input at a time

I tried a very simple autoencoder: 3 inputs, one layer with 2 neurons, and an output layer with 3.
Just numbers
0.01 0.02 ........... 1.0
0.011 0.021 1.01
0.012 0.022 1.02
That works if all 100 samples are given as the input at once, with 200 epochs.
for (size_t i = 0; i < 100; i++)
{
for (size_t j = 0; j < 3; j++)
{
samples[i][0] = (float)(i+1) * 0.01;
samples[i][1] = (float)(i + 1) * 0.01 + 0.01;
samples[i][2] = (float)(i + 1) * 0.01 + 0.01 + 0.01;
}
}
net.fit<mse>(optimizer, samples, samples, 100, NUM_EPOCHS
, onMinibatch, onEpoch);
But feeding one sample at a time doesn't work.
Like this:
for (int i = 0; i < NUM_EPOCHS; i++)
{
for (int j = 0; j < 100; j++)
{
samples[0][0] = (float)(j + 1) * 0.01;
samples[0][1] = (float)(j + 1) * 0.01 + 0.01;
samples[0][2] = (float)(j + 1) * 0.01 + 0.01 + 0.01;
net.fit<mse>(optimizer, samples, samples, 1, 1
, onMinibatch, onEpoch);
}
}
Is it that bad to feed single samples into a neural network?
Many thanks for your help.
I made a mistake; using
net.train_once<mse>...
instead of net.fit does the job.

how to solve subscript out of range in qbasic

I urgently need help with a program that is essential to my work, and I have little knowledge of programming in BASIC.
I have an equation, and I use the code below to solve it.
When I run the program, this error appears:
subscript out of range
Is there any solution to this problem?
' Banner and input section: prints a description of the program and the six
' expected inputs, reads them with a single Input statement, then echoes them.
0 Print "******** impact *******"
20 Print "____________"
30 Print "this programs is used to solve impact integral"
40 Print "equation of simply supported slab to "
50 Print "optain the following"
60 Print " (1) force _time history"
70 Print " (2) central deflection - time history"
80 Print "-----------------"
90 Print "input data:"
100 Print " (1) FUNDAMENTAL NATURAL FREQUANCY (RAD/SEC)--- W1"
110 Print " (2) STRIKER MASS (KG.) ----- Mst"
120 Print " (3) MASS OF SLAB (KG)---- Ms"
130 Print " (4) hertz constant (n/m^1.5)----k"
140 Print " (5) STRICKER VELOCITY (M/S)----Vo"
150 Print " (6) NUMBER OF MODES----N"
160 Print "_________"
' All six values are read in one statement, in the order listed above.
170 Input " W11, MST, MS, K, VO, N", W11, MST, MS, K, VO, N
180 Print "W1="; W11; "RAD/SEC"
190 Print "MST="; MST; "KG"
200 Print "MS="; MS; "KG"
210
220 Print " K = "; K; "N/M^1.5"
' NOTE(review): STANDARD is never assigned in the visible program, so this
' prints the value of an unset variable; presumably a leftover from another
' BASIC dialect - confirm and remove if so.
230 Print STANDARD
240 Print "VO="; VO; "M/S"
241 Print " N = "; N
250 Print
260 Print
' Setup section: derives the time step, allocates the work arrays, fills the
' per-mode tables W and Z, zeroes the initial state, and computes the
' constant B used by the time-stepping loop.
' K and VO are copied because K is reused as a loop counter below.
270 K1 = K
280 V = VO
290 W1 = W11 / 2
' TINF is computed from MST, K1 and V and scaled by 1000; DT is one tenth of it.
300 TINF = 2.94 * (MST / (.8 * K1 * V ^ .5)) ^ .4 * 1000
310 DT = TINF / 10
320 M = 20
' NOTE(review): PROUND is not defined anywhere in the visible program.
' Presumably it is meant to round DT; if the interpreter has no such function
' it may treat PROUND(DT, 0) as an array reference, which could itself raise
' "subscript out of range" - confirm.
330 DT = PROUND(DT, 0)
' Undo the factor of 1000 applied on line 300.
340 DT = DT / 1000
350 M = 20
' NOTE(review): Option Base appears here after executable statements;
' confirm the interpreter accepts it at this position.
360 Option Base 1
' Arrays are first dimensioned with constant bounds, then re-dimensioned to
' the run-time sizes N and M on lines 380 and 450.
' NOTE(review): confirm the interpreter allows ReDim on arrays declared with Dim.
370 Dim W(11, 11), Z(11, 11), F(30), D(30), BM(30), SH(30), A(30), T(30), DF(30), S(11, 11, 30), C(11, 11, 30)
380 ReDim W(N, N), Z(N, N)
' Only odd indices are visited (Step 2); the input K is overwritten here.
390 For I = 1 To N Step 2
400 For K = 1 To N Step 2
410 W(K, I) = W1 * (I ^ 2 + K ^ 2)
420 Z(K, I) = W(K, I) * DT
430 Next K
440 Next I
450 ReDim F(M), D(M), A(M), T(M), DF(M), BM(M), SH(M), S(N, N, M), C(N, N, M)
' NOTE(review): presumably intended to zero all four elements. In BASIC only
' the first "=" is an assignment; the remaining ones are comparisons, so only
' F(1) is assigned here - confirm.
460 F(1) = D(1) = A(1) = T(1) = 0
470 For I = 1 To N Step 2
480 For K = 1 To N Step 2
' NOTE(review): same chained "=" pattern as line 460.
490 S(K, I, 1) = C(K, I, 1) = 0
500 Next K
510 Next I
' B1 accumulates a sum over the odd (K, I) index pairs.
520 B1 = 0
530 For I = 1 To N Step 2
540 For K = 1 To N Step 2
550 B1 = B1 + (1 - Sin(Z(K, I)) / Z(K, I)) / W(K, I) ^ 2
560 Next K
570 Next I
580 B = -DT ^ 2 / (6 * MST) - 4 * B1 / MS
590 VE = 0
' Time-stepping loop: for each step I = 2..M it computes the time T(I), the
' force F(I), the quantities D(I), S(K, J, I), C(K, J, I) and the deflection
' DF(I). The loop is left early (line 990) as soon as F(I) is zero.
600 For I = 2 To M
610 T(I) = (I - 1) * DT
620 GM = 0
' NOTE(review): the target 970 is a "Next J" inside a For loop, so taking this
' jump would hit Next without a matching For. VE is set to 0 on line 590 and
' never changed in the visible program, so the jump appears to be dead - confirm.
630 If VE = 1 Then 970
' GM = F(1) + ... + F(I - 1)
640 For J = 2 To I
650 GM = GM + F(J - 1)
660 Next J
' AA accumulates a sum over the odd (K, J) index pairs using the previous step.
670 AA = 0
680 For J = 1 To N Step 2
690 For K = 1 To N Step 2
700 FF = F(I - 1) * (Sin(Z(K, J)) / Z(K, J) - Cos(Z(K, J))) / W(K, J)
710 AA = AA + 4 * (Cos(Z(K, J)) * S(K, J, I - 1) + Sin(Z(K, J)) * C(K, J, I - 1) + FF) / (MS * W(K, J))
720 Next K
730 Next J
740 A(I - 1) = V * (I - 1) * DT - (D(I - 1) + DT ^ 2 * (GM - F(I - 1) / 6)) / MST - AA
' The scalar F is a separate variable from the array F(); it holds the current
' force estimate for the iteration on lines 760-810.
750 F = F(I - 1)
' A negative base would make the ^ 1.5 on line 770 undefined; the force is set
' to zero instead (line 840).
760 If A(I - 1) + B * F < 0 Then 840
' Iterate F1 = K1 * (A + B * F) ^ 1.5 until successive estimates differ by
' less than 10.
' NOTE(review): the sign check on line 760 is not repeated inside the
' iteration (GoTo 770) - confirm the base cannot go negative here.
770 F1 = (A(I - 1) + B * F) ^ 1.5 * K1
780 X = Abs(F1 - F)
790 If X < 10 Then 820
800 F = F1
810 GoTo 770
820 F(I) = F1
830 GoTo 850
840 F(I) = 0
850 D(I) = D(I - 1) + DT ^ 2 * (GM + (F(I) - F(I - 1)) / 6)
' Advance S and C from step I - 1 to step I for every odd (K, J) pair.
860 For J = 1 To N Step 2
870 For K = 1 To N Step 2
880 S(K, J, I) = Cos(Z(K, J)) * S(K, J, I - 1) + Sin(Z(K, J)) * C(K, J, I - 1) + (1 - Sin(Z(K, J)) / Z(K, J)) * (F(I) - F(I - 1)) / W(K, J) + (1 - Cos(Z(K, J))) * F(I - 1) / W(K, J)
890 C(K, J, I) = Cos(Z(K, J)) * C(K, J, I - 1) - Sin(Z(K, J)) * S(K, J, I - 1) + (1 - Cos(Z(K, J))) / Z(K, J) * (F(I) - F(I - 1)) / W(K, J) + Sin(Z(K, J)) * F(I - 1) / W(K, J)
900 Next K
910 Next J
' The scalar DF accumulates the sum that is stored into the array DF() at line 980.
920 DF = 0
930 For J = 1 To N Step 2
940 For K = 1 To N Step 2
950 DF = DF + 4 * S(K, J, I) / W(K, J) / MS
960 Next K
970 Next J
980 DF(I) = DF
' Stop stepping once the force is zero; I keeps its current value and is used
' as the row count by the output section.
990 If F(I) = 0 Then 1010
1000 Next I
' Output section: prints one table row per computed time step with the time
' (scaled by 1000), the force (divided by 1000) and the deflection (scaled by
' 1000).
1010 Print "----------------------------------------------------------"
1020 Print "{TIME (MS)},{FORCE (KN)},{DEFLECTION(MM)}"
1030 Print "----------------------------------------------------------"
' I is the step at which the loop at line 600 stopped. If that loop ran to
' completion, I is M + 1, which is past the end of the arrays (they are
' dimensioned to M), so the row count is clamped to M.
1040 II = I
1045 If II > M Then II = M
1050 For O = 1 To II
' The time column must be indexed with the loop variable O (letter), not the
' digit 0: with Option Base 1 in effect, T(0) is a subscript out of range.
1060
1070 Print Tab(1); ":"; Tab(5); T(O) * 1000; Tab(18); ":"; Tab(22); F(O) / 1000; Tab(34); ":"; Tab(42); DF(O) * 1000; Tab(56); ":"
1080 Print "-----------------------------------------------------------"
1090 Next O
1100 End

Why filter2D in OpenCV gives different results than imfilter in Matlab?

I have an original image:
I then read it, create a PSF, and blur it in Matlab:
lenawords1=imread('lenawords.bmp');
%create PSF
sigma=6;
PSFgauss=fspecial('gaussian', 8*sigma+1, sigma);
%blur it
lenablur1=imfilter(lenawords1, PSFgauss, 'conv');
lenablurgray1=mat2gray(lenablur1);
PSFgauss1 = PSFgauss/max(PSFgauss(:));
and I saved the blurred image:
imwrite(lenablurgray1, 'lenablur.bmp');
imwrite(PSFgauss1, 'PSFgauss.bmp');
Their values in Matlab and OpenCV match.
Matlab:
disp(lenablurgray1(91:93, 71:75)*256)
142.2222 147.9111 153.6000 159.2889 164.9778
153.6000 164.9778 170.6667 176.3556 176.3556
164.9778 176.3556 182.0444 187.7333 187.7333
disp(PSFgauss1(24:26, 24:26)*256)
248.9867 252.4690 248.9867
252.4690 256.0000 252.4690
248.9867 252.4690 248.9867
OpenCV:
Mat img = imread("lenablur.bmp");
cvtColor(img, img, cv::COLOR_BGR2GRAY);
cv::Mat kernel = imread("PSFgauss.bmp");
cvtColor(kernel, kernel, cv::COLOR_BGR2GRAY);
for (int r = 90; r < 93; r++) {
for (int c = 70; c < 75; c++) {
cout << (int)img.at<uchar>(r, c) << " ";
}
cout << endl;
}
142 147 153 159 164
153 164 ...
164 ...
cout << "PSF" << endl;
for (int r = 23; r < 26; r++) {
for (int c = 23; c < 26; c++) {
cout << (int)kernel.at<uchar>(r, c) << " ";
}
cout << endl;
}
248 251 248
251 255 251
248 251 248
However, the values from filter2D in OpenCV and imfilter in Matlab do not match:
Matlab:
conv1=imfilter(lenablurgray1, PSFgauss1, 'conv');
disp(conv1(91:93, 71:75))
91.8094 96.1109 99.8904 103.1280 105.8210
97.3049 101.7757 105.6828 109.0073 111.7486
102.0122 106.5953 110.5755 113.9353 116.6769
OpenCV:
Mat conv1;
filter2D(img, conv1, img.depth(), kernel, Point(-1, -1), 0,
BORDER_REFLECT);
for (int r = 90; r < 93; r++) {
for (int c = 70; c < 75; c++) {
cout << (int)conv1.at<uchar>(r, c) << " ";
}
cout << endl;
}
255 255 255 255 255
255 255 255 255 255
255 255 255 255 255
Why are the filter2D values wrong?
EDIT2:
cv::Mat kernel = imread("PSFgauss.bmp");
cvtColor(kernel, kernel, cv::COLOR_BGR2GRAY);
kernel.convertTo(kernel, CV_64F);
cv::Scalar kernelsum= cv::sum(kernel);
divide(kernel, kernelsum, kernel);
filter2D(img, conv1, img.depth(), kernel, Point(-1, -1), 0, BORDER_REFLECT);
for (int r = 90; r < 93; r++) {
for (int c = 70; c < 75; c++) {
cout << (int)conv1.at<uchar>(r, c) << " ";
}
gives
103 108 112 116 119
109 ..
115 ..
which matches the Matlab values of conv1 when multiplied by the factor 1.133
disp(conv1(91:93, 71:75) * 1.133)
104.0201 108.8937 113.1758 116.8441 119.8952
110.2464 115.3118 119.7386 123.5053 126.6112
115.5798 120.7725 125.2820 129.0887 132.1950
However, the values differ when I divide img by conv1:
Matlab:
conv2 = lenablurgray1./conv1
disp(conv2(91:93, 71:75))
0.0061 0.0060 0.0060 0.0060 0.0061
0.0062 0.0063 0.0063 0.0063 0.0062
0.0063 0.0065 0.0064 0.0064 0.0063
OpenCV:
Mat conv2;
divide(img, conv1, conv2);
for (int r = 90; r < 93; r++) {
for (int c = 70; c < 75; c++) {
cout << (int)conv2.at<uchar>(r, c) << " ";
}
cout << endl;
}
1 1 1 1 1
1 1 ...
1 ...
why is this?
When you do
lenablur1 = imfilter(lenawords1, PSFgauss, 'conv');
in MATLAB, PSFgauss is normalized. That means that its values sum up to 1:
sum(PSFgauss(:)) == 1.0 % or at least it should be very close
Next, you scale it so its maximum value is 1, so that you can save it as a BMP file. This additionally causes rounding of the values to 256 distinct integers.
Then, in OpenCV, you read in the kernel using imread("PSFgauss.bmp"), and convert back to a grey-value image. This results in a kernel that has integer values in the range [0,255]. In particular, it is not normalized.
What happens then in the convolution is that you multiply each kernel element by an image pixel, and sum up all the values to produce one output value. If the kernel is normalized, this amounts to a weighted averaging. If the kernel is not normalized, the mean image intensity will not be preserved. Since the kernel here has values much larger than it originally had, the output values will be much larger than those of the input image. Because the input image is an 8-bit unsigned integer, and OpenCV uses saturated addition, the operation results in the value 255 for every pixel.
In mathematical notation, in MATLAB you do
g = f * k
(* is convolution, f is the image, k is the kernel). In OpenCV you do
g' = f * Ck
(where C is a constant approximately equal to 255/max(PSFgauss(:)), which is the factor by which the kernel was multiplied during the transition from MATLAB to OpenCV).
Thus, dividing by C should bring the kernel back to the state it was in when you used it for convolving in MATLAB. Note, however, that you will not be able to undo the rounding caused by saving the kernel as an 8-bit image.
The simplest way of deriving C in OpenCV is to divide kernel by its sum:
kernel.convertTo(kernel, CV_64F);
kernel /= cv::sum(kernel);

Getting expression too complex error .please correct me if i have done thing wrong

let b0 = UInt32(block[block.startIndex + 0 + (0 << 2)]) << 0 | UInt32(block[block.startIndex + 1 + (0 << 2)]) << 8 | UInt32(block[block.startIndex + 2 + (0 << 2)]) << 16
b0 = b0 | UInt32(block[block.startIndex + 3 + (0 << 2)]) << 24
let b1 = UInt32(block[block.startIndex + 0 + (1 << 2)]) << 0 | UInt32(block[block.startIndex + 1 + (1 << 2)]) << 8 | UInt32(block[block.startIndex + 2 + (1 << 2)]) << 16
b1 = b1 | UInt32(block[block.startIndex + 3 + (1 << 2)]) << 24
let b2 = UInt32(block[block.startIndex + 0 + (2 << 2)]) << 0 | UInt32(block[block.startIndex + 1 + (2 << 2)]) << 8 | UInt32(block[block.startIndex + 2 + (2 << 2)]) << 16
b2 = b2 | UInt32(block[block.startIndex + 3 + (2 << 2)]) << 24
let b3 = UInt32(block[block.startIndex + 0 + (3 << 2)]) << 0 | UInt32(block[block.startIndex + 1 + (3 << 2)]) << 8 | UInt32(block[block.startIndex + 2 + (3 << 2)]) << 16
b3 = b3 | UInt32(block[block.startIndex + 3 + (3 << 2)]) << 24
If you just format this code properly, you'll see there's a very clear pattern:
let start = block.startIndex
let b0 = UInt32(block[start + 0 + (0 << 2)]) << 0
| UInt32(block[start + 1 + (0 << 2)]) << 8
| UInt32(block[start + 2 + (0 << 2)]) << 16
| UInt32(block[start + 3 + (0 << 2)]) << 24
let b1 = UInt32(block[start + 0 + (1 << 2)]) << 0
| UInt32(block[start + 1 + (1 << 2)]) << 8
| UInt32(block[start + 2 + (1 << 2)]) << 16
| UInt32(block[start + 3 + (1 << 2)]) << 24
let b2 = UInt32(block[start + 0 + (2 << 2)]) << 0
| UInt32(block[start + 1 + (2 << 2)]) << 8
| UInt32(block[start + 2 + (2 << 2)]) << 16
| UInt32(block[start + 3 + (2 << 2)]) << 24
let b3 = UInt32(block[start + 0 + (3 << 2)]) << 0
| UInt32(block[start + 1 + (3 << 2)]) << 8
| UInt32(block[start + 2 + (3 << 2)]) << 16
| UInt32(block[start + 3 + (3 << 2)]) << 24
Each b constant is just the numbers 0...3 transformed in similar ways, all bitwise-OR'ed together. Sounds like a job for map/reduce:
let start = block.startIndex
// Each word is assembled from 4 consecutive bytes, least significant byte
// first: byte k of word w is block[start + k + (w << 2)] shifted left by 8 * k bits.
// The shift amount is parenthesized because << binds tighter than * in Swift.
let b0 = (0...3).lazy.map{ UInt32(block[start + $0 + (0 << 2)]) << ($0 * 8) }.reduce(0, |)
let b1 = (0...3).lazy.map{ UInt32(block[start + $0 + (1 << 2)]) << ($0 * 8) }.reduce(0, |)
let b2 = (0...3).lazy.map{ UInt32(block[start + $0 + (2 << 2)]) << ($0 * 8) }.reduce(0, |)
let b3 = (0...3).lazy.map{ UInt32(block[start + $0 + (3 << 2)]) << ($0 * 8) }.reduce(0, |)
This can be simplified even further if you make a b array with 4 elements, rather than 4 separate b# variables:
let start = block.startIndex
// b[wordIndex] is the 32-bit word built from the 4 bytes starting at offset
// wordIndex * 4 (written wordIndex << 2), least significant byte first.
let b = (0...3).map{ wordIndex -> UInt32 in
    return (0...3).lazy
        // Parenthesized shift amount: << binds tighter than * in Swift.
        .map{ UInt32(block[start + $0 + (wordIndex << 2)]) << ($0 * 8) }
        .reduce(0, |)
}

Unexpected computation result [duplicate]

This question already has answers here:
Why is 24.0000 not equal to 24.0000 in MATLAB?
(6 answers)
Closed 8 years ago.
Here is my whole function:
function val = deceptive_3_nd_function( arr )
% DECEPTIVE_3_ND_FUNCTION  Evaluate a piecewise-linear function on each row of arr.
%   val = deceptive_3_nd_function(arr) returns a column vector with one value
%   per row of arr. For every element arr(part, i), one of four linear pieces
%   (selected by where the element falls relative to alpha(i)) is added to the
%   row's running sum; the sum is then averaged over the columns, raised to
%   the power beta and negated.
%
%   alpha has two entries and is indexed by column, so arr can have at most
%   two columns. Elements outside [0, 1] match none of the branches and
%   contribute nothing to the sum.
%
%   The lines prefixed with "*" are markers added by the poster to point at
%   the statements under discussion; they are not MATLAB syntax.
alpha = [0.3 0.7];
beta = 0.2;
val = zeros(size(arr,1),1);
for part=1:size(arr,1)
for i=1:size(arr,2)
% Piece 1: 0 <= x <= 4/5*alpha
if 0 <= arr(part, i) && arr(part, i) <= 4/5*alpha(i);
val(part) = val(part) - arr(part, i)/alpha(i) + 4/5;
% Piece 2: 4/5*alpha < x <= alpha
elseif 4/5*alpha(i) < arr(part, i) && arr(part, i) <= alpha(i)
val(part) = val(part) + 5*arr(part, i)/alpha(i) - 4;
% Piece 3: alpha < x <= (1+4*alpha)/5, computed in separate steps
elseif alpha(i) < arr(part, i) && arr(part, i) <= (1+4*alpha(i))/5
* a_ = 5.0 * ( arr(part, i) - alpha(i) );
* b_ = alpha(i)-1.0;
* c_ = a_/b_;
* val(part) = val(part) + c_ + 1.0;
% Piece 4: (1+4*alpha)/5 < x <= 1
elseif (1+4*alpha(i))/5 < arr(part, i) && arr(part, i) <= 1
val(part) = val(part) + (arr(part, i)-1)/(1-alpha(i)) +4/5;
end
end
% Average over the columns, raise to beta, and negate.
val(part) = -(1/size(arr, 2)*val(part))^beta;
end
end
In the lines marked with asterisks I got unexpected results. As you can see, I tried to isolate the problem, and this is where it led me:
K>> arr(part, i)
ans =
0.7600
K>> arr(part, i)==0.76
ans =
1
K>> alpha(i)
ans =
0.7000
K>> alpha(i)==0.7
ans =
1
K>> arr(part, i) - alpha(i)
ans =
0.0600
K>> arr(part, i) - alpha(i) == 0.06
ans =
0
Why is this happening..?
Looks like you've run into floating point error there.
(0.76 - 0.7) == 0.06
ans =
0
num2str(0.76 - 0.7, '%0.20f')
ans =
0.06000000000000005300