I have to compute the SVD of a non-square matrix, and I am using LAPACK's dgesvd_ routine for that. I have no problems with a square matrix: the values I receive match the expected ones from MATLAB. But I cannot produce the expected results for a 4x5 matrix. I know the solution should match MATLAB's, since the singular values are returned sorted in descending order. I can see, though, that some of the singular values can be found in the original input array A passed to the SVD. That indicates I must either be calling dgesvd_ incorrectly or be reading the results incorrectly; this might have to do with the leading array dimensions.
In each case I first issue a call with LWORK = -1 to query LAPACK for the optimal values, which are then passed to the following call that computes the SVD. I am not sure about the meaning of all the returned values, whether they are valid, or whether they should be changed. I assume they are OK, so I use them in the following call to compute the SVD.
So this code works as expected (3x3 matrix):
41 /* Reference data. */
42 double ref_array_A[3][3] = {
43 { 1, 2, 3},
44 { 2, 4, 5 },
45 { 3, 5, 6 }
46 };
47
48 double ref_array_U[3][3] = {
49 { -0.327985, -0.736976, -0.591009 },
50 { -0.591009, -0.327985, 0.736976 },
51 { -0.736976, 0.591009, -0.327985 }
52 };
53
54 double ref_array_Sigma[3][1] = {
55 { 11.344814 },
56 { 0.515729 },
57 { 0.170915 }
58 };
59
60 double ref_array_VT[3][3] = {
61 { -0.327985, -0.591009, -0.736976 },
62 { 0.736976, 0.327985, -0.591009 },
63 { -0.591009, 0.736976, -0.327985 }
64 };
66 /* MATLAB result
67 *
68 * >> A = [ 1, 2, 3; 2, 4, 5; 3, 5, 6]
69 *
70 * A =
71 * 1 2 3
72 * 2 4 5
73 * 3 5 6
74 *
75 * >> [U, S, V] = svd(A)
76 *
77 * U =
78 * -0.3280 -0.7370 -0.5910
79 * -0.5910 -0.3280 0.7370
80 * -0.7370 0.5910 -0.3280
81 *
82 * S =
83 * 11.3448 0 0
84 * 0 0.5157 0
85 * 0 0 0.1709
86 *
87 * V =
88 * -0.3280 0.7370 -0.5910
89 * -0.5910 0.3280 0.7370
90 * -0.7370 -0.5910 -0.3280
91 */
double WORK_QUERY = 0;
206
207
208 /* Call dgesvd_ with lwork = -1 to query optimal workspace size. */
209
210 JOBU = 'A';
211 JOBVT = 'A';
212 M = 3;
213 N = 3;
214 LDA = 3; /* (out) */
215 LDU = 3; /* (out) */
216 S = NULL; /* (don't care) */
217 U = NULL; /* (don't care) */
218 VT = NULL; /* (don't care) */
219 LDVT = 3; /* (out) */
220 WORK = NULL; /* (out) , because LWORK is 0 do not care */
221 LWORK = 4 * M * N * M *N + 6 * M * N + dd_max(M, N);
222
223 A = calloc(M * N, sizeof(double));
224 if (!A) {
225 goto ddt2_fail_sys;
226 }
227 for (i = 0; i < M; ++i) {
228 for (j = 0; j < N; ++j) {
229 A[i * N + j] = ref_array_A[i][j];
230 }
231 }
232
233 S = calloc(dd_min(M, N), sizeof(double));
234 if (!S) {
235 goto ddt2_fail_sys;
236 }
237
238 U = calloc(LDU * M, sizeof(double));
239 if (!U) {
240 goto ddt2_fail_sys;
241 }
242
243 VT = calloc(LDVT * N, sizeof(double));
244 if (!A) {
245 goto ddt2_fail_sys;
246 }
247
248 fprintf(stderr, "Reference array A:\n");
249 dd_walk_dbl_arr_rowwise(A, M, N, cb_dbl, cb_dbl_row_end);
250
251 fprintf(stderr, "Reference array U:\n");
252 dd_walk_dbl_arr_rowwise(&ref_array_U[0][0], M, M, cb_dbl, cb_dbl_row_end);
253
254 fprintf(stderr, "Reference array Sigma:\n");
255 dd_walk_dbl_arr_rowwise(&ref_array_Sigma[0][0], dd_min(M, N), 1, cb_dbl, cb_dbl_row_end);
256
257 fprintf(stderr, "Reference array VT:\n");
258 dd_walk_dbl_arr_rowwise(&ref_array_VT[0][0], N, N, cb_dbl, cb_dbl_row_end);
LWORK = -1;
261 dgesvd_("A", "A", &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, &WORK_QUERY, &LWORK, &INFO);
262 if (INFO != 0) {
263 if (INFO < 0) {
264 fprintf(stderr, "Error on LAPACK's dgesvd_ query: \"the %d-th argument had illegal value\"\n", INFO);
265 } else {
266 fprintf(stderr, "Error on LAPACK's dgesvd_ query: \"DBDSDC didn't converge, updating process failed\"\n");
267 }
268 return -1;
269 }
270
271 LWORK = (int) WORK_QUERY;
272 WORK = calloc(LWORK, sizeof(double));
273 if (!WORK) {
274 goto ddt2_fail_sys;
275 }
276
277 fprintf(stderr, "LAPACK's dgesvd_ query optimal results: LDA %d, LDU %d, LDVT %d, LWORK %d, WORK_QUERY %f\n", LDA, LDU, LDVT, LWORK, WORK_QUERY);
278 fprintf(stderr, "Rest of params: M %d, N %d\n", M, N);
279
280 /* Compute SVD. */
281 dgesvd_(&JOBU, &JOBVT, &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, WORK, &LWORK, &INFO);
282 if (INFO != 0) {
283 if (INFO < 0) {
284 fprintf(stderr, "Error on LAPACK's dgesvd_ query: \"the %d-th argument had illegal value\"\n", INFO);
285 } else {
286 fprintf(stderr, "Error on LAPACK's dgesvd_ query: \"DBDSDC didn't converge, updating process failed\"\n");
287 }
288 return -1;
289 }
290
291 fprintf(stderr, "LAPACK's dgesvd_ SVD completed\n");
292
293 fprintf(stderr, "Result A:\n");
294 dd_walk_dbl_arr_rowwise(A, M, N, cb_dbl, cb_dbl_row_end);
295
296 fprintf(stderr, "Result U**T:\n");
297 dd_walk_dbl_arr_rowwise(U, LDU, M, cb_dbl, cb_dbl_row_end);
298 fprintf(stderr, "Result U:\n");
299 dd_walk_dbl_arr_colwise(U, LDU, M, cb_dbl, cb_dbl_row_end);
300
301
302 fprintf(stderr, "Result S:\n");
303 dd_walk_dbl_arr_rowwise(S, dd_min(M, N), 1, cb_dbl, cb_dbl_row_end);
304
305 fprintf(stderr, "Result VT:\n");
306 dd_walk_dbl_arr_rowwise(VT, LDVT, N, cb_dbl, cb_dbl_row_end);
307
308 free(WORK);
309 free(A);
310 free(S);
311 free(U);
312 free(VT);
313
314 return 0;
Proper result:
peter#xx:~$ ./test4
Reference array A:
1.000000 2.000000 3.000000
2.000000 4.000000 5.000000
3.000000 5.000000 6.000000
Reference array U:
-0.327985 -0.736976 -0.591009
-0.591009 -0.327985 0.736976
-0.736976 0.591009 -0.327985
Reference array Sigma:
11.344814
0.515729
0.170915
Reference array VT:
-0.327985 -0.591009 -0.736976
0.736976 0.327985 -0.591009
-0.591009 0.736976 -0.327985
LAPACK's dgesvd_ query optimal results: LDA 3, LDU 3, LDVT 3, LWORK 201, WORK_QUERY 201.000000
Rest of params: M 3, N 3
LAPACK's dgesvd_ SVD completed
Result A:
-3.741657 0.421793 0.632690
10.643576 1.261481 -0.720622
0.478213 -0.279401 -0.211863
Result U**T:
-0.327985 -0.591009 -0.736976
-0.736976 -0.327985 0.591009
-0.591009 0.736976 -0.327985
Result U:
-0.327985 -0.736976 -0.591009
-0.591009 -0.327985 0.736976
-0.736976 0.591009 -0.327985
Result S:
11.344814
0.515729
0.170915
Result VT:
-0.327985 0.736976 -0.591009
-0.591009 0.327985 0.736976
-0.736976 -0.591009 -0.327985
But not this (4x5 matrix):
39 /* Reference data. */
40 double ref_array_A[4][5] = {
41 { 1, 0, 0, 0, 2 },
42 { 0, 0, 3, 0, 0 },
43 { 0, 0, 0, 0, 0 },
44 { 0, 2, 0, 0, 0 }
45 };
46
47 double ref_array_U[4][4] = {
48 { 0, 0, 1, 0 },
49 { 0, 1, 0, 0 },
50 { 0, 0, 0, -1 },
51 { 1, 0, 0, 0 }
52 };
53
54 double ref_array_Sigma[4][5] = {
55 { 2, 0, 0, 0, 0 },
56 { 0, 3, 0, 0, 0 },
57 { 0, 0, 2.236068, 0, 0 },
58 { 0, 0, 0, 0, 0 }
59 };
60
61 double ref_array_VT[5][5] = {
62 { 0, 1, 0, 0, 0 },
63 { 0, 0, 1, 0, 0 },
64 { 0.447214, 0, 0, 0, 0.894427 },
65 { 0, 0, 0, 1, 0 },
66 { -0.894427, 0, 0, 0, -0.447214 }
67 };
68
69 /* MATLAB result
70 *
71 * >> A = [ 1 0 0 0 2; 0 0 3 0 0 ; 0 0 0 0 0 ;0 2 0 0 0 ];
72 * >> [U, S, V] = svd(A)
73 *
74 * U =
75 * 0 1 0 0
76 * 1 0 0 0
77 * 0 0 0 -1
78 * 0 0 1 0
79 *
80 * S =
81 * 3.0000 0 0 0 0
82 * 0 2.2361 0 0 0
83 * 0 0 2.0000 0 0
84 * 0 0 0 0 0
85 *
86 * V =
87 * 0 0.4472 0 0 -0.8944
88 * 0 0 1.0000 0 0
89 * 1.0000 0 0 0 0
90 * 0 0 0 1.0000 0
91 * 0 0.8944 0 0 0.4472
92 */
double WORK_QUERY = 0;
206
207
208 /* Call dgesvd_ with lwork = -1 to query optimal workspace size. */
209
210 JOBU = 'A';
211 JOBVT = 'A';
212 M = 4;
213 N = 5;
214 LDA = 4; /* (out) */
215 LDU = 4; /* (out) */
216 S = NULL; /* (don't care) */
217 U = NULL; /* (don't care) */
218 VT = NULL; /* (don't care) */
219 LDVT = 5; /* (out) */
220 WORK = NULL; /* (out) , because LWORK is 0 do not care */
221 LWORK = 4 * M * N * M *N + 6 * M * N + dd_max(M, N);
222
223 A = calloc(M * N, sizeof(double));
224 if (!A) {
225 goto ddt2_fail_sys;
226 }
227 for (i = 0; i < M; ++i) {
228 for (j = 0; j < N; ++j) {
229 A[i * N + j] = ref_array_A[i][j];
230 }
231 }
232
233 S = calloc(M * N, sizeof(double));
234 if (!S) {
235 goto ddt2_fail_sys;
236 }
237
238 U = calloc(LDU * M, sizeof(double));
239 if (!U) {
240 goto ddt2_fail_sys;
241 }
242
243 VT = calloc(LDVT * N, sizeof(double));
244 if (!A) {
245 goto ddt2_fail_sys;
246 }
247
248 fprintf(stderr, "Reference array A:\n");
249 dd_walk_dbl_arr_rowwise(A, M, N, cb_dbl, cb_dbl_row_end);
250
251 fprintf(stderr, "Reference array U:\n");
252 dd_walk_dbl_arr_rowwise(&ref_array_U[0][0], M, M, cb_dbl, cb_dbl_row_end);
253
254 fprintf(stderr, "Reference array Sigma:\n");
255 dd_walk_dbl_arr_rowwise(&ref_array_Sigma[0][0], M, N, cb_dbl, cb_dbl_row_end);
256
257 fprintf(stderr, "Reference array VT:\n");
258 dd_walk_dbl_arr_rowwise(&ref_array_VT[0][0], N, N, cb_dbl, cb_dbl_row_end);
259
260 LWORK = -1;
261 dgesvd_("A", "A", &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, &WORK_QUERY, &LWORK, &INFO);
if (INFO != 0) {
263 if (INFO < 0) {
264 fprintf(stderr, "Error on LAPACK's dgesvd_ query: \"the %d-th argument had illegal value\"\n", INFO);
265 } else {
266 fprintf(stderr, "Error on LAPACK's dgesvd_ query: \"DBDSDC didn't converge, updating process failed\"\n");
267 }
268 return -1;
269 }
270
271 LWORK = (int) WORK_QUERY;
272 WORK = calloc(LWORK, sizeof(double));
273 if (!WORK) {
274 goto ddt2_fail_sys;
275 }
276
277 fprintf(stderr, "LAPACK's dgesvd_ query optimal results: LDA %d, LDU %d, LDVT %d, LWORK %d, WORK_QUERY %f\n", LDA, LDU, LDVT, LWORK, WORK_QUERY);
278 fprintf(stderr, "Rest of params: M %d, N %d\n", M, N);
279
280 /* Compute SVD. */
281 dgesvd_(&JOBU, &JOBVT, &M, &N, A, &LDA, S, U, &LDU, VT, &LDVT, WORK, &LWORK, &INFO);
282 if (INFO != 0) {
283 if (INFO < 0) {
284 fprintf(stderr, "Error on LAPACK's dgesvd_ query: \"the %d-th argument had illegal value\"\n", INFO);
285 } else {
286 fprintf(stderr, "Error on LAPACK's dgesvd_ query: \"DBDSDC didn't converge, updating process failed\"\n");
287 }
288 return -1;
289 }
290
291 fprintf(stderr, "LAPACK's dgesvd_ SVD completed\n");
292
293 fprintf(stderr, "Result A:\n");
294 dd_walk_dbl_arr_rowwise(A, M, N, cb_dbl, cb_dbl_row_end);
295
296 fprintf(stderr, "Result U:\n");
297 dd_walk_dbl_arr_rowwise(U, LDU, M, cb_dbl, cb_dbl_row_end);
298
299 fprintf(stderr, "Result S:\n");
300 dd_walk_dbl_arr_rowwise(S, M, N, cb_dbl, cb_dbl_row_end);
301
302 fprintf(stderr, "Result VT:\n");
303 dd_walk_dbl_arr_rowwise(VT, LDVT, N, cb_dbl, cb_dbl_row_end);
304
305 free(WORK);
306 free(A);
307 free(S);
308 free(U);
309 free(VT);
310
311 return 0;
Bad result:
peter#xx:~/$ ./test2
Reference array A:
1.000000 0.000000 0.000000 0.000000 2.000000
0.000000 0.000000 3.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 0.000000 0.000000 0.000000
Reference array U:
0.000000 0.000000 1.000000 0.000000
0.000000 1.000000 0.000000 0.000000
0.000000 0.000000 0.000000 -1.000000
1.000000 0.000000 0.000000 0.000000
Reference array Sigma:
2.000000 0.000000 0.000000 0.000000 0.000000
0.000000 3.000000 0.000000 0.000000 0.000000
0.000000 0.000000 2.236068 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
Reference array VT:
0.000000 1.000000 0.000000 0.000000 0.000000
0.000000 0.000000 1.000000 0.000000 0.000000
0.447214 0.000000 0.000000 0.000000 0.894427
0.000000 0.000000 0.000000 1.000000 0.000000
-0.894427 0.000000 0.000000 0.000000 -0.447214
LAPACK's dgesvd_ query optimal results: LDA 4, LDU 4, LDVT 5, LWORK 300, WORK_QUERY 300.000000
Rest of params: M 4, N 5
LAPACK's dgesvd_ SVD completed
Result A:
-3.000000 -2.000000 0.000000 -1.000000 0.500000
-2.236068 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.500000 -0.236068 0.000000 0.000000
Result U:
0.707107 0.000000 0.000000 0.707107
-0.707107 0.000000 -0.000000 0.707107
0.000000 0.000000 1.000000 0.000000
0.000000 1.000000 0.000000 0.000000
Result S:
3.872983 1.732051 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
Result VT:
0.182574 -0.408248 0.000000 0.000000 -0.894427
0.912871 0.408248 0.000000 0.000000 0.000000
-0.000000 -0.000000 1.000000 0.000000 0.000000
-0.000000 -0.000000 0.000000 1.000000 0.000000
0.365148 -0.816497 0.000000 0.000000 0.447214
What am I doing wrong in the general (non-square) matrix case?
The function dgesvd_ expects the matrices in column-major order, while your code supplies the data in row-major style:
227 for (i = 0; i < M; ++i) {
228 for (j = 0; j < N; ++j) {
229 A[i * N + j] = ref_array_A[i][j];
230 }
231 }
Effectively, your code is thus calculating SVD of
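$$
\begin{bmatrix}
1 & 2 & 0 & 0 & 2 \\
0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 \\
0 & 3 & 0 & 0 & 0
\end{bmatrix}
=
\begin{bmatrix}
1 & 0 & 0 & 0 \\
2 & 0 & 0 & 3 \\
0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 \\
2 & 0 & 0 & 0
\end{bmatrix}^{T}
$$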
which indeed yields approximately 3.87, 1.73.
This error does not occur in the first example since the matrix is square (M=N) and symmetric, so its row-major and column-major layouts are identical.
Also, the parameter S is supposed to be just a one-dimensional array (as in your first example). Since you then print it in row-major format with dd_walk_dbl_arr_rowwise(S, M, N, cb_dbl, cb_dbl_row_end);, these values appear consecutively in the first row...
Is it possible to achieve bi-cubic interpolation beyond grid values?
For example:
L = [5,10,20,25,40];
W= 1:3;
S= [50 99 787
779 795 850
803 779 388
886 753 486
849 780 598];
size1 = griddata(W,L,S,2,40,'cubic')
sizeBeyond = griddata(W,L,S,2,41,'cubic')
sizeV4 = griddata(W,L,S,2,41,'v4')
returns:
size1 = 780
sizeBeyond = NaN
sizeV4 = 721.57
What I was suggesting is that you can supply the extrapolated values as input. Check the code below. But note that, as suggested by flawr, the extrapolation behaves really badly.
l = [5,10,20,25,40];
w = 1:3;
li = [l 41] ;
S = [50 99 787
779 795 850
803 779 388
886 753 486
849 780 598];
[W,L] = meshgrid(w,l) ;
[Wi,Li] = meshgrid(w,li) ;
Si = interp2(W,L,S,Wi,Li,'spline') ;
size1 = griddata(W,L,S,2,40,'cubic')
sizeBeyond = griddata(Wi,Li,Si,2,41,'cubic')
sizeV4 = griddata(W,L,S,2,41,'v4')
Note: Don't use the names of built-in commands like length, size, etc. as variables in the code, even for demonstration; it is troublesome.
Though this is not an answer, I have to post it here for discussion.
I am trying to decompose a sparse matrix (40,000×1,400,000) with scipy.sparse.linalg.svds on my 64-bit machine with 140GB RAM, as follows:
k = 5000
tfidf_mtx = tfidf_m.tocsr()
u_45,s_45,vT_45 = scipy.sparse.linalg.svds(tfidf_mtx, k=k)
When k ranges from 1000 to 4500, it works. But when k is 5000, it throws a MemoryError. The precise error is given below:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-6-31a69ce54e2c> in <module>()
4 k = 4000
5 tfidf_mtx = tfidf_m.tocsr()
----> 6 get_ipython().magic(u'time u_50,s_50,vT_50 =linalg.svds(tfidf_mtx, k=k))
7 # print len(s),s
8
/usr/lib/python2.7/dist-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
2163 magic_name, _, magic_arg_s = arg_s.partition(' ')
2164 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2165 return self.run_line_magic(magic_name, magic_arg_s)
2166
2167 #-------------------------------------------------------------------------
/usr/lib/python2.7/dist-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2084 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2085 with self.builtin_trap:
-> 2086 result = fn(*args,**kwargs)
2087 return result
2088
/usr/lib/python2.7/dist-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
/usr/lib/python2.7/dist-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
189 # but it's overkill for just that one bit of state.
190 def magic_deco(arg):
--> 191 call = lambda f, *a, **k: f(*a, **k)
192
193 if callable(arg):
/usr/lib/python2.7/dist-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
1043 else:
1044 st = clock2()
-> 1045 exec code in glob, local_ns
1046 end = clock2()
1047 out = None
<timed exec> in <module>()
/usr/local/lib/python2.7/dist-packages/scipy/sparse/linalg/eigen/arpack/arpack.pyc in svds(A, k, ncv, tol, which, v0, maxiter, return_singular_vectors)
1751 else:
1752 ularge = eigvec[:, above_cutoff]
-> 1753 vhlarge = _herm(X_matmat(ularge) / slarge)
1754
1755 u = _augmented_orthonormal_cols(ularge, nsmall)
/usr/local/lib/python2.7/dist-packages/scipy/sparse/base.pyc in dot(self, other)
244
245 """
--> 246 return self * other
247
248 def __eq__(self, other):
/usr/local/lib/python2.7/dist-packages/scipy/sparse/base.pyc in __mul__(self, other)
298 return self._mul_vector(other.ravel()).reshape(M, 1)
299 elif other.ndim == 2 and other.shape[0] == N:
--> 300 return self._mul_multivector(other)
301
302 if isscalarlike(other):
/usr/local/lib/python2.7/dist-packages/scipy/sparse/compressed.pyc in _mul_multivector(self, other)
463
464 result = np.zeros((M,n_vecs), dtype=upcast_char(self.dtype.char,
--> 465 other.dtype.char))
466
467 # csr_matvecs or csc_matvecs
MemoryError:
When k is 3000 and 4500, the ratio of the sum of the squares of the singular values to the sum of the squares of all matrix entries is 0.7033 and 0.8230, respectively. I have been searching the net for a long time, but to no avail. Please help, or give some ideas on how to achieve this.
So the return is an (M,k) array. On an ordinary older machine:
In [368]: np.ones((40000,1000))
....
In [369]: np.ones((40000,4000))
...
In [370]: np.ones((40000,5000))
...
--> 190 a = empty(shape, dtype, order)
191 multiarray.copyto(a, 1, casting='unsafe')
192 return a
MemoryError:
Now, it may just be a coincidence that I hit the memory error at the same size as your code. But if you make the problem big enough, you will hit memory errors at some point.
Your stacktrace shows the error occurs while multiplying a sparse matrix and a dense 2d array (other), and the result will be dense as well.
Consider these two use cases:
sub test1 {
my $v = 1;
sub test2 { print $v }
# ...
}
and
for (0..3) {
my $foo = $_;
sub test1 { print $foo }
# ...
}
The first one produces a "Variable will not stay shared" warning, while the second doesn't. It seems that the variable is not shared in either case. Why isn't there any warning in the second case?
It seems that this may be a bug or omission in the warnings pragma.
Adding to the fun, this arrangement gives a different warning:
BEGIN {*outer = sub {
my $x;
sub inner {$x}
}}
This warns: Variable "$x" is not available
These warnings all come from the pad_findlex() API call defined in pad.c.
806 =for apidoc pad_findlex
807
808 Find a named lexical anywhere in a chain of nested pads. Add fake entries
809 in the inner pads if it's found in an outer one.
810
811 Returns the offset in the bottom pad of the lex or the fake lex.
812 cv is the CV in which to start the search, and seq is the current cop_seq
813 to match against. If warn is true, print appropriate warnings. The out_*
814 vars return values, and so are pointers to where the returned values
815 should be stored. out_capture, if non-null, requests that the innermost
816 instance of the lexical is captured; out_name_sv is set to the innermost
817 matched namesv or fake namesv; out_flags returns the flags normally
818 associated with the IVX field of a fake namesv.
819
820 Note that pad_findlex() is recursive; it recurses up the chain of CVs,
821 then comes back down, adding fake entries as it goes. It has to be this way
822 because fake namesvs in anon protoypes have to store in xlow the index into
823 the parent pad.
824
825 =cut
826 */
827
828 /* the CV has finished being compiled. This is not a sufficient test for
829 * all CVs (eg XSUBs), but suffices for the CVs found in a lexical chain */
830 #define CvCOMPILED(cv) CvROOT(cv)
831
832 /* the CV does late binding of its lexicals */
833 #define CvLATE(cv) (CvANON(cv) || SvTYPE(cv) == SVt_PVFM)
834
835
836 STATIC PADOFFSET
837 S_pad_findlex(pTHX_ const char *name, const CV* cv, U32 seq, int warn,
838 SV** out_capture, SV** out_name_sv, int *out_flags)
839 {
840 dVAR;
841 I32 offset, new_offset;
842 SV *new_capture;
843 SV **new_capturep;
844 const AV * const padlist = CvPADLIST(cv);
845
846 PERL_ARGS_ASSERT_PAD_FINDLEX;
847
848 *out_flags = 0;
849
850 DEBUG_Xv(PerlIO_printf(Perl_debug_log,
851 "Pad findlex cv=0x%"UVxf" searching \"%s\" seq=%d%s\n",
852 PTR2UV(cv), name, (int)seq, out_capture ? " capturing" : "" ));
853
854 /* first, search this pad */
855
856 if (padlist) { /* not an undef CV */
857 I32 fake_offset = 0;
858 const AV * const nameav = MUTABLE_AV(AvARRAY(padlist)[0]);
859 SV * const * const name_svp = AvARRAY(nameav);
860
861 for (offset = AvFILLp(nameav); offset > 0; offset--) {
862 const SV * const namesv = name_svp[offset];
863 if (namesv && namesv != &PL_sv_undef
864 && strEQ(SvPVX_const(namesv), name))
865 {
866 if (SvFAKE(namesv)) {
867 fake_offset = offset; /* in case we don't find a real one */
868 continue;
869 }
870 /* is seq within the range _LOW to _HIGH ?
871 * This is complicated by the fact that PL_cop_seqmax
872 * may have wrapped around at some point */
873 if (COP_SEQ_RANGE_LOW(namesv) == PERL_PADSEQ_INTRO)
874 continue; /* not yet introduced */
875
876 if (COP_SEQ_RANGE_HIGH(namesv) == PERL_PADSEQ_INTRO) {
877 /* in compiling scope */
878 if (
879 (seq > COP_SEQ_RANGE_LOW(namesv))
880 ? (seq - COP_SEQ_RANGE_LOW(namesv) < (U32_MAX >> 1))
881 : (COP_SEQ_RANGE_LOW(namesv) - seq > (U32_MAX >> 1))
882 )
883 break;
884 }
885 else if (
886 (COP_SEQ_RANGE_LOW(namesv) > COP_SEQ_RANGE_HIGH(namesv))
887 ?
888 ( seq > COP_SEQ_RANGE_LOW(namesv)
889 || seq <= COP_SEQ_RANGE_HIGH(namesv))
890
891 : ( seq > COP_SEQ_RANGE_LOW(namesv)
892 && seq <= COP_SEQ_RANGE_HIGH(namesv))
893 )
894 break;
895 }
896 }
897
898 if (offset > 0 || fake_offset > 0 ) { /* a match! */
899 if (offset > 0) { /* not fake */
900 fake_offset = 0;
901 *out_name_sv = name_svp[offset]; /* return the namesv */
902
903 /* set PAD_FAKELEX_MULTI if this lex can have multiple
904 * instances. For now, we just test !CvUNIQUE(cv), but
905 * ideally, we should detect my's declared within loops
906 * etc - this would allow a wider range of 'not stayed
907 * shared' warnings. We also treated already-compiled
908 * lexes as not multi as viewed from evals. */
909
910 *out_flags = CvANON(cv) ?
911 PAD_FAKELEX_ANON :
912 (!CvUNIQUE(cv) && ! CvCOMPILED(cv))
913 ? PAD_FAKELEX_MULTI : 0;
914
915 DEBUG_Xv(PerlIO_printf(Perl_debug_log,
916 "Pad findlex cv=0x%"UVxf" matched: offset=%ld (%lu,%lu)\n",
917 PTR2UV(cv), (long)offset,
918 (unsigned long)COP_SEQ_RANGE_LOW(*out_name_sv),
919 (unsigned long)COP_SEQ_RANGE_HIGH(*out_name_sv)));
920 }
921 else { /* fake match */
922 offset = fake_offset;
923 *out_name_sv = name_svp[offset]; /* return the namesv */
924 *out_flags = PARENT_FAKELEX_FLAGS(*out_name_sv);
925 DEBUG_Xv(PerlIO_printf(Perl_debug_log,
926 "Pad findlex cv=0x%"UVxf" matched: offset=%ld flags=0x%lx index=%lu\n",
927 PTR2UV(cv), (long)offset, (unsigned long)*out_flags,
928 (unsigned long) PARENT_PAD_INDEX(*out_name_sv)
929 ));
930 }
931
932 /* return the lex? */
933
934 if (out_capture) {
935
936 /* our ? */
937 if (SvPAD_OUR(*out_name_sv)) {
938 *out_capture = NULL;
939 return offset;
940 }
941
942 /* trying to capture from an anon prototype? */
943 if (CvCOMPILED(cv)
944 ? CvANON(cv) && CvCLONE(cv) && !CvCLONED(cv)
945 : *out_flags & PAD_FAKELEX_ANON)
946 {
947 if (warn)
948 Perl_ck_warner(aTHX_ packWARN(WARN_CLOSURE),
949 "Variable \"%s\" is not available", name);
950 *out_capture = NULL;
951 }
952
953 /* real value */
954 else {
955 int newwarn = warn;
956 if (!CvCOMPILED(cv) && (*out_flags & PAD_FAKELEX_MULTI)
957 && !SvPAD_STATE(name_svp[offset])
958 && warn && ckWARN(WARN_CLOSURE)) {
959 newwarn = 0;
960 Perl_warner(aTHX_ packWARN(WARN_CLOSURE),
961 "Variable \"%s\" will not stay shared", name);
962 }
963
964 if (fake_offset && CvANON(cv)
965 && CvCLONE(cv) &&!CvCLONED(cv))
966 {
967 SV *n;
968 /* not yet caught - look further up */
969 DEBUG_Xv(PerlIO_printf(Perl_debug_log,
970 "Pad findlex cv=0x%"UVxf" chasing lex in outer pad\n",
971 PTR2UV(cv)));
972 n = *out_name_sv;
973 (void) pad_findlex(name, CvOUTSIDE(cv),
974 CvOUTSIDE_SEQ(cv),
975 newwarn, out_capture, out_name_sv, out_flags);
976 *out_name_sv = n;
977 return offset;
978 }
979
980 *out_capture = AvARRAY(MUTABLE_AV(AvARRAY(padlist)[
981 CvDEPTH(cv) ? CvDEPTH(cv) : 1]))[offset];
982 DEBUG_Xv(PerlIO_printf(Perl_debug_log,
983 "Pad findlex cv=0x%"UVxf" found lex=0x%"UVxf"\n",
984 PTR2UV(cv), PTR2UV(*out_capture)));
985
986 if (SvPADSTALE(*out_capture)
987 && !SvPAD_STATE(name_svp[offset]))
988 {
989 Perl_ck_warner(aTHX_ packWARN(WARN_CLOSURE),
990 "Variable \"%s\" is not available", name);
991 *out_capture = NULL;
992 }
993 }
994 if (!*out_capture) {
995 if (*name == '#')
996 *out_capture = sv_2mortal(MUTABLE_SV(newAV()));
997 else if (*name == '%')
998 *out_capture = sv_2mortal(MUTABLE_SV(newHV()));
999 else
1000 *out_capture = sv_newmortal();
1001 }
1002 }
1003
1004 return offset;
1005 }
1006 }
1007
1008 /* it's not in this pad - try above */
1009
1010 if (!CvOUTSIDE(cv))
1011 return NOT_IN_PAD;
1012
1013 /* out_capture non-null means caller wants us to capture lex; in
1014 * addition we capture ourselves unless it's an ANON/format */
1015 new_capturep = out_capture ? out_capture :
1016 CvLATE(cv) ? NULL : &new_capture;
1017
1018 offset = pad_findlex(name, CvOUTSIDE(cv), CvOUTSIDE_SEQ(cv), 1,
1019 new_capturep, out_name_sv, out_flags);
1020 if ((PADOFFSET)offset == NOT_IN_PAD)
1021 return NOT_IN_PAD;
1022
1023 /* found in an outer CV. Add appropriate fake entry to this pad */
1024
1025 /* don't add new fake entries (via eval) to CVs that we have already
1026 * finished compiling, or to undef CVs */
1027 if (CvCOMPILED(cv) || !padlist)
1028 return 0; /* this dummy (and invalid) value isnt used by the caller */
1029
1030 {
1031 /* This relies on sv_setsv_flags() upgrading the destination to the same
1032 type as the source, independent of the flags set, and on it being
1033 "good" and only copying flag bits and pointers that it understands.
1034 */
1035 SV *new_namesv = newSVsv(*out_name_sv);
1036 AV * const ocomppad_name = PL_comppad_name;
1037 PAD * const ocomppad = PL_comppad;
1038 PL_comppad_name = MUTABLE_AV(AvARRAY(padlist)[0]);
1039 PL_comppad = MUTABLE_AV(AvARRAY(padlist)[1]);
1040 PL_curpad = AvARRAY(PL_comppad);
1041
1042 new_offset
1043 = pad_add_name_sv(new_namesv,
1044 (SvPAD_STATE(*out_name_sv) ? padadd_STATE : 0),
1045 SvPAD_TYPED(*out_name_sv)
1046 ? SvSTASH(*out_name_sv) : NULL,
1047 SvOURSTASH(*out_name_sv)
1048 );
1049
1050 SvFAKE_on(new_namesv);
1051 DEBUG_Xv(PerlIO_printf(Perl_debug_log,
1052 "Pad addname: %ld \"%.*s\" FAKE\n",
1053 (long)new_offset,
1054 (int) SvCUR(new_namesv), SvPVX(new_namesv)));
1055 PARENT_FAKELEX_FLAGS_set(new_namesv, *out_flags);
1056
1057 PARENT_PAD_INDEX_set(new_namesv, 0);
1058 if (SvPAD_OUR(new_namesv)) {
1059 NOOP; /* do nothing */
1060 }
1061 else if (CvLATE(cv)) {
1062 /* delayed creation - just note the offset within parent pad */
1063 PARENT_PAD_INDEX_set(new_namesv, offset);
1064 CvCLONE_on(cv);
1065 }
1066 else {
1067 /* immediate creation - capture outer value right now */
1068 av_store(PL_comppad, new_offset, SvREFCNT_inc(*new_capturep));
1069 DEBUG_Xv(PerlIO_printf(Perl_debug_log,
1070 "Pad findlex cv=0x%"UVxf" saved captured sv 0x%"UVxf" at offset %ld\n",
1071 PTR2UV(cv), PTR2UV(*new_capturep), (long)new_offset));
1072 }
1073 *out_name_sv = new_namesv;
1074 *out_flags = PARENT_FAKELEX_FLAGS(new_namesv);
1075
1076 PL_comppad_name = ocomppad_name;
1077 PL_comppad = ocomppad;
1078 PL_curpad = ocomppad ? AvARRAY(ocomppad) : NULL;
1079 }
1080 return new_offset;
1081 }
It seems to have to do with whether or not the containing pad is held within a CV, but I am not sure of the exact specifics.