Say I want to multiply each element of a cell array A
with a coefficent k
. I can do that by:
A = cellfun(@(x) k*x, A, 'UniformOutput', false)
But this is extremely slow. Is there a faster and better way? The cell array elements are variable length vectors, so cell2num
doesn't apply.
Edit: Based on fpe's recommendation of a for loop, here is an example benchmark. Starting with this data
A = arrayfun(@(n) rand(n,1), randi(5,1000,1000), 'UniformOutput',false);
The cellfun
call above takes 9.45 seconds
, while a for loop:
A2 = cell(size(A));
for i = 1:size(A,1), for j = 1:size(A,2), A2{i,j} = A{i,j}*k; end; end
A = A2;
takes 1.67 seconds
, which is a significant improvement. I'd still prefer something a few orders of magnitude faster. (I also don't understand why the Matlab interpreter is unable to make the cellfun call as fast as the for loop. They are semantically identical.)
Edit 2: Amro's suggestion to make one single for loop is significantly faster:
for i = 1:numel(A), A{i} = A{i}*k; end
takes 1.11 seconds
, and if I run pack
prior to it to align the memory just 0.88 seconds
.
Implementing a MEX function to do this is actually not much better: 0.73 seconds
, (0.53 seconds
after pack
), which indicates that allocating many small matrices is slow in Matlab.
#include "mex.h"
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
if (nrhs != 2)
mexErrMsgTxt("need 2 arguments (Cell, Coefficient)");
mwSize const* size = mxGetDimensions(prhs[0]);
int N = mxGetNumberOfDimensions(prhs[0]);
if (mxGetNumberOfElements(prhs[1]) != 1)
mexErrMsgTxt("second argument to multcell must be a scalar");
double coefficient = *mxGetPr(prhs[1]);
plhs[0] = mxCreateCellArray(N, size);
int M = mxGetNumberOfElements(prhs[0]);
for (int i = 0; i < M; i++) {
mxArray *r = mxGetCell(prhs[0], i);
mxArray *l = mxCreateNumericArray(mxGetNumberOfDimensions(r),
mxGetDimensions(r),
mxDOUBLE_CLASS,
mxREAL);
double *rp = mxGetPr(r);
double *lp = mxGetPr(l);
int num_elements = mxGetNumberOfElements(r);
for (int i = 0; i < num_elements; i++)
lp[i] = rp[i] * coefficient;
mxSetCell(plhs[0], i, l);
}
}
Cheating a bit, however, and implementing a MEX function that actually edits the memory in place seems to be the only way to get reasonable performance out the operation: 0.030 seconds
. This uses the undocumented mxUnshareArray
as suggested by Amro.
#include "mex.h"
extern "C" bool mxUnshareArray(mxArray *array_ptr, bool noDeepCopy);
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
if (nrhs != 2)
mexErrMsgTxt("need 2 arguments (Cell, Coefficient)");
mwSize const* size = mxGetDimensions(prhs[0]);
int N = mxGetNumberOfDimensions(prhs[0]);
if (mxGetNumberOfElements(prhs[1]) != 1)
mexErrMsgTxt("second argument to multcell must be a scalar");
double coefficient = *mxGetPr(prhs[1]);
mxUnshareArray(const_cast<mxArray *>(prhs[0]), false);
plhs[0] = const_cast<mxArray *>(prhs[0]);
int M = mxGetNumberOfElements(prhs[0]);
for (int i = 0; i < M; i++) {
mxArray *r = mxGetCell(prhs[0], i);
double *rp = mxGetPr(r);
int num_elements = mxGetNumberOfElements(r);
for (int i = 0; i < num_elements; i++)
rp[i] = rp[i] * coefficient;
}
}
See Question&Answers more detail:
os