I just read this blog http://lemire.me/blog/archives/2012/06/20/do-not-waste-time-with-stl-vectors/ comparing performance of operator[]
assignment and push_back
on memory pre-reserved std::vector
and I decided to try it myself. The operation is simple:
// for vector
bigarray.reserve(N);
// START TIME TRACK
for(int k = 0; k < N; ++k)
// for operator[]:
// bigarray[k] = k;
// for push_back
bigarray.push_back(k);
// END TIME TRACK
// do some dummy operations to prevent compiler optimize
long sum = accumulate(begin(bigarray), end(array),0 0);
And here is the result:
~/t/benchmark> icc 1.cpp -O3 -std=c++11
~/t/benchmark> ./a.out
[ 1.cpp: 52] 0.789123s --> C++ new
[ 1.cpp: 52] 0.774049s --> C++ new
[ 1.cpp: 66] 0.351176s --> vector
[ 1.cpp: 80] 1.801294s --> reserve + push_back
[ 1.cpp: 94] 1.753786s --> reserve + emplace_back
[ 1.cpp: 107] 2.815756s --> no reserve + push_back
~/t/benchmark> clang++ 1.cpp -std=c++11 -O3
~/t/benchmark> ./a.out
[ 1.cpp: 52] 0.592318s --> C++ new
[ 1.cpp: 52] 0.566979s --> C++ new
[ 1.cpp: 66] 0.270363s --> vector
[ 1.cpp: 80] 1.763784s --> reserve + push_back
[ 1.cpp: 94] 1.761879s --> reserve + emplace_back
[ 1.cpp: 107] 2.815596s --> no reserve + push_back
~/t/benchmark> g++ 1.cpp -O3 -std=c++11
~/t/benchmark> ./a.out
[ 1.cpp: 52] 0.617995s --> C++ new
[ 1.cpp: 52] 0.601746s --> C++ new
[ 1.cpp: 66] 0.270533s --> vector
[ 1.cpp: 80] 1.766538s --> reserve + push_back
[ 1.cpp: 94] 1.998792s --> reserve + emplace_back
[ 1.cpp: 107] 2.815617s --> no reserve + push_back
For all the compilers, vector
with operator[]
is much faster than raw pointer with operator[]
. This led to the first question: why? The second question is, I have already "reserved" the memory, so why opeator[]
is faster?
The next question is, since the memory is already allocated, why push_back
is times slower than operator[]
?
The test code is attached below:
#include <iostream>
#include <iomanip>
#include <vector>
#include <numeric>
#include <chrono>
#include <string>
#include <cstring>
#define PROFILE(BLOCK, ROUTNAME) ProfilerRun([&](){do {BLOCK;} while(0);},
ROUTNAME, __FILE__, __LINE__);
template <typename T>
void ProfilerRun (T&& func, const std::string& routine_name = "unknown",
const char* file = "unknown", unsigned line = 0)
{
using std::chrono::duration_cast;
using std::chrono::microseconds;
using std::chrono::steady_clock;
using std::cerr;
using std::endl;
steady_clock::time_point t_begin = steady_clock::now();
// Call the function
func();
steady_clock::time_point t_end = steady_clock::now();
cerr << "[" << std::setw (20)
<< (std::strrchr (file, '/') ?
std::strrchr (file, '/') + 1 : file)
<< ":" << std::setw (5) << line << "] "
<< std::setw (10) << std::setprecision (6) << std::fixed
<< static_cast<float> (duration_cast<microseconds>
(t_end - t_begin).count()) / 1e6
<< "s --> " << routine_name << endl;
cerr.unsetf (std::ios_base::floatfield);
}
using namespace std;
const int N = (1 << 29);
int routine1()
{
int sum;
int* bigarray = new int[N];
PROFILE (
{
for (unsigned int k = 0; k < N; ++k)
bigarray[k] = k;
}, "C++ new");
sum = std::accumulate (bigarray, bigarray + N, 0);
delete [] bigarray;
return sum;
}
int routine2()
{
int sum;
vector<int> bigarray (N);
PROFILE (
{
for (unsigned int k = 0; k < N; ++k)
bigarray[k] = k;
}, "vector");
sum = std::accumulate (begin (bigarray), end (bigarray), 0);
return sum;
}
int routine3()
{
int sum;
vector<int> bigarray;
bigarray.reserve (N);
PROFILE (
{
for (unsigned int k = 0; k < N; ++k)
bigarray.push_back (k);
}, "reserve + push_back");
sum = std::accumulate (begin (bigarray), end (bigarray), 0);
return sum;
}
int routine4()
{
int sum;
vector<int> bigarray;
bigarray.reserve (N);
PROFILE (
{
for (unsigned int k = 0; k < N; ++k)
bigarray.emplace_back(k);
}, "reserve + emplace_back");
sum = std::accumulate (begin (bigarray), end (bigarray), 0);
return sum;
}
int routine5()
{
int sum;
vector<int> bigarray;
PROFILE (
{
for (unsigned int k = 0; k < N; ++k)
bigarray.push_back (k);
}, "no reserve + push_back");
sum = std::accumulate (begin (bigarray), end (bigarray), 0);
return sum;
}
int main()
{
long s0 = routine1();
long s1 = routine1();
long s2 = routine2();
long s3 = routine3();
long s4 = routine4();
long s5 = routine5();
return int (s1 + s2);
}
See Question&Answers more detail:
os