以下代码,如何让它更高效?(换成指针是不是更高效?)
unsigned char temp_buffer;
unsigned char yuvBuffer_1024_768[1024 * 768 * 2] = {0};
m_dwImageWidth = 1024;
m_dwImageHeight = 768;for(count = 0; count < m_dwImageWidth * m_dwImageHeight * 2; count += 4)
{
// 1<-->3换位
temp_buffer = yuvBuffer_1024_768[count];
yuvBuffer_1024_768[count] = yuvBuffer_1024_768[count+2];
yuvBuffer_1024_768[count+2] = temp_buffer;
}
unsigned char yuvBuffer_1024_768[1024 * 768 * 2] = {0};

m_dwImageWidth = 1024;
m_dwImageHeight = 768;

/* Step through the buffer four bytes at a time. */
for (count = 0; count < m_dwImageWidth * m_dwImageHeight * 2; count += 4)
{
    /* swap bytes 1 <--> 3 of the current 4-byte group (offsets 0 and 2) */
    unsigned char *const first = &yuvBuffer_1024_768[count];
    unsigned char *const third = &yuvBuffer_1024_768[count + 2];

    temp_buffer = *first;
    *first = *third;
    *third = temp_buffer;
}
unsigned char yuvBuffer_1024_768[1024 * 768 * 2] = {0};
m_dwImageWidth = 1024;
m_dwImageHeight = 768;unsigned long *tmp = (unsigned long*)yuvBuffer_1024_768;for(count = 0; count < m_dwImageWidth * m_dwImageHeight / 2; ++count)
{
// 1<-->3换位
tmp[count] = (tmp[count]&0x00FF00FF)&((tmp[count]&0xFF000000)>>16)&((tmp[count]&0xFF00)<<16);
}
应该是
tmp[count] = (tmp[count]&0x00FF00FF)|((tmp[count]&0xFF000000)>>16)|((tmp[count]&0xFF00)<<16);
(三个部分应该用"或"(|)合并,而不是用"与"(&);这三个部分的有效位互不重叠,用"与"的结果恒为 0。)
2. 在 release 构建下,位运算不需要额外的内存来存放中间值,结果通常保存在寄存器里;由于不使用临时内存,它比使用临时变量要快,也比使用 CPU 缓存快。
3. 5 次位运算 + 1 次 mov 的速度远远快于 (3 次 mov + 2 次 add) * 4,而且 for 循环的次数降低为原来的 1/4。