C语言 数组初始化为0 编译器是怎么实现的?为什么比memset更耗时
C语言 数组初始化为0 编译器是如何实现的?为什么比memset更耗时?
写了一段代码,测试了一下数组初始化为0(即{0}方式),memset, for循环赋值三种方式下的耗时:
代码如下:
执行5次,统计结果为:
耗时方面:for 平均耗时4300us, {0}平均耗时766us, memset 平均耗时72us。
for > {0} > memset。 用memset效率最高。
请问{0} 方式编译器是如何实现的,为什么比memset更耗时呢(并且不是一个数量级上)?
注:所用编译器为 gcc (GCC) 4.4.4
------解决思路----------------------
因为C:\Program Files\Microsoft Visual Studio 10.0\VC\crt\src\intel\memset.asm
[code=asm page ,132
title memset - set sections of memory all to one byte
;***
;memset.asm - set a section of memory to all one byte
;
; Copyright (c) Microsoft Corporation. All rights reserved.
;
;Purpose:
; contains the memset() routine
;
;*******************************************************************************
.xlist
include cruntime.inc
.list
page
;***
;char *memset(dst, value, count) - sets "count" bytes at "dst" to "value"
;
;Purpose:
; Sets the first "count" bytes of the memory starting
; at "dst" to the character value "value".
;
; Algorithm:
; char *
; memset (dst, value, count)
; char *dst;
; char value;
; unsigned int count;
; {
; char *start = dst;
;
; while (count--)
; *dst++ = value;
; return(start);
; }
;
;Entry:
; char *dst - pointer to memory to fill with value
; char value - value to put in dst bytes
; int count - number of bytes of dst to fill
;
;Exit:
; returns dst, with filled bytes
;
;Uses:
;
;Exceptions:
;
;*******************************************************************************
CODESEG
extrn _VEC_memzero:near
extrn __sse2_available:dword
public memset
memset proc \
dst:ptr byte, \
value:byte, \
count:dword
OPTION PROLOGUE:NONE, EPILOGUE:NONE
.FPO ( 0, 3, 0, 0, 0, 0 )
mov edx,[esp + 0ch] ; edx = "count"
mov ecx,[esp + 4] ; ecx points to "dst"
test edx,edx ; 0?
jz short toend ; if so, nothing to do
xor eax,eax
mov al,[esp + 8] ; the byte "value" to be stored
; Special case large block zeroing using SSE2 support
test al,al ; memset using zero initializer?
jne dword_align
cmp edx,080h ; block size exceeds size threshold?
jb dword_align
cmp DWORD PTR __sse2_available,0 ; SSE2 supported?
je dword_align
jmp _VEC_memzero ; use fast zero SSE2 implementation
; no return
; Align address on dword boundary
dword_align:
写了一段代码,测试了一下数组初始化为0(即{0}方式),memset, for循环赋值三种方式下的耗时:
代码如下:
#define ARRAY_SIZE_MAX (1*1024*1024)
void function1()
{
char array[ARRAY_SIZE_MAX] = {0};
}
void function2()
{
char array[ARRAY_SIZE_MAX];
memset(array, 0, ARRAY_SIZE_MAX);
}
void function3()
{
int i = 0;
char array[ARRAY_SIZE_MAX];
for (i = 0; i < ARRAY_SIZE_MAX; i++)
{
array[i] = 0;
}
}
int main(int argc, char* argv[])
{
struct timeval start, end;
int timeuse = 0;
gettimeofday(&start, NULL);
function1();
gettimeofday(&end, NULL);
timeuse = 1000000 * ( end.tv_sec - start.tv_sec ) + end.tv_usec - start.tv_usec;
printf("%d\n", timeuse);
gettimeofday(&start, NULL);
function2();
gettimeofday(&end, NULL);
timeuse = 1000000 * ( end.tv_sec - start.tv_sec ) + end.tv_usec - start.tv_usec;
printf("%d\n", timeuse);
gettimeofday(&start, NULL);
function3();
gettimeofday(&end, NULL);
timeuse = 1000000 * ( end.tv_sec - start.tv_sec ) + end.tv_usec - start.tv_usec;
printf("%d\n", timeuse);
return 0;
}
执行5次,统计结果为:
耗时方面:for 平均耗时4300us, {0}平均耗时766us, memset 平均耗时72us。
for > {0} > memset。 用memset效率最高。
请问{0} 方式编译器是如何实现的,为什么比memset更耗时呢(并且不是一个数量级上)?
注:所用编译器为 gcc (GCC) 4.4.4
------解决思路----------------------
因为C:\Program Files\Microsoft Visual Studio 10.0\VC\crt\src\intel\memset.asm
[code=asm page ,132
title memset - set sections of memory all to one byte
;***
;memset.asm - set a section of memory to all one byte
;
; Copyright (c) Microsoft Corporation. All rights reserved.
;
;Purpose:
; contains the memset() routine
;
;*******************************************************************************
.xlist
include cruntime.inc
.list
page
;***
;char *memset(dst, value, count) - sets "count" bytes at "dst" to "value"
;
;Purpose:
; Sets the first "count" bytes of the memory starting
; at "dst" to the character value "value".
;
; Algorithm:
; char *
; memset (dst, value, count)
; char *dst;
; char value;
; unsigned int count;
; {
; char *start = dst;
;
; while (count--)
; *dst++ = value;
; return(start);
; }
;
;Entry:
; char *dst - pointer to memory to fill with value
; char value - value to put in dst bytes
; int count - number of bytes of dst to fill
;
;Exit:
; returns dst, with filled bytes
;
;Uses:
;
;Exceptions:
;
;*******************************************************************************
CODESEG
extrn _VEC_memzero:near
extrn __sse2_available:dword
public memset
memset proc \
dst:ptr byte, \
value:byte, \
count:dword
OPTION PROLOGUE:NONE, EPILOGUE:NONE
.FPO ( 0, 3, 0, 0, 0, 0 )
mov edx,[esp + 0ch] ; edx = "count"
mov ecx,[esp + 4] ; ecx points to "dst"
test edx,edx ; 0?
jz short toend ; if so, nothing to do
xor eax,eax
mov al,[esp + 8] ; the byte "value" to be stored
; Special case large block zeroing using SSE2 support
test al,al ; memset using zero initializer?
jne dword_align
cmp edx,080h ; block size exceeds size threshold?
jb dword_align
cmp DWORD PTR __sse2_available,0 ; SSE2 supported?
je dword_align
jmp _VEC_memzero ; use fast zero SSE2 implementation
; no return
; Align address on dword boundary
dword_align: