What's the difference between a const array and a static const array in C/C++?
Compiling the following code in Visual Studio 2015 (Win7, x64, Debug configuration) took a very, very, very long time (i.e., more than 10 minutes):
double tfuuuuuuu(int Ind)
{
const double Arr[600 * 258] = {3.5453, 45.234234234, 234234.234,// extends to 258 values for each line
// 599 lines here.....
};
return Arr[Ind];
}
But when I added the `static` keyword, compilation took half a second:
double tfuuuuuuu(int Ind)
{
static const double Arr[600 * 258] = {3.5453, 45.234234234, 234234.234,// extends to 258 values for each line
// 599 lines here.....
};
return Arr[Ind];
}
I know that `static` means that the variable will keep its value between invocations, but if the array is `const` anyway, what difference does it make if I add `static`? And why does the compilation time change so dramatically?
EDIT :
The actual code can be found here (compilation was in Debug mode).
A local variable declared as `static` has a lifetime of the entire running program, and is typically stored in the data segment. Compilers implement this by emitting a section that contains the values.
Local variables not declared as static typically live on the stack and must be initialized every time the variable's scope is entered.
Looking at the assembly for the `static` case, MSVC 2015 outputs the following:
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.00.24215.1
TITLE MyLBP.c
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
CONST SEGMENT
?Arr@?1??tfuuuuuuu@@9@9 DQ 04060c00000000000r ; 134 ; `tfuuuuuuu'::`2'::Arr
DQ 03fe15efd20a7955br ; 0.542845
DQ 03fdf59701e4b19afr ; 0.489834
DQ 0bfd8e38e9ab7fcb1r ; -0.388889
DQ 0bfe59f22c01e68a1r ; -0.675676
DQ 0bfeb13b15d5aa410r ; -0.846154
DQ 0bfe2c2355f07776er ; -0.586207
DQ 03fefffffbf935359r ; 1
...
ORG $+1036128
CONST ENDS
PUBLIC _tfuuuuuuu
EXTRN __fltused:DWORD
; Function compile flags: /Odtp
_TEXT SEGMENT
_Ind$ = 8 ; size = 4
_tfuuuuuuu PROC
; File c:\users\dennis bush\documents\x2.c
; Line 4
push ebp
mov ebp, esp
; Line 106
mov eax, DWORD PTR _Ind$[ebp]
fld QWORD PTR ?Arr@?1??tfuuuuuuu@@9@9[eax*8]
; Line 107
pop ebp
ret 0
_tfuuuuuuu ENDP
_TEXT ENDS
END
While gcc 4.8.5 outputs the following:
.file "MyLBP.c"
.text
.globl tfuuuuuuu
.type tfuuuuuuu, @function
tfuuuuuuu:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movl %edi, -4(%rbp)
movl -4(%rbp), %eax
cltq
movq Arr.1724(,%rax,8), %rax
movq %rax, -16(%rbp)
movsd -16(%rbp), %xmm0
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size tfuuuuuuu, .-tfuuuuuuu
.section .rodata
.align 32
.type Arr.1724, @object
.size Arr.1724, 1238400
Arr.1724:
.long 0
.long 1080082432
.long 547853659
.long 1071734525
.long 508238255
.long 1071602032
.long 2595749041
.long -1076305010
.long 3223218337
...
.ident "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-16)"
.section .note.GNU-stack,"",@progbits
So both define the data globally and reference that global array directly.
Now let's look at the non-static code. First for MSVC 2015:
; Listing generated by Microsoft (R) Optimizing Compiler Version 19.00.24215.1
TITLE MyLBP.c
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
PUBLIC _tfuuuuuuu
PUBLIC __real@3e45798ee2308c3a
PUBLIC __real@3f40e1cf9350aa3c
PUBLIC __real@3f43b1f90beff84b
PUBLIC __real@3f4c6220dc6e8066
PUBLIC __real@3f4ea4c648794089
PUBLIC __real@3f50023666188dc0
PUBLIC __real@3f53957e56f300e9
PUBLIC __real@3f55235d7d33b25f
PUBLIC __real@3f5828f66e5bd33a
PUBLIC __real@3f5c044284dfce31
PUBLIC __real@3f5c87c05341c674
...
EXTRN @__security_check_cookie@4:PROC
EXTRN __chkstk:PROC
EXTRN _memset:PROC
EXTRN ___security_cookie:DWORD
EXTRN __fltused:DWORD
; COMDAT __real@bff0000000000000
CONST SEGMENT
__real@bff0000000000000 DQ 0bff0000000000000r ; -1
CONST ENDS
; COMDAT __real@bfefffffdfc9a9ad
CONST SEGMENT
__real@bfefffffdfc9a9ad DQ 0bfefffffdfc9a9adr ; -1
CONST ENDS
; COMDAT __real@bfefffffbf935359
CONST SEGMENT
__real@bfefffffbf935359 DQ 0bfefffffbf935359r ; -1
CONST ENDS
; COMDAT __real@bfefffff9f5cfd06
CONST SEGMENT
__real@bfefffff9f5cfd06 DQ 0bfefffff9f5cfd06r ; -1
CONST ENDS
; COMDAT __real@bfefffff7f26a6b3
CONST SEGMENT
__real@bfefffff7f26a6b3 DQ 0bfefffff7f26a6b3r ; -1
CONST ENDS
; COMDAT __real@bfefffff5ef05060
CONST SEGMENT
__real@bfefffff5ef05060 DQ 0bfefffff5ef05060r ; -1
CONST ENDS
...
; Function compile flags: /Odtp
_TEXT SEGMENT
_Arr$ = -1238404 ; size = 1238400
__$ArrayPad$ = -4 ; size = 4
_Ind$ = 8 ; size = 4
_tfuuuuuuu PROC
; File c:\users\dennis bush\documents\x2.c
; Line 4
push ebp
mov ebp, esp
mov eax, 1238404 ; 0012e584H
call __chkstk
mov eax, DWORD PTR ___security_cookie
xor eax, ebp
mov DWORD PTR __$ArrayPad$[ebp], eax
; Line 5
movsd xmm0, QWORD PTR __real@4060c00000000000
movsd QWORD PTR _Arr$[ebp], xmm0
movsd xmm0, QWORD PTR __real@3fe15efd20a7955b
movsd QWORD PTR _Arr$[ebp+8], xmm0
movsd xmm0, QWORD PTR __real@3fdf59701e4b19af
movsd QWORD PTR _Arr$[ebp+16], xmm0
movsd xmm0, QWORD PTR __real@bfd8e38e9ab7fcb1
movsd QWORD PTR _Arr$[ebp+24], xmm0
movsd xmm0, QWORD PTR __real@bfe59f22c01e68a1
movsd QWORD PTR _Arr$[ebp+32], xmm0
movsd xmm0, QWORD PTR __real@bfeb13b15d5aa410
movsd QWORD PTR _Arr$[ebp+40], xmm0
movsd xmm0, QWORD PTR __real@bfe2c2355f07776e
movsd QWORD PTR _Arr$[ebp+48], xmm0
...
push 1036128 ; 000fcf60H
push 0
lea eax, DWORD PTR _Arr$[ebp+202272]
push eax
call _memset
add esp, 12 ; 0000000cH
; Line 106
mov ecx, DWORD PTR _Ind$[ebp]
fld QWORD PTR _Arr$[ebp+ecx*8]
; Line 107
mov ecx, DWORD PTR __$ArrayPad$[ebp]
xor ecx, ebp
call @__security_check_cookie@4
mov esp, ebp
pop ebp
ret 0
_tfuuuuuuu ENDP
_TEXT ENDS
END
The initializers are still stored globally. However, notice how each value is given its own internal name and that two move instructions are generated for each value in the array. Creating those names and the explicit moves is why it takes so long to generate the code.
And now the gcc 4.8.5 version:
.file "MyLBP.c"
.section .rodata
.align 32
.LC0:
.long 0
.long 1080082432
.long 547853659
.long 1071734525
.long 508238255
.long 1071602032
.long 2595749041
.long -1076305010
.long 3223218337
.long -1075470558
...
.text
.globl tfuuuuuuu
.type tfuuuuuuu, @function
tfuuuuuuu:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $1238416, %rsp
movl %edi, -1238404(%rbp)
leaq -1238400(%rbp), %rax
movl $.LC0, %ecx
movl $1238400, %edx
movq %rcx, %rsi
movq %rax, %rdi
call memcpy ; <-------------- call to memcpy
movl -1238404(%rbp), %eax
cltq
movq -1238400(%rbp,%rax,8), %rax
movq %rax, -1238416(%rbp)
movsd -1238416(%rbp), %xmm0
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size tfuuuuuuu, .-tfuuuuuuu
.ident "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-16)"
.section .note.GNU-stack,"",@progbits
Rather than generating explicit instructions to copy each value, gcc just calls `memcpy` to copy the values from global data into the local array, so generating the initialization code is much faster.
So the moral of the story is that MSVC is being very inefficient in how it initializes local variables.
Also, as noted in the comments, this is a confirmed bug which is due to be fixed in VS 2019.
`const` or not, a non-`static` function local must be constructed whenever the function is entered and the declaration is reached. Your compiler is spending time generating the code to perform that action at runtime, which may be arduous when the initialiser is super long.
By contrast, a `static` of this form can just have its initial value plonked into the executable somewhere, with no runtime spin-up needed.
It does sound like a bit of a QoI issue with your compiler if you're really seeing a big difference in build times (particularly as 1.2MB isn't that much data), but the two pieces of code are fundamentally different and huge initialisers for things destined to live "on the stack" are typically something to avoid.