@@ -119,14 +119,44 @@ function is_backward_branch_target(
119
119
return false ;
120
120
}
121
121
122
// Known-constant record for a local that holds a numeric constant.
// emit_ldc stores one of these when the loaded constant is a JavaScript number.
// `type` is the discriminant tag; `value` is the constant itself.
+ interface KnownConstantI32 {
123
+ type : "i32" ;
124
+ value : number ;
125
+ }
126
+
127
// Known-constant record for a local that holds a v128 constant.
// emit_simd stores one of these after emitting a v128_const; `value` is the same
// byte view that was passed to builder.v128_const.
+ interface KnownConstantV128 {
128
+ type : "v128" ;
129
+ value : Uint8Array ;
130
+ }
131
+
132
// Known-constant record for a local that holds the address of another local.
// Recorded by the ldloca handler (locals[ip[1]] = &locals[ip[2]]); `offset` is the
// offset of the local whose address was taken, not the destination local.
+ interface KnownConstantLdloca {
133
+ type : "ldloca" ;
134
+ offset : number ;
135
+ }
136
+
137
// Union of every kind of known constant; discriminated by the `type` field.
+ type KnownConstant = KnownConstantI32 | KnownConstantV128 | KnownConstantLdloca ;
122
138
// Raw payload returned by get_known_constant_value (i32 and v128 constants only).
type KnownConstantValue = number | Uint8Array ;
123
- const knownConstantValues = new Map < number , KnownConstantValue > ( ) ;
139
// Inferred constants for the current trace, keyed by local offset.
// Entries are removed by invalidate_local and the whole table is dropped by eraseInferredState.
+ const knownConstants = new Map < number , KnownConstant > ( ) ;
124
140
125
- function get_known_constant_value ( builder : WasmBuilder , localOffset : number ) : KnownConstantValue | undefined {
141
// Looks up the known-constant record for the local at `localOffset`.
// Returns undefined when isAddressTaken reports true for that local, regardless of
// what the table contains, and also when no record has been stored for it.
+ function get_known_constant ( builder : WasmBuilder , localOffset : number ) : KnownConstant | undefined {
126
142
if ( isAddressTaken ( builder , localOffset ) )
127
143
return undefined ;
128
144
129
- return knownConstantValues . get ( localOffset ) ;
145
+ return knownConstants . get ( localOffset ) ;
146
+ }
147
+
148
// Returns the raw value (number or Uint8Array) of the known constant stored for
// `localOffset`, or undefined if there is none.
// Only "i32" and "v128" records carry a value; a "ldloca" record falls out of the
// switch and yields undefined, so callers of this helper never see address constants.
+ function get_known_constant_value ( builder : WasmBuilder , localOffset : number ) : KnownConstantValue | undefined {
149
+ const kc = get_known_constant ( builder , localOffset ) ;
150
+ if ( kc === undefined )
151
+ return undefined ;
152
+
153
+ switch ( kc . type ) {
154
+ case "i32" :
155
+ case "v128" :
156
+ return kc . value ;
157
+ }
158
+
159
+ return undefined ;
130
160
}
131
161
132
162
// Perform a quick scan through the opcodes potentially in this trace to build a table of
@@ -553,11 +583,20 @@ export function generateWasmBody(
553
583
builder . local ( "pLocals" ) ;
554
584
// locals[ip[1]] = &locals[ip[2]]
555
585
const offset = getArgU16 ( ip , 2 ) ,
556
- flag = isAddressTaken ( builder , offset ) ;
586
+ flag = isAddressTaken ( builder , offset ) ,
587
+ destOffset = getArgU16 ( ip , 1 ) ;
557
588
if ( ! flag )
558
589
mono_log_error ( `${ traceName } : Expected local ${ offset } to have address taken flag` ) ;
559
590
append_ldloca ( builder , offset ) ;
560
- append_stloc_tail ( builder , getArgU16 ( ip , 1 ) , WasmOpcode . i32_store ) ;
591
+ append_stloc_tail ( builder , destOffset , WasmOpcode . i32_store ) ;
592
+ // Record this ldloca as a known constant so that later uses of it turn into a lea,
593
+ // and the wasm runtime can constant fold them with other constants. It's not uncommon
594
+ // to have code that does '&x + c', which (if this optimization works) should
595
+ // turn into '&locals + offsetof(x) + c' and get constant folded to have the same cost
596
+ // as a regular ldloc
597
+ knownConstants . set ( destOffset , { type : "ldloca" , offset : offset } ) ;
598
+ // dreg invalidation would blow the known constant away, so disable it
599
+ skipDregInvalidation = true ;
561
600
break ;
562
601
}
563
602
@@ -1712,14 +1751,14 @@ let cknullOffset = -1;
1712
1751
// Discards all inferred per-trace state: resets cknullOffset to -1 and empties both
// the notNullSince table and the knownConstants table.
function eraseInferredState ( ) {
1713
1752
cknullOffset = - 1 ;
1714
1753
notNullSince . clear ( ) ;
1715
- knownConstantValues . clear ( ) ;
1754
+ knownConstants . clear ( ) ;
1716
1755
}
1717
1756
1718
1757
// Discards inferred state for the single local at `offset`: resets cknullOffset if it
// currently refers to this local, and removes the local's entries from notNullSince
// and knownConstants.
function invalidate_local ( offset : number ) {
1719
1758
if ( cknullOffset === offset )
1720
1759
cknullOffset = - 1 ;
1721
1760
notNullSince . delete ( offset ) ;
1722
- knownConstantValues . delete ( offset ) ;
1761
+ knownConstants . delete ( offset ) ;
1723
1762
}
1724
1763
1725
1764
function invalidate_local_range ( start : number , bytes : number ) {
@@ -1792,7 +1831,47 @@ function computeMemoryAlignment(offset: number, opcodeOrPrefix: WasmOpcode, simd
1792
1831
return alignment ;
1793
1832
}
1794
1833
1834
// Tries to satisfy a load of the local at `offset` from the known-constant table
// instead of emitting a memory load.
//
// builder        - trace builder; constant propagation only happens when builder.options.cprop is set
// offset         - offset of the local being loaded
// opcodeOrPrefix - the load opcode requested; only WasmOpcode.i32_load is eligible
// dryRun         - when true, nothing is appended to the builder; the return value only reports
//                  whether propagation would succeed
// requireNonzero - when true, an "i32" constant equal to 0 is rejected (returns false)
//
// Returns true if a known constant was (or, for a dry run, could be) emitted in place of the
// load, false otherwise. "v128" records have no case in the switch, so they return false.
+ function try_append_ldloc_cprop (
1835
+ builder : WasmBuilder , offset : number , opcodeOrPrefix : WasmOpcode ,
1836
+ dryRun : boolean , requireNonzero ?: boolean
1837
+ ) {
1838
+ if ( builder . options . cprop && ( opcodeOrPrefix === WasmOpcode . i32_load ) ) {
1839
+ // It's common to ldc.i4 or ldloca immediately before using the value
1840
+ // in these cases the known constant analysis will work consistently, and we can skip the extra
1841
+ // memory load to read the constant we just wrote to a local. the resulting traces should be
1842
+ // both smaller and faster, while still correct since the ldc still writes to memory
1843
+ // of course, if known constant analysis is broken, this will break too, but it's better to
1844
+ // learn immediately whether known constant analysis has been broken this whole time
1845
+ // at least on x86 this will enable much better native code generation for the trace, since
1846
+ // operations like memory stores have forms that accept an immediate as rhs
1847
+ const knownConstant = get_known_constant ( builder , offset ) ;
1848
+ if ( knownConstant ) {
1849
+ switch ( knownConstant . type ) {
1850
+ case "i32" :
1851
+ if ( requireNonzero && ( knownConstant . value === 0 ) )
1852
+ return false ;
1853
+ if ( ! dryRun )
1854
+ builder . i32_const ( knownConstant . value ) ;
1855
+ return true ;
1856
+ case "ldloca" :
1857
+ // FIXME: Do we need to invalidate the local again? I don't think we do, we invalidated it
1858
+ // when the ldloca operation originally happened, and we're just propagating that address
1859
+ // constant forward to its point of use
1860
+ // requireNonzero is a no-op since ldloca always produces a nonzero result
1861
+ if ( ! dryRun )
// bytesInvalidated is passed as 0 here, so this re-emitted address invalidates nothing
1862
+ append_ldloca ( builder , knownConstant . offset , 0 ) ;
1863
+ return true ;
1864
+ }
1865
+ }
1866
+ }
1867
+
1868
+ return false ;
1869
+ }
1870
+
1795
1871
function append_ldloc ( builder : WasmBuilder , offset : number , opcodeOrPrefix : WasmOpcode , simdOpcode ?: WasmSimdOpcode ) {
1872
+ if ( try_append_ldloc_cprop ( builder , offset , opcodeOrPrefix , false ) )
1873
+ return ;
1874
+
1796
1875
builder . local ( "pLocals" ) ;
1797
1876
mono_assert ( opcodeOrPrefix >= WasmOpcode . i32_load , ( ) => `Expected load opcode but got ${ opcodeOrPrefix } ` ) ;
1798
1877
builder . appendU8 ( opcodeOrPrefix ) ;
@@ -1828,8 +1907,6 @@ function append_stloc_tail(builder: WasmBuilder, offset: number, opcodeOrPrefix:
1828
1907
1829
1908
// Pass bytesInvalidated=0 if you are reading from the local and the address will never be
1830
1909
// used for writes
1831
- // Pass transient=true if the address will not persist after use (so it can't be used to later
1832
- // modify the contents of this local)
1833
1910
function append_ldloca ( builder : WasmBuilder , localOffset : number , bytesInvalidated ?: number ) {
1834
1911
if ( typeof ( bytesInvalidated ) !== "number" )
1835
1912
bytesInvalidated = 512 ;
@@ -1985,9 +2062,9 @@ function emit_ldc(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode):
1985
2062
invalidate_local ( localOffset ) ;
1986
2063
1987
2064
if ( typeof ( value ) === "number" )
1988
- knownConstantValues . set ( localOffset , value ) ;
2065
+ knownConstants . set ( localOffset , { type : "i32" , value : value } ) ;
1989
2066
else
1990
- knownConstantValues . delete ( localOffset ) ;
2067
+ knownConstants . delete ( localOffset ) ;
1991
2068
1992
2069
return true ;
1993
2070
}
@@ -2092,6 +2169,8 @@ function emit_fieldop(
2092
2169
notNullSince . has ( objectOffset ) &&
2093
2170
! isAddressTaken ( builder , objectOffset ) ;
2094
2171
2172
+ // TODO: Figure out whether this is commonly used to access fields of structs that
2173
+ // live on the stack, and if so, whether we want to do cprop of the ldloca
2095
2174
if (
2096
2175
( opcode !== MintOpcode . MINT_LDFLDA_UNSAFE ) &&
2097
2176
( opcode !== MintOpcode . MINT_STFLD_O )
@@ -3088,13 +3167,21 @@ function emit_indirectop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOp
3088
3167
return false ;
3089
3168
}
3090
3169
3091
- append_ldloc_cknull ( builder , addressVarIndex , ip , false ) ;
3170
+ // Check whether ldloc cprop is possible for the address var; if it is, skip doing the ldloc_cknull.
3171
+ // We'll also skip loading cknull_ptr later.
3172
+ const addressCprop = try_append_ldloc_cprop ( builder , addressVarIndex , WasmOpcode . i32_load , true , true ) ;
3173
+ if ( ! addressCprop )
3174
+ append_ldloc_cknull ( builder , addressVarIndex , ip , false ) ;
3092
3175
3093
3176
if ( isLoad ) {
3094
3177
// pre-load pLocals for the store operation
3095
3178
builder . local ( "pLocals" ) ;
3096
3179
// Load address
3097
- builder . local ( "cknull_ptr" ) ;
3180
+ if ( addressCprop )
3181
+ mono_assert ( try_append_ldloc_cprop ( builder , addressVarIndex , WasmOpcode . i32_load , false , true ) , "Unknown jiterpreter cprop failure" ) ;
3182
+ else
3183
+ builder . local ( "cknull_ptr" ) ;
3184
+
3098
3185
// For ldind_offset we need to load an offset from another local
3099
3186
// and then add it to the null checked address
3100
3187
if ( isAddMul ) {
@@ -3126,13 +3213,21 @@ function emit_indirectop(builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOp
3126
3213
append_stloc_tail ( builder , valueVarIndex , setter ) ;
3127
3214
} else if ( opcode === MintOpcode . MINT_STIND_REF ) {
3128
3215
// Load destination address
3129
- builder . local ( "cknull_ptr" ) ;
3216
+ if ( addressCprop )
3217
+ mono_assert ( try_append_ldloc_cprop ( builder , addressVarIndex , WasmOpcode . i32_load , false , true ) , "Unknown jiterpreter cprop failure" ) ;
3218
+ else
3219
+ builder . local ( "cknull_ptr" ) ;
3220
+
3130
3221
// Load address of value so that copy_managed_pointer can grab it
3131
3222
append_ldloca ( builder , valueVarIndex , 0 ) ;
3132
3223
builder . callImport ( "copy_ptr" ) ;
3133
3224
} else {
3134
3225
// Pre-load address for the store operation
3135
- builder . local ( "cknull_ptr" ) ;
3226
+ if ( addressCprop )
3227
+ mono_assert ( try_append_ldloc_cprop ( builder , addressVarIndex , WasmOpcode . i32_load , false , true ) , "Unknown jiterpreter cprop failure" ) ;
3228
+ else
3229
+ builder . local ( "cknull_ptr" ) ;
3230
+
3136
3231
// For stind_offset we need to load an offset from another local
3137
3232
// and then add it to the null checked address
3138
3233
if ( isOffset && offsetVarIndex >= 0 ) {
@@ -3429,7 +3524,7 @@ function emit_simd(
3429
3524
const view = localHeapViewU8 ( ) . slice ( < any > ip + 4 , < any > ip + 4 + sizeOfV128 ) ;
3430
3525
builder . v128_const ( view ) ;
3431
3526
append_simd_store ( builder , ip ) ;
3432
- knownConstantValues . set ( getArgU16 ( ip , 1 ) , view ) ;
3527
+ knownConstants . set ( getArgU16 ( ip , 1 ) , { type : "v128" , value : view } ) ;
3433
3528
} else {
3434
3529
// dest
3435
3530
append_ldloca ( builder , getArgU16 ( ip , 1 ) , sizeOfV128 ) ;
0 commit comments