@@ -73,9 +73,9 @@ func dse(f *Func) {
7373 }
7474
7575 // Walk backwards looking for dead stores. Keep track of shadowed addresses.
76- // A "shadowed address" is a pointer and a size describing a memory region that
77- // is known to be written. We keep track of shadowed addresses in the shadowed
78- // map, mapping the ID of the address to the size of the shadowed region .
76+ // A "shadowed address" is a pointer, offset, and size describing a memory region that
77+ // is known to be written. We keep track of shadowed addresses in the shadowed map,
78+ // mapping the ID of the address to a shadowRange where future writes will happen.
7979 // Since we're walking backwards, writes to a shadowed region are useless,
8080 // as they will be immediately overwritten.
8181 shadowed .clear ()
@@ -88,13 +88,20 @@ func dse(f *Func) {
8888 shadowed .clear ()
8989 }
9090 if v .Op == OpStore || v .Op == OpZero {
91+ ptr := v .Args [0 ]
92+ var off int64
93+ for ptr .Op == OpOffPtr { // Walk to base pointer
94+ off += ptr .AuxInt
95+ ptr = ptr .Args [0 ]
96+ }
9197 var sz int64
9298 if v .Op == OpStore {
9399 sz = v .Aux .(* types.Type ).Size ()
94100 } else { // OpZero
95101 sz = v .AuxInt
96102 }
97- if shadowedSize := int64 (shadowed .get (v .Args [0 ].ID )); shadowedSize != - 1 && shadowedSize >= sz {
103+ sr := shadowRange (shadowed .get (ptr .ID ))
104+ if sr .contains (off , off + sz ) {
98105 // Modify the store/zero into a copy of the memory state,
99106 // effectively eliding the store operation.
100107 if v .Op == OpStore {
@@ -108,10 +115,8 @@ func dse(f *Func) {
108115 v .AuxInt = 0
109116 v .Op = OpCopy
110117 } else {
111- if sz > 0x7fffffff { // work around sparseMap's int32 value type
112- sz = 0x7fffffff
113- }
114- shadowed .set (v .Args [0 ].ID , int32 (sz ))
118+ // Extend shadowed region.
119+ shadowed .set (ptr .ID , int32 (sr .merge (off , off + sz )))
115120 }
116121 }
117122 // walk to previous store
@@ -131,6 +136,49 @@ func dse(f *Func) {
131136 }
132137}
133138
// A shadowRange records, as a pair of byte offsets from some base
// pointer, a region of memory that is known to be overwritten later
// in the block. The zero shadowRange is the empty range, and so is
// -1 (the value sparsemap.get reports for a missing entry), since
// both decode to lo() == hi().
type shadowRange int32

// lo returns the lower byte offset of the shadowed range.
func (sr shadowRange) lo() int64 {
	return int64(sr) & 0xffff
}

// hi returns the upper byte offset of the shadowed range.
func (sr shadowRange) hi() int64 {
	return (int64(sr) >> 16) & 0xffff
}

// contains reports whether the byte range [lo:hi] lies entirely
// inside the range recorded in sr.
func (sr shadowRange) contains(lo, hi int64) bool {
	if lo < sr.lo() {
		return false
	}
	return hi <= sr.hi()
}

// merge returns a shadowRange covering both sr and [lo:hi].
// merge may conservatively return something smaller than the
// true union.
func (sr shadowRange) merge(lo, hi int64) shadowRange {
	if lo < 0 || hi > 0xffff {
		// Offsets outside [0,0xffff] cannot be encoded in 16 bits
		// each; drop the new range and keep what we have.
		return sr
	}
	oldLo, oldHi := sr.lo(), sr.hi()
	if oldLo == oldHi {
		// sr is empty, so the new range stands on its own.
		return packShadowRange(lo, hi)
	}
	if hi < oldLo || lo > oldHi {
		// The ranges neither overlap nor abut. Only one contiguous
		// range can be tracked, so keep whichever is wider.
		if hi-lo > oldHi-oldLo {
			return packShadowRange(lo, hi)
		}
		return sr
	}
	// Overlapping or abutting ranges: record the exact union.
	return packShadowRange(min(lo, oldLo), max(hi, oldHi))
}

// packShadowRange encodes the range [lo:hi] into a shadowRange.
// Both bounds must already be known to fit in 16 bits; the
// arithmetic is done in int64 because hi<<16 can exceed the
// positive int32 range (the resulting negative encoding still
// round-trips correctly through lo/hi).
func packShadowRange(lo, hi int64) shadowRange {
	return shadowRange(lo + hi<<16)
}
181+
134182// elimDeadAutosGeneric deletes autos that are never accessed. To achieve this
135183// we track the operations that the address of each auto reaches and if it only
136184// reaches stores then we delete all the stores. The other operations will then
0 commit comments