path: root/compiler/dex/quick/local_optimizations.cc
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "dex/compiler_internals.h"

namespace art {

#define DEBUG_OPT(X)

/* Check RAW, WAR, and WAW dependencies on the register operands */
#define CHECK_REG_DEP(use, def, check) (def.Intersects(*check->u.m.use_mask)) || \
                                       (use.Union(def).Intersects(*check->u.m.def_mask))
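/*
 * Reading 'use'/'def' as the masks of the instruction being moved and 'check'
 * as the instruction being scanned past: the first term fires when check uses
 * a register this instruction defines, the second when check defines a
 * register this instruction uses or defines - together covering the RAW, WAR
 * and WAW cases in either scan direction.
 */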

/* Scheduler heuristics */
#define MAX_HOIST_DISTANCE 20
#define LDLD_DISTANCE 4
#define LD_LATENCY 2
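/*
 * MAX_HOIST_DISTANCE caps how many prior instructions the hoisting pass
 * records as candidate insertion slots, LDLD_DISTANCE is the extra separation
 * requested when the blocking instruction is itself a load, and LD_LATENCY is
 * the assumed load-use latency (in slots) that hoisting tries to hide.
 */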

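/*
 * Returns true when the Dalvik vreg (or vreg pair, for wide accesses)
 * referenced by lir1 overlaps the one referenced by lir2; alias_info encodes
 * the base vreg number plus a wide flag.
 */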
static bool IsDalvikRegisterClobbered(LIR* lir1, LIR* lir2) {
  int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->flags.alias_info);
  int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->flags.alias_info);
  int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->flags.alias_info);
  int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->flags.alias_info);

  return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
}

/* Convert a more expensive instruction (i.e. a load) into a move */
void Mir2Lir::ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src) {
  /* Insert a move to replace the load */
  LIR* move_lir;
  move_lir = OpRegCopyNoInsert(dest, src);
  /*
   * Insert the converted instruction after the original since the
   * optimization is scanning in the top-down order and the new instruction
   * will need to be re-checked (e.g. the new dest clobbers the src used in
   * this_lir).
   */
  InsertLIRAfter(orig_lir, move_lir);
}

/*
 * Perform a pass over the superblock: starting from the second-last
 * instruction and walking backwards, scan forward from each candidate
 * load/store to eliminate redundant loads and stores.
 *
 * An earlier load can eliminate a later load iff
 *   1) They are must-aliases
 *   2) The native register is not clobbered in between
 *   3) The memory location is not written to in between
 *
 * An earlier store can eliminate a later load iff
 *   1) They are must-aliases
 *   2) The native register is not clobbered in between
 *   3) The memory location is not written to in between
 *
 * A later store can be eliminated by an earlier store iff
 *   1) They are must-aliases
 *   2) The memory location is not written to in between
 */
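/*
 * Illustrative sketch (mnemonics, registers, and stack offsets are
 * hypothetical):
 *
 *   ldr r1, [sp, #16]    @ load of Dalvik vreg vA into r1
 *   add r2, r2, r3       @ independent work
 *   ldr r5, [sp, #16]    @ redundant reload of vA
 *
 * Since the two loads must-alias and neither r1 nor the stack slot is written
 * in between, the second load can be rewritten as "mov r5, r1" (via
 * ConvertMemOpIntoMove) or simply NOPed when r5 == r1.
 */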
void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) {
  LIR* this_lir;

  if (head_lir == tail_lir) {
    return;
  }

  for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) {
    if (IsPseudoLirOp(this_lir->opcode)) {
      continue;
    }

    int sink_distance = 0;

    uint64_t target_flags = GetTargetInstFlags(this_lir->opcode);

    /* Skip non-interesting instructions */
    if ((this_lir->flags.is_nop == true) ||
        (target_flags & IS_BRANCH) ||
        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||  // Skip wide loads.
        ((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) ==
         (REG_USE0 | REG_USE1 | REG_USE2)) ||  // Skip wide stores.
        // Skip instructions that are neither loads nor stores.
        !(target_flags & (IS_LOAD | IS_STORE)) ||
        // Skip instructions that do both load and store.
        ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
      continue;
    }

    int native_reg_id;
    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
      // On x86, the register operand's position differs for memory vs. register operations.
      native_reg_id = (target_flags & IS_STORE) ? this_lir->operands[2] : this_lir->operands[0];
    } else {
      native_reg_id = this_lir->operands[0];
    }
    bool is_this_lir_load = target_flags & IS_LOAD;
    LIR* check_lir;
    /* Use the mem mask to determine the rough memory location */
    ResourceMask this_mem_mask = kEncodeMem.Intersection(
        this_lir->u.m.use_mask->Union(*this_lir->u.m.def_mask));

    /*
     * Currently only eliminate redundant ld/st for constant and Dalvik
     * register accesses.
     */
    if (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeDalvikReg))) {
      continue;
    }

    ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem);

    /*
     * Add pc to the resource mask to prevent this instruction
     * from sinking past branch instructions. Also take out the memory
     * region bits since stop_mask is used to check data/control
     * dependencies.
     *
     * Note: on x86(-64) and Arm64 we use the IsBranch bit, as the PC is not exposed.
     */
    ResourceMask pc_encoding = GetPCUseDefEncoding();
    if (pc_encoding == kEncodeNone) {
      // TODO: Stop the abuse of kIsBranch as a bit specification for ResourceMask.
      pc_encoding = ResourceMask::Bit(kIsBranch);
    }
    ResourceMask stop_use_reg_mask = pc_encoding.Union(*this_lir->u.m.use_mask).
        Without(kEncodeMem);

    for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) {
      /*
       * Skip already dead instructions (whose dataflow information is
       * outdated and misleading).
       */
      if (check_lir->flags.is_nop || IsPseudoLirOp(check_lir->opcode)) {
        continue;
      }

      ResourceMask check_mem_mask = kEncodeMem.Intersection(
          check_lir->u.m.use_mask->Union(*check_lir->u.m.def_mask));
      ResourceMask alias_condition = this_mem_mask.Intersection(check_mem_mask);
      bool stop_here = false;

      /*
       * Potential aliases seen - check the alias relations
       */
      uint64_t check_flags = GetTargetInstFlags(check_lir->opcode);
      // TUNING: Support instructions with multiple register targets.
      if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) {
        stop_here = true;
      } else if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) {
        bool is_check_lir_load = check_flags & IS_LOAD;
        if (alias_condition.Equals(kEncodeLiteral)) {
          /*
           * Should only see literal loads in the instruction
           * stream.
           */
          DCHECK(!(check_flags & IS_STORE));
          /* Same value && same register type */
          if (check_lir->flags.alias_info == this_lir->flags.alias_info &&
              RegStorage::SameRegType(check_lir->operands[0], native_reg_id)) {
            /*
             * Different destination register - insert
             * a move
             */
            if (check_lir->operands[0] != native_reg_id) {
              // TODO: update for 64-bit regs.
              ConvertMemOpIntoMove(check_lir, RegStorage::Solo32(check_lir->operands[0]),
                                   RegStorage::Solo32(native_reg_id));
            }
            NopLIR(check_lir);
          }
        } else if (alias_condition.Equals(kEncodeDalvikReg)) {
          /* Must alias */
          if (check_lir->flags.alias_info == this_lir->flags.alias_info) {
            /* Only optimize compatible registers */
            bool reg_compatible = RegStorage::SameRegType(check_lir->operands[0], native_reg_id);
            if ((is_this_lir_load && is_check_lir_load) ||
                (!is_this_lir_load && is_check_lir_load)) {
              /* RAR or RAW */
              if (reg_compatible) {
                /*
                 * Different destination register -
                 * insert a move
                 */
                if (check_lir->operands[0] != native_reg_id) {
                  // TODO: update for 64-bit regs.
                  ConvertMemOpIntoMove(check_lir, RegStorage::Solo32(check_lir->operands[0]),
                                       RegStorage::Solo32(native_reg_id));
                }
                NopLIR(check_lir);
              } else {
                /*
                 * Destinations are of different types -
                 * something complicated is going on, so
                 * stop looking now.
                 */
                stop_here = true;
              }
            } else if (is_this_lir_load && !is_check_lir_load) {
              /* WAR - register value is killed */
              stop_here = true;
            } else if (!is_this_lir_load && !is_check_lir_load) {
              /* WAW - nuke the earlier store */
              NopLIR(this_lir);
              stop_here = true;
            }
          /* Partial overlap */
          } else if (IsDalvikRegisterClobbered(this_lir, check_lir)) {
            /*
             * It is actually ok to continue if check_lir
             * is a read. But it is hard to make a test
             * case for this so we just stop here to be
             * conservative.
             */
            stop_here = true;
          }
        }
        /* Memory content may be updated. Stop looking now. */
        if (stop_here) {
          break;
        /* The check_lir has been transformed - check the next one */
        } else if (check_lir->flags.is_nop) {
          continue;
        }
      }


      /*
       * this and check LIRs have no memory dependency. Now check if
       * their register operands have any RAW, WAR, and WAW
       * dependencies. If so, stop looking.
       */
      if (stop_here == false) {
        stop_here = CHECK_REG_DEP(stop_use_reg_mask, stop_def_reg_mask, check_lir);
      }

      if (stop_here == true) {
        if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
          // Prevent stores from being sunk between ops that generate ccodes and
          // ops that use them.
          uint64_t flags = GetTargetInstFlags(check_lir->opcode);
          if (sink_distance > 0 && (flags & IS_BRANCH) && (flags & USES_CCODES)) {
            check_lir = PREV_LIR(check_lir);
            sink_distance--;
          }
        }
        DEBUG_OPT(dump_dependent_insn_pair(this_lir, check_lir, "REG CLOBBERED"));
        /* Only sink store instructions */
        if (sink_distance && !is_this_lir_load) {
          LIR* new_store_lir =
              static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
          *new_store_lir = *this_lir;
          /*
           * Stop point found - insert *before* the check_lir
           * since the instruction list is scanned in the
           * top-down order.
           */
          InsertLIRBefore(check_lir, new_store_lir);
          NopLIR(this_lir);
        }
        break;
      } else if (!check_lir->flags.is_nop) {
        sink_distance++;
      }
    }
  }
}

/*
 * Perform a pass over the superblock: starting from the second instruction
 * and walking forward, scan backwards (bottom-up) from each load to try to
 * hoist it to an earlier slot.
 */
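/*
 * Illustrative sketch (mnemonics and registers are hypothetical):
 *
 *   add r2, r2, r3
 *   add r4, r4, r5
 *   ldr r1, [sp, #16]    @ load whose result is consumed right away
 *   add r6, r1, r2
 *
 * If the load does not depend on the two adds, hoisting it above them lets up
 * to LD_LATENCY slots of independent work hide its latency.
 */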
void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) {
  LIR* this_lir, *check_lir;
  /*
   * Store the list of independent instructions that the load can be hoisted
   * past. The best place to insert is decided later.
   */
  LIR* prev_inst_list[MAX_HOIST_DISTANCE];

  /* Empty block */
  if (head_lir == tail_lir) {
    return;
  }

  /* Start from the second instruction */
  for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) {
    if (IsPseudoLirOp(this_lir->opcode)) {
      continue;
    }

    uint64_t target_flags = GetTargetInstFlags(this_lir->opcode);
    /* Skip non-interesting instructions */
    if (!(target_flags & IS_LOAD) ||
        (this_lir->flags.is_nop == true) ||
        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||
        ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
      continue;
    }

    ResourceMask stop_use_all_mask = *this_lir->u.m.use_mask;

    /*
     * Branches for null/range checks are marked with the true resource
     * bits, and loads to Dalvik registers, constant pools, and non-alias
     * locations are safe to be hoisted. So only mark the heap references
     * conservatively here.
     *
     * Note: on x86(-64) and Arm64 this will add kEncodeNone.
     * TODO: Sanity check. LoadStoreElimination uses kBranchBit to fake a PC.
     */
    if (stop_use_all_mask.HasBit(ResourceMask::kHeapRef)) {
      stop_use_all_mask.SetBits(GetPCUseDefEncoding());
    }

    /* Similar to the above, but only check for pure register dependencies */
    ResourceMask stop_use_reg_mask = stop_use_all_mask.Without(kEncodeMem);
    ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem);

    int next_slot = 0;
    bool stop_here = false;

    /* Try to hoist the load to a good spot */
    for (check_lir = PREV_LIR(this_lir); check_lir != head_lir; check_lir = PREV_LIR(check_lir)) {
      /*
       * Skip already dead instructions (whose dataflow information is
       * outdated and misleading).
       */
      if (check_lir->flags.is_nop) {
        continue;
      }

      ResourceMask check_mem_mask = check_lir->u.m.def_mask->Intersection(kEncodeMem);
      ResourceMask alias_condition = stop_use_all_mask.Intersection(check_mem_mask);
      stop_here = false;

      /* Potential WAR alias seen - check the exact relation */
      if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) {
        /* We can fully disambiguate Dalvik references */
        if (alias_condition.Equals(kEncodeDalvikReg)) {
          /* Must alias or partially overlap */
          if ((check_lir->flags.alias_info == this_lir->flags.alias_info) ||
            IsDalvikRegisterClobbered(this_lir, check_lir)) {
            stop_here = true;
          }
        /* Conservatively treat all heap refs as may-alias */
        } else {
          DCHECK(alias_condition.Equals(kEncodeHeapRef));
          stop_here = true;
        }
        /* Memory content may be updated. Stop looking now. */
        if (stop_here) {
          prev_inst_list[next_slot++] = check_lir;
          break;
        }
      }

      if (stop_here == false) {
        stop_here = CHECK_REG_DEP(stop_use_reg_mask, stop_def_reg_mask,
                     check_lir);
      }

      /*
       * Store the dependent instruction, or the independent non-pseudo
       * instruction, in the list.
       */
      if (stop_here || !IsPseudoLirOp(check_lir->opcode)) {
        prev_inst_list[next_slot++] = check_lir;
        if (next_slot == MAX_HOIST_DISTANCE) {
          break;
        }
      }

      /* Hit a dependency - stop scanning; the hoisting slot is chosen below */
      if (stop_here == true) {
        DEBUG_OPT(dump_dependent_insn_pair(check_lir, this_lir, "HOIST STOP"));
        break;
      }
    }

    /*
     * Reached the top - use head_lir as the dependent marker, since all
     * labels are barriers.
     */
    if (stop_here == false && next_slot < MAX_HOIST_DISTANCE) {
      prev_inst_list[next_slot++] = head_lir;
    }

    /*
     * At least one independent instruction was found. Scan in the reverse
     * direction to find a beneficial slot.
     */
    if (next_slot >= 2) {
      int first_slot = next_slot - 2;
      int slot;
      LIR* dep_lir = prev_inst_list[next_slot-1];
      /* If there is a ld-ld dependency, wait LDLD_DISTANCE cycles */
      if (!IsPseudoLirOp(dep_lir->opcode) &&
        (GetTargetInstFlags(dep_lir->opcode) & IS_LOAD)) {
        first_slot -= LDLD_DISTANCE;
      }
      /*
       * Make sure we check slot >= 0 since first_slot may be negative
       * when the loop is first entered.
       */
      for (slot = first_slot; slot >= 0; slot--) {
        LIR* cur_lir = prev_inst_list[slot];
        LIR* prev_lir = prev_inst_list[slot+1];

        /* Check the highest instruction */
        if (prev_lir->u.m.def_mask->Equals(kEncodeAll)) {
          /*
           * If the first instruction is a load, don't hoist anything
           * above it since it is unlikely to be beneficial.
           */
          if (GetTargetInstFlags(cur_lir->opcode) & IS_LOAD) {
            continue;
          }
          /*
           * If the remaining number of slots is less than LD_LATENCY,
           * insert the hoisted load here.
           */
          if (slot < LD_LATENCY) {
            break;
          }
        }

        // Don't look across a barrier label
        if ((prev_lir->opcode == kPseudoTargetLabel) ||
            (prev_lir->opcode == kPseudoSafepointPC) ||
            (prev_lir->opcode == kPseudoBarrier)) {
          break;
        }

        /*
         * Stop when adjacent slots have a load/use dependency, or when fewer
         * than LD_LATENCY slots remain.
         */
        bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? false :
            (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD);
        if ((prev_is_load && (cur_lir->u.m.use_mask->Intersects(*prev_lir->u.m.def_mask))) ||
            (slot < LD_LATENCY)) {
          break;
        }
      }

      /* Found a slot to hoist to */
      if (slot >= 0) {
        LIR* cur_lir = prev_inst_list[slot];
        LIR* new_load_lir =
          static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
        *new_load_lir = *this_lir;
        /*
         * Insertion is guaranteed to succeed since cur_lir
         * is never the first LIR on the list.
         */
        InsertLIRBefore(cur_lir, new_load_lir);
        NopLIR(this_lir);
      }
    }
  }
}

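/*
 * Run whichever of load/store elimination and load hoisting is not disabled
 * over the LIR range [head_lir, tail_lir].
 */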
void Mir2Lir::ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir) {
  if (!(cu_->disable_opt & (1 << kLoadStoreElimination))) {
    ApplyLoadStoreElimination(head_lir, tail_lir);
  }
  if (!(cu_->disable_opt & (1 << kLoadHoisting))) {
    ApplyLoadHoisting(head_lir, tail_lir);
  }
}

}  // namespace art