AutoUpgrade.cpp (LLVM 22.0.0git)
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
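
// For example, an old declaration named "llvm.x86.sse41.ptestc" becomes
// "llvm.x86.sse41.ptestc.old"; its remaining calls are later rewritten
// against the new declaration (see UpgradeIntrinsicCall).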

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
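
// Illustrative IR for this upgrade: the old declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// no longer matches the current signature, whose parameters are <2 x i64>,
// so it is renamed and NewFn is set to the current declaration.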

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
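
// For example, "llvm.x86.sse41.insertps" (one of the names dispatched to this
// helper below) used to take its immediate as i32; the current declaration
// takes i8, so an i32 final argument identifies an old declaration.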

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
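
// Example (for the AVX-512 compare upgrades dispatched below): the old
// "llvm.x86.avx512.mask.cmp.ps.512" returned a scalar integer mask, while the
// current intrinsic returns a <16 x i1> vector, so a non-vector return type
// marks a declaration in need of upgrading.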

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
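
// Rationale for both BF16 helpers above: before the IR bfloat type was used
// for these declarations, they carried integer elements (e.g. i16 vectors) in
// the return or second-operand position, so a non-bfloat scalar type in that
// slot identifies an old declaration to rename and redeclare.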

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
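
// A name matched above has no one-for-one replacement declaration: the caller
// (upgradeX86IntrinsicFunction) reports it with NewFn == nullptr, and the call
// itself is expanded later in UpgradeIntrinsicCall, typically into generic IR
// such as shufflevector/select sequences.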

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.vpdpb")) {
    // Added in 21.1
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
             .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
             .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
             .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
             .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
             .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
             .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
             .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
             .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
             .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
             .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
             .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.vpdpb")) {
    // Added in 21.1
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
             .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
             .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
             .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
             .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
             .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }
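
    // Worked example for the width arithmetic above: the 64-bit bfdot variant
    // returns v2f32, so OperandWidth == 64 and the rewritten operand type is
    // FixedVectorType::get(bfloat, 64 / 16), i.e. v4bf16; the 128-bit variant
    // maps v16i8 operands to v8bf16 the same way.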

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }
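
      // Example: "vst2.v8i8" captures "2" (size 1) and selects StoreInts,
      // while "vst2lane.v8i8" captures "2lane" and selects StoreLaneInts. The
      // index arithmetic works because vstN takes N vector operands plus a
      // pointer and an alignment argument (and a lane number for the lane
      // variants).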

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }
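
      // Example: "ld2.nxv4i32" matches LdRegex and Name[0] - '2' == 0, so the
      // declaration is upgraded to llvm.aarch64.sve.ld2.sret overloaded on the
      // element vector type computed above; ld3 and ld4 map likewise.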

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    //     (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}
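
// Example of reason (2) above: an old "...g2s.tile.1d" declaration ends with
// (..., i64 %ch, i1 %mc_flag, i1 %ch_flag), so the parameter three slots from
// the end is the i64 channel rather than an i1, and the declaration is
// upgraded to the newer form that appends the i32 cta_group flag.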

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}
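
// Shared by the "nvvm.ptr.*" checks in upgradeIntrinsicFunction1 below: for a
// name such as "ptr.gen.to.shared.*" the caller first strips "ptr.gen.to.",
// then this helper consumes "shared", marking the name as one of the
// address-space intrinsics that get expanded rather than redeclared.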

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
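
    // For reference, the old five-argument form looked like
    //   @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len, i32 %align, i1 %isvolatile)
    // and the upgraded four-argument form drops %align, carrying the alignment
    // as an attribute on the two pointer arguments instead.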
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ii}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{i,ii,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

1612 case 's':
1613 if (Name == "stackprotectorcheck") {
1614 NewFn = nullptr;
1615 return true;
1616 }
1617 break;
1618
1619 case 't':
1620 if (Name == "thread.pointer") {
1621 NewFn = Intrinsic::getOrInsertDeclaration(
1622 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1623 return true;
1624 }
1625 break;
1626
1627 case 'v': {
1628 if (Name == "var.annotation" && F->arg_size() == 4) {
1629 rename(F);
1630 NewFn = Intrinsic::getOrInsertDeclaration(
1631 F->getParent(), Intrinsic::var_annotation,
1632 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1633 return true;
1634 }
1635 break;
1636 }
1637
1638 case 'w':
1639 if (Name.consume_front("wasm.")) {
1640 Intrinsic::ID ID =
1641 StringSwitch<Intrinsic::ID>(Name)
1642 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1643 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1644 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1647 rename(F);
1648 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1649 F->getReturnType());
1650 return true;
1651 }
1652
1653 if (Name.consume_front("dot.i8x16.i7x16.")) {
1654 ID = StringSwitch<Intrinsic::ID>(Name)
1655 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1656 .Case("add.signed",
1657 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1658 .Default(Intrinsic::not_intrinsic);
1659 if (ID != Intrinsic::not_intrinsic) {
1660 rename(F);
1661 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1662 return true;
1663 }
1664 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1665 }
1666 break; // No other 'wasm.*'.
1667 }
1668 break;
1669
1670 case 'x':
1671 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1672 return true;
1673 }
1674
1675 auto *ST = dyn_cast<StructType>(F->getReturnType());
1676 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1677 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1678 // Replace return type with literal non-packed struct. Only do this for
1679 // intrinsics declared to return a struct, not for intrinsics with
1680 // overloaded return type, in which case the exact struct type will be
1681 // mangled into the name.
1682 SmallVector<Intrinsic::IITDescriptor> Desc;
1683 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1684 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1685 auto *FT = F->getFunctionType();
1686 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1687 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1688 std::string Name = F->getName().str();
1689 rename(F);
1690 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1691 Name, F->getParent());
1692
1693 // The new function may also need remangling.
1694 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1695 NewFn = *Result;
1696 return true;
1697 }
1698 }
1699
1700 // Remangle our intrinsic since we upgrade the mangling
1701 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1702 if (Result != std::nullopt) {
1703 NewFn = *Result;
1704 return true;
1705 }
1706
1707 // This may not belong here. This function is effectively being overloaded
1708 // to both detect an intrinsic which needs upgrading, and to provide the
1709 // upgraded form of the intrinsic. We should perhaps have two separate
1710 // functions for this.
1711 return false;
1712}
1713
1714 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1715 bool CanUpgradeDebugIntrinsicsToRecords) {
1716 NewFn = nullptr;
1717 bool Upgraded =
1718 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1719
1720 // Upgrade intrinsic attributes. This does not change the function.
1721 if (NewFn)
1722 F = NewFn;
1723 if (Intrinsic::ID id = F->getIntrinsicID()) {
1724 // Only do this if the intrinsic signature is valid.
1725 SmallVector<Type *> OverloadTys;
1726 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1727 F->setAttributes(
1728 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1729 }
1730 return Upgraded;
1731}
1732
1733 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1734 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1735 GV->getName() == "llvm.global_dtors")) ||
1736 !GV->hasInitializer())
1737 return nullptr;
1738 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1739 if (!ATy)
1740 return nullptr;
1741 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1742 if (!STy || STy->getNumElements() != 2)
1743 return nullptr;
1744
1745 LLVMContext &C = GV->getContext();
1746 IRBuilder<> IRB(C);
1747 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1748 IRB.getPtrTy());
1749 Constant *Init = GV->getInitializer();
1750 unsigned N = Init->getNumOperands();
1751 std::vector<Constant *> NewCtors(N);
1752 for (unsigned i = 0; i != N; ++i) {
1753 auto Ctor = cast<Constant>(Init->getOperand(i));
1754 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1755 Ctor->getAggregateElement(1),
1756 Constant::getNullValue(IRB.getPtrTy()));
1757 }
1758 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1759
1760 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1761 NewInit, GV->getName());
1762}
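// For illustration (a sketch; names are placeholders): this rewrites a
// two-field constructor list such as
//   @llvm.global_ctors = appending global [1 x { i32, ptr }]
//                        [{ i32, ptr } { i32 65535, ptr @ctor }]
// into the current three-field form, appending a null associated-data
// pointer:
//   [1 x { i32, ptr, ptr }]
//   [{ i32, ptr, ptr } { i32 65535, ptr @ctor, ptr null }]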
1763
1764// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1765// to byte shuffles.
1766 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1767 unsigned Shift) {
1768 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1769 unsigned NumElts = ResultTy->getNumElements() * 8;
1770
1771 // Bitcast from a 64-bit element type to a byte element type.
1772 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1773 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1774
1775 // We'll be shuffling in zeroes.
1776 Value *Res = Constant::getNullValue(VecTy);
1777
1778 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1779 // we'll just return the zero vector.
1780 if (Shift < 16) {
1781 int Idxs[64];
1782 // 256/512-bit version is split into 2/4 16-byte lanes.
1783 for (unsigned l = 0; l != NumElts; l += 16)
1784 for (unsigned i = 0; i != 16; ++i) {
1785 unsigned Idx = NumElts + i - Shift;
1786 if (Idx < NumElts)
1787 Idx -= NumElts - 16; // end of lane, switch operand.
1788 Idxs[l + i] = Idx + l;
1789 }
1790
1791 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1792 }
1793
1794 // Bitcast back to a 64-bit element type.
1795 return Builder.CreateBitCast(Res, ResultTy, "cast");
1796}
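// For illustration (a sketch; %op is a placeholder): a 128-bit psll.dq with
// Shift == 4 produces indices 12..27, i.e. roughly
//   %cast = bitcast <2 x i64> %op to <16 x i8>
//   %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %cast,
//          <16 x i32> <i32 12, i32 13, ..., i32 27>
//   %out = bitcast <16 x i8> %res to <2 x i64>
// Indices 12..15 pull in zero bytes and 16..27 pull bytes 0..11 of %op, a
// left shift of the whole vector by four bytes.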
1797
1798// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1799// to byte shuffles.
1800 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1801 unsigned Shift) {
1802 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1803 unsigned NumElts = ResultTy->getNumElements() * 8;
1804
1805 // Bitcast from a 64-bit element type to a byte element type.
1806 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1807 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1808
1809 // We'll be shuffling in zeroes.
1810 Value *Res = Constant::getNullValue(VecTy);
1811
1812 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1813 // we'll just return the zero vector.
1814 if (Shift < 16) {
1815 int Idxs[64];
1816 // 256/512-bit version is split into 2/4 16-byte lanes.
1817 for (unsigned l = 0; l != NumElts; l += 16)
1818 for (unsigned i = 0; i != 16; ++i) {
1819 unsigned Idx = i + Shift;
1820 if (Idx >= 16)
1821 Idx += NumElts - 16; // end of lane, switch operand.
1822 Idxs[l + i] = Idx + l;
1823 }
1824
1825 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1826 }
1827
1828 // Bitcast back to a 64-bit element type.
1829 return Builder.CreateBitCast(Res, ResultTy, "cast");
1830}
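// For illustration (a sketch): the mirror case, a 128-bit psrl.dq with
// Shift == 4, produces indices 4..19 over (%op, zero), so result byte i is
// byte i+4 of %op for i < 12 and zero for the remaining high bytes -- a
// right shift by four bytes.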
1831
1832static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1833 unsigned NumElts) {
1834 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1835 llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1836 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1837 Mask = Builder.CreateBitCast(Mask, MaskTy);
1838
1839 // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
1840 // i8 and we need to extract down to the right number of elements.
1841 if (NumElts <= 4) {
1842 int Indices[4];
1843 for (unsigned i = 0; i != NumElts; ++i)
1844 Indices[i] = i;
1845 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1846 "extract");
1847 }
1848
1849 return Mask;
1850}
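// For illustration (a sketch; %mask is a placeholder): a 4-element mask
// arrives as an i8, so the helper emits roughly
//   %bc = bitcast i8 %mask to <8 x i1>
//   %m = shufflevector <8 x i1> %bc, <8 x i1> %bc,
//        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// keeping only the low four mask bits.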
1851
1852static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1853 Value *Op1) {
1854 // If the mask is all ones just emit the first operation.
1855 if (const auto *C = dyn_cast<Constant>(Mask))
1856 if (C->isAllOnesValue())
1857 return Op0;
1858
1859 Mask = getX86MaskVec(Builder, Mask,
1860 cast<FixedVectorType>(Op0->getType())->getNumElements());
1861 return Builder.CreateSelect(Mask, Op0, Op1);
1862}
1863
1864static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1865 Value *Op1) {
1866 // If the mask is all ones just emit the first operation.
1867 if (const auto *C = dyn_cast<Constant>(Mask))
1868 if (C->isAllOnesValue())
1869 return Op0;
1870
1871 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1872 Mask->getType()->getIntegerBitWidth());
1873 Mask = Builder.CreateBitCast(Mask, MaskTy);
1874 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1875 return Builder.CreateSelect(Mask, Op0, Op1);
1876}
1877
1878// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1879// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1880// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1881 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1882 Value *Op1, Value *Shift,
1883 Value *Passthru, Value *Mask,
1884 bool IsVALIGN) {
1885 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1886
1887 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1888 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1889 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1890 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1891
1892 // Mask the immediate for VALIGN.
1893 if (IsVALIGN)
1894 ShiftVal &= (NumElts - 1);
1895
1896 // If palignr is shifting the pair of vectors more than the size of two
1897 // lanes, emit zero.
1898 if (ShiftVal >= 32)
1899 return llvm::Constant::getNullValue(Op0->getType());
1900
1901 // If palignr is shifting the pair of input vectors more than one lane,
1902 // but less than two lanes, convert to shifting in zeroes.
1903 if (ShiftVal > 16) {
1904 ShiftVal -= 16;
1905 Op1 = Op0;
1906 Op0 = llvm::Constant::getNullValue(Op0->getType());
1907 }
1908
1909 int Indices[64];
1910 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1911 for (unsigned l = 0; l < NumElts; l += 16) {
1912 for (unsigned i = 0; i != 16; ++i) {
1913 unsigned Idx = ShiftVal + i;
1914 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1915 Idx += NumElts - 16; // End of lane, switch operand.
1916 Indices[l + i] = Idx + l;
1917 }
1918 }
1919
1920 Value *Align = Builder.CreateShuffleVector(
1921 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1922
1923 return emitX86Select(Builder, Mask, Align, Passthru);
1924}
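// For illustration (a sketch): a 128-bit palignr with an immediate of 4
// shifts the concatenation Op0:Op1 right by four bytes, expressed above as
//   shufflevector <16 x i8> %op1, <16 x i8> %op0,
//                 <16 x i32> <i32 4, i32 5, ..., i32 19>
// where indices 4..15 take the high bytes of %op1 and 16..19 take the low
// bytes of %op0.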
1925
1926 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1927 bool ZeroMask, bool IndexForm) {
1928 Type *Ty = CI.getType();
1929 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1930 unsigned EltWidth = Ty->getScalarSizeInBits();
1931 bool IsFloat = Ty->isFPOrFPVectorTy();
1932 Intrinsic::ID IID;
1933 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1934 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1935 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1936 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1937 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1938 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1939 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1940 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1941 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1942 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1943 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1944 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1945 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1946 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1947 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1948 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1949 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1950 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1951 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1952 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1953 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1954 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1955 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1956 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1957 else if (VecWidth == 128 && EltWidth == 16)
1958 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1959 else if (VecWidth == 256 && EltWidth == 16)
1960 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1961 else if (VecWidth == 512 && EltWidth == 16)
1962 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1963 else if (VecWidth == 128 && EltWidth == 8)
1964 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1965 else if (VecWidth == 256 && EltWidth == 8)
1966 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1967 else if (VecWidth == 512 && EltWidth == 8)
1968 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1969 else
1970 llvm_unreachable("Unexpected intrinsic");
1971
1972 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1973 CI.getArgOperand(2) };
1974
1975 // If this isn't index form we need to swap operands 0 and 1.
1976 if (!IndexForm)
1977 std::swap(Args[0], Args[1]);
1978
1979 Value *V = Builder.CreateIntrinsic(IID, Args);
1980 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1981 : Builder.CreateBitCast(CI.getArgOperand(1),
1982 Ty);
1983 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1984}
1985
1986 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1987 Intrinsic::ID IID) {
1988 Type *Ty = CI.getType();
1989 Value *Op0 = CI.getOperand(0);
1990 Value *Op1 = CI.getOperand(1);
1991 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1992
1993 if (CI.arg_size() == 4) { // For masked intrinsics.
1994 Value *VecSrc = CI.getOperand(2);
1995 Value *Mask = CI.getOperand(3);
1996 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1997 }
1998 return Res;
1999}
2000
2001 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2002 bool IsRotateRight) {
2003 Type *Ty = CI.getType();
2004 Value *Src = CI.getArgOperand(0);
2005 Value *Amt = CI.getArgOperand(1);
2006
2007 // The amount may be a scalar immediate, in which case create a splat vector.
2008 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2009 // we only care about the lowest log2 bits anyway.
2010 if (Amt->getType() != Ty) {
2011 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2012 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2013 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2014 }
2015
2016 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2017 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2018
2019 if (CI.arg_size() == 4) { // For masked intrinsics.
2020 Value *VecSrc = CI.getOperand(2);
2021 Value *Mask = CI.getOperand(3);
2022 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2023 }
2024 return Res;
2025}
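// For illustration (a sketch): a left-rotate such as prol.d becomes a funnel
// shift with both data operands equal (after splatting a scalar amount):
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %src, <4 x i32> %src,
//                                        <4 x i32> %amt)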
2026
2027static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2028 bool IsSigned) {
2029 Type *Ty = CI.getType();
2030 Value *LHS = CI.getArgOperand(0);
2031 Value *RHS = CI.getArgOperand(1);
2032
2033 CmpInst::Predicate Pred;
2034 switch (Imm) {
2035 case 0x0:
2036 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2037 break;
2038 case 0x1:
2039 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2040 break;
2041 case 0x2:
2042 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2043 break;
2044 case 0x3:
2045 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2046 break;
2047 case 0x4:
2048 Pred = ICmpInst::ICMP_EQ;
2049 break;
2050 case 0x5:
2051 Pred = ICmpInst::ICMP_NE;
2052 break;
2053 case 0x6:
2054 return Constant::getNullValue(Ty); // FALSE
2055 case 0x7:
2056 return Constant::getAllOnesValue(Ty); // TRUE
2057 default:
2058 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2059 }
2060
2061 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2062 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2063 return Ext;
2064}
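// For illustration (a sketch): xop.vpcomltb (Imm == 0, signed) lowers to a
// plain compare plus a sign extension:
//   %c = icmp slt <16 x i8> %lhs, %rhs
//   %r = sext <16 x i1> %c to <16 x i8>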
2065
2066 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2067 bool IsShiftRight, bool ZeroMask) {
2068 Type *Ty = CI.getType();
2069 Value *Op0 = CI.getArgOperand(0);
2070 Value *Op1 = CI.getArgOperand(1);
2071 Value *Amt = CI.getArgOperand(2);
2072
2073 if (IsShiftRight)
2074 std::swap(Op0, Op1);
2075
2076 // The amount may be a scalar immediate, in which case create a splat vector.
2077 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2078 // we only care about the lowest log2 bits anyway.
2079 if (Amt->getType() != Ty) {
2080 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2081 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2082 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2083 }
2084
2085 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2086 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2087
2088 unsigned NumArgs = CI.arg_size();
2089 if (NumArgs >= 4) { // For masked intrinsics.
2090 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2091 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2092 CI.getArgOperand(0);
2093 Value *Mask = CI.getOperand(NumArgs - 1);
2094 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2095 }
2096 return Res;
2097}
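// For illustration (a sketch): a concat-shift-left such as vpshld.d maps
// onto the generic funnel shift,
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                        <4 x i32> %amt)
// while the right-shift forms swap the two data operands and use @llvm.fshr
// instead.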
2098
2099 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2100 Value *Mask, bool Aligned) {
2101 const Align Alignment =
2102 Aligned
2103 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2104 : Align(1);
2105
2106 // If the mask is all ones just emit a regular store.
2107 if (const auto *C = dyn_cast<Constant>(Mask))
2108 if (C->isAllOnesValue())
2109 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2110
2111 // Convert the mask from an integer type to a vector of i1.
2112 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2113 Mask = getX86MaskVec(Builder, Mask, NumElts);
2114 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2115}
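// For illustration (a sketch): avx512.mask.storeu.d.256 with a non-constant
// i8 mask becomes an unaligned masked store:
//   %m = bitcast i8 %mask to <8 x i1>
//   call void @llvm.masked.store.v8i32.p0(<8 x i32> %data, ptr %ptr, i32 1,
//                                         <8 x i1> %m)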
2116
2117 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2118 Value *Passthru, Value *Mask, bool Aligned) {
2119 Type *ValTy = Passthru->getType();
2120 const Align Alignment =
2121 Aligned
2122 ? Align(
2123 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2124 8)
2125 : Align(1);
2126
2127 // If the mask is all ones just emit a regular load.
2128 if (const auto *C = dyn_cast<Constant>(Mask))
2129 if (C->isAllOnesValue())
2130 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2131
2132 // Convert the mask from an integer type to a vector of i1.
2133 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2134 Mask = getX86MaskVec(Builder, Mask, NumElts);
2135 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2136}
2137
2138static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2139 Type *Ty = CI.getType();
2140 Value *Op0 = CI.getArgOperand(0);
2141 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2142 {Op0, Builder.getInt1(false)});
2143 if (CI.arg_size() == 3)
2144 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2145 return Res;
2146}
2147
2148static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2149 Type *Ty = CI.getType();
2150
2151 // Arguments have a vXi32 type so cast to vXi64.
2152 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2153 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2154
2155 if (IsSigned) {
2156 // Shift left then arithmetic shift right.
2157 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2158 LHS = Builder.CreateShl(LHS, ShiftAmt);
2159 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2160 RHS = Builder.CreateShl(RHS, ShiftAmt);
2161 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2162 } else {
2163 // Clear the upper bits.
2164 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2165 LHS = Builder.CreateAnd(LHS, Mask);
2166 RHS = Builder.CreateAnd(RHS, Mask);
2167 }
2168
2169 Value *Res = Builder.CreateMul(LHS, RHS);
2170
2171 if (CI.arg_size() == 4)
2172 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2173
2174 return Res;
2175}
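// For illustration (a sketch): in the signed case the shl/ashr pair is a
// sign extension of each 64-bit lane's low 32 bits, e.g. for v2i64:
//   %s = shl <2 x i64> %a, <i64 32, i64 32>
//   %l = ashr <2 x i64> %s, <i64 32, i64 32>
// while the unsigned case just masks each lane with 0xffffffff before the
// multiply.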
2176
2177 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2178 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2179 Value *Mask) {
2180 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2181 if (Mask) {
2182 const auto *C = dyn_cast<Constant>(Mask);
2183 if (!C || !C->isAllOnesValue())
2184 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2185 }
2186
2187 if (NumElts < 8) {
2188 int Indices[8];
2189 for (unsigned i = 0; i != NumElts; ++i)
2190 Indices[i] = i;
2191 for (unsigned i = NumElts; i != 8; ++i)
2192 Indices[i] = NumElts + i % NumElts;
2193 Vec = Builder.CreateShuffleVector(Vec,
2194 Constant::getNullValue(Vec->getType()),
2195 Indices);
2196 }
2197 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2198}
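// For illustration (a sketch): a <4 x i1> compare result is padded with
// zero elements so it can be bitcast to an integer mask:
//   %w = shufflevector <4 x i1> %v, <4 x i1> zeroinitializer,
//        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
//   %m = bitcast <8 x i1> %w to i8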
2199
2200 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2201 unsigned CC, bool Signed) {
2202 Value *Op0 = CI.getArgOperand(0);
2203 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2204
2205 Value *Cmp;
2206 if (CC == 3) {
2207 Cmp = Constant::getNullValue(
2208 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2209 } else if (CC == 7) {
2210 Cmp = Constant::getAllOnesValue(
2211 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2212 } else {
2213 ICmpInst::Predicate Pred;
2214 switch (CC) {
2215 default: llvm_unreachable("Unknown condition code");
2216 case 0: Pred = ICmpInst::ICMP_EQ; break;
2217 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2218 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2219 case 4: Pred = ICmpInst::ICMP_NE; break;
2220 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2221 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2222 }
2223 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2224 }
2225
2226 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2227
2228 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2229}
2230
2231// Replace a masked intrinsic with an older unmasked intrinsic.
2232 static Value *upgradeMaskedShift(IRBuilder<> &Builder, CallBase &CI,
2233 Intrinsic::ID IID) {
2234 Value *Rep =
2235 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2236 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2237}
2238
2239 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2240 Value* A = CI.getArgOperand(0);
2241 Value* B = CI.getArgOperand(1);
2242 Value* Src = CI.getArgOperand(2);
2243 Value* Mask = CI.getArgOperand(3);
2244
2245 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2246 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2247 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2248 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2249 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2250 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2251}
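// For illustration (a sketch): for avx512.mask.move.ss this computes
// insertelement(%a, bit 0 of %mask ? %b[0] : %src[0], 0); only lane 0
// participates in the masked select.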
2252
2253 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2254 Value* Op = CI.getArgOperand(0);
2255 Type* ReturnOp = CI.getType();
2256 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2257 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2258 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2259}
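// For illustration (a sketch): for a vpmovm2-style upgrade an i16 mask
// becomes a vector of all-ones/all-zeros bytes:
//   %m = bitcast i16 %mask to <16 x i1>
//   %r = sext <16 x i1> %m to <16 x i8>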
2260
2261// Replace intrinsic with unmasked version and a select.
2262 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2263 CallBase &CI, Value *&Rep) {
2264 Name = Name.substr(12); // Remove avx512.mask.
2265
2266 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2267 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2268 Intrinsic::ID IID;
2269 if (Name.starts_with("max.p")) {
2270 if (VecWidth == 128 && EltWidth == 32)
2271 IID = Intrinsic::x86_sse_max_ps;
2272 else if (VecWidth == 128 && EltWidth == 64)
2273 IID = Intrinsic::x86_sse2_max_pd;
2274 else if (VecWidth == 256 && EltWidth == 32)
2275 IID = Intrinsic::x86_avx_max_ps_256;
2276 else if (VecWidth == 256 && EltWidth == 64)
2277 IID = Intrinsic::x86_avx_max_pd_256;
2278 else
2279 llvm_unreachable("Unexpected intrinsic");
2280 } else if (Name.starts_with("min.p")) {
2281 if (VecWidth == 128 && EltWidth == 32)
2282 IID = Intrinsic::x86_sse_min_ps;
2283 else if (VecWidth == 128 && EltWidth == 64)
2284 IID = Intrinsic::x86_sse2_min_pd;
2285 else if (VecWidth == 256 && EltWidth == 32)
2286 IID = Intrinsic::x86_avx_min_ps_256;
2287 else if (VecWidth == 256 && EltWidth == 64)
2288 IID = Intrinsic::x86_avx_min_pd_256;
2289 else
2290 llvm_unreachable("Unexpected intrinsic");
2291 } else if (Name.starts_with("pshuf.b.")) {
2292 if (VecWidth == 128)
2293 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2294 else if (VecWidth == 256)
2295 IID = Intrinsic::x86_avx2_pshuf_b;
2296 else if (VecWidth == 512)
2297 IID = Intrinsic::x86_avx512_pshuf_b_512;
2298 else
2299 llvm_unreachable("Unexpected intrinsic");
2300 } else if (Name.starts_with("pmul.hr.sw.")) {
2301 if (VecWidth == 128)
2302 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2303 else if (VecWidth == 256)
2304 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2305 else if (VecWidth == 512)
2306 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2307 else
2308 llvm_unreachable("Unexpected intrinsic");
2309 } else if (Name.starts_with("pmulh.w.")) {
2310 if (VecWidth == 128)
2311 IID = Intrinsic::x86_sse2_pmulh_w;
2312 else if (VecWidth == 256)
2313 IID = Intrinsic::x86_avx2_pmulh_w;
2314 else if (VecWidth == 512)
2315 IID = Intrinsic::x86_avx512_pmulh_w_512;
2316 else
2317 llvm_unreachable("Unexpected intrinsic");
2318 } else if (Name.starts_with("pmulhu.w.")) {
2319 if (VecWidth == 128)
2320 IID = Intrinsic::x86_sse2_pmulhu_w;
2321 else if (VecWidth == 256)
2322 IID = Intrinsic::x86_avx2_pmulhu_w;
2323 else if (VecWidth == 512)
2324 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2325 else
2326 llvm_unreachable("Unexpected intrinsic");
2327 } else if (Name.starts_with("pmaddw.d.")) {
2328 if (VecWidth == 128)
2329 IID = Intrinsic::x86_sse2_pmadd_wd;
2330 else if (VecWidth == 256)
2331 IID = Intrinsic::x86_avx2_pmadd_wd;
2332 else if (VecWidth == 512)
2333 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2334 else
2335 llvm_unreachable("Unexpected intrinsic");
2336 } else if (Name.starts_with("pmaddubs.w.")) {
2337 if (VecWidth == 128)
2338 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2339 else if (VecWidth == 256)
2340 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2341 else if (VecWidth == 512)
2342 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2343 else
2344 llvm_unreachable("Unexpected intrinsic");
2345 } else if (Name.starts_with("packsswb.")) {
2346 if (VecWidth == 128)
2347 IID = Intrinsic::x86_sse2_packsswb_128;
2348 else if (VecWidth == 256)
2349 IID = Intrinsic::x86_avx2_packsswb;
2350 else if (VecWidth == 512)
2351 IID = Intrinsic::x86_avx512_packsswb_512;
2352 else
2353 llvm_unreachable("Unexpected intrinsic");
2354 } else if (Name.starts_with("packssdw.")) {
2355 if (VecWidth == 128)
2356 IID = Intrinsic::x86_sse2_packssdw_128;
2357 else if (VecWidth == 256)
2358 IID = Intrinsic::x86_avx2_packssdw;
2359 else if (VecWidth == 512)
2360 IID = Intrinsic::x86_avx512_packssdw_512;
2361 else
2362 llvm_unreachable("Unexpected intrinsic");
2363 } else if (Name.starts_with("packuswb.")) {
2364 if (VecWidth == 128)
2365 IID = Intrinsic::x86_sse2_packuswb_128;
2366 else if (VecWidth == 256)
2367 IID = Intrinsic::x86_avx2_packuswb;
2368 else if (VecWidth == 512)
2369 IID = Intrinsic::x86_avx512_packuswb_512;
2370 else
2371 llvm_unreachable("Unexpected intrinsic");
2372 } else if (Name.starts_with("packusdw.")) {
2373 if (VecWidth == 128)
2374 IID = Intrinsic::x86_sse41_packusdw;
2375 else if (VecWidth == 256)
2376 IID = Intrinsic::x86_avx2_packusdw;
2377 else if (VecWidth == 512)
2378 IID = Intrinsic::x86_avx512_packusdw_512;
2379 else
2380 llvm_unreachable("Unexpected intrinsic");
2381 } else if (Name.starts_with("vpermilvar.")) {
2382 if (VecWidth == 128 && EltWidth == 32)
2383 IID = Intrinsic::x86_avx_vpermilvar_ps;
2384 else if (VecWidth == 128 && EltWidth == 64)
2385 IID = Intrinsic::x86_avx_vpermilvar_pd;
2386 else if (VecWidth == 256 && EltWidth == 32)
2387 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2388 else if (VecWidth == 256 && EltWidth == 64)
2389 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2390 else if (VecWidth == 512 && EltWidth == 32)
2391 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2392 else if (VecWidth == 512 && EltWidth == 64)
2393 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2394 else
2395 llvm_unreachable("Unexpected intrinsic");
2396 } else if (Name == "cvtpd2dq.256") {
2397 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2398 } else if (Name == "cvtpd2ps.256") {
2399 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2400 } else if (Name == "cvttpd2dq.256") {
2401 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2402 } else if (Name == "cvttps2dq.128") {
2403 IID = Intrinsic::x86_sse2_cvttps2dq;
2404 } else if (Name == "cvttps2dq.256") {
2405 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2406 } else if (Name.starts_with("permvar.")) {
2407 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2408 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2409 IID = Intrinsic::x86_avx2_permps;
2410 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2411 IID = Intrinsic::x86_avx2_permd;
2412 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2413 IID = Intrinsic::x86_avx512_permvar_df_256;
2414 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2415 IID = Intrinsic::x86_avx512_permvar_di_256;
2416 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2417 IID = Intrinsic::x86_avx512_permvar_sf_512;
2418 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2419 IID = Intrinsic::x86_avx512_permvar_si_512;
2420 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2421 IID = Intrinsic::x86_avx512_permvar_df_512;
2422 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2423 IID = Intrinsic::x86_avx512_permvar_di_512;
2424 else if (VecWidth == 128 && EltWidth == 16)
2425 IID = Intrinsic::x86_avx512_permvar_hi_128;
2426 else if (VecWidth == 256 && EltWidth == 16)
2427 IID = Intrinsic::x86_avx512_permvar_hi_256;
2428 else if (VecWidth == 512 && EltWidth == 16)
2429 IID = Intrinsic::x86_avx512_permvar_hi_512;
2430 else if (VecWidth == 128 && EltWidth == 8)
2431 IID = Intrinsic::x86_avx512_permvar_qi_128;
2432 else if (VecWidth == 256 && EltWidth == 8)
2433 IID = Intrinsic::x86_avx512_permvar_qi_256;
2434 else if (VecWidth == 512 && EltWidth == 8)
2435 IID = Intrinsic::x86_avx512_permvar_qi_512;
2436 else
2437 llvm_unreachable("Unexpected intrinsic");
2438 } else if (Name.starts_with("dbpsadbw.")) {
2439 if (VecWidth == 128)
2440 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2441 else if (VecWidth == 256)
2442 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2443 else if (VecWidth == 512)
2444 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2445 else
2446 llvm_unreachable("Unexpected intrinsic");
2447 } else if (Name.starts_with("pmultishift.qb.")) {
2448 if (VecWidth == 128)
2449 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2450 else if (VecWidth == 256)
2451 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2452 else if (VecWidth == 512)
2453 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2454 else
2455 llvm_unreachable("Unexpected intrinsic");
2456 } else if (Name.starts_with("conflict.")) {
2457 if (Name[9] == 'd' && VecWidth == 128)
2458 IID = Intrinsic::x86_avx512_conflict_d_128;
2459 else if (Name[9] == 'd' && VecWidth == 256)
2460 IID = Intrinsic::x86_avx512_conflict_d_256;
2461 else if (Name[9] == 'd' && VecWidth == 512)
2462 IID = Intrinsic::x86_avx512_conflict_d_512;
2463 else if (Name[9] == 'q' && VecWidth == 128)
2464 IID = Intrinsic::x86_avx512_conflict_q_128;
2465 else if (Name[9] == 'q' && VecWidth == 256)
2466 IID = Intrinsic::x86_avx512_conflict_q_256;
2467 else if (Name[9] == 'q' && VecWidth == 512)
2468 IID = Intrinsic::x86_avx512_conflict_q_512;
2469 else
2470 llvm_unreachable("Unexpected intrinsic");
2471 } else if (Name.starts_with("pavg.")) {
2472 if (Name[5] == 'b' && VecWidth == 128)
2473 IID = Intrinsic::x86_sse2_pavg_b;
2474 else if (Name[5] == 'b' && VecWidth == 256)
2475 IID = Intrinsic::x86_avx2_pavg_b;
2476 else if (Name[5] == 'b' && VecWidth == 512)
2477 IID = Intrinsic::x86_avx512_pavg_b_512;
2478 else if (Name[5] == 'w' && VecWidth == 128)
2479 IID = Intrinsic::x86_sse2_pavg_w;
2480 else if (Name[5] == 'w' && VecWidth == 256)
2481 IID = Intrinsic::x86_avx2_pavg_w;
2482 else if (Name[5] == 'w' && VecWidth == 512)
2483 IID = Intrinsic::x86_avx512_pavg_w_512;
2484 else
2485 llvm_unreachable("Unexpected intrinsic");
2486 } else
2487 return false;
2488
2489 SmallVector<Value *, 4> Args(CI.args());
2490 Args.pop_back();
2491 Args.pop_back();
2492 Rep = Builder.CreateIntrinsic(IID, Args);
2493 unsigned NumArgs = CI.arg_size();
2494 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2495 CI.getArgOperand(NumArgs - 2));
2496 return true;
2497}
2498
2499 /// Upgrade the comment in a call to inline asm that represents an objc
2500 /// retain/release marker.
2501void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2502 size_t Pos;
2503 if (AsmStr->find("mov\tfp") == 0 &&
2504 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2505 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2506 AsmStr->replace(Pos, 1, ";");
2507 }
2508}
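// For illustration (a sketch): an old-style marker string such as
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has its "#" rewritten to ";" so the marker survives as an asm comment.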
2509
2510 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2511 Function *F, IRBuilder<> &Builder) {
2512 Value *Rep = nullptr;
2513
2514 if (Name == "abs.i" || Name == "abs.ll") {
2515 Value *Arg = CI->getArgOperand(0);
2516 Value *Neg = Builder.CreateNeg(Arg, "neg");
2517 Value *Cmp = Builder.CreateICmpSGE(
2518 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2519 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2520 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2521 Type *Ty = (Name == "abs.bf16")
2522 ? Builder.getBFloatTy()
2523 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2524 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2525 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2526 Rep = Builder.CreateBitCast(Abs, CI->getType());
2527 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2528 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2529 : Intrinsic::nvvm_fabs;
2530 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2531 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2532 Name.starts_with("atomic.load.add.f64.p")) {
2533 Value *Ptr = CI->getArgOperand(0);
2534 Value *Val = CI->getArgOperand(1);
2535 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2536 AtomicOrdering::SequentiallyConsistent);
2537 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2538 Name.starts_with("atomic.load.dec.32.p")) {
2539 Value *Ptr = CI->getArgOperand(0);
2540 Value *Val = CI->getArgOperand(1);
2541 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2542 : AtomicRMWInst::UDecWrap;
2543 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2544 AtomicOrdering::SequentiallyConsistent);
2545 } else if (Name.consume_front("max.") &&
2546 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2547 Name == "ui" || Name == "ull")) {
2548 Value *Arg0 = CI->getArgOperand(0);
2549 Value *Arg1 = CI->getArgOperand(1);
2550 Value *Cmp = Name.starts_with("u")
2551 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2552 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2553 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2554 } else if (Name.consume_front("min.") &&
2555 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2556 Name == "ui" || Name == "ull")) {
2557 Value *Arg0 = CI->getArgOperand(0);
2558 Value *Arg1 = CI->getArgOperand(1);
2559 Value *Cmp = Name.starts_with("u")
2560 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2561 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2562 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2563 } else if (Name == "clz.ll") {
2564 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2565 Value *Arg = CI->getArgOperand(0);
2566 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2567 {Arg, Builder.getFalse()},
2568 /*FMFSource=*/nullptr, "ctlz");
2569 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2570 } else if (Name == "popc.ll") {
2571 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2572 // i64.
2573 Value *Arg = CI->getArgOperand(0);
2574 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2575 Arg, /*FMFSource=*/nullptr, "ctpop");
2576 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2577 } else if (Name == "h2f") {
2578 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2579 {Builder.getFloatTy()}, CI->getArgOperand(0),
2580 /*FMFSource=*/nullptr, "h2f");
2581 } else if (Name.consume_front("bitcast.") &&
2582 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2583 Name == "d2ll")) {
2584 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2585 } else if (Name == "rotate.b32") {
2586 Value *Arg = CI->getOperand(0);
2587 Value *ShiftAmt = CI->getOperand(1);
2588 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2589 {Arg, Arg, ShiftAmt});
2590 } else if (Name == "rotate.b64") {
2591 Type *Int64Ty = Builder.getInt64Ty();
2592 Value *Arg = CI->getOperand(0);
2593 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2594 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2595 {Arg, Arg, ZExtShiftAmt});
2596 } else if (Name == "rotate.right.b64") {
2597 Type *Int64Ty = Builder.getInt64Ty();
2598 Value *Arg = CI->getOperand(0);
2599 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2600 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2601 {Arg, Arg, ZExtShiftAmt});
2602 } else if (Name == "swap.lo.hi.b64") {
2603 Type *Int64Ty = Builder.getInt64Ty();
2604 Value *Arg = CI->getOperand(0);
2605 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2606 {Arg, Arg, Builder.getInt64(32)});
2607 } else if ((Name.consume_front("ptr.gen.to.") &&
2608 consumeNVVMPtrAddrSpace(Name)) ||
2609 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2610 Name.starts_with(".to.gen"))) {
2611 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2612 } else if (Name.consume_front("ldg.global")) {
2613 Value *Ptr = CI->getArgOperand(0);
2614 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2615 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2616 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2617 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2618 MDNode *MD = MDNode::get(Builder.getContext(), {});
2619 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2620 return LD;
2621 } else if (Name == "tanh.approx.f32") {
2622 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2623 FastMathFlags FMF;
2624 FMF.setApproxFunc();
2625 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2626 FMF);
2627 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2628 Value *Arg =
2629 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2630 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2631 {}, {Arg});
2632 } else if (Name == "barrier") {
2633 Rep = Builder.CreateIntrinsic(
2634 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2635 {CI->getArgOperand(0), CI->getArgOperand(1)});
2636 } else if (Name == "barrier.sync") {
2637 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2638 {CI->getArgOperand(0)});
2639 } else if (Name == "barrier.sync.cnt") {
2640 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2641 {CI->getArgOperand(0), CI->getArgOperand(1)});
2642 } else {
2643 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2644 if (IID != Intrinsic::not_intrinsic &&
2645 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2646 rename(F);
2647 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2648 SmallVector<Value *, 2> Args;
2649 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2650 Value *Arg = CI->getArgOperand(I);
2651 Type *OldType = Arg->getType();
2652 Type *NewType = NewFn->getArg(I)->getType();
2653 Args.push_back(
2654 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2655 ? Builder.CreateBitCast(Arg, NewType)
2656 : Arg);
2657 }
2658 Rep = Builder.CreateCall(NewFn, Args);
2659 if (F->getReturnType()->isIntegerTy())
2660 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2661 }
2662 }
2663
2664 return Rep;
2665}
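// For illustration (a sketch): llvm.nvvm.rotate.b32 from the handling above
// becomes a generic funnel shift,
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)
// which rotates %x left by %amt bits.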
2666
2667 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2668 IRBuilder<> &Builder) {
2669 LLVMContext &C = F->getContext();
2670 Value *Rep = nullptr;
2671
2672 if (Name.starts_with("sse4a.movnt.")) {
2673 SmallVector<Metadata *, 1> Elts;
2674 Elts.push_back(
2675 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2676 MDNode *Node = MDNode::get(C, Elts);
2677
2678 Value *Arg0 = CI->getArgOperand(0);
2679 Value *Arg1 = CI->getArgOperand(1);
2680
2681 // Nontemporal (unaligned) store of the 0'th element of the float/double
2682 // vector.
2683 Value *Extract =
2684 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2685
2686 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2687 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2688 } else if (Name.starts_with("avx.movnt.") ||
2689 Name.starts_with("avx512.storent.")) {
2690 SmallVector<Metadata *, 1> Elts;
2691 Elts.push_back(
2692 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2693 MDNode *Node = MDNode::get(C, Elts);
2694
2695 Value *Arg0 = CI->getArgOperand(0);
2696 Value *Arg1 = CI->getArgOperand(1);
2697
2698 StoreInst *SI = Builder.CreateAlignedStore(
2699 Arg1, Arg0,
2700 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2701 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2702 } else if (Name == "sse2.storel.dq") {
2703 Value *Arg0 = CI->getArgOperand(0);
2704 Value *Arg1 = CI->getArgOperand(1);
2705
2706 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2707 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2708 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2709 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2710 } else if (Name.starts_with("sse.storeu.") ||
2711 Name.starts_with("sse2.storeu.") ||
2712 Name.starts_with("avx.storeu.")) {
2713 Value *Arg0 = CI->getArgOperand(0);
2714 Value *Arg1 = CI->getArgOperand(1);
2715 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2716 } else if (Name == "avx512.mask.store.ss") {
2717 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2718 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2719 Mask, false);
2720 } else if (Name.starts_with("avx512.mask.store")) {
2721 // "avx512.mask.storeu." or "avx512.mask.store."
2722 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2723 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2724 CI->getArgOperand(2), Aligned);
2725 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2726 // Upgrade packed integer vector compare intrinsics to compare instructions.
2727 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2728 bool CmpEq = Name[9] == 'e';
2729 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2730 CI->getArgOperand(0), CI->getArgOperand(1));
2731 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2732 } else if (Name.starts_with("avx512.broadcastm")) {
2733 Type *ExtTy = Type::getInt32Ty(C);
2734 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2735 ExtTy = Type::getInt64Ty(C);
2736 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2737 ExtTy->getPrimitiveSizeInBits();
2738 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2739 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2740 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2741 Value *Vec = CI->getArgOperand(0);
2742 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2743 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2744 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2745 } else if (Name.starts_with("avx.sqrt.p") ||
2746 Name.starts_with("sse2.sqrt.p") ||
2747 Name.starts_with("sse.sqrt.p")) {
2748 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2749 {CI->getArgOperand(0)});
2750 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2751 if (CI->arg_size() == 4 &&
2752 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2753 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2754 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2755 : Intrinsic::x86_avx512_sqrt_pd_512;
2756
2757 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2758 Rep = Builder.CreateIntrinsic(IID, Args);
2759 } else {
2760 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2761 {CI->getArgOperand(0)});
2762 }
2763 Rep =
2764 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2765 } else if (Name.starts_with("avx512.ptestm") ||
2766 Name.starts_with("avx512.ptestnm")) {
2767 Value *Op0 = CI->getArgOperand(0);
2768 Value *Op1 = CI->getArgOperand(1);
2769 Value *Mask = CI->getArgOperand(2);
2770 Rep = Builder.CreateAnd(Op0, Op1);
2771 llvm::Type *Ty = Op0->getType();
2772 Constant *Zero = llvm::Constant::getNullValue(Ty);
2773 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2774 ? ICmpInst::ICMP_NE
2775 : ICmpInst::ICMP_EQ;
2776 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2777 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2778 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2779 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2780 ->getNumElements();
2781 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2782 Rep =
2783 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2784 } else if (Name.starts_with("avx512.kunpck")) {
2785 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2786 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2787 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2788 int Indices[64];
2789 for (unsigned i = 0; i != NumElts; ++i)
2790 Indices[i] = i;
2791
2792 // First extract half of each vector. This gives better codegen than
2793 // doing it in a single shuffle.
2794 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2795 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2796 // Concat the vectors.
2797 // NOTE: Operands have to be swapped to match intrinsic definition.
2798 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2799 Rep = Builder.CreateBitCast(Rep, CI->getType());
2800 } else if (Name == "avx512.kand.w") {
2801 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2802 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2803 Rep = Builder.CreateAnd(LHS, RHS);
2804 Rep = Builder.CreateBitCast(Rep, CI->getType());
2805 } else if (Name == "avx512.kandn.w") {
2806 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2807 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2808 LHS = Builder.CreateNot(LHS);
2809 Rep = Builder.CreateAnd(LHS, RHS);
2810 Rep = Builder.CreateBitCast(Rep, CI->getType());
2811 } else if (Name == "avx512.kor.w") {
2812 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2813 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2814 Rep = Builder.CreateOr(LHS, RHS);
2815 Rep = Builder.CreateBitCast(Rep, CI->getType());
2816 } else if (Name == "avx512.kxor.w") {
2817 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2818 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2819 Rep = Builder.CreateXor(LHS, RHS);
2820 Rep = Builder.CreateBitCast(Rep, CI->getType());
2821 } else if (Name == "avx512.kxnor.w") {
2822 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2823 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2824 LHS = Builder.CreateNot(LHS);
2825 Rep = Builder.CreateXor(LHS, RHS);
2826 Rep = Builder.CreateBitCast(Rep, CI->getType());
2827 } else if (Name == "avx512.knot.w") {
2828 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2829 Rep = Builder.CreateNot(Rep);
2830 Rep = Builder.CreateBitCast(Rep, CI->getType());
2831 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2832 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2833 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2834 Rep = Builder.CreateOr(LHS, RHS);
2835 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2836 Value *C;
2837 if (Name[14] == 'c')
2838 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2839 else
2840 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2841 Rep = Builder.CreateICmpEQ(Rep, C);
2842 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2843 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2844 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2845 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2846 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2847 Type *I32Ty = Type::getInt32Ty(C);
2848 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2849 ConstantInt::get(I32Ty, 0));
2850 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2851 ConstantInt::get(I32Ty, 0));
2852 Value *EltOp;
2853 if (Name.contains(".add."))
2854 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2855 else if (Name.contains(".sub."))
2856 EltOp = Builder.CreateFSub(Elt0, Elt1);
2857 else if (Name.contains(".mul."))
2858 EltOp = Builder.CreateFMul(Elt0, Elt1);
2859 else
2860 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2861 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2862 ConstantInt::get(I32Ty, 0));
2863 } else if (Name.starts_with("avx512.mask.pcmp")) {
2864 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2865 bool CmpEq = Name[16] == 'e';
2866 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2867 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2868 Type *OpTy = CI->getArgOperand(0)->getType();
2869 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2870 Intrinsic::ID IID;
2871 switch (VecWidth) {
2872 default:
2873 llvm_unreachable("Unexpected intrinsic");
2874 case 128:
2875 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2876 break;
2877 case 256:
2878 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2879 break;
2880 case 512:
2881 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2882 break;
2883 }
2884
2885 Rep =
2886 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2887 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2888 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2889 Type *OpTy = CI->getArgOperand(0)->getType();
2890 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2891 unsigned EltWidth = OpTy->getScalarSizeInBits();
2892 Intrinsic::ID IID;
2893 if (VecWidth == 128 && EltWidth == 32)
2894 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2895 else if (VecWidth == 256 && EltWidth == 32)
2896 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2897 else if (VecWidth == 512 && EltWidth == 32)
2898 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2899 else if (VecWidth == 128 && EltWidth == 64)
2900 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2901 else if (VecWidth == 256 && EltWidth == 64)
2902 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2903 else if (VecWidth == 512 && EltWidth == 64)
2904 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2905 else
2906 llvm_unreachable("Unexpected intrinsic");
2907
2908 Rep =
2909 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
2910 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2911 } else if (Name.starts_with("avx512.cmp.p")) {
2912 SmallVector<Value *, 4> Args(CI->args());
2913 Type *OpTy = Args[0]->getType();
2914 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2915 unsigned EltWidth = OpTy->getScalarSizeInBits();
2916 Intrinsic::ID IID;
2917 if (VecWidth == 128 && EltWidth == 32)
2918 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2919 else if (VecWidth == 256 && EltWidth == 32)
2920 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2921 else if (VecWidth == 512 && EltWidth == 32)
2922 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2923 else if (VecWidth == 128 && EltWidth == 64)
2924 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2925 else if (VecWidth == 256 && EltWidth == 64)
2926 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2927 else if (VecWidth == 512 && EltWidth == 64)
2928 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2929 else
2930 llvm_unreachable("Unexpected intrinsic");
2931
2932 Value *Mask = CI->getArgOperand(CI->arg_size() - 1);
2933 if (VecWidth == 512)
2934 std::swap(Mask, Args.back());
2935 Args.push_back(Mask);
2936
2937 Rep = Builder.CreateIntrinsic(IID, Args);
2938 } else if (Name.starts_with("avx512.mask.cmp.")) {
2939 // Integer compare intrinsics.
2940 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2941 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2942 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2943 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2944 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2945 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2946 Name.starts_with("avx512.cvtw2mask.") ||
2947 Name.starts_with("avx512.cvtd2mask.") ||
2948 Name.starts_with("avx512.cvtq2mask.")) {
2949 Value *Op = CI->getArgOperand(0);
2950 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2951 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2952 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2953 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2954 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2955 Name.starts_with("avx512.mask.pabs")) {
2956 Rep = upgradeAbs(Builder, *CI);
2957 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2958 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2959 Name.starts_with("avx512.mask.pmaxs")) {
2960 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2961 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2962 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2963 Name.starts_with("avx512.mask.pmaxu")) {
2964 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2965 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2966 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2967 Name.starts_with("avx512.mask.pmins")) {
2968 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2969 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2970 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2971 Name.starts_with("avx512.mask.pminu")) {
2972 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2973 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2974 Name == "avx512.pmulu.dq.512" ||
2975 Name.starts_with("avx512.mask.pmulu.dq.")) {
2976 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2977 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2978 Name == "avx512.pmul.dq.512" ||
2979 Name.starts_with("avx512.mask.pmul.dq.")) {
2980 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2981 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2982 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2983 Rep =
2984 Builder.CreateSIToFP(CI->getArgOperand(1),
2985 cast<VectorType>(CI->getType())->getElementType());
2986 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2987 } else if (Name == "avx512.cvtusi2sd") {
2988 Rep =
2989 Builder.CreateUIToFP(CI->getArgOperand(1),
2990 cast<VectorType>(CI->getType())->getElementType());
2991 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2992 } else if (Name == "sse2.cvtss2sd") {
2993 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2994 Rep = Builder.CreateFPExt(
2995 Rep, cast<VectorType>(CI->getType())->getElementType());
2996 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2997 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2998 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2999 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3000 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3001 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3002 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3003 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3004 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3005 Name == "avx512.mask.cvtqq2ps.256" ||
3006 Name == "avx512.mask.cvtqq2ps.512" ||
3007 Name == "avx512.mask.cvtuqq2ps.256" ||
3008 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3009 Name == "avx.cvt.ps2.pd.256" ||
3010 Name == "avx512.mask.cvtps2pd.128" ||
3011 Name == "avx512.mask.cvtps2pd.256") {
3012 auto *DstTy = cast<FixedVectorType>(CI->getType());
3013 Rep = CI->getArgOperand(0);
3014 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3015
3016 unsigned NumDstElts = DstTy->getNumElements();
3017 if (NumDstElts < SrcTy->getNumElements()) {
3018 assert(NumDstElts == 2 && "Unexpected vector size");
3019 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3020 }
3021
3022 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3023 bool IsUnsigned = Name.contains("cvtu");
3024 if (IsPS2PD)
3025 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3026 else if (CI->arg_size() == 4 &&
3027 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3028 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3029 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3030 : Intrinsic::x86_avx512_sitofp_round;
3031 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3032 {Rep, CI->getArgOperand(3)});
3033 } else {
3034 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3035 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3036 }
3037
3038 if (CI->arg_size() >= 3)
3039 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3040 CI->getArgOperand(1));
3041 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3042 Name.starts_with("vcvtph2ps.")) {
3043 auto *DstTy = cast<FixedVectorType>(CI->getType());
3044 Rep = CI->getArgOperand(0);
3045 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3046 unsigned NumDstElts = DstTy->getNumElements();
3047 if (NumDstElts != SrcTy->getNumElements()) {
3048 assert(NumDstElts == 4 && "Unexpected vector size");
3049 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3050 }
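    // The legacy intrinsics take <N x i16> arguments; reinterpret them as
    // half before extending to float.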
3051 Rep = Builder.CreateBitCast(
3052 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3053 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3054 if (CI->arg_size() >= 3)
3055 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3056 CI->getArgOperand(1));
3057 } else if (Name.starts_with("avx512.mask.load")) {
3058 // "avx512.mask.loadu." or "avx512.mask.load."
3059 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3060 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3061 CI->getArgOperand(2), Aligned);
3062 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3063 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3064 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3065 ResultTy->getNumElements());
3066
3067 Rep = Builder.CreateIntrinsic(
3068 Intrinsic::masked_expandload, ResultTy,
3069 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3070 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3071 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3072 Value *MaskVec =
3073 getX86MaskVec(Builder, CI->getArgOperand(2),
3074 cast<FixedVectorType>(ResultTy)->getNumElements());
3075
3076 Rep = Builder.CreateIntrinsic(
3077 Intrinsic::masked_compressstore, ResultTy,
3078 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3079 } else if (Name.starts_with("avx512.mask.compress.") ||
3080 Name.starts_with("avx512.mask.expand.")) {
3081 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3082
3083 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3084 ResultTy->getNumElements());
3085
3086 bool IsCompress = Name[12] == 'c';
3087 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3088 : Intrinsic::x86_avx512_mask_expand;
3089 Rep = Builder.CreateIntrinsic(
3090 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3091 } else if (Name.starts_with("xop.vpcom")) {
3092 bool IsSigned;
3093 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3094 Name.ends_with("uq"))
3095 IsSigned = false;
3096 else if (Name.ends_with("b") || Name.ends_with("w") ||
3097 Name.ends_with("d") || Name.ends_with("q"))
3098 IsSigned = true;
3099 else
3100 llvm_unreachable("Unknown suffix");
3101
3102 unsigned Imm;
3103 if (CI->arg_size() == 3) {
3104 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3105 } else {
3106 Name = Name.substr(9); // strip off "xop.vpcom"
3107 if (Name.starts_with("lt"))
3108 Imm = 0;
3109 else if (Name.starts_with("le"))
3110 Imm = 1;
3111 else if (Name.starts_with("gt"))
3112 Imm = 2;
3113 else if (Name.starts_with("ge"))
3114 Imm = 3;
3115 else if (Name.starts_with("eq"))
3116 Imm = 4;
3117 else if (Name.starts_with("ne"))
3118 Imm = 5;
3119 else if (Name.starts_with("false"))
3120 Imm = 6;
3121 else if (Name.starts_with("true"))
3122 Imm = 7;
3123 else
3124 llvm_unreachable("Unknown condition");
3125 }
3126
3127 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3128 } else if (Name.starts_with("xop.vpcmov")) {
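    // vpcmov is a bitwise select: (Op0 & Sel) | (Op1 & ~Sel).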
3129 Value *Sel = CI->getArgOperand(2);
3130 Value *NotSel = Builder.CreateNot(Sel);
3131 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3132 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3133 Rep = Builder.CreateOr(Sel0, Sel1);
3134 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3135 Name.starts_with("avx512.mask.prol")) {
3136 Rep = upgradeX86Rotate(Builder, *CI, false);
3137 } else if (Name.starts_with("avx512.pror") ||
3138 Name.starts_with("avx512.mask.pror")) {
3139 Rep = upgradeX86Rotate(Builder, *CI, true);
3140 } else if (Name.starts_with("avx512.vpshld.") ||
3141 Name.starts_with("avx512.mask.vpshld") ||
3142 Name.starts_with("avx512.maskz.vpshld")) {
3143 bool ZeroMask = Name[11] == 'z';
3144 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3145 } else if (Name.starts_with("avx512.vpshrd.") ||
3146 Name.starts_with("avx512.mask.vpshrd") ||
3147 Name.starts_with("avx512.maskz.vpshrd")) {
3148 bool ZeroMask = Name[11] == 'z';
3149 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3150 } else if (Name == "sse42.crc32.64.8") {
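    // The 8-bit CRC32 step only uses the low 32 bits of the accumulator, so
    // truncate, reuse the 32-bit intrinsic, and zero-extend the result.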
3151 Value *Trunc0 =
3152 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3153 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3154 {Trunc0, CI->getArgOperand(1)});
3155 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3156 } else if (Name.starts_with("avx.vbroadcast.s") ||
3157 Name.starts_with("avx512.vbroadcast.s")) {
3158 // Replace broadcasts with a series of insertelements.
3159 auto *VecTy = cast<FixedVectorType>(CI->getType());
3160 Type *EltTy = VecTy->getElementType();
3161 unsigned EltNum = VecTy->getNumElements();
3162 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3163 Type *I32Ty = Type::getInt32Ty(C);
3164 Rep = PoisonValue::get(VecTy);
3165 for (unsigned I = 0; I < EltNum; ++I)
3166 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3167 } else if (Name.starts_with("sse41.pmovsx") ||
3168 Name.starts_with("sse41.pmovzx") ||
3169 Name.starts_with("avx2.pmovsx") ||
3170 Name.starts_with("avx2.pmovzx") ||
3171 Name.starts_with("avx512.mask.pmovsx") ||
3172 Name.starts_with("avx512.mask.pmovzx")) {
3173 auto *DstTy = cast<FixedVectorType>(CI->getType());
3174 unsigned NumDstElts = DstTy->getNumElements();
3175
3176 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3177 SmallVector<int, 8> ShuffleMask(NumDstElts);
3178 for (unsigned i = 0; i != NumDstElts; ++i)
3179 ShuffleMask[i] = i;
3180
3181 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3182
3183 bool DoSext = Name.contains("pmovsx");
3184 Rep =
3185 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3186 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3187 if (CI->arg_size() == 3)
3188 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3189 CI->getArgOperand(1));
3190 } else if (Name == "avx512.mask.pmov.qd.256" ||
3191 Name == "avx512.mask.pmov.qd.512" ||
3192 Name == "avx512.mask.pmov.wb.256" ||
3193 Name == "avx512.mask.pmov.wb.512") {
3194 Type *Ty = CI->getArgOperand(1)->getType();
3195 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3196 Rep =
3197 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3198 } else if (Name.starts_with("avx.vbroadcastf128") ||
3199 Name == "avx2.vbroadcasti128") {
3200 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3201 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3202 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3203 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3204 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3205 if (NumSrcElts == 2)
3206 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3207 else
3208 Rep = Builder.CreateShuffleVector(Load,
3209 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3210 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3211 Name.starts_with("avx512.mask.shuf.f")) {
3212 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3213 Type *VT = CI->getType();
3214 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3215 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3216 unsigned ControlBitsMask = NumLanes - 1;
3217 unsigned NumControlBits = NumLanes / 2;
3218 SmallVector<int, 8> ShuffleMask(0);
3219
3220 for (unsigned l = 0; l != NumLanes; ++l) {
3221 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3222 // We actually need the other source.
3223 if (l >= NumLanes / 2)
3224 LaneMask += NumLanes;
3225 for (unsigned i = 0; i != NumElementsInLane; ++i)
3226 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3227 }
3228 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3229 CI->getArgOperand(1), ShuffleMask);
3230 Rep =
3231 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3232 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3233 Name.starts_with("avx512.mask.broadcasti")) {
3234 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3235 ->getNumElements();
3236 unsigned NumDstElts =
3237 cast<FixedVectorType>(CI->getType())->getNumElements();
3238
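    // Broadcast by repeating the source indices across the destination, e.g.
    // broadcastf32x4 to v16f32 uses the mask <0,1,2,3, 0,1,2,3, ...>.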
3239 SmallVector<int, 8> ShuffleMask(NumDstElts);
3240 for (unsigned i = 0; i != NumDstElts; ++i)
3241 ShuffleMask[i] = i % NumSrcElts;
3242
3243 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3244 CI->getArgOperand(0), ShuffleMask);
3245 Rep =
3246 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3247 } else if (Name.starts_with("avx2.pbroadcast") ||
3248 Name.starts_with("avx2.vbroadcast") ||
3249 Name.starts_with("avx512.pbroadcast") ||
3250 Name.starts_with("avx512.mask.broadcast.s")) {
3251 // Replace vp?broadcasts with a vector shuffle.
3252 Value *Op = CI->getArgOperand(0);
3253 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3254 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3257 Rep = Builder.CreateShuffleVector(Op, M);
3258
3259 if (CI->arg_size() == 3)
3260 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3261 CI->getArgOperand(1));
3262 } else if (Name.starts_with("sse2.padds.") ||
3263 Name.starts_with("avx2.padds.") ||
3264 Name.starts_with("avx512.padds.") ||
3265 Name.starts_with("avx512.mask.padds.")) {
3266 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3267 } else if (Name.starts_with("sse2.psubs.") ||
3268 Name.starts_with("avx2.psubs.") ||
3269 Name.starts_with("avx512.psubs.") ||
3270 Name.starts_with("avx512.mask.psubs.")) {
3271 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3272 } else if (Name.starts_with("sse2.paddus.") ||
3273 Name.starts_with("avx2.paddus.") ||
3274 Name.starts_with("avx512.mask.paddus.")) {
3275 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3276 } else if (Name.starts_with("sse2.psubus.") ||
3277 Name.starts_with("avx2.psubus.") ||
3278 Name.starts_with("avx512.mask.psubus.")) {
3279 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3280 } else if (Name.starts_with("avx512.mask.palignr.")) {
3281 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3282 CI->getArgOperand(1), CI->getArgOperand(2),
3283 CI->getArgOperand(3), CI->getArgOperand(4),
3284 false);
3285 } else if (Name.starts_with("avx512.mask.valign.")) {
3286 Rep = upgradeX86ALIGNIntrinsics(
3287 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3288 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3289 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3290 // 128/256-bit shift left specified in bits.
3291 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3292 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3293 Shift / 8); // Shift is in bits.
3294 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3295 // 128/256-bit shift right specified in bits.
3296 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3297 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3298 Shift / 8); // Shift is in bits.
3299 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3300 Name == "avx512.psll.dq.512") {
3301 // 128/256/512-bit shift left specified in bytes.
3302 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3303 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3304 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3305 Name == "avx512.psrl.dq.512") {
3306 // 128/256/512-bit shift right specified in bytes.
3307 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3308 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3309 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3310 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3311 Name.starts_with("avx2.pblendd.")) {
3312 Value *Op0 = CI->getArgOperand(0);
3313 Value *Op1 = CI->getArgOperand(1);
3314 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3315 auto *VecTy = cast<FixedVectorType>(CI->getType());
3316 unsigned NumElts = VecTy->getNumElements();
3317
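    // Each immediate bit selects Op1 (set) or Op0 (clear), wrapping every 8
    // elements; e.g. sse41.pblendw with Imm = 0xAA gives <0,9,2,11,4,13,6,15>.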
3318 SmallVector<int, 16> Idxs(NumElts);
3319 for (unsigned i = 0; i != NumElts; ++i)
3320 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3321
3322 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3323 } else if (Name.starts_with("avx.vinsertf128.") ||
3324 Name == "avx2.vinserti128" ||
3325 Name.starts_with("avx512.mask.insert")) {
3326 Value *Op0 = CI->getArgOperand(0);
3327 Value *Op1 = CI->getArgOperand(1);
3328 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3329 unsigned DstNumElts =
3330 cast<FixedVectorType>(CI->getType())->getNumElements();
3331 unsigned SrcNumElts =
3332 cast<FixedVectorType>(Op1->getType())->getNumElements();
3333 unsigned Scale = DstNumElts / SrcNumElts;
3334
3335 // Mask off the high bits of the immediate value; hardware ignores those.
3336 Imm = Imm % Scale;
3337
3338 // Extend the second operand into a vector the size of the destination.
3339 SmallVector<int, 8> Idxs(DstNumElts);
3340 for (unsigned i = 0; i != SrcNumElts; ++i)
3341 Idxs[i] = i;
3342 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3343 Idxs[i] = SrcNumElts;
3344 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3345
3346 // Insert the second operand into the first operand.
3347
3348 // Note that there is no guarantee that instruction lowering will actually
3349 // produce a vinsertf128 instruction for the created shuffles. In
3350 // particular, the 0 immediate case involves no lane changes, so it can
3351 // be handled as a blend.
3352
3353 // Example of shuffle mask for 32-bit elements:
3354 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3355 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3356
3357 // First fill with an identity mask.
3358 for (unsigned i = 0; i != DstNumElts; ++i)
3359 Idxs[i] = i;
3360 // Then replace the elements where we need to insert.
3361 for (unsigned i = 0; i != SrcNumElts; ++i)
3362 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3363 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3364
3365 // If the intrinsic has a mask operand, handle that.
3366 if (CI->arg_size() == 5)
3367 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3368 CI->getArgOperand(3));
3369 } else if (Name.starts_with("avx.vextractf128.") ||
3370 Name == "avx2.vextracti128" ||
3371 Name.starts_with("avx512.mask.vextract")) {
3372 Value *Op0 = CI->getArgOperand(0);
3373 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3374 unsigned DstNumElts =
3375 cast<FixedVectorType>(CI->getType())->getNumElements();
3376 unsigned SrcNumElts =
3377 cast<FixedVectorType>(Op0->getType())->getNumElements();
3378 unsigned Scale = SrcNumElts / DstNumElts;
3379
3380 // Mask off the high bits of the immediate value; hardware ignores those.
3381 Imm = Imm % Scale;
3382
3383 // Get indexes for the subvector of the input vector.
3384 SmallVector<int, 8> Idxs(DstNumElts);
3385 for (unsigned i = 0; i != DstNumElts; ++i) {
3386 Idxs[i] = i + (Imm * DstNumElts);
3387 }
3388 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3389
3390 // If the intrinsic has a mask operand, handle that.
3391 if (CI->arg_size() == 4)
3392 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3393 CI->getArgOperand(2));
3394 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3395 Name.starts_with("avx512.mask.perm.di.")) {
3396 Value *Op0 = CI->getArgOperand(0);
3397 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3398 auto *VecTy = cast<FixedVectorType>(CI->getType());
3399 unsigned NumElts = VecTy->getNumElements();
3400
3401 SmallVector<int, 8> Idxs(NumElts);
3402 for (unsigned i = 0; i != NumElts; ++i)
3403 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
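    // e.g. Imm = 0x1B reverses each 4-element group: <3,2,1,0> on v4i64.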
3404
3405 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3406
3407 if (CI->arg_size() == 4)
3408 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3409 CI->getArgOperand(2));
3410 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3411 // The immediate permute control byte looks like this:
3412 // [1:0] - select 128 bits from sources for low half of destination
3413 // [2] - ignore
3414 // [3] - zero low half of destination
3415 // [5:4] - select 128 bits from sources for high half of destination
3416 // [6] - ignore
3417 // [7] - zero high half of destination
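    // e.g. Imm = 0x21 on v8f32 yields the high half of operand 0 followed by
    // the low half of operand 1: <4,5,6,7, 8,9,10,11>.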
3418
3419 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3420
3421 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3422 unsigned HalfSize = NumElts / 2;
3423 SmallVector<int, 8> ShuffleMask(NumElts);
3424
3425 // Determine which operand(s) are actually in use for this instruction.
3426 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3427 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3428
3429 // If needed, replace operands based on zero mask.
3430 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3431 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3432
3433 // Permute low half of result.
3434 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3435 for (unsigned i = 0; i < HalfSize; ++i)
3436 ShuffleMask[i] = StartIndex + i;
3437
3438 // Permute high half of result.
3439 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3440 for (unsigned i = 0; i < HalfSize; ++i)
3441 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3442
3443 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3444
3445 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3446 Name.starts_with("avx512.mask.vpermil.p") ||
3447 Name.starts_with("avx512.mask.pshuf.d.")) {
3448 Value *Op0 = CI->getArgOperand(0);
3449 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3450 auto *VecTy = cast<FixedVectorType>(CI->getType());
3451 unsigned NumElts = VecTy->getNumElements();
3452 // Calculate the size of each index in the immediate.
3453 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3454 unsigned IdxMask = ((1 << IdxSize) - 1);
3455
3456 SmallVector<int, 8> Idxs(NumElts);
3457 // Look up the bits for this element, wrapping around the immediate every
3458 // 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
3459 // by the first index of each group.
3460 for (unsigned i = 0; i != NumElts; ++i)
3461 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
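    // e.g. sse2.pshuf.d with Imm = 0x1B reverses a v4i32: <3,2,1,0>.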
3462
3463 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3464
3465 if (CI->arg_size() == 4)
3466 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3467 CI->getArgOperand(2));
3468 } else if (Name == "sse2.pshufl.w" ||
3469 Name.starts_with("avx512.mask.pshufl.w.")) {
3470 Value *Op0 = CI->getArgOperand(0);
3471 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3472 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3473
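    // The immediate shuffles the low 4 elements of each 8-element lane; the
    // high 4 pass through, e.g. Imm = 0x1B gives <3,2,1,0, 4,5,6,7>.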
3474 SmallVector<int, 16> Idxs(NumElts);
3475 for (unsigned l = 0; l != NumElts; l += 8) {
3476 for (unsigned i = 0; i != 4; ++i)
3477 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3478 for (unsigned i = 4; i != 8; ++i)
3479 Idxs[i + l] = i + l;
3480 }
3481
3482 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3483
3484 if (CI->arg_size() == 4)
3485 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3486 CI->getArgOperand(2));
3487 } else if (Name == "sse2.pshufh.w" ||
3488 Name.starts_with("avx512.mask.pshufh.w.")) {
3489 Value *Op0 = CI->getArgOperand(0);
3490 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3491 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3492
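    // The immediate shuffles the high 4 elements of each 8-element lane; the
    // low 4 pass through, e.g. Imm = 0x1B gives <0,1,2,3, 7,6,5,4>.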
3493 SmallVector<int, 16> Idxs(NumElts);
3494 for (unsigned l = 0; l != NumElts; l += 8) {
3495 for (unsigned i = 0; i != 4; ++i)
3496 Idxs[i + l] = i + l;
3497 for (unsigned i = 0; i != 4; ++i)
3498 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3499 }
3500
3501 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3502
3503 if (CI->arg_size() == 4)
3504 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3505 CI->getArgOperand(2));
3506 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3507 Value *Op0 = CI->getArgOperand(0);
3508 Value *Op1 = CI->getArgOperand(1);
3509 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3510 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3511
3512 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3513 unsigned HalfLaneElts = NumLaneElts / 2;
3514
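    // The low half of each lane is selected from Op0 and the high half from
    // Op1; e.g. 128-bit shufps with Imm = 0x1B gives <3,2, 5,4>.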
3515 SmallVector<int, 16> Idxs(NumElts);
3516 for (unsigned i = 0; i != NumElts; ++i) {
3517 // Base index is the starting element of the lane.
3518 Idxs[i] = i - (i % NumLaneElts);
3519 // If we are halfway through the lane, switch to the other source.
3520 if ((i % NumLaneElts) >= HalfLaneElts)
3521 Idxs[i] += NumElts;
3522 // Now select the specific element by adding HalfLaneElts bits from
3523 // the immediate, wrapping around the immediate every 8 bits.
3524 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3525 }
3526
3527 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3528
3529 Rep =
3530 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3531 } else if (Name.starts_with("avx512.mask.movddup") ||
3532 Name.starts_with("avx512.mask.movshdup") ||
3533 Name.starts_with("avx512.mask.movsldup")) {
3534 Value *Op0 = CI->getArgOperand(0);
3535 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3536 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3537
3538 unsigned Offset = 0;
3539 if (Name.starts_with("avx512.mask.movshdup."))
3540 Offset = 1;
3541
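    // Duplicate alternate elements: movsldup/movddup copy the even elements
    // (Offset 0), movshdup the odd ones; e.g. movsldup on v4f32 -> <0,0,2,2>.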
3542 SmallVector<int, 16> Idxs(NumElts);
3543 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3544 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3545 Idxs[i + l + 0] = i + l + Offset;
3546 Idxs[i + l + 1] = i + l + Offset;
3547 }
3548
3549 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3550
3551 Rep =
3552 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3553 } else if (Name.starts_with("avx512.mask.punpckl") ||
3554 Name.starts_with("avx512.mask.unpckl.")) {
3555 Value *Op0 = CI->getArgOperand(0);
3556 Value *Op1 = CI->getArgOperand(1);
3557 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3558 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3559
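    // Interleave the low halves of each 128-bit lane, e.g. a v4i32 unpckl
    // uses the mask <0,4,1,5>.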
3560 SmallVector<int, 64> Idxs(NumElts);
3561 for (int l = 0; l != NumElts; l += NumLaneElts)
3562 for (int i = 0; i != NumLaneElts; ++i)
3563 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3564
3565 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3566
3567 Rep =
3568 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3569 } else if (Name.starts_with("avx512.mask.punpckh") ||
3570 Name.starts_with("avx512.mask.unpckh.")) {
3571 Value *Op0 = CI->getArgOperand(0);
3572 Value *Op1 = CI->getArgOperand(1);
3573 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3574 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3575
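    // Interleave the high halves of each 128-bit lane, e.g. a v4i32 unpckh
    // uses the mask <2,6,3,7>.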
3576 SmallVector<int, 64> Idxs(NumElts);
3577 for (int l = 0; l != NumElts; l += NumLaneElts)
3578 for (int i = 0; i != NumLaneElts; ++i)
3579 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3580
3581 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3582
3583 Rep =
3584 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3585 } else if (Name.starts_with("avx512.mask.and.") ||
3586 Name.starts_with("avx512.mask.pand.")) {
3587 VectorType *FTy = cast<VectorType>(CI->getType());
3588 VectorType *ITy = VectorType::getInteger(FTy);
3589 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3590 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3591 Rep = Builder.CreateBitCast(Rep, FTy);
3592 Rep =
3593 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3594 } else if (Name.starts_with("avx512.mask.andn.") ||
3595 Name.starts_with("avx512.mask.pandn.")) {
3596 VectorType *FTy = cast<VectorType>(CI->getType());
3597 VectorType *ITy = VectorType::getInteger(FTy);
3598 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3599 Rep = Builder.CreateAnd(Rep,
3600 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3601 Rep = Builder.CreateBitCast(Rep, FTy);
3602 Rep =
3603 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3604 } else if (Name.starts_with("avx512.mask.or.") ||
3605 Name.starts_with("avx512.mask.por.")) {
3606 VectorType *FTy = cast<VectorType>(CI->getType());
3607 VectorType *ITy = VectorType::getInteger(FTy);
3608 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3609 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3610 Rep = Builder.CreateBitCast(Rep, FTy);
3611 Rep =
3612 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3613 } else if (Name.starts_with("avx512.mask.xor.") ||
3614 Name.starts_with("avx512.mask.pxor.")) {
3615 VectorType *FTy = cast<VectorType>(CI->getType());
3616 VectorType *ITy = VectorType::getInteger(FTy);
3617 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3618 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3619 Rep = Builder.CreateBitCast(Rep, FTy);
3620 Rep =
3621 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3622 } else if (Name.starts_with("avx512.mask.padd.")) {
3623 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3624 Rep =
3625 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3626 } else if (Name.starts_with("avx512.mask.psub.")) {
3627 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3628 Rep =
3629 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3630 } else if (Name.starts_with("avx512.mask.pmull.")) {
3631 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3632 Rep =
3633 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3634 } else if (Name.starts_with("avx512.mask.add.p")) {
3635 if (Name.ends_with(".512")) {
3636 Intrinsic::ID IID;
3637 if (Name[17] == 's')
3638 IID = Intrinsic::x86_avx512_add_ps_512;
3639 else
3640 IID = Intrinsic::x86_avx512_add_pd_512;
3641
3642 Rep = Builder.CreateIntrinsic(
3643 IID,
3644 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3645 } else {
3646 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3647 }
3648 Rep =
3649 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3650 } else if (Name.starts_with("avx512.mask.div.p")) {
3651 if (Name.ends_with(".512")) {
3652 Intrinsic::ID IID;
3653 if (Name[17] == 's')
3654 IID = Intrinsic::x86_avx512_div_ps_512;
3655 else
3656 IID = Intrinsic::x86_avx512_div_pd_512;
3657
3658 Rep = Builder.CreateIntrinsic(
3659 IID,
3660 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3661 } else {
3662 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3663 }
3664 Rep =
3665 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3666 } else if (Name.starts_with("avx512.mask.mul.p")) {
3667 if (Name.ends_with(".512")) {
3668 Intrinsic::ID IID;
3669 if (Name[17] == 's')
3670 IID = Intrinsic::x86_avx512_mul_ps_512;
3671 else
3672 IID = Intrinsic::x86_avx512_mul_pd_512;
3673
3674 Rep = Builder.CreateIntrinsic(
3675 IID,
3676 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3677 } else {
3678 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3679 }
3680 Rep =
3681 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3682 } else if (Name.starts_with("avx512.mask.sub.p")) {
3683 if (Name.ends_with(".512")) {
3684 Intrinsic::ID IID;
3685 if (Name[17] == 's')
3686 IID = Intrinsic::x86_avx512_sub_ps_512;
3687 else
3688 IID = Intrinsic::x86_avx512_sub_pd_512;
3689
3690 Rep = Builder.CreateIntrinsic(
3691 IID,
3692 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3693 } else {
3694 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3695 }
3696 Rep =
3697 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3698 } else if ((Name.starts_with("avx512.mask.max.p") ||
3699 Name.starts_with("avx512.mask.min.p")) &&
3700 Name.drop_front(18) == ".512") {
3701 bool IsDouble = Name[17] == 'd';
3702 bool IsMin = Name[13] == 'i';
3703 static const Intrinsic::ID MinMaxTbl[2][2] = {
3704 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3705 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3706 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3707
3708 Rep = Builder.CreateIntrinsic(
3709 IID,
3710 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3711 Rep =
3712 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3713 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3714 Rep =
3715 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3716 {CI->getArgOperand(0), Builder.getInt1(false)});
3717 Rep =
3718 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3719 } else if (Name.starts_with("avx512.mask.psll")) {
3720 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3721 bool IsVariable = Name[16] == 'v';
3722 char Size = Name[16] == '.' ? Name[17]
3723 : Name[17] == '.' ? Name[18]
3724 : Name[18] == '.' ? Name[19]
3725 : Name[20];
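    // e.g. "avx512.mask.psll.d.128" -> 'd', "avx512.mask.psll.qi.512" -> 'q',
    // "avx512.mask.psllv8.hi" -> 'h'.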
3726
3727 Intrinsic::ID IID;
3728 if (IsVariable && Name[17] != '.') {
3729 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3730 IID = Intrinsic::x86_avx2_psllv_q;
3731 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3732 IID = Intrinsic::x86_avx2_psllv_q_256;
3733 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3734 IID = Intrinsic::x86_avx2_psllv_d;
3735 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3736 IID = Intrinsic::x86_avx2_psllv_d_256;
3737 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3738 IID = Intrinsic::x86_avx512_psllv_w_128;
3739 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3740 IID = Intrinsic::x86_avx512_psllv_w_256;
3741 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3742 IID = Intrinsic::x86_avx512_psllv_w_512;
3743 else
3744 llvm_unreachable("Unexpected size");
3745 } else if (Name.ends_with(".128")) {
3746 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3747 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3748 : Intrinsic::x86_sse2_psll_d;
3749 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3750 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3751 : Intrinsic::x86_sse2_psll_q;
3752 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3753 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3754 : Intrinsic::x86_sse2_psll_w;
3755 else
3756 llvm_unreachable("Unexpected size");
3757 } else if (Name.ends_with(".256")) {
3758 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3759 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3760 : Intrinsic::x86_avx2_psll_d;
3761 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3762 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3763 : Intrinsic::x86_avx2_psll_q;
3764 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3765 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3766 : Intrinsic::x86_avx2_psll_w;
3767 else
3768 llvm_unreachable("Unexpected size");
3769 } else {
3770 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3771 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3772 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3773 : Intrinsic::x86_avx512_psll_d_512;
3774 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3775 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3776 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3777 : Intrinsic::x86_avx512_psll_q_512;
3778 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3779 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3780 : Intrinsic::x86_avx512_psll_w_512;
3781 else
3782 llvm_unreachable("Unexpected size");
3783 }
3784
3785 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3786 } else if (Name.starts_with("avx512.mask.psrl")) {
3787 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3788 bool IsVariable = Name[16] == 'v';
3789 char Size = Name[16] == '.' ? Name[17]
3790 : Name[17] == '.' ? Name[18]
3791 : Name[18] == '.' ? Name[19]
3792 : Name[20];
3793
3794 Intrinsic::ID IID;
3795 if (IsVariable && Name[17] != '.') {
3796 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3797 IID = Intrinsic::x86_avx2_psrlv_q;
3798 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3799 IID = Intrinsic::x86_avx2_psrlv_q_256;
3800 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3801 IID = Intrinsic::x86_avx2_psrlv_d;
3802 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3803 IID = Intrinsic::x86_avx2_psrlv_d_256;
3804 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3805 IID = Intrinsic::x86_avx512_psrlv_w_128;
3806 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3807 IID = Intrinsic::x86_avx512_psrlv_w_256;
3808 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3809 IID = Intrinsic::x86_avx512_psrlv_w_512;
3810 else
3811 llvm_unreachable("Unexpected size");
3812 } else if (Name.ends_with(".128")) {
3813 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3814 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3815 : Intrinsic::x86_sse2_psrl_d;
3816 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3817 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3818 : Intrinsic::x86_sse2_psrl_q;
3819 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3820 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3821 : Intrinsic::x86_sse2_psrl_w;
3822 else
3823 llvm_unreachable("Unexpected size");
3824 } else if (Name.ends_with(".256")) {
3825 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3826 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3827 : Intrinsic::x86_avx2_psrl_d;
3828 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3829 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3830 : Intrinsic::x86_avx2_psrl_q;
3831 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3832 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3833 : Intrinsic::x86_avx2_psrl_w;
3834 else
3835 llvm_unreachable("Unexpected size");
3836 } else {
3837 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3838 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3839 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3840 : Intrinsic::x86_avx512_psrl_d_512;
3841 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3842 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3843 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3844 : Intrinsic::x86_avx512_psrl_q_512;
3845 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3846 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3847 : Intrinsic::x86_avx512_psrl_w_512;
3848 else
3849 llvm_unreachable("Unexpected size");
3850 }
3851
3852 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3853 } else if (Name.starts_with("avx512.mask.psra")) {
3854 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3855 bool IsVariable = Name[16] == 'v';
3856 char Size = Name[16] == '.' ? Name[17]
3857 : Name[17] == '.' ? Name[18]
3858 : Name[18] == '.' ? Name[19]
3859 : Name[20];
3860
3861 Intrinsic::ID IID;
3862 if (IsVariable && Name[17] != '.') {
3863 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3864 IID = Intrinsic::x86_avx2_psrav_d;
3865 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3866 IID = Intrinsic::x86_avx2_psrav_d_256;
3867 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3868 IID = Intrinsic::x86_avx512_psrav_w_128;
3869 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3870 IID = Intrinsic::x86_avx512_psrav_w_256;
3871 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3872 IID = Intrinsic::x86_avx512_psrav_w_512;
3873 else
3874 llvm_unreachable("Unexpected size");
3875 } else if (Name.ends_with(".128")) {
3876 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3877 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3878 : Intrinsic::x86_sse2_psra_d;
3879 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3880 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3881 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3882 : Intrinsic::x86_avx512_psra_q_128;
3883 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3884 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3885 : Intrinsic::x86_sse2_psra_w;
3886 else
3887 llvm_unreachable("Unexpected size");
3888 } else if (Name.ends_with(".256")) {
3889 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3890 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3891 : Intrinsic::x86_avx2_psra_d;
3892 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3893 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3894 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3895 : Intrinsic::x86_avx512_psra_q_256;
3896 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3897 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3898 : Intrinsic::x86_avx2_psra_w;
3899 else
3900 llvm_unreachable("Unexpected size");
3901 } else {
3902 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3903 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3904 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3905 : Intrinsic::x86_avx512_psra_d_512;
3906 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3907 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3908 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3909 : Intrinsic::x86_avx512_psra_q_512;
3910 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3911 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3912 : Intrinsic::x86_avx512_psra_w_512;
3913 else
3914 llvm_unreachable("Unexpected size");
3915 }
3916
3917 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3918 } else if (Name.starts_with("avx512.mask.move.s")) {
3919 Rep = upgradeMaskedMove(Builder, *CI);
3920 } else if (Name.starts_with("avx512.cvtmask2")) {
3921 Rep = upgradeMaskToInt(Builder, *CI);
3922 } else if (Name.ends_with(".movntdqa")) {
3923 MDNode *Node = MDNode::get(
3924 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3925
3926 LoadInst *LI = Builder.CreateAlignedLoad(
3927 CI->getType(), CI->getArgOperand(0),
3928 Align(CI->getType()->getPrimitiveSizeInBits() / 8));
3929 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3930 Rep = LI;
3931 } else if (Name.starts_with("fma.vfmadd.") ||
3932 Name.starts_with("fma.vfmsub.") ||
3933 Name.starts_with("fma.vfnmadd.") ||
3934 Name.starts_with("fma.vfnmsub.")) {
3935 bool NegMul = Name[6] == 'n';
3936 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3937 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
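    // e.g. "fma.vfnmsub.sd" -> NegMul, NegAcc and IsScalar are all true;
    // "fma.vfmadd.ps" sets none of them.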
3938
3939 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3940 CI->getArgOperand(2)};
3941
3942 if (IsScalar) {
3943 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3944 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3945 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3946 }
3947
3948 if (NegMul && !IsScalar)
3949 Ops[0] = Builder.CreateFNeg(Ops[0]);
3950 if (NegMul && IsScalar)
3951 Ops[1] = Builder.CreateFNeg(Ops[1]);
3952 if (NegAcc)
3953 Ops[2] = Builder.CreateFNeg(Ops[2]);
3954
3955 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3956
3957 if (IsScalar)
3958 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3959 } else if (Name.starts_with("fma4.vfmadd.s")) {
3960 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3961 CI->getArgOperand(2)};
3962
3963 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3964 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3965 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3966
3967 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3968
3969 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3970 Rep, (uint64_t)0);
3971 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3972 Name.starts_with("avx512.maskz.vfmadd.s") ||
3973 Name.starts_with("avx512.mask3.vfmadd.s") ||
3974 Name.starts_with("avx512.mask3.vfmsub.s") ||
3975 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3976 bool IsMask3 = Name[11] == '3';
3977 bool IsMaskZ = Name[11] == 'z';
3979 // Drop the "avx512.mask." prefix to simplify the indexing below.
3979 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3980 bool NegMul = Name[2] == 'n';
3981 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3982
3983 Value *A = CI->getArgOperand(0);
3984 Value *B = CI->getArgOperand(1);
3985 Value *C = CI->getArgOperand(2);
3986
3987 if (NegMul && (IsMask3 || IsMaskZ))
3988 A = Builder.CreateFNeg(A);
3989 if (NegMul && !(IsMask3 || IsMaskZ))
3990 B = Builder.CreateFNeg(B);
3991 if (NegAcc)
3992 C = Builder.CreateFNeg(C);
3993
3994 A = Builder.CreateExtractElement(A, (uint64_t)0);
3995 B = Builder.CreateExtractElement(B, (uint64_t)0);
3996 C = Builder.CreateExtractElement(C, (uint64_t)0);
3997
3998 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3999 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4000 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4001
4002 Intrinsic::ID IID;
4003 if (Name.back() == 'd')
4004 IID = Intrinsic::x86_avx512_vfmadd_f64;
4005 else
4006 IID = Intrinsic::x86_avx512_vfmadd_f32;
4007 Rep = Builder.CreateIntrinsic(IID, Ops);
4008 } else {
4009 Rep = Builder.CreateFMA(A, B, C);
4010 }
4011
4012 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4013 : IsMask3 ? C
4014 : A;
4015
4016 // For Mask3 with NegAcc, we need to create a new extractelement that
4017 // avoids the negation above.
4018 if (NegAcc && IsMask3)
4019 PassThru =
4020 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4021
4022 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4023 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4024 (uint64_t)0);
4025 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4026 Name.starts_with("avx512.mask.vfnmadd.p") ||
4027 Name.starts_with("avx512.mask.vfnmsub.p") ||
4028 Name.starts_with("avx512.mask3.vfmadd.p") ||
4029 Name.starts_with("avx512.mask3.vfmsub.p") ||
4030 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4031 Name.starts_with("avx512.maskz.vfmadd.p")) {
4032 bool IsMask3 = Name[11] == '3';
4033 bool IsMaskZ = Name[11] == 'z';
4034 // Drop the "avx512.mask." prefix to simplify the indexing below.
4035 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4036 bool NegMul = Name[2] == 'n';
4037 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4038
4039 Value *A = CI->getArgOperand(0);
4040 Value *B = CI->getArgOperand(1);
4041 Value *C = CI->getArgOperand(2);
4042
4043 if (NegMul && (IsMask3 || IsMaskZ))
4044 A = Builder.CreateFNeg(A);
4045 if (NegMul && !(IsMask3 || IsMaskZ))
4046 B = Builder.CreateFNeg(B);
4047 if (NegAcc)
4048 C = Builder.CreateFNeg(C);
4049
4050 if (CI->arg_size() == 5 &&
4051 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4052 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4053 Intrinsic::ID IID;
4054 // Check the character before the ".512" suffix.
4055 if (Name[Name.size() - 5] == 's')
4056 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4057 else
4058 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4059
4060 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4061 } else {
4062 Rep = Builder.CreateFMA(A, B, C);
4063 }
4064
4065 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4066 : IsMask3 ? CI->getArgOperand(2)
4067 : CI->getArgOperand(0);
4068
4069 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4070 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4071 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4072 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4073 Intrinsic::ID IID;
4074 if (VecWidth == 128 && EltWidth == 32)
4075 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4076 else if (VecWidth == 256 && EltWidth == 32)
4077 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4078 else if (VecWidth == 128 && EltWidth == 64)
4079 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4080 else if (VecWidth == 256 && EltWidth == 64)
4081 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4082 else
4083 llvm_unreachable("Unexpected intrinsic");
4084
4085 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4086 CI->getArgOperand(2)};
4087 Ops[2] = Builder.CreateFNeg(Ops[2]);
4088 Rep = Builder.CreateIntrinsic(IID, Ops);
4089 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4090 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4091 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4092 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4093 bool IsMask3 = Name[11] == '3';
4094 bool IsMaskZ = Name[11] == 'z';
4095 // Drop the "avx512.mask." prefix to simplify the indexing below.
4096 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4097 bool IsSubAdd = Name[3] == 's';
4098 if (CI->arg_size() == 5) {
4099 Intrinsic::ID IID;
4100 // Check the character before the ".512" suffix.
4101 if (Name[Name.size() - 5] == 's')
4102 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4103 else
4104 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4105
4106 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4107 CI->getArgOperand(2), CI->getArgOperand(4)};
4108 if (IsSubAdd)
4109 Ops[2] = Builder.CreateFNeg(Ops[2]);
4110
4111 Rep = Builder.CreateIntrinsic(IID, Ops);
4112 } else {
4113 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4114
4115 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4116 CI->getArgOperand(2)};
4117
4118 Function *FMA = Intrinsic::getOrInsertDeclaration(
4119 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4120 Value *Odd = Builder.CreateCall(FMA, Ops);
4121 Ops[2] = Builder.CreateFNeg(Ops[2]);
4122 Value *Even = Builder.CreateCall(FMA, Ops);
4123
4124 if (IsSubAdd)
4125 std::swap(Even, Odd);
4126
4127 SmallVector<int, 32> Idxs(NumElts);
4128 for (int i = 0; i != NumElts; ++i)
4129 Idxs[i] = i + (i % 2) * NumElts;
4130
4131 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4132 }
4133
4134 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4135 : IsMask3 ? CI->getArgOperand(2)
4136 : CI->getArgOperand(0);
4137
4138 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4139 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4140 Name.starts_with("avx512.maskz.pternlog.")) {
4141 bool ZeroMask = Name[11] == 'z';
4142 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4143 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4144 Intrinsic::ID IID;
4145 if (VecWidth == 128 && EltWidth == 32)
4146 IID = Intrinsic::x86_avx512_pternlog_d_128;
4147 else if (VecWidth == 256 && EltWidth == 32)
4148 IID = Intrinsic::x86_avx512_pternlog_d_256;
4149 else if (VecWidth == 512 && EltWidth == 32)
4150 IID = Intrinsic::x86_avx512_pternlog_d_512;
4151 else if (VecWidth == 128 && EltWidth == 64)
4152 IID = Intrinsic::x86_avx512_pternlog_q_128;
4153 else if (VecWidth == 256 && EltWidth == 64)
4154 IID = Intrinsic::x86_avx512_pternlog_q_256;
4155 else if (VecWidth == 512 && EltWidth == 64)
4156 IID = Intrinsic::x86_avx512_pternlog_q_512;
4157 else
4158 llvm_unreachable("Unexpected intrinsic");
4159
4160 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4161 CI->getArgOperand(2), CI->getArgOperand(3)};
4162 Rep = Builder.CreateIntrinsic(IID, Args);
4163 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4164 : CI->getArgOperand(0);
4165 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4166 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4167 Name.starts_with("avx512.maskz.vpmadd52")) {
4168 bool ZeroMask = Name[11] == 'z';
4169 bool High = Name[20] == 'h' || Name[21] == 'h';
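    // The 'h' (high) or 'l' (low) follows "vpmadd52"; "maskz" names are one
    // character longer than "mask" names, hence the two positions checked.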
4170 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4171 Intrinsic::ID IID;
4172 if (VecWidth == 128 && !High)
4173 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4174 else if (VecWidth == 256 && !High)
4175 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4176 else if (VecWidth == 512 && !High)
4177 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4178 else if (VecWidth == 128 && High)
4179 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4180 else if (VecWidth == 256 && High)
4181 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4182 else if (VecWidth == 512 && High)
4183 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4184 else
4185 llvm_unreachable("Unexpected intrinsic");
4186
4187 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4188 CI->getArgOperand(2)};
4189 Rep = Builder.CreateIntrinsic(IID, Args);
4190 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4191 : CI->getArgOperand(0);
4192 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4193 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4194 Name.starts_with("avx512.mask.vpermt2var.") ||
4195 Name.starts_with("avx512.maskz.vpermt2var.")) {
4196 bool ZeroMask = Name[11] == 'z';
4197 bool IndexForm = Name[17] == 'i';
4198 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4199 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4200 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4201 Name.starts_with("avx512.mask.vpdpbusds.") ||
4202 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4203 bool ZeroMask = Name[11] == 'z';
4204 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4205 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4206 Intrinsic::ID IID;
4207 if (VecWidth == 128 && !IsSaturating)
4208 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4209 else if (VecWidth == 256 && !IsSaturating)
4210 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4211 else if (VecWidth == 512 && !IsSaturating)
4212 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4213 else if (VecWidth == 128 && IsSaturating)
4214 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4215 else if (VecWidth == 256 && IsSaturating)
4216 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4217 else if (VecWidth == 512 && IsSaturating)
4218 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4219 else
4220 llvm_unreachable("Unexpected intrinsic");
4221
4222 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4223 CI->getArgOperand(2)};
4224
4225 // Input argument types were incorrectly set to vectors of i32, but they
4226 // should be vectors of i8. Insert a bitcast when encountering the old
4227 // types.
4228 if (Args[1]->getType()->isVectorTy() &&
4229 cast<VectorType>(Args[1]->getType())
4230 ->getElementType()
4231 ->isIntegerTy(32) &&
4232 Args[2]->getType()->isVectorTy() &&
4233 cast<VectorType>(Args[2]->getType())
4234 ->getElementType()
4235 ->isIntegerTy(32)) {
4236 Type *NewArgType = nullptr;
4237 if (VecWidth == 128)
4238 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4239 else if (VecWidth == 256)
4240 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4241 else if (VecWidth == 512)
4242 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4243 else
4244 llvm_unreachable("Unexpected vector bit width");
4245
4246 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4247 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4248 }
4249
4250 Rep = Builder.CreateIntrinsic(IID, Args);
4251 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4252 : CI->getArgOperand(0);
4253 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4254 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4255 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4256 Name.starts_with("avx512.mask.vpdpwssds.") ||
4257 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4258 bool ZeroMask = Name[11] == 'z';
4259 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4260 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4261 Intrinsic::ID IID;
4262 if (VecWidth == 128 && !IsSaturating)
4263 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4264 else if (VecWidth == 256 && !IsSaturating)
4265 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4266 else if (VecWidth == 512 && !IsSaturating)
4267 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4268 else if (VecWidth == 128 && IsSaturating)
4269 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4270 else if (VecWidth == 256 && IsSaturating)
4271 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4272 else if (VecWidth == 512 && IsSaturating)
4273 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4274 else
4275 llvm_unreachable("Unexpected intrinsic");
4276
4277 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4278 CI->getArgOperand(2)};
4279 Rep = Builder.CreateIntrinsic(IID, Args);
4280 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4281 : CI->getArgOperand(0);
4282 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4283 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4284 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4285 Name == "subborrow.u32" || Name == "subborrow.u64") {
4286 Intrinsic::ID IID;
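    // Distinguish add/sub by the first character and the operand width by the
    // final digit of the suffix ('2' for u32, '4' for u64).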
4287 if (Name[0] == 'a' && Name.back() == '2')
4288 IID = Intrinsic::x86_addcarry_32;
4289 else if (Name[0] == 'a' && Name.back() == '4')
4290 IID = Intrinsic::x86_addcarry_64;
4291 else if (Name[0] == 's' && Name.back() == '2')
4292 IID = Intrinsic::x86_subborrow_32;
4293 else if (Name[0] == 's' && Name.back() == '4')
4294 IID = Intrinsic::x86_subborrow_64;
4295 else
4296 llvm_unreachable("Unexpected intrinsic");
4297
4298 // Make a call with 3 operands.
4299 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4300 CI->getArgOperand(2)};
4301 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4302
4303 // Extract the second result and store it.
4304 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4305 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4306 // Replace the original call result with the first result of the new call.
4307 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4308
4309 CI->replaceAllUsesWith(CF);
4310 Rep = nullptr;
4311 } else if (Name.starts_with("avx512.mask.") &&
4312 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4313 // Rep will be updated by the call in the condition.
4314 }
4315
4316 return Rep;
4317}
4318
4319 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4320 Function *F, IRBuilder<> &Builder) {
4321 if (Name.starts_with("neon.bfcvt")) {
4322 if (Name.starts_with("neon.bfcvtn2")) {
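      // bfcvtn2 narrows operand 1 into the high half of the result while
      // keeping the low half of operand 0 unchanged.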
4323 SmallVector<int, 32> LoMask(4);
4324 std::iota(LoMask.begin(), LoMask.end(), 0);
4325 SmallVector<int, 32> ConcatMask(8);
4326 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4327 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4328 Value *Trunc =
4329 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4330 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4331 } else if (Name.starts_with("neon.bfcvtn")) {
4332 SmallVector<int, 32> ConcatMask(8);
4333 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4334 Type *V4BF16 =
4335 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4336 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4338 return Builder.CreateShuffleVector(
4339 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4340 } else {
4341 return Builder.CreateFPTrunc(CI->getOperand(0),
4342 Type::getBFloatTy(F->getContext()));
4343 }
4344 } else if (Name.starts_with("sve.fcvt")) {
4345 Intrinsic::ID NewID =
4346 StringSwitch<Intrinsic::ID>(Name)
4347 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4348 .Case("sve.fcvtnt.bf16f32",
4349 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4351 if (NewID == Intrinsic::not_intrinsic)
4352 llvm_unreachable("Unhandled Intrinsic!");
4353
4354 SmallVector<Value *, 3> Args(CI->args());
4355
4356 // The original intrinsics incorrectly used a predicate based on the
4357 // smallest element type rather than the largest.
4358 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4359 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4360
4361 if (Args[1]->getType() != BadPredTy)
4362 llvm_unreachable("Unexpected predicate type!");
4363
4364 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4365 BadPredTy, Args[1]);
4366 Args[1] = Builder.CreateIntrinsic(
4367 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4368
4369 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4370 CI->getName());
4371 }
4372
4373 llvm_unreachable("Unhandled Intrinsic!");
4374}
4375
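// Illustrative sketch of the mve.vctp64.old upgrade below (value names are
// made up): the old intrinsic returned <4 x i1>, so the new <2 x i1> result
// is round-tripped through the integer predicate form:
//   %p = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %p)
//   %r = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)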
4376static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4377 IRBuilder<> &Builder) {
4378 if (Name == "mve.vctp64.old") {
4379 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4380 // correct type.
4381 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4382 CI->getArgOperand(0),
4383 /*FMFSource=*/nullptr, CI->getName());
4384 Value *C1 = Builder.CreateIntrinsic(
4385 Intrinsic::arm_mve_pred_v2i,
4386 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4387 return Builder.CreateIntrinsic(
4388 Intrinsic::arm_mve_pred_i2v,
4389 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4390 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4391 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4392 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4393 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4394 Name ==
4395 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4396 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4397 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4398 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4399 Name ==
4400 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4401 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4402 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4403 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4404 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4405 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4406 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4407 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4408 std::vector<Type *> Tys;
4409 unsigned ID = CI->getIntrinsicID();
4410 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4411 switch (ID) {
4412 case Intrinsic::arm_mve_mull_int_predicated:
4413 case Intrinsic::arm_mve_vqdmull_predicated:
4414 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4415 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4416 break;
4417 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4418 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4419 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4420 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4421 V2I1Ty};
4422 break;
4423 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4424 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4425 CI->getOperand(1)->getType(), V2I1Ty};
4426 break;
4427 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4428 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4429 CI->getOperand(2)->getType(), V2I1Ty};
4430 break;
4431 case Intrinsic::arm_cde_vcx1q_predicated:
4432 case Intrinsic::arm_cde_vcx1qa_predicated:
4433 case Intrinsic::arm_cde_vcx2q_predicated:
4434 case Intrinsic::arm_cde_vcx2qa_predicated:
4435 case Intrinsic::arm_cde_vcx3q_predicated:
4436 case Intrinsic::arm_cde_vcx3qa_predicated:
4437 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4438 break;
4439 default:
4440 llvm_unreachable("Unhandled Intrinsic!");
4441 }
4442
4443 std::vector<Value *> Ops;
4444 for (Value *Op : CI->args()) {
4445 Type *Ty = Op->getType();
4446 if (Ty->getScalarSizeInBits() == 1) {
4447 Value *C1 = Builder.CreateIntrinsic(
4448 Intrinsic::arm_mve_pred_v2i,
4449 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4450 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4451 }
4452 Ops.push_back(Op);
4453 }
4454
4455 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4456 CI->getName());
4457 }
4458 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4459}
4460
4461// These are expected to have the arguments:
4462// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4463//
4464// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4465//
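// Illustrative sketch (names made up) of the rewrite performed below:
//   %r = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v,
//                                            i32 0, i32 0, i1 false)
// becomes roughly
//   %r = atomicrmw fadd ptr addrspace(3) %p, float %v syncscope("agent")
//        seq_cst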
4466static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4467 Function *F, IRBuilder<> &Builder) {
4468 AtomicRMWInst::BinOp RMWOp =
4469 StringSwitch<AtomicRMWInst::BinOp>(Name)
4470 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4471 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4472 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4473 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4474 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4475 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4476 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4477 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4478 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4479 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4480 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4481
4482 unsigned NumOperands = CI->getNumOperands();
4483 if (NumOperands < 3) // Malformed bitcode.
4484 return nullptr;
4485
4486 Value *Ptr = CI->getArgOperand(0);
4487 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4488 if (!PtrTy) // Malformed.
4489 return nullptr;
4490
4491 Value *Val = CI->getArgOperand(1);
4492 if (Val->getType() != CI->getType()) // Malformed.
4493 return nullptr;
4494
4495 ConstantInt *OrderArg = nullptr;
4496 bool IsVolatile = false;
4497
4498 // These should have 5 arguments (plus the callee). A separate version of the
4499 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4500 if (NumOperands > 3)
4501 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4502
4503 // Ignore scope argument at 3
4504
4505 if (NumOperands > 5) {
4506 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4507 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4508 }
4509
4510 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4511 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4512 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4513 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4514 Order = AtomicOrdering::SequentiallyConsistent;
4515
4516 LLVMContext &Ctx = F->getContext();
4517
4518 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4519 Type *RetTy = CI->getType();
4520 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4521 if (VT->getElementType()->isIntegerTy(16)) {
4522 VectorType *AsBF16 =
4523 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4524 Val = Builder.CreateBitCast(Val, AsBF16);
4525 }
4526 }
4527
4528 // The scope argument never really worked correctly. Use agent as the most
4529 // conservative option which should still always produce the instruction.
4530 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4531 AtomicRMWInst *RMW =
4532 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4533
4534 unsigned AddrSpace = PtrTy->getAddressSpace();
4535 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4536 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4537 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4538 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4539 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4540 }
4541
4542 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4543 MDBuilder MDB(F->getContext());
4544 MDNode *RangeNotPrivate =
4545 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4546 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4547 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4548 }
4549
4550 if (IsVolatile)
4551 RMW->setVolatile(true);
4552
4553 return Builder.CreateBitCast(RMW, RetTy);
4554}
4555
4556/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4557/// plain MDNode, as it's the verifier's job to check these are the correct
4558/// types later.
4559static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4560 if (Op < CI->arg_size()) {
4561 if (MetadataAsValue *MAV =
4562 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4563 Metadata *MD = MAV->getMetadata();
4564 return dyn_cast_if_present<MDNode>(MD);
4565 }
4566 }
4567 return nullptr;
4568}
4569
4570/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4571static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4572 if (Op < CI->arg_size())
4573 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4574 return MAV->getMetadata();
4575 return nullptr;
4576}
4577
4578static MDNode *getDebugLocSafe(const Instruction *I) {
4579 // The MDNode attached to this instruction might not be the correct type,
4580 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4581 return I->getDebugLoc().getAsMDNode();
4582}
4583
4584/// Convert debug intrinsic calls to non-instruction debug records.
4585/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4586/// \p CI - The debug intrinsic call.
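// For illustration (metadata ids made up): a call
//   call void @llvm.dbg.value(metadata i32 %x, metadata !10,
//                             metadata !DIExpression())
// is converted into an equivalent #dbg_value debug record inserted at the
// call's position.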
4587static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4588 DbgRecord *DR = nullptr;
4589 if (Name == "label") {
4590 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4591 CI->getDebugLoc());
4592 } else if (Name == "assign") {
4593 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4594 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4595 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4596 unwrapMAVMetadataOp(CI, 4),
4597 /*The address is a Value ref, it will be stored as a Metadata */
4598 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4599 } else if (Name == "declare") {
4600 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4601 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4602 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4603 getDebugLocSafe(CI));
4604 } else if (Name == "addr") {
4605 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4606 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4607 // Don't try to add something to the expression if it's not an expression.
4608 // Instead, allow the verifier to fail later.
4609 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4610 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4611 }
4612 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4613 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4614 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4615 getDebugLocSafe(CI));
4616 } else if (Name == "value") {
4617 // An old version of dbg.value had an extra offset argument.
4618 unsigned VarOp = 1;
4619 unsigned ExprOp = 2;
4620 if (CI->arg_size() == 4) {
4621 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4622 // Nonzero offset dbg.values get dropped without a replacement.
4623 if (!Offset || !Offset->isZeroValue())
4624 return;
4625 VarOp = 2;
4626 ExprOp = 3;
4627 }
4628 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4629 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4630 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4631 nullptr, getDebugLocSafe(CI));
4632 }
4633 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4634 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4635}
4636
4637/// Upgrade a call to an old intrinsic. All argument and return casting must be
4638/// provided to seamlessly integrate with existing context.
4640 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4641 // checks the callee's function type matches. It's likely we need to handle
4642 // type changes here.
4643 auto *F = dyn_cast<Function>(CI->getCalledOperand());
4644 if (!F)
4645 return;
4646
4647 LLVMContext &C = CI->getContext();
4648 IRBuilder<> Builder(C);
4649 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4650
4651 if (!NewFn) {
4652 // Get the Function's name.
4653 StringRef Name = F->getName();
4654
4655 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4656 Name = Name.substr(5);
4657
4658 bool IsX86 = Name.consume_front("x86.");
4659 bool IsNVVM = Name.consume_front("nvvm.");
4660 bool IsAArch64 = Name.consume_front("aarch64.");
4661 bool IsARM = Name.consume_front("arm.");
4662 bool IsAMDGCN = Name.consume_front("amdgcn.");
4663 bool IsDbg = Name.consume_front("dbg.");
4664 Value *Rep = nullptr;
4665
4666 if (!IsX86 && Name == "stackprotectorcheck") {
4667 Rep = nullptr;
4668 } else if (IsNVVM) {
4669 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4670 } else if (IsX86) {
4671 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4672 } else if (IsAArch64) {
4673 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4674 } else if (IsARM) {
4675 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4676 } else if (IsAMDGCN) {
4677 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4678 } else if (IsDbg) {
4679 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4680 } else {
4681 llvm_unreachable("Unknown function for CallBase upgrade.");
4682 }
4683
4684 if (Rep)
4685 CI->replaceAllUsesWith(Rep);
4686 CI->eraseFromParent();
4687 return;
4688 }
4689
4690 const auto &DefaultCase = [&]() -> void {
4691 if (F == NewFn)
4692 return;
4693
4694 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4695 // Handle generic mangling change.
4696 assert(
4697 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4698 "Unknown function for CallBase upgrade and isn't just a name change");
4699 CI->setCalledFunction(NewFn);
4700 return;
4701 }
4702
4703 // This must be an upgrade from a named to a literal struct.
4704 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4705 assert(OldST != NewFn->getReturnType() &&
4706 "Return type must have changed");
4707 assert(OldST->getNumElements() ==
4708 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4709 "Must have same number of elements");
4710
4711 SmallVector<Value *> Args(CI->args());
4712 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4713 NewCI->setAttributes(CI->getAttributes());
4714 Value *Res = PoisonValue::get(OldST);
4715 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4716 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4717 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4718 }
4719 CI->replaceAllUsesWith(Res);
4720 CI->eraseFromParent();
4721 return;
4722 }
4723
4724 // We're probably about to produce something invalid. Let the verifier catch
4725 // it instead of dying here.
4726 CI->setCalledOperand(
4727 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4728 return;
4729 };
4730 CallInst *NewCall = nullptr;
4731 switch (NewFn->getIntrinsicID()) {
4732 default: {
4733 DefaultCase();
4734 return;
4735 }
4736 case Intrinsic::arm_neon_vst1:
4737 case Intrinsic::arm_neon_vst2:
4738 case Intrinsic::arm_neon_vst3:
4739 case Intrinsic::arm_neon_vst4:
4740 case Intrinsic::arm_neon_vst2lane:
4741 case Intrinsic::arm_neon_vst3lane:
4742 case Intrinsic::arm_neon_vst4lane: {
4743 SmallVector<Value *, 4> Args(CI->args());
4744 NewCall = Builder.CreateCall(NewFn, Args);
4745 break;
4746 }
4747 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4748 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4749 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4750 LLVMContext &Ctx = F->getParent()->getContext();
4751 SmallVector<Value *, 4> Args(CI->args());
4752 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4753 cast<ConstantInt>(Args[3])->getZExtValue());
4754 NewCall = Builder.CreateCall(NewFn, Args);
4755 break;
4756 }
4757 case Intrinsic::aarch64_sve_ld3_sret:
4758 case Intrinsic::aarch64_sve_ld4_sret:
4759 case Intrinsic::aarch64_sve_ld2_sret: {
4760 StringRef Name = F->getName();
4761 Name = Name.substr(5);
4762 unsigned N = StringSwitch<unsigned>(Name)
4763 .StartsWith("aarch64.sve.ld2", 2)
4764 .StartsWith("aarch64.sve.ld3", 3)
4765 .StartsWith("aarch64.sve.ld4", 4)
4766 .Default(0);
4767 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4768 unsigned MinElts = RetTy->getMinNumElements() / N;
4769 SmallVector<Value *, 2> Args(CI->args());
4770 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4771 Value *Ret = llvm::PoisonValue::get(RetTy);
4772 for (unsigned I = 0; I < N; I++) {
4773 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4774 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4775 }
4776 NewCall = dyn_cast<CallInst>(Ret);
4777 break;
4778 }
4779
4780 case Intrinsic::coro_end: {
4781 SmallVector<Value *, 3> Args(CI->args());
4782 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4783 NewCall = Builder.CreateCall(NewFn, Args);
4784 break;
4785 }
4786
4787 case Intrinsic::vector_extract: {
4788 StringRef Name = F->getName();
4789 Name = Name.substr(5); // Strip llvm
4790 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4791 DefaultCase();
4792 return;
4793 }
4794 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4795 unsigned MinElts = RetTy->getMinNumElements();
4796 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4797 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4798 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4799 break;
4800 }
4801
4802 case Intrinsic::vector_insert: {
4803 StringRef Name = F->getName();
4804 Name = Name.substr(5);
4805 if (!Name.starts_with("aarch64.sve.tuple")) {
4806 DefaultCase();
4807 return;
4808 }
4809 if (Name.starts_with("aarch64.sve.tuple.set")) {
4810 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4811 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4812 Value *NewIdx =
4813 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4814 NewCall = Builder.CreateCall(
4815 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4816 break;
4817 }
4818 if (Name.starts_with("aarch64.sve.tuple.create")) {
4819 unsigned N = StringSwitch<unsigned>(Name)
4820 .StartsWith("aarch64.sve.tuple.create2", 2)
4821 .StartsWith("aarch64.sve.tuple.create3", 3)
4822 .StartsWith("aarch64.sve.tuple.create4", 4)
4823 .Default(0);
4824 assert(N > 1 && "Create is expected to be between 2-4");
4825 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4826 Value *Ret = llvm::PoisonValue::get(RetTy);
4827 unsigned MinElts = RetTy->getMinNumElements() / N;
4828 for (unsigned I = 0; I < N; I++) {
4829 Value *V = CI->getArgOperand(I);
4830 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
4831 }
4832 NewCall = dyn_cast<CallInst>(Ret);
4833 }
4834 break;
4835 }
4836
4837 case Intrinsic::arm_neon_bfdot:
4838 case Intrinsic::arm_neon_bfmmla:
4839 case Intrinsic::arm_neon_bfmlalb:
4840 case Intrinsic::arm_neon_bfmlalt:
4841 case Intrinsic::aarch64_neon_bfdot:
4842 case Intrinsic::aarch64_neon_bfmmla:
4843 case Intrinsic::aarch64_neon_bfmlalb:
4844 case Intrinsic::aarch64_neon_bfmlalt: {
4845 SmallVector<Value *, 3> Args;
4846 assert(CI->arg_size() == 3 &&
4847 "Mismatch between function args and call args");
4848 size_t OperandWidth =
4849 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4850 assert((OperandWidth == 64 || OperandWidth == 128) &&
4851 "Unexpected operand width");
4852 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4853 auto Iter = CI->args().begin();
4854 Args.push_back(*Iter++);
4855 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4856 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4857 NewCall = Builder.CreateCall(NewFn, Args);
4858 break;
4859 }
4860
4861 case Intrinsic::bitreverse:
4862 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4863 break;
4864
4865 case Intrinsic::ctlz:
4866 case Intrinsic::cttz:
4867 assert(CI->arg_size() == 1 &&
4868 "Mismatch between function args and call args");
4869 NewCall =
4870 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4871 break;
4872
4873 case Intrinsic::objectsize: {
4874 Value *NullIsUnknownSize =
4875 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4876 Value *Dynamic =
4877 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4878 NewCall = Builder.CreateCall(
4879 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4880 break;
4881 }
4882
4883 case Intrinsic::ctpop:
4884 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4885 break;
4886
4887 case Intrinsic::convert_from_fp16:
4888 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4889 break;
4890
4891 case Intrinsic::dbg_value: {
4892 StringRef Name = F->getName();
4893 Name = Name.substr(5); // Strip llvm.
4894 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4895 if (Name.starts_with("dbg.addr")) {
4896 DIExpression *Expr = cast<DIExpression>(
4897 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4898 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4899 NewCall =
4900 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4901 MetadataAsValue::get(C, Expr)});
4902 break;
4903 }
4904
4905 // Upgrade from the old version that had an extra offset argument.
4906 assert(CI->arg_size() == 4);
4907 // Drop nonzero offsets instead of attempting to upgrade them.
4908 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4909 if (Offset->isZeroValue()) {
4910 NewCall = Builder.CreateCall(
4911 NewFn,
4912 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4913 break;
4914 }
4915 CI->eraseFromParent();
4916 return;
4917 }
4918
4919 case Intrinsic::ptr_annotation:
4920 // Upgrade from versions that lacked the annotation attribute argument.
4921 if (CI->arg_size() != 4) {
4922 DefaultCase();
4923 return;
4924 }
4925
4926 // Create a new call with an added null annotation attribute argument.
4927 NewCall = Builder.CreateCall(
4928 NewFn,
4929 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4930 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4931 NewCall->takeName(CI);
4932 CI->replaceAllUsesWith(NewCall);
4933 CI->eraseFromParent();
4934 return;
4935
4936 case Intrinsic::var_annotation:
4937 // Upgrade from versions that lacked the annotation attribute argument.
4938 if (CI->arg_size() != 4) {
4939 DefaultCase();
4940 return;
4941 }
4942 // Create a new call with an added null annotation attribute argument.
4943 NewCall = Builder.CreateCall(
4944 NewFn,
4945 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
4946 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
4947 NewCall->takeName(CI);
4948 CI->replaceAllUsesWith(NewCall);
4949 CI->eraseFromParent();
4950 return;
4951
4952 case Intrinsic::riscv_aes32dsi:
4953 case Intrinsic::riscv_aes32dsmi:
4954 case Intrinsic::riscv_aes32esi:
4955 case Intrinsic::riscv_aes32esmi:
4956 case Intrinsic::riscv_sm4ks:
4957 case Intrinsic::riscv_sm4ed: {
4958 // The last argument to these intrinsics used to be i8 and changed to i32.
4959 // The type overload for sm4ks and sm4ed was removed.
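// Illustrative (operand values made up):
//   %r = call i32 @llvm.riscv.sm4ks.i32(i32 %a, i32 %b, i8 2)
// becomes
//   %r = call i32 @llvm.riscv.sm4ks(i32 %a, i32 %b, i32 2)
// while old i64 variants additionally truncate the inputs to i32 and
// sign-extend the result back to i64.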
4960 Value *Arg2 = CI->getArgOperand(2);
4961 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4962 return;
4963
4964 Value *Arg0 = CI->getArgOperand(0);
4965 Value *Arg1 = CI->getArgOperand(1);
4966 if (CI->getType()->isIntegerTy(64)) {
4967 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4968 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4969 }
4970
4971 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4972 cast<ConstantInt>(Arg2)->getZExtValue());
4973
4974 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4975 Value *Res = NewCall;
4976 if (Res->getType() != CI->getType())
4977 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4978 NewCall->takeName(CI);
4979 CI->replaceAllUsesWith(Res);
4980 CI->eraseFromParent();
4981 return;
4982 }
4983 case Intrinsic::nvvm_mapa_shared_cluster: {
4984 // Create a new call with the correct address space.
4985 NewCall =
4986 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
4987 Value *Res = NewCall;
4988 Res = Builder.CreateAddrSpaceCast(
4989 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
4990 NewCall->takeName(CI);
4991 CI->replaceAllUsesWith(Res);
4992 CI->eraseFromParent();
4993 return;
4994 }
4995 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
4996 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
4997 // Create a new call with the correct address space.
4998 SmallVector<Value *, 4> Args(CI->args());
4999 Args[0] = Builder.CreateAddrSpaceCast(
5000 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5001
5002 NewCall = Builder.CreateCall(NewFn, Args);
5003 NewCall->takeName(CI);
5004 CI->replaceAllUsesWith(NewCall);
5005 CI->eraseFromParent();
5006 return;
5007 }
5008 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5009 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5010 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5011 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5012 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5013 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5014 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5015 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5016 SmallVector<Value *, 16> Args(CI->args());
5017
5018 // Create AddrSpaceCast to shared_cluster if needed.
5019 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5020 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5021 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5022 Args[0] = Builder.CreateAddrSpaceCast(
5023 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5024
5025 // Attach the flag argument for cta_group, with a
5026 // default value of 0. This handles case (2) in
5027 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5028 size_t NumArgs = CI->arg_size();
5029 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5030 if (!FlagArg->getType()->isIntegerTy(1))
5031 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5032
5033 NewCall = Builder.CreateCall(NewFn, Args);
5034 NewCall->takeName(CI);
5035 CI->replaceAllUsesWith(NewCall);
5036 CI->eraseFromParent();
5037 return;
5038 }
5039 case Intrinsic::riscv_sha256sig0:
5040 case Intrinsic::riscv_sha256sig1:
5041 case Intrinsic::riscv_sha256sum0:
5042 case Intrinsic::riscv_sha256sum1:
5043 case Intrinsic::riscv_sm3p0:
5044 case Intrinsic::riscv_sm3p1: {
5045 // These intrinsics used to be overloaded and also permitted i64 on RV64;
5046 // only the i32 form remains, so truncate the input and sign extend the result.
5047 if (!CI->getType()->isIntegerTy(64))
5048 return;
5049
5050 Value *Arg =
5051 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5052
5053 NewCall = Builder.CreateCall(NewFn, Arg);
5054 Value *Res =
5055 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5056 NewCall->takeName(CI);
5057 CI->replaceAllUsesWith(Res);
5058 CI->eraseFromParent();
5059 return;
5060 }
5061
5062 case Intrinsic::x86_xop_vfrcz_ss:
5063 case Intrinsic::x86_xop_vfrcz_sd:
5064 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5065 break;
5066
5067 case Intrinsic::x86_xop_vpermil2pd:
5068 case Intrinsic::x86_xop_vpermil2ps:
5069 case Intrinsic::x86_xop_vpermil2pd_256:
5070 case Intrinsic::x86_xop_vpermil2ps_256: {
5071 SmallVector<Value *, 4> Args(CI->args());
5072 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5073 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5074 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5075 NewCall = Builder.CreateCall(NewFn, Args);
5076 break;
5077 }
5078
5079 case Intrinsic::x86_sse41_ptestc:
5080 case Intrinsic::x86_sse41_ptestz:
5081 case Intrinsic::x86_sse41_ptestnzc: {
5082 // The arguments for these intrinsics used to be v4f32, and changed
5083 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5084 // So, the only thing required is a bitcast for both arguments.
5085 // First, check the arguments have the old type.
5086 Value *Arg0 = CI->getArgOperand(0);
5087 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5088 return;
5089
5090 // Old intrinsic, add bitcasts
5091 Value *Arg1 = CI->getArgOperand(1);
5092
5093 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5094
5095 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5096 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5097
5098 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5099 break;
5100 }
5101
5102 case Intrinsic::x86_rdtscp: {
5103 // This used to take one argument. If we have no arguments, it is already
5104 // upgraded.
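// Illustrative (pointer name made up):
//   %t = call i64 @llvm.x86.rdtscp(ptr %aux)
// becomes a no-argument call returning { i64, i32 }; element 1 is stored to
// %aux and element 0 replaces the original result.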
5105 if (CI->getNumOperands() == 0)
5106 return;
5107
5108 NewCall = Builder.CreateCall(NewFn);
5109 // Extract the second result and store it.
5110 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5111 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5112 // Replace the original call result with the first result of the new call.
5113 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5114
5115 NewCall->takeName(CI);
5116 CI->replaceAllUsesWith(TSC);
5117 CI->eraseFromParent();
5118 return;
5119 }
5120
5121 case Intrinsic::x86_sse41_insertps:
5122 case Intrinsic::x86_sse41_dppd:
5123 case Intrinsic::x86_sse41_dpps:
5124 case Intrinsic::x86_sse41_mpsadbw:
5125 case Intrinsic::x86_avx_dp_ps_256:
5126 case Intrinsic::x86_avx2_mpsadbw: {
5127 // Need to truncate the last argument from i32 to i8 -- this argument models
5128 // an inherently 8-bit immediate operand to these x86 instructions.
5129 SmallVector<Value *, 4> Args(CI->args());
5130
5131 // Replace the last argument with a trunc.
5132 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5133 NewCall = Builder.CreateCall(NewFn, Args);
5134 break;
5135 }
5136
5137 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5138 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5139 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5140 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5141 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5142 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5143 SmallVector<Value *, 4> Args(CI->args());
5144 unsigned NumElts =
5145 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5146 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5147
5148 NewCall = Builder.CreateCall(NewFn, Args);
5149 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5150
5151 NewCall->takeName(CI);
5152 CI->replaceAllUsesWith(Res);
5153 CI->eraseFromParent();
5154 return;
5155 }
5156
5157 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5158 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5159 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5160 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5161 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5162 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5163 SmallVector<Value *, 4> Args(CI->args());
5164 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5165 if (NewFn->getIntrinsicID() ==
5166 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5167 Args[1] = Builder.CreateBitCast(
5168 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5169
5170 NewCall = Builder.CreateCall(NewFn, Args);
5171 Value *Res = Builder.CreateBitCast(
5172 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5173
5174 NewCall->takeName(CI);
5175 CI->replaceAllUsesWith(Res);
5176 CI->eraseFromParent();
5177 return;
5178 }
5179 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5180 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5181 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5182 SmallVector<Value *, 4> Args(CI->args());
5183 unsigned NumElts =
5184 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5185 Args[1] = Builder.CreateBitCast(
5186 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5187 Args[2] = Builder.CreateBitCast(
5188 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5189
5190 NewCall = Builder.CreateCall(NewFn, Args);
5191 break;
5192 }
5193
5194 case Intrinsic::thread_pointer: {
5195 NewCall = Builder.CreateCall(NewFn, {});
5196 break;
5197 }
5198
5199 case Intrinsic::memcpy:
5200 case Intrinsic::memmove:
5201 case Intrinsic::memset: {
5202 // We have to make sure that the call signature is what we're expecting.
5203 // We only want to change the old signatures by removing the alignment arg:
5204 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5205 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5206 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5207 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5208 // Note: i8*'s in the above can be any pointer type
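// e.g. (illustrative operands):
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 8, i1 false)
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s, i64 %n,
//                                    i1 false)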
5209 if (CI->arg_size() != 5) {
5210 DefaultCase();
5211 return;
5212 }
5213 // Remove alignment argument (3), and add alignment attributes to the
5214 // dest/src pointers.
5215 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5216 CI->getArgOperand(2), CI->getArgOperand(4)};
5217 NewCall = Builder.CreateCall(NewFn, Args);
5218 AttributeList OldAttrs = CI->getAttributes();
5219 AttributeList NewAttrs = AttributeList::get(
5220 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5221 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5222 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5223 NewCall->setAttributes(NewAttrs);
5224 auto *MemCI = cast<MemIntrinsic>(NewCall);
5225 // All mem intrinsics support dest alignment.
5226 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5227 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5228 // Memcpy/Memmove also support source alignment.
5229 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5230 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5231 break;
5232 }
5233
5234 case Intrinsic::lifetime_start:
5235 case Intrinsic::lifetime_end: {
5236 if (CI->arg_size() != 2) {
5237 DefaultCase();
5238 return;
5239 }
5240
5241 Value *Ptr = CI->getArgOperand(1);
5242 // Try to strip pointer casts, such that the lifetime works on an alloca.
5243 Ptr = Ptr->stripPointerCasts();
5244 if (isa<AllocaInst>(Ptr)) {
5245 // Don't use NewFn, as we might have looked through an addrspacecast.
5246 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5247 NewCall = Builder.CreateLifetimeStart(Ptr);
5248 else
5249 NewCall = Builder.CreateLifetimeEnd(Ptr);
5250 break;
5251 }
5252
5253 // Otherwise remove the lifetime marker.
5254 CI->eraseFromParent();
5255 return;
5256 }
5257
5258 case Intrinsic::x86_avx512_vpdpbusd_128:
5259 case Intrinsic::x86_avx512_vpdpbusd_256:
5260 case Intrinsic::x86_avx512_vpdpbusd_512:
5261 case Intrinsic::x86_avx512_vpdpbusds_128:
5262 case Intrinsic::x86_avx512_vpdpbusds_256:
5263 case Intrinsic::x86_avx512_vpdpbusds_512:
5264 case Intrinsic::x86_avx2_vpdpbssd_128:
5265 case Intrinsic::x86_avx2_vpdpbssd_256:
5266 case Intrinsic::x86_avx10_vpdpbssd_512:
5267 case Intrinsic::x86_avx2_vpdpbssds_128:
5268 case Intrinsic::x86_avx2_vpdpbssds_256:
5269 case Intrinsic::x86_avx10_vpdpbssds_512:
5270 case Intrinsic::x86_avx2_vpdpbsud_128:
5271 case Intrinsic::x86_avx2_vpdpbsud_256:
5272 case Intrinsic::x86_avx10_vpdpbsud_512:
5273 case Intrinsic::x86_avx2_vpdpbsuds_128:
5274 case Intrinsic::x86_avx2_vpdpbsuds_256:
5275 case Intrinsic::x86_avx10_vpdpbsuds_512:
5276 case Intrinsic::x86_avx2_vpdpbuud_128:
5277 case Intrinsic::x86_avx2_vpdpbuud_256:
5278 case Intrinsic::x86_avx10_vpdpbuud_512:
5279 case Intrinsic::x86_avx2_vpdpbuuds_128:
5280 case Intrinsic::x86_avx2_vpdpbuuds_256:
5281 case Intrinsic::x86_avx10_vpdpbuuds_512: {
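// The second and third operands were previously i32 vectors; bitcast them
// to the byte-vector type the new declarations expect, e.g. (illustratively)
// <4 x i32> -> <16 x i8> for the 128-bit forms.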
5282 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5283 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5284 CI->getArgOperand(2)};
5285 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5286 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5287 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5288
5289 NewCall = Builder.CreateCall(NewFn, Args);
5290 break;
5291 }
5292 }
5293 assert(NewCall && "Should have either set this variable or returned through "
5294 "the default case");
5295 NewCall->takeName(CI);
5296 CI->replaceAllUsesWith(NewCall);
5297 CI->eraseFromParent();
5298}
5299
5300void llvm::UpgradeCallsToIntrinsic(Function *F) {
5301 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5302
5303 // Check if this function should be upgraded and get the replacement function
5304 // if there is one.
5305 Function *NewFn;
5306 if (UpgradeIntrinsicFunction(F, NewFn)) {
5307 // Replace all users of the old function with the new function or new
5308 // instructions. This is not a range loop because the call is deleted.
5309 for (User *U : make_early_inc_range(F->users()))
5310 if (CallBase *CB = dyn_cast<CallBase>(U))
5311 UpgradeIntrinsicCall(CB, NewFn);
5312
5313 // Remove old function, no longer used, from the module.
5314 if (F != NewFn)
5315 F->eraseFromParent();
5316 }
5317}
5318
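// Illustrative (node ids made up): an old scalar TBAA tag
//   !1 = !{!"int", !0}
// becomes the struct-path access tag
//   !{!1, !1, i64 0}
// reusing the old node as both the base and access type with offset zero.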
5319MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5320 const unsigned NumOperands = MD.getNumOperands();
5321 if (NumOperands == 0)
5322 return &MD; // Invalid, punt to a verifier error.
5323
5324 // Check if the tag uses struct-path aware TBAA format.
5325 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5326 return &MD;
5327
5328 auto &Context = MD.getContext();
5329 if (NumOperands == 3) {
5330 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5331 MDNode *ScalarType = MDNode::get(Context, Elts);
5332 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5333 Metadata *Elts2[] = {ScalarType, ScalarType,
5334 ConstantAsMetadata::get(
5335 Constant::getNullValue(Type::getInt64Ty(Context))),
5336 MD.getOperand(2)};
5337 return MDNode::get(Context, Elts2);
5338 }
5339 // Create a MDNode <MD, MD, offset 0>
5340 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5341 Type::getInt64Ty(Context)))};
5342 return MDNode::get(Context, Elts);
5343}
5344
5345Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5346 Instruction *&Temp) {
5347 if (Opc != Instruction::BitCast)
5348 return nullptr;
5349
5350 Temp = nullptr;
5351 Type *SrcTy = V->getType();
5352 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5353 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5354 LLVMContext &Context = V->getContext();
5355
5356 // We have no information about target data layout, so we assume that
5357 // the maximum pointer size is 64bit.
5358 Type *MidTy = Type::getInt64Ty(Context);
5359 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5360
5361 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5362 }
5363
5364 return nullptr;
5365}
5366
5367Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5368 if (Opc != Instruction::BitCast)
5369 return nullptr;
5370
5371 Type *SrcTy = C->getType();
5372 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5373 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5374 LLVMContext &Context = C->getContext();
5375
5376 // We have no information about target data layout, so we assume that
5377 // the maximum pointer size is 64bit.
5378 Type *MidTy = Type::getInt64Ty(Context);
5379
5380 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5381 DestTy);
5382 }
5383
5384 return nullptr;
5385}
5386
5387/// Check the debug info version number; if it is out-dated, drop the debug
5388/// info. Return true if module is modified.
5389bool llvm::UpgradeDebugInfo(Module &M) {
5390 if (DisableAutoUpgradeDebugInfo)
5391 return false;
5392
5393 llvm::TimeTraceScope timeScope("Upgrade debug info");
5394 // We need to get metadata before the module is verified (i.e., getModuleFlag
5395 // makes assumptions that we haven't verified yet). Carefully extract the flag
5396 // from the metadata.
5397 unsigned Version = 0;
5398 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5399 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5400 if (Flag->getNumOperands() < 3)
5401 return false;
5402 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5403 return K->getString() == "Debug Info Version";
5404 return false;
5405 });
5406 if (OpIt != ModFlags->op_end()) {
5407 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5408 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5409 Version = CI->getZExtValue();
5410 }
5411 }
5412
5413 if (Version == DEBUG_METADATA_VERSION) {
5414 bool BrokenDebugInfo = false;
5415 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5416 report_fatal_error("Broken module found, compilation aborted!");
5417 if (!BrokenDebugInfo)
5418 // Everything is ok.
5419 return false;
5420 else {
5421 // Diagnose malformed debug info.
5422 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5423 M.getContext().diagnose(Diag);
5424 }
5425 }
5426 bool Modified = StripDebugInfo(M);
5427 if (Modified && Version != DEBUG_METADATA_VERSION) {
5428 // Diagnose a version mismatch.
5429 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5430 M.getContext().diagnose(DiagVersion);
5431 }
5432 return Modified;
5433}
5434
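// Illustrative: merging !{ptr @k, !"maxntidx", i32 64} and
// !{ptr @k, !"maxntidy", i32 2} produces the function attribute
// "nvvm.maxntid"="64,2" on @k.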
5435static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5436 GlobalValue *GV, const Metadata *V) {
5437 Function *F = cast<Function>(GV);
5438
5439 constexpr StringLiteral DefaultValue = "1";
5440 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5441 unsigned Length = 0;
5442
5443 if (F->hasFnAttribute(Attr)) {
5444 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5445 // parse these elements, placing them into Vect3.
5446 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5447 for (; Length < 3 && !S.empty(); Length++) {
5448 auto [Part, Rest] = S.split(',');
5449 Vect3[Length] = Part.trim();
5450 S = Rest;
5451 }
5452 }
5453
5454 const unsigned Dim = DimC - 'x';
5455 assert(Dim < 3 && "Unexpected dim char");
5456
5457 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5458
5459 // Local variable required for the StringRef in Vect3 to point to.
5460 const std::string VStr = llvm::utostr(VInt);
5461 Vect3[Dim] = VStr;
5462 Length = std::max(Length, Dim + 1);
5463
5464 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5465 F->addFnAttr(Attr, NewAttr);
5466}
5467
5468static inline bool isXYZ(StringRef S) {
5469 return S == "x" || S == "y" || S == "z";
5470}
5471
5472static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5473 const Metadata *V) {
5474 if (K == "kernel") {
5475 if (!mdconst::extract<ConstantInt>(V)->isZero())
5476 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5477 return true;
5478 }
5479 if (K == "align") {
5480 // V is a bitfield specifying two 16-bit values. The alignment value is
5481 // specified in the low 16 bits; the index is specified in the high bits. For
5482 // the index, 0 indicates the return value while higher values correspond to
5483 // each parameter (idx = param + 1).
5484 const uint64_t AlignIdxValuePair =
5485 mdconst::extract<ConstantInt>(V)->getZExtValue();
5486 const unsigned Idx = (AlignIdxValuePair >> 16);
5487 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5488 cast<Function>(GV)->addAttributeAtIndex(
5489 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5490 return true;
5491 }
5492 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5493 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5494 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5495 return true;
5496 }
5497 if (K == "minctasm") {
5498 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5499 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5500 return true;
5501 }
5502 if (K == "maxnreg") {
5503 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5504 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5505 return true;
5506 }
5507 if (K.consume_front("maxntid") && isXYZ(K)) {
5508 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5509 return true;
5510 }
5511 if (K.consume_front("reqntid") && isXYZ(K)) {
5512 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5513 return true;
5514 }
5515 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5516 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5517 return true;
5518 }
5519 if (K == "grid_constant") {
5520 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5521 for (const auto &Op : cast<MDNode>(V)->operands()) {
5522 // For some reason, the index is 1-based in the metadata. Good thing we're
5523 // able to auto-upgrade it!
5524 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5525 cast<Function>(GV)->addParamAttr(Index, Attr);
5526 }
5527 return true;
5528 }
5529
5530 return false;
5531}
5532
5533void llvm::UpgradeNVVMAnnotations(Module &M) {
5534 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5535 if (!NamedMD)
5536 return;
5537
5538 SmallVector<MDNode *, 8> NewNodes;
5539 SmallPtrSet<const MDNode *, 8> SeenNodes;
5540 for (MDNode *MD : NamedMD->operands()) {
5541 if (!SeenNodes.insert(MD).second)
5542 continue;
5543
5544 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5545 if (!GV)
5546 continue;
5547
5548 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5549
5550 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5551 // Each nvvm.annotations metadata entry will be of the following form:
5552 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5553 // start index = 1, to skip the global variable key
5554 // increment = 2, to skip the value for each property-value pairs
5555 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5556 MDString *K = cast<MDString>(MD->getOperand(j));
5557 const MDOperand &V = MD->getOperand(j + 1);
5558 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5559 if (!Upgraded)
5560 NewOperands.append({K, V});
5561 }
5562
5563 if (NewOperands.size() > 1)
5564 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5565 }
5566
5567 NamedMD->clearOperands();
5568 for (MDNode *N : NewNodes)
5569 NamedMD->addOperand(N);
5570}
5571
5572/// This checks for the objc retain/release marker which should be upgraded.
5573/// It returns true if the module is modified.
5574static bool upgradeRetainReleaseMarker(Module &M) {
5575 bool Changed = false;
5576 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5577 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5578 if (ModRetainReleaseMarker) {
5579 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5580 if (Op) {
5581 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5582 if (ID) {
5583 SmallVector<StringRef, 4> ValueComp;
5584 ID->getString().split(ValueComp, "#");
5585 if (ValueComp.size() == 2) {
5586 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5587 ID = MDString::get(M.getContext(), NewValue);
5588 }
5589 M.addModuleFlag(Module::Error, MarkerKey, ID);
5590 M.eraseNamedMetadata(ModRetainReleaseMarker);
5591 Changed = true;
5592 }
5593 }
5594 }
5595 return Changed;
5596}
5597
5598void llvm::UpgradeARCRuntime(Module &M) {
5599 // This lambda converts normal function calls to ARC runtime functions to
5600 // intrinsic calls.
5601 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5602 llvm::Intrinsic::ID IntrinsicFunc) {
5603 Function *Fn = M.getFunction(OldFunc);
5604
5605 if (!Fn)
5606 return;
5607
5608 Function *NewFn =
5609 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5610
5611 for (User *U : make_early_inc_range(Fn->users())) {
5612 CallInst *CI = dyn_cast<CallInst>(U);
5613 if (!CI || CI->getCalledFunction() != Fn)
5614 continue;
5615
5616 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5617 FunctionType *NewFuncTy = NewFn->getFunctionType();
5618 SmallVector<Value *, 2> Args;
5619
5620 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5621 // value to the return type of the old function.
5622 if (NewFuncTy->getReturnType() != CI->getType() &&
5623 !CastInst::castIsValid(Instruction::BitCast, CI,
5624 NewFuncTy->getReturnType()))
5625 continue;
5626
5627 bool InvalidCast = false;
5628
5629 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5630 Value *Arg = CI->getArgOperand(I);
5631
5632 // Bitcast argument to the parameter type of the new function if it's
5633 // not a variadic argument.
5634 if (I < NewFuncTy->getNumParams()) {
5635 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5636 // to the parameter type of the new function.
5637 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5638 NewFuncTy->getParamType(I))) {
5639 InvalidCast = true;
5640 break;
5641 }
5642 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5643 }
5644 Args.push_back(Arg);
5645 }
5646
5647 if (InvalidCast)
5648 continue;
5649
5650 // Create a call instruction that calls the new function.
5651 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5652 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5653 NewCall->takeName(CI);
5654
5655 // Bitcast the return value back to the type of the old call.
5656 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5657
5658 if (!CI->use_empty())
5659 CI->replaceAllUsesWith(NewRetVal);
5660 CI->eraseFromParent();
5661 }
5662
5663 if (Fn->use_empty())
5664 Fn->eraseFromParent();
5665 };
5666
5667 // Unconditionally convert a call to "clang.arc.use" to a call to
5668 // "llvm.objc.clang.arc.use".
5669 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5670
5671 // Upgrade the retain release marker. If there is no need to upgrade
5672 // the marker, that means either the module is already new enough to contain
5673 // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
5674 if (!upgradeRetainReleaseMarker(M))
5675 return;
5676
5677 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5678 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5679 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5680 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5681 {"objc_autoreleaseReturnValue",
5682 llvm::Intrinsic::objc_autoreleaseReturnValue},
5683 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5684 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5685 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5686 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5687 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5688 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5689 {"objc_release", llvm::Intrinsic::objc_release},
5690 {"objc_retain", llvm::Intrinsic::objc_retain},
5691 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5692 {"objc_retainAutoreleaseReturnValue",
5693 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5694 {"objc_retainAutoreleasedReturnValue",
5695 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5696 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5697 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5698 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5699 {"objc_unsafeClaimAutoreleasedReturnValue",
5700 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5701 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5702 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5703 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5704 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5705 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5706 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5707 {"objc_arc_annotation_topdown_bbstart",
5708 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5709 {"objc_arc_annotation_topdown_bbend",
5710 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5711 {"objc_arc_annotation_bottomup_bbstart",
5712 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5713 {"objc_arc_annotation_bottomup_bbend",
5714 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5715
5716 for (auto &I : RuntimeFuncs)
5717 UpgradeToIntrinsic(I.first, I.second);
5718}
5719
5720bool llvm::UpgradeModuleFlags(Module &M) {
5721 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5722 if (!ModFlags)
5723 return false;
5724
5725 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5726 bool HasSwiftVersionFlag = false;
5727 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5728 uint32_t SwiftABIVersion;
5729 auto Int8Ty = Type::getInt8Ty(M.getContext());
5730 auto Int32Ty = Type::getInt32Ty(M.getContext());
5731
5732 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5733 MDNode *Op = ModFlags->getOperand(I);
5734 if (Op->getNumOperands() != 3)
5735 continue;
5736 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5737 if (!ID)
5738 continue;
5739 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5740 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5741 Type::getInt32Ty(M.getContext()), B)),
5742 MDString::get(M.getContext(), ID->getString()),
5743 Op->getOperand(2)};
5744 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5745 Changed = true;
5746 };
5747
5748 if (ID->getString() == "Objective-C Image Info Version")
5749 HasObjCFlag = true;
5750 if (ID->getString() == "Objective-C Class Properties")
5751 HasClassProperties = true;
5752 // Upgrade PIC from Error/Max to Min.
5753 if (ID->getString() == "PIC Level") {
5754 if (auto *Behavior =
5755 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5756 uint64_t V = Behavior->getLimitedValue();
5757 if (V == Module::Error || V == Module::Max)
5758 SetBehavior(Module::Min);
5759 }
5760 }
5761 // Upgrade "PIE Level" from Error to Max.
5762 if (ID->getString() == "PIE Level")
5763 if (auto *Behavior =
5764 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5765 if (Behavior->getLimitedValue() == Module::Error)
5766 SetBehavior(Module::Max);
5767
5768 // Upgrade branch protection and return address signing module flags. The
5769 // module flag behavior for these fields was Error and is now Min.
5770 if (ID->getString() == "branch-target-enforcement" ||
5771 ID->getString().starts_with("sign-return-address")) {
5772 if (auto *Behavior =
5773 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5774 if (Behavior->getLimitedValue() == Module::Error) {
5775 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5776 Metadata *Ops[3] = {
5777 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5778 Op->getOperand(1), Op->getOperand(2)};
5779 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5780 Changed = true;
5781 }
5782 }
5783 }
5784
5785 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
5786 // section name so that llvm-lto will not complain about mismatched
5787 // module flags that are functionally the same.
5788 if (ID->getString() == "Objective-C Image Info Section") {
5789 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5790 SmallVector<StringRef, 4> ValueComp;
5791 Value->getString().split(ValueComp, " ");
5792 if (ValueComp.size() != 1) {
5793 std::string NewValue;
5794 for (auto &S : ValueComp)
5795 NewValue += S.str();
5796 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5797 MDString::get(M.getContext(), NewValue)};
5798 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5799 Changed = true;
5800 }
5801 }
5802 }
5803
5804 // IRUpgrader turns an i32 "Objective-C Garbage Collection" flag into an i8
5805 // value. If the higher bits are set, it adds a new module flag for Swift info.
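// e.g. (illustrative) a value of 0x05020740 yields Swift Major Version 5,
// Swift Minor Version 2, Swift ABI Version 7, and keeps 0x40 as the i8 GC
// value.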
5806 if (ID->getString() == "Objective-C Garbage Collection") {
5807 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5808 if (Md) {
5809 assert(Md->getValue() && "Expected non-empty metadata");
5810 auto Type = Md->getValue()->getType();
5811 if (Type == Int8Ty)
5812 continue;
5813 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5814 if ((Val & 0xff) != Val) {
5815 HasSwiftVersionFlag = true;
5816 SwiftABIVersion = (Val & 0xff00) >> 8;
5817 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5818 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5819 }
5820 Metadata *Ops[3] = {
5821 Op->getOperand(0),
5822 Op->getOperand(1),
5823 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5824 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5825 Changed = true;
5826 }
5827 }
5828
5829 if (ID->getString() == "amdgpu_code_object_version") {
5830 Metadata *Ops[3] = {
5831 Op->getOperand(0),
5832 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5833 Op->getOperand(2)};
5834 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5835 Changed = true;
5836 }
5837 }
5838
5839 // "Objective-C Class Properties" is recently added for Objective-C. We
5840 // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
5841 // flag of value 0, so we can correctly downgrade this flag when trying to
5842 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5843 // this module flag.
5844 if (HasObjCFlag && !HasClassProperties) {
5845 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5846 (uint32_t)0);
5847 Changed = true;
5848 }
5849
5850 if (HasSwiftVersionFlag) {
5851 M.addModuleFlag(Module::Error, "Swift ABI Version",
5852 SwiftABIVersion);
5853 M.addModuleFlag(Module::Error, "Swift Major Version",
5854 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5855 M.addModuleFlag(Module::Error, "Swift Minor Version",
5856 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5857 Changed = true;
5858 }
5859
5860 return Changed;
5861}
5862
5863void llvm::UpgradeSectionAttributes(Module &M) {
5864 auto TrimSpaces = [](StringRef Section) -> std::string {
5865 SmallVector<StringRef, 5> Components;
5866 Section.split(Components, ',');
5867
5868 SmallString<32> Buffer;
5869 raw_svector_ostream OS(Buffer);
5870
5871 for (auto Component : Components)
5872 OS << ',' << Component.trim();
5873
5874 return std::string(OS.str().substr(1));
5875 };
5876
5877 for (auto &GV : M.globals()) {
5878 if (!GV.hasSection())
5879 continue;
5880
5881 StringRef Section = GV.getSection();
5882
5883 if (!Section.starts_with("__DATA, __objc_catlist"))
5884 continue;
5885
5886 // __DATA, __objc_catlist, regular, no_dead_strip
5887 // __DATA,__objc_catlist,regular,no_dead_strip
5888 GV.setSection(TrimSpaces(Section));
5889 }
5890}
5891
5892namespace {
5893// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5894// callsites within a function that did not also have the strictfp attribute.
5895// Since 10.0, if strict FP semantics are needed within a function, the
5896// function must have the strictfp attribute and all calls within the function
5897// must also have the strictfp attribute. This latter restriction is
5898// necessary to prevent unwanted libcall simplification when a function is
5899// being cloned (such as for inlining).
5900//
5901// The "dangling" strictfp attribute usage was only used to prevent constant
5902// folding and other libcall simplification. The nobuiltin attribute on the
5903// callsite has the same effect.
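// Illustrative: a "call double @cos(double %x) #0" with #0 = { strictfp }
// inside a caller that is not itself strictfp is rewritten so the callsite
// carries nobuiltin instead of strictfp.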
5904struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5905 StrictFPUpgradeVisitor() = default;
5906
5907 void visitCallBase(CallBase &Call) {
5908 if (!Call.isStrictFP())
5909 return;
5910 if (isa<ConstrainedFPIntrinsic>(&Call))
5911 return;
5912 // If we get here, the caller doesn't have the strictfp attribute
5913 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5914 Call.removeFnAttr(Attribute::StrictFP);
5915 Call.addFnAttr(Attribute::NoBuiltin);
5916 }
5917};
5918
5919/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5920struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5921 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5922 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5923
5924 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5925 if (!RMW.isFloatingPointOperation())
5926 return;
5927
5928 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5929 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5930 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5931 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5932 }
5933};
5934} // namespace
5935
5936 void llvm::UpgradeFunctionAttributes(Function &F) {
5937 // If a function definition doesn't have the strictfp attribute,
5938 // convert any callsite strictfp attributes to nobuiltin.
5939 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5940 StrictFPUpgradeVisitor SFPV;
5941 SFPV.visit(F);
5942 }
5943
5944 // Remove all incompatible attributes from the function.
5945 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5946 F.getReturnType(), F.getAttributes().getRetAttrs()));
5947 for (auto &Arg : F.args())
5948 Arg.removeAttrs(
5949 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5950
5951 // Older versions of LLVM treated an "implicit-section-name" attribute
5952 // similarly to directly setting the section on a Function.
5953 if (Attribute A = F.getFnAttribute("implicit-section-name");
5954 A.isValid() && A.isStringAttribute()) {
5955 F.setSection(A.getValueAsString());
5956 F.removeFnAttr("implicit-section-name");
5957 }
5958
5959 if (!F.empty()) {
5960 // This hook is called twice, and the first call happens before any
5961 // instructions are loaded into the body, hence the !F.empty() guard.
5962
5963 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5964 A.isValid()) {
5965
5966 if (A.getValueAsBool()) {
5967 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5968 Visitor.visit(F);
5969 }
5970
5971 // We will leave behind dead attribute uses on external declarations, but
5972 // clang never added these to declarations anyway.
5973 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5974 }
5975 }
5976}
5977
5978static bool isOldLoopArgument(Metadata *MD) {
5979 auto *T = dyn_cast_or_null<MDTuple>(MD);
5980 if (!T)
5981 return false;
5982 if (T->getNumOperands() < 1)
5983 return false;
5984 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5985 if (!S)
5986 return false;
5987 return S->getString().starts_with("llvm.vectorizer.");
5988}
5989
5991 StringRef OldPrefix = "llvm.vectorizer.";
5992 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5993
5994 if (OldTag == "llvm.vectorizer.unroll")
5995 return MDString::get(C, "llvm.loop.interleave.count");
5996
5997 return MDString::get(
5998 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5999 .str());
6000}
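// For example (editorial note): "llvm.vectorizer.width" becomes
// "llvm.loop.vectorize.width", while the special-cased "llvm.vectorizer.unroll"
// maps to "llvm.loop.interleave.count".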
6001
6002 static Metadata *upgradeLoopArgument(Metadata *MD) {
6003 auto *T = dyn_cast_or_null<MDTuple>(MD);
6004 if (!T)
6005 return MD;
6006 if (T->getNumOperands() < 1)
6007 return MD;
6008 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6009 if (!OldTag)
6010 return MD;
6011 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6012 return MD;
6013
6014 // This has an old tag. Upgrade it.
6015 SmallVector<Metadata *, 8> Ops;
6016 Ops.reserve(T->getNumOperands());
6017 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6018 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6019 Ops.push_back(T->getOperand(I));
6020
6021 return MDTuple::get(T->getContext(), Ops);
6022}
6023
6024 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6025 auto *T = dyn_cast<MDTuple>(&N);
6026 if (!T)
6027 return &N;
6028
6029 if (none_of(T->operands(), isOldLoopArgument))
6030 return &N;
6031
6032 SmallVector<Metadata *, 8> Ops;
6033 Ops.reserve(T->getNumOperands());
6034 for (Metadata *MD : T->operands())
6035 Ops.push_back(upgradeLoopArgument(MD));
6036
6037 return MDTuple::get(T->getContext(), Ops);
6038}
6039
6040 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6041 Triple T(TT);
6042 // The only data layout upgrade needed for pre-GCN AMDGPU, SPIR, or SPIR-V
6043 // is setting the address space of globals to 1; SPIR-V Logical is exempt.
6044 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
6045 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
6046 !DL.contains("-G") && !DL.starts_with("G")) {
6047 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6048 }
6049
6050 if (T.isLoongArch64() || T.isRISCV64()) {
6051 // Make i32 a native type for 64-bit LoongArch and RISC-V.
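// For example (editorial note): "e-m:e-i64:64-n64-S128" would become
// "e-m:e-i64:64-n32:64-S128".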
6052 auto I = DL.find("-n64-");
6053 if (I != StringRef::npos)
6054 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6055 return DL.str();
6056 }
6057
6058 std::string Res = DL.str();
6059 // AMDGCN data layout upgrades.
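// Illustrative end-to-end example (assumed input, editorial note): an old
// layout "e-p:64:64" would grow, via the appends below, to
// "e-p:64:64-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32".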
6060 if (T.isAMDGCN()) {
6061 // Define address spaces for constants.
6062 if (!DL.contains("-G") && !DL.starts_with("G"))
6063 Res.append(Res.empty() ? "G1" : "-G1");
6064
6065 // Add missing non-integral declarations.
6066 // This goes before adding new address spaces to prevent incoherent string
6067 // values.
6068 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6069 Res.append("-ni:7:8:9");
6070 // Update ni:7 to ni:7:8:9.
6071 if (DL.ends_with("ni:7"))
6072 Res.append(":8:9");
6073 if (DL.ends_with("ni:7:8"))
6074 Res.append(":9");
6075
6076 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6077 // resources). An empty data layout has already been upgraded to G1 by now.
6078 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6079 Res.append("-p7:160:256:256:32");
6080 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6081 Res.append("-p8:128:128:128:48");
6082 constexpr StringRef OldP8("-p8:128:128-");
6083 if (DL.contains(OldP8))
6084 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6085 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6086 Res.append("-p9:192:256:256:32");
6087
6088 return Res;
6089 }
6090
6091 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6092 // If the datalayout matches the expected format, add pointer size address
6093 // spaces to the datalayout.
6094 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6095 if (!DL.contains(AddrSpaces)) {
6096 SmallVector<StringRef, 4> Groups;
6097 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6098 if (R.match(Res, &Groups))
6099 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6100 }
6101 };
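// For example (editorial note): "e-m:e-p:32:32-f64:64" matches the pattern
// and becomes "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:64".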
6102
6103 // AArch64 data layout upgrades.
6104 if (T.isAArch64()) {
6105 // Add "-Fn32"
6106 if (!DL.empty() && !DL.contains("-Fn32"))
6107 Res.append("-Fn32");
6108 AddPtr32Ptr64AddrSpaces();
6109 return Res;
6110 }
6111
6112 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6113 T.isWasm()) {
6114 // MIPS64 with the o32 ABI did not add "-i128:128"; for the other targets,
6115 // insert "-i128:128" immediately after the "-i64:64" component.
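// For example (editorial note): "E-m:e-i64:64-n32:64-S128" would become
// "E-m:e-i64:64-i128:128-n32:64-S128".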
6116 std::string I64 = "-i64:64";
6117 std::string I128 = "-i128:128";
6118 if (!StringRef(Res).contains(I128)) {
6119 size_t Pos = Res.find(I64);
6120 if (Pos != size_t(-1))
6121 Res.insert(Pos + I64.size(), I128);
6122 }
6123 return Res;
6124 }
6125
6126 if (!T.isX86())
6127 return Res;
6128
6129 AddPtr32Ptr64AddrSpaces();
6130
6131 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6132 // for i128 operations prior to this being reflected in the data layout, and
6133 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6134 // boundaries, so although this is a breaking change, the upgrade is expected
6135 // to fix more IR than it breaks.
6136 // Intel MCU is an exception and uses 4-byte-alignment.
6137 if (!T.isOSIAMCU()) {
6138 std::string I128 = "-i128:128";
6139 if (StringRef Ref = Res; !Ref.contains(I128)) {
6140 SmallVector<StringRef, 4> Groups;
6141 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6142 if (R.match(Res, &Groups))
6143 Res = (Groups[1] + I128 + Groups[3]).str();
6144 }
6145 }
6146
6147 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6148 // Raising the alignment is safe because Clang did not produce f80 values in
6149 // the MSVC environment before this upgrade was added.
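// For example (editorial note, assumed input): a layout containing "-f80:32-"
// such as "e-m:x-p:32:32-f80:32-n8:16:32" would become
// "e-m:x-p:32:32-f80:128-n8:16:32".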
6150 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6151 StringRef Ref = Res;
6152 auto I = Ref.find("-f80:32-");
6153 if (I != StringRef::npos)
6154 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6155 }
6156
6157 return Res;
6158}
6159
6160void llvm::UpgradeAttributes(AttrBuilder &B) {
6161 StringRef FramePointer;
6162 Attribute A = B.getAttribute("no-frame-pointer-elim");
6163 if (A.isValid()) {
6164 // The value can be "true" or "false".
6165 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6166 B.removeAttribute("no-frame-pointer-elim");
6167 }
6168 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6169 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6170 if (FramePointer != "all")
6171 FramePointer = "non-leaf";
6172 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6173 }
6174 if (!FramePointer.empty())
6175 B.addAttribute("frame-pointer", FramePointer);
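// For example (editorial note): "no-frame-pointer-elim"="true" maps to
// "frame-pointer"="all", and "no-frame-pointer-elim-non-leaf" (without the
// former) maps to "frame-pointer"="non-leaf".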
6176
6177 A = B.getAttribute("null-pointer-is-valid");
6178 if (A.isValid()) {
6179 // The value can be "true" or "false".
6180 bool NullPointerIsValid = A.getValueAsString() == "true";
6181 B.removeAttribute("null-pointer-is-valid");
6182 if (NullPointerIsValid)
6183 B.addAttribute(Attribute::NullPointerIsValid);
6184 }
6185}
6186
6187void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6188 // clang.arc.attachedcall bundles are now required to have an operand.
6189 // If they don't, it's okay to drop them entirely: when there is an operand,
6190 // the "attachedcall" is meaningful and required, but without an operand,
6191 // it's just a marker NOP. Dropping it merely prevents an optimization.
6192 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6193 return OBD.getTag() == "clang.arc.attachedcall" &&
6194 OBD.inputs().empty();
6195 });
6196}