Thanks to visit codestin.com
Credit goes to www.ffmpeg.org
FFmpeg
libavutil
mips
mmiutils.h
Go to the documentation of this file.
1
/*
2
* Loongson SIMD utils
3
*
4
* Copyright (c) 2016 Loongson Technology Corporation Limited
5
* Copyright (c) 2016 Zhou Xiaoyong <
[email protected]
>
6
*
7
* This file is part of FFmpeg.
8
*
9
* FFmpeg is free software; you can redistribute it and/or
10
* modify it under the terms of the GNU Lesser General Public
11
* License as published by the Free Software Foundation; either
12
* version 2.1 of the License, or (at your option) any later version.
13
*
14
* FFmpeg is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
* Lesser General Public License for more details.
18
*
19
* You should have received a copy of the GNU Lesser General Public
20
* License along with FFmpeg; if not, write to the Free Software
21
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
*/
23
24
#ifndef AVUTIL_MIPS_MMIUTILS_H
25
#define AVUTIL_MIPS_MMIUTILS_H
26
27
#include "config.h"
28
29
#include "
libavutil/mem_internal.h
"
30
#include "
libavutil/mips/asmdefs.h
"
31
32
/*
 * These were used to define temporary registers for MMI macros;
 * the macros now use $at instead. They are theoretically unnecessary
 * and are left here, expanding to nothing, to avoid a mess.
 */
#define DECLARE_VAR_LOW32
#define RESTRICT_ASM_LOW32
#define DECLARE_VAR_ALL64
#define RESTRICT_ASM_ALL64
#define DECLARE_VAR_ADDRT
#define RESTRICT_ASM_ADDRT
43
44
#if HAVE_LOONGSON2
45
46
/*
 * Indexed GPR loads/stores (lw/sw/ld/sd).
 * Each macro expands to an asm string that first forms addr + stride in $at
 * with PTR_ADDU and then accesses bias($at). The sequence is bracketed by
 * ".set noat" / ".set at" because it uses $at explicitly.
 *   reg:    GPR to load into / store from
 *   addr:   base address register
 *   stride: index register added to addr
 *   bias:   constant displacement
 */
#define MMI_LWX(reg, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    "lw "#reg", "#bias"($at) \n\t" \
    ".set at \n\t"

#define MMI_SWX(reg, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    "sw "#reg", "#bias"($at) \n\t" \
    ".set at \n\t"

#define MMI_LDX(reg, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    "ld "#reg", "#bias"($at) \n\t" \
    ".set at \n\t"

#define MMI_SDX(reg, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    "sd "#reg", "#bias"($at) \n\t" \
    ".set at \n\t"
69
70
/*
 * Word (32-bit) FPR loads/stores.
 *   MMI_LWC1 / MMI_SWC1:   plain lwc1/swc1 at bias(addr).
 *   MMI_ULWC1 / MMI_USWC1: go through $at with ulw/usw and mtc1/mfc1.
 *   MMI_LWXC1 / MMI_SWXC1: form addr + stride in $at with PTR_ADDU, then
 *                          reuse MMI_LWC1 / MMI_SWC1 on $at.
 * Every sequence that names $at is bracketed by ".set noat" / ".set at".
 */
#define MMI_LWC1(fp, addr, bias) \
    "lwc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULWC1(fp, addr, bias) \
    ".set noat \n\t" \
    "ulw $at, "#bias"("#addr") \n\t" \
    "mtc1 $at, "#fp" \n\t" \
    ".set at \n\t"

#define MMI_LWXC1(fp, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    MMI_LWC1(fp, $at, bias) \
    ".set at \n\t"

#define MMI_SWC1(fp, addr, bias) \
    "swc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USWC1(fp, addr, bias) \
    ".set noat \n\t" \
    "mfc1 $at, "#fp" \n\t" \
    "usw $at, "#bias"("#addr") \n\t" \
    ".set at \n\t"

#define MMI_SWXC1(fp, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    MMI_SWC1(fp, $at, bias) \
    ".set at \n\t"
99
100
/*
 * Doubleword (64-bit) FPR loads/stores.
 *   MMI_LDC1 / MMI_SDC1:   plain ldc1/sdc1 at bias(addr).
 *   MMI_ULDC1 / MMI_USDC1: go through $at with uld/usd and dmtc1/dmfc1.
 *   MMI_LDXC1 / MMI_SDXC1: form addr + stride in $at with PTR_ADDU, then
 *                          reuse MMI_LDC1 / MMI_SDC1 on $at.
 * Every sequence that names $at is bracketed by ".set noat" / ".set at".
 */
#define MMI_LDC1(fp, addr, bias) \
    "ldc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULDC1(fp, addr, bias) \
    ".set noat \n\t" \
    "uld $at, "#bias"("#addr") \n\t" \
    "dmtc1 $at, "#fp" \n\t" \
    ".set at \n\t"

#define MMI_LDXC1(fp, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    MMI_LDC1(fp, $at, bias) \
    ".set at \n\t"

#define MMI_SDC1(fp, addr, bias) \
    "sdc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USDC1(fp, addr, bias) \
    ".set noat \n\t" \
    "dmfc1 $at, "#fp" \n\t" \
    "usd $at, "#bias"("#addr") \n\t" \
    ".set at \n\t"

#define MMI_SDXC1(fp, addr, stride, bias) \
    ".set noat \n\t" \
    PTR_ADDU "$at, "#addr", "#stride" \n\t" \
    MMI_SDC1(fp, $at, bias) \
    ".set at \n\t"
129
130
/*
 * Quad-word (2 x 64-bit) loads/stores, emitted here as two doubleword
 * accesses: the first register uses bias(addr), the second 8+bias(addr).
 *   MMI_LQ / MMI_SQ:     GPR pair via ld/sd
 *   MMI_LQC1 / MMI_SQC1: FPR pair via ldc1/sdc1
 */
#define MMI_LQ(reg1, reg2, addr, bias) \
    "ld "#reg1", "#bias"("#addr") \n\t" \
    "ld "#reg2", 8+"#bias"("#addr") \n\t"

#define MMI_SQ(reg1, reg2, addr, bias) \
    "sd "#reg1", "#bias"("#addr") \n\t" \
    "sd "#reg2", 8+"#bias"("#addr") \n\t"

#define MMI_LQC1(fp1, fp2, addr, bias) \
    "ldc1 "#fp1", "#bias"("#addr") \n\t" \
    "ldc1 "#fp2", 8+"#bias"("#addr") \n\t"

#define MMI_SQC1(fp1, fp2, addr, bias) \
    "sdc1 "#fp1", "#bias"("#addr") \n\t" \
    "sdc1 "#fp2", 8+"#bias"("#addr") \n\t"
145
146
#elif HAVE_LOONGSON3
/* !HAVE_LOONGSON2 */
147
148
/*
 * Indexed GPR loads/stores for this branch: a single gs-prefixed
 * instruction takes the base, the index and the bias directly, so no
 * temporary register is needed.
 *   reg:    GPR to load into / store from
 *   addr:   base address register
 *   stride: index register
 *   bias:   constant displacement
 */
#define MMI_LWX(reg, addr, stride, bias) \
    "gslwx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_SWX(reg, addr, stride, bias) \
    "gsswx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_LDX(reg, addr, stride, bias) \
    "gsldx "#reg", "#bias"("#addr", "#stride") \n\t"

#define MMI_SDX(reg, addr, stride, bias) \
    "gssdx "#reg", "#bias"("#addr", "#stride") \n\t"
159
160
/* Load a 32-bit word from bias(addr) into FPR fp with lwc1. */
#define MMI_LWC1(fp, addr, bias) \
    "lwc1 "#fp", "#bias"("#addr") \n\t"
162
163
#if _MIPS_SIM == _ABIO32 /* workaround for 3A2000 gslwlc1 bug */

/*
 * Unaligned word load into FPR fp without gslwlc1/gslwrc1: the word is
 * assembled in $at with lwl (at bias+off) and lwr (at bias), then moved to
 * fp with mtc1. $at is used explicitly, hence ".set noat" / ".set at".
 */
#define MMI_LWLRC1(fp, addr, bias, off) \
    ".set noat \n\t" \
    "lwl $at, "#bias"+"#off"("#addr") \n\t" \
    "lwr $at, "#bias"("#addr") \n\t" \
    "mtc1 $at, "#fp" \n\t" \
    ".set at \n\t"

/*
 * MMI_ULWC1 is defined in every other configuration of this header, so
 * provide it here as well; off = 3 mirrors the "3+bias" used by the
 * gslwlc1 form in the #else branch.
 * NOTE(review): assumes the same byte order as the gslwlc1/gslwrc1
 * variant - confirm.
 */
#define MMI_ULWC1(fp, addr, bias) MMI_LWLRC1(fp, addr, bias, 3)

#else /* _MIPS_SIM != _ABIO32 */

/* Same empty expansions as the definitions earlier in this file. */
#define DECLARE_VAR_LOW32
#define RESTRICT_ASM_LOW32

/* Unaligned word load into FPR fp: gslwlc1 at 3+bias, gslwrc1 at bias. */
#define MMI_ULWC1(fp, addr, bias) \
    "gslwlc1 "#fp", 3+"#bias"("#addr") \n\t" \
    "gslwrc1 "#fp", "#bias"("#addr") \n\t"

#endif /* _MIPS_SIM != _ABIO32 */
182
183
/*
 * Word (32-bit) FPR accesses for this branch.
 *   MMI_LWXC1 / MMI_SWXC1: indexed load/store, bias(addr, stride), via
 *                          gslwxc1 / gsswxc1.
 *   MMI_SWC1:              plain swc1 at bias(addr).
 *   MMI_USWC1:             store pair gsswlc1 at 3+bias and gsswrc1 at bias.
 */
#define MMI_LWXC1(fp, addr, stride, bias) \
    "gslwxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_SWC1(fp, addr, bias) \
    "swc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USWC1(fp, addr, bias) \
    "gsswlc1 "#fp", 3+"#bias"("#addr") \n\t" \
    "gsswrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_SWXC1(fp, addr, stride, bias) \
    "gsswxc1 "#fp", "#bias"("#addr", "#stride") \n\t"
195
196
/*
 * Doubleword (64-bit) FPR accesses for this branch.
 *   MMI_LDC1 / MMI_SDC1:   plain ldc1/sdc1 at bias(addr).
 *   MMI_ULDC1 / MMI_USDC1: instruction pair at 7+bias (gsldlc1/gssdlc1)
 *                          and at bias (gsldrc1/gssdrc1).
 *   MMI_LDXC1 / MMI_SDXC1: indexed access, bias(addr, stride), via
 *                          gsldxc1 / gssdxc1.
 */
#define MMI_LDC1(fp, addr, bias) \
    "ldc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_ULDC1(fp, addr, bias) \
    "gsldlc1 "#fp", 7+"#bias"("#addr") \n\t" \
    "gsldrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_LDXC1(fp, addr, stride, bias) \
    "gsldxc1 "#fp", "#bias"("#addr", "#stride") \n\t"

#define MMI_SDC1(fp, addr, bias) \
    "sdc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_USDC1(fp, addr, bias) \
    "gssdlc1 "#fp", 7+"#bias"("#addr") \n\t" \
    "gssdrc1 "#fp", "#bias"("#addr") \n\t"

#define MMI_SDXC1(fp, addr, stride, bias) \
    "gssdxc1 "#fp", "#bias"("#addr", "#stride") \n\t"
215
216
/*
 * Quad-word loads/stores for this branch: one gs-prefixed instruction
 * takes both registers and bias(addr).
 *   MMI_LQ / MMI_SQ:     GPR pair via gslq / gssq
 *   MMI_LQC1 / MMI_SQC1: FPR pair via gslqc1 / gssqc1
 */
#define MMI_LQ(reg1, reg2, addr, bias) \
    "gslq "#reg1", "#reg2", "#bias"("#addr") \n\t"

#define MMI_SQ(reg1, reg2, addr, bias) \
    "gssq "#reg1", "#reg2", "#bias"("#addr") \n\t"

#define MMI_LQC1(fp1, fp2, addr, bias) \
    "gslqc1 "#fp1", "#fp2", "#bias"("#addr") \n\t"

#define MMI_SQC1(fp1, fp2, addr, bias) \
    "gssqc1 "#fp1", "#fp2", "#bias"("#addr") \n\t"
227
228
#endif
/* HAVE_LOONGSON2 */
229
230
/**
 * Backup saved registers.
 * We're not using the compiler's clobber list as it's not smart enough
 * to take advantage of quad word load/store.
 *
 * Declares the scratch array temp_backup_reg (via LOCAL_ALIGNED_16) in the
 * current scope and stores the registers into it with MMI_SQC1:
 *   _MIPS_SIM == _ABI64: $f24..$f31 as four pairs at 0x00, 0x10, 0x20, 0x30
 *   otherwise:           ($f22,$f20), ($f26,$f24), ($f30,$f28) at
 *                        0x00, 0x10, 0x20
 * Every pair gets its own 16-byte slot so that no store overwrites another.
 * RECOVER_REG must be used in the same scope and uses the same layout.
 */
#define BACKUP_REG \
    LOCAL_ALIGNED_16(double, temp_backup_reg, [8]); \
    if (_MIPS_SIM == _ABI64) \
        __asm__ volatile ( \
            MMI_SQC1($f25, $f24, %[temp], 0x00) \
            MMI_SQC1($f27, $f26, %[temp], 0x10) \
            MMI_SQC1($f29, $f28, %[temp], 0x20) \
            MMI_SQC1($f31, $f30, %[temp], 0x30) \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        ); \
    else \
        __asm__ volatile ( \
            MMI_SQC1($f22, $f20, %[temp], 0x00) \
            MMI_SQC1($f26, $f24, %[temp], 0x10) \
            MMI_SQC1($f30, $f28, %[temp], 0x20) \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        );

/**
 * Recover the registers saved by BACKUP_REG.
 * Loads each register pair with MMI_LQC1 from the temp_backup_reg slot that
 * BACKUP_REG stored it to; the offsets here must match BACKUP_REG exactly.
 */
#define RECOVER_REG \
    if (_MIPS_SIM == _ABI64) \
        __asm__ volatile ( \
            MMI_LQC1($f25, $f24, %[temp], 0x00) \
            MMI_LQC1($f27, $f26, %[temp], 0x10) \
            MMI_LQC1($f29, $f28, %[temp], 0x20) \
            MMI_LQC1($f31, $f30, %[temp], 0x30) \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        ); \
    else \
        __asm__ volatile ( \
            MMI_LQC1($f22, $f20, %[temp], 0x00) \
            MMI_LQC1($f26, $f24, %[temp], 0x10) \
            MMI_LQC1($f30, $f28, %[temp], 0x20) \
            : \
            : [temp]"r"(temp_backup_reg) \
            : "memory" \
        );
280
281
/**
 * brief: Transpose 2X2 word packaged data.
 * fr_i0, fr_i1: src
 * fr_o0, fr_o1: dst (fr_o0 = punpcklwd of the sources,
 *                    fr_o1 = punpckhwd of the sources)
 */
#define TRANSPOSE_2W(fr_i0, fr_i1, fr_o0, fr_o1) \
    "punpcklwd "#fr_o0", "#fr_i0", "#fr_i1" \n\t" \
    "punpckhwd "#fr_o1", "#fr_i0", "#fr_i1" \n\t"
289
290
/**
 * brief: Transpose 4X4 half word packaged data.
 * fr_i0, fr_i1, fr_i2, fr_i3: src & dst
 * fr_t0, fr_t1, fr_t2, fr_t3: temporary register
 *
 * Two stages: halfword interleaves of (i0,i1) and (i2,i3) into the
 * temporaries, then word interleaves of the temporaries back into i0..i3.
 */
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, \
                     fr_t0, fr_t1, fr_t2, fr_t3) \
    "punpcklhw "#fr_t0", "#fr_i0", "#fr_i1" \n\t" \
    "punpckhhw "#fr_t1", "#fr_i0", "#fr_i1" \n\t" \
    "punpcklhw "#fr_t2", "#fr_i2", "#fr_i3" \n\t" \
    "punpckhhw "#fr_t3", "#fr_i2", "#fr_i3" \n\t" \
    "punpcklwd "#fr_i0", "#fr_t0", "#fr_t2" \n\t" \
    "punpckhwd "#fr_i1", "#fr_t0", "#fr_t2" \n\t" \
    "punpcklwd "#fr_i2", "#fr_t1", "#fr_t3" \n\t" \
    "punpckhwd "#fr_i3", "#fr_t1", "#fr_t3" \n\t"
305
306
/**
 * brief: Transpose 8x8 byte packaged data.
 * fr_i0~i7: src & dst
 * fr_t0~t3: temporary register
 *
 * Three stages: byte interleaves (punpck?bh), halfword interleaves
 * (punpck?hw), then word interleaves (punpck?wd). Instruction order matters
 * because the i registers are overwritten while later steps still read them.
 */
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, \
                     fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3) \
    "punpcklbh "#fr_t0", "#fr_i0", "#fr_i1" \n\t" \
    "punpckhbh "#fr_t1", "#fr_i0", "#fr_i1" \n\t" \
    "punpcklbh "#fr_t2", "#fr_i2", "#fr_i3" \n\t" \
    "punpckhbh "#fr_t3", "#fr_i2", "#fr_i3" \n\t" \
    "punpcklbh "#fr_i0", "#fr_i4", "#fr_i5" \n\t" \
    "punpckhbh "#fr_i1", "#fr_i4", "#fr_i5" \n\t" \
    "punpcklbh "#fr_i2", "#fr_i6", "#fr_i7" \n\t" \
    "punpckhbh "#fr_i3", "#fr_i6", "#fr_i7" \n\t" \
    "punpcklhw "#fr_i4", "#fr_t0", "#fr_t2" \n\t" \
    "punpckhhw "#fr_i5", "#fr_t0", "#fr_t2" \n\t" \
    "punpcklhw "#fr_i6", "#fr_t1", "#fr_t3" \n\t" \
    "punpckhhw "#fr_i7", "#fr_t1", "#fr_t3" \n\t" \
    "punpcklhw "#fr_t0", "#fr_i0", "#fr_i2" \n\t" \
    "punpckhhw "#fr_t1", "#fr_i0", "#fr_i2" \n\t" \
    "punpcklhw "#fr_t2", "#fr_i1", "#fr_i3" \n\t" \
    "punpckhhw "#fr_t3", "#fr_i1", "#fr_i3" \n\t" \
    "punpcklwd "#fr_i0", "#fr_i4", "#fr_t0" \n\t" \
    "punpckhwd "#fr_i1", "#fr_i4", "#fr_t0" \n\t" \
    "punpcklwd "#fr_i2", "#fr_i5", "#fr_t1" \n\t" \
    "punpckhwd "#fr_i3", "#fr_i5", "#fr_t1" \n\t" \
    "punpcklwd "#fr_i4", "#fr_i6", "#fr_t2" \n\t" \
    "punpckhwd "#fr_i5", "#fr_i6", "#fr_t2" \n\t" \
    "punpcklwd "#fr_i6", "#fr_i7", "#fr_t3" \n\t" \
    "punpckhwd "#fr_i7", "#fr_i7", "#fr_t3" \n\t"
337
338
/**
 * brief: Parallel SRA for 8 byte packaged data.
 * fr_i0: src
 * fr_i1: SRA number(SRAB number + 8)
 * fr_t0, fr_t1: temporary register
 * fr_d0: dst
 *
 * The source bytes are interleaved with the previous contents of the
 * temporaries (punpcklbh/punpckhbh read fr_t0/fr_t1 before writing them),
 * shifted as halfwords with psrah, and packed back to bytes with packsshb.
 * NOTE(review): presumably the extra 8 in the shift count discards the
 * byte lane that came from the temporaries - confirm against the
 * punpck?bh operand order.
 */
#define PSRAB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \
    "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \
    "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \
    "psrah "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \
    "psrah "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
    "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"
351
352
/**
 * brief: Parallel SRL for 8 byte packaged data.
 * fr_i0: src
 * fr_i1: SRL number(SRLB number + 8)
 * fr_t0, fr_t1: temporary register
 * fr_d0: dst
 *
 * Same structure as PSRAB_MMI but the halfword shift is psrlh.
 * NOTE(review): the final pack is packsshb, the same instruction
 * PSRAB_MMI uses - confirm that is intended for logical-shift results.
 */
#define PSRLB_MMI(fr_i0, fr_i1, fr_t0, fr_t1, fr_d0) \
    "punpcklbh "#fr_t0", "#fr_t0", "#fr_i0" \n\t" \
    "punpckhbh "#fr_t1", "#fr_t1", "#fr_i0" \n\t" \
    "psrlh "#fr_t0", "#fr_t0", "#fr_i1" \n\t" \
    "psrlh "#fr_t1", "#fr_t1", "#fr_i1" \n\t" \
    "packsshb "#fr_d0", "#fr_t0", "#fr_t1" \n\t"
365
366
/*
 * Apply psrah with the same shift operand, in place, to four registers
 * (PSRAH_4_MMI) or eight registers (PSRAH_8_MMI, built from two
 * PSRAH_4_MMI expansions).
 */
#define PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
    "psrah "#fp1", "#fp1", "#shift" \n\t" \
    "psrah "#fp2", "#fp2", "#shift" \n\t" \
    "psrah "#fp3", "#fp3", "#shift" \n\t" \
    "psrah "#fp4", "#fp4", "#shift" \n\t"

#define PSRAH_8_MMI(fp1, fp2, fp3, fp4, fp5, fp6, fp7, fp8, shift) \
    PSRAH_4_MMI(fp1, fp2, fp3, fp4, shift) \
    PSRAH_4_MMI(fp5, fp6, fp7, fp8, shift)
375
376
/**
 * brief: (((value) + (1 << ((n) - 1))) >> (n))
 * fr_i0: src & dst
 * fr_i1: Operand number
 * fr_t0, fr_t1: temporary FPR
 * gr_t0: temporary GPR
 *
 * Steps: load 1 into gr_t0 and move it to fr_t0, replicate it with
 * punpcklwd, compute fr_i1 - 1 (psubw), shift the 1 left by that amount
 * (psllw) to get the rounding term, add it to fr_i0 (paddw) and shift
 * fr_i0 right arithmetically by fr_i1 (psraw).
 */
#define ROUND_POWER_OF_TWO_MMI(fr_i0, fr_i1, fr_t0, fr_t1, gr_t0) \
    "li "#gr_t0", 0x01 \n\t" \
    "dmtc1 "#gr_t0", "#fr_t0" \n\t" \
    "punpcklwd "#fr_t0", "#fr_t0", "#fr_t0" \n\t" \
    "psubw "#fr_t1", "#fr_i1", "#fr_t0" \n\t" \
    "psllw "#fr_t1", "#fr_t0", "#fr_t1" \n\t" \
    "paddw "#fr_i0", "#fr_i0", "#fr_t1" \n\t" \
    "psraw "#fr_i0", "#fr_i0", "#fr_i1" \n\t"
391
392
#endif
/* AVUTIL_MIPS_MMIUTILS_H */
mem_internal.h
asmdefs.h
Generated on Tue May 12 2026 19:23:40 for FFmpeg by
1.8.17