/**
 * @file plasma_core_blas_s.h
 *
 * PLASMA: Parallel Linear Algebra Software for Multicore Architectures.
 */
12#ifndef PLASMA_CORE_BLAS_S_H
13#define PLASMA_CORE_BLAS_S_H
14
15#include "plasma_async.h"
16#include "plasma_barrier.h"
17#include "plasma_descriptor.h"
18#include "plasma_types.h"
19#include "plasma_workspace.h"
20#include "plasma_descriptor.h"
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
26#define REAL
27
28/******************************************************************************/
29#ifdef COMPLEX
30float fabsf(float alpha);
31#endif
32
33int plasma_core_sgeadd(plasma_enum_t transa,
34 int m, int n,
35 float alpha, const float *A, int lda,
36 float beta, float *B, int ldb);
37
/**
 * plasma_core_sgelqt: single-precision (float) core kernel prototype.
 *
 * NOTE(review): the name matches LAPACK's xGELQT naming (blocked LQ
 * factorization with inner block size ib); confirm against the
 * implementation.
 *
 * A, T, tau and work are all non-const float buffers. Returns int; the
 * meaning of the value is not visible from this header.
 */
int plasma_core_sgelqt(int m, int n, int ib,
                       float *A, int lda,
                       float *T, int ldt,
                       float *tau,
                       float *work);

44void plasma_core_sgemm(plasma_enum_t transa, plasma_enum_t transb,
45 int m, int n, int k,
46 float alpha, const float *A, int lda,
47 const float *B, int ldb,
48 float beta, float *C, int ldc);
49
/**
 * plasma_core_sgeqrt: single-precision (float) core kernel prototype.
 *
 * NOTE(review): the name matches LAPACK's xGEQRT naming (blocked QR
 * factorization with inner block size ib); confirm against the
 * implementation.
 *
 * A, T, tau and work are all non-const float buffers. Returns int; the
 * meaning of the value is not visible from this header.
 */
int plasma_core_sgeqrt(int m, int n, int ib,
                       float *A, int lda,
                       float *T, int ldt,
                       float *tau,
                       float *work);

/**
 * plasma_core_sgessq: single-precision (float) core kernel prototype.
 *
 * NOTE(review): presumably accumulates a scaled sum of squares of the
 * m-by-n block A into scale/sumsq (as LAPACK xLASSQ does); confirm against
 * the implementation.
 *
 * A is taken through a const pointer; scale and sumsq are non-const.
 * No return value.
 */
void plasma_core_sgessq(int m, int n,
                        const float *A, int lda,
                        float *scale, float *sumsq);

60void plasma_core_sgetrf(plasma_desc_t A, int *ipiv, int ib, int rank, int size,
61 volatile int *max_idx, volatile float *max_val,
62 volatile int *info, plasma_barrier_t *barrier);
63
64int plasma_core_ssygst(int itype, plasma_enum_t uplo,
65 int n,
66 float *A, int lda,
67 float *B, int ldb);
68
69void plasma_core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
70 int m, int n,
71 float alpha, const float *A, int lda,
72 const float *B, int ldb,
73 float beta, float *C, int ldc);
74
75void plasma_core_ssyr2k(plasma_enum_t uplo, plasma_enum_t trans,
76 int n, int k,
77 float alpha, const float *A, int lda,
78 const float *B, int ldb,
79 float beta, float *C, int ldc);
80
81void plasma_core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
82 int n, int k,
83 float alpha, const float *A, int lda,
84 float beta, float *C, int ldc);
85
86void plasma_core_ssyssq(plasma_enum_t uplo,
87 int n,
88 const float *A, int lda,
89 float *scale, float *sumsq);
90
91void plasma_core_ssyssq(plasma_enum_t uplo,
92 int n,
93 const float *A, int lda,
94 float *scale, float *sumsq);
95
96void plasma_core_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
97 int m, int n,
98 const float *A, int lda,
99 float *B, int ldb);
100
101void plasma_core_slacpy_lapack2tile_band(plasma_enum_t uplo,
102 int it, int jt,
103 int m, int n, int nb, int kl, int ku,
104 const float *A, int lda,
105 float *B, int ldb);
106
107void plasma_core_slacpy_tile2lapack_band(plasma_enum_t uplo,
108 int it, int jt,
109 int m, int n, int nb, int kl, int ku,
110 const float *B, int ldb,
111 float *A, int lda);
112
113void plasma_core_slange(plasma_enum_t norm,
114 int m, int n,
115 const float *A, int lda,
116 float *work, float *result);
117
118void plasma_core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
119 int n,
120 const float *A, int lda,
121 float *work, float *value);
122
123void plasma_core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
124 int n,
125 const float *A, int lda,
126 float *work, float *value);
127
128void plasma_core_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
129 int m, int n,
130 const float *A, int lda,
131 float *work, float *value);
132
133void plasma_core_slascl(plasma_enum_t uplo,
134 float cfrom, float cto,
135 int m, int n,
136 float *A, int lda);
137
138void plasma_core_slaset(plasma_enum_t uplo,
139 int m, int n,
140 float alpha, float beta,
141 float *A, int lda);
142
143void plasma_core_sgeswp(plasma_enum_t colrow,
144 plasma_desc_t A, int k1, int k2, const int *ipiv, int incx);
145
146void plasma_core_ssyswp(int rank, int num_threads,
147 int uplo, plasma_desc_t A, int k1, int k2, const int *ipiv,
148 int incx, plasma_barrier_t *barrier);
149
150int plasma_core_slauum(plasma_enum_t uplo,
151 int n,
152 float *A, int lda);
153
154int plasma_core_spamm(plasma_enum_t op, plasma_enum_t side, plasma_enum_t storev,
155 int m, int n, int k, int l,
156 const float *A1, int lda1,
157 float *A2, int lda2,
158 const float *V, int ldv,
159 float *W, int ldw);
160
161int plasma_core_sparfb(plasma_enum_t side, plasma_enum_t trans, plasma_enum_t direct,
162 plasma_enum_t storev,
163 int m1, int n1, int m2, int n2, int k, int l,
164 float *A1, int lda1,
165 float *A2, int lda2,
166 const float *V, int ldv,
167 const float *T, int ldt,
168 float *work, int ldwork);
169
170int plasma_core_spemv(plasma_enum_t trans, int storev,
171 int m, int n, int l,
172 float alpha,
173 const float *A, int lda,
174 const float *X, int incx,
175 float beta,
176 float *Y, int incy,
177 float *work);
178
179int plasma_core_spotrf(plasma_enum_t uplo,
180 int n,
181 float *A, int lda);
182
183void plasma_core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
184 int m, int n,
185 float alpha, const float *A, int lda,
186 const float *B, int ldb,
187 float beta, float *C, int ldc);
188
189void plasma_core_ssyr2k(
190 plasma_enum_t uplo, plasma_enum_t trans,
191 int n, int k,
192 float alpha, const float *A, int lda,
193 const float *B, int ldb,
194 float beta, float *C, int ldc);
195
196void plasma_core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
197 int n, int k,
198 float alpha, const float *A, int lda,
199 float beta, float *C, int ldc);
200
201int plasma_core_stradd(plasma_enum_t uplo, plasma_enum_t transa,
202 int m, int n,
203 float alpha, const float *A, int lda,
204 float beta, float *B, int ldb);
205
206void plasma_core_strmm(plasma_enum_t side, plasma_enum_t uplo,
207 plasma_enum_t transa, plasma_enum_t diag,
208 int m, int n,
209 float alpha, const float *A, int lda,
210 float *B, int ldb);
211
212void plasma_core_strsm(plasma_enum_t side, plasma_enum_t uplo,
213 plasma_enum_t transa, plasma_enum_t diag,
214 int m, int n,
215 float alpha, const float *A, int lda,
216 float *B, int ldb);
217
218void plasma_core_strssq(plasma_enum_t uplo, plasma_enum_t diag,
219 int m, int n,
220 const float *A, int lda,
221 float *scale, float *sumsq);
222
223int plasma_core_strtri(plasma_enum_t uplo, plasma_enum_t diag,
224 int n,
225 float *A, int lda);
226
/**
 * plasma_core_stslqt: single-precision (float) core kernel prototype.
 *
 * Operates on a pair of tiles A1/A2 plus T, tau and work; all are non-const
 * float buffers.
 * NOTE(review): by its name presumably the two-tile LQ factorization step
 * with inner block size ib; confirm against the implementation. Returns
 * int; the meaning of the value is not visible from this header.
 */
int plasma_core_stslqt(int m, int n, int ib,
                       float *A1, int lda1,
                       float *A2, int lda2,
                       float *T,  int ldt,
                       float *tau,
                       float *work);

234int plasma_core_stsmlq(plasma_enum_t side, plasma_enum_t trans,
235 int m1, int n1, int m2, int n2, int k, int ib,
236 float *A1, int lda1,
237 float *A2, int lda2,
238 const float *V, int ldv,
239 const float *T, int ldt,
240 float *work, int ldwork);
241
242int plasma_core_stsmqr(plasma_enum_t side, plasma_enum_t trans,
243 int m1, int n1, int m2, int n2, int k, int ib,
244 float *A1, int lda1,
245 float *A2, int lda2,
246 const float *V, int ldv,
247 const float *T, int ldt,
248 float *work, int ldwork);
249
/**
 * plasma_core_stsqrt: single-precision (float) core kernel prototype.
 *
 * Operates on a pair of tiles A1/A2 plus T, tau and work; all are non-const
 * float buffers.
 * NOTE(review): by its name presumably the two-tile QR factorization step
 * with inner block size ib; confirm against the implementation. Returns
 * int; the meaning of the value is not visible from this header.
 */
int plasma_core_stsqrt(int m, int n, int ib,
                       float *A1, int lda1,
                       float *A2, int lda2,
                       float *T,  int ldt,
                       float *tau,
                       float *work);

/**
 * plasma_core_sttlqt: single-precision (float) core kernel prototype.
 *
 * Same parameter list as plasma_core_stsqrt; all buffers are non-const.
 * NOTE(review): how the "tt" variant differs from the "ts" variant is not
 * visible from this header; see the implementation. Returns int; the
 * meaning of the value is not visible here.
 */
int plasma_core_sttlqt(int m, int n, int ib,
                       float *A1, int lda1,
                       float *A2, int lda2,
                       float *T,  int ldt,
                       float *tau,
                       float *work);

264int plasma_core_sttmlq(plasma_enum_t side, plasma_enum_t trans,
265 int m1, int n1, int m2, int n2, int k, int ib,
266 float *A1, int lda1,
267 float *A2, int lda2,
268 const float *V, int ldv,
269 const float *T, int ldt,
270 float *work, int ldwork);
271
272int plasma_core_sttmqr(plasma_enum_t side, plasma_enum_t trans,
273 int m1, int n1, int m2, int n2, int k, int ib,
274 float *A1, int lda1,
275 float *A2, int lda2,
276 const float *V, int ldv,
277 const float *T, int ldt,
278 float *work, int ldwork);
279
/**
 * plasma_core_sttqrt: single-precision (float) core kernel prototype.
 *
 * Operates on a pair of tiles A1/A2 plus T, tau and work; all are non-const
 * float buffers.
 * NOTE(review): by its name presumably a two-tile QR factorization step
 * with inner block size ib; how it differs from plasma_core_stsqrt is not
 * visible from this header. Returns int; the meaning of the value is not
 * visible here.
 */
int plasma_core_sttqrt(int m, int n, int ib,
                       float *A1, int lda1,
                       float *A2, int lda2,
                       float *T,  int ldt,
                       float *tau,
                       float *work);

287int plasma_core_sormlq(plasma_enum_t side, plasma_enum_t trans,
288 int m, int n, int k, int ib,
289 const float *A, int lda,
290 const float *T, int ldt,
291 float *C, int ldc,
292 float *work, int ldwork);
293
294int plasma_core_sormqr(plasma_enum_t side, plasma_enum_t trans,
295 int m, int n, int k, int ib,
296 const float *A, int lda,
297 const float *T, int ldt,
298 float *C, int ldc,
299 float *work, int ldwork);
300
301/******************************************************************************/
302void plasma_core_omp_samax(int colrow, int m, int n,
303 const float *A, int lda,
304 float *values,
305 plasma_sequence_t *sequence, plasma_request_t *request);
306
307void plasma_core_omp_sgeadd(
308 plasma_enum_t transa, int m, int n,
309 float alpha, const float *A, int lda,
310 float beta, float *B, int ldb,
311 plasma_sequence_t *sequence, plasma_request_t *request);
312
313void plasma_core_omp_sgelqt(int m, int n, int ib,
314 float *A, int lda,
315 float *T, int ldt,
316 plasma_workspace_t work,
317 plasma_sequence_t *sequence, plasma_request_t *request);
318
319void plasma_core_omp_sgemm(
320 plasma_enum_t transa, plasma_enum_t transb,
321 int m, int n, int k,
322 float alpha, const float *A, int lda,
323 const float *B, int ldb,
324 float beta, float *C, int ldc,
325 plasma_sequence_t *sequence, plasma_request_t *request);
326
327void plasma_core_omp_sgeqrt(int m, int n, int ib,
328 float *A, int lda,
329 float *T, int ldt,
330 plasma_workspace_t work,
331 plasma_sequence_t *sequence, plasma_request_t *request);
332
333void plasma_core_omp_sgessq(int m, int n,
334 const float *A, int lda,
335 float *scale, float *sumsq,
336 plasma_sequence_t *sequence, plasma_request_t *request);
337
338void plasma_core_omp_sgessq_aux(int n,
339 const float *scale, const float *sumsq,
340 float *value,
341 plasma_sequence_t *sequence,
342 plasma_request_t *request);
343
344void plasma_core_omp_ssygst(int itype, plasma_enum_t uplo,
345 int n,
346 float *A, int lda,
347 float *B, int ldb,
348 plasma_sequence_t *sequence, plasma_request_t *request);
349
350void plasma_core_omp_ssymm(
351 plasma_enum_t side, plasma_enum_t uplo,
352 int m, int n,
353 float alpha, const float *A, int lda,
354 const float *B, int ldb,
355 float beta, float *C, int ldc,
356 plasma_sequence_t *sequence, plasma_request_t *request);
357
358void plasma_core_omp_ssyr2k(
359 plasma_enum_t uplo, plasma_enum_t trans,
360 int n, int k,
361 float alpha, const float *A, int lda,
362 const float *B, int ldb,
363 float beta, float *C, int ldc,
364 plasma_sequence_t *sequence, plasma_request_t *request);
365
366void plasma_core_omp_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
367 int n, int k,
368 float alpha, const float *A, int lda,
369 float beta, float *C, int ldc,
370 plasma_sequence_t *sequence, plasma_request_t *request);
371
372void plasma_core_omp_ssyssq(plasma_enum_t uplo,
373 int n,
374 const float *A, int lda,
375 float *scale, float *sumsq,
376 plasma_sequence_t *sequence, plasma_request_t *request);
377
378void plasma_core_omp_ssyssq(plasma_enum_t uplo,
379 int n,
380 const float *A, int lda,
381 float *scale, float *sumsq,
382 plasma_sequence_t *sequence, plasma_request_t *request);
383
384void plasma_core_omp_ssyssq_aux(int m, int n,
385 const float *scale, const float *sumsq,
386 float *value,
387 plasma_sequence_t *sequence,
388 plasma_request_t *request);
389
390void plasma_core_omp_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
391 int m, int n,
392 const float *A, int lda,
393 float *B, int ldb,
394 plasma_sequence_t *sequence, plasma_request_t *request);
395
396void plasma_core_omp_slacpy_lapack2tile_band(plasma_enum_t uplo,
397 int it, int jt,
398 int m, int n, int nb, int kl, int ku,
399 const float *A, int lda,
400 float *B, int ldb);
401
402void plasma_core_omp_slacpy_tile2lapack_band(plasma_enum_t uplo,
403 int it, int jt,
404 int m, int n, int nb, int kl, int ku,
405 const float *B, int ldb,
406 float *A, int lda);
407
408void plasma_core_omp_slange(plasma_enum_t norm,
409 int m, int n,
410 const float *A, int lda,
411 float *work, float *result,
412 plasma_sequence_t *sequence, plasma_request_t *request);
413
414void plasma_core_omp_slange_aux(plasma_enum_t norm,
415 int m, int n,
416 const float *A, int lda,
417 float *value,
418 plasma_sequence_t *sequence,
419 plasma_request_t *request);
420
421void plasma_core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
422 int n,
423 const float *A, int lda,
424 float *work, float *value,
425 plasma_sequence_t *sequence, plasma_request_t *request);
426
427void plasma_core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
428 int n,
429 const float *A, int lda,
430 float *value,
431 plasma_sequence_t *sequence,
432 plasma_request_t *request);
433
434void plasma_core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
435 int n,
436 const float *A, int lda,
437 float *work, float *value,
438 plasma_sequence_t *sequence, plasma_request_t *request);
439
440void plasma_core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
441 int n,
442 const float *A, int lda,
443 float *value,
444 plasma_sequence_t *sequence,
445 plasma_request_t *request);
446
447void plasma_core_omp_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
448 int m, int n,
449 const float *A, int lda,
450 float *work, float *value,
451 plasma_sequence_t *sequence, plasma_request_t *request);
452
453void plasma_core_omp_slantr_aux(plasma_enum_t norm, plasma_enum_t uplo,
454 plasma_enum_t diag,
455 int m, int n,
456 const float *A, int lda,
457 float *value,
458 plasma_sequence_t *sequence,
459 plasma_request_t *request);
460
461void plasma_core_omp_slascl(plasma_enum_t uplo,
462 float cfrom, float cto,
463 int m, int n,
464 float *A, int lda,
465 plasma_sequence_t *sequence, plasma_request_t *request);
466
467void plasma_core_omp_slaset(plasma_enum_t uplo,
468 int mb, int nb,
469 int i, int j,
470 int m, int n,
471 float alpha, float beta,
472 float *A);
473
474void plasma_core_omp_slauum(plasma_enum_t uplo,
475 int n,
476 float *A, int lda,
477 plasma_sequence_t *sequence, plasma_request_t *request);
478
479void plasma_core_omp_spotrf(plasma_enum_t uplo,
480 int n,
481 float *A, int lda,
482 int iinfo,
483 plasma_sequence_t *sequence, plasma_request_t *request);
484
485void plasma_core_omp_ssymm(
486 plasma_enum_t side, plasma_enum_t uplo,
487 int m, int n,
488 float alpha, const float *A, int lda,
489 const float *B, int ldb,
490 float beta, float *C, int ldc,
491 plasma_sequence_t *sequence, plasma_request_t *request);
492
493void plasma_core_omp_ssyr2k(
494 plasma_enum_t uplo, plasma_enum_t trans,
495 int n, int k,
496 float alpha, const float *A, int lda,
497 const float *B, int ldb,
498 float beta, float *C, int ldc,
499 plasma_sequence_t *sequence, plasma_request_t *request);
500
501void plasma_core_omp_ssyrk(
502 plasma_enum_t uplo, plasma_enum_t trans,
503 int n, int k,
504 float alpha, const float *A, int lda,
505 float beta, float *C, int ldc,
506 plasma_sequence_t *sequence, plasma_request_t *request);
507
508void plasma_core_omp_stradd(
509 plasma_enum_t uplo, plasma_enum_t transa,
510 int m, int n,
511 float alpha, const float *A, int lda,
512 float beta, float *B, int ldb,
513 plasma_sequence_t *sequence, plasma_request_t *request);
514
515void plasma_core_omp_strmm(
516 plasma_enum_t side, plasma_enum_t uplo,
517 plasma_enum_t transa, plasma_enum_t diag,
518 int m, int n,
519 float alpha, const float *A, int lda,
520 float *B, int ldb,
521 plasma_sequence_t *sequence, plasma_request_t *request);
522
523void plasma_core_omp_strsm(
524 plasma_enum_t side, plasma_enum_t uplo,
525 plasma_enum_t transa, plasma_enum_t diag,
526 int m, int n,
527 float alpha, const float *A, int lda,
528 float *B, int ldb,
529 plasma_sequence_t *sequence, plasma_request_t *request);
530
531void plasma_core_omp_strssq(plasma_enum_t uplo, plasma_enum_t diag,
532 int m, int n,
533 const float *A, int lda,
534 float *scale, float *sumsq,
535 plasma_sequence_t *sequence, plasma_request_t *request);
536
537void plasma_core_omp_strtri(plasma_enum_t uplo, plasma_enum_t diag,
538 int n,
539 float *A, int lda,
540 int iinfo,
541 plasma_sequence_t *sequence, plasma_request_t *request);
542
543void plasma_core_omp_stslqt(int m, int n, int ib,
544 float *A1, int lda1,
545 float *A2, int lda2,
546 float *T, int ldt,
547 plasma_workspace_t work,
548 plasma_sequence_t *sequence, plasma_request_t *request);
549
550void plasma_core_omp_stsmlq(plasma_enum_t side, plasma_enum_t trans,
551 int m1, int n1, int m2, int n2, int k, int ib,
552 float *A1, int lda1,
553 float *A2, int lda2,
554 const float *V, int ldv,
555 const float *T, int ldt,
556 plasma_workspace_t work,
557 plasma_sequence_t *sequence, plasma_request_t *request);
558
559void plasma_core_omp_stsmqr(plasma_enum_t side, plasma_enum_t trans,
560 int m1, int n1, int m2, int n2, int k, int ib,
561 float *A1, int lda1,
562 float *A2, int lda2,
563 const float *V, int ldv,
564 const float *T, int ldt,
565 plasma_workspace_t work,
566 plasma_sequence_t *sequence, plasma_request_t *request);
567
568void plasma_core_omp_stsqrt(int m, int n, int ib,
569 float *A1, int lda1,
570 float *A2, int lda2,
571 float *T, int ldt,
572 plasma_workspace_t work,
573 plasma_sequence_t *sequence, plasma_request_t *request);
574
575void plasma_core_omp_sttlqt(int m, int n, int ib,
576 float *A1, int lda1,
577 float *A2, int lda2,
578 float *T, int ldt,
579 plasma_workspace_t work,
580 plasma_sequence_t *sequence, plasma_request_t *request);
581
582void plasma_core_omp_sttmlq(plasma_enum_t side, plasma_enum_t trans,
583 int m1, int n1, int m2, int n2, int k, int ib,
584 float *A1, int lda1,
585 float *A2, int lda2,
586 const float *V, int ldv,
587 const float *T, int ldt,
588 plasma_workspace_t work,
589 plasma_sequence_t *sequence, plasma_request_t *request);
590
591void plasma_core_omp_sttmqr(plasma_enum_t side, plasma_enum_t trans,
592 int m1, int n1, int m2, int n2, int k, int ib,
593 float *A1, int lda1,
594 float *A2, int lda2,
595 const float *V, int ldv,
596 const float *T, int ldt,
597 plasma_workspace_t work,
598 plasma_sequence_t *sequence, plasma_request_t *request);
599
600void plasma_core_omp_sttqrt(int m, int n, int ib,
601 float *A1, int lda1,
602 float *A2, int lda2,
603 float *T, int ldt,
604 plasma_workspace_t work,
605 plasma_sequence_t *sequence, plasma_request_t *request);
606
607void plasma_core_omp_sormlq(plasma_enum_t side, plasma_enum_t trans,
608 int m, int n, int k, int ib,
609 const float *A, int lda,
610 const float *T, int ldt,
611 float *C, int ldc,
612 plasma_workspace_t work,
613 plasma_sequence_t *sequence, plasma_request_t *request);
614
615void plasma_core_omp_sormqr(plasma_enum_t side, plasma_enum_t trans,
616 int m, int n, int k, int ib,
617 const float *A, int lda,
618 const float *T, int ldt,
619 float *C, int ldc,
620 plasma_workspace_t work,
621 plasma_sequence_t *sequence, plasma_request_t *request);
622
623#undef REAL
624
625#ifdef __cplusplus
626} // extern "C"
627#endif
628
629#endif // PLASMA_CORE_BLAS_S_H
/* Doxygen cross-reference left over from the HTML listing (not part of the
 * header): "Definition: plasma_descriptor.h:40". */