12 #ifndef PLASMA_CORE_BLAS_S_H 13 #define PLASMA_CORE_BLAS_S_H 15 #include "plasma_async.h" 16 #include "plasma_barrier.h" 17 #include "plasma_descriptor.h" 18 #include "plasma_types.h" 19 #include "plasma_workspace.h" 20 #include "plasma_descriptor.h" 30 float fabsf(
float alpha);
33 int plasma_core_sgeadd(plasma_enum_t transa,
35 float alpha,
const float *A,
int lda,
36 float beta,
float *B,
int ldb);
38 int plasma_core_sgelqt(
int m,
int n,
int ib,
44 void plasma_core_sgemm(plasma_enum_t transa, plasma_enum_t transb,
46 float alpha,
const float *A,
int lda,
47 const float *B,
int ldb,
48 float beta,
float *C,
int ldc);
50 int plasma_core_sgeqrt(
int m,
int n,
int ib,
/**
 * Prototype of the single-precision real `gessq` core kernel. Only the
 * declaration lives in this header; the definition is elsewhere.
 *
 * NOTE(review): the parameter descriptions below are inferred from the names
 * and const-qualification only -- confirm against the implementation.
 *
 * @param m      presumably the number of rows of A
 * @param n      presumably the number of columns of A
 * @param A      input matrix; read-only for the callee (const-qualified)
 * @param lda    presumably the leading dimension of A
 * @param scale  non-const pointer, so the callee may write through it
 * @param sumsq  non-const pointer, so the callee may write through it
 */
void plasma_core_sgessq(int m, int n,
                        const float *A, int lda,
                        float *scale, float *sumsq);
60 void plasma_core_sgetrf(
plasma_desc_t A,
int *ipiv,
int ib,
int rank,
int size,
61 volatile int *max_idx,
volatile float *max_val,
62 volatile int *info, plasma_barrier_t *barrier);
64 int plasma_core_ssygst(
int itype, plasma_enum_t uplo,
69 void plasma_core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
71 float alpha,
const float *A,
int lda,
72 const float *B,
int ldb,
73 float beta,
float *C,
int ldc);
75 void plasma_core_ssyr2k(plasma_enum_t uplo, plasma_enum_t trans,
77 float alpha,
const float *A,
int lda,
78 const float *B,
int ldb,
79 float beta,
float *C,
int ldc);
81 void plasma_core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
83 float alpha,
const float *A,
int lda,
84 float beta,
float *C,
int ldc);
86 void plasma_core_ssyssq(plasma_enum_t uplo,
88 const float *A,
int lda,
89 float *scale,
float *sumsq);
91 void plasma_core_ssyssq(plasma_enum_t uplo,
93 const float *A,
int lda,
94 float *scale,
float *sumsq);
96 void plasma_core_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
98 const float *A,
int lda,
101 void plasma_core_slacpy_lapack2tile_band(plasma_enum_t uplo,
103 int m,
int n,
int nb,
int kl,
int ku,
104 const float *A,
int lda,
107 void plasma_core_slacpy_tile2lapack_band(plasma_enum_t uplo,
109 int m,
int n,
int nb,
int kl,
int ku,
110 const float *B,
int ldb,
113 void plasma_core_slange(plasma_enum_t norm,
115 const float *A,
int lda,
116 float *work,
float *result);
118 void plasma_core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
120 const float *A,
int lda,
121 float *work,
float *value);
123 void plasma_core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
125 const float *A,
int lda,
126 float *work,
float *value);
128 void plasma_core_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
130 const float *A,
int lda,
131 float *work,
float *value);
133 void plasma_core_slascl(plasma_enum_t uplo,
134 float cfrom,
float cto,
138 void plasma_core_slaset(plasma_enum_t uplo,
140 float alpha,
float beta,
143 void plasma_core_sgeswp(plasma_enum_t colrow,
146 void plasma_core_ssyswp(
int rank,
int num_threads,
148 int incx, plasma_barrier_t *barrier);
150 int plasma_core_slauum(plasma_enum_t uplo,
154 int plasma_core_spamm(plasma_enum_t op, plasma_enum_t side, plasma_enum_t storev,
155 int m,
int n,
int k,
int l,
156 const float *A1,
int lda1,
158 const float *V,
int ldv,
161 int plasma_core_sparfb(plasma_enum_t side, plasma_enum_t trans, plasma_enum_t direct,
162 plasma_enum_t storev,
163 int m1,
int n1,
int m2,
int n2,
int k,
int l,
166 const float *V,
int ldv,
167 const float *T,
int ldt,
168 float *work,
int ldwork);
170 int plasma_core_spemv(plasma_enum_t trans,
int storev,
173 const float *A,
int lda,
174 const float *X,
int incx,
179 int plasma_core_spotrf(plasma_enum_t uplo,
183 void plasma_core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
185 float alpha,
const float *A,
int lda,
186 const float *B,
int ldb,
187 float beta,
float *C,
int ldc);
189 void plasma_core_ssyr2k(
190 plasma_enum_t uplo, plasma_enum_t trans,
192 float alpha,
const float *A,
int lda,
193 const float *B,
int ldb,
194 float beta,
float *C,
int ldc);
196 void plasma_core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
198 float alpha,
const float *A,
int lda,
199 float beta,
float *C,
int ldc);
201 int plasma_core_stradd(plasma_enum_t uplo, plasma_enum_t transa,
203 float alpha,
const float *A,
int lda,
204 float beta,
float *B,
int ldb);
206 void plasma_core_strmm(plasma_enum_t side, plasma_enum_t uplo,
207 plasma_enum_t transa, plasma_enum_t diag,
209 float alpha,
const float *A,
int lda,
212 void plasma_core_strsm(plasma_enum_t side, plasma_enum_t uplo,
213 plasma_enum_t transa, plasma_enum_t diag,
215 float alpha,
const float *A,
int lda,
218 void plasma_core_strssq(plasma_enum_t uplo, plasma_enum_t diag,
220 const float *A,
int lda,
221 float *scale,
float *sumsq);
223 int plasma_core_strtri(plasma_enum_t uplo, plasma_enum_t diag,
227 int plasma_core_stslqt(
int m,
int n,
int ib,
234 int plasma_core_stsmlq(plasma_enum_t side, plasma_enum_t trans,
235 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
238 const float *V,
int ldv,
239 const float *T,
int ldt,
240 float *work,
int ldwork);
242 int plasma_core_stsmqr(plasma_enum_t side, plasma_enum_t trans,
243 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
246 const float *V,
int ldv,
247 const float *T,
int ldt,
248 float *work,
int ldwork);
250 int plasma_core_stsqrt(
int m,
int n,
int ib,
257 int plasma_core_sttlqt(
int m,
int n,
int ib,
264 int plasma_core_sttmlq(plasma_enum_t side, plasma_enum_t trans,
265 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
268 const float *V,
int ldv,
269 const float *T,
int ldt,
270 float *work,
int ldwork);
272 int plasma_core_sttmqr(plasma_enum_t side, plasma_enum_t trans,
273 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
276 const float *V,
int ldv,
277 const float *T,
int ldt,
278 float *work,
int ldwork);
280 int plasma_core_sttqrt(
int m,
int n,
int ib,
287 int plasma_core_sormlq(plasma_enum_t side, plasma_enum_t trans,
288 int m,
int n,
int k,
int ib,
289 const float *A,
int lda,
290 const float *T,
int ldt,
292 float *work,
int ldwork);
294 int plasma_core_sormqr(plasma_enum_t side, plasma_enum_t trans,
295 int m,
int n,
int k,
int ib,
296 const float *A,
int lda,
297 const float *T,
int ldt,
299 float *work,
int ldwork);
302 void plasma_core_omp_samax(
int colrow,
int m,
int n,
303 const float *A,
int lda,
305 plasma_sequence_t *sequence, plasma_request_t *request);
307 void plasma_core_omp_sgeadd(
308 plasma_enum_t transa,
int m,
int n,
309 float alpha,
const float *A,
int lda,
310 float beta,
float *B,
int ldb,
311 plasma_sequence_t *sequence, plasma_request_t *request);
313 void plasma_core_omp_sgelqt(
int m,
int n,
int ib,
316 plasma_workspace_t work,
317 plasma_sequence_t *sequence, plasma_request_t *request);
319 void plasma_core_omp_sgemm(
320 plasma_enum_t transa, plasma_enum_t transb,
322 float alpha,
const float *A,
int lda,
323 const float *B,
int ldb,
324 float beta,
float *C,
int ldc,
325 plasma_sequence_t *sequence, plasma_request_t *request);
327 void plasma_core_omp_sgeqrt(
int m,
int n,
int ib,
330 plasma_workspace_t work,
331 plasma_sequence_t *sequence, plasma_request_t *request);
333 void plasma_core_omp_sgessq(
int m,
int n,
334 const float *A,
int lda,
335 float *scale,
float *sumsq,
336 plasma_sequence_t *sequence, plasma_request_t *request);
338 void plasma_core_omp_sgessq_aux(
int n,
339 const float *scale,
const float *sumsq,
341 plasma_sequence_t *sequence,
342 plasma_request_t *request);
344 void plasma_core_omp_ssygst(
int itype, plasma_enum_t uplo,
348 plasma_sequence_t *sequence, plasma_request_t *request);
350 void plasma_core_omp_ssymm(
351 plasma_enum_t side, plasma_enum_t uplo,
353 float alpha,
const float *A,
int lda,
354 const float *B,
int ldb,
355 float beta,
float *C,
int ldc,
356 plasma_sequence_t *sequence, plasma_request_t *request);
358 void plasma_core_omp_ssyr2k(
359 plasma_enum_t uplo, plasma_enum_t trans,
361 float alpha,
const float *A,
int lda,
362 const float *B,
int ldb,
363 float beta,
float *C,
int ldc,
364 plasma_sequence_t *sequence, plasma_request_t *request);
366 void plasma_core_omp_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
368 float alpha,
const float *A,
int lda,
369 float beta,
float *C,
int ldc,
370 plasma_sequence_t *sequence, plasma_request_t *request);
372 void plasma_core_omp_ssyssq(plasma_enum_t uplo,
374 const float *A,
int lda,
375 float *scale,
float *sumsq,
376 plasma_sequence_t *sequence, plasma_request_t *request);
378 void plasma_core_omp_ssyssq(plasma_enum_t uplo,
380 const float *A,
int lda,
381 float *scale,
float *sumsq,
382 plasma_sequence_t *sequence, plasma_request_t *request);
384 void plasma_core_omp_ssyssq_aux(
int m,
int n,
385 const float *scale,
const float *sumsq,
387 plasma_sequence_t *sequence,
388 plasma_request_t *request);
390 void plasma_core_omp_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
392 const float *A,
int lda,
394 plasma_sequence_t *sequence, plasma_request_t *request);
396 void plasma_core_omp_slacpy_lapack2tile_band(plasma_enum_t uplo,
398 int m,
int n,
int nb,
int kl,
int ku,
399 const float *A,
int lda,
402 void plasma_core_omp_slacpy_tile2lapack_band(plasma_enum_t uplo,
404 int m,
int n,
int nb,
int kl,
int ku,
405 const float *B,
int ldb,
408 void plasma_core_omp_slange(plasma_enum_t norm,
410 const float *A,
int lda,
411 float *work,
float *result,
412 plasma_sequence_t *sequence, plasma_request_t *request);
414 void plasma_core_omp_slange_aux(plasma_enum_t norm,
416 const float *A,
int lda,
418 plasma_sequence_t *sequence,
419 plasma_request_t *request);
421 void plasma_core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
423 const float *A,
int lda,
424 float *work,
float *value,
425 plasma_sequence_t *sequence, plasma_request_t *request);
427 void plasma_core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
429 const float *A,
int lda,
431 plasma_sequence_t *sequence,
432 plasma_request_t *request);
434 void plasma_core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
436 const float *A,
int lda,
437 float *work,
float *value,
438 plasma_sequence_t *sequence, plasma_request_t *request);
440 void plasma_core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
442 const float *A,
int lda,
444 plasma_sequence_t *sequence,
445 plasma_request_t *request);
447 void plasma_core_omp_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
449 const float *A,
int lda,
450 float *work,
float *value,
451 plasma_sequence_t *sequence, plasma_request_t *request);
453 void plasma_core_omp_slantr_aux(plasma_enum_t norm, plasma_enum_t uplo,
456 const float *A,
int lda,
458 plasma_sequence_t *sequence,
459 plasma_request_t *request);
461 void plasma_core_omp_slascl(plasma_enum_t uplo,
462 float cfrom,
float cto,
465 plasma_sequence_t *sequence, plasma_request_t *request);
467 void plasma_core_omp_slaset(plasma_enum_t uplo,
471 float alpha,
float beta,
474 void plasma_core_omp_slauum(plasma_enum_t uplo,
477 plasma_sequence_t *sequence, plasma_request_t *request);
479 void plasma_core_omp_spotrf(plasma_enum_t uplo,
483 plasma_sequence_t *sequence, plasma_request_t *request);
485 void plasma_core_omp_ssymm(
486 plasma_enum_t side, plasma_enum_t uplo,
488 float alpha,
const float *A,
int lda,
489 const float *B,
int ldb,
490 float beta,
float *C,
int ldc,
491 plasma_sequence_t *sequence, plasma_request_t *request);
493 void plasma_core_omp_ssyr2k(
494 plasma_enum_t uplo, plasma_enum_t trans,
496 float alpha,
const float *A,
int lda,
497 const float *B,
int ldb,
498 float beta,
float *C,
int ldc,
499 plasma_sequence_t *sequence, plasma_request_t *request);
501 void plasma_core_omp_ssyrk(
502 plasma_enum_t uplo, plasma_enum_t trans,
504 float alpha,
const float *A,
int lda,
505 float beta,
float *C,
int ldc,
506 plasma_sequence_t *sequence, plasma_request_t *request);
508 void plasma_core_omp_stradd(
509 plasma_enum_t uplo, plasma_enum_t transa,
511 float alpha,
const float *A,
int lda,
512 float beta,
float *B,
int ldb,
513 plasma_sequence_t *sequence, plasma_request_t *request);
515 void plasma_core_omp_strmm(
516 plasma_enum_t side, plasma_enum_t uplo,
517 plasma_enum_t transa, plasma_enum_t diag,
519 float alpha,
const float *A,
int lda,
521 plasma_sequence_t *sequence, plasma_request_t *request);
523 void plasma_core_omp_strsm(
524 plasma_enum_t side, plasma_enum_t uplo,
525 plasma_enum_t transa, plasma_enum_t diag,
527 float alpha,
const float *A,
int lda,
529 plasma_sequence_t *sequence, plasma_request_t *request);
531 void plasma_core_omp_strssq(plasma_enum_t uplo, plasma_enum_t diag,
533 const float *A,
int lda,
534 float *scale,
float *sumsq,
535 plasma_sequence_t *sequence, plasma_request_t *request);
537 void plasma_core_omp_strtri(plasma_enum_t uplo, plasma_enum_t diag,
541 plasma_sequence_t *sequence, plasma_request_t *request);
543 void plasma_core_omp_stslqt(
int m,
int n,
int ib,
547 plasma_workspace_t work,
548 plasma_sequence_t *sequence, plasma_request_t *request);
550 void plasma_core_omp_stsmlq(plasma_enum_t side, plasma_enum_t trans,
551 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
554 const float *V,
int ldv,
555 const float *T,
int ldt,
556 plasma_workspace_t work,
557 plasma_sequence_t *sequence, plasma_request_t *request);
559 void plasma_core_omp_stsmqr(plasma_enum_t side, plasma_enum_t trans,
560 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
563 const float *V,
int ldv,
564 const float *T,
int ldt,
565 plasma_workspace_t work,
566 plasma_sequence_t *sequence, plasma_request_t *request);
568 void plasma_core_omp_stsqrt(
int m,
int n,
int ib,
572 plasma_workspace_t work,
573 plasma_sequence_t *sequence, plasma_request_t *request);
575 void plasma_core_omp_sttlqt(
int m,
int n,
int ib,
579 plasma_workspace_t work,
580 plasma_sequence_t *sequence, plasma_request_t *request);
582 void plasma_core_omp_sttmlq(plasma_enum_t side, plasma_enum_t trans,
583 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
586 const float *V,
int ldv,
587 const float *T,
int ldt,
588 plasma_workspace_t work,
589 plasma_sequence_t *sequence, plasma_request_t *request);
591 void plasma_core_omp_sttmqr(plasma_enum_t side, plasma_enum_t trans,
592 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
595 const float *V,
int ldv,
596 const float *T,
int ldt,
597 plasma_workspace_t work,
598 plasma_sequence_t *sequence, plasma_request_t *request);
600 void plasma_core_omp_sttqrt(
int m,
int n,
int ib,
604 plasma_workspace_t work,
605 plasma_sequence_t *sequence, plasma_request_t *request);
607 void plasma_core_omp_sormlq(plasma_enum_t side, plasma_enum_t trans,
608 int m,
int n,
int k,
int ib,
609 const float *A,
int lda,
610 const float *T,
int ldt,
612 plasma_workspace_t work,
613 plasma_sequence_t *sequence, plasma_request_t *request);
615 void plasma_core_omp_sormqr(plasma_enum_t side, plasma_enum_t trans,
616 int m,
int n,
int k,
int ib,
617 const float *A,
int lda,
618 const float *T,
int ldt,
620 plasma_workspace_t work,
621 plasma_sequence_t *sequence, plasma_request_t *request);
#endif // PLASMA_CORE_BLAS_S_H