PLASMA
Parallel Linear Algebra Software for Multicore Architectures
plasma_core_blas_s.h
1 
12 #ifndef PLASMA_CORE_BLAS_S_H
13 #define PLASMA_CORE_BLAS_S_H
14 
15 #include "plasma_async.h"
16 #include "plasma_barrier.h"
17 #include "plasma_descriptor.h"
18 #include "plasma_types.h"
19 #include "plasma_workspace.h"
20 #include "plasma_descriptor.h"
21 
22 #ifdef __cplusplus
23 extern "C" {
24 #endif
25 
// Precision marker for this header; it is #undef'd again just before the
// closing extern "C" / include-guard lines so it does not leak to includers.
26 #define REAL
27 
28 /******************************************************************************/
// Declares fabsf only when COMPLEX is defined. This header itself defines
// REAL, not COMPLEX, so the declaration is inactive unless COMPLEX is
// defined elsewhere (an included header or the including file).
// NOTE(review): probably left over from the complex-precision template.
29 #ifdef COMPLEX
30 float fabsf(float alpha);
31 #endif
32 
// Sequential kernel returning int. By its name, presumably the general
// matrix add B = alpha*op(A) + beta*B -- TODO confirm in the implementation.
// A is read-only (const); B is the only writable array argument.
33 int plasma_core_sgeadd(plasma_enum_t transa,
34  int m, int n,
35  float alpha, const float *A, int lda,
36  float beta, float *B, int ldb);
37 
// Sequential kernel returning int. Name follows LAPACK xGELQT (blocked LQ
// factorization with inner block size ib) -- TODO confirm.
// A, T, tau and work are all non-const, i.e. may be written.
38 int plasma_core_sgelqt(int m, int n, int ib,
39  float *A, int lda,
40  float *T, int ldt,
41  float *tau,
42  float *work);
43 
// Sequential kernel with the BLAS SGEMM argument layout; presumably
// C = alpha*op(A)*op(B) + beta*C -- TODO confirm in the implementation.
// A and B are read-only (const); C is the only writable array.
44 void plasma_core_sgemm(plasma_enum_t transa, plasma_enum_t transb,
45  int m, int n, int k,
46  float alpha, const float *A, int lda,
47  const float *B, int ldb,
48  float beta, float *C, int ldc);
49 
// Sequential kernel returning int. Name follows LAPACK xGEQRT (blocked QR
// factorization with inner block size ib) -- TODO confirm.
// Same parameter list as plasma_core_sgelqt; all arrays are non-const.
50 int plasma_core_sgeqrt(int m, int n, int ib,
51  float *A, int lda,
52  float *T, int ldt,
53  float *tau,
54  float *work);
55 
// Sequential kernel. Presumably accumulates a scaled sum of squares of the
// m-by-n matrix A into (scale, sumsq), LAPACK xLASSQ style -- TODO confirm.
// A is read-only; scale and sumsq are writable.
56 void plasma_core_sgessq(int m, int n,
57  const float *A, int lda,
58  float *scale, float *sumsq);
59 
// Takes the matrix as a plasma_desc_t by value plus rank/size, volatile
// max_idx/max_val/info pointers and a plasma_barrier_t. Presumably an LU
// panel factorization executed cooperatively by `size` threads, this call
// being thread `rank` -- TODO confirm in the implementation.
60 void plasma_core_sgetrf(plasma_desc_t A, int *ipiv, int ib, int rank, int size,
61  volatile int *max_idx, volatile float *max_val,
62  volatile int *info, plasma_barrier_t *barrier);
63 
// Sequential kernel returning int. Name follows LAPACK xSYGST (reduction of
// a symmetric-definite generalized eigenproblem to standard form) -- TODO
// confirm. Both A and B are non-const here.
64 int plasma_core_ssygst(int itype, plasma_enum_t uplo,
65  int n,
66  float *A, int lda,
67  float *B, int ldb);
68 
// Sequential kernel with the BLAS SSYMM argument layout (symmetric
// matrix-matrix multiply) -- semantics TODO confirm.
// A and B are read-only; C is the only writable array.
69 void plasma_core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
70  int m, int n,
71  float alpha, const float *A, int lda,
72  const float *B, int ldb,
73  float beta, float *C, int ldc);
74 
// Sequential kernel with the BLAS SSYR2K argument layout (symmetric
// rank-2k update) -- semantics TODO confirm.
// A and B are read-only; C is the only writable array.
75 void plasma_core_ssyr2k(plasma_enum_t uplo, plasma_enum_t trans,
76  int n, int k,
77  float alpha, const float *A, int lda,
78  const float *B, int ldb,
79  float beta, float *C, int ldc);
80 
// Sequential kernel with the BLAS SSYRK argument layout (symmetric rank-k
// update) -- semantics TODO confirm.
// A is read-only; C is the only writable array.
81 void plasma_core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
82  int n, int k,
83  float alpha, const float *A, int lda,
84  float beta, float *C, int ldc);
85 
// Sequential kernel. Presumably the symmetric counterpart of
// plasma_core_sgessq: accumulates a scaled sum of squares of the n-by-n
// matrix A (triangle selected by uplo) into (scale, sumsq) -- TODO confirm.
// A is read-only; scale and sumsq are writable.
// The identical prototype used to be declared twice in a row; one
// declaration is sufficient (avoids -Wredundant-decls noise).
86 void plasma_core_ssyssq(plasma_enum_t uplo,
87  int n,
88  const float *A, int lda,
89  float *scale, float *sumsq);
95 
// Sequential kernel. Name follows LAPACK xLACPY (copy all or a triangle of
// a matrix); the extra transa argument suggests an optional transpose --
// TODO confirm. A is the read-only source, B the writable destination.
96 void plasma_core_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
97  int m, int n,
98  const float *A, int lda,
99  float *B, int ldb);
100 
// Sequential kernel. Presumably copies the part of a LAPACK-layout band
// matrix (kl sub-/ku super-diagonals) that belongs to tile (it, jt) into
// tile storage -- TODO confirm. A is the read-only source, B is written.
101 void plasma_core_slacpy_lapack2tile_band(plasma_enum_t uplo,
102  int it, int jt,
103  int m, int n, int nb, int kl, int ku,
104  const float *A, int lda,
105  float *B, int ldb);
106 
// Sequential kernel; presumably the reverse of
// plasma_core_slacpy_lapack2tile_band -- TODO confirm.
// Note the argument roles are swapped: B is the read-only source and A is
// the writable destination.
107 void plasma_core_slacpy_tile2lapack_band(plasma_enum_t uplo,
108  int it, int jt,
109  int m, int n, int nb, int kl, int ku,
110  const float *B, int ldb,
111  float *A, int lda);
112 
// Sequential kernel. Name follows LAPACK xLANGE (norm of a general m-by-n
// matrix, kind selected by `norm`) -- TODO confirm.
// A is read-only; the output goes through `result` (the sibling norm
// kernels name this parameter `value`), with `work` as writable scratch.
113 void plasma_core_slange(plasma_enum_t norm,
114  int m, int n,
115  const float *A, int lda,
116  float *work, float *result);
117 
// Sequential kernel. Name follows LAPACK xLANSY (norm of a symmetric n-by-n
// matrix, triangle selected by uplo) -- TODO confirm.
// A is read-only; `work` and `value` are writable.
// The identical prototype used to be declared twice in a row; one
// declaration is sufficient (avoids -Wredundant-decls noise).
118 void plasma_core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
119  int n,
120  const float *A, int lda,
121  float *work, float *value);
127 
// Sequential kernel. Name follows LAPACK xLANTR (norm of a triangular or
// trapezoidal m-by-n matrix) -- TODO confirm.
// A is read-only; `work` and `value` are writable.
128 void plasma_core_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
129  int m, int n,
130  const float *A, int lda,
131  float *work, float *value);
132 
// Sequential kernel. Name follows LAPACK xLASCL (scale a matrix by
// cto/cfrom) -- TODO confirm. A is modified in place (non-const).
133 void plasma_core_slascl(plasma_enum_t uplo,
134  float cfrom, float cto,
135  int m, int n,
136  float *A, int lda);
137 
// Sequential kernel. Name follows LAPACK xLASET (set off-diagonal entries
// to alpha and diagonal entries to beta) -- TODO confirm.
// A is the only array argument and is writable.
138 void plasma_core_slaset(plasma_enum_t uplo,
139  int m, int n,
140  float alpha, float beta,
141  float *A, int lda);
142 
// Operates on a plasma_desc_t passed by value. Presumably applies the row
// or column interchanges (selected by colrow) recorded in ipiv for indices
// k1..k2 with stride incx, LAPACK xLASWP style -- TODO confirm.
// ipiv is read-only.
143 void plasma_core_sgeswp(plasma_enum_t colrow,
144  plasma_desc_t A, int k1, int k2, const int *ipiv, int incx);
145 
// Takes rank/num_threads and a plasma_barrier_t, so presumably a
// cooperative multi-threaded symmetric row/column swap driven by ipiv --
// TODO confirm. ipiv is read-only.
// NOTE(review): uplo is declared `int` here while the other prototypes in
// this header use plasma_enum_t for uplo.
146 void plasma_core_ssyswp(int rank, int num_threads,
147  int uplo, plasma_desc_t A, int k1, int k2, const int *ipiv,
148  int incx, plasma_barrier_t *barrier);
149 
// Sequential kernel returning int. Name follows LAPACK xLAUUM (product
// U*U^T or L^T*L of a triangular factor) -- TODO confirm.
// A is modified in place (non-const).
150 int plasma_core_slauum(plasma_enum_t uplo,
151  int n,
152  float *A, int lda);
153 
// Sequential kernel returning int. Presumably a pentagonal-matrix multiply
// helper used by plasma_core_sparfb (op selects the operation) -- TODO
// confirm. A1 and V are read-only; A2 and W are writable.
154 int plasma_core_spamm(plasma_enum_t op, plasma_enum_t side, plasma_enum_t storev,
155  int m, int n, int k, int l,
156  const float *A1, int lda1,
157  float *A2, int lda2,
158  const float *V, int ldv,
159  float *W, int ldw);
160 
// Sequential kernel returning int. Presumably applies a block reflector
// (V, T) to the stacked pair (A1, A2), in the spirit of LAPACK xLARFB /
// xTPRFB -- TODO confirm. V and T are read-only; A1, A2 and work are
// writable.
161 int plasma_core_sparfb(plasma_enum_t side, plasma_enum_t trans, plasma_enum_t direct,
162  plasma_enum_t storev,
163  int m1, int n1, int m2, int n2, int k, int l,
164  float *A1, int lda1,
165  float *A2, int lda2,
166  const float *V, int ldv,
167  const float *T, int ldt,
168  float *work, int ldwork);
169 
// Sequential kernel returning int. Presumably a pentagonal matrix-vector
// product Y = alpha*op(A)*X + beta*Y -- TODO confirm.
// A and X are read-only; Y and work are writable.
// NOTE(review): storev is declared `int` here while plasma_core_spamm and
// plasma_core_sparfb declare it as plasma_enum_t.
170 int plasma_core_spemv(plasma_enum_t trans, int storev,
171  int m, int n, int l,
172  float alpha,
173  const float *A, int lda,
174  const float *X, int incx,
175  float beta,
176  float *Y, int incy,
177  float *work);
178 
// Sequential kernel returning int. Name follows LAPACK xPOTRF (Cholesky
// factorization) -- TODO confirm. A is modified in place (non-const).
179 int plasma_core_spotrf(plasma_enum_t uplo,
180  int n,
181  float *A, int lda);
182 
// NOTE(review): repeats the plasma_core_ssymm prototype that already
// appears earlier in this header with the same parameter list. The
// redeclaration is compatible and therefore harmless, but redundant
// (presumably an artifact of precision generation -- confirm).
183 void plasma_core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
184  int m, int n,
185  float alpha, const float *A, int lda,
186  const float *B, int ldb,
187  float beta, float *C, int ldc);
188 
// NOTE(review): repeats the plasma_core_ssyr2k prototype that already
// appears earlier in this header with the same parameter list (only the
// line breaks differ). Compatible and harmless, but redundant.
189 void plasma_core_ssyr2k(
190  plasma_enum_t uplo, plasma_enum_t trans,
191  int n, int k,
192  float alpha, const float *A, int lda,
193  const float *B, int ldb,
194  float beta, float *C, int ldc);
195 
// NOTE(review): repeats the plasma_core_ssyrk prototype that already
// appears earlier in this header with the same parameter list.
// Compatible and harmless, but redundant.
196 void plasma_core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
197  int n, int k,
198  float alpha, const float *A, int lda,
199  float beta, float *C, int ldc);
200 
// Sequential kernel returning int. Presumably the triangular/trapezoidal
// variant of plasma_core_sgeadd (uplo selects the part that is updated) --
// TODO confirm. A is read-only; B is the only writable array.
201 int plasma_core_stradd(plasma_enum_t uplo, plasma_enum_t transa,
202  int m, int n,
203  float alpha, const float *A, int lda,
204  float beta, float *B, int ldb);
205 
// Sequential kernel with the BLAS STRMM argument layout (triangular
// matrix-matrix multiply, result overwriting B) -- semantics TODO confirm.
// A is read-only; B is writable.
206 void plasma_core_strmm(plasma_enum_t side, plasma_enum_t uplo,
207  plasma_enum_t transa, plasma_enum_t diag,
208  int m, int n,
209  float alpha, const float *A, int lda,
210  float *B, int ldb);
211 
// Sequential kernel with the BLAS STRSM argument layout (triangular solve
// with multiple right-hand sides, solution overwriting B) -- semantics
// TODO confirm. A is read-only; B is writable.
212 void plasma_core_strsm(plasma_enum_t side, plasma_enum_t uplo,
213  plasma_enum_t transa, plasma_enum_t diag,
214  int m, int n,
215  float alpha, const float *A, int lda,
216  float *B, int ldb);
217 
// Sequential kernel. Presumably the triangular counterpart of
// plasma_core_sgessq (scaled sum of squares restricted by uplo/diag) --
// TODO confirm. A is read-only; scale and sumsq are writable.
218 void plasma_core_strssq(plasma_enum_t uplo, plasma_enum_t diag,
219  int m, int n,
220  const float *A, int lda,
221  float *scale, float *sumsq);
222 
// Sequential kernel returning int. Name follows LAPACK xTRTRI (inverse of
// a triangular matrix) -- TODO confirm. A is modified in place (non-const).
223 int plasma_core_strtri(plasma_enum_t uplo, plasma_enum_t diag,
224  int n,
225  float *A, int lda);
226 
// Sequential kernel returning int. Presumably the LQ factorization of a
// tile pair [A1 A2] with A1 triangular ("ts" = triangle on top of square)
// -- TODO confirm. All array arguments are non-const.
227 int plasma_core_stslqt(int m, int n, int ib,
228  float *A1, int lda1,
229  float *A2, int lda2,
230  float *T, int ldt,
231  float *tau,
232  float *work);
233 
// Sequential kernel returning int. Presumably applies the reflectors
// produced by plasma_core_stslqt (V, T) to the tile pair (A1, A2) --
// TODO confirm. V and T are read-only; A1, A2 and work are writable.
234 int plasma_core_stsmlq(plasma_enum_t side, plasma_enum_t trans,
235  int m1, int n1, int m2, int n2, int k, int ib,
236  float *A1, int lda1,
237  float *A2, int lda2,
238  const float *V, int ldv,
239  const float *T, int ldt,
240  float *work, int ldwork);
241 
// Sequential kernel returning int. Presumably applies the reflectors
// produced by plasma_core_stsqrt (V, T) to the tile pair (A1, A2) --
// TODO confirm. V and T are read-only; A1, A2 and work are writable.
242 int plasma_core_stsmqr(plasma_enum_t side, plasma_enum_t trans,
243  int m1, int n1, int m2, int n2, int k, int ib,
244  float *A1, int lda1,
245  float *A2, int lda2,
246  const float *V, int ldv,
247  const float *T, int ldt,
248  float *work, int ldwork);
249 
// Sequential kernel returning int. Presumably the QR factorization of a
// tile pair [A1; A2] with A1 triangular ("ts" = triangle on top of square)
// -- TODO confirm. All array arguments are non-const.
250 int plasma_core_stsqrt(int m, int n, int ib,
251  float *A1, int lda1,
252  float *A2, int lda2,
253  float *T, int ldt,
254  float *tau,
255  float *work);
256 
// Sequential kernel returning int. Presumably the LQ factorization of a
// tile pair in which both tiles are triangular ("tt") -- TODO confirm.
// Same parameter list as plasma_core_stslqt; all arrays are non-const.
257 int plasma_core_sttlqt(int m, int n, int ib,
258  float *A1, int lda1,
259  float *A2, int lda2,
260  float *T, int ldt,
261  float *tau,
262  float *work);
263 
// Sequential kernel returning int. Presumably applies the reflectors
// produced by plasma_core_sttlqt (V, T) to the tile pair (A1, A2) --
// TODO confirm. V and T are read-only; A1, A2 and work are writable.
264 int plasma_core_sttmlq(plasma_enum_t side, plasma_enum_t trans,
265  int m1, int n1, int m2, int n2, int k, int ib,
266  float *A1, int lda1,
267  float *A2, int lda2,
268  const float *V, int ldv,
269  const float *T, int ldt,
270  float *work, int ldwork);
271 
// Sequential kernel returning int. Presumably applies the reflectors
// produced by plasma_core_sttqrt (V, T) to the tile pair (A1, A2) --
// TODO confirm. V and T are read-only; A1, A2 and work are writable.
272 int plasma_core_sttmqr(plasma_enum_t side, plasma_enum_t trans,
273  int m1, int n1, int m2, int n2, int k, int ib,
274  float *A1, int lda1,
275  float *A2, int lda2,
276  const float *V, int ldv,
277  const float *T, int ldt,
278  float *work, int ldwork);
279 
// Sequential kernel returning int. Presumably the QR factorization of a
// tile pair in which both tiles are triangular ("tt") -- TODO confirm.
// Same parameter list as plasma_core_stsqrt; all arrays are non-const.
280 int plasma_core_sttqrt(int m, int n, int ib,
281  float *A1, int lda1,
282  float *A2, int lda2,
283  float *T, int ldt,
284  float *tau,
285  float *work);
286 
// Sequential kernel returning int. Name follows LAPACK xORMLQ (multiply C
// by the orthogonal factor of an LQ factorization stored in A/T) -- TODO
// confirm. A and T are read-only; C and work are writable.
287 int plasma_core_sormlq(plasma_enum_t side, plasma_enum_t trans,
288  int m, int n, int k, int ib,
289  const float *A, int lda,
290  const float *T, int ldt,
291  float *C, int ldc,
292  float *work, int ldwork);
293 
// Sequential kernel returning int. Name follows LAPACK xORMQR (multiply C
// by the orthogonal factor of a QR factorization stored in A/T) -- TODO
// confirm. A and T are read-only; C and work are writable.
294 int plasma_core_sormqr(plasma_enum_t side, plasma_enum_t trans,
295  int m, int n, int k, int ib,
296  const float *A, int lda,
297  const float *T, int ldt,
298  float *C, int ldc,
299  float *work, int ldwork);
300 
301 /******************************************************************************/
// First of the plasma_core_omp_* prototypes, which (mostly) take a trailing
// plasma_sequence_t / plasma_request_t pair; presumably each submits its
// kernel as an OpenMP task -- TODO confirm in the implementation.
// This one presumably writes per-column or per-row maximum absolute values
// of A into `values` -- TODO confirm. A is read-only.
// NOTE(review): colrow is `int` here but plasma_enum_t in
// plasma_core_sgeswp.
302 void plasma_core_omp_samax(int colrow, int m, int n,
303  const float *A, int lda,
304  float *values,
305  plasma_sequence_t *sequence, plasma_request_t *request);
306 
// Same data arguments as plasma_core_sgeadd plus sequence/request;
// returns void where the sequential kernel returns int. Presumably the
// task-submitting wrapper -- TODO confirm.
307 void plasma_core_omp_sgeadd(
308  plasma_enum_t transa, int m, int n,
309  float alpha, const float *A, int lda,
310  float beta, float *B, int ldb,
311  plasma_sequence_t *sequence, plasma_request_t *request);
312 
// Counterpart of plasma_core_sgelqt with sequence/request. The separate
// tau and work arrays of the sequential prototype are replaced by a single
// plasma_workspace_t passed by value. Presumably submits the kernel as a
// task -- TODO confirm.
313 void plasma_core_omp_sgelqt(int m, int n, int ib,
314  float *A, int lda,
315  float *T, int ldt,
316  plasma_workspace_t work,
317  plasma_sequence_t *sequence, plasma_request_t *request);
318 
// Same data arguments as plasma_core_sgemm plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
319 void plasma_core_omp_sgemm(
320  plasma_enum_t transa, plasma_enum_t transb,
321  int m, int n, int k,
322  float alpha, const float *A, int lda,
323  const float *B, int ldb,
324  float beta, float *C, int ldc,
325  plasma_sequence_t *sequence, plasma_request_t *request);
326 
// Counterpart of plasma_core_sgeqrt with sequence/request. The separate
// tau and work arrays of the sequential prototype are replaced by a single
// plasma_workspace_t passed by value. Presumably submits the kernel as a
// task -- TODO confirm.
327 void plasma_core_omp_sgeqrt(int m, int n, int ib,
328  float *A, int lda,
329  float *T, int ldt,
330  plasma_workspace_t work,
331  plasma_sequence_t *sequence, plasma_request_t *request);
332 
// Same data arguments as plasma_core_sgessq plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
333 void plasma_core_omp_sgessq(int m, int n,
334  const float *A, int lda,
335  float *scale, float *sumsq,
336  plasma_sequence_t *sequence, plasma_request_t *request);
337 
// Reads n-sized(?) scale/sumsq arrays (both const) and writes a single
// result through `value`. Presumably reduces the partial (scale, sumsq)
// pairs produced by plasma_core_omp_sgessq into the final norm -- TODO
// confirm array lengths and semantics in the implementation.
338 void plasma_core_omp_sgessq_aux(int n,
339  const float *scale, const float *sumsq,
340  float *value,
341  plasma_sequence_t *sequence,
342  plasma_request_t *request);
343 
// Same data arguments as plasma_core_ssygst plus sequence/request;
// returns void where the sequential kernel returns int. Presumably the
// task-submitting wrapper -- TODO confirm.
344 void plasma_core_omp_ssygst(int itype, plasma_enum_t uplo,
345  int n,
346  float *A, int lda,
347  float *B, int ldb,
348  plasma_sequence_t *sequence, plasma_request_t *request);
349 
// Same data arguments as plasma_core_ssymm plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
350 void plasma_core_omp_ssymm(
351  plasma_enum_t side, plasma_enum_t uplo,
352  int m, int n,
353  float alpha, const float *A, int lda,
354  const float *B, int ldb,
355  float beta, float *C, int ldc,
356  plasma_sequence_t *sequence, plasma_request_t *request);
357 
// Same data arguments as plasma_core_ssyr2k plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
358 void plasma_core_omp_ssyr2k(
359  plasma_enum_t uplo, plasma_enum_t trans,
360  int n, int k,
361  float alpha, const float *A, int lda,
362  const float *B, int ldb,
363  float beta, float *C, int ldc,
364  plasma_sequence_t *sequence, plasma_request_t *request);
365 
// Same data arguments as plasma_core_ssyrk plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
366 void plasma_core_omp_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
367  int n, int k,
368  float alpha, const float *A, int lda,
369  float beta, float *C, int ldc,
370  plasma_sequence_t *sequence, plasma_request_t *request);
371 
// Same data arguments as plasma_core_ssyssq plus a trailing
// plasma_sequence_t / plasma_request_t pair. Presumably the task-submitting
// wrapper -- TODO confirm. A is read-only; scale and sumsq are writable.
// The identical prototype used to be declared twice in a row; one
// declaration is sufficient (avoids -Wredundant-decls noise).
372 void plasma_core_omp_ssyssq(plasma_enum_t uplo,
373  int n,
374  const float *A, int lda,
375  float *scale, float *sumsq,
376  plasma_sequence_t *sequence, plasma_request_t *request);
383 
// Reads scale/sumsq arrays (both const) and writes a single result through
// `value`. Presumably reduces the partial (scale, sumsq) pairs produced by
// plasma_core_omp_ssyssq -- TODO confirm.
// Unlike plasma_core_omp_sgessq_aux, which takes only n, this takes m and n.
384 void plasma_core_omp_ssyssq_aux(int m, int n,
385  const float *scale, const float *sumsq,
386  float *value,
387  plasma_sequence_t *sequence,
388  plasma_request_t *request);
389 
// Same data arguments as plasma_core_slacpy plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
390 void plasma_core_omp_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
391  int m, int n,
392  const float *A, int lda,
393  float *B, int ldb,
394  plasma_sequence_t *sequence, plasma_request_t *request);
395 
// Parameter list is identical to plasma_core_slacpy_lapack2tile_band.
// NOTE(review): unlike most plasma_core_omp_* prototypes in this header,
// this one takes no plasma_sequence_t / plasma_request_t arguments.
396 void plasma_core_omp_slacpy_lapack2tile_band(plasma_enum_t uplo,
397  int it, int jt,
398  int m, int n, int nb, int kl, int ku,
399  const float *A, int lda,
400  float *B, int ldb);
401 
// Parameter list is identical to plasma_core_slacpy_tile2lapack_band
// (B is the read-only source, A the writable destination).
// NOTE(review): unlike most plasma_core_omp_* prototypes in this header,
// this one takes no plasma_sequence_t / plasma_request_t arguments.
402 void plasma_core_omp_slacpy_tile2lapack_band(plasma_enum_t uplo,
403  int it, int jt,
404  int m, int n, int nb, int kl, int ku,
405  const float *B, int ldb,
406  float *A, int lda);
407 
// Same data arguments as plasma_core_slange plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
408 void plasma_core_omp_slange(plasma_enum_t norm,
409  int m, int n,
410  const float *A, int lda,
411  float *work, float *result,
412  plasma_sequence_t *sequence, plasma_request_t *request);
413 
// Auxiliary variant of plasma_core_omp_slange: no work array, and the
// output pointer is named `value`. Presumably computes per-tile partial
// quantities that a later step combines into the norm -- TODO confirm.
// A is read-only.
414 void plasma_core_omp_slange_aux(plasma_enum_t norm,
415  int m, int n,
416  const float *A, int lda,
417  float *value,
418  plasma_sequence_t *sequence,
419  plasma_request_t *request);
420 
// Same data arguments as plasma_core_slansy plus a trailing
// plasma_sequence_t / plasma_request_t pair. Presumably the task-submitting
// wrapper -- TODO confirm. A is read-only; work and value are writable.
// This prototype and plasma_core_omp_slansy_aux below used to be declared
// twice in a row; the verbatim second copies were dropped (avoids
// -Wredundant-decls noise).
421 void plasma_core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
422  int n,
423  const float *A, int lda,
424  float *work, float *value,
425  plasma_sequence_t *sequence, plasma_request_t *request);
426 
// Auxiliary variant without a work array. Presumably computes partial
// quantities that a later step combines into the norm -- TODO confirm.
// A is read-only; value is writable.
427 void plasma_core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
428  int n,
429  const float *A, int lda,
430  float *value,
431  plasma_sequence_t *sequence,
432  plasma_request_t *request);
446 
// Same data arguments as plasma_core_slantr plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
447 void plasma_core_omp_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
448  int m, int n,
449  const float *A, int lda,
450  float *work, float *value,
451  plasma_sequence_t *sequence, plasma_request_t *request);
452 
// Auxiliary variant of plasma_core_omp_slantr without a work array.
// Presumably computes partial quantities that a later step combines into
// the norm -- TODO confirm. A is read-only; value is writable.
453 void plasma_core_omp_slantr_aux(plasma_enum_t norm, plasma_enum_t uplo,
454  plasma_enum_t diag,
455  int m, int n,
456  const float *A, int lda,
457  float *value,
458  plasma_sequence_t *sequence,
459  plasma_request_t *request);
460 
// Same data arguments as plasma_core_slascl plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
461 void plasma_core_omp_slascl(plasma_enum_t uplo,
462  float cfrom, float cto,
463  int m, int n,
464  float *A, int lda,
465  plasma_sequence_t *sequence, plasma_request_t *request);
466 
// Signature differs from plasma_core_slaset: it takes (mb, nb), (i, j) and
// (m, n) and no lda. Presumably mb-by-nb is the tile size, (i, j) the
// offset inside the tile and m-by-n the region to set -- TODO confirm.
// NOTE(review): unlike most plasma_core_omp_* prototypes in this header,
// this one takes no plasma_sequence_t / plasma_request_t arguments.
467 void plasma_core_omp_slaset(plasma_enum_t uplo,
468  int mb, int nb,
469  int i, int j,
470  int m, int n,
471  float alpha, float beta,
472  float *A);
473 
// Same data arguments as plasma_core_slauum plus sequence/request;
// returns void where the sequential kernel returns int. Presumably the
// task-submitting wrapper -- TODO confirm.
474 void plasma_core_omp_slauum(plasma_enum_t uplo,
475  int n,
476  float *A, int lda,
477  plasma_sequence_t *sequence, plasma_request_t *request);
478 
// Data arguments of plasma_core_spotrf plus an extra `iinfo` and
// sequence/request; returns void where the sequential kernel returns int.
// Presumably iinfo is an index offset used when reporting a failure through
// the request -- TODO confirm in the implementation.
479 void plasma_core_omp_spotrf(plasma_enum_t uplo,
480  int n,
481  float *A, int lda,
482  int iinfo,
483  plasma_sequence_t *sequence, plasma_request_t *request);
484 
// NOTE(review): repeats the plasma_core_omp_ssymm prototype that already
// appears earlier in this header with the same parameter list. The
// redeclaration is compatible and therefore harmless, but redundant
// (presumably an artifact of precision generation -- confirm).
485 void plasma_core_omp_ssymm(
486  plasma_enum_t side, plasma_enum_t uplo,
487  int m, int n,
488  float alpha, const float *A, int lda,
489  const float *B, int ldb,
490  float beta, float *C, int ldc,
491  plasma_sequence_t *sequence, plasma_request_t *request);
492 
// NOTE(review): repeats the plasma_core_omp_ssyr2k prototype that already
// appears earlier in this header with the same parameter list.
// Compatible and harmless, but redundant.
493 void plasma_core_omp_ssyr2k(
494  plasma_enum_t uplo, plasma_enum_t trans,
495  int n, int k,
496  float alpha, const float *A, int lda,
497  const float *B, int ldb,
498  float beta, float *C, int ldc,
499  plasma_sequence_t *sequence, plasma_request_t *request);
500 
// NOTE(review): repeats the plasma_core_omp_ssyrk prototype that already
// appears earlier in this header with the same parameter list (only the
// line breaks differ). Compatible and harmless, but redundant.
501 void plasma_core_omp_ssyrk(
502  plasma_enum_t uplo, plasma_enum_t trans,
503  int n, int k,
504  float alpha, const float *A, int lda,
505  float beta, float *C, int ldc,
506  plasma_sequence_t *sequence, plasma_request_t *request);
507 
// Same data arguments as plasma_core_stradd plus sequence/request;
// returns void where the sequential kernel returns int. Presumably the
// task-submitting wrapper -- TODO confirm.
508 void plasma_core_omp_stradd(
509  plasma_enum_t uplo, plasma_enum_t transa,
510  int m, int n,
511  float alpha, const float *A, int lda,
512  float beta, float *B, int ldb,
513  plasma_sequence_t *sequence, plasma_request_t *request);
514 
// Same data arguments as plasma_core_strmm plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
515 void plasma_core_omp_strmm(
516  plasma_enum_t side, plasma_enum_t uplo,
517  plasma_enum_t transa, plasma_enum_t diag,
518  int m, int n,
519  float alpha, const float *A, int lda,
520  float *B, int ldb,
521  plasma_sequence_t *sequence, plasma_request_t *request);
522 
// Same data arguments as plasma_core_strsm plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
523 void plasma_core_omp_strsm(
524  plasma_enum_t side, plasma_enum_t uplo,
525  plasma_enum_t transa, plasma_enum_t diag,
526  int m, int n,
527  float alpha, const float *A, int lda,
528  float *B, int ldb,
529  plasma_sequence_t *sequence, plasma_request_t *request);
530 
// Same data arguments as plasma_core_strssq plus sequence/request.
// Presumably the task-submitting wrapper -- TODO confirm.
531 void plasma_core_omp_strssq(plasma_enum_t uplo, plasma_enum_t diag,
532  int m, int n,
533  const float *A, int lda,
534  float *scale, float *sumsq,
535  plasma_sequence_t *sequence, plasma_request_t *request);
536 
// Data arguments of plasma_core_strtri plus an extra `iinfo` and
// sequence/request; returns void where the sequential kernel returns int.
// Presumably iinfo is an index offset used when reporting a failure through
// the request -- TODO confirm in the implementation.
537 void plasma_core_omp_strtri(plasma_enum_t uplo, plasma_enum_t diag,
538  int n,
539  float *A, int lda,
540  int iinfo,
541  plasma_sequence_t *sequence, plasma_request_t *request);
542 
// Counterpart of plasma_core_stslqt with sequence/request; the tau and
// work arrays are replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
543 void plasma_core_omp_stslqt(int m, int n, int ib,
544  float *A1, int lda1,
545  float *A2, int lda2,
546  float *T, int ldt,
547  plasma_workspace_t work,
548  plasma_sequence_t *sequence, plasma_request_t *request);
549 
// Counterpart of plasma_core_stsmlq with sequence/request; the
// (work, ldwork) pair is replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
550 void plasma_core_omp_stsmlq(plasma_enum_t side, plasma_enum_t trans,
551  int m1, int n1, int m2, int n2, int k, int ib,
552  float *A1, int lda1,
553  float *A2, int lda2,
554  const float *V, int ldv,
555  const float *T, int ldt,
556  plasma_workspace_t work,
557  plasma_sequence_t *sequence, plasma_request_t *request);
558 
// Counterpart of plasma_core_stsmqr with sequence/request; the
// (work, ldwork) pair is replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
559 void plasma_core_omp_stsmqr(plasma_enum_t side, plasma_enum_t trans,
560  int m1, int n1, int m2, int n2, int k, int ib,
561  float *A1, int lda1,
562  float *A2, int lda2,
563  const float *V, int ldv,
564  const float *T, int ldt,
565  plasma_workspace_t work,
566  plasma_sequence_t *sequence, plasma_request_t *request);
567 
// Counterpart of plasma_core_stsqrt with sequence/request; the tau and
// work arrays are replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
568 void plasma_core_omp_stsqrt(int m, int n, int ib,
569  float *A1, int lda1,
570  float *A2, int lda2,
571  float *T, int ldt,
572  plasma_workspace_t work,
573  plasma_sequence_t *sequence, plasma_request_t *request);
574 
// Counterpart of plasma_core_sttlqt with sequence/request; the tau and
// work arrays are replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
575 void plasma_core_omp_sttlqt(int m, int n, int ib,
576  float *A1, int lda1,
577  float *A2, int lda2,
578  float *T, int ldt,
579  plasma_workspace_t work,
580  plasma_sequence_t *sequence, plasma_request_t *request);
581 
// Counterpart of plasma_core_sttmlq with sequence/request; the
// (work, ldwork) pair is replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
582 void plasma_core_omp_sttmlq(plasma_enum_t side, plasma_enum_t trans,
583  int m1, int n1, int m2, int n2, int k, int ib,
584  float *A1, int lda1,
585  float *A2, int lda2,
586  const float *V, int ldv,
587  const float *T, int ldt,
588  plasma_workspace_t work,
589  plasma_sequence_t *sequence, plasma_request_t *request);
590 
// Counterpart of plasma_core_sttmqr with sequence/request; the
// (work, ldwork) pair is replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
591 void plasma_core_omp_sttmqr(plasma_enum_t side, plasma_enum_t trans,
592  int m1, int n1, int m2, int n2, int k, int ib,
593  float *A1, int lda1,
594  float *A2, int lda2,
595  const float *V, int ldv,
596  const float *T, int ldt,
597  plasma_workspace_t work,
598  plasma_sequence_t *sequence, plasma_request_t *request);
599 
// Counterpart of plasma_core_sttqrt with sequence/request; the tau and
// work arrays are replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
600 void plasma_core_omp_sttqrt(int m, int n, int ib,
601  float *A1, int lda1,
602  float *A2, int lda2,
603  float *T, int ldt,
604  plasma_workspace_t work,
605  plasma_sequence_t *sequence, plasma_request_t *request);
606 
// Counterpart of plasma_core_sormlq with sequence/request; the
// (work, ldwork) pair is replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
607 void plasma_core_omp_sormlq(plasma_enum_t side, plasma_enum_t trans,
608  int m, int n, int k, int ib,
609  const float *A, int lda,
610  const float *T, int ldt,
611  float *C, int ldc,
612  plasma_workspace_t work,
613  plasma_sequence_t *sequence, plasma_request_t *request);
614 
// Counterpart of plasma_core_sormqr with sequence/request; the
// (work, ldwork) pair is replaced by a plasma_workspace_t passed by value.
// Presumably the task-submitting wrapper -- TODO confirm.
615 void plasma_core_omp_sormqr(plasma_enum_t side, plasma_enum_t trans,
616  int m, int n, int k, int ib,
617  const float *A, int lda,
618  const float *T, int ldt,
619  float *C, int ldc,
620  plasma_workspace_t work,
621  plasma_sequence_t *sequence, plasma_request_t *request);
622 
623 #undef REAL
624 
625 #ifdef __cplusplus
626 } // extern "C"
627 #endif
628 
629 #endif // PLASMA_CORE_BLAS_S_H
Definition: plasma_descriptor.h:40