[object]

ARMv8A ARM Cortex A57

Comparison of different BLAS implementations

Test machine: ARM Cortex A57 @ 2.15 GHz, theoretical maximum throughput of 8.6 (17.2) Gflops in double (single) precision.

BLASFEO target: ARMV8A_ARM_CORTEX_A57

BLAS 3 🔗

GEMM 🔗

dgemm_nn sgemm_nn
dgemm_nt sgemm_nt
dgemm_tn sgemm_tn
dgemm_tt sgemm_tt

SYRK 🔗

dsyrk_ln ssyrk_ln
dsyrk_lt ssyrk_lt
dsyrk_un ssyrk_un
dsyrk_ut ssyrk_ut

TRMM 🔗

dtrmm_rlnn strmm_rlnn
dtrmm_rutn strmm_rutn

TRSM 🔗

dtrsm_llnn strsm_llnn
dtrsm_llnu strsm_llnu
dtrsm_lltn strsm_lltn
dtrsm_lltu strsm_lltu
dtrsm_lunn strsm_lunn
dtrsm_lunu strsm_lunu
dtrsm_lutn strsm_lutn
dtrsm_lutu strsm_lutu
dtrsm_rlnn strsm_rlnn
dtrsm_rlnu strsm_rlnu
dtrsm_rltn strsm_rltn
dtrsm_rltu strsm_rltu
dtrsm_runn strsm_runn
dtrsm_runu strsm_runu
dtrsm_rutn strsm_rutn
dtrsm_rutu strsm_rutu

LAPACK 🔗

GEQRF 🔗

dgeqrf sgeqrf
dgelqf sgelqf

POTRF 🔗

dpotrf_u spotrf_u
dpotrf_l spotrf_l

BLAS 2 🔗

GEMV 🔗

dgemv_n sgemv_n
dgemv_t sgemv_t
dgemv_nt sgemv_nt

SYMV 🔗

dsymv_l ssymv_l

TRMV 🔗

dtrmv_lnn strmv_lnn
dtrmv_ltn strmv_ltn

TRSV 🔗

dtrsv_lnn strsv_lnn
dtrsv_ltn strsv_ltn