From 5cb0fe181bf691a28aaf276e60271773c38fd197 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Thu, 25 Apr 2019 18:43:45 +0200 Subject: add block variant of matrix multiplication --- CMakeLists.txt | 1 + src/blockmath.F90 | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.F90 | 20 ++++++-- 3 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 src/blockmath.F90 diff --git a/CMakeLists.txt b/CMakeLists.txt index 18d3378..bd62935 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,7 @@ set(mull_SRC ${SRC}/bettermath.F90 ${SRC}/dotmath.F90 ${SRC}/bettermath2.F90 + ${SRC}/blockmath.F90 ${SRC}/main.F90 ) diff --git a/src/blockmath.F90 b/src/blockmath.F90 new file mode 100644 index 0000000..247d0b0 --- /dev/null +++ b/src/blockmath.F90 @@ -0,0 +1,151 @@ +! Copyright 2019 Wojciech Kosior + +! This is free and unencumbered software released into the public domain. + +! Anyone is free to copy, modify, publish, use, compile, sell, or +! distribute this software, either in source code form or as a compiled +! binary, for any purpose, commercial or non-commercial, and by any +! means. + +! In jurisdictions that recognize copyright laws, the author or authors +! of this software dedicate any and all copyright interest in the +! software to the public domain. We make this dedication for the benefit +! of the public at large and to the detriment of our heirs and +! successors. We intend this dedication to be an overt act of +! relinquishment in perpetuity of all present and future rights to this +! software under copyright law. + +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +! IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +! OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +! OTHER DEALINGS IN THE SOFTWARE. + +! For more information, please refer to + +MODULE blockmat + IMPLICIT none + PRIVATE + + integer, public :: blocksize = 25 + + PUBLIC :: blockmull + PRIVATE :: blockmull_4, blockmull_8, blockmull_16 + + INTERFACE blockmull + procedure blockmull_4, blockmull_8, blockmull_16 + END INTERFACE blockmull + +CONTAINS + + FUNCTION blockmull_4(A, B) result(C) + IMPLICIT none + real(kind=4), intent(in), dimension(1:,1:) :: A, B + real(kind=4), dimension(size(A, 1), size(B, 2)) :: C + integer :: i, ii, ii_max, iiend, j, k, kk, kk_max, kkend + + C = 0 + + kk_max = (size(A, 2) / blocksize) * blocksize + 1 + ii_max = (size(A, 1) / blocksize) * blocksize + 1 + + DO kk = 1, kk_max, blocksize + IF (kk .lt. kk_max) kkend = kk + blocksize - 1 + IF (kk .eq. kk_max) THEN + kkend = size(A, 2) + IF (kk .gt. kkend) EXIT + END IF + + DO ii = 1, ii_max, blocksize + IF (ii .lt. ii_max) iiend = ii + blocksize - 1 + IF (ii .eq. ii_max) THEN + iiend = size(A, 1) + IF (ii .gt. iiend) EXIT + END IF + + DO j = 1, size(B, 2) + DO k = kk, kkend + + C(ii:iiend,j) = C(ii:iiend,j) + A(ii:iiend,k) * B(k,j) + END DO + END DO + END DO + END DO + + END FUNCTION blockmull_4 + + FUNCTION blockmull_8(A, B) result(C) + IMPLICIT none + real(kind=8), intent(in), dimension(1:,1:) :: A, B + real(kind=8), dimension(size(A, 1), size(B, 2)) :: C + integer :: i, ii, ii_max, iiend, j, k, kk, kk_max, kkend + + C = 0 + + kk_max = (size(A, 2) / blocksize) * blocksize + 1 + ii_max = (size(A, 1) / blocksize) * blocksize + 1 + + DO kk = 1, kk_max, blocksize + IF (kk .lt. kk_max) kkend = kk + blocksize - 1 + IF (kk .eq. kk_max) THEN + kkend = size(A, 2) + IF (kk .gt. kkend) EXIT + END IF + + DO ii = 1, ii_max, blocksize + IF (ii .lt. ii_max) iiend = ii + blocksize - 1 + IF (ii .eq. ii_max) THEN + iiend = size(A, 1) + IF (ii .gt. iiend) EXIT + END IF + + DO j = 1, size(B, 2) + DO k = kk, kkend + + C(ii:iiend,j) = C(ii:iiend,j) + A(ii:iiend,k) * B(k,j) + END DO + END DO + END DO + END DO + + END FUNCTION blockmull_8 + + FUNCTION blockmull_16(A, B) result(C) + IMPLICIT none + real(kind=16), intent(in), dimension(1:,1:) :: A, B + real(kind=16), dimension(size(A, 1), size(B, 2)) :: C + integer :: i, ii, ii_max, iiend, j, k, kk, kk_max, kkend + + C = 0 + + kk_max = (size(A, 2) / blocksize) * blocksize + 1 + ii_max = (size(A, 1) / blocksize) * blocksize + 1 + + DO kk = 1, kk_max, blocksize + IF (kk .lt. kk_max) kkend = kk + blocksize - 1 + IF (kk .eq. kk_max) THEN + kkend = size(A, 2) + IF (kk .gt. kkend) EXIT + END IF + + DO ii = 1, ii_max, blocksize + IF (ii .lt. ii_max) iiend = ii + blocksize - 1 + IF (ii .eq. ii_max) THEN + iiend = size(A, 1) + IF (ii .gt. iiend) EXIT + END IF + + DO j = 1, size(B, 2) + DO k = kk, kkend + + C(ii:iiend,j) = C(ii:iiend,j) + A(ii:iiend,k) * B(k,j) + END DO + END DO + END DO + END DO + + END FUNCTION blockmull_16 + +END MODULE blockmat diff --git a/src/main.F90 b/src/main.F90 index f28bbb8..7c03462 100644 --- a/src/main.F90 +++ b/src/main.F90 @@ -3,6 +3,7 @@ PROGRAM mul USE bettmat USE dotmat USE bettmat2 + USE blockmat USE iso_fortran_env, only: error_unit IMPLICIT none @@ -38,6 +39,8 @@ PROGRAM mul multype = 4 ELSE IF (trim(impl_arg) .eq. "bett2") THEN multype = 5 + ELSE IF (trim(impl_arg) .eq. "block") THEN + multype = 6 ELSE write (error_unit, '(A)') "Unrecognized implementation argument" call print_usage() @@ -73,7 +76,8 @@ CONTAINS write (*, '(A)') & "Usage: mull KIND IMPLEMENTATION" // char(10) // & "where KIND is one of: 4, 8, 16" // char(10) // & - " IMPLEMENTATION is one of: naiv, bett, dot, mat" + " IMPLEMENTATION is one of: " // & + "naiv, bett, dot, mat, bett2, block" END SUBROUTINE print_usage @@ -113,8 +117,10 @@ CONTAINS res = dotmull(mat1, mat2) CASE (4) res = matmul(mat1, mat2) - CASE default + CASE (5) res = bett2mull(mat1, mat2) + CASE default + res = blockmull(mat1, mat2) END SELECT @@ -150,8 +156,10 @@ CONTAINS res = dotmull(mat1, mat2) CASE (4) res = matmul(mat1, mat2) - CASE default + CASE (5) res = bett2mull(mat1, mat2) + CASE default + res = blockmull(mat1, mat2) END SELECT @@ -187,13 +195,15 @@ CONTAINS res = dotmull(mat1, mat2) CASE (4) res = matmul(mat1, mat2) - CASE default + CASE (5) res = bett2mull(mat1, mat2) + CASE default + res = blockmull(mat1, mat2) END SELECT call cpu_time(end) - + time = end - start END FUNCTION measure_16 -- cgit v1.2.3