y A A x: ← y+Ax ← A+xy ∈ R ,y,z ∈ R ← y + Ax ← A + xy ,Φ ← A + Bc
y A A x: ← y+Ax ← A+xy ∈ R ,y,z ∈ R ← y + Ax ← A + xy ,Φ ← A + Bc
I
I.
!"
# $
%
&
'
(
) "
&
* " +
&
&
# *
"
&
#
& load
& &
& store* " ,
&
#
*
" -
&
& #
* "
& miss.
#
& # !
/
# 0 "
1 $
&
, min
$y \leftarrow y + Ax$, $A \leftarrow A + xy^\top$, where $A \in \mathbb{R}^{n \times n}$ and $x, y, z \in \mathbb{R}^{n}$
&
& BLAS-2
2 MV y y + Ax) = n(2n 1), min = n2 + 3n 2 GER A A + xy ) = 2n2 , min =
2n2 + 2n 3
.
#
&
n
4 &.
# A A + BC
5
#
& 2'
BLAS-2
II.
6
IEEE &
! 1. 71 bits
/0
#&.
&
&.
& " &
/ 0 /
8
0 4
&
u9
:
#
& .
&
& x = 1. .
t = 53
&
! &;
" 4
&
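$$u = \max_{x} \frac{|x - \mathrm{fl}(x)|}{|x|} = \frac{|0.0\cdots 0\,1\,1\cdots|}{1.0} = 2^{-t} + 2^{-t-1} + \cdots = 2^{-t}\left(1 + 2^{-1} + \cdots\right) = 2^{-t}\,\frac{1}{1 - 1/2} = 2^{-t+1} = 2^{-52}.$$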
! 2
. &
/max0.
#
/sup0"
1 6
realmax, realmin
&
+
)
i) realmax+ realmin ii) realmin/0 iii) realmin/2 == 0 iv) realmax+realmax/2
i) realmax+ realmin = realmax
&
&
realmax+ realmin = realmax (1+realmin/realmax)
&
#
realmin/realmax < eps eps
& (
.
&
realmin
& 8
&
MATLAB 8
.
& < ! realmin = -realmax ii) realmin/0 =
.
& Inf = .
.
<.
& -Inf iii) realmin/2 == 0 4#
& .
&
&.
realmin/2
& <.
&
< /0 iv) realmax+realmax/2 :
&
&
#
Inf >
#
(i)
&
&
&;
realmax/2
5 $
&
#
#
&
s=0; for j = 1:4, s = s+ x(j)y(j); end;
DOT &
&
FMA
!?
) FMA
#
"
s fl(s)
&
. &
6
FMA
&
) s = FMA(t,a,b)
#
s = t+a*b +
&
&
)
s=0; for j = 1:4, s = FMA(s,x(j),y(j)); end; + #
&
)
$$\mathrm{fl}(s) = \Big(\big(\big((s + x(1)y(1))(1+\delta_1) + x(2)y(2)\big)(1+\delta_2) + x(3)y(3)\big)(1+\delta_3) + x(4)y(4)\Big)(1+\delta_4)$$
$$= x(1)y(1)(1+\theta_4) + x(2)y(2)(1+\theta_3) + x(3)y(3)(1+\theta_2) + x(4)y(4)(1+\theta_1)$$
>
&
|fl(s) s| |s|4 .
III. @
&
#
#
&
)
s=x(1)*x(1); for j = 2:n, s = s+ x(j)*x(j); end;
& x(j)
& x( j) x( j) < Inf
$
&
&
&
& #
=
.
& +
s
&
$f(x) = \sum_{j=1}^{n} \xi_j^2$.
&
#
j
x( j) 4 . f : Rn R+ . R+ &;
4 . A f
& ! " JR1n )
$$J = \left[\frac{\partial f}{\partial \xi_1}, \ldots, \frac{\partial f}{\partial \xi_n}\right] = [2\xi_1, \ldots, 2\xi_n] = 2x^\top.$$
-
&
&
&;
)
$$\mathrm{cond}(f; x) = \frac{\|J\|_2\,\|x\|_2}{|s|} = \frac{\|2x\|_2\,\|x\|_2}{|s|} = 2\,\frac{\|x\|_2^2}{s}$$
# s
& 4 .
&
.
$s = \|x\|_2^2$
&
&
cond( f ; x) = 2
2 & #.
)
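$$\mathrm{fl}(s) = \xi_1^2\langle n\rangle + \xi_2^2\langle n\rangle + \xi_3^2\langle n-1\rangle + \cdots + \xi_n^2\langle 2\rangle, \qquad \langle j\rangle = \prod_{k=1}^{j}(1+\epsilon_k)$$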
&
&
&
+
& x
& & x 4
.
$\tilde{x} - x = \left[\,\xi_1(1+\theta_n)^{1/2} - \xi_1,\ \xi_2(1+\theta_n)^{1/2} - \xi_2,\ \xi_3(1+\theta_{n-1})^{1/2} - \xi_3,\ \ldots,\ \xi_n(1+\theta_2)^{1/2} - \xi_n\,\right]$
n x
1 $
&
#
#
s
=
&.
|fl(s) s|
2n .
|s|
IV.
$
& /
0
7 -
&
! "
n
s = 0.0; for j=1:n, s = s+x(j)*y(j); end
@
. MATLAB . rem(x, y)
#
&
x, y -
&
)
m = rem(n,5); s = 0.0;
for j=1:m, s = s+x(j)*y(j); end
for j=m+1:5:n
s = s+x(j)*y(j)+x(j+1)*y(j+1)+x(j+2)*y(j+2)+x(j+3)*y(j+3)+x(j+4)*y(j+4);
end
1 @
&
$y \leftarrow y + A^{k}x$, $k > 1$
&
$A \in \mathbb{R}^{n \times n}$, $x, y \in \mathbb{R}^{n}$
&
&
, min & ! . &
kn3 "
6
#
& &
&)
LOAD A, x, y
for kstep = 1 : k
for i = 1 : n
z(i) = 0
for j = 1 : n
z(i) = z(i) + A(i, j)x( j)
end
end
x=z
end
y = y + x;
STORE y
+
& = 2kn2 + n min = n2 + 3n >
&
)
$y = y + A(A(\cdots A(Ax)\cdots))$
.
& #
&;
&
)
LOAD A, x, y
for kstep = 1 : k
for i = 1 : n
z(i) = A(i, 1) x(1)
for j = 2 : n
z(i) = z(i) + A(i, j)x( j)
end
end
x=z
end
y = y + x;
STORE y
$= kn(2n - 1) + n$
5
.
&
&
#
K = O(n)
&
! #
& min " ,.
&
= min
# O(n).
& A
LOAD x
for kstep = 1 : k
for i = 1 : n
z(i) = 0
for j = 1 : n
LOAD A(i, j)
z(i) = z(i) + A(i, j)x( j)
end
end
x=z
end
for i = 1 : n
y(i) = y(i) + x(i);
end
STORE y
+ .
&
#
3n !
x, y, z" (
.
$= 3n + kn^{2}$