nvidia tesla - gpgpu · nvidia® tesla™ case study: cfd dr. graham pullan university of...
TRANSCRIPT
NVIDIA® TESLA™
Case study: CFD
Dr. Graham Pullan
University of Cambridge
Outline
• CFD (for turbomachinery)
• A good fit for GPUs?
• Implementation
• Results
Turbomachinery
Thousands of blades
Arranged in rows
Each blade row has a
bespoke blade profile
designed with CFD
Blade row
CFD of a jet engine fan
Blades coloured by
pressure
Introduction to CFD
Divide the volume into cells
Blade
Flow
Governing equations for each cell
Governing equations for each cell
Conserve:
• Mass
• Momentum
• Energy
Example: mass conservation
• Evaluate mass fluxes on each face
$F_{\text{mass}} = \frac{A}{4} \sum \rho V_n$
Example: mass conservation
• Sum fluxes on faces to find density change in cell
$\Delta\rho_{\text{cell}} = \frac{\Delta t}{\Delta \text{vol}} \sum F_{\text{mass}}$
Example: mass conservation
• Update density
$\Delta\rho_{\text{node}} = \frac{1}{8} \sum \Delta\rho_{\text{cell}}$
(only 4 of 8 surrounding cells shown)
Similarity of steps
Each step uses data from surrounding nodes – “stencil” operation
Similarity of equations
• For each equation (5 in all):
– Set relevant flux (mass, momentum, energy)
– Sum fluxes
– Update nodes
– (plus smoothing – also stencil;
boundary conditions – not stencil)
CPU run times (x86 machines)
Steady approximation – one blade per row
1 blade: 0.5 Mcells, 1 CPU hour
1 stage (2 blades): 1.0 Mcells, 3 CPU hours
1 component (5 stages): 5.0 Mcells, 20 CPU hours
Unsteady approximation – all blades in row
1 component (1000 blades): 500 Mcells, 0.1 M CPU hours
Engine (4000 blades): 2 Gcells, 1 M CPU hours
Peak FLOPs
The purpose of GPUs
Graphics and scientific computing
GPUs are designed to apply the
same shading function
to many pixels simultaneously
Graphics and scientific computing
GPUs are designed to apply the
same function
to many data simultaneously
Are GPUs a good fit for CFD?
• Our CFD code is:
– SIMD (same functions applied to all cells in domain)
– Single precision
– Large datasets (c. 10M nodes) fit on one 4GB Tesla card
• (bandwidth on card is high, c. 102 GB/s;
much slower to/from card, c. 8 GB/s;
and steps in CFD are “memory bound”)
CUDA
• Programming GPUs without the graphics abstraction
• Scalar variables (not graphics-type 4-vectors!)
• Extensions to C (not graphics APIs, e.g. OpenGL)
CUDA
• Programming GPUs without the graphics abstraction
• Scalar variables (not graphics-type 4-vectors!)
• Extensions to C (not graphics APIs, e.g. OpenGL)
• BUT – porting 15,000 lines of existing FORTRAN CFD code to
CUDA is still a lengthy task
Overall strategy
• Divide up domain
– each sub-domain to a thread block
– update nodes in sub-domain with
most efficient stencil operation we
can come up with!
– update sub-domain boundaries
(MPI if needed)
SBLOCK – stencil framework
• SBLOCK framework for stencil operations on structured grids:
– Source-to-source compiler
• Takes in high level kernel definitions
• Produces optimised kernels in C or CUDA
• Allows new stencils to be implemented quickly
• Allows new stencil optimisation strategies to be deployed on
all stencils (without typos!)
SBLOCK
Example SBLOCK definition
kind = "stencil"
bpin = ["a"]
bpout = ["b"]
lookup = ((-1, 0, 0), (0, 0, 0), (1, 0, 0), (0, -1, 0),
(0, 1, 0), (0, 0, -1), (0, 0, 1))
calc = {"lvalue": "b",
"rvalue": """sf1*a[0][0][0] + sfd6*(a[-1][0][0] + a[1][0][0] +
a[0][-1][0] + a[0][1][0] + a[0][0][-1] + a[0][0][1])"""}
C implementation
void smooth(float sf, float *a, float *b)
{
for (k=0; k < nk; k++) {
for (j=0; j < nj; j++) {
for (i=0; i < ni; i++) {
// compute indices i000, im100, etc (not shown) //
b[i000] = sf1*a[i000] +
sfd6*(a[im100] + a[ip100] + a[i0m10] + a[i0p10]
+ a[i00m1] + a[i00p1]);
}
}
}
}
CUDA strategy (after Datta et al.)
• Each thread in a block reads sub-domain data from global
device memory to SM shared memory (coalesced reads for
maximum bandwidth)
• Synch threads
• Update nodes in sub-domain using shared memory and
output result back to global memory
CUDA strategy (after Datta et al.)
• Each thread in a block reads sub-domain data from global
device memory to SM shared memory (coalesced reads for
maximum bandwidth)
• Synch threads
• Update nodes in sub-domain using shared memory and
output result back to global memory
• But shared memory and max threads per block are limited, so
best plan is to march through sub-domain plane-by-plane…
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
!;#5$<)(-)263
CUDA example
__global__ void smooth_kernel(float sf, float *a_d, float *b_d)
{
    __shared__ float a[16][5][3]; // shared memory array
CUDA example
__global__ void smooth_kernel(float sf, float *a_d, float *b_d)
{
    __shared__ float a[16][5][3]; // shared memory array
    a[i][j][0] = a_d[i0m10]; // fetch first three planes
    a[i][j][1] = a_d[i000];
    a[i][j][2] = a_d[i0p10];
    __syncthreads(); // make sure planes are loaded
CUDA example
__global__ void smooth_kernel(float sf, float *a_d, float *b_d)
{
    __shared__ float a[16][5][3]; // shared memory array
    a[i][j][0] = a_d[i0m10]; // fetch first three planes
    a[i][j][1] = a_d[i000];
    a[i][j][2] = a_d[i0p10];
    __syncthreads(); // make sure planes are loaded
    // compute the stencil: //
    b_d[i000] = sf1*a[i][j][1]
              + sfd6*(a[i-1][j][1] + a[i+1][j][1] + a[i][j][0] + a[i][j][2]
                      + a[i][j-1][1] + a[i][j+1][1]);
    // load next "k" plane and repeat //
Turbostream
• CUDA port of existing FORTRAN code (TBLOCK)
• 15,000 lines FORTRAN
• 5,000 lines kernel definitions → 30,000 lines of CUDA
• Runs on CPU or multiple GPUs
• 20x speedup on Tesla C1060 as compared to all cores of a
modern Intel Core 2 Quad.
Turbostream
Turbine geometry Flow solution
Turbostream
• 9 minutes on a Tesla S870 (4 GPUs)
• 12 hours on one 2.5GHz CPU core
FORTRAN vs. CUDA comparison
Fortran
CUDA
Impact of GPU accelerated CFD
• Tesla Personal Supercomputer enables
– Full turbine in 10 minutes (not 12 hours)
– One blade (for design) in 2 minutes
• Tesla cluster enables
– Interactive design of blades for first time
– Use of higher accuracy methods at early stage in design
process
Summary
• Many science applications fit the SIMD model used in GPUs
• CUDA enables science developers to access NVIDIA GPUs
without cumbersome graphics APIs
• Existing codes have to be analysed and re-coded to best fit
the many-core architecture
• The speedups are such that this can be worth doing
• For our application, the step-change in capability is
revolutionary
More information
www.many-core.group.cam.ac.uk