É {b2bxb b babbb]4? b1% yb+b-b&b)b bab bú b bvb2 0...

37
1 1 April 10, 2007

Upload: dodien

Post on 30-Mar-2019

214 views

Category:

Documents


0 download

TRANSCRIPT

11 April 10, 2007

22 April 10, 2007

)

33 April 10, 2007

CPU “

0.01

0.1

1

10

100

1000

10000

100000

1000000

10000000

1970 1980 1990 2000 2010 2020

MIP

S

Pentium® 4

Pentium®

486 386

286 8086

Pentium® Pro

44 April 10, 2007

65nm65nm

20062006

30nm30nm 20nm20nm

45nm45nm

20082008

32nm32nm20102010

15nm15nm

22nm22nm20122012

10nm10nm

IntelIntel

3216842TR

2232456590nm

2

55 April 10, 2007

SPECint 2000

Year Source: SPECInt

66 April 10, 2007

i486

0

5

10

15

20

25

30

0 2 4 6 8

= ^ 1.74

i486 Pentium

Pentium Pro

Pentium 4 (Wmt)

Pentium 4 (Psc)

77 April 10, 2007

0

2

4

6

8

10

1 2 3 4 5 6 7 8 9 10

0

2

4

6

8

10

1 2 3 4 5 6 7 8 9 10( )

0

2

4

6

8

10

1 2 3 4 5 6 7 8 9 10

88 April 10, 2007

0

5

10

15

20

25

30

0 2 4 6 8

= ^ 1.74

Pentium M

i486 Pentium

Pentium Pro

Pentium 4 (Wmt)

Pentium 4 (Psc)

CPU

99 April 10, 2007

:

90nm MOS

50nm50nm SiSi

1.2 nm SiO1.2 nm SiO22

Xj

ToxD

Leff

30%

2

1010 April 10, 2007

C1 C2

C3 C4

1

2

3

4

1

2

1 1

1

2

3

4

1

2

3

4

= 1/4

= 1/2

1111 April 10, 2007

GP GP

GP

GP GP

GP

GP

GP GP

GP

GP GP

SP SP

SP SP

H/W

CC

CC

CC

CC

CC

CC

CC

CC

1212 April 10, 2007

80

4.27GHz

* 1.37 SP TFLOPS

PDE

* 1 SP TFLOPS

* 0.51 SP TFLOPS

* 2 FMAC

2 FLOPS

Source: An 80-tile 1.28 TFLOP Network-on-Chip in 65 nm CMOS, ISSCC’07, Sriram Vangal, Jason Howard, Gregory Ruhl, Saurabh Dighe, Howard Wilson, James Tschanz, David Finan, Priya Iyer, Arvind Singh, Riju Jacob, Shailendra Jain, Sriram Venkataraman, Yatin Hoskote and Nitin Borkar.

1313 April 10, 2007

– Herb Sutter, Dr. Dobb's

– CPUH/W

ISVISV

1414 April 10, 2007

Program SPMD_Emb_Par ()

{

TYPE *tmp, *func();

global_array Data(TYPE);

global_array Res(TYPE);

int N = get_num_procs();

int id = get_proc_id();

if (id==0) setup_problem(N,DATA);

for (int I= 0; I<N;I=I+Num){

tmp = func(I);

Res.accumulate( tmp);

}

}

Program SPMD_Emb_Par ()

{

TYPE *tmp, *func();

global_array Data(TYPE);

global_array Res(TYPE);

int N = get_num_procs();

int id = get_proc_id();

if (id==0) setup_problem(N,DATA);

for (int I= 0; I<N;I=I+Num){

tmp = func(I);

Res.accumulate( tmp);

}

}

Program SPMD_Emb_Par ()

{

TYPE *tmp, *func();

global_array Data(TYPE);

global_array Res(TYPE);

int N = get_num_procs();

int id = get_proc_id();

if (id==0) setup_problem(N,DATA);

for (int I= 0; I<N;I=I+Num){

tmp = func(I);

Res.accumulate( tmp);

}

}

Program SPMD_Emb_Par ()

{

TYPE *tmp, *func();

global_array Data(TYPE);

global_array Res(TYPE);

int Num = get_num_procs();

int id = get_proc_id();

if (id==0) setup_problem(N, Data);

for (int I= ID; I<N;I=I+Num){

tmp = func(I, Data);

Res.accumulate( tmp);

}

}

+

1515 April 10, 2007

? ? ?? ? ?

1616 April 10, 2007

MPI OpenMP Java

1717 April 10, 2007

PC CPU

!!

1818 April 10, 2007

THINK PARALLEL ( )

1919 April 10, 2007

2020 April 10, 2007

Joe Wolf® C++ Fortran

10.0 June 19

Vasanth Tovinkere3

–June 5

Victoria Gromova3

– ®May 15

Dr. David Mackay3

–May 1

Gary Carleton CPU Windows Vista*

April 17

Dr. Tim Mattson April 3

2121 April 10, 2007

33

2222 April 10, 2007

11 22 44 881X1X

2X2X

4X4X

8X8X

2323 April 10, 2007

OpenMP ®

®

OpenMP

® IPP MKL

®

® C++

® VTune™

2424 April 10, 2007

®

2525 April 10, 2007

®

C++

• 32 64

• Windows* Linux* Mac OS X* Microsoft* GNU*

Maya

3D

®

Gerry Hawkins Maya Autodesk

2626 April 10, 2007

Windows*2D Ray Tracing

TBB

2727 April 10, 2007

33

2828 April 10, 2007

– 1

2929 April 10, 2007

3030 April 10, 2007

®

3131 April 10, 2007

33

3232 April 10, 2007

pthreads Windows API

OpenMP

MPI

3333 April 10, 2007

Thread Setup and InitializationCRITICAL_SECTION MyMutex, MyMutex2, MyMutex3;int get_num_cpus (void) {

SYSTEM_INFO si;GetSystemInfo(&si);return (int)si.dwNumberOfProcessors;}

int nthreads = get_num_cpus ();HANDLE *threads = (HANDLE *) alloca (nthreads * sizeof (HANDLE));InitializeCriticalSection (&MyMutex);InitializeCriticalSection (&MyMutex2);InitializeCriticalSection (&MyMutex3);for (int i = 0; i < nthreads; i++) {

DWORD id;&threads[i] = CreateThread (NULL, 0, parallel_thread, i, 0, &id);}

for (int i = 0; i < nthreads; i++) {WaitForSingleObject (&threads[i], INFINITE);

}

Parallel Task Scheduling and Executionconst int MINPATCH = 150;const int DIVFACTOR = 2;typedef struct work_queue_entry_s {

patch pch;struct work_queue_entry_s *next;

} work_queue_entry_t;work_queue_entry_t *work_queue_head = NULL;work_queue_entry_t *work_queue_tail = NULL;void generate_work (patch* pchin){ int startx, stopx, starty, stopy;

int xs,ys;startx=pchin->startx; stopx= pchin->stopx;starty=pchin->starty; stopy= pchin->stopy;if(((stopx-startx) >= MINPATCH) || ((stopy-starty) >= MINPATCH)) {

int xpatchsize = (stopx-startx)/DIVFACTOR + 1;int ypatchsize = (stopy-starty)/DIVFACTOR + 1;for (ys=starty; ys<=stopy; ys+=ypatchsize)for (xs=startx; xs<=stopx; xs+=xpatchsize) {

patch pch;pch.startx = xs;pch.starty = ys;pch.stopx = MIN(xs+xpatchsize-1,stopx);pch.stopy = MIN(ys+ypatchsize-1,stopy);generate_work (&pch);}

} else {/* just trace this patch */work_queue_entry_t *q = (work_queue_entry_t *) malloc (sizeof

(work_queue_entry_t));q->pch.starty = starty; q->pch.stopy = stopy;q->pch.startx = startx; q->pch.stopx = stopx;q->next = NULL;

Thread Setup and Initialization#include "tbb/task_scheduler_init.h" #include "tbb/spin_mutex.h"tbb::task_scheduler_init init;tbb::spin_mutex MyMutex, MyMutex2;

Parallel Task Scheduling and Execution#include "tbb/parallel_for.h"#include "tbb/blocked_range2d.h"class parallel_task {public:

void operator() (const tbb::blocked_range2d<int> &r) const {for (int y = r.rows().begin(); y != r.rows().end(); ++y) {

for (int x = r.cols().begin(); x != r.cols().end(); x++) {render_one_pixel (x, y);

}} if (scene.displaymode == RT_DISPLAY_ENABLED) {

tbb::spin_mutex::scoped_lock lock (MyMutex2);for (int y = r.rows().begin(); y != r.rows().end(); ++y) {

GraphicsDrawRow(startx-1, y-1, totalx, (unsigned char *) &global_buffer[(y-starty)*totalx*3]);

}}

}parallel_task () {}

};parallel_for (tbb::blocked_range2d<int> (starty, stopy + 1, grain_size, startx, stopx + 1, grain_size), parallel_task ());

Windows

®

API Windows*

Linux* Mac OS*

2D

(Tachyon)

if (work_queue_head == NULL) {work_queue_head = q;

} else {work_queue_tail->next = q;

}work_queue_tail = q;

}}void generate_worklist (void){

patch pch;pch.startx = startx;pch.stopx = stopx;pch.starty = starty;pch.stopy = stopy;generate_work (&pch);

}bool schedule_thread_work (patch &pch){

EnterCriticalSection (&MyMutex3);work_queue_entry_t *q = work_queue_head;if (q != NULL) {

pch = q->pch;work_queue_head = work_queue_head->next;

}LeaveCriticalSection (&MyMutex3);return (q != NULL);

}generate_worklist ();

void parallel_thread (void *arg){

patch pch;while (schedule_thread_work (pch)) {

for (int y = pch.starty; y <= pch.stopy; y++) {for (int x=pch.startx; x<=pch.stopx; x++) {

render_one_pixel (x, y);}} if (scene.displaymode == RT_DISPLAY_ENABLED) {

EnterCriticalSection (&MyMutex3);for (int y = pch.starty; y <= pch.stopy; y++) {

GraphicsDrawRow(pch.startx-1, y-1, pch.stopx-pch.startx+1, (unsigned char *) &global_buffer[((y-starty)*totalx+(pch.startx-startx))*3]);

}LeaveCriticalSection (&MyMutex3);

}}

}

This example includes software developed by John E. Stone.

®

3434 April 10, 2007

Think parallel

3535 April 10, 2007

intel.com/software/mcdeveloper go-parallel.com

Joe Wolf® C++ Fortran

10.0 June 19

Vasanth Tovinkere3

–June 5

Victoria Gromova3

– ®May 15

Dr. David Mackay3

–May 1

Gary Carleton CPU Windows Vista*

April 17

Dr. Tim Mattson April 3

intel.com/software/products

3 … …

http://on24.com/event/36/88/3/rt/1/?eventid=36883

3636 April 10, 2007

– go-parallel.com

3737 April 10, 2007

intel.com/software/mcdeveloper go-parallel.com

intel.com/software/products

3 … …

http://on24.com/event/36/88/3/rt/1/?eventid=36883

Joe Wolf® C++ Fortran

10.0 June 19

Vasanth Tovinkere

3–June 5

Victoria Gromova3

– ®May 15

Dr. David Mackay

3–

May 1

Gary Carleton CPU Windows Vista*

April 17

Dr. Tim Mattson April 3