#define __PIO_FILE__ "calcdecomp.F90" !> !! @file !! @brief calcdecomp This module computes the IO decomposition when generated by PO internally. !! !! $Revision$ !! $LastChangedDate$ !< module calcdecomp #ifdef TESTCALCDECOMP implicit none integer, parameter :: i4=selected_int_kind(6), & i8=selected_int_kind(13), pio_offset=i8, r8=selected_real_kind(13) logical, parameter :: debug=.false. integer, parameter :: pio_real=1,pio_int=2, pio_double=3 #else use pio_kinds, only: i4, r4,r8,i4,i8, PIO_offset use pio_types, only: PIO_int, PIO_real, PIO_double use pio_support, only : debug, piodie implicit none #endif public :: CalcStartandCount, pio_set_blocksize integer, parameter :: default_blocksize=884736 integer :: blocksize=default_blocksize contains !> !! @defgroup PIO_set_blocksize !! Sets the contiguous block size read or written from each IO task. !! The optimal value of this parameter is filesystem specific. !< subroutine pio_set_blocksize(newsize) integer, intent(in) :: newsize #ifndef TESTCALCDECOMP if(newsize<0) then call piodie(__PIO_FILE__,__LINE__,'bad value to blocksize: ',newsize) end if #endif blocksize=newsize end subroutine pio_set_blocksize ! ! Determine start and kount values for an array of global size gdims over at most num_io_procs tasks. ! The algorythm creates contigous blocks of approximate size stripesize. Blocksize should be adjusted ! to be optimal for the filesystem being used. The actual number of io tasks used is output in variable ! use_io_procs ! subroutine CalcStartandCount(basetype, ndims, gdims, num_io_procs,myiorank,start, kount, use_io_procs, innermostdecomposed) integer(i4), intent(in) :: ndims, num_io_procs, basetype,myiorank integer(i4), intent(in) :: gdims(ndims) integer(kind=PIO_OFFSET), intent(out) :: start(ndims), kount(ndims) integer, intent(out) :: use_io_procs integer, intent(out), optional :: innermostdecomposed integer :: i, dims(ndims), lb, ub, inc integer(kind=pio_offset) :: p, tpsize, pgdims logical :: converged integer :: extras, subrank, tioprocs, rem integer :: minbytes, maxbytes, iorank integer :: minblocksize, basesize, maxiosize, ioprocs, tiorank integer :: ldims integer(kind=PIO_OFFSET) :: mystart(ndims), mykount(ndims) minbytes = blocksize-256 ! minimum number of contigous blocks in bytes to put on a IO task maxbytes = blocksize+256 ! maximum length of contigous block in bytes to put on a IO task select case(basetype) case(PIO_int) basesize = 4 case(PIO_real) basesize = 4 case(PIO_double) basesize = 8 end select minblocksize = minbytes/basesize pgdims=product(int(gdims,pio_offset)) p=pgdims use_io_procs = max(1, min(int(real(p)/real(minblocksize)+0.5),num_io_procs)) converged=.false. tpsize=0 mystart=1 mykount=0 do while(.not. converged) do iorank=0,use_io_procs-1 start(:)=1 kount(:)=0 ldims=ndims p=basesize do i=1,ndims p=p*gdims(i) if(p/use_io_procs > maxbytes) then ldims=i exit end if end do ! Things work best if use_io_procs is a multiple of gdims(ndims) ! this adjustment makes it so, potentially increasing the blocksize a bit if (gdims(ldims)<use_io_procs) then if(ldims>1 .and. gdims(ldims-1) > use_io_procs) then ldims=ldims-1 else use_io_procs = use_io_procs - mod(use_io_procs,gdims(ldims)) end if end if kount(:)=gdims ioprocs=use_io_procs tiorank=iorank do i=ldims,1,-1 if(gdims(i)>1) then if(gdims(i)>=ioprocs) then call computestartandcount(gdims(i),ioprocs,tiorank,start(i),kount(i)) if(start(i)+kount(i)>gdims(i)+1) then print *,__PIO_FILE__,__LINE__,i,ioprocs,gdims(i),start(i),kount(i) #if TESTCALCDECOMP stop #else call piodie(__PIO_FILE__,__LINE__,'Start plus count exceeds dimension bound') #endif endif exit ! Decomposition is complete else ! The current dimension cannot complete the decomposition. Decompose this ! dimension in groups then go on to decompose the next dimesion in each of those ! groups. tioprocs=gdims(i) tiorank = (iorank*tioprocs)/ioprocs call computestartandcount(gdims(i),tioprocs, tiorank , start(i),kount(i)) ioprocs=ioprocs/tioprocs tiorank = mod(iorank,ioprocs) end if end if end do if(myiorank==iorank) then mystart=start mykount=kount endif tpsize=tpsize+product(kount(:)) if(tpsize==pgdims .and. use_io_procs==iorank+1) then converged=.true. exit else if(tpsize>=pgdims) then exit endif enddo if(.not.converged) then tpsize=0 use_io_procs=use_io_procs-1 endif end do start=mystart kount=mykount ! if(myiorank>=0 .and. myiorank<use_io_procs) then ! print *,__PIO_FILE__,__LINE__,use_io_procs,gdims,start,kount ! endif if(present(innermostdecomposed)) then innermostdecomposed=i end if end subroutine Calcstartandcount ! ! Compute start and kount values to distribute gdim over ioprocs ! as evenly as possible. gdim must be >= ioprocs ! ! subroutine computestartandcount(gdim,ioprocs,rank,start,kount) implicit none integer,intent(in) :: gdim,ioprocs,rank integer(kind=pio_offset),intent(out) :: start,kount integer :: remainder, irank if(gdim<ioprocs) then stop 'bad arguments' end if ! print *,__LINE__,gdim,ioprocs,rank irank = mod(rank,ioprocs) kount = gdim/ioprocs start = kount*irank+1 remainder = gdim-kount*ioprocs if(remainder>=ioprocs-irank) then kount=kount+1 start=start+max(0,(irank+remainder-ioprocs)) end if ! write(99,*) __LINE__,gdim,ioprocs,rank,start,kount,remainder end subroutine computestartandcount end module calcdecomp #ifdef TESTCALCDECOMP program sandctest use calcdecomp !_EXTERNAL implicit none ! integer, parameter :: ndims=4 ! integer, parameter :: gdims(ndims) = (/66,199,10,8/) ! integer, parameter :: ndims=3 ! integer, parameter :: gdims(ndims) = (/1024,1024,1024/) ! integer, parameter :: num_io_procs=16 integer, parameter :: ndims=2 integer, parameter :: gdims(ndims) = (/777602,31/) integer :: num_io_procs=24 logical :: converged=.false. ! integer :: gdims(ndims) integer :: psize, n, i,j,k,m integer, parameter :: imax=200,jmax=200,kmax=30,mmax=7 integer(kind=pio_offset) :: start(ndims), count(ndims) integer :: iorank, numaiotasks, tpsize #ifdef DOTHIS do i=1,imax gdims(1)=i do j=1,jmax gdims(2)=j do k=20,kmax gdims(3)=k do m=1,mmax gdims(4)=m #endif tpsize = 0 numaiotasks=0 do while(.not. converged) do iorank=0,num_io_procs-1 call Calcstartandcount(PIO_double, ndims, gdims, num_io_procs, iorank, start, count, numaiotasks) psize=1 do n=1,ndims psize=psize*count(n) end do tpsize = tpsize+psize ! if(ndims==3) then ! write(*,'(i2,a,3i5,a,3i5,2i12)') iorank,' start =',start,' count=', count, product(gdims), psize ! else if(ndims==4) then if(sum(count)>0) then write(*,'(i2,a,2i8,a,2i8,2i12)') iorank,' start =',start,' count=', count, product(gdims), psize if(any(start<0)) then print *, gdims stop endif end if end do if(tpsize==product(gdims)) then converged=.true. else print *,'Failed to converge: ',tpsize,product(gdims),gdims,num_io_procs tpsize=0 num_io_procs=num_io_procs-1 end if end do #ifdef DOTHIS end do end do end do end do #endif end program sandctest #endif