perf_mod.F90 Source File


Source Code

module perf_mod

!-----------------------------------------------------------------------
!
! Purpose: This module is responsible for controlling the performance
!          timer logic.
!
! Author:  P. Worley, January 2007
!
! $Id$
!
!-----------------------------------------------------------------------

!-----------------------------------------------------------------------
!- Uses ----------------------------------------------------------------
!-----------------------------------------------------------------------

#ifndef USE_CSM_SHARE
   use perf_utils
#else
   use shr_sys_mod,       only: shr_sys_abort
   use shr_kind_mod,      only: SHR_KIND_CS, SHR_KIND_CM, SHR_KIND_CX, &
                                SHR_KIND_R8, SHR_KIND_I8
   use shr_mpi_mod,       only: shr_mpi_barrier, shr_mpi_bcast
   use shr_file_mod,      only: shr_file_getUnit, shr_file_freeUnit
   use namelist_utils,    only: find_group_name
#endif

!-----------------------------------------------------------------------
!- module boilerplate --------------------------------------------------
!-----------------------------------------------------------------------
   implicit none
   private                   ! Make the default access private
#include <mpif.h>
   save

!-----------------------------------------------------------------------
! Public interfaces ----------------------------------------------------
!-----------------------------------------------------------------------
   public t_initf
   public t_setLogUnit
   public t_getLogUnit
   public t_profile_onf
   public t_barrier_onf
   public t_single_filef
   public t_set_prefixf
   public t_unset_prefixf
   public t_stampf
   public t_startf
   public t_stopf
   public t_startstop_valsf
   public t_enablef
   public t_disablef
   public t_adj_detailf
   public t_barrierf
   public t_prf
   public t_finalizef

!-----------------------------------------------------------------------
! Private interfaces (local) -------------------------------------------
!-----------------------------------------------------------------------
   private perf_defaultopts
   private perf_setopts
   private papi_defaultopts
   private papi_setopts

!-----------------------------------------------------------------------
!- include statements --------------------------------------------------
!-----------------------------------------------------------------------
#include "gptl.inc"

!-----------------------------------------------------------------------
! Private data ---------------------------------------------------------
!-----------------------------------------------------------------------

   !----------------------------------------------------------------------------
   ! perf_mod options
   !----------------------------------------------------------------------------
   integer, parameter :: def_p_logunit = 6                   ! default
   integer, private   :: p_logunit = def_p_logunit
                         ! unit number for log output

   logical, parameter :: def_timing_initialized = .false.      ! default
   logical, private   :: timing_initialized = def_timing_initialized
                         ! flag indicating whether timing library has
                         ! been initialized

   logical, parameter :: def_timing_disable = .false.          ! default
   logical, private   :: timing_disable = def_timing_disable
                         ! flag indicating whether timers are disabled

   logical, parameter :: def_timing_barrier = .false.          ! default
   logical, private   :: timing_barrier = def_timing_barrier
                         ! flag indicating whether the mpi_barrier in
                         ! t_barrierf should be called

   integer, parameter :: def_timer_depth_limit = 99999         ! default
   integer, private   :: timer_depth_limit = def_timer_depth_limit
                         ! integer indicating maximum number of levels of
                         ! timer nesting

   integer, parameter :: def_timing_detail_limit = 1           ! default
   integer, private   :: timing_detail_limit = def_timing_detail_limit
                         ! integer indicating maximum detail level to
                         ! profile

   integer, parameter :: init_timing_disable_depth = 0         ! init
   integer, private   :: timing_disable_depth = init_timing_disable_depth
                         ! integer indicating depth of t_disablef calls

   integer, parameter :: init_timing_detail = 0                ! init
   integer, private   :: cur_timing_detail = init_timing_detail
                         ! current timing detail level

   logical, parameter :: def_perf_single_file = .false.         ! default
   logical, private   :: perf_single_file = def_perf_single_file
                         ! flag indicating whether the performance timer
                         ! output should be written to a single file
                         ! (per component communicator) or to a
                         ! separate file for each process

   integer, parameter :: def_perf_outpe_num = 0                ! default
   integer, private   :: perf_outpe_num = def_perf_outpe_num
                         ! maximum number of processes writing out
                         ! timing data (for this component communicator)

   integer, parameter :: def_perf_outpe_stride = 1             ! default
   integer, private   :: perf_outpe_stride = def_perf_outpe_stride
                         ! separation between process ids for processes
                         ! that are writing out timing data
                         ! (for this component communicator)

   logical, parameter :: def_perf_global_stats = .true.        ! default
   logical, private   :: perf_global_stats = def_perf_global_stats
                         ! collect and print out global performance statistics
                         ! (for this component communicator)

   logical, parameter :: def_perf_ovhd_measurement = .false.     ! default
   logical, private   :: perf_ovhd_measurement = def_perf_ovhd_measurement
                         ! measure overhead of profiling directly

   logical, parameter :: def_perf_add_detail = .false.         ! default
   logical, private   :: perf_add_detail = def_perf_add_detail
                         ! flag indicating whether to prefix the
                         ! timer name with the current detail level.
                         ! This requires that even t_startf/t_stopf
                         ! calls do not cross detail level changes

   character(len=SHR_KIND_CS), private :: event_prefix
                         ! current prefix for all event names.
                         ! Default defined to be blank via 
                         ! prefix_len_def
   integer, parameter :: prefix_len_def = 0                    ! default
   integer, private   :: prefix_len = prefix_len_def
                         ! For convenience, contains len_trim of 
                         ! event_prefix, if set.

#ifdef HAVE_MPI
   integer, parameter :: def_perf_timer = GPTLmpiwtime         ! default
#else
#ifdef HAVE_NANOTIME
   integer, parameter :: def_perf_timer = GPTLnanotime         ! default
#else
#ifdef CPRIBM
   integer,parameter :: def_perf_timer = GPTLread_real_time
#else
   integer,parameter :: def_perf_timer = GPTLgettimeofday
#endif
#endif
#endif


   integer, private   :: perf_timer = def_perf_timer           ! default
                         ! integer indicating which timer to use
                         ! (as defined in gptl.inc)

#ifdef HAVE_PAPI
   logical, parameter :: def_perf_papi_enable = .false.       ! default
#else
   logical, parameter :: def_perf_papi_enable = .false.       ! default
#endif
   logical, private   :: perf_papi_enable = def_perf_papi_enable
                         ! flag indicating whether the PAPI namelist
                         ! should be read and HW performance counters
                         ! used in profiling

   ! PAPI counter ids
   integer, parameter :: PAPI_NULL = -1

   integer, parameter :: def_papi_ctr1 = PAPI_NULL           ! default
   integer, private   :: papi_ctr1 = def_papi_ctr1

   integer, parameter :: def_papi_ctr2 = PAPI_NULL           ! default
   integer, private   :: papi_ctr2 = def_papi_ctr2

   integer, parameter :: def_papi_ctr3 = PAPI_NULL           ! default
   integer, private   :: papi_ctr3 = def_papi_ctr3

   integer, parameter :: def_papi_ctr4 = PAPI_NULL           ! default
   integer, private   :: papi_ctr4 = def_papi_ctr4

!=======================================================================
contains
!=======================================================================

!
!========================================================================
!
   subroutine t_getLogUnit(LogUnit)
!-----------------------------------------------------------------------
! Purpose:  Get log unit number.
! Author:   P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   integer(SHR_KIND_IN), intent(OUT) :: LogUnit  ! Unit number for log output
!-----------------------------------------------------------------------

   LogUnit = p_logunit

   return
   end subroutine t_getLogUnit
!
!========================================================================
!
   subroutine t_setLogUnit(LogUnit)
!-----------------------------------------------------------------------
! Purpose:  Set log unit number.
! Author:   P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   integer(SHR_KIND_IN), intent(IN) :: LogUnit  ! Unit number for log output
!-----------------------------------------------------------------------

   p_logunit = LogUnit
#ifndef USE_CSM_SHARE
   call perfutils_setunit(p_logunit)
#endif

   return
   end subroutine t_setLogUnit
!
!========================================================================
!
   subroutine perf_defaultopts(timing_disable_out, &
                               perf_timer_out, &
                               timer_depth_limit_out, &
                               timing_detail_limit_out, &
                               timing_barrier_out, &
                               perf_outpe_num_out, &
                               perf_outpe_stride_out, &
                               perf_single_file_out, &
                               perf_global_stats_out, &
                               perf_papi_enable_out, &
                               perf_ovhd_measurement_out, &
                               perf_add_detail_out )
!-----------------------------------------------------------------------
! Purpose: Return default runtime options
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
   ! timers disable/enable option
   logical, intent(out), optional :: timing_disable_out
   ! performance timer option
   integer, intent(out), optional :: perf_timer_out
   ! timer depth limit option
   integer, intent(out), optional :: timer_depth_limit_out
   ! timer detail limit option
   integer, intent(out), optional :: timing_detail_limit_out
   ! timing barrier enable/disable option
   logical, intent(out), optional :: timing_barrier_out
   ! number of processes writing out timing data
   integer, intent(out), optional :: perf_outpe_num_out
   ! separation between process ids for processes that are writing out timing data
   integer, intent(out), optional :: perf_outpe_stride_out
   ! timing single / multple output file option
   logical, intent(out), optional :: perf_single_file_out
   ! collect and output global performance statistics option
   logical, intent(out), optional :: perf_global_stats_out
   ! calling PAPI to read HW performance counters option
   logical, intent(out), optional :: perf_papi_enable_out
   ! measure overhead of profiling directly
   logical, intent(out), optional :: perf_ovhd_measurement_out
   ! prefix timer name with current detail level
   logical, intent(out), optional :: perf_add_detail_out
!-----------------------------------------------------------------------
   if ( present(timing_disable_out) ) then
      timing_disable_out = def_timing_disable
   endif
   if ( present(perf_timer_out) ) then
      perf_timer_out = def_perf_timer
   endif
   if ( present(timer_depth_limit_out) ) then
      timer_depth_limit_out = def_timer_depth_limit
   endif
   if ( present(timing_detail_limit_out) ) then
      timing_detail_limit_out = def_timing_detail_limit
   endif
   if ( present(timing_barrier_out) ) then
      timing_barrier_out = def_timing_barrier
   endif
   if ( present(perf_outpe_num_out) ) then
      perf_outpe_num_out = def_perf_outpe_num
   endif
   if ( present(perf_outpe_stride_out) ) then
      perf_outpe_stride_out = def_perf_outpe_stride
   endif
   if ( present(perf_single_file_out) ) then
      perf_single_file_out = def_perf_single_file
   endif
   if ( present(perf_global_stats_out) ) then
      perf_global_stats_out = def_perf_global_stats
   endif
   if ( present(perf_papi_enable_out) ) then
      perf_papi_enable_out = def_perf_papi_enable
   endif
   if ( present(perf_ovhd_measurement_out) ) then
      perf_ovhd_measurement_out = def_perf_ovhd_measurement
   endif
   if ( present(perf_add_detail_out) ) then
      perf_add_detail_out = def_perf_add_detail
   endif
!
   return
   end subroutine perf_defaultopts
!
!========================================================================
!
   subroutine perf_setopts(mastertask, &
                           LogPrint, &
                           timing_disable_in, &
                           perf_timer_in, &
                           timer_depth_limit_in, &
                           timing_detail_limit_in, &
                           timing_barrier_in, &
                           perf_outpe_num_in, &
                           perf_outpe_stride_in, &
                           perf_single_file_in, &
                           perf_global_stats_in, &
                           perf_papi_enable_in, &
                           perf_ovhd_measurement_in, &
                           perf_add_detail_in )
!-----------------------------------------------------------------------
! Purpose: Set runtime options
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments----------------------------
!
   ! master process?
   logical, intent(in) :: mastertask
   ! Print out to log file?
   logical, intent(IN) :: LogPrint
   ! timers disable/enable option
   logical, intent(in), optional :: timing_disable_in
   ! performance timer option
   integer, intent(in), optional :: perf_timer_in
   ! timer depth limit option
   integer, intent(in), optional :: timer_depth_limit_in
   ! timer detail limit option
   integer, intent(in), optional :: timing_detail_limit_in
   ! timing barrier enable/disable option
   logical, intent(in), optional :: timing_barrier_in
   ! number of processes writing out timing data
   integer, intent(in), optional :: perf_outpe_num_in
   ! separation between process ids for processes that are writing out timing data
   integer, intent(in), optional :: perf_outpe_stride_in
   ! timing single / multple output file option
   logical, intent(in), optional :: perf_single_file_in
   ! collect and output global performance statistics option
   logical, intent(in), optional :: perf_global_stats_in
   ! calling PAPI to read HW performance counters option
   logical, intent(in), optional :: perf_papi_enable_in
   ! measure overhead of profiling directly
   logical, intent(in), optional :: perf_ovhd_measurement_in
   ! prefix timer name with current detail level
   logical, intent(in), optional :: perf_add_detail_in
!
!---------------------------Local workspace-----------------------------
!
   integer  ierr                  ! error return
!-----------------------------------------------------------------------
   if ( .not. timing_initialized ) then

      if ( present(timing_disable_in) ) then
         timing_disable = timing_disable_in
         if (timing_disable) then
            ierr = GPTLdisable()
         else
            ierr = GPTLenable()
         endif
      endif
      if ( present(perf_timer_in) ) then
         if ((perf_timer_in .eq. GPTLgettimeofday) .or. &
             (perf_timer_in .eq. GPTLnanotime) .or. &
             (perf_timer_in .eq. GPTLread_real_time) .or. &
             (perf_timer_in .eq. GPTLmpiwtime) .or. &
             (perf_timer_in .eq. GPTLclockgettime) .or. &
             (perf_timer_in .eq. GPTLpapitime)) then
            perf_timer = perf_timer_in
         else
            if (mastertask) then
               write(p_logunit,*) 'PERF_SETOPTS: illegal timer requested=',&
                                  perf_timer_in, '. Request ignored.'
            endif
         endif
      endif
      if ( present(timer_depth_limit_in) ) then
         timer_depth_limit = timer_depth_limit_in
      endif
      if ( present(timing_detail_limit_in) ) then
         timing_detail_limit = timing_detail_limit_in
      endif
      if ( present(timing_barrier_in) ) then
         timing_barrier = timing_barrier_in
      endif
      if ( present(perf_outpe_num_in) ) then
         perf_outpe_num = perf_outpe_num_in
      endif
      if ( present(perf_outpe_stride_in) ) then
         perf_outpe_stride = perf_outpe_stride_in
      endif
      if ( present(perf_single_file_in) ) then
         perf_single_file = perf_single_file_in
      endif
      if ( present(perf_global_stats_in) ) then
         perf_global_stats = perf_global_stats_in
      endif
      if ( present(perf_papi_enable_in) ) then
#ifdef HAVE_PAPI
         perf_papi_enable = perf_papi_enable_in
#else
         if (perf_papi_enable_in) then
            if (mastertask) then
               write(p_logunit,*) 'PERF_SETOPTS: PAPI library not linked in. ',&
                                  'Request to enable PAPI ignored.'
            endif
         endif
         perf_papi_enable = .false.
#endif
      endif
      if ( present(perf_ovhd_measurement_in) ) then
         perf_ovhd_measurement = perf_ovhd_measurement_in
      endif
      if ( present(perf_add_detail_in) ) then
         perf_add_detail = perf_add_detail_in
      endif
!
      if (mastertask .and. LogPrint) then
         write(p_logunit,*) '(t_initf) Using profile_disable=         ', timing_disable
         write(p_logunit,*) '(t_initf)       profile_timer=           ', perf_timer
         write(p_logunit,*) '(t_initf)       profile_depth_limit=     ', timer_depth_limit
         write(p_logunit,*) '(t_initf)       profile_detail_limit=    ', timing_detail_limit
         write(p_logunit,*) '(t_initf)       profile_barrier=         ', timing_barrier
         write(p_logunit,*) '(t_initf)       profile_outpe_num=       ', perf_outpe_num
         write(p_logunit,*) '(t_initf)       profile_outpe_stride=    ', perf_outpe_stride
         write(p_logunit,*) '(t_initf)       profile_single_file=     ', perf_single_file
         write(p_logunit,*) '(t_initf)       profile_global_stats=    ', perf_global_stats
         write(p_logunit,*) '(t_initf)       profile_ovhd_measurement=', perf_ovhd_measurement
         write(p_logunit,*) '(t_initf)       profile_add_detail=      ', perf_add_detail
         write(p_logunit,*) '(t_initf)       profile_papi_enable=     ', perf_papi_enable
      endif
!
#ifdef DEBUG
   else
      write(p_logunit,*) 'PERF_SETOPTS: timing library already initialized. Request ignored.'
#endif
   endif
!
   return
   end subroutine perf_setopts

!
!========================================================================
!
   subroutine papi_defaultopts(papi_ctr1_out, &
                               papi_ctr2_out, &
                               papi_ctr3_out, &
                               papi_ctr4_out  )
!-----------------------------------------------------------------------
! Purpose: Return default runtime PAPI counter options
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
   ! PAPI counter option #1
   integer, intent(out), optional :: papi_ctr1_out
   ! PAPI counter option #2
   integer, intent(out), optional :: papi_ctr2_out
   ! PAPI counter option #3
   integer, intent(out), optional :: papi_ctr3_out
   ! PAPI counter option #4
   integer, intent(out), optional :: papi_ctr4_out
!-----------------------------------------------------------------------
   if ( present(papi_ctr1_out) ) then
      papi_ctr1_out = def_papi_ctr1
   endif
   if ( present(papi_ctr2_out) ) then
      papi_ctr2_out = def_papi_ctr2
   endif
   if ( present(papi_ctr3_out) ) then
      papi_ctr3_out = def_papi_ctr3
   endif
   if ( present(papi_ctr4_out) ) then
      papi_ctr4_out = def_papi_ctr4
   endif
!
   return
   end subroutine papi_defaultopts
!
!========================================================================
!
   subroutine papi_setopts(papi_ctr1_in, &
                           papi_ctr2_in, &
                           papi_ctr3_in, &
                           papi_ctr4_in  )
!-----------------------------------------------------------------------
! Purpose: Set runtime PAPI counter options
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments----------------------------
!
   ! performance counter option
   integer, intent(in), optional :: papi_ctr1_in
   ! performance counter option
   integer, intent(in), optional :: papi_ctr2_in
   ! performance counter option
   integer, intent(in), optional :: papi_ctr3_in
   ! performance counter option
   integer, intent(in), optional :: papi_ctr4_in
!
!---------------------------Local workspace-----------------------------
!
   integer  ierr                  ! error return
!-----------------------------------------------------------------------
   if ( .not. timing_initialized ) then

      if ( present(papi_ctr1_in) ) then
         if (papi_ctr1_in < 0) then
            papi_ctr1 = papi_ctr1_in
         else
            papi_ctr1 = PAPI_NULL
         endif
      endif
      if ( present(papi_ctr2_in) ) then
         if (papi_ctr2_in < 0) then
            papi_ctr2 = papi_ctr2_in
         else
            papi_ctr2 = PAPI_NULL
         endif
      endif
      if ( present(papi_ctr3_in) ) then
         if (papi_ctr3_in < 0) then
            papi_ctr3 = papi_ctr3_in
         else
            papi_ctr3 = PAPI_NULL
         endif
      endif
      if ( present(papi_ctr4_in) ) then
         if (papi_ctr4_in < 0) then
            papi_ctr4 = papi_ctr4_in
         else
            papi_ctr4 = PAPI_NULL
         endif
      endif
!
#ifdef DEBUG
   else
      write(p_logunit,*) 'PAPI_SETOPTS: timing library already initialized. Request ignored.'
#endif
   endif
!
   return
   end subroutine papi_setopts
!
!========================================================================
!
   logical function t_profile_onf()
!-----------------------------------------------------------------------
! Purpose: Return flag indicating whether profiling is currently active.
!          Part of workaround to implement FVbarrierclock before
!          communicators exposed in Pilgrim. Does not check level of
!          event nesting.
! Author: P. Worley
!-----------------------------------------------------------------------

   if ((.not. timing_initialized) .or. &
       (timing_disable_depth > 0)) then
      t_profile_onf = .false.
   else
      t_profile_onf = .true.
   endif

   end function t_profile_onf
!
!========================================================================
!
   logical function t_barrier_onf()
!-----------------------------------------------------------------------
! Purpose: Return timing_barrier. Part of workaround to implement
!          FVbarrierclock before communicators exposed in Pilgrim.
! Author: P. Worley
!-----------------------------------------------------------------------

   t_barrier_onf = timing_barrier

   end function t_barrier_onf
!
!========================================================================
!
   logical function t_single_filef()
!-----------------------------------------------------------------------
! Purpose: Return perf_single_file. Used to control output of other
!          performance data, only spmdstats currently.
! Author: P. Worley
!-----------------------------------------------------------------------

   t_single_filef = perf_single_file

   end function t_single_filef
!
!========================================================================
!
   subroutine t_set_prefixf(prefix_string)
!----------------------------------------------------------------------- 
! Purpose: Set prefix for subsequent time event names. 
!          Ignored in threaded regions.
! Author: P. Worley 
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   ! performance timer event name prefix
   character(len=*), intent(in) :: prefix_string
!
!---------------------------Local workspace-----------------------------
!
   integer i                              ! loop index
!
!---------------------------Externals-----------------------------------
!
#if ( defined _OPENMP )
   logical omp_in_parallel
   external omp_in_parallel
#endif
!
!-----------------------------------------------------------------------
!
#if ( defined _OPENMP )
   if (omp_in_parallel()) return
#endif

   prefix_len = min(SHR_KIND_CS,len_trim(prefix_string))
   if (prefix_len > 0) then
     event_prefix(1:prefix_len) = prefix_string(1:prefix_len)
   endif

   end subroutine t_set_prefixf
!
!========================================================================
!
   subroutine t_unset_prefixf()
!----------------------------------------------------------------------- 
! Purpose: Unset prefix for subsequent time event names.
!          Ignored in threaded regions.
! Author: P. Worley 
!
!---------------------------Externals-----------------------------------
!
#if ( defined _OPENMP )
   logical omp_in_parallel
   external omp_in_parallel
#endif
!
!-----------------------------------------------------------------------
!
#if ( defined _OPENMP )
   if (omp_in_parallel()) return
#endif

   prefix_len = 0

   end subroutine t_unset_prefixf
!
!========================================================================
!
   subroutine t_stampf(wall, usr, sys)
!-----------------------------------------------------------------------
! Purpose: Record wallclock, user, and system times (seconds).
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Output arguments-----------------------------
!
   real(shr_kind_r8), intent(out) :: wall ! wallclock time
   real(shr_kind_r8), intent(out) :: usr  ! user time
   real(shr_kind_r8), intent(out) :: sys  ! system time
!
!---------------------------Local workspace-----------------------------
!
   integer  ierr                          ! GPTL error return
!
!-----------------------------------------------------------------------
!
   if ((.not. timing_initialized) .or. &
       (timing_disable_depth > 0)) then
      wall = 0.0
      usr = 0.0
      sys = 0.0
   else
      ierr = GPTLstamp(wall, usr, sys)
   endif

   return
   end subroutine t_stampf
!
!========================================================================
!
   subroutine t_startf(event, handle)
!-----------------------------------------------------------------------
! Purpose: Start an event timer
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   ! performance timer event name
   character(len=*), intent(in) :: event
!
!---------------------------Input/Output arguments----------------------
!
   ! GPTL event handle
   integer,  optional :: handle
!
!---------------------------Local workspace-----------------------------
!
   integer  ierr                          ! GPTL error return
   integer  str_length, i                 ! support for adding
                                          !  detail prefix
   character(len=2) cdetail               ! char variable for detail
!
!-----------------------------------------------------------------------
!
   if (.not. timing_initialized) return
   if (timing_disable_depth > 0) return

   if ((perf_add_detail) .AND. (cur_timing_detail < 100)) then

      write(cdetail,'(i2.2)') cur_timing_detail
      if (prefix_len > 0) then
         str_length = min(SHR_KIND_CM-prefix_len-5,len_trim(event))
         ierr = GPTLstart( &
            '"'//cdetail//"_"//event_prefix(1:prefix_len)// &
            event(1:str_length)//'"')
      else
         str_length = min(SHR_KIND_CM-5,len_trim(event))
         ierr = GPTLstart('"'//cdetail//"_"//event(1:str_length)//'"')
      endif

   else

      if (prefix_len > 0) then
         str_length = min(SHR_KIND_CM-prefix_len-2,len_trim(event))
         ierr = GPTLstart('"'//event_prefix(1:prefix_len)// &
              event(1:str_length)//'"')
      else
         str_length = min(SHR_KIND_CM-2,len_trim(event))
         ierr = GPTLstart('"'//trim(event)//'"')
      endif

!pw   if ( present (handle) ) then
!pw      ierr = GPTLstart_handle(event, handle)
!pw   else
!pw      ierr = GPTLstart(event)
!pw   endif

   endif

   return
   end subroutine t_startf
!
!========================================================================
!
   subroutine t_stopf(event, handle)
!-----------------------------------------------------------------------
! Purpose: Stop an event timer
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   ! performance timer event name
   character(len=*), intent(in) :: event
!
!---------------------------Input/Output arguments----------------------
!
   ! GPTL event handle
   integer, optional :: handle
!
!---------------------------Local workspace-----------------------------
!
   integer  ierr                          ! GPTL error return
   integer  str_length, i                 ! support for adding
                                          !  detail prefix
   character(len=2) cdetail               ! char variable for detail
!
!-----------------------------------------------------------------------
!
   if (.not. timing_initialized) return
   if (timing_disable_depth > 0) return

   if ((perf_add_detail) .AND. (cur_timing_detail < 100)) then

      write(cdetail,'(i2.2)') cur_timing_detail
      if (prefix_len > 0) then
         str_length = min(SHR_KIND_CM-prefix_len-5,len_trim(event))
         ierr = GPTLstop( &
              '"'//cdetail//"_"//event_prefix(1:prefix_len)// &
              event(1:str_length)//'"')
      else
         str_length = min(SHR_KIND_CM-5,len_trim(event))
         ierr = GPTLstop('"'//cdetail//"_"//event(1:str_length)//'"')
      endif

   else

      if (prefix_len > 0) then
         str_length = min(SHR_KIND_CM-prefix_len-2,len_trim(event))
         ierr = GPTLstop('"'//event_prefix(1:prefix_len)// &
              event(1:str_length)//'"')
     else
         str_length = min(SHR_KIND_CM-2,len_trim(event))
         ierr = GPTLstop('"'//trim(event)//'"')
     endif

!pw   if ( present (handle) ) then
!pw      ierr = GPTLstop_handle(event, handle)
!pw   else
!pw      ierr = GPTLstop(event)
!pw   endif

   endif

   return
   end subroutine t_stopf
!
!========================================================================
!
   subroutine t_startstop_valsf(event, walltime, callcount, handle)
!-----------------------------------------------------------------------
! Purpose: Create/add walltime and call count to an event timer
! Author: P. Worley (based on J. Rosinski GPTL routine)
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   ! performance timer event name
   character(len=*), intent(in) :: event
   ! walltime (seconds) associated with this start/stop pair
   ! If not set, default is 0.0 . If < 0.0, set to 0.0 .
   real(shr_kind_r8), intent(in), optional :: walltime
   ! call count associated with this start/stop pair
   ! If not set, default is 1. If < 0, set to 0.
   integer, intent(in), optional :: callcount
!
!---------------------------Input/Output arguments----------------------
!
   ! GPTL event handle
   integer,  optional :: handle
!
!---------------------------Local workspace-----------------------------
!
   integer  ierr                          ! GPTL error return
   integer  str_length, i                 ! support for adding 
                                          !  detail prefix
   character(len=2) cdetail               ! char variable for detail
   real(shr_kind_r8) wtime                ! walltime increment (seconds)
   integer  callcnt                       ! call count increment
!
!-----------------------------------------------------------------------
!
   if (.not. timing_initialized) return
   if (timing_disable_depth > 0) return

   wtime = 0.0_shr_kind_r8
   if ( present(walltime) ) then
      if (walltime > 0.0) then
         wtime = walltime
      endif
   endif

   callcnt = 1
   if ( present(callcount) ) then
      if (callcount > 0) then
         callcnt = callcount
      else
         callcnt = 0
      endif
   endif

   if ((perf_add_detail) .AND. (cur_timing_detail < 100)) then

      write(cdetail,'(i2.2)') cur_timing_detail
      if (prefix_len > 0) then
         str_length = min(SHR_KIND_CM-prefix_len-5,len_trim(event))
         ierr = GPTLstartstop_vals( &
            '"'//cdetail//"_"//event_prefix(1:prefix_len)// &
            event(1:str_length)//'"', wtime, callcnt)
      else
         str_length = min(SHR_KIND_CM-5,len_trim(event))
         ierr = GPTLstartstop_vals( &
            '"'//cdetail//"_"//event(1:str_length)//'"', wtime, callcnt)
      endif

   else

      if (prefix_len > 0) then
         str_length = min(SHR_KIND_CM-prefix_len-2,len_trim(event))
         ierr = GPTLstartstop_vals('"'//event_prefix(1:prefix_len)// &
              event(1:str_length)//'"', wtime, callcnt)
      else
         str_length = min(SHR_KIND_CM-2,len_trim(event))
         ierr = GPTLstartstop_vals('"'//trim(event)//'"', wtime, callcnt)
      endif

!pw   if ( present (handle) ) then
!pw      ierr = GPTLstartstop_vals_handle(event, wtime, handle)
!pw   else
!pw      ierr = GPTLstartstop_vals(event, wtime)
!pw   endif

   endif

   return
   end subroutine t_startstop_valsf
!
!========================================================================
!
   subroutine t_enablef()
!-----------------------------------------------------------------------
! Purpose: Enable t_startf, t_stopf, t_stampf, and t_barrierf. Ignored
!          in threaded regions.
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Local workspace-----------------------------
!
   integer  ierr                  ! GPTL error return
!
!---------------------------Externals-----------------------------------
!
#if ( defined _OPENMP )
   logical omp_in_parallel
   external omp_in_parallel
#endif
!
!-----------------------------------------------------------------------
!
   if (.not. timing_initialized) return

#if ( defined _OPENMP )
   if (omp_in_parallel()) return
#endif

   if (timing_disable_depth > 0) then
      if (timing_disable_depth .eq. 1) then
         ierr = GPTLenable()
      endif
      timing_disable_depth = timing_disable_depth - 1
   endif

   return
   end subroutine t_enablef
!
!========================================================================
!
   subroutine t_disablef()
!-----------------------------------------------------------------------
! Purpose: Disable t_startf, t_stopf, t_stampf, and t_barrierf. Ignored
!          in threaded regions.
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Local workspace-----------------------------
!
   integer  ierr                  ! GPTL error return
!
!---------------------------Externals-----------------------------------
!
#if ( defined _OPENMP )
   logical omp_in_parallel
   external omp_in_parallel
#endif
!
!-----------------------------------------------------------------------
!
   if (.not. timing_initialized) return

#if ( defined _OPENMP )
   if (omp_in_parallel()) return
#endif

   if (timing_disable_depth .eq. 0) then
      ierr = GPTLdisable()
   endif
   timing_disable_depth = timing_disable_depth + 1

   return
   end subroutine t_disablef
!
!========================================================================
!
   subroutine t_adj_detailf(detail_adjustment)
!-----------------------------------------------------------------------
! Purpose: Modify current detail level. Ignored in threaded regions.
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   integer, intent(in) :: detail_adjustment ! user defined increase or
                                            ! decrease in detail level
!
!---------------------------Externals-----------------------------------
!
#if ( defined _OPENMP )
   logical omp_in_parallel
   external omp_in_parallel
#endif
!
!-----------------------------------------------------------------------
!
   if (.not. timing_initialized) return

#if ( defined _OPENMP )
   if (omp_in_parallel()) return
#endif

!  using disable/enable to implement timing_detail logic so also control
!  direct GPTL calls (such as occur in Trilinos library)
   if     ((cur_timing_detail <= timing_detail_limit) .and. &
           (cur_timing_detail + detail_adjustment > timing_detail_limit)) then
      call t_disablef()
   elseif ((cur_timing_detail > timing_detail_limit) .and. &
           (cur_timing_detail + detail_adjustment <= timing_detail_limit)) then
      call t_enablef()
   endif

   cur_timing_detail = cur_timing_detail + detail_adjustment

   return
   end subroutine t_adj_detailf
!
!========================================================================
!
   subroutine t_barrierf(event, mpicom)
!-----------------------------------------------------------------------
! Purpose: Call (and time) mpi_barrier. Ignored inside OpenMP
!          threaded regions. Note that barrier executed even if
!          event not recorded because of level of timer event nesting.
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
   ! mpi communicator id
   integer, intent(in), optional :: mpicom
   ! performance timer event name
   character(len=*), intent(in), optional :: event
!
!---------------------------Local workspace-----------------------------
!
   integer  ierr                  ! GPTL error return
!
!---------------------------Externals-----------------------------------
!
#if ( defined _OPENMP )
   logical omp_in_parallel
   external omp_in_parallel
#endif
!
!-----------------------------------------------------------------------
!
   if (timing_barrier) then

#if ( defined _OPENMP )
      if (omp_in_parallel()) return
#endif
      if (.not. timing_initialized) return
      if (timing_disable_depth > 0) return

      if ( present (event) ) then
         call t_startf(event)
      endif

      if ( present (mpicom) ) then
         call shr_mpi_barrier(mpicom, 'T_BARRIERF: bad mpi communicator')
      else
         call shr_mpi_barrier(MPI_COMM_WORLD, 'T_BARRIERF: bad mpi communicator')
      endif

      if ( present (event) ) then
         call t_stopf(event)
      endif

   endif

   return
   end subroutine t_barrierf
!
!========================================================================
!
   subroutine t_prf(filename, mpicom, num_outpe, stride_outpe, &
                    single_file, global_stats, output_thispe)
!-----------------------------------------------------------------------
! Purpose: Write out performance timer data
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   ! performance timer output file name
   character(len=*), intent(in), optional :: filename
   ! mpi communicator id
   integer, intent(in), optional :: mpicom
   ! maximum number of processes writing out timing data
   integer, intent(in), optional :: num_outpe
   ! separation between process ids for processes writing out data
   integer, intent(in), optional :: stride_outpe
   ! enable/disable the writing of data to a single file
   logical, intent(in), optional :: single_file
   ! enable/disable the collection of global statistics
   logical, intent(in), optional :: global_stats
   ! output timing data for this process
   logical, intent(in), optional :: output_thispe
!
!---------------------------Local workspace-----------------------------
!
   logical  one_file              ! flag indicting whether to write
                                  !  all data to a single file
   logical  glb_stats             ! flag indicting whether to compute
                                  !  global statistics
   logical  pr_write              ! flag indicating whether the current
                                  !  GPTL output mode is write
   logical  write_data            ! flag indicating whether this process
                                  !  should output its timing data
   integer  i                     ! loop index
   integer  mpicom2               ! local copy of MPI communicator
   integer  me                    ! communicator local process id
   integer  npes                  ! local communicator group size
   integer  gme                   ! global process id
   integer  ierr                  ! MPI error return
   integer  outpe_num             ! max number of processes writing out
                                  !  timing data (excluding output_thispe)
   integer  outpe_stride          ! separation between process ids for
                                  !  processes writing out timing data
   integer  max_outpe             ! max process id for processes
                                  !  writing out timing data
   integer  signal                ! send/recv variable for single
                                  ! output file logic
   integer  str_length            ! string length
   integer  unitn                 ! file unit number
   integer cme_adj                ! length of filename suffix
   integer status (MPI_STATUS_SIZE)    ! Status of message
   character(len=7) cme                ! string representation of process id
   character(len=SHR_KIND_CX+14) fname ! timing output filename
!-----------------------------------------------------------------------
!
   if (.not. timing_initialized) return

   call t_startf("t_prf")
!$OMP MASTER
   call mpi_comm_rank(MPI_COMM_WORLD, gme, ierr)
   if ( present(mpicom) ) then
      mpicom2 = mpicom
      call mpi_comm_size(mpicom2, npes, ierr)
         if (ierr .eq. MPI_ERR_COMM) then
            call shr_sys_abort('T_PRF: bad mpi communicator')
         endif
      call mpi_comm_rank(mpicom2, me, ierr)
   else
      call mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
      mpicom2 = MPI_COMM_WORLD
      me = gme
   endif

   do i=1,SHR_KIND_CX+14
     fname(i:i) = " "
   enddo

   unitn = shr_file_getUnit()

   ! determine what the current output mode is (append or write)
   if (GPTLpr_query_write() == 1) then
     pr_write = .true.
     ierr = GPTLpr_set_append()
   else
     pr_write=.false.
   endif

   ! Determine whether to write all data to a single fie
   if (present(single_file)) then
      one_file = single_file
   else
      one_file = perf_single_file
   endif

   ! Determine whether to compute global statistics
   if (present(global_stats)) then
      glb_stats = global_stats
   else
      glb_stats = perf_global_stats
   endif

   ! Determine which processes are writing out timing data
   write_data = .false.

   if (present(num_outpe)) then
      if (num_outpe < 0) then
         outpe_num = npes
      else
         outpe_num = num_outpe
      endif
   else
      if (perf_outpe_num < 0) then
         outpe_num = npes
      else
         outpe_num = perf_outpe_num
      endif
   endif

   if (present(stride_outpe)) then
      if (stride_outpe < 1) then
         outpe_stride = 1
      else
         outpe_stride = stride_outpe
      endif
   else
      if (perf_outpe_stride < 1) then
         outpe_stride = 1
      else
         outpe_stride = perf_outpe_stride
      endif
   endif

   max_outpe = min(outpe_num*outpe_stride, npes) - 1

   if ((mod(me, outpe_stride) .eq. 0) .and. (me .le. max_outpe)) &
      write_data = .true.

   if (present(output_thispe)) then
      write_data = output_thispe
   endif

   ! If a single timing output file, take turns writing to it.
   if (one_file) then

      if ( present(filename) ) then
         str_length = min(SHR_KIND_CX,len_trim(filename))
         fname(1:str_length) = filename(1:str_length)
      else
         fname(1:10) = "timing_all"
      endif

      signal = 0
      if (me .eq. 0) then

         if (glb_stats) then
            open( unitn, file=trim(fname), status='UNKNOWN' )
            write( unitn, 100) npes
 100        format(/,"***** GLOBAL STATISTICS (",I6," MPI TASKS) *****",/)
            close( unitn )

            ierr = GPTLpr_summary_file(mpicom2, trim(fname))
         endif

         if (write_data) then
            if (glb_stats) then
               open( unitn, file=trim(fname), status='OLD', position='APPEND' )
            else
               open( unitn, file=trim(fname), status='UNKNOWN' )
            endif

            write( unitn, 101) me, gme
 101        format(/,"************ PROCESS ",I6," (",I6,") ************",/)
            close( unitn )

            ierr = GPTLpr_file(trim(fname))
         endif

      else

         if (glb_stats) then
            ierr = GPTLpr_summary_file(mpicom2, trim(fname))
         endif

         call mpi_recv (signal, 1, mpi_integer, me-1, me-1, mpicom2, status, ierr)
         if (ierr /= mpi_success) then
            write(p_logunit,*) 'T_PRF: mpi_recv failed ierr=',ierr
            call shr_sys_abort()
         end if

         if (write_data) then
            open( unitn, file=trim(fname), status='OLD', position='APPEND' )
            write( unitn, 101) me, gme
            close( unitn )

            ierr = GPTLpr_file(trim(fname))
         endif

      endif

      if (me+1 < npes) &
         call mpi_send (signal, 1, mpi_integer, me+1, me, mpicom2, ierr)

   else

      if (glb_stats) then
         if ( present(filename) ) then
            str_length = min(SHR_KIND_CX-6,len_trim(filename))
            fname(1:str_length) = filename(1:str_length)
         else
            str_length = 6
            fname(1:10) = "timing"
         endif
         fname(str_length+1:str_length+6) = '_stats'

         if (me .eq. 0) then
            open( unitn, file=trim(fname), status='UNKNOWN' )
            write( unitn, 100) npes
            close( unitn )
         endif

         ierr = GPTLpr_summary_file(mpicom2, trim(fname))
         fname(str_length+1:str_length+6) = '      '
      endif

      if (write_data) then
         if (npes .le. 10) then
            write(cme,'(i1.1)') me
            cme_adj = 2
         elseif (npes .le. 100) then
            write(cme,'(i2.2)') me
            cme_adj = 3
         elseif (npes .le. 1000) then
            write(cme,'(i3.3)') me
            cme_adj = 4
         elseif (npes .le. 10000) then
            write(cme,'(i4.4)') me
            cme_adj = 5
         elseif (npes .le. 100000) then
            write(cme,'(i5.5)') me
            cme_adj = 6
         else
            write(cme,'(i6.6)') me
            cme_adj = 7
         endif

         if ( present(filename) ) then
            str_length = min(SHR_KIND_CX-cme_adj,len_trim(filename))
            fname(1:str_length) = filename(1:str_length)
         else
            str_length = 6
            fname(1:10) = "timing"
         endif
         fname(str_length+1:str_length+1) = '.'
         fname(str_length+2:str_length+cme_adj) = cme

         open( unitn, file=trim(fname), status='UNKNOWN' )
         write( unitn, 101) me, gme
         close( unitn )

         ierr = GPTLpr_file(trim(fname))
      endif

   endif

   call shr_file_freeUnit( unitn )

   ! reset GPTL output mode
   if (pr_write) then
     ierr = GPTLpr_set_write()
   endif

!$OMP END MASTER
   call t_stopf("t_prf")

   return
   end subroutine t_prf
!
!========================================================================
!
   subroutine t_initf(NLFilename, LogPrint, LogUnit, mpicom, MasterTask, &
                      MaxThreads)
!-----------------------------------------------------------------------
! Purpose:  Set default values of runtime timing options
!           before namelists prof_inparm and papi_inparm are read,
!           read namelists (and broadcast, if SPMD),
!           then initialize timing library.
! Author:   P. Worley (based on shr_inputinfo_mod and runtime_opts)
!-----------------------------------------------------------------------
!---------------------------Input arguments-----------------------------
!
   character(len=*),   intent(IN) :: NLFilename      ! Name-list filename
   logical, optional,  intent(IN) :: LogPrint        ! If print out to log file
   integer, optional,  intent(IN) :: LogUnit         ! Unit number for log output
   integer, optional,  intent(IN) :: mpicom          ! MPI communicator
   logical, optional,  intent(IN) :: MasterTask      ! If MPI master task
   integer, optional,  intent(IN) :: MaxThreads      ! maximum number of threads
                                                     !  used by components
!
!---------------------------Local workspace-----------------------------
!
   character(len=*), parameter    :: subname = '(T_INITF) '
   logical                        :: MasterTask2     ! If MPI master task
   logical                        :: LogPrint2       ! If print to log

   integer  me                    ! communicator local process id
   integer  ierr                  ! error return
   integer  unitn                 ! file unit number
   integer  papi_ctr1_id          ! PAPI counter id
   integer  papi_ctr2_id          ! PAPI counter id
   integer  papi_ctr3_id          ! PAPI counter id
   integer  papi_ctr4_id          ! PAPI counter id
!
!---------------------------Namelists ----------------------------------
!
   logical profile_disable
   logical profile_barrier
   logical profile_single_file
   logical profile_global_stats
   integer profile_depth_limit
   integer profile_detail_limit
   integer profile_outpe_num
   integer profile_outpe_stride
   integer profile_timer
   logical profile_papi_enable
   logical profile_ovhd_measurement
   logical profile_add_detail
   namelist /prof_inparm/ profile_disable, profile_barrier, &
                          profile_single_file, profile_global_stats, &
                          profile_depth_limit, &
                          profile_detail_limit, profile_outpe_num, &
                          profile_outpe_stride, profile_timer, &
                          profile_papi_enable, profile_ovhd_measurement, &
                          profile_add_detail

   character(len=16) papi_ctr1_str
   character(len=16) papi_ctr2_str
   character(len=16) papi_ctr3_str
   character(len=16) papi_ctr4_str
   namelist /papi_inparm/ papi_ctr1_str, papi_ctr2_str,  &
                          papi_ctr3_str, papi_ctr4_str
!-----------------------------------------------------------------------
    if ( timing_initialized ) then
#ifdef DEBUG
       write(p_logunit,*) 'T_INITF: timing library already initialized. Request ignored.'
#endif
       return
    endif

!$OMP MASTER
    if ( present(LogUnit) ) then
       call t_setLogUnit(LogUnit)
    else
       call t_setLogUnit(def_p_logunit)
    endif

    if ( present(MasterTask) .and. present(mpicom) )then
       call mpi_comm_rank(mpicom, me, ierr)
       if (ierr .eq. MPI_ERR_COMM) then
          call shr_sys_abort('T_INITF: bad mpi communicator')
       endif
       if (me .eq. 0) then
          MasterTask2 = .true.
       else
          MasterTask2 = .false.
       endif
    else
       MasterTask2 = .true.
    end if

    if ( present(LogPrint) ) then
       LogPrint2 = LogPrint
    else
       LogPrint2 = .true.
    endif

    ! Set PERF defaults, then override with user-specified input
    call perf_defaultopts(timing_disable_out=profile_disable, &
                          perf_timer_out=profile_timer, &
                          timer_depth_limit_out=profile_depth_limit, &
                          timing_detail_limit_out=profile_detail_limit, &
                          timing_barrier_out=profile_barrier, &
                          perf_outpe_num_out = profile_outpe_num, &
                          perf_outpe_stride_out = profile_outpe_stride, &
                          perf_single_file_out=profile_single_file, &
                          perf_global_stats_out=profile_global_stats, &
                          perf_papi_enable_out=profile_papi_enable, &
                          perf_ovhd_measurement_out=profile_ovhd_measurement, &
                          perf_add_detail_out=profile_add_detail )
    if ( MasterTask2 ) then

       ! Read in the prof_inparm namelist from NLFilename if it exists

       write(p_logunit,*) '(t_initf) Read in prof_inparm namelist from: '//trim(NLFilename)
       unitn = shr_file_getUnit()

       ierr = 1
       open( unitn, file=trim(NLFilename), status='old', iostat=ierr )
       if (ierr .eq. 0) then

          ! Look for prof_inparm group name in the input file.
          ! If found, leave the file positioned at that namelist group.
          call find_group_name(unitn, 'prof_inparm', status=ierr)

          if (ierr == 0) then  ! found prof_inparm
             read(unitn, nml=prof_inparm, iostat=ierr)
             if (ierr /= 0) then
                call shr_sys_abort( subname//':: namelist read returns an'// &
                                    ' error condition for prof_inparm' )
             end if
          end if

          close(unitn)

       endif
       call shr_file_freeUnit( unitn )

    endif

    ! This logic assumes that there will be only one MasterTask
    ! per communicator, and that this MasterTask is process 0.
    if ( present(MasterTask) .and. present(mpicom) )then
       call shr_mpi_bcast( profile_disable,      MPICom )
       call shr_mpi_bcast( profile_barrier,      MPICom )
       call shr_mpi_bcast( profile_single_file,  MPICom )
       call shr_mpi_bcast( profile_global_stats, MPICom )
       call shr_mpi_bcast( profile_papi_enable,  MPICom )
       call shr_mpi_bcast( profile_ovhd_measurement, MPICom )
       call shr_mpi_bcast( profile_add_detail,   MPICom )
       call shr_mpi_bcast( profile_depth_limit,  MPICom )
       call shr_mpi_bcast( profile_detail_limit, MPICom )
       call shr_mpi_bcast( profile_outpe_num,    MPICom )
       call shr_mpi_bcast( profile_outpe_stride, MPICom )
       call shr_mpi_bcast( profile_timer,        MPICom )
    end if
    call perf_setopts    (MasterTask2, LogPrint2, &
                          timing_disable_in=profile_disable, &
                          perf_timer_in=profile_timer, &
                          timer_depth_limit_in=profile_depth_limit, &
                          timing_detail_limit_in=profile_detail_limit, &
                          timing_barrier_in=profile_barrier, &
                          perf_outpe_num_in=profile_outpe_num, &
                          perf_outpe_stride_in=profile_outpe_stride, &
                          perf_single_file_in=profile_single_file, &
                          perf_global_stats_in=profile_global_stats, &
                          perf_papi_enable_in=profile_papi_enable, &
                          perf_ovhd_measurement_in=profile_ovhd_measurement, &
                          perf_add_detail_in=profile_add_detail )

    ! Set PAPI defaults, then override with user-specified input
    if (perf_papi_enable) then
       call papi_defaultopts(papi_ctr1_out=papi_ctr1_id, &
                             papi_ctr2_out=papi_ctr2_id, &
                             papi_ctr3_out=papi_ctr3_id, &
                             papi_ctr4_out=papi_ctr4_id )

       if ( MasterTask2 ) then
          papi_ctr1_str = "PAPI_NO_CTR"
          papi_ctr2_str = "PAPI_NO_CTR"
          papi_ctr3_str = "PAPI_NO_CTR"
          papi_ctr4_str = "PAPI_NO_CTR"


          ! Read in the papi_inparm namelist from NLFilename if it exists

          write(p_logunit,*) '(t_initf) Read in papi_inparm namelist from: '//trim(NLFilename)
          unitn = shr_file_getUnit()

          ierr = 1
          open( unitn, file=trim(NLFilename), status='old', iostat=ierr )
          if (ierr .eq. 0) then
             ! Look for papi_inparm group name in the input file.
             ! If found, leave the file positioned at that namelist group.
             call find_group_name(unitn, 'papi_inparm', status=ierr)

             if (ierr == 0) then  ! found papi_inparm
                read(unitn, nml=papi_inparm, iostat=ierr)
                if (ierr /= 0) then
                   call shr_sys_abort( subname//':: namelist read returns an'// &
                                      ' error condition for papi_inparm' )
                end if
             end if

             close(unitn)

          endif
          call shr_file_freeUnit( unitn )

          ! if enabled and nothing set, use "defaults"
          if ((papi_ctr1_str(1:11) .eq. "PAPI_NO_CTR") .and. &
              (papi_ctr2_str(1:11) .eq. "PAPI_NO_CTR") .and. &
              (papi_ctr3_str(1:11) .eq. "PAPI_NO_CTR") .and. &
              (papi_ctr4_str(1:11) .eq. "PAPI_NO_CTR")) then
!pw              papi_ctr1_str = "PAPI_TOT_CYC"
!pw              papi_ctr2_str = "PAPI_TOT_INS"
!pw              papi_ctr3_str = "PAPI_FP_OPS"
!pw              papi_ctr4_str = "PAPI_FP_INS"
              papi_ctr1_str = "PAPI_FP_OPS"
          endif

          if (papi_ctr1_str(1:11) /= "PAPI_NO_CTR") then
             ierr = gptlevent_name_to_code(trim(papi_ctr1_str), papi_ctr1_id)
          endif
          if (papi_ctr2_str(1:11) /= "PAPI_NO_CTR") then
             ierr = gptlevent_name_to_code(trim(papi_ctr2_str), papi_ctr2_id)
          endif
          if (papi_ctr3_str(1:11) /= "PAPI_NO_CTR") then
             ierr = gptlevent_name_to_code(trim(papi_ctr3_str), papi_ctr3_id)
          endif
          if (papi_ctr4_str(1:11) /= "PAPI_NO_CTR") then
             ierr = gptlevent_name_to_code(trim(papi_ctr4_str), papi_ctr4_id)
          endif

       endif
       ! This logic assumes that there will be only one MasterTask
       ! per communicator, and that this MasterTask is process 0.
       if ( present(MasterTask) .and. present(mpicom) )then
          call shr_mpi_bcast( papi_ctr1_id,    MPICom )
          call shr_mpi_bcast( papi_ctr2_id,    MPICom )
          call shr_mpi_bcast( papi_ctr3_id,    MPICom )
          call shr_mpi_bcast( papi_ctr4_id,    MPICom )
       end if

       call papi_setopts    (papi_ctr1_in=papi_ctr1_id, &
                             papi_ctr2_in=papi_ctr2_id, &
                             papi_ctr3_in=papi_ctr3_id, &
                             papi_ctr4_in=papi_ctr4_id )
    endif
!$OMP END MASTER
!$OMP BARRIER

   if (timing_disable) return

!$OMP MASTER
   !
   ! Set options and initialize timing library.
   !
   ! Set timer
   if (gptlsetutr (perf_timer) < 0) call shr_sys_abort (subname//':: gptlsetutr')
   !
   ! For logical settings, 2nd arg 0
   ! to gptlsetoption means disable, non-zero means enable
   !
   ! Turn off CPU timing (expensive)
   !
   if (gptlsetoption (gptlcpu, 0) < 0) call shr_sys_abort (subname//':: gptlsetoption')
   !
   !
   !
!pw   if(present(MaxThreads)) then
!pw      if (gptlsetoption (gptlmaxthreads, MaxThreads) < 0) call shr_sys_abort (subname//':: gptlsetoption')
!pw   endif
   !
   ! Set max timer depth
   !
   if (gptlsetoption (gptldepthlimit, timer_depth_limit) < 0) &
     call shr_sys_abort (subname//':: gptlsetoption')
   !
   ! Set profile ovhd measurement (default is false)
   !
   if (perf_ovhd_measurement) then
     if (gptlsetoption (gptlprofile_ovhd, 1) < 0) &
       call shr_sys_abort (subname//':: gptlsetoption')
   endif
   !
   ! Next 2 calls only work if PAPI is enabled.  These examples enable counting
   ! of total cycles and floating point ops, respectively
   !
   if (perf_papi_enable) then
      if (papi_ctr1 /= PAPI_NULL) then
         if (gptlsetoption (papi_ctr1, 1) < 0) call shr_sys_abort (subname//':: gptlsetoption')
      endif
      if (papi_ctr2 /= PAPI_NULL) then
         if (gptlsetoption (papi_ctr2, 1) < 0) call shr_sys_abort (subname//':: gptlsetoption')
      endif
      if (papi_ctr3 /= PAPI_NULL) then
         if (gptlsetoption (papi_ctr3, 1) < 0) call shr_sys_abort (subname//':: gptlsetoption')
      endif
      if (papi_ctr4 /= PAPI_NULL) then
         if (gptlsetoption (papi_ctr4, 1) < 0) call shr_sys_abort (subname//':: gptlsetoption')
      endif
   endif
   !
   ! Initialize the timing lib.  This call must occur after all gptlsetoption
   ! calls and before all other timing lib calls.
   !
   if (gptlinitialize () < 0) call shr_sys_abort (subname//':: gptlinitialize')
   timing_initialized = .true.
!$OMP END MASTER
!$OMP BARRIER

   return
   end subroutine t_initf
!
!========================================================================
!
   subroutine t_finalizef()
!-----------------------------------------------------------------------
! Purpose: shut down timing library
! Author: P. Worley
!-----------------------------------------------------------------------
!---------------------------Local workspace-----------------------------
!
   integer  ierr                  ! GPTL error return
!
!-----------------------------------------------------------------------
!
   if (.not. timing_initialized) return

!$OMP MASTER
   ierr = GPTLfinalize()
   timing_initialized = .false.
!$OMP END MASTER
!$OMP BARRIER

   return
   end subroutine t_finalizef

!===============================================================================

end module perf_mod