timing.F90 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800
  1. MODULE timing
  2. !!========================================================================
  3. !! *** MODULE timing ***
  4. !!========================================================================
  5. !! History : 4.0 ! 2001-05 (R. Benshila)
  6. !!------------------------------------------------------------------------
  7. !!------------------------------------------------------------------------
  8. !! timming_init : initialize timing process
  9. !! timing_start : start Timer
  10. !! timing_stop : stop Timer
  11. !! timing_reset : end timing variable creation
  12. !! timing_finalize : compute stats and write output in calling w*_info
  13. !! timing_ini_var : create timing variables
  14. !! timing_listing : print instumented subroutines in ocean.output
  15. !! wcurrent_info : compute and print detailed stats on the current CPU
  16. !! wave_info : compute and print averaged statson all processors
  17. !! wmpi_info : compute and write global stats
  18. !! supress : suppress an element of the timing linked list
  19. !! insert : insert an element of the timing linked list
  20. !!------------------------------------------------------------------------
  21. USE in_out_manager ! I/O manager
  22. USE dom_oce ! ocean domain
  23. USE lib_mpp
  24. IMPLICIT NONE
  25. PRIVATE
  26. PUBLIC timing_init, timing_finalize ! called in nemogcm module
  27. PUBLIC timing_reset ! called in step module
  28. PUBLIC timing_start, timing_stop ! called in each routine to time
  29. #if defined key_mpp_mpi
  30. INCLUDE 'mpif.h'
  31. #endif
  32. ! Variables for fine grain timing
  33. TYPE timer
  34. CHARACTER(LEN=20) :: cname
  35. REAL(wp) :: t_cpu, t_clock, tsum_cpu, tsum_clock, tmax_cpu, tmax_clock, tmin_cpu, tmin_clock, tsub_cpu, tsub_clock
  36. INTEGER :: ncount, ncount_max, ncount_rate
  37. INTEGER :: niter
  38. LOGICAL :: l_tdone
  39. TYPE(timer), POINTER :: next => NULL()
  40. TYPE(timer), POINTER :: prev => NULL()
  41. TYPE(timer), POINTER :: parent_section => NULL()
  42. END TYPE timer
  43. TYPE alltimer
  44. CHARACTER(LEN=20), DIMENSION(:), POINTER :: cname => NULL()
  45. REAL(wp), DIMENSION(:), POINTER :: tsum_cpu => NULL()
  46. REAL(wp), DIMENSION(:), POINTER :: tsum_clock => NULL()
  47. INTEGER, DIMENSION(:), POINTER :: niter => NULL()
  48. TYPE(alltimer), POINTER :: next => NULL()
  49. TYPE(alltimer), POINTER :: prev => NULL()
  50. END TYPE alltimer
  51. TYPE(timer), POINTER :: s_timer_root => NULL()
  52. TYPE(timer), POINTER :: s_timer => NULL()
  53. TYPE(timer), POINTER :: s_wrk => NULL()
  54. REAL(wp) :: t_overclock, t_overcpu
  55. LOGICAL :: l_initdone = .FALSE.
  56. INTEGER :: nsize
  57. ! Variables for coarse grain timing
  58. REAL(wp) :: tot_etime, tot_ctime
  59. REAL(kind=wp), DIMENSION(2) :: t_elaps, t_cpu
  60. REAL(wp), ALLOCATABLE, DIMENSION(:) :: all_etime, all_ctime
  61. INTEGER :: nfinal_count, ncount, ncount_rate, ncount_max
  62. INTEGER, DIMENSION(8) :: nvalues
  63. CHARACTER(LEN=8), DIMENSION(2) :: cdate
  64. CHARACTER(LEN=10), DIMENSION(2) :: ctime
  65. CHARACTER(LEN=5) :: czone
  66. ! From of ouput file (1/proc or one global) !RB to put in nammpp or namctl
  67. LOGICAL :: ln_onefile = .TRUE.
  68. LOGICAL :: lwriter
  69. !!----------------------------------------------------------------------
  70. !! NEMO/OPA 4.0 , NEMO Consortium (2011)
  71. !! $Id: timing.F90 3610 2012-11-19 16:00:49Z acc $
  72. !! Software governed by the CeCILL licence (NEMOGCM/NEMO_CeCILL.txt)
  73. !!----------------------------------------------------------------------
  74. CONTAINS
  75. SUBROUTINE timing_start(cdinfo)
  76. !!----------------------------------------------------------------------
  77. !! *** ROUTINE timing_start ***
  78. !! ** Purpose : collect execution time
  79. !!----------------------------------------------------------------------
  80. CHARACTER(len=*), INTENT(in) :: cdinfo
  81. !
  82. ! Create timing structure at first call
  83. IF( .NOT. l_initdone ) THEN
  84. CALL timing_ini_var(cdinfo)
  85. ELSE
  86. s_timer => s_timer_root
  87. DO WHILE( TRIM(s_timer%cname) /= TRIM(cdinfo) )
  88. IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next
  89. END DO
  90. ENDIF
  91. s_timer%l_tdone = .FALSE.
  92. s_timer%niter = s_timer%niter + 1
  93. s_timer%t_cpu = 0.
  94. s_timer%t_clock = 0.
  95. ! CPU time collection
  96. CALL CPU_TIME( s_timer%t_cpu )
  97. ! clock time collection
  98. #if defined key_mpp_mpi
  99. s_timer%t_clock= MPI_Wtime()
  100. #else
  101. CALL SYSTEM_CLOCK(COUNT_RATE=s_timer%ncount_rate, COUNT_MAX=s_timer%ncount_max)
  102. CALL SYSTEM_CLOCK(COUNT = s_timer%ncount)
  103. #endif
  104. !
  105. END SUBROUTINE timing_start
  106. SUBROUTINE timing_stop(cdinfo, csection)
  107. !!----------------------------------------------------------------------
  108. !! *** ROUTINE timing_stop ***
  109. !! ** Purpose : finalize timing and output
  110. !!----------------------------------------------------------------------
  111. CHARACTER(len=*), INTENT(in) :: cdinfo
  112. CHARACTER(len=*), INTENT(in), OPTIONAL :: csection
  113. !
  114. INTEGER :: ifinal_count, iperiods
  115. REAL(wp) :: zcpu_end, zmpitime
  116. !
  117. s_wrk => NULL()
  118. ! clock time collection
  119. #if defined key_mpp_mpi
  120. zmpitime = MPI_Wtime()
  121. #else
  122. CALL SYSTEM_CLOCK(COUNT = ifinal_count)
  123. #endif
  124. ! CPU time collection
  125. CALL CPU_TIME( zcpu_end )
  126. s_timer => s_timer_root
  127. DO WHILE( TRIM(s_timer%cname) /= TRIM(cdinfo) )
  128. IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next
  129. END DO
  130. ! CPU time correction
  131. s_timer%t_cpu = zcpu_end - s_timer%t_cpu - t_overcpu - s_timer%tsub_cpu
  132. ! clock time correction
  133. #if defined key_mpp_mpi
  134. s_timer%t_clock = zmpitime - s_timer%t_clock - t_overclock - s_timer%tsub_clock
  135. #else
  136. iperiods = ifinal_count - s_timer%ncount
  137. IF( ifinal_count < s_timer%ncount ) &
  138. iperiods = iperiods + s_timer%ncount_max
  139. s_timer%t_clock = REAL(iperiods) / s_timer%ncount_rate - t_overclock - s_timer%tsub_clock
  140. #endif
  141. ! Correction of parent section
  142. IF( .NOT. PRESENT(csection) ) THEN
  143. s_wrk => s_timer
  144. DO WHILE ( ASSOCIATED(s_wrk%parent_section ) )
  145. s_wrk => s_wrk%parent_section
  146. s_wrk%tsub_cpu = s_wrk%tsub_cpu + s_timer%t_cpu
  147. s_wrk%tsub_clock = s_wrk%tsub_clock + s_timer%t_clock
  148. END DO
  149. ENDIF
  150. ! time diagnostics
  151. s_timer%tsum_clock = s_timer%tsum_clock + s_timer%t_clock
  152. s_timer%tsum_cpu = s_timer%tsum_cpu + s_timer%t_cpu
  153. !RB to use to get min/max during a time integration
  154. ! IF( .NOT. l_initdone ) THEN
  155. ! s_timer%tmin_clock = s_timer%t_clock
  156. ! s_timer%tmin_cpu = s_timer%t_cpu
  157. ! ELSE
  158. ! s_timer%tmin_clock = MIN( s_timer%tmin_clock, s_timer%t_clock )
  159. ! s_timer%tmin_cpu = MIN( s_timer%tmin_cpu , s_timer%t_cpu )
  160. ! ENDIF
  161. ! s_timer%tmax_clock = MAX( s_timer%tmax_clock, s_timer%t_clock )
  162. ! s_timer%tmax_cpu = MAX( s_timer%tmax_cpu , s_timer%t_cpu )
  163. !
  164. s_timer%tsub_clock = 0.
  165. s_timer%tsub_cpu = 0.
  166. s_timer%l_tdone = .TRUE.
  167. !
  168. END SUBROUTINE timing_stop
  169. SUBROUTINE timing_init
  170. !!----------------------------------------------------------------------
  171. !! *** ROUTINE timing_init ***
  172. !! ** Purpose : open timing output file
  173. !!----------------------------------------------------------------------
  174. INTEGER :: iperiods, istart_count, ifinal_count
  175. REAL(wp) :: zdum
  176. LOGICAL :: ll_f
  177. IF( ln_onefile ) THEN
  178. IF( lwp) CALL ctl_opn( numtime, 'timing.output', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, numout,.TRUE., narea )
  179. lwriter = lwp
  180. ELSE
  181. CALL ctl_opn( numtime, 'timing.output', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, numout,.FALSE., narea )
  182. lwriter = .TRUE.
  183. ENDIF
  184. IF( lwriter) THEN
  185. WRITE(numtime,*)
  186. WRITE(numtime,*) ' CNRS - NERC - Met OFFICE - MERCATOR-ocean - CMCC - INGV'
  187. WRITE(numtime,*) ' NEMO team'
  188. WRITE(numtime,*) ' Ocean General Circulation Model'
  189. WRITE(numtime,*) ' version 3.6 (2015) '
  190. WRITE(numtime,*)
  191. WRITE(numtime,*) ' Timing Informations '
  192. WRITE(numtime,*)
  193. WRITE(numtime,*)
  194. ENDIF
  195. ! Compute clock function overhead
  196. #if defined key_mpp_mpi
  197. t_overclock = MPI_WTIME()
  198. t_overclock = MPI_WTIME() - t_overclock
  199. #else
  200. CALL SYSTEM_CLOCK(COUNT_RATE=ncount_rate, COUNT_MAX=ncount_max)
  201. CALL SYSTEM_CLOCK(COUNT = istart_count)
  202. CALL SYSTEM_CLOCK(COUNT = ifinal_count)
  203. iperiods = ifinal_count - istart_count
  204. IF( ifinal_count < istart_count ) &
  205. iperiods = iperiods + ncount_max
  206. t_overclock = REAL(iperiods) / ncount_rate
  207. #endif
  208. ! Compute cpu_time function overhead
  209. CALL CPU_TIME(zdum)
  210. CALL CPU_TIME(t_overcpu)
  211. ! End overhead omputation
  212. t_overcpu = t_overcpu - zdum
  213. t_overclock = t_overcpu + t_overclock
  214. ! Timing on date and time
  215. CALL DATE_AND_TIME(cdate(1),ctime(1),czone,nvalues)
  216. CALL CPU_TIME(t_cpu(1))
  217. #if defined key_mpp_mpi
  218. ! Start elapsed and CPU time counters
  219. t_elaps(1) = MPI_WTIME()
  220. #else
  221. CALL SYSTEM_CLOCK(COUNT_RATE=ncount_rate, COUNT_MAX=ncount_max)
  222. CALL SYSTEM_CLOCK(COUNT = ncount)
  223. #endif
  224. !
  225. END SUBROUTINE timing_init
  226. SUBROUTINE timing_finalize
  227. !!----------------------------------------------------------------------
  228. !! *** ROUTINE timing_finalize ***
  229. !! ** Purpose : compute average time
  230. !! write timing output file
  231. !!----------------------------------------------------------------------
  232. TYPE(timer), POINTER :: s_temp
  233. INTEGER :: idum, iperiods, icode
  234. LOGICAL :: ll_ord, ll_averep
  235. CHARACTER(len=120) :: clfmt
  236. ll_averep = .TRUE.
  237. ! total CPU and elapse
  238. CALL CPU_TIME(t_cpu(2))
  239. t_cpu(2) = t_cpu(2) - t_cpu(1) - t_overcpu
  240. #if defined key_mpp_mpi
  241. t_elaps(2) = MPI_WTIME() - t_elaps(1) - t_overclock
  242. #else
  243. CALL SYSTEM_CLOCK(COUNT = nfinal_count)
  244. iperiods = nfinal_count - ncount
  245. IF( nfinal_count < ncount ) &
  246. iperiods = iperiods + ncount_max
  247. t_elaps(2) = REAL(iperiods) / ncount_rate - t_overclock
  248. #endif
  249. ! End of timings on date & time
  250. CALL DATE_AND_TIME(cdate(2),ctime(2),czone,nvalues)
  251. ! Compute the numer of routines
  252. nsize = 0
  253. s_timer => s_timer_root
  254. DO WHILE( ASSOCIATED(s_timer) )
  255. nsize = nsize + 1
  256. s_timer => s_timer%next
  257. END DO
  258. idum = nsize
  259. IF(lk_mpp) CALL mpp_sum(idum)
  260. IF( idum/jpnij /= nsize ) THEN
  261. IF( lwriter ) WRITE(numtime,*) ' ===> W A R N I N G: '
  262. IF( lwriter ) WRITE(numtime,*) ' Some CPU have different number of routines instrumented for timing'
  263. IF( lwriter ) WRITE(numtime,*) ' No detailed report on averaged timing can be provided'
  264. IF( lwriter ) WRITE(numtime,*) ' The following detailed report only deals with the current processor'
  265. IF( lwriter ) WRITE(numtime,*)
  266. ll_averep = .FALSE.
  267. ENDIF
  268. #if defined key_mpp_mpi
  269. ! in MPI gather some info
  270. ALLOCATE( all_etime(jpnij), all_ctime(jpnij) )
  271. CALL MPI_ALLGATHER(t_elaps(2), 1, MPI_DOUBLE_PRECISION, &
  272. all_etime , 1, MPI_DOUBLE_PRECISION, &
  273. MPI_COMM_OPA, icode)
  274. CALL MPI_ALLGATHER(t_cpu(2) , 1, MPI_DOUBLE_PRECISION, &
  275. all_ctime, 1, MPI_DOUBLE_PRECISION, &
  276. MPI_COMM_OPA, icode)
  277. tot_etime = SUM(all_etime(:))
  278. tot_ctime = SUM(all_ctime(:))
  279. #else
  280. tot_etime = t_elaps(2)
  281. tot_ctime = t_cpu (2)
  282. #endif
  283. ! write output file
  284. IF( lwriter ) WRITE(numtime,*) 'Total timing (sum) :'
  285. IF( lwriter ) WRITE(numtime,*) '--------------------'
  286. IF( lwriter ) WRITE(numtime,"('Elapsed Time (s) CPU Time (s)')")
  287. IF( lwriter ) WRITE(numtime,'(5x,f12.3,1x,f12.3)') tot_etime, tot_ctime
  288. IF( lwriter ) WRITE(numtime,*)
  289. #if defined key_mpp_mpi
  290. IF( ll_averep ) CALL waver_info
  291. CALL wmpi_info
  292. #endif
  293. IF( lwriter ) CALL wcurrent_info
  294. clfmt='(1X,"Timing started on ",2(A2,"/"),A4," at ",2(A2,":"),A2," MET ",A3,":",A2," from GMT")'
  295. IF( lwriter ) WRITE(numtime, TRIM(clfmt)) &
  296. & cdate(1)(7:8), cdate(1)(5:6), cdate(1)(1:4), &
  297. & ctime(1)(1:2), ctime(1)(3:4), ctime(1)(5:6), &
  298. & czone(1:3), czone(4:5)
  299. clfmt='(1X, "Timing ended on ",2(A2,"/"),A4," at ",2(A2,":"),A2," MET ",A3,":",A2," from GMT")'
  300. IF( lwriter ) WRITE(numtime, TRIM(clfmt)) &
  301. & cdate(2)(7:8), cdate(2)(5:6), cdate(2)(1:4), &
  302. & ctime(2)(1:2), ctime(2)(3:4), ctime(2)(5:6), &
  303. & czone(1:3), czone(4:5)
  304. IF( lwriter ) CLOSE(numtime)
  305. !
  306. END SUBROUTINE timing_finalize
  307. SUBROUTINE wcurrent_info
  308. !!----------------------------------------------------------------------
  309. !! *** ROUTINE wcurrent_info ***
  310. !! ** Purpose : compute and write timing output file
  311. !!----------------------------------------------------------------------
  312. LOGICAL :: ll_ord
  313. CHARACTER(len=2048) :: clfmt
  314. ! reorder the current list by elapse time
  315. s_wrk => NULL()
  316. s_timer => s_timer_root
  317. DO
  318. ll_ord = .TRUE.
  319. s_timer => s_timer_root
  320. DO WHILE ( ASSOCIATED( s_timer%next ) )
  321. IF (.NOT. ASSOCIATED(s_timer%next)) EXIT
  322. IF ( s_timer%tsum_clock < s_timer%next%tsum_clock ) THEN
  323. ALLOCATE(s_wrk)
  324. s_wrk = s_timer%next
  325. CALL insert (s_timer, s_timer_root, s_wrk)
  326. CALL suppress(s_timer%next)
  327. ll_ord = .FALSE.
  328. CYCLE
  329. ENDIF
  330. IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next
  331. END DO
  332. IF( ll_ord ) EXIT
  333. END DO
  334. ! write current info
  335. WRITE(numtime,*) 'Detailed timing for proc :', narea-1
  336. WRITE(numtime,*) '--------------------------'
  337. WRITE(numtime,*) 'Section ', &
  338. & 'Elapsed Time (s) ','Elapsed Time (%) ', &
  339. & 'CPU Time(s) ','CPU Time (%) ','CPU/Elapsed ','Frequency'
  340. s_timer => s_timer_root
  341. clfmt = '(1x,a,4x,f12.3,6x,f12.3,x,f12.3,2x,f12.3,6x,f7.3,2x,i9)'
  342. DO WHILE ( ASSOCIATED(s_timer) )
  343. WRITE(numtime,TRIM(clfmt)) s_timer%cname, &
  344. & s_timer%tsum_clock,s_timer%tsum_clock*100./t_elaps(2), &
  345. & s_timer%tsum_cpu ,s_timer%tsum_cpu*100./t_cpu(2) , &
  346. & s_timer%tsum_cpu/s_timer%tsum_clock, s_timer%niter
  347. s_timer => s_timer%next
  348. END DO
  349. WRITE(numtime,*)
  350. !
  351. END SUBROUTINE wcurrent_info
  352. #if defined key_mpp_mpi
  353. SUBROUTINE waver_info
  354. !!----------------------------------------------------------------------
  355. !! *** ROUTINE wcurrent_info ***
  356. !! ** Purpose : compute and write averaged timing informations
  357. !!----------------------------------------------------------------------
  358. TYPE(alltimer), POINTER :: sl_timer_glob_root => NULL()
  359. TYPE(alltimer), POINTER :: sl_timer_glob => NULL()
  360. TYPE(timer), POINTER :: sl_timer_ave_root => NULL()
  361. TYPE(timer), POINTER :: sl_timer_ave => NULL()
  362. INTEGER :: icode
  363. INTEGER :: ierr
  364. LOGICAL :: ll_ord
  365. CHARACTER(len=200) :: clfmt
  366. ! Initialised the global strucutre
  367. ALLOCATE(sl_timer_glob_root, Stat=ierr)
  368. IF(ierr /= 0)THEN
  369. WRITE(numtime,*) 'Failed to allocate global timing structure in waver_info'
  370. RETURN
  371. END IF
  372. ALLOCATE(sl_timer_glob_root%cname (jpnij), &
  373. sl_timer_glob_root%tsum_cpu (jpnij), &
  374. sl_timer_glob_root%tsum_clock(jpnij), &
  375. sl_timer_glob_root%niter (jpnij), Stat=ierr)
  376. IF(ierr /= 0)THEN
  377. WRITE(numtime,*) 'Failed to allocate global timing structure in waver_info'
  378. RETURN
  379. END IF
  380. sl_timer_glob_root%cname(:) = ''
  381. sl_timer_glob_root%tsum_cpu(:) = 0._wp
  382. sl_timer_glob_root%tsum_clock(:) = 0._wp
  383. sl_timer_glob_root%niter(:) = 0
  384. sl_timer_glob_root%next => NULL()
  385. sl_timer_glob_root%prev => NULL()
  386. !ARPDBG - don't need to allocate a pointer that's immediately then
  387. ! set to point to some other object.
  388. !ALLOCATE(sl_timer_glob)
  389. !ALLOCATE(sl_timer_glob%cname (jpnij))
  390. !ALLOCATE(sl_timer_glob%tsum_cpu (jpnij))
  391. !ALLOCATE(sl_timer_glob%tsum_clock(jpnij))
  392. !ALLOCATE(sl_timer_glob%niter (jpnij))
  393. sl_timer_glob => sl_timer_glob_root
  394. !
  395. IF( narea .EQ. 1 ) THEN
  396. ALLOCATE(sl_timer_ave_root)
  397. sl_timer_ave_root%cname = ''
  398. sl_timer_ave_root%t_cpu = 0._wp
  399. sl_timer_ave_root%t_clock = 0._wp
  400. sl_timer_ave_root%tsum_cpu = 0._wp
  401. sl_timer_ave_root%tsum_clock = 0._wp
  402. sl_timer_ave_root%tmax_cpu = 0._wp
  403. sl_timer_ave_root%tmax_clock = 0._wp
  404. sl_timer_ave_root%tmin_cpu = 0._wp
  405. sl_timer_ave_root%tmin_clock = 0._wp
  406. sl_timer_ave_root%tsub_cpu = 0._wp
  407. sl_timer_ave_root%tsub_clock = 0._wp
  408. sl_timer_ave_root%ncount = 0
  409. sl_timer_ave_root%ncount_rate = 0
  410. sl_timer_ave_root%ncount_max = 0
  411. sl_timer_ave_root%niter = 0
  412. sl_timer_ave_root%l_tdone = .FALSE.
  413. sl_timer_ave_root%next => NULL()
  414. sl_timer_ave_root%prev => NULL()
  415. sl_timer_ave => sl_timer_ave_root
  416. ENDIF
  417. ! Gather info from all processors
  418. s_timer => s_timer_root
  419. DO WHILE ( ASSOCIATED(s_timer) )
  420. CALL MPI_GATHER(s_timer%cname , 20, MPI_CHARACTER, &
  421. sl_timer_glob%cname, 20, MPI_CHARACTER, &
  422. 0, MPI_COMM_OPA, icode)
  423. CALL MPI_GATHER(s_timer%tsum_clock , 1, MPI_DOUBLE_PRECISION, &
  424. sl_timer_glob%tsum_clock, 1, MPI_DOUBLE_PRECISION, &
  425. 0, MPI_COMM_OPA, icode)
  426. CALL MPI_GATHER(s_timer%tsum_cpu , 1, MPI_DOUBLE_PRECISION, &
  427. sl_timer_glob%tsum_cpu, 1, MPI_DOUBLE_PRECISION, &
  428. 0, MPI_COMM_OPA, icode)
  429. CALL MPI_GATHER(s_timer%niter , 1, MPI_INTEGER, &
  430. sl_timer_glob%niter, 1, MPI_INTEGER, &
  431. 0, MPI_COMM_OPA, icode)
  432. IF( narea == 1 .AND. ASSOCIATED(s_timer%next) ) THEN
  433. ALLOCATE(sl_timer_glob%next)
  434. ALLOCATE(sl_timer_glob%next%cname (jpnij))
  435. ALLOCATE(sl_timer_glob%next%tsum_cpu (jpnij))
  436. ALLOCATE(sl_timer_glob%next%tsum_clock(jpnij))
  437. ALLOCATE(sl_timer_glob%next%niter (jpnij))
  438. sl_timer_glob%next%prev => sl_timer_glob
  439. sl_timer_glob%next%next => NULL()
  440. sl_timer_glob => sl_timer_glob%next
  441. ENDIF
  442. s_timer => s_timer%next
  443. END DO
  444. WRITE(*,*) 'ARPDBG: timing: done gathers'
  445. IF( narea == 1 ) THEN
  446. ! Compute some stats
  447. sl_timer_glob => sl_timer_glob_root
  448. DO WHILE( ASSOCIATED(sl_timer_glob) )
  449. sl_timer_ave%cname = sl_timer_glob%cname(1)
  450. sl_timer_ave%tsum_cpu = SUM (sl_timer_glob%tsum_cpu (:)) / jpnij
  451. sl_timer_ave%tsum_clock = SUM (sl_timer_glob%tsum_clock(:)) / jpnij
  452. sl_timer_ave%tmax_cpu = MAXVAL(sl_timer_glob%tsum_cpu (:))
  453. sl_timer_ave%tmax_clock = MAXVAL(sl_timer_glob%tsum_clock(:))
  454. sl_timer_ave%tmin_cpu = MINVAL(sl_timer_glob%tsum_cpu (:))
  455. sl_timer_ave%tmin_clock = MINVAL(sl_timer_glob%tsum_clock(:))
  456. sl_timer_ave%niter = SUM (sl_timer_glob%niter (:))
  457. !
  458. IF( ASSOCIATED(sl_timer_glob%next) ) THEN
  459. ALLOCATE(sl_timer_ave%next)
  460. sl_timer_ave%next%prev => sl_timer_ave
  461. sl_timer_ave%next%next => NULL()
  462. sl_timer_ave => sl_timer_ave%next
  463. ENDIF
  464. sl_timer_glob => sl_timer_glob%next
  465. END DO
  466. WRITE(*,*) 'ARPDBG: timing: done computing stats'
  467. ! reorder the averaged list by CPU time
  468. s_wrk => NULL()
  469. sl_timer_ave => sl_timer_ave_root
  470. DO
  471. ll_ord = .TRUE.
  472. sl_timer_ave => sl_timer_ave_root
  473. DO WHILE( ASSOCIATED( sl_timer_ave%next ) )
  474. IF( .NOT. ASSOCIATED(sl_timer_ave%next) ) EXIT
  475. IF ( sl_timer_ave%tsum_clock < sl_timer_ave%next%tsum_clock ) THEN
  476. ALLOCATE(s_wrk)
  477. ! Copy data into the new object pointed to by s_wrk
  478. s_wrk = sl_timer_ave%next
  479. ! Insert this new timer object before our current position
  480. CALL insert (sl_timer_ave, sl_timer_ave_root, s_wrk)
  481. ! Remove the old object from the list
  482. CALL suppress(sl_timer_ave%next)
  483. ll_ord = .FALSE.
  484. CYCLE
  485. ENDIF
  486. IF( ASSOCIATED(sl_timer_ave%next) ) sl_timer_ave => sl_timer_ave%next
  487. END DO
  488. IF( ll_ord ) EXIT
  489. END DO
  490. ! write averaged info
  491. WRITE(numtime,"('Averaged timing on all processors :')")
  492. WRITE(numtime,"('-----------------------------------')")
  493. WRITE(numtime,"('Section',13x,'Elap. Time(s)',2x,'Elap. Time(%)',2x, &
  494. & 'CPU Time(s)',2x,'CPU Time(%)',2x,'CPU/Elap',1x, &
  495. & 'Max elap(%)',2x,'Min elap(%)',2x, &
  496. & 'Freq')")
  497. sl_timer_ave => sl_timer_ave_root
  498. clfmt = '((A),E15.7,2x,f6.2,5x,f12.2,5x,f6.2,5x,f7.2,2x,f12.2,4x,f6.2,2x,f9.2)'
  499. DO WHILE ( ASSOCIATED(sl_timer_ave) )
  500. WRITE(numtime,TRIM(clfmt)) sl_timer_ave%cname(1:18), &
  501. & sl_timer_ave%tsum_clock,sl_timer_ave%tsum_clock*100.*jpnij/tot_etime, &
  502. & sl_timer_ave%tsum_cpu ,sl_timer_ave%tsum_cpu*100.*jpnij/tot_ctime , &
  503. & sl_timer_ave%tsum_cpu/sl_timer_ave%tsum_clock, &
  504. & sl_timer_ave%tmax_clock*100.*jpnij/tot_etime, &
  505. & sl_timer_ave%tmin_clock*100.*jpnij/tot_etime, &
  506. & sl_timer_ave%niter/REAL(jpnij)
  507. sl_timer_ave => sl_timer_ave%next
  508. END DO
  509. WRITE(numtime,*)
  510. !
  511. DEALLOCATE(sl_timer_ave_root)
  512. ENDIF
  513. !
  514. DEALLOCATE(sl_timer_glob_root%cname , &
  515. sl_timer_glob_root%tsum_cpu , &
  516. sl_timer_glob_root%tsum_clock, &
  517. sl_timer_glob_root%niter)
  518. !
  519. DEALLOCATE(sl_timer_glob_root)
  520. !
  521. END SUBROUTINE waver_info
  522. SUBROUTINE wmpi_info
  523. !!----------------------------------------------------------------------
  524. !! *** ROUTINE wmpi_time ***
  525. !! ** Purpose : compute and write a summary of MPI infos
  526. !!----------------------------------------------------------------------
  527. !
  528. INTEGER :: idum, icode
  529. INTEGER, ALLOCATABLE, DIMENSION(:) :: iall_rank
  530. REAL(wp) :: ztot_ratio
  531. REAL(wp) :: zmax_etime, zmax_ctime, zmax_ratio, zmin_etime, zmin_ctime, zmin_ratio
  532. REAL(wp) :: zavg_etime, zavg_ctime, zavg_ratio
  533. REAL(wp), ALLOCATABLE, DIMENSION(:) :: zall_ratio
  534. CHARACTER(LEN=128), dimension(8) :: cllignes
  535. CHARACTER(LEN=128) :: clhline, clstart_date, clfinal_date
  536. CHARACTER(LEN=2048) :: clfmt
  537. ! Gather all times
  538. ALLOCATE( zall_ratio(jpnij), iall_rank(jpnij) )
  539. IF( narea == 1 ) THEN
  540. iall_rank(:) = (/ (idum,idum=0,jpnij-1) /)
  541. ! Compute elapse user time
  542. zavg_etime = tot_etime/REAL(jpnij,wp)
  543. zmax_etime = MAXVAL(all_etime(:))
  544. zmin_etime = MINVAL(all_etime(:))
  545. ! Compute CPU user time
  546. zavg_ctime = tot_ctime/REAL(jpnij,wp)
  547. zmax_ctime = MAXVAL(all_ctime(:))
  548. zmin_ctime = MINVAL(all_ctime(:))
  549. ! Compute cpu/elapsed ratio
  550. zall_ratio(:) = all_ctime(:) / all_etime(:)
  551. ztot_ratio = SUM(zall_ratio(:))
  552. zavg_ratio = ztot_ratio/REAL(jpnij,wp)
  553. zmax_ratio = MAXVAL(zall_ratio(:))
  554. zmin_ratio = MINVAL(zall_ratio(:))
  555. ! Output Format
  556. clhline ='1x,13("-"),"|",18("-"),"|",14("-"),"|",18("-"),/,'
  557. cllignes(1)='(1x,"MPI summary report :",/,'
  558. cllignes(2)='1x,"--------------------",//,'
  559. cllignes(3)='1x,"Process Rank |"," Elapsed Time (s) |"," CPU Time (s) |"," Ratio CPU/Elapsed",/,'
  560. cllignes(4)=' (1x,i4,9x,"|",f12.3,6x,"|",f12.3,2x,"|",4x,f7.3,/),'
  561. WRITE(cllignes(4)(1:4),'(I4)') jpnij
  562. cllignes(5)='1x,"Total |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,'
  563. cllignes(6)='1x,"Minimum |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,'
  564. cllignes(7)='1x,"Maximum |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,'
  565. cllignes(8)='1x,"Average |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3)'
  566. clfmt=TRIM(cllignes(1))// TRIM(cllignes(2))//TRIM(cllignes(3))// &
  567. & TRIM(clhline)//TRIM(cllignes(4))//TRIM(clhline)//TRIM(cllignes(5))// &
  568. & TRIM(clhline)//TRIM(cllignes(6))//TRIM(clhline)//TRIM(cllignes(7))// &
  569. & TRIM(clhline)//TRIM(cllignes(8))
  570. WRITE(numtime, TRIM(clfmt)) &
  571. (iall_rank(idum),all_etime(idum),all_ctime(idum),zall_ratio(idum),idum=1, jpnij), &
  572. tot_etime, tot_ctime, ztot_ratio, &
  573. zmin_etime, zmin_ctime, zmin_ratio, &
  574. zmax_etime, zmax_ctime, zmax_ratio, &
  575. zavg_etime, zavg_ctime, zavg_ratio
  576. WRITE(numtime,*)
  577. END IF
  578. !
  579. DEALLOCATE(zall_ratio, iall_rank)
  580. !
  581. END SUBROUTINE wmpi_info
  582. #endif
  583. SUBROUTINE timing_ini_var(cdinfo)
  584. !!----------------------------------------------------------------------
  585. !! *** ROUTINE timing_ini_var ***
  586. !! ** Purpose : create timing structure
  587. !!----------------------------------------------------------------------
  588. CHARACTER(len=*), INTENT(in) :: cdinfo
  589. LOGICAL :: ll_section
  590. !
  591. IF( .NOT. ASSOCIATED(s_timer_root) ) THEN
  592. ALLOCATE(s_timer_root)
  593. s_timer_root%cname = cdinfo
  594. s_timer_root%t_cpu = 0._wp
  595. s_timer_root%t_clock = 0._wp
  596. s_timer_root%tsum_cpu = 0._wp
  597. s_timer_root%tsum_clock = 0._wp
  598. s_timer_root%tmax_cpu = 0._wp
  599. s_timer_root%tmax_clock = 0._wp
  600. s_timer_root%tmin_cpu = 0._wp
  601. s_timer_root%tmin_clock = 0._wp
  602. s_timer_root%tsub_cpu = 0._wp
  603. s_timer_root%tsub_clock = 0._wp
  604. s_timer_root%ncount = 0
  605. s_timer_root%ncount_rate = 0
  606. s_timer_root%ncount_max = 0
  607. s_timer_root%niter = 0
  608. s_timer_root%l_tdone = .FALSE.
  609. s_timer_root%next => NULL()
  610. s_timer_root%prev => NULL()
  611. s_timer => s_timer_root
  612. !
  613. s_wrk => NULL()
  614. ELSE
  615. s_timer => s_timer_root
  616. ! case of already existing area (typically inside a loop)
  617. DO WHILE( ASSOCIATED(s_timer) )
  618. IF( TRIM(s_timer%cname) .EQ. TRIM(cdinfo) ) RETURN
  619. s_timer => s_timer%next
  620. END DO
  621. ! end of the chain
  622. s_timer => s_timer_root
  623. DO WHILE( ASSOCIATED(s_timer%next) )
  624. s_timer => s_timer%next
  625. END DO
  626. ALLOCATE(s_timer%next)
  627. s_timer%next%cname = cdinfo
  628. s_timer%next%t_cpu = 0._wp
  629. s_timer%next%t_clock = 0._wp
  630. s_timer%next%tsum_cpu = 0._wp
  631. s_timer%next%tsum_clock = 0._wp
  632. s_timer%next%tmax_cpu = 0._wp
  633. s_timer%next%tmax_clock = 0._wp
  634. s_timer%next%tmin_cpu = 0._wp
  635. s_timer%next%tmin_clock = 0._wp
  636. s_timer%next%tsub_cpu = 0._wp
  637. s_timer%next%tsub_clock = 0._wp
  638. s_timer%next%ncount = 0
  639. s_timer%next%ncount_rate = 0
  640. s_timer%next%ncount_max = 0
  641. s_timer%next%niter = 0
  642. s_timer%next%l_tdone = .FALSE.
  643. s_timer%next%parent_section => NULL()
  644. s_timer%next%prev => s_timer
  645. s_timer%next%next => NULL()
  646. s_timer => s_timer%next
  647. ! are we inside a section
  648. s_wrk => s_timer%prev
  649. ll_section = .FALSE.
  650. DO WHILE( ASSOCIATED(s_wrk) .AND. .NOT. ll_section )
  651. IF( .NOT. s_wrk%l_tdone ) THEN
  652. ll_section = .TRUE.
  653. s_timer%parent_section => s_wrk
  654. ENDIF
  655. s_wrk => s_wrk%prev
  656. END DO
  657. ENDIF
  658. !
  659. END SUBROUTINE timing_ini_var
  660. SUBROUTINE timing_reset
  661. !!----------------------------------------------------------------------
  662. !! *** ROUTINE timing_reset ***
  663. !! ** Purpose : go to root of timing tree
  664. !!----------------------------------------------------------------------
  665. l_initdone = .TRUE.
  666. ! IF(lwp) WRITE(numout,*)
  667. ! IF(lwp) WRITE(numout,*) 'timing_reset : instrumented routines for timing'
  668. ! IF(lwp) WRITE(numout,*) '~~~~~~~~~~~~'
  669. ! CALL timing_list(s_timer_root)
  670. ! WRITE(numout,*)
  671. !
  672. END SUBROUTINE timing_reset
  673. RECURSIVE SUBROUTINE timing_list(ptr)
  674. TYPE(timer), POINTER, INTENT(inout) :: ptr
  675. !
  676. IF( ASSOCIATED(ptr%next) ) CALL timing_list(ptr%next)
  677. IF(lwp) WRITE(numout,*)' ', ptr%cname
  678. !
  679. END SUBROUTINE timing_list
  680. SUBROUTINE insert(sd_current, sd_root ,sd_ptr)
  681. !!----------------------------------------------------------------------
  682. !! *** ROUTINE insert ***
  683. !! ** Purpose : insert an element in timer structure
  684. !!----------------------------------------------------------------------
  685. TYPE(timer), POINTER, INTENT(inout) :: sd_current, sd_root, sd_ptr
  686. !
  687. IF( ASSOCIATED( sd_current, sd_root ) ) THEN
  688. ! If our current element is the root element then
  689. ! replace it with the one being inserted
  690. sd_root => sd_ptr
  691. ELSE
  692. sd_current%prev%next => sd_ptr
  693. END IF
  694. sd_ptr%next => sd_current
  695. sd_ptr%prev => sd_current%prev
  696. sd_current%prev => sd_ptr
  697. ! Nullify the pointer to the new element now that it is held
  698. ! within the list. If we don't do this then a subsequent call
  699. ! to ALLOCATE memory to this pointer will fail.
  700. sd_ptr => NULL()
  701. !
  702. END SUBROUTINE insert
  703. SUBROUTINE suppress(sd_ptr)
  704. !!----------------------------------------------------------------------
  705. !! *** ROUTINE suppress ***
  706. !! ** Purpose : supress an element in timer structure
  707. !!----------------------------------------------------------------------
  708. TYPE(timer), POINTER, INTENT(inout) :: sd_ptr
  709. !
  710. TYPE(timer), POINTER :: sl_temp
  711. sl_temp => sd_ptr
  712. sd_ptr => sd_ptr%next
  713. IF ( ASSOCIATED(sl_temp%next) ) sl_temp%next%prev => sl_temp%prev
  714. DEALLOCATE(sl_temp)
  715. sl_temp => NULL()
  716. !
  717. END SUBROUTINE suppress
  718. !!=====================================================================
  719. END MODULE timing