File: C:\NOAA\NEMS_11731\src\chem\gocart\src\GMAO_Shared\GMAO_pilgrim\decompmodule.F90

1     !-------------------------------------------------------------------------
2     !         NASA/GSFC, Data Assimilation Office, Code 910.3, GEOS/DAS
3     !-------------------------------------------------------------------------
4           MODULE decompmodule
5     !BOP
6     !
7     ! !MODULE: decompmodule
8     !
9     ! !USES:
10     #ifdef STAND_ALONE
11     # define iulog 6
12     #else
13           use cam_logfile, only: iulog
14     #endif
15     #include "debug.h"
16     
17           IMPLICIT NONE
18     
19     !
20     ! !DESCRIPTION:
21     !
22     !      This module provides the DecompType and its create and destroy 
23     !      routines.
24     !      \begin{center}
25     !      \begin{tabular}{|l|l|} \hline \hline
26     !         DecompType        & Type to describe a decomposition \\ \hline
27     !         DecompDefined     & True iff given decomposition is defined\\ \hline
28     !         DecompFree        & Destroy a decomposition \\ \hline
29     !         DecompCopy        & Copy decomposition to newly created one\\ \hline 
30     !         DecompPermute     & Permute decomposition \\ \hline 
31     !         DecompRegular1D   & Create a 1-D decomposition \\ \hline 
32     !         DecompRegular2D   & Create a 2-D decomposition \\ \hline 
33     !         DecompRegular3D   & Create a 3-D decomposition \\ \hline 
34     !         DecompRegular4D   & Create a 4-D decomposition \\ \hline 
35     !         DecompCreateIrr   & Create an irregular 1-D decomposition \\ \hline
36     !         DecompCreateTags  & Create a decomposition from Pe and Tags \\ \hline
37     !         DecompGlobalToLocal& Map a global index to a local one \\ \hline
38     !         DecompLocalToGlobal& Map a local index to a global one \\
39     !         \hline  \hline
40     !      \end{tabular}
41     !      \end{center}
42     !
43     !      The decomposition type contains the sizes of the global array,
44     !      the number of entries on each PE, and for each PE a list
45     !      of "runs", i.e., the starting and finishing global indices
46     !      or "tags" whose inclusive array section resides on that PE.
47     !      Clearly this method of decomposition is only efficient if
48     !      there are long runs, i.e., long array sections which are 
49     !      mapped to one PE.  A random decomposition will cause poor
50     !      results.
51     !
52     !      The decomposition is thus very efficient for 1-D, 2-D or 3-D
53     !      block distributions (particularly for 1-D distributions, where
54     !      there is one "run" per processor).  Problems may occur for
55     !      an irregular decomposition (which is by definition 1-D).  If
56     !      there is little correspondence between the global indices of the 
57     !      entries and the actual decomposition (e.g., the tags are
58     !      assigned randomly), then there will be many runs, most
59     !      containing only one tag, and the resulting instance of
60     !      DecompType will be very large.  Fortunately, most applications
61     !      assign tags to entries in some sort of contiguous fashion, 
62     !      which is then quite appropriate for this data structure.
63     !
64     !      All numbering of multi-dimensional arrays is ROW-MAJOR, that
65     !      is, first in the X direction and then in the Y (and then,
66     !      if appropriate, in Z).  This is true for both the 2-D and
67     !      3-D data sets as also the Cartesian description of the PEs.
68     !
69     !      There is one glaring feature of DecompType.  It is
70     !      supposed to be a `one-size-fits-all' description of the
71     !      decomposition (with the exception of the random indexing
72     !      mentioned above).  Unfortunately, to describe 2-D and 3-D
73     !      regions, it is necessary to carry additional dimension
74     !      information in order have complete information for the 
75     !      mapping.  This means that 2-D and 3-D decompositions 
76     !      inherently carry more information than a 1-D decomposition.
77     !      Thus it {\it is} possible to use a decomposition created
78     !      with the Regular2D or Regular3D routines to describe the
79     !      corresponding decomposition when the 2-D or 3-D array is
80     !      viewed as a 1-D array, but it is clearly {\it not}
81     !      possible to use a decomposition created with Regular1D
82     !      to describe the decomposition of a 2-D or 3-D array
83     !      --- the appropriate information just is not there.
84     !
85     ! !REVISION HISTORY:
86     !   97.07.22   Sawyer     Creation
87     !   97.09.01   Sawyer     Release date
88     !   97.11.06   Sawyer     Addition of row and column communicators
89     !   97.01.24   Sawyer     Added support for non-MPI derived types solution
90     !   97.01.29   Sawyer     Minor revisions for production service
91     !   98.01.30   Sawyer     Added DecompCopy
92     !   98.02.04   Sawyer     Removed Comm, CommRow and CommCol from DecompType
93     !   98.03.13   Sawyer     Removed DecompTypeOld, brushed up for walkthrough
94     !   98.03.19   Sawyer     Minor corrections after walkthrough
95     !   98.05.02   Sawyer     Added DecompPermute
96     !   98.05.11   Sawyer     Removed Permutation from all but DecompPermute
97     !   99.01.19   Sawyer     Minor cleaning
98     !   00.07.07   Sawyer     Removed DimSizes; decomp is now 1D only
99     !   00.11.12   Sawyer     Added DecompCreateTags and DecompInfo
100     !   01.02.03   Sawyer     Updated for free format; corrected DecompCreateTags
101     !   01.03.20   Sawyer     Added DecompRegular3DOrder
102     !   02.12.04   Sawyer     Added DecompDefined, optimized DecompGlobalToLocal
103     !   02.12.06   Sawyer     Bug in new DecompGlobalToLocal (remove out of bounds check)
104     !   02.12.08   Sawyer     Another bug: calculate the Offsets field correctly
105     !   02.12.23   Sawyer     Added DecompRegular4D
106     !
107     ! !PUBLIC TYPES:
108           PUBLIC DecompType, DecompCreate, DecompFree
109           PUBLIC DecompCopy, DecompPermute, DecompDefined
110           PUBLIC DecompGlobalToLocal, DecompLocalToGlobal, DecompInfo
111     
112           INTERFACE     DecompCreate
113             MODULE PROCEDURE DecompRegular1D
114             MODULE PROCEDURE DecompRegular2D
115             MODULE PROCEDURE DecompRegular3D
116             MODULE PROCEDURE DecompRegular3DOrder
117             MODULE PROCEDURE DecompRegular4D
118             MODULE PROCEDURE DecompCreateIrr
119             MODULE PROCEDURE DecompCreateTags
120           END INTERFACE
121     
122           INTERFACE     DecompGlobalToLocal
123             MODULE PROCEDURE DecompG2L
124             MODULE PROCEDURE DecompG2LVector
125           END INTERFACE
126      
127           INTERFACE     DecompLocalToGlobal
128             MODULE PROCEDURE DecompL2G
129             MODULE PROCEDURE DecompL2GVector
130           END INTERFACE
131      
132     ! Decomposition info
133     
134            TYPE Lists
135              INTEGER, POINTER     :: StartTags(:) ! Start of tag run
136              INTEGER, POINTER     :: EndTags(:)   ! End of tag run
137              INTEGER, POINTER     :: Offsets(:)   ! Local offsets for efficiency
138            END TYPE Lists
139     
140            TYPE DecompType
141              LOGICAL              :: Defined      ! Is it defined?
142              INTEGER              :: GlobalSize   ! Size in each dimension
143              INTEGER, POINTER     :: NumEntries(:)! Number of entries per PE
144              TYPE(Lists), POINTER :: Head(:)      ! Array of pointers 
145            END TYPE DecompType
146     
147     !EOP
148           CONTAINS
149     
150     !-----------------------------------------------------------------------
151     !BOP
152     ! !IROUTINE: DecompDefined --- Is the decomp type defined?
153     !
154     ! !INTERFACE:
155           LOGICAL FUNCTION DecompDefined ( Decomp )
156     ! !USES:
157           IMPLICIT NONE
158     
159     ! !INPUT PARAMETERS:
160           TYPE(DecompType), INTENT( IN ):: Decomp  ! Decomp information
161     
162     !
163     ! !DESCRIPTION:
164     !     Returns true if Decomp has been created but not yet destroyed
165     !
166     ! !REVISION HISTORY:
167     !   02.12.04   Sawyer     Creation from GhostDefined
168     !
169     !EOP
170     !-----------------------------------------------------------------------
171     !BOC
172     !
173     !
174           CPP_ENTER_PROCEDURE( "DECOMPDEFINED" )
175           DecompDefined = Decomp%Defined
176           CPP_LEAVE_PROCEDURE( "DECOMPDEFINED" )
177     
178           RETURN
179     !EOC
180           END FUNCTION DecompDefined
181     !-----------------------------------------------------------------------
182     
183     
184     !---------------------------------------------------------------------
185     !BOP
186     ! !IROUTINE: DecompFree --- Free a decomposition
187     !
188     ! !INTERFACE:
189           SUBROUTINE DecompFree ( Decomp )
190     ! !USES:
191           IMPLICIT NONE
192     
193     ! !INPUT/OUTPUT PARAMETERS:
194           TYPE(DecompType), INTENT( INOUT ):: Decomp  ! Decomp information
195     !
196     ! !DESCRIPTION:
197     !     Free the decomposition -- deallocate the data structures.
198     !
199     ! !SYSTEM ROUTINES:
200     !     ASSOCIATED, DEALLOCATE
201     !
202     ! !REVISION HISTORY:
203     !   98.01.30   Sawyer     Creation
204     !
205     !EOP
206     !------------------------------------------------------------------------
207     !BOC
208     !
209     ! !LOCAL VARIABLES:
210           INTEGER  :: I, NPEs
211     !
212           CPP_ENTER_PROCEDURE( "DECOMPFREE" )
213     
214           IF ( ASSOCIATED( Decomp%NumEntries ) )                             &
215                          DEALLOCATE( Decomp%NumEntries )
216           IF ( ASSOCIATED( Decomp%Head ) ) THEN
217             NPEs = SIZE( Decomp%Head )
218             DO I = 1, NPEs
219     !
220     ! Copy the number of entries on each PE
221     !
222               IF ( ASSOCIATED( Decomp%Head(I)%StartTags ) )                  &
223                          DEALLOCATE( Decomp%Head(I)%StartTags )
224               IF ( ASSOCIATED( Decomp%Head(I)%EndTags ) )                    &
225                          DEALLOCATE( Decomp%Head(I)%EndTags )
226               IF ( ASSOCIATED( Decomp%Head(I)%Offsets ) )                    &
227                          DEALLOCATE( Decomp%Head(I)%Offsets )
228             ENDDO
229             DEALLOCATE( Decomp%Head )
230           ENDIF
231           Decomp%Defined = .FALSE.
232     
233           CPP_LEAVE_PROCEDURE( "DECOMPFREE" )
234           RETURN
235     !EOC
236           END SUBROUTINE DecompFree
237     !------------------------------------------------------------------------
238     
239     
240     !------------------------------------------------------------------------
241     !BOP
242     ! !IROUTINE: DecompCopy --- Copy one decomposition to another
243     !
244     ! !INTERFACE:
245           SUBROUTINE DecompCopy ( DecompIn, DecompOut )
246     ! !USES:
247           IMPLICIT NONE
248     !
249     ! !INPUT PARAMETERS:
250           TYPE(DecompType), INTENT( IN )   :: DecompIn  ! Decomp information
251     !
252     ! !OUTPUT PARAMETERS:
253           TYPE(DecompType), INTENT( OUT )  :: DecompOut ! Decomp information
254     !
255     ! !DESCRIPTION:
256     !
257     !   Creates an output decomposition and copies the DecompIn input values 
258     !
259     ! !SYSTEM ROUTINES:
260     !     ALLOCATE
261     !
262     ! !REVISION HISTORY:
263     !   98.01.30   Sawyer     Creation
264     !
265     !EOP
266     !------------------------------------------------------------------------
267     !BOC
268     !
269     ! !LOCAL VARIABLES:
270           INTEGER  :: I, J, NDims, NPEs, NRuns
271     !
272           CPP_ENTER_PROCEDURE( "DECOMPCOPY" )
273     !
274     ! Copy the size of the global array
275     !
276           DecompOut%GlobalSize = DecompIn%GlobalSize
277     
278     !
279     ! Allocate the number of entries and list head arrays
280     !
281           NPEs = SIZE( DecompIn%NumEntries )
282           CPP_ASSERT_F90( SIZE( DecompIn%Head ) .EQ. NPEs )
283           ALLOCATE( DecompOut%NumEntries( NPEs ) )
284           ALLOCATE( DecompOut%Head( NPEs ) )
285     
286           DO I = 1, NPEs
287     !
288     ! Copy the number of entries on each PE
289     !
290             DecompOut%NumEntries( I ) = DecompIn%NumEntries( I )
291             NRuns = SIZE( DecompIn%Head( I )%StartTags )
292             CPP_ASSERT_F90( SIZE( DecompIn%Head( I )%EndTags ) .EQ. NRuns )
293     !
294     ! Allocate and copy the array of runs
295     !
296             ALLOCATE( DecompOut%Head(I)%StartTags( NRuns ) )
297             ALLOCATE( DecompOut%Head(I)%EndTags( NRuns ) )
298             ALLOCATE( DecompOut%Head(I)%Offsets( NRuns ) )
299             DO J = 1, NRuns
300               DecompOut%Head(I)%StartTags(J) = DecompIn%Head(I)%StartTags(J)
301               DecompOut%Head(I)%EndTags(J) = DecompIn%Head(I)%EndTags(J)
302               DecompOut%Head(I)%Offsets(J) = DecompIn%Head(I)%Offsets(J)
303             ENDDO
304           ENDDO
305           DecompOut%Defined = .TRUE.
306     
307           CPP_LEAVE_PROCEDURE( "DECOMPCOPY" )
308           RETURN
309     !EOC
310           END SUBROUTINE DecompCopy
311     !------------------------------------------------------------------------
312     
313     
314     !------------------------------------------------------------------------
315     !BOP
316     ! !IROUTINE: DecompPermute --- Permute one decomposition to another
317     !
318     ! !INTERFACE:
319           SUBROUTINE DecompPermute ( Permutation, Decomp )
320     ! !USES:
321           IMPLICIT NONE
322     !
323     ! !INPUT PARAMETERS:
324           INTEGER :: Permutation( : )                  ! Permutation
325     
326     ! !INPUT/OUTPUT PARAMETERS:
327           TYPE(DecompType), INTENT( INOUT ) :: Decomp  ! Decomp information
328     !
329     !
330     ! !DESCRIPTION:
331     !
332     !   Permutes the PE assignment of a given decomposition. Confusion will
333     !   always arise about whether this is a forward or backward
334     !   transformation.  Picture it this way: draw the array and slice it
335     !   up as indicated by the distribution.  The resulting boxes are of
336     !   course indexed by natural numbering 1, 2, 3, 4, ...  (these are
337     !   the virtual one-based PEs). Now write the true PE numbering
338     !   (one-based) as you would like it.  The resulting array is Perm.
339     !
340     !
341     ! !SYSTEM ROUTINES:
342     !     ALLOCATE
343     !
344     ! !REVISION HISTORY:
345     !   98.05.02   Sawyer     Creation
346     !
347     !EOP
348     !---------------------------------------------------------------------
349     !BOC
350     !
351     ! !LOCAL VARIABLES:
352           INTEGER, POINTER     :: NumEntries(:)! Number of entries
353           TYPE(Lists), POINTER :: Head(:)      ! Array of pointers 
354           INTEGER              :: I, J, NPEs, NRuns, TruePE
355     !
356           CPP_ENTER_PROCEDURE( "DECOMPPERMUTE" )
357     !
358     ! Allocate the number of entries and list head arrays
359     !
360           NPEs = SIZE( Decomp%NumEntries )
361           ALLOCATE( NumEntries( NPEs ) )
362           DO I = 1, NPEs
363             TruePE = Permutation( I )
364             NumEntries( TruePE ) = Decomp%NumEntries( I )
365           ENDDO
366     !
367     ! Deallocate old NumEntries and put the new pointer in its place
368     !
369           DEALLOCATE( Decomp%NumEntries )
370           Decomp%NumEntries => NumEntries
371           NULLIFY( NumEntries )
372     
373     !
374     ! Allocate and set the permuted Lists called with pointer Head
375     !
376           ALLOCATE( Head( NPEs ) )
377           DO I = 1, NPEs
378             TruePE = Permutation( I )
379             NRuns = SIZE( Decomp%Head(I)%StartTags )
380             CPP_ASSERT_F90( SIZE( Decomp%Head(I)%EndTags ) .EQ. NRuns )
381     !
382     ! Allocate and permute the array of runs
383     !
384             ALLOCATE( Head(TruePE)%StartTags(NRuns) )
385             ALLOCATE( Head(TruePE)%EndTags(NRuns) )
386             ALLOCATE( Head(TruePE)%Offsets(NRuns) )
387             DO J = 1, NRuns
388               Head(TruePE)%StartTags(J) = Decomp%Head(I)%StartTags(J)
389               Head(TruePE)%EndTags(J)   = Decomp%Head(I)%EndTags(J)
390               Head(TruePE)%Offsets(J)   = Decomp%Head(I)%Offsets(J)
391             ENDDO
392           ENDDO
393     !
394     ! Deallocate the arrays of starting and ending tags
395     !
396           DO I = 1, NPEs
397             DEALLOCATE( Decomp%Head(I)%StartTags )
398             DEALLOCATE( Decomp%Head(I)%EndTags )
399             DEALLOCATE( Decomp%Head(I)%Offsets )
400           ENDDO
401     !
402     ! Deallocate the heads to the Lists
403     !
404           DEALLOCATE( Decomp%Head )
405     
406     !
407     ! Link the new head to that in the decomposition
408     !
409           Decomp%Head => Head
410           
411           NULLIFY( Head )
412     
413           CPP_LEAVE_PROCEDURE( "DECOMPPERMUTE" )
414           RETURN
415     !EOC
416           END SUBROUTINE DecompPermute
417     !------------------------------------------------------------------------
418     
419     
420     !------------------------------------------------------------------------
421     !BOP
422     ! !IROUTINE: DecompRegular1D --- Create a decomposition for a 1-D grid
423     !
424     ! !INTERFACE:
425           SUBROUTINE DecompRegular1D ( NPEs, Dist, Decomp )
426     ! !USES:
427           IMPLICIT NONE
428     !
429     ! !INPUT PARAMETERS:
430           INTEGER, INTENT( IN )            :: NPEs     ! Number of PEs
431           INTEGER, INTENT( IN )            :: Dist(:)  ! Distribution in X
432     !
433     ! !OUTPUT PARAMETERS:
434           TYPE(DecompType), INTENT( OUT )  :: Decomp   ! Decomp information
435     !
436     ! !DESCRIPTION:
437     !     Creates a variable block decomposition for a regular 1-D grid
438     !     (this is also known as a "block-general" distribution).  The
439     !     decomposition is given through the Dist distribution 
440     !     which contains the number of entries on each PE.  
441     !
442     ! !SYSTEM ROUTINES:
443     !     ALLOCATE
444     !
445     ! !REVISION HISTORY:
446     !   98.01.19   Sawyer     Creation
447     !   98.01.22   Sawyer     Corrections, TESTED
448     !   98.05.11   Sawyer     Removed Perm from arglist -- see DecompPermute
449     !   00.07.07   Sawyer     Removed use of DimSizes(:) array
450     !
451     !EOP
452     !------------------------------------------------------------------------
453     !BOC
454     !
455     ! !LOCAL VARIABLES:
456           INTEGER  :: I, Counter
457     !
458           CPP_ENTER_PROCEDURE( "DECOMPREGULAR1D" )
459     !
460           CPP_ASSERT_F90( NPEs .EQ. SIZE( Dist ) )
461     !
462     ! The head contains NPEs pointers to the tag lists.
463     !
464           Decomp%GlobalSize = SUM(Dist)
465           ALLOCATE( Decomp%NumEntries( NPEs ) )
466           ALLOCATE( Decomp%Head( NPEs ) )
467           Counter = 0
468           DO I = 1, NPEs
469             Decomp%NumEntries(I) = Dist(I)
470     !
471     ! Since this is a regular distribution there is only one run of tags per PE.
472     !
473             NULLIFY( Decomp%Head(I)%StartTags )
474             NULLIFY( Decomp%Head(I)%EndTags )
475             NULLIFY( Decomp%Head(I)%Offsets )
476             ALLOCATE( Decomp%Head(I)%StartTags(1) )
477             ALLOCATE( Decomp%Head(I)%EndTags(1) )
478             ALLOCATE( Decomp%Head(I)%Offsets(1) )
479     !
480     ! The starting and ending tags are immediately determined from
481     ! the decomposition arrays  
482     !
483             Decomp%Head(I)%StartTags(1) = Counter+1
484             Counter = Counter + Dist(I)
485             Decomp%Head(I)%EndTags(1) = Counter
486             Decomp%Head(I)%Offsets(1) = 0   ! Offset in local segment
487           ENDDO
488     
489           Decomp%Defined = .TRUE.
490     
491           CPP_LEAVE_PROCEDURE( "DECOMPREGULAR1D" )
492           RETURN
493     !EOC
494           END SUBROUTINE DecompRegular1D
495     !------------------------------------------------------------------------
496     
497     
498     !------------------------------------------------------------------------
499     !BOP
500     ! !IROUTINE: DecompRegular2D --- Create a decomposition for a 2-D grid
501     !
502     ! !INTERFACE:
503           SUBROUTINE DecompRegular2D( NPEsX, NPEsY, Xdist, Ydist, Decomp )
504     ! !USES:
505           IMPLICIT NONE
506     !
507     ! !INPUT PARAMETERS:
508           INTEGER, INTENT( IN )            :: NPEsX    ! Number of PEs in X
509           INTEGER, INTENT( IN )            :: NPEsY    ! Number of PEs in Y
510           INTEGER, INTENT( IN )            :: Xdist(:) ! Distribution in X
511           INTEGER, INTENT( IN )            :: Ydist(:) ! Distribution in Y
512     !
513     ! !OUTPUT PARAMETERS:
514           TYPE(DecompType), INTENT( OUT )  :: Decomp  ! Decomp information
515     !
516     !
517     ! !DESCRIPTION:
518     !     Creates a variable block-block decomposition for a regular 
519     !     2-D grid.  The decomposition is given through the Xdist and 
520     !     Ydist distributions, which contain the number of entries on 
521     !     each PE in that dimension.  This routine thus defines
522     !     a rectangular "checkerboard" distribution.
523     !
524     ! !SYSTEM ROUTINES:
525     !     ALLOCATE
526     !
527     ! !REVISION HISTORY:
528     !   98.01.19   Sawyer     Creation
529     !   98.01.22   Sawyer     Corrections, TESTED
530     !   98.05.11   Sawyer     Removed Perm from arglist -- see DecompPermute
531     !   00.07.07   Sawyer     Removed use of DimSizes(:) array
532     !
533     ! !BUGS:
534     !     This routine makes the assumption that the sum of the
535     !     distribution in each dimension adds up to the total 
536     !     number of entries in that dimension.  It will cause
537     !     problems if the actual local arrays are over- or 
538     !     under-allocated.  For example, if the local array is
539     !     allocated statically for the maximum size of the
540     !     array on any processor, problems will occur on those
541     !     PEs which have less than the maximum.
542     !
543     !EOP
544     !------------------------------------------------------------------------
545     !BOC
546     !
547     ! !LOCAL VARIABLES:
548           INTEGER :: TruePE, I, J, K, Counter1, Counter2, SizeX, SizeY
549     !
550           CPP_ENTER_PROCEDURE( "DECOMPREGULAR2D" )
551     !
552     ! Some sanity checks
553     !
554           CPP_ASSERT_F90( NPEsX .EQ. SIZE( Xdist ) )
555           CPP_ASSERT_F90( NPEsY .EQ. SIZE( Ydist ) )
556     !
557     ! The head contains NPEs pointers to the tag lists.
558     !
559           SizeX = SUM(Xdist)
560           SizeY = SUM(Ydist)
561           Decomp%GlobalSize = SizeX * SizeY
562           ALLOCATE( Decomp%NumEntries( NPEsX*NPEsY ) )
563           ALLOCATE( Decomp%Head( NPEsX*NPEsY ) )
564           Counter1 = 0
565           DO J = 1, NPEsY
566             DO I = 1, NPEsX
567     !
568     ! WARNING!!!!  The definition of the PE is Row-major ordering
569     !
570               TruePE = ( J-1 ) * NPEsX + I 
571     
572     !
573     ! The number of entries is the product of the local X, Y, Z allotment
574     !
575               Decomp%NumEntries(TruePE) = Xdist(I)*Ydist(J)
576     !
577     ! For each Y there is a separate run
578     !
579               NULLIFY( Decomp%Head(TruePE)%StartTags )
580               NULLIFY( Decomp%Head(TruePE)%EndTags )
581               NULLIFY( Decomp%Head(TruePE)%Offsets )
582               ALLOCATE( Decomp%Head(TruePE)%StartTags(Ydist(J)) )
583               ALLOCATE( Decomp%Head(TruePE)%EndTags(Ydist(J)) )
584               ALLOCATE( Decomp%Head(TruePE)%Offsets(Ydist(J)) )
585               Counter2 = Counter1
586               DO K = 1, Ydist(J)
587     !
588     ! Since this is a regular distribution the definition of
589     ! tags is dictated by Xdist(I), and appears Ydist(J) times
590     !
591     !
592                 Decomp%Head(TruePE)%StartTags(K) = Counter2 + 1
593                 Decomp%Head(TruePE)%EndTags(K) = Counter2 + Xdist(I)
594                 Counter2 = Counter2 + SizeX
595               ENDDO
596               Counter1 = Counter1 + Xdist(I)
597             ENDDO
598     !
599     ! Align the counter such that it points to the start of the next 
600     ! block.  (Ydist(J)-1) since already one layer has been added in.
601     ! Implicit assumption that SizeX = SUM( Xdist )
602     !
603             Counter1 = Counter1 + SizeX*(Ydist(J)-1)
604           ENDDO
605     !
606     ! Calculate offsets
607     !
608           DO I=1, NPEsX*NPEsY
609             IF ( SIZE(Decomp%Head(I)%StartTags) > 0 ) THEN
610               Decomp%Head(I)%Offsets(1) = 0
611               DO J=2, SIZE(Decomp%Head(I)%StartTags)
612                 Decomp%Head(I)%Offsets(J) = Decomp%Head(I)%Offsets(J-1) +      &
613                 Decomp%Head(I)%EndTags(J-1) - Decomp%Head(I)%StartTags(J-1) + 1
614               ENDDO
615             ENDIF
616           ENDDO
617     
618           Decomp%Defined = .TRUE.
619     
620           CPP_LEAVE_PROCEDURE( "DECOMPREGULAR2D" )
621           RETURN
622     !EOC
623           END SUBROUTINE DecompRegular2D
624     !------------------------------------------------------------------------
625     
626     
627     !------------------------------------------------------------------------
628     !BOP
629     ! !IROUTINE: DecompRegular3D --- Create a decomposition for a 3-D grid
630     !
631     ! !INTERFACE:
632           SUBROUTINE DecompRegular3D ( NPEsX, NPEsY, NPEsZ,                  &
633                                        Xdist, Ydist, Zdist, Decomp )
634     ! !USES:
635           IMPLICIT NONE
636     !
637     ! !INPUT PARAMETERS:
638           INTEGER, INTENT( IN )            :: NPEsX    ! Number of PEs in X
639           INTEGER, INTENT( IN )            :: NPEsY    ! Number of PEs in Y
640           INTEGER, INTENT( IN )            :: NPEsZ    ! Number of PEs in Z
641           INTEGER, INTENT( IN )            :: Xdist(:) ! Distribution in X
642           INTEGER, INTENT( IN )            :: Ydist(:) ! Distribution in Y
643           INTEGER, INTENT( IN )            :: Zdist(:) ! Distribution in Z
644     !
645     ! !OUTPUT PARAMETERS:
646           TYPE(DecompType), INTENT( OUT )  :: Decomp  ! Decomp information
647     !
648     !
649     ! !DESCRIPTION:
650     !     Creates a decomposition for a regular 3-D grid.  The
651     !     decomposition is given through the Xdist, Ydist, and Zdist
652     !     distributions, which contain the number of entries on 
653     !     each PE in that dimension.    This routine thus defines
654     !     a parallelopiped (SOMA-block) distribution.
655     !
656     ! !SYSTEM ROUTINES:
657     !     ALLOCATE
658     !
659     ! !REVISION HISTORY:
660     !   98.01.19   Sawyer     Creation
661     !   98.05.11   Sawyer     Removed Perm from arglist -- see DecompPermute
662     !   00.07.07   Sawyer     Removed use of Sizes(:) array
663     !
664     ! !BUGS:
665     !     This routine makes the assumption that the sum of the
666     !     distribution in each dimension adds up to the total 
667     !     number of entries in that dimension.  It will cause
668     !     problems if the actual local arrays are over- or 
669     !     under-allocated.  For example, if the local array is
670     !     allocated statically for the maximum size of the
671     !     array on any processor, problems will occur on those
672     !     PEs which have less than the maximum.
673     !
674     !EOP
675     !------------------------------------------------------------------------
676     !BOC
677     !
678     ! !LOCAL VARIABLES:
679           INTEGER  :: TruePE, Counter1, Counter2, Counter3
680           INTEGER  :: I, J, K, L, M, N, SizeX, SizeY, SizeZ
681     !
682           CPP_ENTER_PROCEDURE( "DECOMPREGULAR3D" )
683     !
684     ! Some sanity checks
685     !
686     !
687           CPP_ASSERT_F90( NPEsX .EQ. SIZE( Xdist ) )
688           CPP_ASSERT_F90( NPEsY .EQ. SIZE( Ydist ) )
689           CPP_ASSERT_F90( NPEsZ .EQ. SIZE( Zdist ) )
690           CPP_ASSERT_F90( .NOT. ASSOCIATED( Decomp%Head ) )
691     !
692     ! The head contains NPEs pointers to the tag lists.
693     !
694           SizeX = SUM(Xdist)
695           SizeY = SUM(Ydist)
696           SizeZ = SUM(Zdist)
697           Decomp%GlobalSize = SizeX * SizeY * SizeZ
698           ALLOCATE( Decomp%NumEntries( NPEsX*NPEsY*NPEsZ ) )
699           ALLOCATE( Decomp%Head( NPEsX*NPEsY*NPEsZ ) )
700           Counter1 = 0
701           DO K = 1, NPEsZ
702             DO J = 1, NPEsY
703               DO I = 1, NPEsX
704     !
705     ! WARNING!!!!  The definition of the PE is Row-major ordering
706     !
707                 TruePE = (K-1)*NPEsX*NPEsY + (J-1)*NPEsX + I 
708                 NULLIFY( Decomp%Head(TruePE)%StartTags )
709                 NULLIFY( Decomp%Head(TruePE)%EndTags )
710                 NULLIFY( Decomp%Head(TruePE)%Offsets )
711     !
712     ! The number of entries is the product of the local X, Y, Z allotment
713     !
714                 Decomp%NumEntries(TruePE) = Xdist(I)*Ydist(J)*Zdist(K)
715     !
716     ! For each Z there are Y separate runs
717     !
718                 ALLOCATE( Decomp%Head(TruePE)%StartTags(Ydist(J)*Zdist(K)) )
719                 ALLOCATE( Decomp%Head(TruePE)%EndTags(Ydist(J)*Zdist(K)) )
720                 ALLOCATE( Decomp%Head(TruePE)%Offsets(Ydist(J)*Zdist(K)) )
721                 Counter2 = Counter1
722                 L = 0
723                 DO N = 1, Zdist(K)
724                   Counter3 = Counter2
725                   DO M = 1, Ydist(J)
726     !
727     !     Since this is a regular distribution the definition of
728     !     tags is dictated by Xdist(I), and appears Ydist(J) times
729     !
730     !
731                     L = L + 1
732                     Decomp%Head(TruePE)%StartTags(L) = Counter3 + 1
733                     Decomp%Head(TruePE)%EndTags(L) = Counter3 + Xdist(I)
734                     Counter3 = Counter3 + SizeX
735                   ENDDO
736                   Counter2 = Counter2 + SizeX*SizeY
737                 ENDDO
738                 Counter1 = Counter1 + Xdist(I)
739               ENDDO
740     !
741     ! Align the counter such that it points to the start of the next 
742     ! block.  (Ydist(J)-1) since already one X layer has been added in.
743     ! Implicit assumption that SizeX = SUM( Xdist )
744     !
745               Counter1 = Counter1 + SizeX*(Ydist(J)-1)
746             ENDDO
747     !
748     ! Align the counter such that it points to the start of the next 
749     ! block.  (Zdist(K)-1) since already one X-Y layer has been added in.
750     ! Implicit assumption that SizeY = SUM( Ydist )
751     !
752             Counter1 = Counter1 + SizeX*SizeY*(Zdist(K)-1)
753           ENDDO
754     !
755     ! Calculate offsets
756     !
757           DO I=1, NPEsX*NPEsY*NPEsZ
758             IF ( SIZE(Decomp%Head(I)%StartTags) > 0 ) THEN
759               Decomp%Head(I)%Offsets(1) = 0
760               DO J=2, SIZE(Decomp%Head(I)%StartTags)
761                 Decomp%Head(I)%Offsets(J) = Decomp%Head(I)%Offsets(J-1) +      &
762                 Decomp%Head(I)%EndTags(J-1) - Decomp%Head(I)%StartTags(J-1) + 1
763               ENDDO
764             ENDIF
765           ENDDO
766     
767           Decomp%Defined = .TRUE.
768     
769           CPP_LEAVE_PROCEDURE( "DECOMPREGULAR3D" )
770           RETURN
771     !EOC
772           END SUBROUTINE DecompRegular3D
773     !------------------------------------------------------------------------
774     
775     
776     !------------------------------------------------------------------------
777     !BOP
778     ! !IROUTINE: DecompRegular3Dorder --- Create a decomposition for a 3-D grid
779     !
780     ! !INTERFACE:
781           SUBROUTINE DecompRegular3Dorder( Order, NPEsX, NPEsY, NPEsZ,       &
782                                            Xdist, Ydist, Zdist, Decomp )
783     ! !USES:
784           IMPLICIT NONE
785     !
786     ! !INPUT PARAMETERS:
787           CHARACTER(3), INTENT( IN )       :: Order    ! Dim. ordering
788           INTEGER, INTENT( IN )            :: NPEsX    ! Number of PEs in X
789           INTEGER, INTENT( IN )            :: NPEsY    ! Number of PEs in Y
790           INTEGER, INTENT( IN )            :: NPEsZ    ! Number of PEs in Z
791           INTEGER, INTENT( IN )            :: Xdist(:) ! Distribution in X
792           INTEGER, INTENT( IN )            :: Ydist(:) ! Distribution in Y
793           INTEGER, INTENT( IN )            :: Zdist(:) ! Distribution in Z
794     !
795     ! !OUTPUT PARAMETERS:
796           TYPE(DecompType), INTENT( OUT )  :: Decomp  ! Decomp information
797     !
798     ! !DESCRIPTION:
799     !     Creates a variable block-block-block decomposition for a regular 
800     !     3-D grid, where the ordering of the PEs can be explicitly given
801     !     (see next paragraph). The decomposition is given through the 
802     !     Xdist, Ydist, and Zdist distributions, which contain the number 
803     !     of entries on each PE in that dimension.  This routine thus defines
804     !     a parallelopiped (SOMA-block) distribution.
805     !
806     !     With the string argument Order, the order of counting in the
807     !     3d PE space can be specified.  There are six possible values:
808     !     "xyz", "xzy", "yxz", "yzx", "zxy", and "zyx".  
809     !
810     !     The same as DecompRegular3Dorder could also be achieved by
811     !     using DecompRegular3D and then permuting the PE ownership
812     !     with DecompPermute.
813     !
814     ! !SYSTEM ROUTINES:
815     !     ALLOCATE
816     !
817     ! !REVISION HISTORY:
818     !   01.03.20   Sawyer     Creation from DecompRegular3Dzy, added ordering
819     !
820     ! !BUGS:
821     !   Not yet tested
822     !
823     !EOP
824     !---------------------------------------------------------------------
825     !BOC
826     ! !LOCAL VARIABLES:
827           INTEGER  :: TruePE, Counter1, Counter2, Counter3
828           INTEGER  :: I, J, K, L, M, N, SizeX, SizeY, SizeZ
829           INTEGER  :: Imult, Jmult, Kmult
830     !
831           CPP_ENTER_PROCEDURE( "DECOMPREGULAR3DORDER" )
832     !
833     ! Some sanity checks
834     !
835     !
836           CPP_ASSERT_F90( NPEsX .EQ. SIZE( Xdist ) )
837           CPP_ASSERT_F90( NPEsY .EQ. SIZE( Ydist ) )
838           CPP_ASSERT_F90( NPEsZ .EQ. SIZE( Zdist ) )
839           CPP_ASSERT_F90( .NOT. ASSOCIATED( Decomp%Head ) )
840     
841           IF ( Order=="xyz" ) THEN
842     ! Looks like:      TruePE = (K-1)*NPEsX*NPEsY + (J-1)*NPEsX + (I-1) + 1
843             Imult = 1
844             Jmult = NPEsX
845             Kmult = NPEsX*NPEsY
846           ELSE IF ( Order=="xzy" ) THEN
847     ! Looks like:      TruePE = (J-1)*NPEsX*NPEsZ + (K-1)*NPEsX + (I-1) + 1
848             Imult = 1
849             Jmult = NPEsX*NPEsZ
850             Kmult = NPEsX
851           ELSE IF ( Order=="yxz" ) THEN
852     ! Looks like:      TruePE = (K-1)*NPEsY*NPEsX + (I-1)*NPEsY + (J-1) + 1
853             Imult = NPEsY
854             Jmult = 1
855             Kmult = NPEsX*NPEsY
856           ELSE IF ( Order=="yzx" ) THEN
857     ! Looks like:      TruePE = (I-1)*NPEsY*NPEsZ + (K-1)*NPEsY + (J-1) + 1
858             Imult = NPEsY*NPEsZ
859             Jmult = 1
860             Kmult = NPEsY
861           ELSE IF ( Order=="zxy" ) THEN
862     ! Looks like:      TruePE = (J-1)*NPEsX*NPEsZ + (I-1)*NPEsZ + (K-1) + 1
863             Imult = NPEsZ
864             Jmult = NPEsX*NPEsZ
865             Kmult = 1
866           ELSE IF ( Order=="zyx" ) THEN
867     ! Looks like:      TruePE = (I-1)*NPEsY*NPEsZ + (J-1)*NPEsZ + (K-1) + 1
868             Imult = NPEsY*NPEsZ
869             Jmult = NPEsZ
870             Kmult = 1
871           ELSE 
872     ! Looks like:      TruePE = (K-1)*NPEsX*NPEsY + (J-1)*NPEsX + (I-1) + 1
873             write(iulog,*) "Warning: DecompCreate3Dorder", Order, "not supported"
874             write(iulog,*) "         Continuing with XYZ ordering"
875             Imult = 1
876             Jmult = NPEsX
877             Kmult = NPEsX*NPEsY
878           ENDIF
879     
880     !
881     ! The head contains NPEs pointers to the tag lists.
882     !
883           SizeX = SUM(Xdist)
884           SizeY = SUM(Ydist)
885           SizeZ = SUM(Zdist)
886           Decomp%GlobalSize = SizeX * SizeY * SizeZ
887           ALLOCATE( Decomp%NumEntries( NPEsX*NPEsY*NPEsZ ) )
888           ALLOCATE( Decomp%Head( NPEsX*NPEsY*NPEsZ ) )
889           Counter1 = 0
890     
891           DO K = 1, NPEsZ
892             DO J = 1, NPEsY
893               DO I = 1, NPEsX
894     !
895     ! WARNING!!!!  The definition of the PE is Row-major ordering
896     !
897                 
898                 TruePE = (I-1)*Imult + (J-1)*Jmult + (K-1)*Kmult + 1 
899     !
900     ! The number of entries is the product of the local X, Y, Z allotment
901     !
902                 Decomp%NumEntries(TruePE) = Xdist(I)*Ydist(J)*Zdist(K)
903     !
904     ! For each Z there are Y separate runs
905     !
906                 ALLOCATE( Decomp%Head(TruePE)%StartTags(Ydist(J)*Zdist(K)) )
907                 ALLOCATE( Decomp%Head(TruePE)%EndTags(Ydist(J)*Zdist(K)) )
908                 ALLOCATE( Decomp%Head(TruePE)%Offsets(Ydist(J)*Zdist(K)) )
909                 Counter2 = Counter1
910                 L = 0
911                 DO N = 1, Zdist(K)
912                   Counter3 = Counter2
913                   DO M = 1, Ydist(J)
914     !
915     !     Since this is a regular distribution the definition of
916     !     tags is dictated by Xdist(I), and appears Ydist(J) times
917     !
918     !
919                     L = L + 1
920                     Decomp%Head(TruePE)%StartTags(L) = Counter3 + 1
921                     Decomp%Head(TruePE)%EndTags(L) = Counter3 + Xdist(I)
922                     Counter3 = Counter3 + SizeX
923                   ENDDO
924                   Counter2 = Counter2 + SizeX*SizeY
925                 ENDDO
926                 Counter1 = Counter1 + Xdist(I)
927               ENDDO
928     !
929     ! Align the counter such that it points to the start of the next 
930     ! block.  (Ydist(J)-1) since already one X layer has been added in.
931     ! Implicit assumption that SizeX = SUM( Xdist )
932     !
933               Counter1 = Counter1 + SizeX*(Ydist(J)-1)
934             ENDDO
935     !
936     ! Align the counter such that it points to the start of the next 
937     ! block.  (Zdist(K)-1) since already one X-Y layer has been added in.
938     ! Implicit assumption that SizeY = SUM( Ydist )
939     !
940             Counter1 = Counter1 + SizeX*SizeY*(Zdist(K)-1)
941           ENDDO
942     !
943     ! Calculate offsets
944     !
945           DO I=1, NPEsX*NPEsY*NPEsZ
946             IF ( SIZE(Decomp%Head(I)%StartTags) > 0 ) THEN
947               Decomp%Head(I)%Offsets(1) = 0
948               DO J=2, SIZE(Decomp%Head(I)%StartTags)
949                 Decomp%Head(I)%Offsets(J) = Decomp%Head(I)%Offsets(J-1) +      &
950                 Decomp%Head(I)%EndTags(J-1) - Decomp%Head(I)%StartTags(J-1) + 1
951               ENDDO
952             ENDIF
953           ENDDO
954     
955           Decomp%Defined = .TRUE.
956     
957           CPP_LEAVE_PROCEDURE( "DECOMPREGULAR3DORDER" )
958           RETURN
959     !EOC
960           END SUBROUTINE DecompRegular3DOrder
961     !------------------------------------------------------------------------
962     
963     
964     !------------------------------------------------------------------------
965     !BOP
966     ! !IROUTINE: DecompRegular4D --- Create a decomposition for a 3-D grid
967     !
968     ! !INTERFACE:
969           SUBROUTINE DecompRegular4D ( NPEsX, NPEsY, NPEsZ, NPEsT,          &
970                                        Xdist, Ydist, Zdist, Tdist, Decomp )
971     ! !USES:
972           IMPLICIT NONE
973     !
974     ! !INPUT PARAMETERS:
975           INTEGER, INTENT( IN )            :: NPEsX    ! Number of PEs in X
976           INTEGER, INTENT( IN )            :: NPEsY    ! Number of PEs in Y
977           INTEGER, INTENT( IN )            :: NPEsZ    ! Number of PEs in Z
978           INTEGER, INTENT( IN )            :: NPEsT    ! Number of PEs in T
979           INTEGER, INTENT( IN )            :: Xdist(:) ! Distribution in X
980           INTEGER, INTENT( IN )            :: Ydist(:) ! Distribution in Y
981           INTEGER, INTENT( IN )            :: Zdist(:) ! Distribution in Z
982           INTEGER, INTENT( IN )            :: Tdist(:) ! Distribution in T
983     !
984     ! !OUTPUT PARAMETERS:
985           TYPE(DecompType), INTENT( OUT )  :: Decomp  ! Decomp information
986     !
987     !
988     ! !DESCRIPTION:
989     !     Creates a decomposition for a regular 4-D grid.  The
990     !     decomposition is given through the Xdist, Ydist, Zdist, Tdist
991     !     distributions, which contain the number of entries on 
992     !     each PE in that dimension.    This routine thus defines
993     !     a parallelopiped (SOMA-block) distribution.
994     !
995     ! !SYSTEM ROUTINES:
996     !     ALLOCATE
997     !
998     ! !REVISION HISTORY:
999     !   02.12.23   Sawyer     Creation from DecompRegular4D
1000     !
1001     ! !BUGS:
1002     !     This routine makes the assumption that the sum of the
1003     !     distribution in each dimension adds up to the total 
1004     !     number of entries in that dimension.  It will cause
1005     !     problems if the actual local arrays are over- or 
1006     !     under-allocated.  For example, if the local array is
1007     !     allocated statically for the maximum size of the
1008     !     array on any processor, problems will occur on those
1009     !     PEs which have less than the maximum.
1010     !
1011     !EOP
1012     !------------------------------------------------------------------------
1013     !BOC
1014     !
1015     ! !LOCAL VARIABLES:
1016           INTEGER  :: TruePE, Counter1, Counter2, Counter3, Counter4
1017           INTEGER  :: I, J, K, L, M, N, P, T, SizeX, SizeY, SizeZ, SizeT
1018     !
1019           CPP_ENTER_PROCEDURE( "DECOMPREGULAR4D" )
1020     !
1021     ! Some sanity checks
1022     !
1023     !
1024           CPP_ASSERT_F90( NPEsX .EQ. SIZE( Xdist ) )
1025           CPP_ASSERT_F90( NPEsY .EQ. SIZE( Ydist ) )
1026           CPP_ASSERT_F90( NPEsZ .EQ. SIZE( Zdist ) )
1027           CPP_ASSERT_F90( NPEsT .EQ. SIZE( Tdist ) )
1028           CPP_ASSERT_F90( .NOT. ASSOCIATED( Decomp%Head ) )
1029     !
1030     ! The head contains NPEs pointers to the tag lists.
1031     !
1032           SizeX = SUM(Xdist)
1033           SizeY = SUM(Ydist)
1034           SizeZ = SUM(Zdist)
1035           SizeT = SUM(Tdist)
1036           Decomp%GlobalSize = SizeX * SizeY * SizeZ * SizeT
1037           ALLOCATE( Decomp%NumEntries( NPEsX*NPEsY*NPEsZ*NPEsT ) )
1038           ALLOCATE( Decomp%Head( NPEsX*NPEsY*NPEsZ*NPEsT ) )
1039           Counter1 = 0
1040           DO T = 1, NPEsT
1041             DO K = 1, NPEsZ
1042               DO J = 1, NPEsY
1043                 DO I = 1, NPEsX
1044     !
1045     ! WARNING!!!!  The definition of the PE is Row-major ordering
1046     !
1047                   TruePE = (T-1)*NPEsX*NPEsY*NPEsZ +                      &
1048                            (K-1)*NPEsX*NPEsY + (J-1)*NPEsX + I 
1049                   NULLIFY( Decomp%Head(TruePE)%StartTags )
1050                   NULLIFY( Decomp%Head(TruePE)%EndTags )
1051                   NULLIFY( Decomp%Head(TruePE)%Offsets )
1052     !
1053     ! The number of entries is the product of the local X, Y, Z allotment
1054     !
1055                   Decomp%NumEntries(TruePE) =                             &
1056                          Xdist(I)*Ydist(J)*Zdist(K)*Tdist(T)
1057     !
1058     ! For each Z there are Y separate runs
1059     !
1060                   ALLOCATE( Decomp%Head(TruePE)%StartTags(Ydist(J)*Zdist(K)*Tdist(T)) )
1061                   ALLOCATE( Decomp%Head(TruePE)%EndTags(Ydist(J)*Zdist(K)*Tdist(T)) )
1062                   ALLOCATE( Decomp%Head(TruePE)%Offsets(Ydist(J)*Zdist(K)*Tdist(T)) )
1063                   Counter2 = Counter1     ! Base address
1064                   L = 0
1065                   DO P = 1, Tdist(T)
1066                     Counter3 = Counter2
1067                     DO N = 1, Zdist(K)
1068                       Counter4 = Counter3
1069                       DO M = 1, Ydist(J)
1070     !
1071     !     Since this is a regular distribution the definition of
1072     !     tags is dictated by Xdist(I), and appears Ydist(J) times
1073     !
1074     !
1075                         L = L + 1
1076                         Decomp%Head(TruePE)%StartTags(L) = Counter4 + 1
1077                         Decomp%Head(TruePE)%EndTags(L) = Counter4 + Xdist(I)
1078                         Counter4 = Counter4 + SizeX
1079                       ENDDO
1080                       Counter3 = Counter3 + SizeX*SizeY
1081                     ENDDO
1082                     Counter2 = Counter2 + SizeX*SizeY*SizeZ
1083                   ENDDO
1084                   Counter1 = Counter1 + Xdist(I)   ! Increment base address
1085                 ENDDO
1086     !
1087     ! Align the counter such that it points to the start of the next 
1088     ! block.  (Ydist(J)-1) since already one X layer has been added in.
1089     ! Implicit assumption that SizeX = SUM( Xdist )
1090     !
1091                 Counter1 = Counter1 + SizeX*(Ydist(J)-1)
1092               ENDDO
1093     !
1094     ! Align the counter such that it points to the start of the next 
1095     ! block.  (Zdist(K)-1) since already one X-Y layer has been added in.
1096     ! Implicit assumption that SizeY = SUM( Ydist )
1097     !
1098               Counter1 = Counter1 + SizeX*SizeY*(Zdist(K)-1)
1099             ENDDO
1100             Counter1 = Counter1 + SizeX*SizeY*SizeZ*(Tdist(T)-1)
1101           ENDDO
1102     !
1103     ! Calculate offsets
1104     !
1105           DO I=1, NPEsX*NPEsY*NPEsZ*NPEsT
1106             IF ( SIZE(Decomp%Head(I)%StartTags) > 0 ) THEN
1107               Decomp%Head(I)%Offsets(1) = 0
1108               DO J=2, SIZE(Decomp%Head(I)%StartTags)
1109                 Decomp%Head(I)%Offsets(J) = Decomp%Head(I)%Offsets(J-1) +      &
1110                 Decomp%Head(I)%EndTags(J-1) - Decomp%Head(I)%StartTags(J-1) + 1
1111               ENDDO
1112             ENDIF
1113           ENDDO
1114     
1115           Decomp%Defined = .TRUE.
1116     
1117           CPP_LEAVE_PROCEDURE( "DECOMPREGULAR4D" )
1118           RETURN
1119     !EOC
1120           END SUBROUTINE DecompRegular4D
1121     !------------------------------------------------------------------------
1122     
1123     
1124     !------------------------------------------------------------------------
1125     !BOP
1126     ! !IROUTINE: DecompCreateIrr --- Decomposition for an irregular mesh
1127     !
1128     ! !INTERFACE:
1129           SUBROUTINE DecompCreateIrr( NPEs, Pe, TotalPts, Decomp )
1130     ! !USES:
1131           IMPLICIT NONE
1132     !
1133     ! !INPUT PARAMETERS:
1134           INTEGER, INTENT( IN )            :: NPEs     ! Number of PEs
1135           INTEGER, INTENT( IN )            :: Pe(:)    ! Processor location
1136           INTEGER, INTENT( IN )            :: TotalPts ! Number of points
1137     !
1138     ! !OUTPUT PARAMETERS:
1139           TYPE(DecompType), INTENT( OUT )  :: Decomp  ! Decomp information
1140     !
1141     !
1142     ! !DESCRIPTION:
1143     !     Creates a decomposition for a irregular 1-D mesh.  The
1144     !     decomposition is given through the number of points and
1145     !     an array containing the PE which each point is mapped to.
1146     !     This mapping naturally assumes that the local numbering
1147     !     is incrementally increasing as points are mapped to PEs.
1148     !     This assumption is sufficient for most applications, but
1149     !     another irregular mapping routine is available for more
1150     !     complex cases.
1151     !
1152     ! !SYSTEM ROUTINES:
1153     !     ALLOCATE
1154     !
1155     ! !REVISION HISTORY:
1156     !   98.01.19   Sawyer     Creation, with requirements from Jay Larson
1157     !   98.11.02   Sawyer     Rewritten to requirements for Andrea Molod
1158     !   00.07.07   Sawyer     Removed use of DimSizes(:) array
1159     !   00.11.12   Sawyer     Changed argument order for overloading
1160     !
1161     !EOP
1162     !------------------------------------------------------------------------
1163     !BOC
1164     !
1165     ! !LOCAL VARIABLES:
1166           INTEGER  :: I, J, PEhold
1167           INTEGER  :: Counter( NPEs )
1168     !
1169           CPP_ENTER_PROCEDURE( "DECOMPCREATEIRR" )
1170     !
1171           CPP_ASSERT_F90( TotalPts .LE. SIZE( PE ) )
1172           CPP_ASSERT_F90( .NOT. ASSOCIATED( Decomp%Head ) )
1173     
1174     !
1175     ! The head contains NPEs pointers to the tag lists.
1176     !
1177           Decomp%GlobalSize = TotalPts
1178           ALLOCATE( Decomp%NumEntries( NPEs ) )
1179           ALLOCATE( Decomp%Head( NPEs ) )
1180     !
1181     ! Perform over all points in the mapping
1182     !
1183           PEhold= -1
1184           Counter = 0
1185           Decomp%NumEntries = 0
1186           DO I=1, TotalPts
1187             CPP_ASSERT_F90( ( PE( I ) .LT. NPEs .AND. PE( I ) .GE. 0 ) )
1188             IF ( PE( I ) .NE. PEhold ) THEN
1189               PEhold = PE( I )
1190               Counter( PEhold+1 ) = Counter( PEhold+1 ) + 1
1191             ENDIF
1192             Decomp%NumEntries(PEHold+1) = Decomp%NumEntries(PEHold+1) + 1
1193           ENDDO
1194           DO I=1, NPEs
1195     !
1196     ! Now the amount of space to allocate is known.  It is acceptable
1197     ! to in allocated an array of size 0 (F90 Handbook, Section 6.5.1)
1198     !
1199             ALLOCATE( Decomp%Head(I)%StartTags(Counter(I)) )
1200             ALLOCATE( Decomp%Head(I)%EndTags(Counter(I)) )
1201             ALLOCATE( Decomp%Head(I)%Offsets(Counter(I)) )
1202           ENDDO
1203     !
1204     ! Perform over all points in the mapping
1205     !
1206           PEhold= -1
1207           Counter = 0
1208           DO I=1, TotalPts
1209             IF ( PE( I ) .NE. PEhold ) THEN
1210     !
1211     ! If not first entry, close up shop on previous run
1212     !
1213               IF ( I .GT. 1 ) THEN
1214                 Decomp%Head(PEhold+1)%EndTags(Counter(PEhold+1)) = I-1
1215               ENDIF
1216               PEhold = PE( I )
1217               Counter( PEhold+1 ) = Counter( PEhold+1 ) + 1
1218               Decomp%Head(PEhold+1)%StartTags(Counter(PEhold+1)) = I
1219             ENDIF
1220           ENDDO
1221     !
1222     ! Clean up shop for the final run
1223     !
1224           IF ( TotalPts .GT. 0 ) THEN
1225             Decomp%Head(PEhold+1)%EndTags(Counter(PEhold+1)) = TotalPts
1226           ENDIF
1227     
1228     !
1229     ! Calculate offsets
1230     !
1231           DO I=1, NPEs
1232             IF ( Counter(I) > 0 ) THEN
1233               Decomp%Head(I)%Offsets(1) = 0
1234               DO J=2, Counter(I)
1235                 Decomp%Head(I)%Offsets(J) = Decomp%Head(I)%Offsets(J-1) +    &
1236                 Decomp%Head(I)%EndTags(J-1) - Decomp%Head(I)%StartTags(J-1) + 1
1237               ENDDO
1238             ENDIF
1239           ENDDO
1240           Decomp%Defined = .TRUE.
1241     
1242           CPP_LEAVE_PROCEDURE( "DECOMPCREATEIRR" )
1243           RETURN
1244     !EOC
1245           END SUBROUTINE DecompCreateIrr
1246     !------------------------------------------------------------------------
1247     
1248     
1249     !------------------------------------------------------------------------
1250     !BOP
1251     ! !IROUTINE: DecompCreateTags --- Decomposition from Pe and Tags
1252     !
1253     ! !INTERFACE:
1254           SUBROUTINE DecompCreateTags(Npes, Pe, TotalPts, Tags, Decomp )
1255     ! !USES:
1256           IMPLICIT NONE
1257     !
1258     ! !INPUT PARAMETERS:
1259           INTEGER, INTENT( IN )            :: NPEs     ! Number of PEs
1260           INTEGER, INTENT( IN )            :: Pe(:)    ! Processor location
1261           INTEGER, INTENT( IN )            :: TotalPts ! Number of points
1262           INTEGER, INTENT( IN )            :: Tags(:)  ! Global index
1263     !
1264     ! !OUTPUT PARAMETERS:
1265           TYPE(DecompType), INTENT( OUT )  :: Decomp   ! Decomp information
1266     !
1267     !
1268     ! !DESCRIPTION:
1269     !     Creates a decomposition for a irregular mesh from the 
1270     !     Pe ownership and the Tags.  This is a simple extension of 
1271     !     DecompCreateIrr (previously DecompIrregular1D) but is
1272     !     much more dangerous, since the user can define the Tags
1273     !     (global indices) arbitrarily.
1274     !
1275     ! !SYSTEM ROUTINES:
1276     !     ALLOCATE
1277     !
1278     ! !REVISION HISTORY:
1279     !   00.11.12   Sawyer     Creation from DecompCreateIrr
1280     !
1281     !EOP
1282     !------------------------------------------------------------------------
1283     !BOC
1284     !
1285     ! !LOCAL VARIABLES:
1286           INTEGER  :: I, J, PEhold, LastTag
1287           INTEGER  :: Counter( NPEs )
1288     !
1289           CPP_ENTER_PROCEDURE( "DECOMPCREATETAGS" )
1290     !
1291           CPP_ASSERT_F90( TotalPts .LE. SIZE( PE ) )
1292           CPP_ASSERT_F90( TotalPts .LE. SIZE( Tags ) )
1293           CPP_ASSERT_F90( .NOT. ASSOCIATED( Decomp%Head ) )
1294     
1295     !
1296     ! The head contains NPEs pointers to the tag lists.
1297     !
1298           Decomp%GlobalSize = TotalPts
1299           ALLOCATE( Decomp%NumEntries( NPEs ) )
1300           ALLOCATE( Decomp%Head( NPEs ) )
1301     !
1302     ! Perform over all points in the mapping
1303     !
1304           PEhold  = -1
1305           LastTag = -999999999
1306           Counter = 0
1307           Decomp%NumEntries = 0
1308           DO I=1, TotalPts
1309             CPP_ASSERT_F90( PE( I ) .LT. NPEs .AND. PE( I ) .GE. 0 )
1310             IF ( LastTag==0 .OR. Tags(I)/=LastTag+1 .OR. PE(I)/=PEhold ) THEN
1311               PEhold = PE( I )
1312               Counter( PEhold+1 ) = Counter( PEhold+1 ) + 1
1313             ENDIF
1314             Decomp%NumEntries(PEHold+1) = Decomp%NumEntries(PEHold+1) + 1
1315             LastTag = Tags(I)
1316           ENDDO
1317     
1318           DO I=1, NPEs
1319     !
1320     ! Now the amount of space to allocate is known.  It is acceptable
1321     ! to in allocated an array of size 0 (F90 Handbook, Section 6.5.1)
1322     !
1323             ALLOCATE( Decomp%Head(I)%StartTags(Counter(I)) )
1324             ALLOCATE( Decomp%Head(I)%EndTags(Counter(I)) )
1325             ALLOCATE( Decomp%Head(I)%Offsets(Counter(I)) )
1326           ENDDO
1327     
1328     !
1329     ! Perform over all points in the domain
1330     !
1331           PEhold  = -1
1332           LastTag = -999999999
1333           Counter = 0
1334           DO I=1, TotalPts
1335             IF ( LastTag==0 .OR. Tags(I)/=LastTag+1 .OR. PE(I)/=PEhold ) THEN
1336     !
1337     ! If not first entry, close up shop on previous run
1338     !
1339               IF ( I .GT. 1 ) THEN
1340                 Decomp%Head(PEhold+1)%EndTags(Counter(PEhold+1)) = LastTag
1341               ENDIF
1342               PEhold = PE( I )
1343               Counter( PEhold+1 ) = Counter( PEhold+1 ) + 1
1344               Decomp%Head(PEhold+1)%StartTags(Counter(PEhold+1)) = Tags(I)
1345             ENDIF
1346             LastTag = Tags(I)
1347           ENDDO
1348     !
1349     ! Clean up shop for the final run
1350     !
1351           IF ( TotalPts .GT. 0 ) THEN
1352             Decomp%Head(PEhold+1)%EndTags(Counter(PEhold+1)) =Tags(TotalPts)
1353           ENDIF
1354     
1355     !
1356     ! Calculate offsets
1357     !
1358           DO I=1, NPEs
1359             IF ( Counter(I) > 0 ) THEN
1360               Decomp%Head(I)%Offsets(1) = 0
1361               DO J=2, Counter(I)
1362                 Decomp%Head(I)%Offsets(J) = Decomp%Head(I)%Offsets(J-1) +      &
1363                 Decomp%Head(I)%EndTags(J-1) - Decomp%Head(I)%StartTags(J-1) + 1
1364               ENDDO
1365             ENDIF
1366           ENDDO
1367           Decomp%Defined = .TRUE.
1368     
1369           CPP_LEAVE_PROCEDURE( "DECOMPCREATETAGS" )
1370           RETURN
1371     !EOC
1372           END SUBROUTINE DecompCreateTags
1373     !------------------------------------------------------------------------
1374     
1375     
1376     !------------------------------------------------------------------------
1377     !BOP
1378     ! !IROUTINE: DecompG2L --- Map global index to local and PE
1379     !
1380     ! !INTERFACE:
1381           SUBROUTINE DecompG2L ( Decomp, Global, Local, Pe )
1382     ! !USES:
1383           IMPLICIT NONE
1384     !
1385     ! !INPUT PARAMETERS:
1386           TYPE(DecompType), INTENT( IN )   :: Decomp  ! Decomp information
1387           INTEGER, INTENT( IN )            :: Global  ! Global index
1388     !
1389     ! !OUTPUT PARAMETERS:
1390     
1391           INTEGER, INTENT( OUT )  :: Local            ! Local index
1392           INTEGER, INTENT( OUT )  :: Pe               ! Pe location
1393     !
1394     !
1395     ! !DESCRIPTION:
1396     !     Given a decomposition and a global index, this routine returns
1397     !     the local index and PE location of that global tag.  If the
1398     !     global index is not found, Local = 0, Pe = -1 is returned.
1399     !
1400     !     Note that this routine is not efficient by any stretch of the 
1401     !     imagination --- only one index can be converted at a time.
1402     !     In addition, a search procedure must be performed, whose 
1403     !     efficiency is inversely proportional to the size of the decomposition
1404     !     (in particular, to the number of "runs").  Conceptually this
1405     !     mapping should be used only once in the program for
1406     !     initialization, and subsequently all calculations should take
1407     !     place using local indices.
1408     !
1409     ! !SYSTEM ROUTINES:
1410     !     SIZE
1411     !
1412     ! !REVISION HISTORY:
1413     !   98.03.20   Sawyer     Creation
1414     !   01.03.17   Sawyer     Test for Global==0 (undefined element)
1415     !   02.11.22   Sawyer     Optimized by caching previously used block
1416     !EOP
1417     !------------------------------------------------------------------------
1418     !BOC
1419     !
1420     ! !LOCAL VARIABLES:
1421           INTEGER, SAVE  :: Ipe   =  0   ! Initial process ID
1422           INTEGER, SAVE  :: J     =  0   ! Initial DO loop value
1423           INTEGER        :: Ipeold, Jold, PEsize, Jsize
1424     !
1425           CPP_ENTER_PROCEDURE( "DECOMPG2L" )
1426     
1427     !
1428     ! Search over all the PEs
1429     !
1430           Pe    = -1
1431           Local = 0
1432           IF ( Global == 0 ) RETURN          ! quick return
1433           PEsize = SIZE( Decomp%Head )
1434           IF ( Ipe >= PEsize ) Ipe = 0
1435           Ipeold= Ipe
1436     PEs:  DO                                 ! Loop over all PEs starting 
1437             Jsize = SIZE( Decomp%Head(Ipe+1)%StartTags )
1438             IF ( J >= Jsize ) J = 0
1439             Jold = J                         ! from the PE used previously
1440     Blocks: DO WHILE (Jsize > 0)             ! Loop through data segments
1441               IF ( Global >= Decomp%Head(Ipe+1)%StartTags(J+1) .AND.          &
1442                    Global <= Decomp%Head(Ipe+1)%EndTags(J+1) ) THEN
1443                 Local = Decomp%Head(Ipe+1)%Offsets(J+1) + Global -            &
1444                         Decomp%Head(Ipe+1)%StartTags(J+1) + 1
1445                 Pe = Ipe
1446                 EXIT PEs                     ! Global tag has been found
1447               ELSE
1448                 J = MOD(J+1,Jsize)           ! Increment the block index
1449               ENDIF
1450               IF ( J == Jold ) EXIT Blocks   ! Global tag not on this PE
1451             ENDDO Blocks
1452             Ipe = MOD(Ipe+1,PEsize)          ! Increment the pe number
1453             J   = 0
1454             IF ( Ipe == Ipeold ) EXIT PEs    ! Global tag not found on any PE
1455           ENDDO PEs
1456     
1457           CPP_ASSERT_F90( Local .LE. Decomp%NumEntries(Pe+1) ) 
1458     
1459           CPP_LEAVE_PROCEDURE( "DECOMPG2L" )
1460           RETURN
1461     !
1462     !EOC
1463           END SUBROUTINE DecompG2L
1464     !------------------------------------------------------------------------
1465     
1466     
1467     !------------------------------------------------------------------------
1468     !BOP
1469     ! !IROUTINE: DecompG2LVector --- Map global index to local and PE
1470     !
1471     ! !INTERFACE:
1472           SUBROUTINE DecompG2LVector ( Decomp, N, Global, Local, Pe )
1473     ! !USES:
1474           IMPLICIT NONE
1475     !
1476     ! !INPUT PARAMETERS:
1477           TYPE(DecompType), INTENT( IN ):: Decomp     ! Decomp information
1478           INTEGER, INTENT( IN )         :: N          ! Number of indices
1479           INTEGER, INTENT( IN )         :: Global(:)  ! Global index
1480     !
1481     ! !OUTPUT PARAMETERS:
1482     
1483           INTEGER, INTENT( OUT )  :: Local(:)         ! Local index
1484           INTEGER, INTENT( OUT )  :: Pe(:)            ! Pe location
1485     !
1486     !
1487     ! !DESCRIPTION:
1488     !     Given a decomposition and a global index, this routine returns
1489     !     the local index and PE location of that global tag.  If the
1490     !     global index is not found, Local = 0, Pe = -1 is returned.
1491     !
1492     !     Note that this routine is not efficient by any stretch of the 
1493     !     imagination --- only one index can be converted at a time.
1494     !     In addition, a search procedure must be performed, whose 
1495     !     efficiency is inversely proportional to the size of the decomposition
1496     !     (in particular, to the number of "runs").  Conceptually this
1497     !     mapping should be used only once in the program for
1498     !     initialization, and subsequently all calculations should take
1499     !     place using local indices.
1500     !
1501     ! !SYSTEM ROUTINES:
1502     !     SIZE
1503     !
1504     ! !REVISION HISTORY:
1505     !   02.11.09   Sawyer     Creation from decompglobaltolocal
1506     !   02.11.22   Sawyer     Optimized by caching previously used block
1507     !
1508     !EOP
1509     !------------------------------------------------------------------------
1510     !BOC
1511     !
1512     ! !LOCAL VARIABLES:
1513           INTEGER, SAVE :: J    = 0   ! Initial value
1514           INTEGER, SAVE :: Ipe  = 0   ! Initial value
1515           INTEGER  :: I, Ipeold, Jold, PEsize, Jsize
1516     !
1517           CPP_ENTER_PROCEDURE( "DECOMPG2LVECTOR" )
1518     
1519           PEsize = SIZE( Decomp%Head )
1520     !
1521     ! Search over all the PEs
1522     !
1523           DO I=1, N
1524             Pe(I) = -1
1525             Local(I) = 0
1526             IF ( Global(I) == 0 ) CYCLE
1527             IF ( Ipe >= PEsize ) Ipe = 0
1528             Ipeold= Ipe
1529     PEs:    DO WHILE ( PEsize > 0 )            ! Loop over all PEs starting 
1530               Jsize = SIZE( Decomp%Head(Ipe+1)%StartTags )
1531               IF ( J >= Jsize ) J = 0
1532               Jold = J                         ! from the PE used previously
1533     Blocks: DO WHILE (Jsize > 0)               ! Loop through data segments
1534                 IF ( Global(I) >= Decomp%Head(Ipe+1)%StartTags(J+1) .AND.        &
1535                    Global(I) <= Decomp%Head(Ipe+1)%EndTags(J+1) ) THEN
1536                   Local(I) = Decomp%Head(Ipe+1)%Offsets(J+1) + Global(I) -       &
1537                              Decomp%Head(Ipe+1)%StartTags(J+1) + 1
1538                   Pe(I) = Ipe
1539                   EXIT PEs                     ! Global tag has been found
1540                 ELSE
1541                   J = MOD(J+1,Jsize)           ! Increment the block index
1542                 ENDIF
1543                 IF ( J == Jold ) EXIT Blocks   ! Global tag not on this PE
1544               ENDDO Blocks
1545               Ipe = MOD(Ipe+1,PEsize)          ! Increment the pe number
1546               J   = 0
1547               IF ( Ipe == Ipeold ) EXIT PEs    ! Global tag not found on any PE
1548             ENDDO PEs
1549     
1550           CPP_ASSERT_F90( Local(I) .LE. Decomp%NumEntries(Pe(I)+1) ) 
1551     
1552           ENDDO
1553           CPP_LEAVE_PROCEDURE( "DECOMPG2LVECTOR" )
1554           RETURN
1555     !
1556     !EOC
1557         END SUBROUTINE DecompG2LVector
1558     !------------------------------------------------------------------------
1559     
1560     
1561     !------------------------------------------------------------------------
1562     !BOP
1563     ! !IROUTINE: DecompL2G --- Map global index to local and PE
1564     !
1565     ! !INTERFACE:
1566           SUBROUTINE DecompL2G ( Decomp, Local, Pe, Global )
1567     ! !USES:
1568           IMPLICIT NONE
1569     !
1570     ! !INPUT PARAMETERS:
1571           TYPE(DecompType), INTENT( IN )   :: Decomp  ! Decomp information
1572           INTEGER, INTENT( IN )            :: Local   ! Local index
1573           INTEGER, INTENT( IN )            :: Pe      ! Pe location
1574     !
1575     ! !OUTPUT PARAMETERS:
1576           INTEGER, INTENT( OUT )           :: Global  ! Global index
1577     !
1578     !
1579     ! !DESCRIPTION:
1580     !     Given a decomposition and a local-pe index pair, this routine 
1581     !     returns  the 2-D global index. If the local index is not found, 
1582     !     0 is returned. 
1583     !
1584     !     Note that this routine is not efficient by any stretch of the 
1585     !     imagination --- only one index can be converted at a time.
1586     !     In addition, a search procedure must be performed, whose 
1587     !     efficiency is inversely proportional to the size of the 
1588     !     decomposition (in particular, to the number of "runs").  
1589     !     Conceptually this mapping should be used only once in the 
1590     !     program for initialization, and subsequently all calculations 
1591     !     should take place using local indices.
1592     !
1593     ! !SYSTEM ROUTINES:
1594     !     SIZE
1595     !
1596     ! !REVISION HISTORY:
1597     !   98.03.20   Sawyer     Creation
1598     !
1599     !EOP
1600     !------------------------------------------------------------------------
1601     !BOC
1602     !
1603     ! !LOCAL VARIABLES:
1604           INTEGER  :: J, Counter
1605           LOGICAL  :: Found
1606     !
1607           CPP_ENTER_PROCEDURE( "DECOMPL2G" )
1608           CPP_ASSERT_F90( Pe .GE. 0 )
1609           CPP_ASSERT_F90( Pe .LT. SIZE(Decomp%Head) )
1610           CPP_ASSERT_F90( Local .GT. 0 )
1611           CPP_ASSERT_F90( Local .LE. Decomp%NumEntries(Pe+1) )
1612     
1613           Counter = 0
1614           Found   = .FALSE.
1615           J = 0
1616           DO WHILE ( .NOT. Found )
1617             J = J+1
1618             Counter = Counter + Decomp%Head(Pe+1)%EndTags(J) -               &
1619                                 Decomp%Head(Pe+1)%StartTags(J) + 1
1620             IF ( Local .LE.  Counter ) THEN
1621               Found = .TRUE.
1622     !
1623     ! The following calculation is not immediately obvious.  Think about it
1624     !
1625               Global = Local - Counter + Decomp%Head(Pe+1)%EndTags(J)
1626               Found = .TRUE.
1627             ELSEIF ( J .GE. SIZE( Decomp%Head(Pe+1)%StartTags ) ) THEN
1628     !
1629     ! Emergency brake
1630     !
1631               Found = .TRUE.
1632               Global = 0
1633             ENDIF
1634           ENDDO
1635     
1636           CPP_LEAVE_PROCEDURE( "DECOMPL2G" )
1637           RETURN
1638     !
1639     !EOC
1640           END SUBROUTINE DecompL2G
1641     !------------------------------------------------------------------------
1642     
1643     
1644     !------------------------------------------------------------------------
1645     !BOP
1646     ! !IROUTINE: DecompL2GVector --- Map global index to local and PE
1647     !
1648     ! !INTERFACE:
1649           SUBROUTINE DecompL2GVector ( Decomp, N, Local, Pe, Global )
1650     ! !USES:
1651           IMPLICIT NONE
1652     !
1653     ! !INPUT PARAMETERS:
1654           TYPE(DecompType), INTENT( IN )   :: Decomp  ! Decomp information
1655           INTEGER, INTENT( IN )            :: N       ! Number of indices
1656           INTEGER, INTENT( IN )            :: Local(:)! Local index
1657           INTEGER, INTENT( IN )            :: Pe(:)   ! Pe location
1658     !
1659     ! !OUTPUT PARAMETERS:
1660           INTEGER, INTENT( OUT )           :: Global(:) ! Global index
1661     !
1662     !
1663     ! !DESCRIPTION:
1664     !     Given a decomposition and a local-pe index pair, this routine 
1665     !     returns  the 2-D global index. If the local index is not found, 
1666     !     0 is returned. 
1667     !
1668     !     Note that this routine is not efficient by any stretch of the 
1669     !     imagination --- only one index can be converted at a time.
1670     !     In addition, a search procedure must be performed, whose 
1671     !     efficiency is inversely proportional to the size of the 
1672     !     decomposition (in particular, to the number of "runs").  
1673     !     Conceptually this mapping should be used only once in the 
1674     !     program for initialization, and subsequently all calculations 
1675     !     should take place using local indices.
1676     !
1677     ! !SYSTEM ROUTINES:
1678     !     SIZE
1679     !
1680     ! !REVISION HISTORY:
1681     !   02.11.09   Sawyer     Creation from decomplocaltoglobal
1682     !
1683     !EOP
1684     !------------------------------------------------------------------------
1685     !BOC
1686     !
1687     ! !LOCAL VARIABLES:
1688         INTEGER  :: I, J, Counter
1689         LOGICAL  :: Found
1690     !
1691         CPP_ENTER_PROCEDURE( "DECOMPL2GVECTOR" )
1692         DO I=1,N
1693           CPP_ASSERT_F90( Pe(I) .GE. 0 )
1694           CPP_ASSERT_F90( Pe(I) .LT. SIZE(Decomp%Head) )
1695           CPP_ASSERT_F90( Local(I) .GT. 0 )
1696           CPP_ASSERT_F90( Local(I) .LE. Decomp%NumEntries(Pe(I)+1) )
1697     
1698           Counter = 0
1699           Found   = .FALSE.
1700           J = 0
1701           DO WHILE ( .NOT. Found )
1702             J = J+1
1703             Counter = Counter + Decomp%Head(Pe(I)+1)%EndTags(J) -               &
1704                                 Decomp%Head(Pe(I)+1)%StartTags(J) + 1
1705             IF ( Local(I) .LE.  Counter ) THEN
1706               Found = .TRUE.
1707     !
1708     ! The following calculation is not immediately obvious.  Think about it
1709     !
1710               Global(I) = Local(I) - Counter + Decomp%Head(Pe(I)+1)%EndTags(J)
1711               Found = .TRUE.
1712             ELSEIF ( J .GE. SIZE( Decomp%Head(Pe(I)+1)%StartTags ) ) THEN
1713     !
1714     ! Emergency brake
1715     !
1716               Found = .TRUE.
1717               Global(I) = 0
1718             ENDIF
1719           ENDDO
1720         ENDDO
1721     
1722         CPP_LEAVE_PROCEDURE( "DECOMPL2GVECTOR" )
1723         RETURN
1724     !
1725     !EOC
1726         END SUBROUTINE DecompL2GVector
1727     !------------------------------------------------------------------------
1728     
1729     
1730     !------------------------------------------------------------------------
1731     !BOP
1732     ! !IROUTINE: DecompInfo --- Information about decomposition
1733     !
1734     ! !INTERFACE:
1735           SUBROUTINE DecompInfo( Decomp, Npes, TotalPts )
1736     ! !USES:
1737           IMPLICIT NONE
1738     
1739     ! !INPUT PARAMETERS:
1740           TYPE(DecompType), INTENT( IN ):: Decomp   ! Decomp information
1741     
1742     ! !OUTPUT PARAMETERS:
1743           INTEGER, INTENT( OUT )        :: Npes     ! Npes in decomposition
1744           INTEGER, INTENT( OUT )        :: TotalPts ! Total points in domain
1745     !
1746     !
1747     ! !DESCRIPTION:
1748     !     Return information about the decomposition: the number of
1749     !     PEs over which the domain is decomposed, and the size of
1750     !     the domain.
1751     !
1752     ! !REVISION HISTORY:
1753     !   00.11.12   Sawyer     Creation
1754     !
1755     !EOP
1756     !---------------------------------------------------------------------
1757     !BOC
1758     !
1759     !
1760           CPP_ENTER_PROCEDURE( "DECOMPINFO" )
1761     
1762           Npes = SIZE( Decomp%Head )
1763           TotalPts = Decomp%GlobalSize
1764     
1765           CPP_LEAVE_PROCEDURE( "DECOMPINFO" )
1766           RETURN
1767     !EOC
1768           END SUBROUTINE DecompInfo
1769     !------------------------------------------------------------------------
1770     
1771           END MODULE decompmodule
1772     
1773