Actual source code: ParDelta.hh
1: #ifndef included_ALE_ParDelta_hh
2: #define included_ALE_ParDelta_hh
4: #ifndef included_ALE_Sifter_hh
5: #include <Sifter.hh>
6: #endif
10: //
11: // Classes and methods implementing the parallel Overlap and Fusion algorithms on ASifter-like objects.
12: //
13: namespace ALE {
15: template <typename RightConeSequence_>
16: class RightSequenceDuplicator {
17: // Replicate the cone sequence on the right in the overlap graph.
18: int debug;
19: public:
20: //Encapsulated types
21: typedef RightConeSequence_ right_sequence_type;
22: typedef typename right_sequence_type::target_type right_target_type;
23: //
24: typedef typename right_sequence_type::source_type fusion_source_type;
25: typedef typename right_sequence_type::target_type fusion_target_type;
26: typedef typename right_sequence_type::color_type fusion_color_type;
27: public:
28: //
29: // Basic interface
30: //
31: RightSequenceDuplicator(int debug = 0) : debug(debug) {};
32: RightSequenceDuplicator(const RightSequenceDuplicator& f) {};
33: virtual ~RightSequenceDuplicator() {};
35: template <typename left_target_type>
36: fusion_target_type
37: fuseBasePoints(const left_target_type& ltarget, const right_target_type& rtarget) {
38: return rtarget;
39: };
41: // FIX: need to have const left_sequence& and const right_sequence&, but begin() and end() aren't const methods
42: template <typename left_sequence_type, typename fusion_sequence_type>
43: void
44: fuseCones(left_sequence_type& lcone, right_sequence_type& rcone, const Obj<fusion_sequence_type>& fcone) {
45: for(typename right_sequence_type::iterator rci = rcone.begin(); rci != rcone.end(); rci++) {
46: fcone->addArrow(rci.arrow());
47: }
48: };
49: }; // struct RightSequenceDuplicator
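// Illustrative note (not part of the original source): a Fuser supplies the fusion_* typedefs together with
// fuseBasePoints() and fuseCones(). This default duplicator keeps the right (remote) base point unchanged and
// simply copies every arrow of the remote cone into the fusion cone, e.g. the hypothetical call
//
//   fuser.fuseCones(localCone, remoteCone, fusionCone);   // appends remoteCone's arrows to fusionCone
//
// A different Fuser could instead merge or transform the two cones before recording them.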
52: template <typename Arrow_>
53: class ConeArraySequence {
54: // ConeArraySequence wraps a raw byte array of (Source_,Color_) pairs
55: // presenting it as a cone sequence for a given target.
56: public:
57: typedef Arrow_ arrow_type;
58: typedef typename arrow_type::source_type source_type;
59: typedef typename arrow_type::target_type target_type;
60: typedef typename arrow_type::color_type color_type;
61: //
62: struct cone_arrow_type {
63: source_type source;
64: color_type color;
65: //
66: cone_arrow_type(const arrow_type& a) : source(a.source), color(a.color) {};
67: cone_arrow_type(const source_type& s, const color_type& c) : source(s), color(c) {};
68: cone_arrow_type(const cone_arrow_type& ca) : source(ca.source), color(ca.color) {};
69: //
70: static void place(cone_arrow_type* ca_ptr, const arrow_type& a) {
71: // WARNING: an unsafe method in that it has no way of checking the validity of ca_ptr
72: ca_ptr->source = a.source;
73: ca_ptr->color = a.color;
74: };
75: static void place(cone_arrow_type* ca_ptr, const source_type& s, const color_type& c) {
76: // WARNING: an unsafe method in that it has no way of checking the validity of ca_ptr
77: ca_ptr->source = s;
78: ca_ptr->color = c;
79: };
80: };
81: protected:
82: typedef cone_arrow_type* cone_arrow_array;
83: target_type _target;
84: cone_arrow_array _arr_ptr;
85: size_t _seq_size;
86: public:
87: class iterator {
88: target_type _target;
89: cone_arrow_type* _ptr;
90: public:
91: iterator(const target_type& target, const cone_arrow_array& ptr) : _target(target), _ptr(ptr) {};
92: iterator(const iterator& it) : _target(it._target), _ptr(it._ptr) {};
93: virtual ~iterator() {};
94: //
95: virtual source_type operator*() const { return this->_ptr->source;};
96: virtual iterator operator++() {this->_ptr++; return *this;};
97: virtual iterator operator++(int n) {iterator tmp(this->_target, this->_ptr); this->_ptr++; return tmp;};
98: virtual bool operator!=(const iterator& it) {return ((it._target != this->_target)||(it._ptr != this->_ptr));};
99: //
100: virtual const source_type& source() const {return this->_ptr->source;};
101: virtual const color_type& color() const {return this->_ptr->color; };
102: virtual const target_type& target() const {return this->_target; };
103: virtual const arrow_type arrow() const {
104: return arrow_type(this->_ptr->source,this->_target,this->_ptr->color);
105: };
106: };
107: // Basic interface
108: ConeArraySequence(cone_arrow_array arr_ptr, const size_t& seq_size, const target_type& target) :
109: _target(target), _arr_ptr(arr_ptr), _seq_size(seq_size) {};
110: ConeArraySequence(const ConeArraySequence& seq) :
111: _target(seq._target), _arr_ptr(seq._arr_ptr), _seq_size(seq._seq_size) {};
112: virtual ~ConeArraySequence() {};
113: //
114: virtual iterator begin() { return iterator(this->_target, this->_arr_ptr); };
115: virtual iterator end() { return iterator(this->_target, this->_arr_ptr+this->_seq_size); };
116: virtual size_t size() { return this->_seq_size; };
117: virtual bool empty() { return (this->size() == 0); };
119: template<typename ostream_type>
120: void view(ostream_type& os, const bool& useColor = false, const char* label = NULL){
121: if(label != NULL) {
122: os << "Viewing " << label << " sequence:" << std::endl;
123: }
124: os << "[";
125: for(iterator i = this->begin(); i != this->end(); i++) {
126: os << " (" << *i;
127: if(useColor) {
128: os << "," << i.color();
129: }
130: os << ")";
131: }
132: os << " ]" << std::endl;
133: };
134: };// class ConeArraySequence
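// Illustrative usage sketch (not part of the original source; 'MyArrow' is a hypothetical arrow type whose
// source_type, target_type, and color_type are all int): wrap a raw buffer of (source, color) records and
// traverse it as the cone over a fixed target.
//
//   typedef ALE::ConeArraySequence<MyArrow> cones;
//   cones::cone_arrow_type buf[2] = { cones::cone_arrow_type(7, 0), cones::cone_arrow_type(8, 1) };
//   cones seq(buf, 2, /* target = */ 42);
//   for(cones::iterator i = seq.begin(); i != seq.end(); ++i) {
//     std::cout << *i << " (color " << i.color() << ")" << std::endl;  // prints sources 7 and 8
//   }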
137: template <typename ParSifter_,
138: typename Fuser_ = RightSequenceDuplicator<ConeArraySequence<typename ParSifter_::traits::arrow_type> >,
139: typename FusionSifter_ = typename ParSifter_::template rebind<typename Fuser_::fusion_source_type,
140: typename Fuser_::fusion_target_type,
141: typename Fuser_::fusion_color_type>::type
142: >
143: class ParConeDelta { // class ParConeDelta
144: public:
145: // Here we specialize to Sifters based on Points in order to enable parallel overlap discovery.
146: // We also assume that the Points in the base are ordered appropriately so we can use baseSequence.begin() and
147: // baseSequence.end() as the extrema for global reduction.
148: typedef ParConeDelta<ParSifter_, Fuser_, FusionSifter_> delta_type;
149: typedef ParSifter_ graph_type;
150: typedef Fuser_ fuser_type;
151: // These are default "return" types, although methods are templated on their main input/return types
152: typedef ASifter<int, ALE::Point, ALE::pair<ALE::Point, ALE::pair<int,int> >, SifterDef::uniColor> overlap_type;
153: typedef ASifter<int, ALE::pair<int,ALE::Point>, ALE::pair<ALE::Point, ALE::pair<int,int> >, SifterDef::uniColor> bioverlap_type;
154: typedef FusionSifter_ fusion_type;
156: //
159: static Obj<overlap_type>
160: overlap(const Obj<graph_type> graph) {
161: ALE_LOG_EVENT_BEGIN;
162: Obj<overlap_type> overlap = new overlap_type(graph->comm());
163: // If this is a serial object, we return an empty overlap
164: if((graph->comm() != PETSC_COMM_SELF) && (graph->commSize() > 1)) {
165: computeOverlap(graph, overlap);
166: }
167: ALE_LOG_EVENT_END;
168: return overlap;
169: };
171: template <typename Overlap_>
172: static void computeOverlap(const Obj<graph_type>& graph, Obj<Overlap_>& overlap){
173: __computeOverlapNew(graph, overlap);
174: };
178: static Obj<bioverlap_type>
179: overlap(const Obj<graph_type> graphA, const Obj<graph_type> graphB) {
180: ALE_LOG_EVENT_BEGIN;
181: Obj<bioverlap_type> overlap = new bioverlap_type(graphA->comm());
182: PetscMPIInt comp;
184: MPI_Comm_compare(graphA->comm(), graphB->comm(), &comp);
185: if (comp != MPI_IDENT) {
186: throw ALE::Exception("Non-matching communicators for overlap");
187: }
188: computeOverlap(graphA, graphB, overlap);
189: ALE_LOG_EVENT_END;
190: return overlap;
191: };
193: template <typename Overlap_>
194: static void computeOverlap(const Obj<graph_type>& graphA, const Obj<graph_type>& graphB, Obj<Overlap_>& overlap){
195: __computeOverlapNew(graphA, graphB, overlap);
196: };
198: template <typename Overlap_>
199: static Obj<fusion_type>
200: fusion(const Obj<graph_type>& graph, const Obj<Overlap_>& overlap, const Obj<fuser_type>& fuser = new fuser_type()) {
201: Obj<fusion_type> fusion = new fusion_type(graph->comm());
202: // If this is a serial object, we return an empty delta
203: if((graph->comm() != PETSC_COMM_SELF) && (graph->commSize() > 1)) {
204: computeFusion(graph, overlap, fusion, fuser);
205: }
206: return fusion;
207: };
209: template <typename Overlap_>
210: static void computeFusion(const Obj<graph_type>& graph, const Obj<Overlap_>& overlap, Obj<fusion_type>& fusion, const Obj<fuser_type>& fuser = new fuser_type()){
211: __computeFusionNew(graph, overlap, fusion, fuser);
212: };
214: template <typename Overlap_>
215: static Obj<fusion_type>
216: fusion(const Obj<graph_type>& graphA, const Obj<graph_type>& graphB, const Obj<Overlap_>& overlap, const Obj<fuser_type>& fuser = new fuser_type()) {
217: Obj<fusion_type> fusion = new fusion_type(graphA->comm());
218: PetscMPIInt comp;
220: MPI_Comm_compare(graphA->comm(), graphB->comm(), &comp);
221: if (comp != MPI_IDENT) {
222: throw ALE::Exception("Non-matching communicators for overlap");
223: }
224: computeFusion(graphA, graphB, overlap, fusion, fuser);
225: return fusion;
226: };
228: template <typename Overlap_>
229: static void computeFusion(const Obj<graph_type>& graphA, const Obj<graph_type>& graphB, const Obj<Overlap_>& overlap, Obj<fusion_type>& fusion, const Obj<fuser_type>& fuser = new fuser_type()){
230: PetscMPIInt comp;
232: MPI_Comm_compare(graphA->comm(), graphB->comm(), &comp);
233: if (comp != MPI_IDENT) {
234: throw ALE::Exception("Non-matching communicators for overlap");
235: }
236: __computeFusionNew(graphA, graphB, overlap, fusion, fuser);
237: };
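// Illustrative usage sketch (not part of the original source; 'graph' stands for a hypothetical Obj<graph_type>
// distributed over several processes): overlap discovery is followed by fusion, which completes the cones over
// the shared base points using the arrows held by neighboring processes.
//
//   typedef ALE::ParConeDelta<MySifter> delta;
//   ALE::Obj<delta::overlap_type> overlap = delta::overlap(graph);
//   ALE::Obj<delta::fusion_type>  fusion  = delta::fusion(graph, overlap);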
239: protected:
240: static int debug;
241: // Internal type definitions to ensure compatibility with the legacy code in the parallel subroutines
242: typedef ALE::Point Point;
243: typedef int int32_t;
244: typedef std::pair<int32_t, int32_t> int_pair;
245: typedef std::set<std::pair<int32_t, int32_t> > int_pair_set;
246: typedef std::map<int32_t,int32_t> int__int;
247: typedef std::map<Point, int32_t> Point__int;
248: typedef std::map<Point, std::pair<int32_t,int32_t> > Point__int_int;
249: typedef std::map<Point, int_pair_set> Point__int_pair_set;
251: protected:
252: //--------------------------------------------------------------------------------------------------------
253: template <typename Sequence>
254: static void __determinePointOwners(const Obj<graph_type> _graph, const Obj<Sequence>& points, int32_t *LeaseData, int__int& owner) {
256: // The Sequence points will be referred to as 'base' throughout, although it may in fact represent a cap.
257: MPI_Comm comm = _graph->comm();
258: int size = _graph->commSize();
259: int rank = _graph->commRank();
261: // We need to partition global nodes among lessors, which we do by global prefix.
262: // First we determine the extent of the global prefixes and the bounds on the indices within each global prefix.
263: int minGlobalPrefix = 0;
264: // Determine the local extent of global domains
265: for(typename Sequence::iterator point_itor = points->begin(); point_itor != points->end(); point_itor++) {
266: Point p = (*point_itor);
267: if((p.prefix < 0) && (p.prefix < minGlobalPrefix)) {
268: minGlobalPrefix = p.prefix;
269: }
270: }
271: int MinGlobalPrefix;
272: MPI_Allreduce(&minGlobalPrefix, &MinGlobalPrefix, 1, MPIU_INT, MPI_MIN, comm);
273: CHKERROR(ierr, "Error in MPI_Allreduce");
274:
275: int__int BaseLowerBound, BaseUpperBound; // global quantities computed from the local quantities below
276: int__int BaseMaxSize; // the maximum size of the global base index space by global prefix
277: int__int BaseSliceScale, BaseSliceSize, BaseSliceOffset;
278:
279: if(MinGlobalPrefix < 0) { // if we actually do have global base points
280: // Determine the upper and lower bounds on the indices of base points within each global prefix.
281: // We use maps to keep track of these quantities for the different global prefixes.
282: int__int baseLowerBound, baseUpperBound; // local quantities
283: // Initialize the local bound maps with the upper bound below the lower bound so we can later recognize omitted prefixes.
284: for(int d = -1; d >= MinGlobalPrefix; d--) {
285: baseLowerBound[d] = 0; baseUpperBound[d] = -1;
286: }
287: // Compute local bounds
288: for(typename Sequence::iterator point_itor = points->begin(); point_itor != points->end(); point_itor++) {
289: Point p = (*point_itor);
290: int d = p.prefix;
291: int i = p.index;
292: if(d < 0) { // it is indeed a global prefix
293: if (i < baseLowerBound[d]) {
294: baseLowerBound[d] = i;
295: }
296: if (i > baseUpperBound[d]) {
297: baseUpperBound[d] = i;
298: }
299: }
300: }
301: // Compute global bounds
302: for(int d = -1; d >= MinGlobalPrefix; d--){
303: int lowerBound, upperBound, maxSize;
304: MPI_Allreduce(&baseLowerBound[d],&lowerBound,1,MPIU_INT,MPI_MIN,comm);
305: CHKERROR(ierr, "Error in MPI_Allreduce");
306: MPI_Allreduce(&baseUpperBound[d],&upperBound,1,MPIU_INT,MPI_MAX,comm);
307: CHKERROR(ierr, "Error in MPI_Allreduce");
308: maxSize = upperBound - lowerBound + 1;
309: if(maxSize > 0) { // there are actually some indices in this global prefix
310: BaseLowerBound[d] = lowerBound;
311: BaseUpperBound[d] = upperBound;
312: BaseMaxSize[d] = maxSize;
313:
314: // Each processor (at least potentially) owns a slice of the base indices within each global prefix.
315: // The size of the slice with global prefix d is BaseMaxSize[d]/size + 1 (except for rank == size-1,
316: // where the slice size can be smaller; the +1 is for safety).
317: 
318: // For a non-empty domain d we compute and store the slice size in BaseSliceScale[d] (the 'typical' slice size) and
319: // BaseSliceSize[d] (the 'actual' slice size, which differs from the 'typical' one only on the processor with rank == size-1).
320: // Likewise, each processor keeps track of the index offset of each slice it owns and stores it in BaseSliceOffset[d].
321: BaseSliceScale[d] = BaseMaxSize[d]/size + 1;
322: BaseSliceSize[d] = BaseSliceScale[d];
323: if (rank == size-1) {
324: BaseSliceSize[d] = BaseMaxSize[d] - BaseSliceScale[d]*(size-1);
325: }
326: BaseSliceSize[d] = PetscMax(1,BaseSliceSize[d]);
327: BaseSliceOffset[d] = BaseLowerBound[d] + BaseSliceScale[d]*rank;
328: }// if(maxSize > 0)
329: }
330: }// if(MinGlobalPrefix < 0)
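// Worked example (illustrative, not part of the original source): with size = 4 processes and a global prefix d
// whose indices span [0, 99], BaseMaxSize[d] = 100 and BaseSliceScale[d] = 100/4 + 1 = 26. Ranks 0..2 own
// slices of 26 indices starting at offsets 0, 26 and 52, while rank 3 owns the remaining 100 - 3*26 = 22
// indices starting at offset 78; a point with index i is then owned by rank (i - BaseLowerBound[d])/26.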
331:
332: for (typename Sequence::iterator point_itor = points->begin(); point_itor != points->end(); point_itor++) {
333: Point p = (*point_itor);
334: // Determine which slice p falls into
335: // ASSUMPTION on Point type
336: int d = p.prefix;
337: int i = p.index;
338: int proc;
339: if(d < 0) { // global domain -- determine the owner by which slice p falls into
340: proc = (i-BaseLowerBound[d])/BaseSliceScale[d];
341: }
342: else { // local domain -- must refer to a rank within the comm
343: if(d >= size) {
344: throw ALE::Exception("Local domain outside of comm size");
345: }
346: proc = d;
347: }
348: // FIX
349: owner[p.index] = proc;
350: LeaseData[2*proc+1] = 1; // processor owns at least one of ours (i.e., the number of leases from proc is 1)
351: LeaseData[2*proc]++; // count of how many we lease from proc
352: }
354: // Base was empty
355: if(points->begin() == points->end()) {
356: for(int p = 0; p < size; p++) {
357: LeaseData[2*p+0] = 0;
358: LeaseData[2*p+1] = 0;
359: }
360: }
361: }; // __determinePointOwners()
364: //-------------------------------------------------------------------------------------------------------
365: #undef __FUNCT__
367: template <typename Overlap_>
368: static void __computeOverlapNew(const Obj<graph_type>& _graph, Obj<Overlap_>& overlap) {
369: typedef typename graph_type::traits::baseSequence Sequence;
370: MPI_Comm comm = _graph->comm();
371: int size = _graph->commSize();
372: int rank = _graph->commRank();
373: PetscObject petscObj = _graph->petscObj();
374: PetscMPIInt tag1, tag2, tag3;
376: // The base we are going to work with
377: Obj<Sequence> points = _graph->base();
378: // 2 ints per processor: number of points we buy and number of sales (0 or 1).
379: int *BuyData;
380: PetscMalloc(2*size * sizeof(int), &BuyData);CHKERROR(ierr, "Error in PetscMalloc");
381: PetscMemzero(BuyData, 2*size * sizeof(int));CHKERROR(ierr, "Error in PetscMemzero");
382: // Map from points to the process managing their bin (the seller)
383: int__int owner;
385: // determine the owner of each base node and save it in a map
386: __determinePointOwners(_graph, points, BuyData, owner);
388: int msgSize = 3; // A point is 2 ints, and the cone size is 1
389: int BuyCount = 0; // The number of sellers with which this process (buyer) communicates
390: int *BuySizes = PETSC_NULL; // The number of points to buy from each seller
391: int *Sellers = PETSC_NULL; // The process for each seller
392: int *offsets = new int[size];
393: for(int p = 0; p < size; ++p) {BuyCount += BuyData[2*p+1];}
394: PetscMalloc2(BuyCount,int,&BuySizes,BuyCount,int,&Sellers);CHKERROR(ierr, "Error in PetscMalloc");
395: for(int p = 0, buyNum = 0; p < size; ++p) {
396: if (BuyData[2*p]) {
397: Sellers[buyNum] = p;
398: BuySizes[buyNum++] = BuyData[2*p];
399: }
400: if (p == 0) {
401: offsets[p] = 0;
402: } else {
403: offsets[p] = offsets[p-1] + msgSize*BuyData[2*(p-1)];
404: }
405: }
407: // All points are bought from someone
408: int32_t *BuyPoints;
409: PetscMalloc(msgSize*points->size() *sizeof(int32_t),&BuyPoints);CHKERROR(ierr,"Error in PetscMalloc");
410: for (typename Sequence::iterator p_itor = points->begin(); p_itor != points->end(); p_itor++) {
411: BuyPoints[offsets[owner[*p_itor]]++] = (*p_itor).prefix;
412: BuyPoints[offsets[owner[*p_itor]]++] = (*p_itor).index;
413: BuyPoints[offsets[owner[*p_itor]]++] = _graph->cone(*p_itor)->size();
414: }
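// Layout sketch (illustrative): BuyPoints is grouped by seller, with msgSize = 3 ints per point, e.g.
//   [ p0.prefix, p0.index, |cone(p0)|,  p1.prefix, p1.index, |cone(p1)|, ... ]
// where the points destined for the first seller come first, then those for the next seller, and so on;
// offsets[] tracked the start of each seller's group while the array was being filled.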
415: for(int b = 0, o = 0; b < BuyCount; ++b) {
416: if (offsets[Sellers[b]] - o != msgSize*BuySizes[b]) {
417: throw ALE::Exception("Invalid point size");
418: }
419: o += msgSize*BuySizes[b];
420: }
421: delete [] offsets;
423: int SellCount; // The number of buyers with which this process (seller) communicates
424: int *SellSizes = PETSC_NULL; // The number of points to sell to each buyer
425: int *Buyers = PETSC_NULL; // The process for each buyer
426: int MaxSellSize; // The maximum number of points to be sold to any single buyer
427: int32_t *SellPoints = PETSC_NULL; // The points and cone sizes from all buyers
428: PetscMaxSum(comm, BuyData, &MaxSellSize, &SellCount);CHKERROR(ierr,"Error in PetscMaxSum");
429: PetscMalloc2(SellCount,int,&SellSizes,SellCount,int,&Buyers);CHKERROR(ierr, "Error in PetscMalloc");
430: for(int s = 0; s < SellCount; s++) {
431: SellSizes[s] = MaxSellSize;
432: Buyers[s] = MPI_ANY_SOURCE;
433: }
435: if (debug) {
436: ostringstream txt;
438: for(int p = 0; p < (int) points->size(); p++) {
439: txt << "["<<rank<<"]: BuyPoints["<<p<<"]: ("<<BuyPoints[p*msgSize]<<", "<<BuyPoints[p*msgSize+1]<<") coneSize "<<BuyPoints[p*msgSize+2]<<std::endl;
440: }
441: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
442: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
443: }
445: // First tell sellers which points we want to buy
446: PetscObjectGetNewTag(petscObj, &tag1); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
447: commCycle(comm, tag1, msgSize, BuyCount, BuySizes, Sellers, BuyPoints, SellCount, SellSizes, Buyers, &SellPoints);
449: if (debug) {
450: ostringstream txt;
452: if (!rank) {txt << "Unsquished" << std::endl;}
453: for(int p = 0; p < SellCount*MaxSellSize; p++) {
454: txt << "["<<rank<<"]: SellPoints["<<p<<"]: ("<<SellPoints[p*msgSize]<<", "<<SellPoints[p*msgSize+1]<<") coneSize "<<SellPoints[p*msgSize+2]<<std::endl;
455: }
456: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
457: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
458: }
460: // Since we gave maximum sizes, we need to squeeze SellPoints
461: for(int s = 0, offset = 0; s < SellCount; s++) {
462: if (offset != s*MaxSellSize*msgSize) {
463: PetscMemmove(&SellPoints[offset], &SellPoints[s*MaxSellSize*msgSize], SellSizes[s]*msgSize*sizeof(int32_t));CHKERROR(ierr,"Error in PetscMemmove");
464: }
465: offset += SellSizes[s]*msgSize;
466: }
468: if (debug) {
469: ostringstream txt;
470: int SellSize = 0;
472: if (!rank) {txt << "Squished" << std::endl;}
473: for(int s = 0; s < SellCount; s++) {
474: SellSize += SellSizes[s];
475: txt << "SellSizes["<<s<<"]: "<<SellSizes[s]<< std::endl;
476: }
477: for(int p = 0; p < SellSize; p++) {
478: txt << "["<<rank<<"]: SellPoints["<<p<<"]: ("<<SellPoints[p*msgSize]<<", "<<SellPoints[p*msgSize+1]<<") coneSize "<<SellPoints[p*msgSize+2]<<std::endl;
479: }
480: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
481: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
482: }
484: // SellSizes, Buyers, and SellPoints are output
485: Point__int_pair_set BillOfSale;
487: for(int s = 0, offset = 0; s < SellCount; s++) {
488: for(int m = 0; m < SellSizes[s]; m++) {
489: Point point = Point(SellPoints[offset], SellPoints[offset+1]);
491: BillOfSale[point].insert(int_pair(Buyers[s], SellPoints[offset+2]));
492: offset += msgSize;
493: }
494: }
495: for(int s = 0, offset = 0; s < SellCount; s++) {
496: for(int m = 0; m < SellSizes[s]; m++) {
497: Point point = Point(SellPoints[offset], SellPoints[offset+1]);
499: // Decrement the buyer count so as not to count the current buyer itself
500: SellPoints[offset+2] = BillOfSale[point].size()-1;
501: offset += msgSize;
502: }
503: }
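// Example (illustrative): if ranks 1, 3 and 5 all bought point p from this seller, BillOfSale[p] holds three
// (buyer, cone size) pairs and each of those buyers is told about the 2 other buyers of p.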
505: // Then tell buyers how many other buyers there were
506: PetscObjectGetNewTag(petscObj, &tag2); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
507: commCycle(comm, tag2, msgSize, SellCount, SellSizes, Buyers, SellPoints, BuyCount, BuySizes, Sellers, &BuyPoints);
509: int BuyConesSize = 0;
510: int SellConesSize = 0;
511: int *BuyConesSizes = PETSC_NULL; // The number of (rank, cone size) entries to receive from each seller
512: int *SellConesSizes = PETSC_NULL; // The number of (rank, cone size) entries to send to each buyer
513: int32_t *SellCones = PETSC_NULL; // The (rank, cone size) for each point from all other buyers
514: int32_t *overlapInfo = PETSC_NULL; // The (rank, cone size) for each point from all other buyers
515: PetscMalloc2(BuyCount,int,&BuyConesSizes,SellCount,int,&SellConesSizes);CHKERROR(ierr, "Error in PetscMalloc");
516: for(int s = 0, offset = 0; s < SellCount; s++) {
517: SellConesSizes[s] = 0;
519: for(int m = 0; m < SellSizes[s]; m++) {
520: SellConesSizes[s] += SellPoints[offset+2]+1;
521: offset += msgSize;
522: }
523: SellConesSize += SellConesSizes[s];
524: }
526: for(int b = 0, offset = 0; b < BuyCount; b++) {
527: BuyConesSizes[b] = 0;
529: for(int m = 0; m < BuySizes[b]; m++) {
530: BuyConesSizes[b] += BuyPoints[offset+2]+1;
531: offset += msgSize;
532: }
533: BuyConesSize += BuyConesSizes[b];
534: }
536: int cMsgSize = 2;
537: PetscMalloc(SellConesSize*cMsgSize * sizeof(int32_t), &SellCones);CHKERROR(ierr, "Error in PetscMalloc");
538: for(int s = 0, offset = 0, cOffset = 0, SellConeSize = 0; s < SellCount; s++) {
539: for(int m = 0; m < SellSizes[s]; m++) {
540: Point point(SellPoints[offset],SellPoints[offset+1]);
542: for(typename int_pair_set::iterator p_iter = BillOfSale[point].begin(); p_iter != BillOfSale[point].end(); ++p_iter) {
543: SellCones[cOffset+0] = (*p_iter).first;
544: SellCones[cOffset+1] = (*p_iter).second;
545: cOffset += cMsgSize;
546: }
547: offset += msgSize;
548: }
549: if (cOffset - cMsgSize*SellConeSize != cMsgSize*SellConesSizes[s]) {
550: throw ALE::Exception("Nonmatching sizes");
551: }
552: SellConeSize += SellConesSizes[s];
553: }
555: // Then send buyers a (rank, cone size) for all buyers of the same points
556: PetscObjectGetNewTag(petscObj, &tag3); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
557: commCycle(comm, tag3, cMsgSize, SellCount, SellConesSizes, Buyers, SellCones, BuyCount, BuyConesSizes, Sellers, &overlapInfo);
559: // Finally build the overlap sifter
560: // (remote rank) ---(base overlap point, remote cone size, local cone size)---> (base overlap point)
561: for(int b = 0, offset = 0, cOffset = 0; b < BuyCount; b++) {
562: for(int m = 0; m < BuySizes[b]; m++) {
563: Point p(BuyPoints[offset],BuyPoints[offset+1]);
565: for(int n = 0; n <= BuyPoints[offset+2]; n++) {
566: int neighbor = overlapInfo[cOffset+0];
567: int coneSize = overlapInfo[cOffset+1];
569: if (neighbor != rank) {
570: // Record, as the arrow color, the point together with the size of the cone over p coming in from the neighbor and the size of our cone going out to the neighbor
571: overlap->addArrow(neighbor, p, ALE::pair<Point,ALE::pair<int,int> >(p, ALE::pair<int,int>(coneSize, _graph->cone(p)->size())) );
572: }
573: cOffset += cMsgSize;
574: }
575: offset += msgSize;
576: }
577: }
578: };
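// Resulting overlap structure (illustrative): if rank 2 also has base point p, with a cone of size 4 on its side
// while our local cone over p has size 3, the loop above adds the arrow
//   2 --(p, (4, 3))--> p
// i.e. source = remote rank, target = shared base point, color = (point, (remote cone size, local cone size)).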
580: #undef __FUNCT__
582: template <typename Overlap_>
583: static void __computeOverlapNew(const Obj<graph_type>& _graphA, const Obj<graph_type>& _graphB, Obj<Overlap_>& overlap) {
584: typedef typename graph_type::traits::baseSequence Sequence;
585: MPI_Comm comm = _graphA->comm();
586: int size = _graphA->commSize();
587: int rank = _graphA->commRank();
588: PetscObject petscObj = _graphA->petscObj();
589: PetscMPIInt tag1, tag2, tag3, tag4, tag5, tag6;
591: // The bases we are going to work with
592: Obj<Sequence> pointsA = _graphA->base();
593: Obj<Sequence> pointsB = _graphB->base();
595: // We MUST have the same sellers for points in A and B (same point owner determination)
596: int *BuyDataA; // 2 ints per processor: number of A base points we buy and number of sales (0 or 1).
597: int *BuyDataB; // 2 ints per processor: number of B base points we buy and number of sales (0 or 1).
598: PetscMalloc2(2*size,int,&BuyDataA,2*size,int,&BuyDataB);CHKERROR(ierr, "Error in PetscMalloc");
599: PetscMemzero(BuyDataA, 2*size * sizeof(int));CHKERROR(ierr, "Error in PetscMemzero");
600: PetscMemzero(BuyDataB, 2*size * sizeof(int));CHKERROR(ierr, "Error in PetscMemzero");
601: // Map from points to the process managing their bin (the seller)
602: int__int ownerA, ownerB;
604: // determine the owner of each base node and save it in a map
605: __determinePointOwners(_graphA, pointsA, BuyDataA, ownerA);
606: __determinePointOwners(_graphB, pointsB, BuyDataB, ownerB);
608: int msgSize = 3; // A point is 2 ints, and the cone size is 1
609: int BuyCountA = 0; // The number of sellers with which this process (A buyer) communicates
610: int BuyCountB = 0; // The number of sellers with which this process (B buyer) communicates
611: int *BuySizesA; // The number of A points to buy from each seller
612: int *BuySizesB; // The number of B points to buy from each seller
613: int *SellersA; // The process for each seller of A points
614: int *SellersB; // The process for each seller of B points
615: int *offsetsA = new int[size];
616: int *offsetsB = new int[size];
617: for(int p = 0; p < size; ++p) {
618: BuyCountA += BuyDataA[2*p+1];
619: BuyCountB += BuyDataB[2*p+1];
620: }
621: PetscMalloc2(BuyCountA,int,&BuySizesA,BuyCountA,int,&SellersA);CHKERROR(ierr, "Error in PetscMalloc");
622: PetscMalloc2(BuyCountB,int,&BuySizesB,BuyCountB,int,&SellersB);CHKERROR(ierr, "Error in PetscMalloc");
623: for(int p = 0, buyNumA = 0, buyNumB = 0; p < size; ++p) {
624: if (BuyDataA[2*p+1]) {
625: SellersA[buyNumA] = p;
626: BuySizesA[buyNumA++] = BuyDataA[2*p];
627: }
628: if (BuyDataB[2*p+1]) {
629: SellersB[buyNumB] = p;
630: BuySizesB[buyNumB++] = BuyDataB[2*p];
631: }
632: if (p == 0) {
633: offsetsA[p] = 0;
634: offsetsB[p] = 0;
635: } else {
636: offsetsA[p] = offsetsA[p-1] + msgSize*BuyDataA[2*(p-1)];
637: offsetsB[p] = offsetsB[p-1] + msgSize*BuyDataB[2*(p-1)];
638: }
639: }
641: // All points are bought from someone
642: int32_t *BuyPointsA; // (point, coneSize) for each A point bought from a seller
643: int32_t *BuyPointsB; // (point, coneSize) for each B point bought from a seller
644: PetscMalloc2(msgSize*pointsA->size(),int32_t,&BuyPointsA,msgSize*pointsB->size(),int32_t,&BuyPointsB);CHKERROR(ierr,"Error in PetscMalloc");
645: for (typename Sequence::iterator p_itor = pointsA->begin(); p_itor != pointsA->end(); p_itor++) {
646: BuyPointsA[offsetsA[ownerA[*p_itor]]++] = (*p_itor).prefix;
647: BuyPointsA[offsetsA[ownerA[*p_itor]]++] = (*p_itor).index;
648: BuyPointsA[offsetsA[ownerA[*p_itor]]++] = _graphA->cone(*p_itor)->size();
649: }
650: for (typename Sequence::iterator p_itor = pointsB->begin(); p_itor != pointsB->end(); p_itor++) {
651: BuyPointsB[offsetsB[ownerB[*p_itor]]++] = (*p_itor).prefix;
652: BuyPointsB[offsetsB[ownerB[*p_itor]]++] = (*p_itor).index;
653: BuyPointsB[offsetsB[ownerB[*p_itor]]++] = _graphB->cone(*p_itor)->size();
654: }
655: for(int b = 0, o = 0; b < BuyCountA; ++b) {
656: if (offsetsA[SellersA[b]] - o != msgSize*BuySizesA[b]) {
657: throw ALE::Exception("Invalid A point size");
658: }
659: o += msgSize*BuySizesA[b];
660: }
661: for(int b = 0, o = 0; b < BuyCountB; ++b) {
662: if (offsetsB[SellersB[b]] - o != msgSize*BuySizesB[b]) {
663: throw ALE::Exception("Invalid B point size");
664: }
665: o += msgSize*BuySizesB[b];
666: }
667: delete [] offsetsA;
668: delete [] offsetsB;
670: int SellCountA; // The number of A point buyers with which this process (seller) communicates
671: int SellCountB; // The number of B point buyers with which this process (seller) communicates
672: int *SellSizesA; // The number of A points to sell to each buyer
673: int *SellSizesB; // The number of B points to sell to each buyer
674: int *BuyersA; // The process for each A point buyer
675: int *BuyersB; // The process for each B point buyer
676: int MaxSellSizeA; // The maximum number of points to be sold to any single A point buyer
677: int MaxSellSizeB; // The maximum number of points to be sold to any single B point buyer
678: int32_t *SellPointsA = PETSC_NULL; // The points and cone sizes from all buyers
679: int32_t *SellPointsB = PETSC_NULL; // The points and cone sizes from all buyers
680: PetscMaxSum(comm, BuyDataA, &MaxSellSizeA, &SellCountA);CHKERROR(ierr,"Error in PetscMaxSum");
681: PetscMaxSum(comm, BuyDataB, &MaxSellSizeB, &SellCountB);CHKERROR(ierr,"Error in PetscMaxSum");
682: PetscMalloc2(SellCountA,int,&SellSizesA,SellCountA,int,&BuyersA);CHKERROR(ierr, "Error in PetscMalloc");
683: PetscMalloc2(SellCountB,int,&SellSizesB,SellCountB,int,&BuyersB);CHKERROR(ierr, "Error in PetscMalloc");
684: for(int s = 0; s < SellCountA; s++) {
685: SellSizesA[s] = MaxSellSizeA;
686: BuyersA[s] = MPI_ANY_SOURCE;
687: }
688: for(int s = 0; s < SellCountB; s++) {
689: SellSizesB[s] = MaxSellSizeB;
690: BuyersB[s] = MPI_ANY_SOURCE;
691: }
693: if (debug) {
694: ostringstream txt;
696: for(int s = 0; s < BuyCountA; s++) {
697: txt << "BuySizesA["<<s<<"]: "<<BuySizesA[s]<<" from seller "<<SellersA[s]<< std::endl;
698: }
699: for(int p = 0; p < (int) pointsA->size(); p++) {
700: txt << "["<<rank<<"]: BuyPointsA["<<p<<"]: ("<<BuyPointsA[p*msgSize]<<", "<<BuyPointsA[p*msgSize+1]<<") coneSize "<<BuyPointsA[p*msgSize+2]<<std::endl;
701: }
702: for(int s = 0; s < BuyCountB; s++) {
703: txt << "BuySizesB["<<s<<"]: "<<BuySizesB[s]<<" from seller "<<SellersB[s]<< std::endl;
704: }
705: for(int p = 0; p < (int) pointsB->size(); p++) {
706: txt << "["<<rank<<"]: BuyPointsB["<<p<<"]: ("<<BuyPointsB[p*msgSize]<<", "<<BuyPointsB[p*msgSize+1]<<") coneSize "<<BuyPointsB[p*msgSize+2]<<std::endl;
707: }
708: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
709: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
710: }
712: // First tell sellers which points we want to buy
713: // SellSizes, Buyers, and SellPoints are output
714: PetscObjectGetNewTag(petscObj, &tag1); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
715: commCycle(comm, tag1, msgSize, BuyCountA, BuySizesA, SellersA, BuyPointsA, SellCountA, SellSizesA, BuyersA, &SellPointsA);
716: PetscObjectGetNewTag(petscObj, &tag2); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
717: commCycle(comm, tag2, msgSize, BuyCountB, BuySizesB, SellersB, BuyPointsB, SellCountB, SellSizesB, BuyersB, &SellPointsB);
719: if (debug) {
720: ostringstream txt;
722: if (!rank) {txt << "Unsquished" << std::endl;}
723: for(int p = 0; p < SellCountA*MaxSellSizeA; p++) {
724: txt << "["<<rank<<"]: SellPointsA["<<p<<"]: ("<<SellPointsA[p*msgSize]<<", "<<SellPointsA[p*msgSize+1]<<") coneSize "<<SellPointsA[p*msgSize+2]<<std::endl;
725: }
726: for(int p = 0; p < SellCountB*MaxSellSizeB; p++) {
727: txt << "["<<rank<<"]: SellPointsB["<<p<<"]: ("<<SellPointsB[p*msgSize]<<", "<<SellPointsB[p*msgSize+1]<<") coneSize "<<SellPointsB[p*msgSize+2]<<std::endl;
728: }
729: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
730: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
731: }
733: // Since we gave maximum sizes, we need to squeeze SellPoints
734: for(int s = 0, offset = 0; s < SellCountA; s++) {
735: if (offset != s*MaxSellSizeA*msgSize) {
736: PetscMemmove(&SellPointsA[offset], &SellPointsA[s*MaxSellSizeA*msgSize], SellSizesA[s]*msgSize*sizeof(int32_t));CHKERROR(ierr,"Error in PetscMemmove");
737: }
738: offset += SellSizesA[s]*msgSize;
739: }
740: for(int s = 0, offset = 0; s < SellCountB; s++) {
741: if (offset != s*MaxSellSizeB*msgSize) {
742: PetscMemmove(&SellPointsB[offset], &SellPointsB[s*MaxSellSizeB*msgSize], SellSizesB[s]*msgSize*sizeof(int32_t));CHKERROR(ierr,"Error in PetscMemmove");
743: }
744: offset += SellSizesB[s]*msgSize;
745: }
747: if (debug) {
748: ostringstream txt;
749: int SellSizeA = 0, SellSizeB = 0;
751: if (!rank) {txt << "Squished" << std::endl;}
752: for(int s = 0; s < SellCountA; s++) {
753: SellSizeA += SellSizesA[s];
754: txt << "SellSizesA["<<s<<"]: "<<SellSizesA[s]<<" from buyer "<<BuyersA[s]<< std::endl;
755: }
756: for(int p = 0; p < SellSizeA; p++) {
757: txt << "["<<rank<<"]: SellPointsA["<<p<<"]: ("<<SellPointsA[p*msgSize]<<", "<<SellPointsA[p*msgSize+1]<<") coneSize "<<SellPointsA[p*msgSize+2]<<std::endl;
758: }
759: for(int s = 0; s < SellCountB; s++) {
760: SellSizeB += SellSizesB[s];
761: txt << "SellSizesB["<<s<<"]: "<<SellSizesB[s]<<" from buyer "<<BuyersB[s]<< std::endl;
762: }
763: for(int p = 0; p < SellSizeB; p++) {
764: txt << "["<<rank<<"]: SellPointsB["<<p<<"]: ("<<SellPointsB[p*msgSize]<<", "<<SellPointsB[p*msgSize+1]<<") coneSize "<<SellPointsB[p*msgSize+2]<<std::endl;
765: }
766: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
767: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
768: }
770: // Map from A base points to (B process, B coneSize) pairs
771: Point__int_pair_set BillOfSaleAtoB;
772: // Map from B base points to (A process, A coneSize) pairs
773: Point__int_pair_set BillOfSaleBtoA;
775: // Find the A points being sold to B buyers and record the B cone size
776: for(int s = 0, offset = 0; s < SellCountA; s++) {
777: for(int m = 0; m < SellSizesA[s]; m++) {
778: Point point = Point(SellPointsA[offset], SellPointsA[offset+1]);
779: // Just insert the point: operator[] default-constructs an empty entry for it
780: int size = BillOfSaleAtoB[point].size();
781: // size is always 0 here since nothing has been inserted yet; the test merely silences an unused-variable warning while the offset is advanced
782: if (!size) offset += msgSize;
783: }
784: }
785: for(int s = 0, offset = 0; s < SellCountB; s++) {
786: for(int m = 0; m < SellSizesB[s]; m++) {
787: Point point = Point(SellPointsB[offset], SellPointsB[offset+1]);
789: if (BillOfSaleAtoB.find(point) != BillOfSaleAtoB.end()) {
790: BillOfSaleAtoB[point].insert(int_pair(BuyersB[s], SellPointsB[offset+2]));
791: }
792: offset += msgSize;
793: }
794: }
795: // Find the B points being sold to A buyers and record the A cone size
796: for(int s = 0, offset = 0; s < SellCountB; s++) {
797: for(int m = 0; m < SellSizesB[s]; m++) {
798: Point point = Point(SellPointsB[offset], SellPointsB[offset+1]);
799: // Just insert the point: operator[] default-constructs an empty entry for it
800: int size = BillOfSaleBtoA[point].size();
801: // size is always 0 here since nothing has been inserted yet; the test merely silences an unused-variable warning while the offset is advanced
802: if (!size) offset += msgSize;
803: }
804: }
805: for(int s = 0, offset = 0; s < SellCountA; s++) {
806: for(int m = 0; m < SellSizesA[s]; m++) {
807: Point point = Point(SellPointsA[offset], SellPointsA[offset+1]);
809: if (BillOfSaleBtoA.find(point) != BillOfSaleBtoA.end()) {
810: BillOfSaleBtoA[point].insert(int_pair(BuyersA[s], SellPointsA[offset+2]));
811: }
812: offset += msgSize;
813: }
814: }
815: // Calculate number of B buyers for A base points
816: for(int s = 0, offset = 0; s < SellCountA; s++) {
817: for(int m = 0; m < SellSizesA[s]; m++) {
818: Point point = Point(SellPointsA[offset], SellPointsA[offset+1]);
820: SellPointsA[offset+2] = BillOfSaleAtoB[point].size();
821: offset += msgSize;
822: }
823: }
824: // Calculate number of A buyers for B base points
825: for(int s = 0, offset = 0; s < SellCountB; s++) {
826: for(int m = 0; m < SellSizesB[s]; m++) {
827: Point point = Point(SellPointsB[offset], SellPointsB[offset+1]);
829: SellPointsB[offset+2] = BillOfSaleBtoA[point].size();
830: offset += msgSize;
831: }
832: }
834: // Tell A buyers how many B buyers there were (contained in BuyPointsA)
835: // Tell B buyers how many A buyers there were (contained in BuyPointsB)
836: PetscObjectGetNewTag(petscObj, &tag3); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
837: commCycle(comm, tag3, msgSize, SellCountA, SellSizesA, BuyersA, SellPointsA, BuyCountA, BuySizesA, SellersA, &BuyPointsA);
838: PetscObjectGetNewTag(petscObj, &tag4); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
839: commCycle(comm, tag4, msgSize, SellCountB, SellSizesB, BuyersB, SellPointsB, BuyCountB, BuySizesB, SellersB, &BuyPointsB);
841: if (debug) {
842: ostringstream txt;
843: int BuySizeA = 0, BuySizeB = 0;
845: if (!rank) {txt << "Got other B and A buyers" << std::endl;}
846: for(int s = 0; s < BuyCountA; s++) {
847: BuySizeA += BuySizesA[s];
848: txt << "BuySizesA["<<s<<"]: "<<BuySizesA[s]<<" from seller "<<SellersA[s]<< std::endl;
849: }
850: for(int p = 0; p < BuySizeA; p++) {
851: txt << "["<<rank<<"]: BuyPointsA["<<p<<"]: ("<<BuyPointsA[p*msgSize]<<", "<<BuyPointsA[p*msgSize+1]<<") B buyers "<<BuyPointsA[p*msgSize+2]<<std::endl;
852: }
853: for(int s = 0; s < BuyCountB; s++) {
854: BuySizeB += BuySizesB[s];
855: txt << "BuySizesB["<<s<<"]: "<<BuySizesB[s]<<" from seller "<<SellersB[s]<< std::endl;
856: }
857: for(int p = 0; p < BuySizeB; p++) {
858: txt << "["<<rank<<"]: BuyPointsB["<<p<<"]: ("<<BuyPointsB[p*msgSize]<<", "<<BuyPointsB[p*msgSize+1]<<") A buyers "<<BuyPointsB[p*msgSize+2]<<std::endl;
859: }
860: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
861: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
862: }
864: int BuyConesSizeA = 0;
865: int BuyConesSizeB = 0;
866: int SellConesSizeA = 0;
867: int SellConesSizeB = 0;
868: int *BuyConesSizesA; // The number of (rank, cone size) entries to receive from each seller for A points
869: int *BuyConesSizesB; // The number of (rank, cone size) entries to receive from each seller for B points
870: int *SellConesSizesA; // The number of (rank, cone size) entries to send to each buyer for A points
871: int *SellConesSizesB; // The number of (rank, cone size) entries to send to each buyer for B points
872: int32_t *SellConesA; // The (rank, B cone size) for each A point from all other B buyers
873: int32_t *SellConesB; // The (rank, A cone size) for each B point from all other A buyers
874: int32_t *overlapInfoA = PETSC_NULL; // The (rank, B cone size) for each A point from all other B buyers
875: int32_t *overlapInfoB = PETSC_NULL; // The (rank, A cone size) for each B point from all other A buyers
876: PetscMalloc2(BuyCountA,int,&BuyConesSizesA,SellCountA,int,&SellConesSizesA);CHKERROR(ierr, "Error in PetscMalloc");
877: PetscMalloc2(BuyCountB,int,&BuyConesSizesB,SellCountB,int,&SellConesSizesB);CHKERROR(ierr, "Error in PetscMalloc");
878: for(int s = 0, offset = 0; s < SellCountA; s++) {
879: SellConesSizesA[s] = 0;
881: for(int m = 0; m < SellSizesA[s]; m++) {
882: SellConesSizesA[s] += SellPointsA[offset+2];
883: offset += msgSize;
884: }
885: SellConesSizeA += SellConesSizesA[s];
886: }
887: for(int s = 0, offset = 0; s < SellCountB; s++) {
888: SellConesSizesB[s] = 0;
890: for(int m = 0; m < SellSizesB[s]; m++) {
891: SellConesSizesB[s] += SellPointsB[offset+2];
892: offset += msgSize;
893: }
894: SellConesSizeB += SellConesSizesB[s];
895: }
897: for(int b = 0, offset = 0; b < BuyCountA; b++) {
898: BuyConesSizesA[b] = 0;
900: for(int m = 0; m < BuySizesA[b]; m++) {
901: BuyConesSizesA[b] += BuyPointsA[offset+2];
902: offset += msgSize;
903: }
904: BuyConesSizeA += BuyConesSizesA[b];
905: }
906: for(int b = 0, offset = 0; b < BuyCountB; b++) {
907: BuyConesSizesB[b] = 0;
909: for(int m = 0; m < BuySizesB[b]; m++) {
910: BuyConesSizesB[b] += BuyPointsB[offset+2];
911: offset += msgSize;
912: }
913: BuyConesSizeB += BuyConesSizesB[b];
914: }
916: int cMsgSize = 2;
917: PetscMalloc2(SellConesSizeA*cMsgSize,int32_t,&SellConesA,SellConesSizeB*cMsgSize,int32_t,&SellConesB);CHKERROR(ierr, "Error in PetscMalloc");
918: for(int s = 0, offset = 0, cOffset = 0, SellConeSize = 0; s < SellCountA; s++) {
919: for(int m = 0; m < SellSizesA[s]; m++) {
920: Point point(SellPointsA[offset],SellPointsA[offset+1]);
922: for(typename int_pair_set::iterator p_iter = BillOfSaleAtoB[point].begin(); p_iter != BillOfSaleAtoB[point].end(); ++p_iter) {
923: SellConesA[cOffset+0] = (*p_iter).first;
924: SellConesA[cOffset+1] = (*p_iter).second;
925: cOffset += cMsgSize;
926: }
927: offset += msgSize;
928: }
929: if (cOffset - cMsgSize*SellConeSize != cMsgSize*SellConesSizesA[s]) {
930: throw ALE::Exception("Nonmatching sizes");
931: }
932: SellConeSize += SellConesSizesA[s];
933: }
934: for(int s = 0, offset = 0, cOffset = 0, SellConeSize = 0; s < SellCountB; s++) {
935: for(int m = 0; m < SellSizesB[s]; m++) {
936: Point point(SellPointsB[offset],SellPointsB[offset+1]);
938: for(typename int_pair_set::iterator p_iter = BillOfSaleBtoA[point].begin(); p_iter != BillOfSaleBtoA[point].end(); ++p_iter) {
939: SellConesB[cOffset+0] = (*p_iter).first;
940: SellConesB[cOffset+1] = (*p_iter).second;
941: cOffset += cMsgSize;
942: }
943: offset += msgSize;
944: }
945: if (cOffset - cMsgSize*SellConeSize != cMsgSize*SellConesSizesB[s]) {
946: throw ALE::Exception("Nonmatching sizes");
947: }
948: SellConeSize += SellConesSizesB[s];
949: }
951: // Then send A buyers a (rank, cone size) for all B buyers of the same points
952: PetscObjectGetNewTag(petscObj, &tag5); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
953: commCycle(comm, tag5, cMsgSize, SellCountA, SellConesSizesA, BuyersA, SellConesA, BuyCountA, BuyConesSizesA, SellersA, &overlapInfoA);
954: PetscObjectGetNewTag(petscObj, &tag6); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
955: commCycle(comm, tag6, cMsgSize, SellCountB, SellConesSizesB, BuyersB, SellConesB, BuyCountB, BuyConesSizesB, SellersB, &overlapInfoB);
957: // Finally build the A-->B overlap sifter
958: // (remote rank) ---(base A overlap point, remote cone size, local cone size)---> (base A overlap point)
959: for(int b = 0, offset = 0, cOffset = 0; b < BuyCountA; b++) {
960: for(int m = 0; m < BuySizesA[b]; m++) {
961: Point p(BuyPointsA[offset],BuyPointsA[offset+1]);
963: for(int n = 0; n < BuyPointsA[offset+2]; n++) {
964: int neighbor = overlapInfoA[cOffset+0];
965: int coneSize = overlapInfoA[cOffset+1];
967: // Record, as the arrow color, the point together with the size of the cone over p coming in from the neighbor and the size of our cone going out to the neighbor
968: overlap->addArrow(neighbor, ALE::pair<int,Point>(0, p), ALE::pair<Point,ALE::pair<int,int> >(p, ALE::pair<int,int>(coneSize, _graphA->cone(p)->size())) );
969: cOffset += cMsgSize;
970: }
971: offset += msgSize;
972: }
973: }
975: // Finally build the B-->A overlap sifter
976: // (remote rank) ---(base B overlap point, remote cone size, local cone size)---> (base B overlap point)
977: for(int b = 0, offset = 0, cOffset = 0; b < BuyCountB; b++) {
978: for(int m = 0; m < BuySizesB[b]; m++) {
979: Point p(BuyPointsB[offset],BuyPointsB[offset+1]);
981: for(int n = 0; n < BuyPointsB[offset+2]; n++) {
982: int neighbor = overlapInfoB[cOffset+0];
983: int coneSize = overlapInfoB[cOffset+1];
985: // Record, as the arrow color, the point together with the size of the cone over p coming in from the neighbor and the size of our cone going out to the neighbor
986: overlap->addArrow(neighbor, ALE::pair<int,Point>(1, p), ALE::pair<Point,ALE::pair<int,int> >(p, ALE::pair<int,int>(coneSize, _graphB->cone(p)->size())) );
987: cOffset += cMsgSize;
988: }
989: offset += msgSize;
990: }
991: }
992: };
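// Resulting bioverlap structure (illustrative): targets are tagged pairs, (0, p) for base points of graphA and
// (1, p) for base points of graphB, so one sifter holds both directions. For example, if rank 3 holds p in the
// base of its graphB with a cone of size 4 while our graphA cone over p has size 2, the arrow added above is
//   3 --(p, (4, 2))--> (0, p)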
994: #undef __FUNCT__
996: template <typename Overlap_>
997: static void __computeOverlap(const Obj<graph_type>& _graph, Obj<Overlap_>& overlap) {
998: typedef typename graph_type::traits::baseSequence Sequence;
1000: MPI_Comm comm = _graph->comm();
1001: int size = _graph->commSize();
1002: int rank = _graph->commRank();
1003: PetscObject petscObj = _graph->petscObj();
1005: bool debug = delta_type::debug > 0;
1006: bool debug2 = delta_type::debug > 1;
1008: // Allocate space for the ownership data
1009: int32_t *LeaseData; // 2 ints per processor: number of leased nodes and number of leases (0 or 1).
1010: PetscMalloc(2*size*sizeof(PetscInt),&LeaseData);CHKERROR(ierr, "Error in PetscMalloc");
1011: PetscMemzero(LeaseData,2*size*sizeof(PetscInt));CHKERROR(ierr, "Error in PetscMemzero");
1012:
1013: // The base we are going to work with
1014: Obj<Sequence> points = _graph->base();
1016: // determine owners of each base node and save it in a map
1017: Point__int owner;
1018: __determinePointOwners(_graph, _graph->base(), LeaseData, owner);
1019:
1020: // Now we accumulate the max lease size and the total number of renters
1021: // Determine the owners of base nodes and collect the lease data for each processor:
1022: // the number of nodes leased and the number of leases (0 or 1).
1023: int32_t MaxLeaseSize, RenterCount;
1024: PetscMaxSum(comm,LeaseData,&MaxLeaseSize,&RenterCount);CHKERROR(ierr,"Error in PetscMaxSum");
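// Illustrative note: PetscMaxSum combines the per-process pairs in LeaseData so that each process learns
// (a) MaxLeaseSize, the largest number of its own nodes leased by any single renter, and (b) RenterCount,
// the number of processes renting at least one of its nodes. E.g. if ranks 1 and 4 lease 5 and 2 of the nodes
// this process owns, then MaxLeaseSize == 5 and RenterCount == 2.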
1025: //PetscInfo1(0,"%s: Number of renters %d\n", __FUNCT__, RenterCount);
1026: //CHKERROR(ierr,"Error in PetscInfo");
1028: if(debug) { /* -------------------------------------------------------------- */
1029: PetscSynchronizedPrintf(comm, "[%d]: %s: RenterCount = %d, MaxLeaseSize = %d\n",
1030: rank, __FUNCT__, RenterCount, MaxLeaseSize);
1031: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1032: PetscSynchronizedFlush(comm);
1033: CHKERROR(ierr, "Error in PetscSynchronizedFlush");
1034: } /* ----------------------------------------------------------------------- */
1035:
1036: // post receives for all Rented nodes; we will be receiving 3 data items per rented node,
1037: // and at most MaxLeaseSize nodes per renter
1038: PetscMPIInt tag1;
1039: PetscObjectGetNewTag(petscObj, &tag1); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
1040: int32_t *RentedNodes;
1041: MPI_Request *Renter_waits;
1042: if(RenterCount){
1043: PetscMalloc((RenterCount)*(3*MaxLeaseSize+1)*sizeof(int32_t),&RentedNodes); CHKERROR(ierr,"Error in PetscMalloc");
1044: PetscMemzero(RentedNodes,(RenterCount)*(3*MaxLeaseSize+1)*sizeof(int32_t)); CHKERROR(ierr,"Error in PetscMemzero");
1045: PetscMalloc((RenterCount)*sizeof(MPI_Request),&Renter_waits); CHKERROR(ierr,"Error in PetscMalloc");
1046: }
1047: for (int32_t i=0; i<RenterCount; i++) {
1048: MPI_Irecv(RentedNodes+3*MaxLeaseSize*i,3*MaxLeaseSize,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,Renter_waits+i);
1049: CHKERROR(ierr,"Error in MPI_Irecv");
1050: }
1051:
1052: int32_t LessorCount;
1053: LessorCount = 0; for (int32_t i=0; i<size; i++) LessorCount += LeaseData[2*i+1];
1054: //PetscInfo1(0,"%s: Number of lessors %d\n",__FUNCT__, LessorCount);
1055: //CHKERROR(ierr,"Error in PetscInfo");
1056: if(debug) { /* -------------------------------------------------------------- */
1057: PetscSynchronizedPrintf(comm, "[%d]: %s: LessorCount = %d\n", rank, __FUNCT__, LessorCount);
1058: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1059: PetscSynchronizedFlush(comm);
1060: CHKERROR(ierr, "Error in PetscSynchronizedFlush");
1061: } /* ----------------------------------------------------------------------- */
1062:
1063: // We keep only the data about the real lessors -- those that own the nodes we lease
1064: int32_t *LeaseSizes, *Lessors;
1065: if(LessorCount) {
1066: PetscMalloc(sizeof(int32_t)*(LessorCount), &LeaseSizes); CHKERROR(ierr, "Error in PetscMalloc");
1067: PetscMalloc(sizeof(int32_t)*(LessorCount), &Lessors); CHKERROR(ierr, "Error in PetscMalloc");
1068: }
1069: // We also need to compute the inverse to the Lessors array, since we need to be able to convert i into cntr
1070: // after using the owner array. We use a map LessorIndex; it is likely to be small -- ASSUMPTION
1071: int__int LessorIndex;
1072: // Traverse all processes in ascending order
1073: int32_t cntr = 0; // keep track of entered records
1074: for(int32_t i = 0; i < size; i++) {
1075: if(LeaseData[2*i]) { // if there are nodes leased from process i, record it
1076: LeaseSizes[cntr] = LeaseData[2*i];
1077: Lessors[cntr] = i;
1078: LessorIndex[i] = cntr;
1079: cntr++;
1080: }
1081: }
1082: PetscFree(LeaseData); CHKERROR(ierr, "Error in PetscFree");
1083: if(debug2) { /* ----------------------------------- */
1084: ostringstream txt;
1085: txt << "[" << rank << "]: " << __FUNCT__ << ": lessor data [index, rank, lease size]: ";
1086: for(int32_t i = 0; i < LessorCount; i++) {
1087: txt << "[" << i << ", " << Lessors[i] << ", " << LeaseSizes[i] << "] ";
1088: }
1089: txt << "\n";
1090: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1091: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
1092: }/* ----------------------------------- */
1093: if(debug2) { /* ----------------------------------- */
1094: ostringstream txt;
1095: txt << "[" << rank << "]: " << __FUNCT__ << ": LessorIndex: ";
1096: for(int__int::iterator li_itor = LessorIndex.begin(); li_itor!= LessorIndex.end(); li_itor++) {
1097: int32_t i = (*li_itor).first;
1098: int32_t j = (*li_itor).second;
1099: txt << i << "-->" << j << "; ";
1100: }
1101: txt << "\n";
1102: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1103: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
1104: }/* ----------------------------------- */
1105:
1106:
1107: // pack messages containing lists of leased base nodes and their cone sizes to the lessors
1108: int32_t LeasedNodeCount = points->size(); // all points are considered leased from someone
1109: int32_t *LeasedNodes;
1110: int32_t *LessorOffsets;
1111: // We need 3 ints per leased node -- 2 per Point and 1 for the cone size
1112: if(LeasedNodeCount) {
1113: PetscMalloc((3*LeasedNodeCount)*sizeof(PetscInt),&LeasedNodes); CHKERROR(ierr,"Error in PetscMalloc");
1114: }
1115: if(LessorCount) {
1116: PetscMalloc((LessorCount)*sizeof(PetscInt),&LessorOffsets); CHKERROR(ierr,"Error in PetscMalloc");
1117: LessorOffsets[0] = 0;
1118: }
1119: for (int32_t i=1; i<LessorCount; i++) { LessorOffsets[i] = LessorOffsets[i-1] + 3*LeaseSizes[i-1];}
1120: for (typename Sequence::iterator point_itor = points->begin(); point_itor != points->end(); point_itor++) {
1121: Point p = (*point_itor);
1122: int32_t ow = owner[p];
1123: int32_t ind = LessorIndex[ow];
1124: LeasedNodes[LessorOffsets[ind]++] = p.prefix;
1125: LeasedNodes[LessorOffsets[ind]++] = p.index;
1126: LeasedNodes[LessorOffsets[ind]++] = _graph->cone(p)->size();
1127: }
1128: if(LessorCount) {
1129: LessorOffsets[0] = 0;
1130: }
1131: for (int32_t i=1; i<LessorCount; i++) { LessorOffsets[i] = LessorOffsets[i-1] + 3*LeaseSizes[i-1];}
1132:
1133: // send the messages to the lessors
1134: MPI_Request *Lessor_waits;
1135: if(LessorCount) {
1136: PetscMalloc((LessorCount)*sizeof(MPI_Request),&Lessor_waits);CHKERROR(ierr,"Error in PetscMalloc");
1137: }
1138: for (int32_t i=0; i<LessorCount; i++) {
1139: MPI_Isend(LeasedNodes+LessorOffsets[i],3*LeaseSizes[i],MPIU_INT,Lessors[i],tag1,comm,&Lessor_waits[i]);
1140: CHKERROR(ierr,"Error in MPI_Isend");
1141: }
1142:
1143: // wait on the receive requests and prepare to record the identities of the renters responding to the request and their lease sizes
1144: int__int Renters, RenterLeaseSizes;
1145: // Prepare to compute the set of renters of each owned node along with the cone sizes held by those renters over the node.
1146: // Since we don't have a unique ordering on the owned nodes a priori, we will utilize a map.
1147: Point__int_pair_set NodeRenters;
1148: cntr = RenterCount;
1149: while (cntr) {
1150: int32_t arrivalNumber;
1151: MPI_Status Renter_status;
1152: MPI_Waitany(RenterCount,Renter_waits,&arrivalNumber,&Renter_status);
1153: CHKMPIERROR(ierr,ERRORMSG("Error in MPI_Waitany"));
1154: int32_t renter = Renter_status.MPI_SOURCE;
1155: Renters[arrivalNumber] = renter;
1156: MPI_Get_count(&Renter_status,MPIU_INT,&RenterLeaseSizes[arrivalNumber]); CHKERROR(ierr,"Error in MPI_Get_count");
1157: // Since there are 3 ints per leased node, the lease size is computed by dividing the received count by 3;
1158: RenterLeaseSizes[arrivalNumber] = RenterLeaseSizes[arrivalNumber]/3;
1159: // Record the renters for each node
1160: for (int32_t i=0; i<RenterLeaseSizes[arrivalNumber]; i++) {
1161: // Compute the offset into the RentedNodes array for the arrived lease.
1162: int32_t LeaseOffset = arrivalNumber*3*MaxLeaseSize;
1163: // ASSUMPTION on Point type
1164: Point node = Point(RentedNodes[LeaseOffset + 3*i], RentedNodes[LeaseOffset + 3*i+1]);
1165: int32_t coneSize = RentedNodes[LeaseOffset + 3*i + 2];
1166: NodeRenters[node].insert(int_pair(renter,coneSize));
1167: }
1168: cntr--;
1169: }
1170:
1171: if (debug) { /* ----------------------------------- */
1172: // We need to collect all the data to be submitted to PetscSynchronizedPrintf
1173: // We use a C++ string stream for that
1174: ostringstream txt;
1175: for (Point__int_pair_set::iterator nodeRenters_itor=NodeRenters.begin();nodeRenters_itor!= NodeRenters.end();nodeRenters_itor++) {
1176: Point node = (*nodeRenters_itor).first;
1177: int_pair_set renterSet = (*nodeRenters_itor).second;
1178: // ASSUMPTION on point type
1179: txt << "[" << rank << "]: " << __FUNCT__ << ": node (" << node.prefix << "," << node.index << ") is rented by " << renterSet.size() << " renters (renter, cone size): ";
1180: for (int_pair_set::iterator renterSet_itor = renterSet.begin(); renterSet_itor != renterSet.end(); renterSet_itor++)
1181: {
1182: txt << "(" << (*renterSet_itor).first << "," << (*renterSet_itor).second << ") ";
1183: }
1184: txt << "\n";
1185: }
1186: // Now send the C-string behind txt to PetscSynchronizedPrintf
1187: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1188: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
1189: }/* ----------------------------------- */
1190:
1191: // wait on the original sends to the lessors
1192: MPI_Status *Lessor_status;
1193: if (LessorCount) {
1194: PetscMalloc((LessorCount)*sizeof(MPI_Status),&Lessor_status); CHKERROR(ierr,"Error in PetscMalloc");
1195: MPI_Waitall(LessorCount,Lessor_waits,Lessor_status); CHKERROR(ierr,"Error in MPI_Waitall");
1196: }
1197:
1198:
1199: // Neighbor counts: here the renters receive from the lessors the number of other renters sharing each leased node.
1200: // Prepare to receive three integers per leased node: two for the node itself and one for the number of neighbors over that node.
1201: // The buffer has the same structure as LeasedNodes, hence LessorOffsets can be reused.
1202: // IMPROVE: can probably reduce the message size by a factor of 3 if we assume an ordering on the nodes received from each lessor.
1203: // ASSUMPTION on Point type
1204: int32_t *NeighborCounts;
1205: if(LeasedNodeCount) {
1206: PetscMalloc(3*(LeasedNodeCount)*sizeof(PetscInt),&NeighborCounts); CHKERROR(ierr,"Error in PetscMalloc");
1207: }
1208: // Post receives for NeighborCounts
1209: PetscMPIInt tag2;
1210: PetscObjectGetNewTag(petscObj, &tag2); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
1211: for (int32_t i=0; i<LessorCount; i++) {
1212: MPI_Irecv(NeighborCounts+LessorOffsets[i],3*LeaseSizes[i],MPIU_INT,Lessors[i],tag2,comm,&Lessor_waits[i]);
1213: CHKERROR(ierr,"Error in MPI_Irecv");
1214: }
1215: // pack and send messages back to renters; we need to send 3 integers per rental (2 for Point, 1 for sharer count)
1216: // grouped by the renter
1217: // ASSUMPTION on Point type
1218: // first we compute the total number of rentals
1219: int32_t TotalRentalCount = 0;
1220: for(Point__int_pair_set::iterator nodeRenters_itor=NodeRenters.begin();nodeRenters_itor!=NodeRenters.end();nodeRenters_itor++){
1221: TotalRentalCount += (*nodeRenters_itor).second.size();
1222: }
1223: if(debug2) {
1224: PetscSynchronizedPrintf(comm, "[%d]: TotalRentalCount %d\n", rank, TotalRentalCount);
1225: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1226: PetscSynchronizedFlush(comm); CHKERROR(ierr, "PetscSynchronizedFlush");
1227: }/* ----------------------------------- */
1228:
1229: // Allocate sharer counts array for all rentals
1230: int32_t *SharerCounts;
1231: if(TotalRentalCount) {
1232: PetscMalloc(3*(TotalRentalCount)*sizeof(int32_t),&SharerCounts); CHKERROR(ierr,"Error in PetscMalloc");
1233: }
1234: // Renters are traversed in the order of their original arrival, indexed by the arrival number a
1235: int32_t RenterOffset = 0;
1236: cntr = 0;
1237: for(int32_t a = 0; a < RenterCount; a++) {
1238: // traverse the nodes leased by the renter
1239: int32_t RenterLeaseOffset = a*3*MaxLeaseSize;
1240: for(int32_t i = 0; i < RenterLeaseSizes[a]; i++) {
1241: // ASSUMPTION on Point type
1242: Point node;
1243: node.prefix = RentedNodes[RenterLeaseOffset + 3*i];
1244: node.index = RentedNodes[RenterLeaseOffset + 3*i + 1];
1245: SharerCounts[cntr++] = node.prefix;
1246: SharerCounts[cntr++] = node.index;
1247: // Decrement the sharer count by one so as not to count the current renter itself (with arrival number a).
1248: SharerCounts[cntr++] = NodeRenters[node].size()-1;
1249: }
1250: // Send message to renter
1251: MPI_Isend(SharerCounts+RenterOffset,3*RenterLeaseSizes[a],MPIU_INT,Renters[a],tag2,comm,Renter_waits+a);
1252: CHKERROR(ierr, "Error in MPI_Isend");
1253: // Offset is advanced by thrice the number of leased nodes, since we store 3 integers per leased node: the Point and the sharer count
1254: RenterOffset = cntr;
1255: }
1256: // Wait on receives from lessors with the neighbor counts
1257: if (LessorCount) {
1258: MPI_Waitall(LessorCount,Lessor_waits,Lessor_status); CHKERROR(ierr,"Error in MPI_Waitall");
1259: }
1260: // Wait on the original sends to the renters
1261: MPI_Status *Renter_status;
1262: PetscMalloc((RenterCount)*sizeof(MPI_Status),&Renter_status);CHKERROR(ierr,"Error in PetscMalloc");
1263: if(RenterCount) {
1264: MPI_Waitall(RenterCount, Renter_waits, Renter_status);CHKERROR(ierr,"Error in MPI_Waitall");
1265: }
1266:
1267: if (debug) { /* ----------------------------------- */
1268: // Use a C++ string stream to report the neighbor counts over the nodes leased from each lessor
1269: ostringstream txt;
1270: cntr = 0;
1271: txt << "[" << rank << "]: " << __FUNCT__ << ": neighbor counts by lessor-node [lessor rank, (node), neighbor count]: ";
1272: for(int32_t i = 0; i < LessorCount; i++) {
1273: // ASSUMPTION on point type
1274: for(int32_t j = 0; j < LeaseSizes[i]; j++)
1275: {
1276: int32_t prefix, index, sharerCount;
1277: prefix = NeighborCounts[cntr++];
1278: index = NeighborCounts[cntr++];
1279: sharerCount = NeighborCounts[cntr++];
1280: txt << "[" << Lessors[i] <<", (" << prefix << "," << index << "), " << sharerCount << "] ";
1281: }
1282: }
1283: txt << "\n";
1284: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1285: PetscSynchronizedFlush(comm); CHKERROR(ierr, "PetscSynchronizedFlush");
1286: }/* ----------------------------------- */
1287:
1288:
1289: // Now we allocate an array to receive the neighbor ranks and the remote cone sizes for each leased node,
1290: // hence, the total array size is 2*TotalNeighborCount.
1291: // Note that the lessor offsets must be recalculated, since they are no longer based on the number of nodes
1292: // leased from that lessor, but on the number of neighbors over the nodes leased from that lessor.
1293:
1294: // First we compute the numbers of neighbors over the nodes leased from a given lessor.
1295: // NeighborCountsByLessor[lessor] = # of neighbors on that lessor
1296: int32_t TotalNeighborCount = 0;
1297: int32_t *NeighborCountsByLessor;
1298: if(LessorCount) {
1299: PetscMalloc((LessorCount)*sizeof(int32_t), &NeighborCountsByLessor); CHKERROR(ierr, "Error in PetscMalloc");
1300: }
1301: cntr = 0;
1302: for(int32_t i = 0; i < LessorCount; i++) {
1303: int32_t neighborCountByLessor = 0;
1304: for(int32_t j = 0; j < LeaseSizes[i]; j++) {
1305: //ASSUMPTION on Point type: each node is encoded as 2 integers, which we skip before reading its neighbor count
1306: cntr += 2;
1307: neighborCountByLessor += NeighborCounts[cntr++];
1308: }
1309: NeighborCountsByLessor[i] = neighborCountByLessor;
1310: TotalNeighborCount += neighborCountByLessor;
1311: }
1312: if (debug2) { /* ----------------------------------- */
1313: // Use a C++ string stream to report the number of neighbors over the nodes leased from each lessor
1314: ostringstream txt;
1315: cntr = 0;
1316: txt << "[" << rank << "]: " << __FUNCT__ << ": NeighborCountsByLessor [rank, count]: ";
1317: for(int32_t i = 0; i < LessorCount; i++) {
1318: txt << "[" << Lessors[i] <<"," << NeighborCountsByLessor[i] << "]; ";
1319: }
1320: txt << std::endl;
1321: txt << "[" << rank << "]: " << __FUNCT__ << ": TotalNeighborCount: " << TotalNeighborCount << std::endl;
1322: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1323: PetscSynchronizedFlush(comm); CHKERROR(ierr, "PetscSynchronizedFlush");
1324: }/* ----------------------------------- */
1325: int32_t *Neighbors = 0;
1326: if(TotalNeighborCount) {
1327: PetscMalloc((2*TotalNeighborCount)*sizeof(int32_t),&Neighbors); CHKERROR(ierr,"Error in PetscMalloc");
1328: }
1329:
1330: // Post receives for Neighbors
1331: PetscMPIInt tag3;
1332: PetscObjectGetNewTag(petscObj, &tag3); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
1333: int32_t lessorOffset = 0;
1334: for(int32_t i=0; i<LessorCount; i++) {
1335: if(NeighborCountsByLessor[i]) { // We expect messages from lessors with a non-zero NeighborCountsByLessor entry only
1336: MPI_Irecv(Neighbors+lessorOffset,2*NeighborCountsByLessor[i],MPIU_INT,Lessors[i],tag3,comm,&Lessor_waits[i]);
1337: CHKERROR(ierr,"Error in MPI_Irecv");
1338: lessorOffset += 2*NeighborCountsByLessor[i];
1339: }
1340: }
1341: if (lessorOffset != 2*TotalNeighborCount) {
1342: ostringstream msg;
1344: msg << "["<<rank<<"]Invalid lessor offset " << lessorOffset << " should be " << 2*TotalNeighborCount << std::endl;
1345: throw ALE::Exception(msg.str().c_str());
1346: }
1347: // Pack and send messages back to renters.
1348: // For each node p and each renter r (hence for each rental (p,r)) we must send to r a segment consisting of the list of all
1349: // (rr,cc) such that (p,rr) is a share and cc is the cone size over p at rr.
1350: // ALTERNATIVE, SCALABILITY:
1351: // 1. allocate an array capable of holding all messages to all renters and send one message per renter (more memory)
1352: // 2. allocate an array capable of holding all rentals for all nodes and send one message per share (more messages).
1353: // Here we choose 1, since we assume that the memory requirement is modest and communication is much more expensive;
1354: // however, this is likely to be application-dependent, and a switch should be introduced to change this behavior at will.
1355: // The rental segments are grouped by the recipient renter and, within each renter, by node in the same order as SharerCounts.
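// Illustration (editorial sketch, hypothetical values): suppose renter r leases nodes p1 and p2, where p1 is also
// rented by ranks 3 (cone size 2) and 5 (cone size 4), while p2 has no other renters. The segment sent to r is then
//   [ 3, 2,  5, 4 ]   -- the (rank, cone size) pairs for p1, followed by the empty list of pairs for p2.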
1356:
1357: // We need to compute the send buffer size using the SharerCounts array.
1358: // Traverse the renters in order of their original arrival, indexed by the arrival number a, and then by the nodes leased by a.
1359: // Add up all entries at indices equal to 2 mod 3 in SharerCounts (indices 0 & 1 mod 3 hold the node IDs, ASSUMPTION on Point type) and double that number
1360: // to account for sharer ranks AND the cone sizes we are sending.
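// Worked example (editorial, hypothetical values): if SharerCounts = { 7,1,2,  7,2,0,  8,5,1 } for three rentals,
// the entries at indices 2 mod 3 are 2, 0 and 1, so SharersSize = 2*(2+0+1) = 6 integers.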
1361: int32_t SharersSize = 0; // 'Sharers' buffer size
1362: cntr = 0;
1363: for(int32_t a = 0; a < RenterCount; a++) {
1364: // traverse the nodes leased by the renter
1365: for(int32_t i = 0; i < RenterLeaseSizes[a]; i++) {
1366: SharersSize += SharerCounts[3*cntr+2];
1367: cntr++;
1368: }
1369: }
1370: SharersSize *= 2;
1371: // Allocate the Sharers array
1372: int32_t *Sharers;
1373: if(SharersSize) {
1374: PetscMalloc(SharersSize*sizeof(int32_t),&Sharers); CHKERROR(ierr,"Error in PetscMalloc");
1375: }
1376: // Now pack the messages and send them off.
1377: // Renters are traversed in the order of their original arrival, indexed by the arrival number a
1378: ostringstream txt; // DEBUG
1379: if(debug2) {
1380: txt << "[" << rank << "]: " << __FUNCT__ << ": RenterCount = " << RenterCount << "\n";
1381: }
1382: RenterOffset = 0; // this is the current offset into Sharers needed for the send statement
1383: for(int32_t a = 0; a < RenterCount; a++) {
1384: int32_t r = Renters[a];
1385: int32_t RenterLeaseOffset = a*3*MaxLeaseSize;
1386: int32_t SegmentSize = 0;
1387: // traverse the nodes leased by the renter
1388: for(int32_t i = 0; i < RenterLeaseSizes[a]; i++) {
1389: // Get a node p rented to r
1390: // ASSUMPTION on Point type
1391: Point p;
1392: p.prefix = RentedNodes[RenterLeaseOffset + 3*i];
1393: p.index = RentedNodes[RenterLeaseOffset + 3*i + 1];
1394: if(debug) {
1395: txt << "[" << rank << "]: " << __FUNCT__ << ": renters sharing with " << r << " of node (" << p.prefix << "," << p.index << ") [rank, cone size]: ";
1396: }
1397: // now traverse the set of all the renters of p
1398: for(int_pair_set::iterator pRenters_itor=NodeRenters[p].begin(); pRenters_itor!=NodeRenters[p].end(); pRenters_itor++) {
1399: int32_t rr = (*pRenters_itor).first; // rank of a pRenter
1400: int32_t cc = (*pRenters_itor).second; // cone size over p at rr
1401: // skip r itself
1402: if(rr != r){
1403: Sharers[RenterOffset+SegmentSize++] = rr;
1404: Sharers[RenterOffset+SegmentSize++] = cc;
1405: if(debug) {
1406: txt << "[" << rr << "," << cc << "]; ";
1407: }
1408: }
1409: }// for(int_pair_set::iterator pRenters_itor=NodeRenters[p].begin(); pRenters_itor!=NodeRenters[p].end(); pRenters_itor++) {
1410: if(debug) {
1411: txt << "\n";
1412: }
1413: }// for(int32_t i = 0; i < RenterLeaseSizes[a]; i++) {
1414: // Send message to renter only if the segment size is positive
1415: if(SegmentSize > 0) {
1416: MPI_Isend(Sharers+RenterOffset,SegmentSize,MPIU_INT,Renters[a],tag3,comm,Renter_waits+a);
1417: CHKERROR(ierr, "Error in MPI_Isend");
1418: }
1419: // Offset is advanced by the segmentSize
1420: RenterOffset += SegmentSize;
1421: }// for(int32_t a = 0; a < RenterCount; a++) {
1422: if(debug) {
1423: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1424: PetscSynchronizedFlush(comm); CHKERROR(ierr, "PetscSynchronizedFlush");
1425: }
1426:
1427: // Wait on receives from lessors with the neighbor counts
1428: if (LessorCount) {
1429: MPI_Waitall(LessorCount,Lessor_waits,Lessor_status);CHKERROR(ierr,"Error in MPI_Waitall");
1430: }
1431: if (debug) { /* ----------------------------------- */
1432: // To report the neighbors at each lessor we use a C++ string stream
1433: ostringstream txt;
1434: int32_t cntr1 = 0;
1435: int32_t cntr2 = 0;
1436: for(int32_t i = 0; i < LessorCount; i++) {
1437: // ASSUMPTION on point type
1438: txt << "[" <<rank<< "]: " << __FUNCT__ << ": neighbors over nodes leased from " <<Lessors[i]<< ":\n";
1439: int32_t activeLessor = 0;
1440: for(int32_t j = 0; j < LeaseSizes[i]; j++)
1441: {
1442: int32_t prefix, index, sharerCount;
1443: prefix = NeighborCounts[cntr1++];
1444: index = NeighborCounts[cntr1++];
1445: sharerCount = NeighborCounts[cntr1++];
1446: if(sharerCount > 0) {
1447: txt <<"[" << rank << "]:\t(" << prefix <<","<<index<<"): [rank, coneSize]: ";
1448: activeLessor++;
1449: }
1450: for(int32_t k = 0; k < sharerCount; k++) {
1451: int32_t sharer = Neighbors[cntr2++];
1452: int32_t coneSize = Neighbors[cntr2++];
1453: txt << "[" <<sharer <<", "<< coneSize << "] ";
1454: }
1455: }// for(int32_t j = 0; j < LeaseSizes[i]; j++)
1456: if(!activeLessor) {
1457: txt <<"[" << rank << "]:\tnone";
1458: }
1459: txt << "\n";
1460: }// for(int32_t i = 0; i < LessorCount; i++)
1461: PetscSynchronizedPrintf(comm,txt.str().c_str());CHKERROR(ierr,"Error in PetscSynchronizedPrintf");
1462: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
1463: }/* ----------------------------------- */
1464:
1465: // This concludes the interaction of lessors and renters, and the exchange is completed by a peer-to-peer neighbor cone swap
1466: // (except we still have to wait on our last sends to the renters -- see below).
1467: // However, we don't free all of the arrays associated with the lessor-renter exchanges, since some of that data
1468: // is still in use below. Here are the arrays we can get rid of now:
1469: if(RenterCount) {
1470: PetscFree(RentedNodes); CHKERROR(ierr, "Error in PetscFree");
1471: }
1472: if(SharersSize) {PetscFree(Sharers); CHKERROR(ierr, "Error in PetscFree");}
1473: if(LessorCount) {
1474: PetscFree(NeighborCountsByLessor); CHKERROR(ierr, "Error in PetscFree");
1475: PetscFree(Lessor_status); CHKERROR(ierr,"Error in PetscFree");
1476: PetscFree(Lessor_waits); CHKERROR(ierr,"Error in PetscFree");
1477: PetscFree(LessorOffsets); CHKERROR(ierr,"Error in PetscFree");
1478: PetscFree(LeaseSizes); CHKERROR(ierr,"Error in PetscFree");
1479: PetscFree(Lessors); CHKERROR(ierr,"Error in PetscFree");
1480: }
1481: if(LeasedNodeCount) {
1482: PetscFree(LeasedNodes); CHKERROR(ierr,"Error in PetscFree");
1483: }
1484:
1485: // Now we record the neighbors and the cones over each node to be received from or sent to each neighbor.
1486: // cntr keeps track of the current position within the Neighbors array, node boundaries are delineated using NeighborCounts.
1487: // ASSUMPTION: 'Neighbors' stores node renter segments in the same order as NeighborCounts stores the node data.
1488: cntr = 0;
1489: for(int32_t i = 0; i < LeasedNodeCount; i++) {
1490: // ASSUMPTION on Point type
1491: Point p;
1492: p.prefix = NeighborCounts[3*i];
1493: p.index = NeighborCounts[3*i+1];
1494: int32_t pNeighborsCount = NeighborCounts[3*i+2]; // recall that NeighborCounts lists the number of neighbors after each node
1495: // extract the renters of p from Neighbors
1496: for(int32_t j = 0; j < pNeighborsCount; j++) {
1497: int32_t neighbor = Neighbors[cntr++];
1498: int32_t coneSize = Neighbors[cntr++];
1499: // Record the size of the cone over p coming in from neighbor and going out to the neighbor as a pair of integers
1500: // which is the color of the overlap arrow from neighbor to p
1501: overlap->addArrow(neighbor, p, ALE::pair<Point,ALE::pair<int,int> >(p, ALE::pair<int,int>(coneSize, _graph->cone(p)->size())) );
1502: }
1503: }// for(int32_t i = 0; i < LeasedNodeCount; i++)
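// Illustration (editorial sketch, hypothetical values): if rank 2 reported a cone of size 4 over the leased node
// p = (0,17) and our local cone over p has size 5, the loop above records the overlap arrow
//   2 --( (0,17), (4,5) )--> (0,17)
// i.e. the arrow color pairs the point with (remote cone size, local cone size).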
1505: // Wait on the original sends to the renters (the last vestige of the lessor-renter exchange epoch; we delayed it to afford the
1506: // greatest opportunity for a communication-computation overlap).
1507: if(RenterCount) {
1508: MPI_Waitall(RenterCount, Renter_waits, Renter_status); CHKERROR(ierr,"Error in MPI_Waitall");
1509: }
1510: if(RenterCount) {
1511: PetscFree(Renter_waits); CHKERROR(ierr, "Error in PetscFree");
1512: PetscFree(Renter_status); CHKERROR(ierr, "Error in PetscFree");
1513: }
1515: if(LeasedNodeCount) {PetscFree(NeighborCounts); CHKERROR(ierr,"Error in PetscFree");}
1516: if(TotalNeighborCount) {PetscFree(Neighbors); CHKERROR(ierr, "Error in PetscFree");}
1517: if(TotalRentalCount){PetscFree(SharerCounts); CHKERROR(ierr, "Error in PetscFree");}
1519: };// __computeOverlap()
1521: #undef __FUNCT__
1523: /*
1524: Seller: A possessor of data
1525: Buyer: A requestor of data
1527: Note that in this routine, the caller functions as BOTH a buyer and seller.
1529: When we post receives, we use a buffer of the maximum size for each message
1530: in order to simplify the size calculations (less communication).
1532: BuyCount: The number of sellers with which this process (buyer) communicates
1533: This is calculated locally
1534: BuySizes: The number of messages to buy from each seller
1535: Sellers: The process for each seller
1536: BuyData: The data to be bought from each seller. There are BuySizes[p] messages
1537: to be purchased from each process p, in order of rank.
1538: SellCount: The number of buyers with which this process (seller) communicates
1539: This requires communication
1540: SellSizes: The number of messages to be sold to each buyer
1541: Buyers: The process for each buyer
1542: msgSize: The number of integers in each message
1543: SellData: The data to be sold to each buyer. There are SellSizes[p] messages
1544: to be sold to each process p, in order of rank.
1545: */
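/*
   Usage sketch (editorial illustration with hypothetical values, not part of the original routine):
   with msgSize = 1 on a 2-rank communicator, each rank ships one integer to the other and receives
   one back in a single cycle; commCycle allocates *SellData when it is passed in as PETSC_NULL.

     int      other        = 1 - rank;
     int      BuySizes[1]  = {1},  Sellers[1] = {other};
     int32_t  BuyData[1]   = {rank};                      // payload sent to the seller
     int      SellSizes[1] = {1},  Buyers[1]  = {other};  // maximum expected from each buyer
     int32_t *SellData     = PETSC_NULL;
     // tag obtained e.g. via PetscObjectGetNewTag, as in the callers below
     commCycle(comm, tag, 1, 1, BuySizes, Sellers, BuyData, 1, SellSizes, Buyers, &SellData);
     // On return SellData[0] holds the other rank's integer, and Buyers[0]/SellSizes[0] are
     // overwritten with the actual message source and size.
*/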
1546: static void commCycle(MPI_Comm comm, PetscMPIInt tag, int msgSize, int BuyCount, int BuySizes[], int Sellers[], int32_t BuyData[], int SellCount, int SellSizes[], int Buyers[], int32_t *SellData[]) {
1547: int32_t *locSellData; // Messages to sell to buyers (received from buyers)
1548: int SellSize = 0;
1549: int *BuyOffsets = PETSC_NULL, *SellOffsets = PETSC_NULL;
1550: MPI_Request *buyWaits = PETSC_NULL, *sellWaits = PETSC_NULL;
1551: MPI_Status *buyStatus = PETSC_NULL;
1554: // Allocation
1555: PetscMallocValidate(__LINE__,__FUNCT__,__FILE__,__SDIR__);CHKERROR(ierr,"Memory corruption");
1556: for(int s = 0; s < SellCount; s++) {SellSize += SellSizes[s];}
1557: PetscMalloc2(BuyCount,int,&BuyOffsets,SellCount,int,&SellOffsets);CHKERROR(ierr,"Error in PetscMalloc");
1558: PetscMalloc3(BuyCount,MPI_Request,&buyWaits,SellCount,MPI_Request,&sellWaits,BuyCount,MPI_Status,&buyStatus);
1559: CHKERROR(ierr,"Error in PetscMalloc");
1560: if (*SellData) {
1561: locSellData = *SellData;
1562: } else {
1563: // WORKAROUND: MPICH fails with 0-length storage, so we allocate at least one entry
1564: PetscMalloc(PetscMax(1, msgSize*SellSize) * sizeof(int32_t), &locSellData);CHKERROR(ierr,"Error in PetscMalloc");
1565: }
1566: // Initialization
1567: for(int b = 0; b < BuyCount; b++) {
1568: if (b == 0) {
1569: BuyOffsets[0] = 0;
1570: } else {
1571: BuyOffsets[b] = BuyOffsets[b-1] + msgSize*BuySizes[b-1];
1572: }
1573: }
1574: for(int s = 0; s < SellCount; s++) {
1575: if (s == 0) {
1576: SellOffsets[0] = 0;
1577: } else {
1578: SellOffsets[s] = SellOffsets[s-1] + msgSize*SellSizes[s-1];
1579: }
1580: }
1581: PetscMemzero(locSellData, msgSize*SellSize * sizeof(int32_t));CHKERROR(ierr,"Error in PetscMemzero");
1583: // Post receives for bill of sale (data request)
1584: for(int s = 0; s < SellCount; s++) {
1585: MPI_Irecv(&locSellData[SellOffsets[s]], msgSize*SellSizes[s], MPIU_INT, Buyers[s], tag, comm, &sellWaits[s]);
1586: CHKERROR(ierr,"Error in MPI_Irecv");
1587: }
1588: // Post sends with bill of sale (data request)
1589: for(int b = 0; b < BuyCount; b++) {
1590: MPI_Isend(&BuyData[BuyOffsets[b]], msgSize*BuySizes[b], MPIU_INT, Sellers[b], tag, comm, &buyWaits[b]);
1591: CHKERROR(ierr,"Error in MPI_Isend");
1592: }
1593: // Receive the bill of sale from buyer
1594: for(int s = 0; s < SellCount; s++) {
1595: MPI_Status sellStatus;
1596: int num;
1598: MPI_Waitany(SellCount, sellWaits, &num, &sellStatus);CHKMPIERROR(ierr,ERRORMSG("Error in MPI_Waitany"));
1599: // OUTPUT: Overwriting input buyer process
1600: Buyers[num] = sellStatus.MPI_SOURCE;
1601: // OUTPUT: Overwriting input sell size
1602: MPI_Get_count(&sellStatus, MPIU_INT, &SellSizes[num]);CHKERROR(ierr,"Error in MPI_Get_count");
1603: SellSizes[num] /= msgSize;
1604: }
1605: // Wait on send for bill of sale
1606: if (BuyCount) {
1607: MPI_Waitall(BuyCount, buyWaits, buyStatus); CHKERROR(ierr,"Error in MPI_Waitall");
1608: }
1610: PetscFree2(BuyOffsets, SellOffsets);CHKERROR(ierr,"Error in PetscFree");
1611: PetscFree3(buyWaits, sellWaits, buyStatus);CHKERROR(ierr,"Error in PetscFree");
1612: // OUTPUT: Providing data out
1613: *SellData = locSellData;
1614: }
1616: // -------------------------------------------------------------------------------------------------------------------
1619: template <typename Overlap_, typename Fusion_>
1620: static void __computeFusion(const Obj<graph_type>& _graph, const Obj<Overlap_>& overlap, Obj<Fusion_> fusion, const Obj<fuser_type>& fuser) {
1621: //
1622: typedef ConeArraySequence<typename graph_type::traits::arrow_type> cone_array_sequence;
1623: typedef typename cone_array_sequence::cone_arrow_type cone_arrow_type;
1625: MPI_Comm comm = _graph->comm();
1626: int rank = _graph->commRank();
1627: PetscObject petscObj = _graph->petscObj();
1629: bool debug = delta_type::debug > 0;
1630: bool debug2 = delta_type::debug > 1;
1632: // Compute total incoming cone sizes by neighbor and the total incoming cone size.
1633: // Also count the total number of neighbors we will be communicating with
1634: int32_t NeighborCountIn = 0;
1635: int__int NeighborConeSizeIn;
1636: int32_t ConeSizeIn = 0;
1637: ostringstream txt3;
1638: // Traverse all of the neighbors from whom we will be receiving cones -- the cap of the overlap.
1639: typename Overlap_::traits::capSequence overlapCap = overlap->cap();
1640: for(typename Overlap_::traits::capSequence::iterator ci = overlapCap.begin(); ci != overlapCap.end(); ci++)
1641: { // traversing overlap.cap()
1642: int32_t neighborIn = *ci;
1643: // Traverse the supports of the overlap graph under each neighbor rank, count cone sizes to be received and add the cone sizes
1644: typename Overlap_::traits::supportSequence supp = overlap->support(*ci);
1645: if(debug2) {
1646: //txt3 << "[" << rank << "]: " << __FUNCT__ << ": overlap: support of rank " << neighborIn << ": " << std::endl;
1647: //txt3 << supp;
1648: }
1649: int32_t coneSizeIn = 0;
1650: for(typename Overlap_::traits::supportSequence::iterator si = supp.begin(); si != supp.end(); si++) {
1651: // FIX: replace si.color() type: Point --> ALE::pair
1652: //coneSizeIn += si.color().prefix;
1653: coneSizeIn += si.color().second.first;
1654: }
1655: if(coneSizeIn > 0) {
1656: // Accumulate the total cone size
1657: ConeSizeIn += coneSizeIn;
1658: NeighborConeSizeIn[neighborIn] = coneSizeIn;
1659: NeighborCountIn++;
1660: txt3 << "[" << rank << "]: " << "NeighborConeSizeIn[" << neighborIn << "]: " << NeighborConeSizeIn[neighborIn] << "\n";
1661: }
1662: }
1663: if(debug2) {
1664: if(NeighborCountIn == 0) {
1665: txt3 << "[" << rank << "]: no incoming Neighbors" << std::endl;
1666: }
1667: PetscSynchronizedPrintf(comm,txt3.str().c_str());CHKERROR(ierr,"Error in PetscSynchronizedPrintf");
1668: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
1669: }
1670: if(debug) {/* --------------------------------------------------------------------------------------------- */
1671: ostringstream txt;
1672: txt << "[" << rank << "]: " << __FUNCT__ << ": total size of incoming cone: " << ConeSizeIn << "\n";
1673: for(int__int::iterator np_itor = NeighborConeSizeIn.begin();np_itor!=NeighborConeSizeIn.end();np_itor++)
1674: {
1675: int32_t neighbor = (*np_itor).first;
1676: int32_t coneSize = (*np_itor).second;
1677: txt << "[" << rank << "]: " << __FUNCT__ << ": size of cone from " << neighbor << ": " << coneSize << "\n";
1678:
1679: }//int__int::iterator np_itor=NeighborConeSizeIn.begin();np_itor!=NeighborConeSizeIn.end();np_itor++)
1680: PetscSynchronizedPrintf(comm, txt.str().c_str());
1681: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1682: PetscSynchronizedFlush(comm);
1683: CHKERROR(ierr, "Error in PetscSynchronizedFlush");
1684: }/* --------------------------------------------------------------------------------------------- */
1685: // Compute the size of a cone element
1686: size_t cone_arrow_size = sizeof(cone_arrow_type);
1687: // Now we can allocate a receive buffer to receive all of the remote cones from neighbors
1688: cone_arrow_type *ConesIn;
1689: if(ConeSizeIn) {
1690: PetscMalloc(ConeSizeIn*cone_arrow_size,&ConesIn); CHKERROR(ierr,"Error in PetscMalloc");
1691: }
1692: // Allocate receive requests
1693: MPI_Request *NeighborsIn_waits;
1694: if(NeighborCountIn) {
1695: PetscMalloc((NeighborCountIn)*sizeof(MPI_Request),&NeighborsIn_waits);CHKERROR(ierr,"Error in PetscMalloc");
1696: }
1697: // Post receives for ConesIn
1698: PetscMPIInt tag4;
1699: PetscObjectGetNewTag(petscObj, &tag4); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
1700: // Traverse all neighbors from whom we are receiving cones
1701: cone_arrow_type *NeighborOffsetIn = ConesIn;
1702: if(debug2) {
1703: PetscSynchronizedPrintf(comm, "[%d]: %s: NeighborConeSizeIn.size() = %d\n",rank, __FUNCT__, NeighborConeSizeIn.size());
1704: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1705: PetscSynchronizedFlush(comm);
1706: CHKERROR(ierr, "Error in PetscSynchronizedFlush");
1707: if(NeighborConeSizeIn.size()) {
1708: ierr=PetscSynchronizedPrintf(comm, "[%d]: %s: *NeighborConeSizeIn.begin() = (%d,%d)\n",
1709: rank, __FUNCT__, (*NeighborConeSizeIn.begin()).first, (*NeighborConeSizeIn.begin()).second);
1710: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1711: PetscSynchronizedFlush(comm);
1712: CHKERROR(ierr, "Error in PetscSynchronizedFlush");
1713:
1714: }
1715: }
1716: int32_t n = 0;
1717: for(std::map<int32_t, int32_t>::iterator n_itor = NeighborConeSizeIn.begin(); n_itor!=NeighborConeSizeIn.end(); n_itor++) {
1718: int32_t neighborIn = (*n_itor).first;
1719: int32_t coneSizeIn = (*n_itor).second;
1720: MPI_Irecv(NeighborOffsetIn,cone_arrow_size*coneSizeIn,MPI_BYTE,neighborIn,tag4,comm, NeighborsIn_waits+n);
1721: CHKERROR(ierr, "Error in MPI_Irecv");
1722: NeighborOffsetIn += coneSizeIn;
1723: n++;
1724: }
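// Editorial note (sketch): each incoming message is a packed array of cone_arrow_type structs -- one
// (source, color) pair per arrow -- received as raw MPI_BYTEs; below it is wrapped in a ConeArraySequence
// so that it can be traversed like an ordinary cone and handed to the fuser.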
1725:
1726: // Compute the total outgoing cone sizes by neighbor and the total outgoing cone size.
1727: int__int NeighborConeSizeOut;
1728: int32_t ConeSizeOut = 0;
1729: int32_t NeighborCountOut = 0;
1730: for(typename Overlap_::traits::capSequence::iterator ci = overlapCap.begin(); ci != overlapCap.end(); ci++)
1731: { // traversing overlap.cap()
1732: int32_t neighborOut = *ci;
1733: // Traverse the supports of the overlap graph under each neighbor rank, count cone sizes to be sent and add the cone sizes
1734: typename Overlap_::traits::supportSequence supp = overlap->support(*ci);
1735: if(debug2) {
1736: //txt3 << "[" << rank << "]: " << __FUNCT__ << ": overlap: support of rank " << neighborOut << ": " << std::endl;
1737: //txt3 << supp;
1738: }
1739: int32_t coneSizeOut = 0;
1740: for(typename Overlap_::traits::supportSequence::iterator si = supp.begin(); si != supp.end(); si++) {
1741: // FIX: replace si.color() Point --> ALE::pair
1742: //coneSizeOut += si.color().index;
1743: coneSizeOut += si.color().second.second;
1744: }
1745: if(coneSizeOut > 0) {
1746: // Accumulate the total cone size
1747: ConeSizeOut += coneSizeOut;
1748: NeighborConeSizeOut[neighborOut] = coneSizeOut;
1749: NeighborCountOut++;
1750: }
1751: }//traversing overlap.cap()
1752:
1753: if(debug) {/* --------------------------------------------------------------------------------------------- */
1754: ostringstream txt;
1755: txt << "[" << rank << "]: " << __FUNCT__ << ": total size of outgoing cone: " << ConeSizeOut << "\n";
1756: for(int__int::iterator np_itor = NeighborConeSizeOut.begin();np_itor!=NeighborConeSizeOut.end();np_itor++)
1757: {
1758: int32_t neighborOut = (*np_itor).first;
1759: int32_t coneSizeOut = (*np_itor).second;
1760: txt << "[" << rank << "]: " << __FUNCT__ << ": size of cone to " << neighborOut << ": " << coneSizeOut << "\n";
1761:
1762: }//int__int::iterator np_itor=NeighborConeSizeOut.begin();np_itor!=NeighborConeSizeOut.end();np_itor++)
1763: PetscSynchronizedPrintf(comm, txt.str().c_str());
1764: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1765: PetscSynchronizedFlush(comm);
1766: CHKERROR(ierr, "Error in PetscSynchronizedFlush");
1767: }/* --------------------------------------------------------------------------------------------- */
1768:
1769: // Now we can allocate a send buffer to send all of the remote cones to neighbors
1770: cone_arrow_type *ConesOut;
1771: if(ConeSizeOut) {
1772: PetscMalloc(cone_arrow_size*ConeSizeOut,&ConesOut); CHKERROR(ierr,"Error in PetscMalloc");
1773: }
1774: // Allocate send requests
1775: MPI_Request *NeighborsOut_waits;
1776: if(NeighborCountOut) {
1777: PetscMalloc((NeighborCountOut)*sizeof(MPI_Request),&NeighborsOut_waits);CHKERROR(ierr,"Error in PetscMalloc");
1778: }
1779:
1780: // Pack and send messages
1781: cone_arrow_type *NeighborOffsetOut = ConesOut;
1782: int32_t cntr = 0; // arrow counter
1783: n = 0; // neighbor counter
1784: ostringstream txt2;
1785: // Traverse all neighbors to whom we are sending cones
1786: for(typename Overlap_::traits::capSequence::iterator ci = overlapCap.begin(); ci != overlapCap.end(); ci++)
1787: { // traversing overlap.cap()
1788: int32_t neighborOut = *ci;
1790: // Make sure we have a cone going out to this neighbor
1791: if(NeighborConeSizeOut.find(neighborOut) != NeighborConeSizeOut.end()) { // if there is anything to send
1792: if(debug) { /* ------------------------------------------------------------ */
1793: txt2 << "[" << rank << "]: " << __FUNCT__ << ": outgoing cones destined for " << neighborOut << "\n";
1794: }/* ----------------------------------------------------------------------- */
1795: int32_t coneSizeOut = NeighborConeSizeOut[neighborOut];
1796: // ASSUMPTION: all overlap supports are "symmetric" with respect to swapping processes, so we can safely assume that
1797: // the receiver will be expecting points in the same order as they appear in the support here.
1798: // Traverse all the points within the overlap with this neighbor
1799: typename Overlap_::traits::supportSequence supp = overlap->support(*ci);
1800: for(typename Overlap_::traits::supportSequence::iterator si = supp.begin(); si != supp.end(); si++) {
1801: Point p = *si;
1802: if(debug) { /* ------------------------------------------------------------ */
1803: txt2 << "[" << rank << "]: \t cone over " << p << ": ";
1804: }/* ----------------------------------------------------------------------- */
1805: // Traverse the cone over p in the local _graph and place corresponding TargetArrows in ConesOut
1806: typename graph_type::traits::coneSequence cone = _graph->cone(p);
1807: for(typename graph_type::traits::coneSequence::iterator cone_itor = cone.begin(); cone_itor != cone.end(); cone_itor++) {
1808: // Place a TargetArrow into the ConesOut buffer
1809: // WARNING: pointer arithmetic involving ConesOut takes place here
1810: //cone_arrow_type::place(ConesOut+cntr, cone_itor.arrow());
1811: cone_arrow_type::place(ConesOut+cntr, typename graph_type::traits::arrow_type(*cone_itor,p,cone_itor.color()));
1812: cntr++;
1813: if(debug) { /* ------------------------------------------------------------ */
1814: txt2 << " " << *cone_itor;
1815: }/* ----------------------------------------------------------------------- */
1816: }
1817: if(debug) { /* ------------------------------------------------------------ */
1818: txt2 << std::endl;
1819: }/* ----------------------------------------------------------------------- */
1820: }
1821: MPI_Isend(NeighborOffsetOut,cone_arrow_size*coneSizeOut,MPI_BYTE,neighborOut,tag4,comm, NeighborsOut_waits+n);
1822: CHKERROR(ierr, "Error in MPI_Isend");
1823: // WARNING: pointer arithmetic involving NeighborOffsetOut takes place here
1824: NeighborOffsetOut += coneSizeOut; // keep track of offset
1825: n++; // count neighbors
1826: }// if there is anything to send
1827: }// traversing overlap.cap()
1828: if(debug && NeighborCountOut) {/* --------------------------------------------------------------- */
1829: PetscSynchronizedPrintf(comm, txt2.str().c_str());
1830: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1831: PetscSynchronizedFlush(comm);
1832: CHKERROR(ierr, "Error in PetscSynchronizedFlush");
1833: }/* --------------------------------------------------------------------------------------------- */
1834:
1835: // Allocate an In status array
1836: MPI_Status *NeighborIn_status;
1837: if(NeighborCountIn) {
1838: PetscMalloc((NeighborCountIn)*sizeof(MPI_Status),&NeighborIn_status);CHKERROR(ierr,"Error in PetscMalloc");
1839: }
1840:
1841: // Wait on the receives
1842: if(NeighborCountIn) {
1843: ostringstream txt;
1844: txt << "[" << _graph->commRank() << "]: Error in MPI_Waitall";
1845: MPI_Waitall(NeighborCountIn, NeighborsIn_waits, NeighborIn_status); CHKERROR(ierr,txt.str().c_str());
1846: }
1847:
1848: // Now we unpack the received cones, fuse them with the local cones and store the result in the completion graph.
1849: // Traverse all neighbors from whom we are expecting cones
1850: cntr = 0; // arrow counter
1851: NeighborOffsetIn = ConesIn;
1852: ostringstream txt;
1853: for(typename Overlap_::traits::capSequence::iterator ci = overlapCap.begin(); ci != overlapCap.end(); ci++)
1854: { // traversing overlap.cap()
1855: // Traverse all the points within the overlap with this neighbor
1856: // ASSUMPTION: points are sorted within each neighbor, so we are expecting points in the same order as they arrived in ConesIn
1857: typename Overlap_::traits::supportSequence supp = overlap->support(*ci);
1858: for(typename Overlap_::traits::supportSequence::iterator si = supp.begin(); si != supp.end(); si++)
1859: {
1860: Point p = *si;
1861: //int32_t coneSizeIn = si.color().prefix; // FIX: color() type Point --> ALE::pair
1862: int32_t coneSizeIn = si.color().second.first;
1863: // NOTE: coneSizeIn may be 0, which is legal, since the fuser in principle can operate on an empty cone.
1864: // Extract the local cone into a coneSequence
1865: typename graph_type::traits::coneSequence lcone = _graph->cone(p);
1866: // Wrap the arrived cone in a cone_array_sequence
1867: cone_array_sequence rcone(NeighborOffsetIn, coneSizeIn, p);
1868: if(debug) { /* ---------------------------------------------------------------------------------------*/
1869: txt << "[" << rank << "]: "<<__FUNCT__<< ": received a cone over " << p << " of size " << coneSizeIn << " from rank "<<*ci<< ":" << std::endl;
1870: rcone.view(txt, true);
1871: }/* --------------------------------------------------------------------------------------------------*/
1872: // Fuse the cones
1873: fuser->fuseCones(lcone, rcone, fusion->cone(fuser->fuseBasePoints(p,p)));
1874: if(debug) {
1875: //ostringstream txt;
1876: //txt << "[" << rank << "]: ... after fusing the cone over" << p << std::endl;
1877: //fusion->view(std::cout, txt.str().c_str());
1878: }
1879: NeighborOffsetIn += coneSizeIn;
1880: }
1881: }
1882: if(debug) { /* ---------------------------------------------------------------------------------------*/
1883: if(NeighborCountIn == 0) {
1884: txt << "[" << rank << "]: no cones to fuse in" << std::endl;
1885: }
1886: PetscSynchronizedPrintf(comm, txt.str().c_str());
1887: CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1888: PetscSynchronizedFlush(comm);
1889: CHKERROR(ierr, "Error in PetscSynchronizedFlush");
1890: }
1892: // Wait on the original sends
1893: // Allocate an Out status array
1894: MPI_Status *NeighborOut_status;
1895: if(NeighborCountOut) {
1896: PetscMalloc((NeighborCountOut)*sizeof(MPI_Status),&NeighborOut_status);CHKERROR(ierr,"Error in PetscMalloc");
1897: MPI_Waitall(NeighborCountOut, NeighborsOut_waits, NeighborOut_status); CHKERROR(ierr,"Error in MPI_Waitall");
1898: }
1899:
1900: // Computation complete; freeing memory.
1901: // Some of these can probably be freed earlier, if memory is needed.
1902: // However, be careful while freeing memory that may be in use implicitly.
1903: // For instance, ConesOut is a send buffer and should probably be retained until all send requests have been waited on.
1904: if(NeighborCountOut){
1905: PetscFree(NeighborsOut_waits); CHKERROR(ierr, "Error in PetscFree");
1906: PetscFree(NeighborOut_status); CHKERROR(ierr, "Error in PetscFree");
1907: }
1908: if(NeighborCountIn){
1909: PetscFree(NeighborsIn_waits); CHKERROR(ierr, "Error in PetscFree");
1910: PetscFree(NeighborIn_status); CHKERROR(ierr, "Error in PetscFree");
1911: }
1912:
1913: if(ConeSizeIn) {PetscFree(ConesIn); CHKERROR(ierr, "Error in PetscFree");}
1914: if(ConeSizeOut){PetscFree(ConesOut); CHKERROR(ierr, "Error in PetscFree");}
1915:
1916: // Done!
1917: };// __computeFusion()
1921: template <typename Overlap_, typename Fusion_>
1922: static void __computeFusionNew(const Obj<graph_type>& _graph, const Obj<Overlap_>& overlap, Obj<Fusion_> fusion, const Obj<fuser_type>& fuser) {
1923: typedef ConeArraySequence<typename graph_type::traits::arrow_type> cone_array_sequence;
1924: typedef typename cone_array_sequence::cone_arrow_type cone_arrow_type;
1925: MPI_Comm comm = _graph->comm();
1926: int rank = _graph->commRank();
1927: int size = _graph->commSize();
1928: PetscObject petscObj = _graph->petscObj();
1929: PetscMPIInt tag1;
1932: Obj<typename Overlap_::traits::capSequence> overlapCap = overlap->cap();
1933: int msgSize = sizeof(cone_arrow_type)/sizeof(int); // Messages are arrows
1935: int NeighborCount = overlapCap->size();
1936: int *Neighbors = PETSC_NULL, *NeighborByProc = PETSC_NULL; // Neighbor processes and the reverse map
1937: int *SellSizes = PETSC_NULL, *BuySizes = PETSC_NULL; // Sizes of the cones to transmit and receive
1938: int *SellCones = PETSC_NULL, *BuyCones = PETSC_NULL; //
1939: int n, offset;
1940: PetscMalloc2(NeighborCount,int,&Neighbors,size,int,&NeighborByProc);CHKERROR(ierr, "Error in PetscMalloc");
1941: PetscMalloc2(NeighborCount,int,&SellSizes,NeighborCount,int,&BuySizes);CHKERROR(ierr, "Error in PetscMalloc");
1943: n = 0;
1944: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
1945: Neighbors[n] = *neighbor;
1946: NeighborByProc[*neighbor] = n;
1947: BuySizes[n] = 0;
1948: SellSizes[n] = 0;
1949: n++;
1950: }
1952: n = 0;
1953: offset = 0;
1954: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
1955: Obj<typename Overlap_::traits::supportSequence> support = overlap->support(*neighbor);
1957: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
1958: BuySizes[n] += p_iter.color().second.first;
1959: SellSizes[n] += p_iter.color().second.second;
1960: offset += _graph->cone(*p_iter)->size();
1961: }
1962: n++;
1963: }
1965: PetscMalloc(offset*msgSize * sizeof(int), &SellCones);CHKERROR(ierr, "Error in PetscMalloc");
1966: cone_arrow_type *ConesOut = (cone_arrow_type *) SellCones;
1967: offset = 0;
1968: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
1969: Obj<typename Overlap_::traits::supportSequence> support = overlap->support(*neighbor);
1970: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
1971: Obj<typename graph_type::traits::coneSequence> cone = _graph->cone(*p_iter);
1973: for(typename graph_type::traits::coneSequence::iterator c_iter = cone->begin(); c_iter != cone->end(); ++c_iter) {
1974: if (debug) {
1975: ostringstream txt;
1977: txt << "["<<rank<<"]Packing arrow for " << *neighbor << " " << *c_iter << "--" << c_iter.color() << "-->" << *p_iter << std::endl;
1978: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
1979: }
1980: cone_arrow_type::place(ConesOut+offset, typename graph_type::traits::arrow_type(*c_iter, *p_iter, c_iter.color()));
1981: offset++;
1982: }
1983: if (p_iter.color().second.second != (int) cone->size()) {
1984: throw ALE::Exception("Non-matching sizes");
1985: }
1986: }
1987: }
1988: if (debug) {
1989: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
1990: }
1992: // Send and retrieve cones of the base overlap
1993: PetscObjectGetNewTag(petscObj, &tag1); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
1994: commCycle(comm, tag1, msgSize, NeighborCount, SellSizes, Neighbors, SellCones, NeighborCount, BuySizes, Neighbors, &BuyCones);
1996: cone_arrow_type *ConesIn = (cone_arrow_type *) BuyCones;
1997: offset = 0;
1998: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
1999: Obj<typename Overlap_::traits::supportSequence> support = overlap->support(*neighbor);
2001: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
2002: const Obj<typename graph_type::traits::coneSequence>& localCone = _graph->cone(*p_iter);
2003: int remoteConeSize = p_iter.color().second.first;
2004: cone_array_sequence remoteCone(&ConesIn[offset], remoteConeSize, *p_iter);
2005: if (debug) {
2006: ostringstream txt;
2008: txt << "["<<rank<<"]Unpacking cone for " << *p_iter << std::endl;
2009: remoteCone.view(txt, true);
2010: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
2011: }
2012: // Fuse in received cones
2013: fuser->fuseCones(localCone, remoteCone, fusion->cone(fuser->fuseBasePoints(*p_iter, *p_iter)));
2014: offset += remoteConeSize;
2015: }
2016: }
2017: if (debug) {
2018: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
2019: }
2020: };
2024: template <typename Overlap_, typename Fusion_>
2025: static void __computeFusionNew(const Obj<graph_type>& _graphA, const Obj<graph_type>& _graphB, const Obj<Overlap_>& overlap, Obj<Fusion_> fusion, const Obj<fuser_type>& fuser) {
2026: typedef ConeArraySequence<typename graph_type::traits::arrow_type> cone_array_sequence;
2027: typedef typename cone_array_sequence::cone_arrow_type cone_arrow_type;
2028: MPI_Comm comm = _graphA->comm();
2029: int rank = _graphA->commRank();
2030: PetscObject petscObj = _graphA->petscObj();
2031: PetscMPIInt tag1;
2034: Obj<typename Overlap_::traits::capSequence> overlapCap = overlap->cap();
2035: int msgSize = sizeof(cone_arrow_type)/sizeof(int); // Messages are arrows
2037: int NeighborCountA = 0, NeighborCountB = 0;
2038: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
2039: Obj<typename Overlap_::traits::supportSequence> support = overlap->support(*neighbor);
2041: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
2042: if (((*p_iter).first == 0) && (p_iter.color().second.first || p_iter.color().second.second)) {
2043: NeighborCountA++;
2044: break;
2045: }
2046: }
2047: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
2048: if (((*p_iter).first == 1) && (p_iter.color().second.first || p_iter.color().second.second)) {
2049: NeighborCountB++;
2050: break;
2051: }
2052: }
2053: }
2055: int *NeighborsA, *NeighborsB; // Neighbor processes
2056: int *SellSizesA, *BuySizesA; // Sizes of the A cones to transmit and B cones to receive
2057: int *SellSizesB, *BuySizesB; // Sizes of the B cones to transmit and A cones to receive
2058: int *SellConesA = PETSC_NULL, *BuyConesA = PETSC_NULL;
2059: int *SellConesB = PETSC_NULL, *BuyConesB = PETSC_NULL;
2060: int nA, nB, offsetA, offsetB;
2061: PetscMalloc2(NeighborCountA,int,&NeighborsA,NeighborCountB,int,&NeighborsB);CHKERROR(ierr, "Error in PetscMalloc");
2062: PetscMalloc2(NeighborCountA,int,&SellSizesA,NeighborCountA,int,&BuySizesA);CHKERROR(ierr, "Error in PetscMalloc");
2063: PetscMalloc2(NeighborCountB,int,&SellSizesB,NeighborCountB,int,&BuySizesB);CHKERROR(ierr, "Error in PetscMalloc");
2065: nA = 0;
2066: nB = 0;
2067: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
2068: Obj<typename Overlap_::traits::supportSequence> support = overlap->support(*neighbor);
2070: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
2071: if (((*p_iter).first == 0) && (p_iter.color().second.first || p_iter.color().second.second)) {
2072: NeighborsA[nA] = *neighbor;
2073: BuySizesA[nA] = 0;
2074: SellSizesA[nA] = 0;
2075: nA++;
2076: break;
2077: }
2078: }
2079: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
2080: if (((*p_iter).first == 1) && (p_iter.color().second.first || p_iter.color().second.second)) {
2081: NeighborsB[nB] = *neighbor;
2082: BuySizesB[nB] = 0;
2083: SellSizesB[nB] = 0;
2084: nB++;
2085: break;
2086: }
2087: }
2088: }
2089: if ((nA != NeighborCountA) || (nB != NeighborCountB)) {
2090: throw ALE::Exception("Invalid neighbor count");
2091: }
2093: nA = 0;
2094: offsetA = 0;
2095: nB = 0;
2096: offsetB = 0;
2097: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
2098: Obj<typename Overlap_::traits::supportSequence> support = overlap->support(*neighbor);
2099: int foundA = 0, foundB = 0;
2101: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
2102: if (((*p_iter).first == 0) && (p_iter.color().second.first || p_iter.color().second.second)) {
2103: BuySizesA[nA] += p_iter.color().second.first;
2104: SellSizesA[nA] += p_iter.color().second.second;
2105: offsetA += _graphA->cone((*p_iter).second)->size();
2106: foundA = 1;
2107: } else if (((*p_iter).first == 1) && (p_iter.color().second.first || p_iter.color().second.second)) {
2108: BuySizesB[nB] += p_iter.color().second.first;
2109: SellSizesB[nB] += p_iter.color().second.second;
2110: offsetB += _graphB->cone((*p_iter).second)->size();
2111: foundB = 1;
2112: }
2113: }
2114: if (foundA) nA++;
2115: if (foundB) nB++;
2116: }
2118: PetscMalloc2(offsetA*msgSize,int,&SellConesA,offsetB*msgSize,int,&SellConesB);CHKERROR(ierr, "Error in PetscMalloc");
2119: cone_arrow_type *ConesOutA = (cone_arrow_type *) SellConesA;
2120: cone_arrow_type *ConesOutB = (cone_arrow_type *) SellConesB;
2121: offsetA = 0;
2122: offsetB = 0;
2123: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
2124: Obj<typename Overlap_::traits::supportSequence> support = overlap->support(*neighbor);
2126: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
2127: Obj<typename graph_type::traits::coneSequence> cone;
2128: const Point& p = (*p_iter).second;
2130: if ((*p_iter).first == 0) {
2131: cone = _graphA->cone(p);
2132: for(typename graph_type::traits::coneSequence::iterator c_iter = cone->begin(); c_iter != cone->end(); ++c_iter) {
2133: if (debug) {
2134: ostringstream txt;
2136: txt << "["<<rank<<"]Packing A arrow for " << *neighbor << " " << *c_iter << "--" << c_iter.color() << "-->" << p << std::endl;
2137: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
2138: }
2139: cone_arrow_type::place(ConesOutA+offsetA, typename graph_type::traits::arrow_type(*c_iter, p, c_iter.color()));
2140: offsetA++;
2141: }
2142: } else {
2143: cone = _graphB->cone(p);
2144: for(typename graph_type::traits::coneSequence::iterator c_iter = cone->begin(); c_iter != cone->end(); ++c_iter) {
2145: if (debug) {
2146: ostringstream txt;
2148: txt << "["<<rank<<"]Packing B arrow for " << *neighbor << " " << *c_iter << "--" << c_iter.color() << "-->" << p << std::endl;
2149: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
2150: }
2151: cone_arrow_type::place(ConesOutB+offsetB, typename graph_type::traits::arrow_type(*c_iter, p, c_iter.color()));
2152: offsetB++;
2153: }
2154: }
2155: if (p_iter.color().second.second != (int) cone->size()) {
2156: std::cout << "["<<rank<<"] " << p_iter.color() << " does not match cone size " << cone->size() << std::endl;
2157: throw ALE::Exception("Non-matching sizes");
2158: }
2159: }
2160: }
2161: if (debug) {
2162: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
2163: }
2165: // Send and retrieve cones of the base overlap
2166: PetscObjectGetNewTag(petscObj, &tag1); CHKERROR(ierr, "Failed on PetscObjectGetNewTag");
2167: commCycle(comm, tag1, msgSize, NeighborCountA, SellSizesA, NeighborsA, SellConesA, NeighborCountB, BuySizesB, NeighborsB, &BuyConesB);
2168: commCycle(comm, tag1, msgSize, NeighborCountB, SellSizesB, NeighborsB, SellConesB, NeighborCountA, BuySizesA, NeighborsA, &BuyConesA);
2170: // Must unpack with the BtoA overlap
2171: //cone_arrow_type *ConesInA = (cone_arrow_type *) BuyConesA;
2172: cone_arrow_type *ConesInB = (cone_arrow_type *) BuyConesB;
2173: offsetA = 0;
2174: offsetB = 0;
2175: for(typename Overlap_::traits::capSequence::iterator neighbor = overlapCap->begin(); neighbor != overlapCap->end(); ++neighbor) {
2176: Obj<typename Overlap_::traits::supportSequence> support = overlap->support(*neighbor);
2178: for(typename Overlap_::traits::supportSequence::iterator p_iter = support->begin(); p_iter != support->end(); ++p_iter) {
2179: Obj<typename graph_type::traits::coneSequence> localCone;
2180: const Point& p = (*p_iter).second;
2181: int remoteConeSize = p_iter.color().second.first;
2183: // Right now we only provide the A->B fusion
2184: if ((*p_iter).first == 0) {
2185: #if 0
2186: cone_array_sequence remoteCone(&ConesInA[offsetA], remoteConeSize, p);
2188: localCone = _graphA->cone(p);
2189: offsetA += remoteConeSize;
2190: if (debug) {
2191: ostringstream txt;
2193: txt << "["<<rank<<"]Unpacking B cone for " << p << " from " << *neighbor << std::endl;
2194: remoteCone.view(txt, true);
2195: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
2196: }
2197: // Fuse in received cones
2198: fuser->fuseCones(localCone, remoteCone, fusion->cone(fuser->fuseBasePoints(p, p)));
2199: #endif
2200: } else {
2201: cone_array_sequence remoteCone(&ConesInB[offsetB], remoteConeSize, p);
2203: localCone = _graphB->cone(p);
2204: offsetB += remoteConeSize;
2205: if (debug) {
2206: ostringstream txt;
2208: txt << "["<<rank<<"]Unpacking A cone for " << p << " from " << *neighbor << std::endl;
2209: remoteCone.view(txt, true);
2210: PetscSynchronizedPrintf(comm, txt.str().c_str()); CHKERROR(ierr, "Error in PetscSynchronizedPrintf");
2211: }
2212: // Fuse in received cones
2213: fuser->fuseCones(localCone, remoteCone, fusion->cone(fuser->fuseBasePoints(p, p)));
2214: }
2215: }
2216: }
2217: if (debug) {
2218: PetscSynchronizedFlush(comm);CHKERROR(ierr,"Error in PetscSynchronizedFlush");
2219: }
2220: };
2222: public:
2223: static void setDebug(int debug) {ParConeDelta::debug = debug;};
2224: static int getDebug() {return ParConeDelta::debug;};
2225: }; // class ParConeDelta
2226:
2227: template <typename ParSifter_, typename Fuser_, typename FusionSifter_>
2228: int ParConeDelta<ParSifter_, Fuser_, FusionSifter_>::debug = 0;
2229:
2231: //
2232: // Auxiliary type
2233: //
2234: template <typename Sifter_>
2235: class Flip { // class Flip
2236: public:
2237: typedef Sifter_ graph_type;
2238: typedef Flip<Sifter_> flip_type;
2239: protected:
2240: Obj<graph_type> _graph;
2241: public:
2242: //
2243: struct traits {
2244: // Basic types
2245: typedef typename graph_type::traits::arrow_type::flip::type arrow_type;
2246: typedef typename arrow_type::source_type source_type;
2247: typedef typename arrow_type::target_type target_type;
2248: typedef typename arrow_type::color_type color_type;
2249: // Sequences
2250: // Be careful: use only a limited set of iterator methods: NO arrow(), source(), target() etc; operator*() and color() are OK.
2251: typedef typename graph_type::traits::coneSequence supportSequence;
2252: typedef typename graph_type::traits::supportSequence coneSequence;
2253: typedef typename graph_type::traits::baseSequence capSequence;
2254: typedef typename graph_type::traits::capSequence baseSequence;
2255: };
2256: // Basic interface
2257: Flip(const Obj<graph_type>& graph) : _graph(graph) {};
2258: Flip(const Flip& flip) : _graph(flip._graph) {};
2259: virtual ~Flip() {};
2260: // Redirect
2261: // Only a limited set of methods is redirected: simple cone, support, base, cap and arrow insertion.
2262: //
2263: // Query methods
2264: //
2265: MPI_Comm comm() const {return this->_graph->comm();};
2266: int commSize() const {return this->_graph->commSize();};
2267: int commRank() const {return this->_graph->commRank();}
2268: PetscObject petscObj() const {return this->_graph->petscObj();};
2270: int view(const char* label = NULL) {return this->_graph->view(label);}
2271:
2272: // FIX: need const_cap, const_base returning const capSequence etc, but those need to have const_iterators, const_begin etc.
2273: Obj<typename traits::capSequence> cap() {
2274: return this->_graph->base();
2275: };
2276: Obj<typename traits::baseSequence> base() {
2277: return this->_graph->cap();
2278: };
2279:
2280: Obj<typename traits::coneSequence>
2281: cone(const typename traits::target_type& p) {
2282: return this->_graph->support(p);
2283: };
2284:
2285: Obj<typename traits::coneSequence>
2286: cone(const typename traits::target_type& p, const typename traits::color_type& color) {
2287: return this->_graph->support(p, color);
2288: };
2290: template<typename PointCheck>
2291: bool coneContains(const typename traits::target_type& p, const PointCheck& checker) {
2292: return this->_graph->supportContains(p, checker);
2293: };
2295: template<typename PointProcess>
2296: void coneApply(const typename traits::target_type& p, const PointProcess& processor) {
2297: this->_graph->supportApply(p, processor);
2298: };
2299:
2300: Obj<typename traits::supportSequence>
2301: support(const typename traits::source_type& p) {
2302: return this->_graph->cone(p);
2303: };
2304:
2305: Obj<typename traits::supportSequence>
2306: support(const typename traits::source_type& p, const typename traits::color_type& color) {
2307: return this->_graph->cone(p,color);
2308: };
2309:
2310: virtual void addArrow(const typename traits::source_type& p, const typename traits::target_type& q) {
2311: this->_graph->addArrow(q, p);
2312: };
2313:
2314: virtual void addArrow(const typename traits::source_type& p, const typename traits::target_type& q, const typename traits::color_type& color) {
2315: this->_graph->addArrow(q, p, color);
2316: };
2317:
2318: virtual void addArrow(const typename traits::arrow_type& a) {
2319: this->_graph->addArrow(a.target, a.source, a.color);
2320: };
2321:
2322: };// class Flip
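// Editorial usage sketch (illustrative only): wrapping a graph in Flip exchanges the roles of cone/support
// and base/cap, so support-oriented deltas can reuse the cone-oriented machinery unchanged, e.g.
//
//   Obj<Flip<graph_type> > flip = Flip<graph_type>(graph);
//   // flip->cone(p) traverses graph->support(p); flip->addArrow(s,t,c) performs graph->addArrow(t,s,c)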
2325: // WARNING: must pass in a 'flipped' Fuser, that is, a fuser that acts on cones instead of supports
2326: template<typename ParSifter_,
2327: typename Fuser_ = RightSequenceDuplicator<ConeArraySequence<typename ParSifter_::traits::arrow_type::flip::type> >,
2328: typename FusionSifter_ = typename ParSifter_::template rebind<typename Fuser_::fusion_target_type,
2329: typename Fuser_::fusion_source_type,
2330: typename Fuser_::fusion_color_type>::type>
2331: class ParSupportDelta {
2332: public:
2333: // Here we specialize to Sifters based on Points in order to enable parallel overlap discovery.
2334: // We also assume that the Points in the base are ordered appropriately so we can use baseSequence.begin() and
2335: // baseSequence.end() as the extrema for global reduction.
2336: typedef ParSupportDelta<ParSifter_, Fuser_, FusionSifter_> delta_type;
2337: typedef ParSifter_ graph_type;
2338: typedef Fuser_ fuser_type;
2339: typedef ASifter<ALE::Point, int, ALE::pair<ALE::Point, ALE::pair<int,int> >, SifterDef::uniColor> overlap_type;
2340: typedef ASifter<ALE::pair<int,ALE::Point>, int, ALE::pair<ALE::Point, ALE::pair<int,int> >, SifterDef::uniColor> bioverlap_type;
2341: typedef FusionSifter_ fusion_type;
2342: //
2344: //
2345: // FIX: Is there a way to inherit this from ParConeDelta? Right now it is a verbatim copy.
2348: static Obj<overlap_type>
2349: overlap(const Obj<graph_type> graph) {
2350: ALE_LOG_EVENT_BEGIN;
2351: Obj<overlap_type> overlap = new overlap_type(graph->comm());
2352: // If this is a serial object, we return an empty overlap
2353: if((graph->comm() != PETSC_COMM_SELF) && (graph->commSize() > 1)) {
2354: computeOverlap(graph, overlap);
2355: }
2356: ALE_LOG_EVENT_END;
2357: return overlap;
2358: };
2360: template <typename Overlap_>
2361: static void computeOverlap(const Obj<graph_type>& graph, Obj<Overlap_>& overlap){
2362: // Flip the graph and the overlap and use ParConeDelta's method
2363: Obj<Flip<graph_type> > graph_flip = Flip<graph_type>(graph);
2364: Obj<Flip<Overlap_> > overlap_flip = Flip<Overlap_>(overlap);
2365: ParConeDelta<Flip<graph_type>, fuser_type, Flip<fusion_type> >::computeOverlap(graph_flip, overlap_flip);
2366: };
2370: static Obj<bioverlap_type>
2371: overlap(const Obj<graph_type> graphA, const Obj<graph_type> graphB) {
2372: ALE_LOG_EVENT_BEGIN;
2373: Obj<bioverlap_type> overlap = new bioverlap_type(graphA->comm());
2374: PetscMPIInt comp;
2376: MPI_Comm_compare(graphA->comm(), graphB->comm(), &comp);
2377: if (comp != MPI_IDENT) {
2378: throw ALE::Exception("Non-matching communicators for overlap");
2379: }
2380: Obj<Flip<graph_type> > graphA_flip = Flip<graph_type>(graphA);
2381: Obj<Flip<graph_type> > graphB_flip = Flip<graph_type>(graphB);
2382: Obj<Flip<bioverlap_type> > overlap_flip = Flip<bioverlap_type>(overlap);
2384: ParConeDelta<Flip<graph_type>, fuser_type, Flip<fusion_type> >::computeOverlap(graphA_flip, graphB_flip, overlap_flip);
2385: ALE_LOG_EVENT_END;
2386: return overlap;
2387: };
2389: template <typename Overlap_>
2390: static Obj<fusion_type>
2391: fusion(const Obj<graph_type>& graphA, const Obj<graph_type>& graphB, const Obj<Overlap_>& overlap, const Obj<fuser_type>& fuser = fuser_type()) {
2392: Obj<fusion_type> fusion = new fusion_type(graphA->comm());
2393: PetscMPIInt comp;
2395: MPI_Comm_compare(graphA->comm(), graphB->comm(), &comp);
2396: if (comp != MPI_IDENT) {
2397: throw ALE::Exception("Non-matching communicators for overlap");
2398: }
2399: Obj<Flip<graph_type> > graphA_flip = Flip<graph_type>(graphA);
2400: Obj<Flip<graph_type> > graphB_flip = Flip<graph_type>(graphB);
2401: Obj<Flip<Overlap_> > overlap_flip = Flip<Overlap_>(overlap);
2402: Obj<Flip<fusion_type> > fusion_flip = Flip<fusion_type>(fusion);
2404: ParConeDelta<Flip<graph_type>, fuser_type, Flip<fusion_type> >::computeFusion(graphA_flip, graphB_flip, overlap_flip, fusion_flip);
2405: return fusion;
2406: };
2408: // FIX: Is there a way to inherit this from ParConeDelta? Right now it is a verbatim copy.
2409: template <typename Overlap_>
2410: static Obj<fusion_type>
2411: fusion(const Obj<graph_type>& graph, const Obj<Overlap_>& overlap, const Obj<fuser_type>& fuser = new fuser_type()) {
2412: Obj<fusion_type> fusion = new fusion_type(graph->comm());
2413: // If this is a serial object, we return an empty delta
2414: if((graph->comm() != PETSC_COMM_SELF) && (graph->commSize() > 1)) {
2415: computeFusion(graph, overlap, fusion, fuser);
2416: }
2417: return fusion;
2418: };
2420: template <typename Overlap_, typename Fusion_>
2421: static void computeFusion(const Obj<graph_type>& graph, const Obj<Overlap_>& overlap, Obj<Fusion_> fusion, const Obj<fuser_type>& fuser = new fuser_type()){
2422: // Flip the graph, the overlap and the fusion, and then use ParConeDelta's method
2423: Obj<Flip<graph_type> > graph_flip = Flip<graph_type>(graph);
2424: Obj<Flip<Overlap_> > overlap_flip = Flip<Overlap_>(overlap);
2425: Obj<Flip<Fusion_> > fusion_flip = Flip<Fusion_>(fusion);
2426: ParConeDelta<Flip<graph_type>, fuser_type, Flip<fusion_type> >::computeFusion(graph_flip, overlap_flip, fusion_flip);
2427: };
2428: public:
2429: static void setDebug(int debug) {ParConeDelta<Flip<graph_type>, fuser_type, Flip<fusion_type> >::setDebug(debug);};
2430: static int getDebug() {return ParConeDelta<Flip<graph_type>, fuser_type, Flip<fusion_type> >::getDebug();};
2431: }; // class ParSupportDelta
2432:
2433: } // namespace ALE
2435: #endif