00001
00098 #include <stdio.h>
00099 #include <stdarg.h>
00100 #include <time.h>
00101 #include <string.h>
00102
00103 #ifdef _CRAYMPP
00104 #define IPC_USE_SHMEM
00105 #include <intrinsics.h>
00106 #include <mpp/shmem.h>
00107
00108
00109
00110
00111 #ifdef _CRAYT3E
00112 #undef _CRAYT3E
00113 #include <mpp/sync_proto.h>
00114 #define _CRAYT3E
00115 #else
00116 #include <mpp/sync_proto.h>
00117 #endif
00118 #endif
00119
00120
00121 #ifndef IPC_USE_SHMEM
00122 #include <string.h>
00123 #endif
00124
00125 #include "ipc.h"
00126
00127
00128
00129
00130
00131
00133 #define IPC_PE_MSG_DELAY 0.1
00134
00136 #define IPC_MAXFILENAMELENGTH 1024
00137
00138
00139
00140
00141
00142
00143
00144 int ipc_msg_level = IPC_STD;
00145 const char* ipc_msg_level_docstring =
00146 "Level of notification messages to display. Standard levels:\n\n"
00147
00148 " IPC_None \n"
00149 " IPC_Error \n"
00150 " IPC_Warning \n"
00151 " IPC_Caution \n"
00152 " IPC_Alert \n"
00153 " IPC_Summary \n"
00154 " IPC_Std \n"
00155 " IPC_Verbose \n"
00156 " IPC_Overwhelm\n\n"
00157
00158 "The last one is special in that it overrides the values of\n"
00159 "ipc_msg_level_forceall and ipc_msg_synch_level, forcing\n"
00160 "all PEs to present all messages and to synchronize while doing\n"
00161 "so.";
00162
00163
00164 int ipc_msg_forceall_level = IPC_NONE;
00165 const char* ipc_msg_forceall_level_docstring =
00166 "Messages with a debug level higher than this level (see ipc_msg_level\n"
00167 "for definitions) will be printed by all PEs who make the call, regardless\n"
00168 "of whether the call declares that only one should print. Useful for\n"
00169 "debugging on multiple PEs when some PEs are reporting errors or warnings\n"
00170 "in the totals yet no messages were displayed.";
00171
00172
00173 int ipc_msg_synch_level = IPC_ERROR;
00174 const char* ipc_msg_synch_level_docstring =
00175 "Attempt to print messages with a msg level higher than this level (see\n"
00176 "ipc_msg_level for definitions) in order of PE number, rather than\n"
00177 "interleaving the output from different PEs. The synchronization is\n"
00178 "currently accomplished only by having each PE delay for a short\n"
00179 "time proportional to its PE number. As a consequence, it slows\n"
00180 "down execution while not guaranteeing perfect order of output.\n"
00181 "However, it has the advantage of not failing catastrophically in\n"
00182 "error conditions where PEs become out of synch due to missed\n"
00183 "barriers, which is important for an error message handler.";
00184
00185
00186 int ipc_exit_on_error_num = 0;
00187 const char* ipc_exit_on_error_num_docstring =
00188 "If nonzero, when this many errors have been reached, the program\n"
00189 "will exit automatically.";
00190
00191
00192 int ipc_max_warnings = 100;
00193 const char* ipc_max_warnings_docstring =
00194 "Maximum number of warnings to be printed, per PE. If this limit is\n"
00195 "reached, further warnings will still increment the warning counter\n"
00196 "but no messages will be printed.";
00197
00198
00199 int ipc_max_errors = 100;
00200 const char* ipc_max_errors_docstring =
00201 "Maximum number of error messages to be printed, per PE. If this limit\n"
00202 "is reached, further errors will still increment the error counter\n"
00203 "but no messages will be printed.";
00204
00205
00206
00207
00208
00209
00210
/* Stream written by ipc_log(); stays NULL until ipc_init_logfile() opens it. */
FILE *ipc_logfile = NULL;

/* Running totals maintained by ipc_notify_base(). */
int ipc_warnings = 0;   /* messages at warning severity seen so far */
int ipc_errors = 0;     /* messages at error severity seen so far   */
00214
00215
00216
00217
00218
00219
00220
00221 ipc_status ipc_put_base(void *target_data, void *source_data,
00222 ipc_datatype datatype, size_t count, int process);
00223
00224 ipc_status ipc_get_base(void *target_data, void *source_data,
00225 ipc_datatype datatype, size_t count, int process);
00226
00227 void ipc_notify_base(int terminateline, int print_pe, int message_level, const char *format, va_list args);
00228
00229
00230
00231
00232
00233
00234
00236 int ipc_datatype_size(ipc_datatype datatype)
00237 {
00238 switch(datatype){
00239 case IPC_RAW8: return sizeof(i8 );
00240 case IPC_RAW32: return sizeof(i32 );
00241 #ifndef NO_I64
00242 case IPC_RAW64: return sizeof(i64 );
00243 #endif
00244 case IPC_CHAR: return sizeof(signed char );
00245 case IPC_SHORT: return sizeof(signed short );
00246 case IPC_INT: return sizeof(signed int );
00247 case IPC_LONG: return sizeof(signed long );
00248 #ifndef LONG_LONG_UNAVAILABLE
00249 case IPC_LONG_LONG: return sizeof(signed long long);
00250 #endif
00251 case IPC_UNSIGNED_CHAR: return sizeof(unsigned char );
00252 case IPC_UNSIGNED_SHORT: return sizeof(unsigned short );
00253 case IPC_UNSIGNED: return sizeof(unsigned int );
00254 case IPC_UNSIGNED_LONG: return sizeof(unsigned long );
00255 case IPC_FLOAT: return sizeof(float );
00256 case IPC_DOUBLE: return sizeof(double );
00257 #ifndef LONG_DOUBLE_UNAVAILABLE
00258 case IPC_LONG_DOUBLE: return sizeof(long double );
00259 #endif
00260 default:
00261 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_datatype %d unknown",datatype);
00262 return(sizeof(int));
00263 }
00264 }
00265
00266
00267
/*
 * Number of processes (PEs) taking part in the run: _num_pes() when built
 * with IPC_USE_SHMEM, otherwise always 1.
 */
int ipc_num_processes(void)
{
#ifdef IPC_USE_SHMEM
  const int pe_count = _num_pes();
#else
  const int pe_count = 1;
#endif

  return pe_count;
}
00280
00281
00282
/*
 * Index of the calling process (PE): _my_pe() when built with
 * IPC_USE_SHMEM, otherwise always 0.
 */
int ipc_my_process(void)
{
#ifdef IPC_USE_SHMEM
  const int my_pe = _my_pe();
#else
  const int my_pe = 0;
#endif

  return my_pe;
}
00292
00293
00294
00296 void ipc_barrier(void)
00297 {
00298 #ifdef IPC_USE_SHMEM
00299
00300 shmem_barrier_all();
00301 #else
00302 if (ipc_num_processes() != 1) {
00303 ipc_notify(IPC_ONE,IPC_ERROR,"ipc_barrier with multiple processes (%d) unimplemented",ipc_num_processes());
00304 exit(-1);
00305 }
00306 #endif
00307 }
00308
00309
00310
00312 void ipc_set_barrier(void)
00313 {
00314 #ifdef IPC_USE_SHMEM
00315
00316 #ifdef __cplusplus
00317 ipc_notify(IPC_ONE,IPC_ERROR,"ipc_set_barrier not implemented");
00318 #else
00319 set_barrier();
00320 #endif
00321
00322 #else
00323
00324 if (ipc_num_processes() != 1) {
00325 ipc_notify(IPC_ONE,IPC_ERROR,"ipc_set_barrier with multiple processes (%d) unimplemented",ipc_num_processes());
00326 exit(-1);
00327 }
00328
00329 #endif
00330 }
00331
00332
00333
00334
00335
00336
00342 #define IPC_PUT_RAW(bits) \
00343 ipc_status ipc_put ## bits(void *data, size_t count, int process) \
00344 { return ipc_put_base(data,data,IPC_RAW ## bits,count,process); }
00345 IPC_PUT_RAW(8)
00346 IPC_PUT_RAW(32)
00347 IPC_PUT_RAW(64)
00348 #undef IPC_PUT_RAW
00349
00350 #define IPC_GET_RAW(bits) \
00351 ipc_status ipc_get ## bits(void *data, size_t count, int process) \
00352 { return ipc_get_base(data,data,IPC_RAW ## bits,count,process); }
00353 IPC_GET_RAW(8)
00354 IPC_GET_RAW(32)
00355 IPC_GET_RAW(64)
00356 #undef IPC_GET_RAW
00357
00358
#ifndef __cplusplus

/*
 * C-only entry points.  C has no overloading, so the element type is always
 * passed explicitly; each function simply forwards to the matching *_base
 * routine.
 */

/* Put in place: `data` is both the source and the target address. */
ipc_status ipc_put(void *data, ipc_datatype datatype, size_t count, int process)
{
  return ipc_put_base(data, data, datatype, count, process);
}

/* Get in place: `data` is both the source and the target address. */
ipc_status ipc_get(void *data, ipc_datatype datatype, size_t count, int process)
{
  return ipc_get_base(data, data, datatype, count, process);
}

/* Put with distinct source and target addresses. */
ipc_status ipc_put_to(void *target_data, void *source_data,
                      ipc_datatype datatype, size_t count, int process)
{
  return ipc_put_base(target_data, source_data, datatype, count, process);
}

/* Get with distinct source and target addresses. */
ipc_status ipc_get_to(void *target_data, void *source_data,
                      ipc_datatype datatype, size_t count, int process)
{
  return ipc_get_base(target_data, source_data, datatype, count, process);
}

#endif
00376
00377
00378 #ifdef __cplusplus
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00390
00391 #ifdef NDEBUG
00392 #define IPC_CHECK_DATATYPE(name,ipc_type,data) (void)datatype
00393 #else
00394 #define IPC_CHECK_DATATYPE(name,ipc_type,data) \
00395 if (datatype != ipc_type && \
00396 !((datatype==IPC_RAW8 && sizeof(data)==1) || \
00397 (datatype==IPC_RAW32 && sizeof(data)==4) || \
00398 (datatype==IPC_RAW64 && sizeof(data)==8))) \
00399 ipc_notify(3,IPC_WARNING,"ipc_" #name " called with incorrect datatype (%d != %d, sizeof(data)=%d != %d)",\
00400 ipc_type,datatype,sizeof(data),ipc_datatype_size(ipc_type))
00401 #endif
00402
00403 #define IPC_CALL(name, c_type,ipc_type) \
00404 ipc_status ipc_ ## name (c_type *data, size_t count, int process) \
00405 { return ipc_ ## name ## _base(data,data,ipc_type,count,process); } \
00406 \
00407 ipc_status ipc_ ## name ## _to (c_type *target, c_type *source, size_t count, int process) \
00408 { return ipc_ ## name ## _base(target,source,ipc_type,count,process); } \
00409 \
00410 ipc_status ipc_ ## name \
00411 (c_type *data, ipc_datatype datatype, size_t count, int process) \
00412 { \
00413 IPC_CHECK_DATATYPE(name,ipc_type,data); \
00414 return ipc_ ## name ## _base(data,data,ipc_type,count,process); \
00415 } \
00416 \
00417 ipc_status ipc_ ## name ## _to \
00418 (c_type *target, c_type *source, ipc_datatype datatype, size_t count, int process) \
00419 { \
00420 IPC_CHECK_DATATYPE(name,ipc_type,source); \
00421 return ipc_ ## name ## _base(target,source,ipc_type,count,process); \
00422 }
00423
00424 IPC_CALL(put, signed short ,IPC_SHORT )
00425 IPC_CALL(put, signed int ,IPC_INT )
00426 IPC_CALL(put, signed long ,IPC_LONG )
00427 #ifndef LONG_LONG_UNAVAILABLE
00428 IPC_CALL(put, signed long long ,IPC_LONG_LONG )
00429 #endif
00430 IPC_CALL(put, unsigned char ,IPC_UNSIGNED_CHAR )
00431 IPC_CALL(put, unsigned short ,IPC_UNSIGNED_SHORT )
00432 IPC_CALL(put, unsigned int ,IPC_UNSIGNED )
00433 IPC_CALL(put, unsigned long ,IPC_UNSIGNED_LONG )
00434 IPC_CALL(put, float ,IPC_FLOAT )
00435 IPC_CALL(put, double ,IPC_DOUBLE )
00436 #ifndef LONG_DOUBLE_UNAVAILABLE
00437 IPC_CALL(put, long double ,IPC_LONG_DOUBLE )
00438 #endif
00439
00440 IPC_CALL(get, signed short ,IPC_SHORT )
00441 IPC_CALL(get, signed int ,IPC_INT )
00442 IPC_CALL(get, signed long ,IPC_LONG )
00443 #ifndef LONG_LONG_UNAVAILABLE
00444 IPC_CALL(get, signed long long ,IPC_LONG_LONG )
00445 #endif
00446 IPC_CALL(get, unsigned char ,IPC_UNSIGNED_CHAR )
00447 IPC_CALL(get, unsigned short ,IPC_UNSIGNED_SHORT )
00448 IPC_CALL(get, unsigned int ,IPC_UNSIGNED )
00449 IPC_CALL(get, unsigned long ,IPC_UNSIGNED_LONG )
00450 IPC_CALL(get, float ,IPC_FLOAT )
00451 IPC_CALL(get, double ,IPC_DOUBLE )
00452 #ifndef LONG_DOUBLE_UNAVAILABLE
00453 IPC_CALL(get, long double ,IPC_LONG_DOUBLE )
00454 #endif
00455 #undef IPC_CALL
00456
00457
00458 #endif
00459
00460
00461
00462
00463
00464
00465
00466 ipc_status ipc_put_base(void *target_data, void *source_data,
00467 ipc_datatype datatype, size_t count, int process)
00468 {
00469
00470 #ifdef IPC_USE_SHMEM
00471
00472 switch(ipc_datatype_size(datatype)){
00473 case 4:
00474 shmem_put32(target_data,source_data,count,process);
00475 break;
00476 case 8:
00477 shmem_put64(target_data,source_data,count,process);
00478 break;
00479 default:
00480 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_put called with unknown datatype: %d",datatype);
00481
00482 }
00483 #else
00484
00485 if (process != ipc_my_process()) {
00486 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_put to remote process unimplemented");
00487 exit(-1);
00488 }
00489 else if (target_data != source_data)
00490 memmove(target_data, source_data, count*ipc_datatype_size(datatype));
00491 #endif
00492
00493 return IPC_NO_ERROR;
00494 }
00495
00496
00497
00499 ipc_status ipc_get_base(void *target_data, void *source_data,
00500 ipc_datatype datatype, size_t count, int process)
00501 {
00502
00503 #ifdef IPC_USE_SHMEM
00504
00505 switch(ipc_datatype_size(datatype)){
00506 case 4:
00507 shmem_get32(target_data,source_data,count,process);
00508 break;
00509 case 8:
00510 shmem_get64(target_data,source_data,count,process);
00511 break;
00512 default:
00513 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_get called with unknown datatype: %d",datatype);
00514
00515 }
00516
00517 #else
00518
00519 if (process != ipc_my_process()) {
00520 ipc_notify(IPC_ALL,IPC_ERROR,"ipc_get from remote process unimplemented");
00521 exit(-1);
00522 }
00523
00524 if (target_data != source_data)
00525 memmove(target_data, source_data, count*ipc_datatype_size(datatype));
00526 #endif
00527
00528 return IPC_NO_ERROR;
00529 }
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00552 void ipc_pe_msg_delay( double scale )
00553 {
00554 clock_t start=clock();
00555 while ((clock()-start)/(double)(CLOCKS_PER_SEC) < IPC_PE_MSG_DELAY*ipc_my_process()*scale);
00556 }
00557
00558
00559
/*
 * One-time library initialization.  Only builds with _CRAYT3D defined do
 * anything here: they call shmem_set_cache_inv().
 */
void ipc_init(void)
{
#if defined(_CRAYT3D)
  shmem_set_cache_inv();
#endif
}
00571
00572
00573
00580 void ipc_init_logfile(const char *basefilename)
00581 {
00582 static char oldname[IPC_MAXFILENAMELENGTH];
00583 char buf[IPC_MAXFILENAMELENGTH];
00584
00585 if (basefilename) {
00586 SNPRINTF(buf,IPC_MAXFILENAMELENGTH,"%s.log",basefilename);
00587
00588 if (ipc_logfile && strncmp(basefilename,oldname,IPC_MAXFILENAMELENGTH)) {
00589 fclose(ipc_logfile);
00590 ipc_logfile=NULL;
00591 }
00592
00593 if (!ipc_logfile)
00594 ipc_logfile=fopen(buf,"w");
00595
00596 if (!ipc_logfile)
00597 ipc_notify(IPC_ALL,IPC_WARNING,"Runtime file %s could not be opened; log messages will be lost",buf);
00598
00599 SNPRINTF(oldname,IPC_MAXFILENAMELENGTH,"%s",basefilename);
00600 }
00601 }
00602
00603
00604
00611 void ipc_log(int print_pe, const char *format, ...)
00612 {
00613 va_list args;
00614 va_start (args, format);
00615 if ((print_pe==IPC_ALL || print_pe==ipc_my_process()) && ipc_logfile)
00616 vfprintf (ipc_logfile,format, args);
00617 va_end (args);
00618 }
00619
00620
00621
/*
 * Empty hooks: ipc_notify_base() calls ipc_error() each time it prints an
 * error and ipc_warning() each time it prints a warning.
 */
void ipc_error(void)
{
}

void ipc_warning(void)
{
}
00629
00630
00631
00632 int ipc_notify(int print_pe, int message_level, const char *format, ...)
00633 {
00634 va_list args;
00635 va_start (args, format);
00636 ipc_notify_base(1,print_pe,message_level,format,args);
00637 va_end (args);
00638
00639
00640 return 0;
00641 }
00642
00643
00644
00646 void ipc_notify2(int terminateline, int print_pe, int message_level, const char *format, ...)
00647 {
00648 va_list args;
00649 va_start (args, format);
00650 ipc_notify_base(terminateline,print_pe,message_level,format,args);
00651 va_end (args);
00652 }
00653
00654
00655
00657 void ipc_notify_base(int terminateline, int print_pe, int message_level, const char *format, va_list args)
00658 {
00659
00660 const int forcealltoprint = ipc_msg_forceall_level >= message_level || ipc_msg_level >=IPC_OVERWHELM;
00661 const int allpesprint = forcealltoprint || print_pe==IPC_ALL;
00662 const int thispeprints = allpesprint || print_pe==ipc_my_process();
00663
00664 const int iserror = IPC_ERROR >= message_level;
00665 const int iswarning = IPC_WARNING >= message_level && !iserror;
00666
00667 const int pasterrorlimit = iserror && ipc_errors++ >= ipc_max_errors;
00668 const int pastwarnlimit = iswarning && ipc_warnings++ >= ipc_max_warnings;
00669 const int pastlimit = pasterrorlimit || pastwarnlimit;
00670
00671 const int aterrorlimit = iserror && ipc_errors == ipc_max_errors;
00672 const int atwarnlimit = iswarning && ipc_warnings == ipc_max_warnings;
00673 const int atlimit = aterrorlimit || atwarnlimit;
00674
00675 const int printthismsg = thispeprints && ipc_msg_level >= message_level && !pastlimit;
00676 const int usedelay = ipc_num_processes() > 1 && (ipc_msg_synch_level >= message_level || ipc_msg_level>=IPC_OVERWHELM);
00677 const int labelpes = ipc_num_processes() > 1;
00678
00679
00680
00681
00682 FILE* outputstream = stdout;
00683
00684
00685 if (printthismsg) {
00686 if (usedelay) ipc_pe_msg_delay( 1.0 );
00687 if (labelpes) fprintf(outputstream,"PE = %3d: ",ipc_my_process());
00688 if (iserror) { fprintf(outputstream,"Error -- " ); ipc_error(); }
00689 if (iswarning){ fprintf(outputstream,"Warning -- "); ipc_warning(); }
00690
00691 vfprintf(outputstream, format, args);
00692 if (terminateline) fprintf( outputstream,".\n");
00693
00694 if (atlimit)
00695 ipc_notify(print_pe,IPC_CAUTION,"Maximum number of %s reached (%d); the rest will be discarded",
00696 (iserror ? "errors" : "warnings"),
00697 (iserror ? ipc_errors : ipc_warnings));
00698 }
00699
00700
00701 if (iserror && ipc_exit_on_error_num && (ipc_errors >= ipc_exit_on_error_num))
00702 ipc_abort(IPC_EXIT_TOO_MANY_ERRORS,"Error limit reached",ipc_errors);
00703 }
00704
00705
00706
00712 void ipc_exit(int status,const char *format, ...)
00713 {
00714 va_list args;
00715 va_start (args, format);
00716
00717
00718 if (ipc_num_processes() > 1) {
00719 ipc_pe_msg_delay( 1.0 );
00720 fprintf(stderr,"PE = %3d: ",ipc_my_process());
00721 }
00722
00723 fprintf(stderr,"Exiting -- ");
00724
00725
00726 vfprintf(stderr, format, args);
00727 va_end (args);
00728
00729 fprintf(stderr,". There were %d error(s) and %d warning(s).\n",ipc_errors,ipc_warnings);
00730
00731
00732 if (ipc_my_process()==0 && ipc_logfile)
00733 fclose(ipc_logfile);
00734
00735
00736 ipc_barrier();
00737 exit(status);
00738 }
00739
00740
00741
00745 void ipc_abort(int status,const char *format, ...)
00746 {
00747 va_list args;
00748 va_start (args, format);
00749
00750
00751 fflush(NULL);
00752
00753
00754 if (ipc_num_processes() > 1) {
00755
00756
00757
00758 clock_t start=clock();
00759 while ((clock()-start)/(double)(CLOCKS_PER_SEC) < 5);
00760
00761 ipc_pe_msg_delay( 1.0 );
00762 fprintf(stderr,"PE = %3d: ",ipc_my_process());
00763 }
00764
00765 fprintf(stderr,"Aborting -- ");
00766
00767
00768 vfprintf(stderr, format, args);
00769 va_end (args);
00770
00771 fprintf(stderr,". There were %d error(s) and %d warning(s).\n",ipc_errors,ipc_warnings);
00772
00773
00774 if (ipc_my_process()==0 && ipc_logfile)
00775 fclose(ipc_logfile);
00776
00777 #ifdef _CRAYT3E
00778
00779 globalexit(status);
00780 #else
00781
00782 exit(status);
00783 #endif
00784 }
00785
00786
00787 #ifdef __cplusplus
00788 #ifndef NO_STRINGS
00789 namespace Msg {
00790 void ipc_verbose (const string& s, const bool terminate)
00791 { ::ipc_notify2(terminate,IPC_ONE,IPC_VERBOSE, s.c_str()); }
00792 void ipc_notify (const string& s, const bool terminate)
00793 { ::ipc_notify2(terminate,IPC_ONE,IPC_STD, s.c_str()); }
00794 void ipc_requested (const string& s, const bool terminate)
00795 { ::ipc_notify2(terminate,IPC_ONE,IPC_REQUESTED, s.c_str()); }
00796 void ipc_error (const string& s, const bool terminate)
00797 { ::ipc_notify2(terminate,IPC_ONE,IPC_ERROR, s.c_str()); }
00798 void ipc_warning (const string& s, const bool terminate)
00799 { ::ipc_notify2(terminate,IPC_ONE,IPC_WARNING, s.c_str()); }
00800 }
00801 #endif
00802 #endif