diff --git a/Makefile b/Makefile index 321b9b41f4369f7e1dbc9912d2ed77912a57b19e..15d919fe257c64d642a562cb22c5056282f64afd 100644 --- a/Makefile +++ b/Makefile @@ -29,23 +29,24 @@ LIB_PATH_EXT = $(LIB)/$(LIB_NAME_EXT) NM_NAME = nam_manager.bin EX_NAME = nam_example.bin ADMIN_NAME = nam_admin.bin +USER_NAME = nam_user.bin NM_PATH = $(BIN)/$(NM_NAME) EX_PATH = $(BIN)/$(EX_NAME) ADMIN_PATH = $(BIN)/$(ADMIN_NAME) - +USER_PATH = $(BIN)/$(USER_NAME) LIB_OBJ = nam_globals.o nam_interface.o nam_util.o nam_memory.o nam_extoll.o LIB_OBJ_EXT = nam_ext_interface.o -OBJ = nam_example.o nam_admin.o nam_manager.o +OBJ = nam_example.o nam_admin.o nam_user.o nam_manager.o TAR_FILES = src doxy.conf Makefile Makefile.asictest nam_env.sh nam_env_asictest.sh nam.info $(SPEC) CC = icc MPICC = mpicc -all: lib lib_ext $(EX_NAME) $(NM_NAME) $(ADMIN_NAME) +all: lib lib_ext $(EX_NAME) $(NM_NAME) $(ADMIN_NAME) $(USER_NAME) rpm: rpm-prep rpmbuild -ba $(RPM_BUILD_PATH)/SPECS/$(SPEC) @@ -62,6 +63,9 @@ $(EX_NAME): lib lib_ext nam_example.o $(ADMIN_NAME): lib nam_admin.o $(CC) -L$(LIB) $(LIBFLAGS) -o $(ADMIN_PATH) nam_admin.o -lNAM $(CLIBS) + +$(USER_NAME): lib lib_ext nam_user.o + $(CC) -L$(LIB) $(LIBFLAGS) -o $(USER_PATH) nam_user.o -lNAM -lNAM_ext $(CLIBS) lib_ext : lib $(LIB_OBJ_EXT) $(MPICC) $(CFLAGS_EXT) -shared -o $(LIB_PATH_EXT) $(LIB_OBJ_EXT) $(CLIBS_EXT) -Llib -lNAM @@ -88,4 +92,4 @@ tar: rm -rf $(VERS) clean: - rm -rf $(LIB_OBJ) $(OBJ) $(LIB_OBJ_EXT) $(LIB)/* $(NM_PATH) $(EX_PATH) $(TARBALL) doc/html $(TARBALL) + rm -rf $(LIB_OBJ) $(OBJ) $(LIB_OBJ_EXT) $(LIB)/* $(BIN)/* $(TARBALL) doc/html $(TARBALL) diff --git a/Makefile.asictest b/Makefile.asictest index 8db4d0617fe7a6cb2d8d400664aa2de27b9fe1f2..dfc7c73ed7db0bdfc54fbaa48ef35e142a23c557 100644 --- a/Makefile.asictest +++ b/Makefile.asictest @@ -43,7 +43,7 @@ doc: doxy.conf $(NM_NAME): lib nam_manager.o $(CC) $(CFLAGS) -L$(LIB) $(LIBFLAGS) -o $(NM_PATH) nam_manager.o -lNAM $(CLIBS) -$(EX_NAME): lib_ext nam_example.o +$(EX_NAME): lib 
lib_ext nam_example.o $(MPICC) -L$(LIB) $(LIBFLAGS_EXT) -lNAM -lNAM_ext -o $(EX_PATH) nam_example.o -lssl -lcrypto $(CLIBS_EXT) $(ADMIN_NAME): lib nam_admin.o diff --git a/libNAM-1.0-1.spec b/libNAM-1.0-1.spec index 61dc4164570d4b85ab749da3aa508ab34c722bc4..936816d72e7fddcded86477bfc1b904ca59628ad 100644 --- a/libNAM-1.0-1.spec +++ b/libNAM-1.0-1.spec @@ -33,6 +33,7 @@ install -m 644 src/nam_interface.h $RPM_BUILD_ROOT/usr/local/libNAM/include/nam_ install -m 644 src/nam_ext_interface.h $RPM_BUILD_ROOT/usr/local/libNAM/include/nam_ext_interface.h install -m 744 nam.info $RPM_BUILD_ROOT/usr/local/libNAM/etc/nam.info install -m 700 bin/nam_admin.bin $RPM_BUILD_ROOT/usr/local/libNAM/bin/nam_admin.bin +install -m 755 bin/nam_user.bin $RPM_BUILD_ROOT/usr/local/libNAM/bin/nam_user.bin install -m 755 bin/nam_example.bin $RPM_BUILD_ROOT/usr/local/libNAM/bin/nam_example.bin %clean @@ -46,6 +47,7 @@ rm -rf $RPM_BUILD_ROOT /usr/local/libNAM/lib/libNAM.a /usr/local/libNAM/lib/libNAM_ext.a /usr/local/libNAM/bin/nam_admin.bin +/usr/local/libNAM/bin/nam_user.bin /usr/local/libNAM/bin/nam_example.bin /usr/local/libNAM/include/nam_interface.h /usr/local/libNAM/include/nam_ext_interface.h diff --git a/nam.asictest b/nam.asictest index af1e3141d7020a57ff3bb8d4fea46d4d7ee0a3c8..7e67c65c19eee8434c0ff722672e60ece239135e 100644 --- a/nam.asictest +++ b/nam.asictest @@ -1,6 +1,6 @@ count = 1 name = NAM-ASICTEST2 -NODEID = 130 +NODEID = 129 VPID = 0 size = 2 GB \ No newline at end of file diff --git a/nam_env.sh b/nam_env.sh index d18e7197db07afb8462937b30464aaf357e75a17..1133496e6aa3e2cc706b196ddc747813d758575a 100644 --- a/nam_env.sh +++ b/nam_env.sh @@ -7,7 +7,7 @@ export NAM_PUT_NOTIS=8 export NAM_RMA2_MTU=65536 export NAM_MANAGER="deepm" export NAM_INFO=$HOME/nam/libNAM/nam.info -export NAM_MAX_RANKS_CPRS=24 +export NAM_MAX_RANKS_CPRS=48 export NAM_PERSISTANT_SECONDS=300 #export 
LD_LIBRARY_PATH=/usr/local/intel/composerxe/lib/intel64:/direct/Software/Extoll/ASIC2/extoll2/lib:/usr/local/parastation/psmpi-5.1.1-1/lib:/usr/local/parastation/pscom/lib64:/homec/deep/deep06/nam/libNAM/lib:$LD_LIBRARY_PATH diff --git a/src/nam_admin.c b/src/nam_admin.c index cae453a026660a6b0cb68d5777415bcbca15b3b3..3161698a85a2c35e75f715a896ac716bad9550fc 100644 --- a/src/nam_admin.c +++ b/src/nam_admin.c @@ -33,6 +33,7 @@ RMA2_Nodeid arg_nodeid = 3; RMA2_VPID arg_vpid = 0; int arg_verbose = 0; int arg_clear_all = 0; +int arg_clear_own = 0; int arg_remove = 0; int arg_rra_get = 0; int arg_rra_put = 0; @@ -47,7 +48,8 @@ static void parse_opt(int argc, char **argv) struct poptOption optionsTable [] = { - { "clear-all", 'c',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_NONE, &arg_clear_all, 0, "Clear all allocations on all NAM", "switch" }, + { "clear-all", 'c',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_NONE, &arg_clear_all, 0, "Clear all allocations on all NAMs", "switch" }, + { "clear-own", 'o',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_NONE, &arg_clear_own, 0, "Clear own allocations", "switch" }, { "rra-put", '\0',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_NONE, &arg_rra_put, 0, "Put Data via RRA", "flag" }, { "rra-get", '\0',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_NONE, &arg_rra_get, 0, "Get Data via RRA", "flag" }, { "list", 'l',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_NONE, &arg_list, 0, "List all allocations managed by the NM", "flag" }, @@ -190,6 +192,7 @@ int clear_all_allocations() int clear_persistant_allocation(uint64_t challenge) { nm_request_t req; + nam_ext_allocation_t *current; char f_str[300]; nm_connection = tcp_connect(nm_address); @@ -214,6 +217,17 @@ int clear_persistant_allocation(uint64_t challenge) if(answer) { printf("Persistant allocation with challenge %"PRIu64" deleted successfully!\n", challenge); + //run through allocations + LIST_FOREACH(current,&pers_ext_alloc_head, ext_allocations) + { + if(current->alloc->challenge == challenge) + { + 
LIST_REMOVE(current,ext_allocations); + nam_free_ext_allocation(current); + break; + } + } + //delete the allocation file sprintf(f_str, "%s/%"PRIu64".alloc",nam_home, challenge); unlink(f_str); @@ -233,6 +247,74 @@ int clear_persistant_allocation(uint64_t challenge) return 1; } +int clear_own_allocations() +{ + nm_request_t req; + uint64_t challenge; + nam_ext_allocation_t *current; + + char f_str[300]; + int answer = 0; + int again = 1; + + nm_connection = tcp_connect(nm_address); + + while(again) + { + again = 0; + LIST_FOREACH(current,&pers_ext_alloc_head, ext_allocations) + { + answer = 0; + challenge = current->alloc->challenge; + req.challenge = challenge; + req.flag = NM_FREE_PERSISTANT; + + if(tcp_writeall(nm_connection, &req, sizeof(req)) != sizeof(req)) + { + printf("Error requesting to delete persistant allocation with challenge %"PRIu64" from NM!\n", challenge); + goto err; + } + + if(tcp_readall(nm_connection, &answer, sizeof(int)) != sizeof(int)) + { + printf("Error reading answer from NM to delete persistant allocation with challenge %"PRIu64"!\n", challenge); + goto err; + } + + + if(answer) + { + printf("Persistant allocation with challenge %"PRIu64" deleted successfully!\n", challenge); + + + } + else + { + printf("Persistant allocation with challenge %"PRIu64" could not be found!\n", challenge); + } + + LIST_REMOVE(current,ext_allocations); + nam_free_ext_allocation(current); + + + //delete the allocation file + sprintf(f_str, "%s/%"PRIu64".alloc",nam_home, challenge); + unlink(f_str); + sprintf(f_str, "%s/%"PRIu64".alloc_ext",nam_home, challenge); + unlink(f_str); + again = 1; + break; /* current was freed: restart the scan from the list head */ + } + } + + close(nm_connection); + + return 0; + + err: + close(nm_connection); + return 1; +} int list_allocations() { int rc = 0; @@ -372,6 +454,12 @@ int main(int argc, char **argv) clear_all_allocations(); } + if(arg_clear_own) + { + printf("Clearing own allocations in NM\n"); + + clear_own_allocations(); + } if(arg_remove) { if(!challenge_set) diff --git a/src/nam_example.c 
b/src/nam_example.c index 72b1c3f934305365aaffbe0f9bef83bf01b44a35..68bd72b977cbcad9cbd091f093874c944afca2d9 100644 --- a/src/nam_example.c +++ b/src/nam_example.c @@ -7,6 +7,7 @@ #include <sys/types.h> #include <sys/socket.h> +#include <limits.h> #include <netdb.h> #include <fcntl.h> #include <stdlib.h> @@ -26,6 +27,18 @@ #include "nam_ext_interface.h" #include "nam_memory.h" +#if SIZE_MAX == UCHAR_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_CHAR +#elif SIZE_MAX == USHRT_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_SHORT +#elif SIZE_MAX == UINT_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED +#elif SIZE_MAX == ULONG_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_LONG +#elif SIZE_MAX == ULLONG_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG +#endif + #define CHALLENGE_SET 1 #define NAM_SET 2 @@ -216,8 +229,59 @@ int compareXORBuffers(md5_buffer_t *buf1, md5_buffer_t *buf2) return 0; } +int get_random_length(int max, int rank) +{ + int r; + + srand(time(NULL) * rank); + + r = rand() / (RAND_MAX / (max + 1) + 1); + + r = ((r-1)/16 +1) * 16; + + return r; +} +int init_buffer(void *buf, size_t size) +{ + int i; + int rank; + int *buffer; + + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + + buffer = (int *) buf; + for (i = 0; i < size/sizeof(int); ++i) + { + buffer[i] = rank; + } + + + + return 0; +} +int print_buffer(int rank, void *buf, size_t size) +{ + int i; + int n; + int *buffer = (int*) buf; + + + printf("Rank %d: ", rank); + -int init_xor_buffer(md5_buffer_t *xor_buffer, size_t size) + for (i = 0; i < size/sizeof(int); ++i) + { + printf("%d", buffer[i]); + + } + printf("\n"); + + return 0; + +} +int init_xor_buffer(md5_buffer_t *xor_buffer, size_t size, int random) { xor_buffer->buf = malloc(size); assert(xor_buffer->buf); @@ -230,7 +294,11 @@ int init_xor_buffer(md5_buffer_t *xor_buffer, size_t size) xor_buffer->size = size; - randomizeBuffer(xor_buffer->buf, xor_buffer->size); + if(random) + randomizeBuffer(xor_buffer->buf, xor_buffer->size); + else + 
init_buffer(xor_buffer->buf, xor_buffer->size); + md5hash(xor_buffer->buf, xor_buffer->size, xor_buffer->md5_digest); @@ -263,8 +331,8 @@ md5_buffer_t computeXOR(md5_buffer_t *input, int root, MPI_Comm comm) if(my_rank == root) { - //gather all bytes - MPI_Gather(&input->size, 1, MPI_UNSIGNED_LONG, recv_sizes, 1, MPI_UNSIGNED_LONG, root, comm); + // gather all bytes + MPI_Gather(&input->size, 1, my_MPI_SIZE_T, recv_sizes, 1, my_MPI_SIZE_T, root, comm); for (i = 0; i < ranks; ++i) { @@ -290,10 +358,11 @@ md5_buffer_t computeXOR(md5_buffer_t *input, int root, MPI_Comm comm) for (i = 0; i < ranks; ++i) { + nam_print(1,"Recieving %zu byte from rank %d and and XORing", recv_sizes[i], i); // recv the message if(my_rank != i) { - MPI_Recv(recv, recv_sizes[i], MPI_CHAR, i, 99, comm, &status); + MPI_Recv(recv, recv_sizes[i], MPI_BYTE, i, 99, comm, &status); } assert(b); @@ -306,16 +375,16 @@ md5_buffer_t computeXOR(md5_buffer_t *input, int root, MPI_Comm comm) } //compute the md5 hash - md5hash(result.buf, max_size, result.md5_digest); + md5hash(result.buf, result.size, result.md5_digest); } else { // gather all bytes - MPI_Gather(&input->size, 1, MPI_UNSIGNED_LONG, recv_sizes, 1, MPI_UNSIGNED_LONG, root, comm); + MPI_Gather(&input->size, 1, my_MPI_SIZE_T, recv_sizes, 1, my_MPI_SIZE_T, root, comm); //send own buffer - MPI_Send(input->buf, input->size, MPI_CHAR, root, 99, comm); + MPI_Send(input->buf, input->size, MPI_BYTE, root, 99, comm); } @@ -458,12 +527,14 @@ static int test_checkpointing() md5_buffer_t buffer; md5_buffer_t result; md5_buffer_t compare; - void *buf; + md5_buffer_t rebuild_buf; MPI_Comm comm = MPI_COMM_WORLD; size_t my_bytes = arg_bytcount; + size_t max_size; + size_t total_bytes; nam_ext_allocation_t *ext_alloc; @@ -471,43 +542,29 @@ static int test_checkpointing() MPI_Comm_size(comm, &size); ext_alloc = nam_alloc_cprs(root,comm); - if(my_rank == root) + + if(arg_dbc) { - if(arg_dbc) - { - nam_print(10,"using different bytecount for rank 0: %lu (std: 
%lu)", my_bytes/2, my_bytes); - my_bytes/=2; - } - //only root requires to do this - buf = malloc(my_bytes); - assert(buf); - nam_get_sync(buf, 0, my_bytes, ext_alloc->alloc); -// printf("Result from get BEFORE checkpointing:\n"); -// printHex(buf, my_bytes); + nam_print(10,"using different bytecount!"); + my_bytes = get_random_length(arg_bytcount, my_rank); } + nam_print(1,"My Rank: %d, bytecount: %zu", my_rank, my_bytes); + assert(ext_alloc); MPI_Comm_rank(ext_alloc->comm_local, &my_local_rank); // memset(buf, 99, my_bytes); - init_xor_buffer(&buffer, my_bytes); -// nam_print(1, "HEX VALUE of Buffer to checkpoint: ") -// printHex(buffer.buf, my_bytes); + init_xor_buffer(&buffer, my_bytes,0); + print_buffer(my_rank, buffer.buf, buffer.size); + - - //test if buffer is buffer -// if(compareXORBuffers(&buffer, &buffer)) -// { -// nam_print(0, "Error in MD5 checksum, buffer is not buffer (wondering!)"); -// } //compute XOR of all buffers + result = computeXOR(&buffer, root, ext_alloc->comm_local); -// if(my_rank == root) -// { -// printf("Result from own XOR checkpointing (256 bytes)\n"); -// printHex(result.buf, 256); -// } + if(my_rank == 0) + print_buffer(my_rank, result.buf, result.size); MPI_Barrier(comm); @@ -520,31 +577,34 @@ static int test_checkpointing() time = (double)(t2 - t1); MPI_Reduce(&time, &max_time, 1, MPI_DOUBLE, MPI_MAX, root, comm); - + MPI_Reduce(&my_bytes, &total_bytes, 1, MPI_UNSIGNED_LONG, MPI_SUM, root, comm); if(my_rank == root) { throuput = my_bytes * size / max_time; - nam_print(3," Checkpointing of %.2f MB took %f s, Bandwidth: %f MB/s",my_bytes*size / 1024.0 / 1024.0 , time / 10e6, throuput); + nam_print(3," Checkpointing of %.2f MB took %f s, Bandwidth: %f MB/s",total_bytes / 1024.0 / 1024.0 , time / 10e6, throuput); } // read the result if(my_local_rank == root) { - compare.buf = malloc(result.size); + compare.buf = calloc(result.size, sizeof(char)); assert(compare.buf); compare.size = result.size; - nam_get_sync(compare.buf, 0, 
result.size, ext_alloc->alloc); + nam_get_sync(compare.buf, 0, compare.size, ext_alloc->alloc); + print_buffer(my_rank, result.buf, result.size); + print_buffer(my_rank, compare.buf, compare.size); md5hash(compare.buf, compare.size, compare.md5_digest); + if(compareXORBuffers(&result, &compare)) { - nam_print(0, "Error in MD5 checksum comparison! NAM computed XOR wrong!"); + nam_print(0, "Checkpoint: Rank %d Error in MD5 checksum comparison! NAM computed XOR wrong!", my_rank); } else { - nam_print(0, "MD5 checksum comparison successful! NAM XOR computation valid!"); + nam_print(0, "Checkpoint: Rank %d MD5 checksum comparison successful! NAM XOR computation valid!", my_rank); } // memcpy(buf, compare.buf, 256); // printf("Result from get AFTER checkpointing (256 bytes)\n"); @@ -564,7 +624,7 @@ static int test_checkpointing() if(my_local_rank == rank_to_rebuild) { - rebuild_buf.buf = malloc(my_bytes); + rebuild_buf.buf = calloc(my_bytes, sizeof(char)); rebuild_buf.size = my_bytes; nam_restart_sync(rebuild_buf.buf, rebuild_buf.size, 0, ext_alloc); md5hash(rebuild_buf.buf, rebuild_buf.size, rebuild_buf.md5_digest); @@ -739,7 +799,7 @@ static int checkpoint_bm() alloc = nam_alloc_cprs(root,comm); - init_xor_buffer(&buffer, my_bytes); + init_xor_buffer(&buffer, my_bytes,1); MPI_Barrier(comm); t1 = getusec(); diff --git a/src/nam_ext_interface.c b/src/nam_ext_interface.c index c34919bc969498a0448cf0de28a386454cd53824..c31732abbf9f6d37ff15c0d3db79d4461de8d6bb 100644 --- a/src/nam_ext_interface.c +++ b/src/nam_ext_interface.c @@ -13,24 +13,13 @@ #include <sys/stat.h> #include <fcntl.h> #include <regex.h> +#include <mpi.h> #include "nam_memory.h" #include "nam_util.h" #include "nam_interface.h" #include "nam_ext_interface.h" #include "nam_extoll.h" -#if SIZE_MAX == UCHAR_MAX - #define my_MPI_SIZE_T MPI_UNSIGNED_CHAR -#elif SIZE_MAX == USHRT_MAX - #define my_MPI_SIZE_T MPI_UNSIGNED_SHORT -#elif SIZE_MAX == UINT_MAX - #define my_MPI_SIZE_T MPI_UNSIGNED -#elif SIZE_MAX == 
ULONG_MAX - #define my_MPI_SIZE_T MPI_UNSIGNED_LONG -#elif SIZE_MAX == ULLONG_MAX - #define my_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG -#endif - #define GET_COMM 11 #define GET_B_COMM 10 #define PUT_COMM 3 @@ -65,8 +54,6 @@ typedef struct nam_cp_args nam_ext_allocation_t *ext_alloc; } nam_cp_args_t; - - static inline unsigned long getusec(void) { @@ -74,6 +61,24 @@ unsigned long getusec(void) gettimeofday(&tv,NULL); return (tv.tv_usec+tv.tv_sec*1000000); } +int dummy_allgather (void *s_buf, int count, MPI_Datatype type, void *r_buf, MPI_Comm comm) +{ +//MPI_AllGather(&my_rank_global, 1, MPI_INT, ext_alloc->global_ranks, 1, MPI_INT, comm_local); + + int root = 0; + int ranks; + + MPI_Comm_size(comm, &ranks); + + + MPI_Gather(s_buf, count, type,r_buf,count,type,root,comm); + MPI_Bcast(r_buf, ranks*count, type, root, comm); + + + return 0; +} + + int nam_create_nam_mpi_allocation_type() { MPI_Datatype oldtypes[3]; @@ -537,68 +542,7 @@ int nam_bcast(size_t offset, void *buf, size_t bytes, int root, nam_ext_allocati } -int nam_distribute_extoll_info(RMA2_Nodeid my_nodeid, RMA2_VPID my_vpid, RMA2_Nodeid *nodeids, RMA2_VPID *vpids, int n) -{ -// MPI_AllGather(&my_vpid,1,MPI_UINT16_T,vpids,1,MPI_UINT16_T,MPI_COMM_WORLD); -// MPI_AllGather(&my_nodeid,1,MPI_UINT16_T,nodeids,1,MPI_UINT16_T,MPI_COMM_WORLD); - - return 0; -} - -int nam_needs_restart(MPI_Comm comm, RMA2_Nodeid *nodeid, RMA2_VPID *vpid) -{ - RMA2_Nodeid *oldnodeids; - RMA2_VPID *oldvpids; - int i; - int found_myself = 0; - int *found; - int nprocs; - int n_notfound = 0; - int restart_rank; - - MPI_Comm_size(comm, &nprocs); - - found = malloc(nprocs * sizeof(int)); - oldnodeids = malloc(nprocs * sizeof(RMA2_Nodeid)); - oldvpids = malloc(nprocs * sizeof(RMA2_VPID)); - - //check if we can read the file - if(read_extoll_nodefile(nam_xor_nodefile_path, oldnodeids, oldvpids, nprocs)) - { - nam_print(1,"Nodefile not existant or incomplete. 
No restart!"); - return -1; - } - - //Everyone tries to find itself - for (i = 0; i < nprocs; ++i) - { - if(oldnodeids[i] == my_nodeid && oldvpids[i] == my_vpid) - found_myself = 1; - } - - //Gather result - MPI_Allgather(&found_myself, 1, MPI_INT, found, 1, MPI_INT, comm); - - //check if there is one and only one which is missing - for (i = 0; i < nprocs; ++i) - { - if(!found[i]) - { - n_notfound++; - restart_rank = i; - if(n_notfound > 1) - return -1; - } - } - - if(n_notfound) - { - *nodeid = oldnodeids[restart_rank]; - *vpid = oldvpids[restart_rank]; - } - return -1; -} int nam_restart(nam_cp_args_t *args) { int my_data_avail; @@ -756,6 +700,7 @@ int nam_checkpoint(nam_cp_args_t *args) { int size = 4; int my_rank; + int my_rank_global; int i; int rc = 0; int cprs_active = 0; @@ -795,6 +740,9 @@ int nam_checkpoint(nam_cp_args_t *args) mem_info_t *mem = extoll_register(buf, my_bytes); my_nla = mem->nla; + MPI_Comm_rank(ext_alloc->comm_global, &my_rank_global); + nam_print(1,"Rank %d checkpointing %"PRIu64" bytes on NAM %d", my_rank_global, my_bytes, alloc->nam_index); + //check if its already configured. 
MPI_Allreduce(&mem->cprs_active, &cprs_active, 1, MPI_INT, MPI_LAND, comm); @@ -898,7 +846,7 @@ int nam_checkpoint(nam_cp_args_t *args) } mem->cprs_active = 1; - MPI_Reduce(&my_bytes, &max_bytes,1, my_MPI_SIZE_T ,MPI_MAX, root, comm); + MPI_Allreduce(&my_bytes, &max_bytes,1, my_MPI_SIZE_T ,MPI_MAX, comm); ext_alloc->max_bytes = max_bytes; if(my_rank == root) write_ext_allocation(nam_home, ext_alloc); @@ -1057,7 +1005,7 @@ nam_allocation_t *nam_sync_allocation(nam_allocation_t *alloc, int root, MPI_Com goto err; } - nam_print_allocation(alloc); +// nam_print_allocation(alloc); MPI_Bcast(&created, 1, MPI_INT, root, comm); @@ -1080,8 +1028,8 @@ nam_allocation_t *nam_sync_allocation(nam_allocation_t *alloc, int root, MPI_Com // MPI_Bcast(&alloc_info.challenge, 1, MPI_UINT64_T, root, comm); - MPI_Allgather(&connected, 1, MPI_INT, all_connected, 1, MPI_INT, comm); - + //MPI_Allgather(&connected, 1, MPI_INT, all_connected, 1, MPI_INT, comm); + dummy_allgather(&connected, 1, MPI_INT, all_connected, comm); } @@ -1121,7 +1069,7 @@ nam_allocation_t *nam_sync_allocation(nam_allocation_t *alloc, int root, MPI_Com - nam_print_allocation(alloc); +// nam_print_allocation(alloc); if(nam_connect_allocation(alloc)) @@ -1129,7 +1077,8 @@ nam_allocation_t *nam_sync_allocation(nam_allocation_t *alloc, int root, MPI_Com connected = 0; } - MPI_Allgather(&connected, 1, MPI_INT, all_connected, 1, MPI_INT, comm); + //MPI_Allgather(&connected, 1, MPI_INT, all_connected, 1, MPI_INT, comm); + dummy_allgather(&connected, 1, MPI_INT, all_connected, comm); } @@ -1172,7 +1121,7 @@ nam_ext_allocation_t *nam_alloc_cprs(int root, MPI_Comm comm) //local root requests allocation MPI_Comm_rank(comm_local, &my_rank_local); - nam_print(1, "My rank global: %d, my rank local: %d", my_rank_global, my_rank_local); + nam_print(1, "My rank global: %d, my rank local: %d, my nam: %d", my_rank_global, my_rank_local, i_nam); if(my_rank_local == local_root) { @@ -1186,6 +1135,7 @@ nam_ext_allocation_t 
*nam_alloc_cprs(int root, MPI_Comm comm) if(alloc) { + nam_print(1, "My rank global: %d, my rank local: %d, got allocation", my_rank_global, my_rank_local); ext_alloc = malloc(sizeof(nam_ext_allocation_t)); ext_alloc->alloc = alloc; ext_alloc->comm_global = comm; @@ -1195,13 +1145,14 @@ nam_ext_allocation_t *nam_alloc_cprs(int root, MPI_Comm comm) MPI_Comm_size(comm_local, &ext_alloc->local_size); ext_alloc->global_ranks = malloc(ext_alloc->local_size* sizeof(int)); - MPI_Gather(&my_rank_global, 1, MPI_INT, ext_alloc->global_ranks, 1, MPI_INT, local_root, comm_local); - + //MPI_AllGather(&my_rank_global, 1, MPI_INT, ext_alloc->global_ranks, 1, MPI_INT, comm_local); + dummy_allgather(&my_rank_global, 1, MPI_INT, ext_alloc->global_ranks, comm_local); + LIST_INSERT_HEAD(&pers_ext_alloc_head,ext_alloc,ext_allocations); if(my_rank_local == local_root) { - LIST_INSERT_HEAD(&pers_ext_alloc_head,ext_alloc,ext_allocations); + write_ext_allocation(nam_home, ext_alloc); } } @@ -1245,8 +1196,8 @@ nam_ext_allocation_t *nam_reuse_cprs(int root, MPI_Comm comm) all_found = malloc(global_size * sizeof(int)); //check if all found their allocation - MPI_Allgather(&i_found, 1, MPI_INT, all_found, 1, MPI_INT, comm); - + //MPI_Allgather(&i_found, 1, MPI_INT, all_found, 1, MPI_INT, comm); + dummy_allgather(&i_found, 1, MPI_INT, all_found, comm); for (i = 0; i < global_size; ++i) { if(!all_found[i]) { @@ -1412,9 +1363,8 @@ int nam_free_all(nam_ext_allocation_t *ext_alloc) unlink(f_str); } - - free(ext_alloc->alloc); - free(ext_alloc); + LIST_REMOVE(ext_alloc, ext_allocations); + nam_free_ext_allocation(ext_alloc); diff --git a/src/nam_extoll.c b/src/nam_extoll.c index ae0583c4487cd67618a28169e244545f3d3ee0fc..7d1493174470839fa4d80fabc0f1abacd5a34288 100644 --- a/src/nam_extoll.c +++ b/src/nam_extoll.c @@ -533,6 +533,7 @@ void *extoll_get(extoll_transfer_args_t *args) // printf("Reading from offset %"PRIu32" to NLA %" PRIu64 " with len %lu\n", src_offset, dest_add, copy_len ); /* post the 
request and mark buffer as in use */ + nam_print(1,"Getting %zu bytes", copy_len); rc = rma2_post_get_qw(con->rma2_port, con->rma2_handle, recv->bufs.mr, src_offset, copy_len, dest_add, RMA2_COMPLETER_NOTIFICATION, RMA2_CMD_DEFAULT); if(rc) diff --git a/src/nam_interface.c b/src/nam_interface.c index 2d4d28b960041f319b3ec259aa0c695207ccbe89..1233376ca6f7ec463ccced7f442ebc71b82ef707 100644 --- a/src/nam_interface.c +++ b/src/nam_interface.c @@ -502,6 +502,7 @@ int nam_free(nam_allocation_t *alloc) if(!alloc->persistant) { free(alloc); + alloc = NULL; } else { diff --git a/src/nam_interface.h b/src/nam_interface.h index ba7f6903ec80d9430c022d353b7660a1d5822be6..ca4786fe1702e876afd26f5be9dd87bba38fdeb1 100644 --- a/src/nam_interface.h +++ b/src/nam_interface.h @@ -10,10 +10,22 @@ #define NAM_INTERFACE_H_ #include <stddef.h> - +#include <limits.h> #include "nam_extoll.h" #include "nam_globals.h" +#if SIZE_MAX == UCHAR_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_CHAR +#elif SIZE_MAX == USHRT_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_SHORT +#elif SIZE_MAX == UINT_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED +#elif SIZE_MAX == ULONG_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_LONG +#elif SIZE_MAX == ULLONG_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG +#endif + /** * \struct nam_info_t * Structure representing information about a NAM diff --git a/src/nam_memory.c b/src/nam_memory.c index 232546c990f51ec5bf8384747297971115846a13..2dd2499cccb1b37ce70fc93e7958893228603ef7 100644 --- a/src/nam_memory.c +++ b/src/nam_memory.c @@ -431,3 +431,16 @@ int nam_print_allocation(nam_allocation_t *alloc) } + +int nam_free_ext_allocation(nam_ext_allocation_t *ext_alloc) +{ + if(ext_alloc->alloc) + free(ext_alloc->alloc); + + if(ext_alloc->global_ranks) + free(ext_alloc->global_ranks); + + free(ext_alloc); + + return 0; +} diff --git a/src/nam_memory.h b/src/nam_memory.h index 3f4b2a7e4dc1586acb2ebe646aa51ea8b157c166..750e5a7c8997645dac00cbfb7ffa1aace8a06aad 100644 --- a/src/nam_memory.h +++ 
b/src/nam_memory.h @@ -18,6 +18,7 @@ #include <inttypes.h> #include "nam_extoll.h" #include "nam_interface.h" +#include "nam_ext_interface.h" @@ -48,7 +49,7 @@ int delete_allocation( nam_allocation_t *alloc); nam_allocation_t *re_allocation(nam_allocation_t *alloc, size_t size); int getNumberofExistingAllocations(); int nam_print_allocation(nam_allocation_t *alloc); - +int nam_free_ext_allocation(nam_ext_allocation_t *ext_alloc); #endif /* NAM_MEMORY_H_ */ diff --git a/src/nam_user.c b/src/nam_user.c new file mode 100644 index 0000000000000000000000000000000000000000..226ff9a1e71f24cbcd919d9ff22829dea570f8c9 --- /dev/null +++ b/src/nam_user.c @@ -0,0 +1,349 @@ +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> +#include <fcntl.h> +#include <regex.h> +#include <stdlib.h> +#include <stdio.h> +#include <inttypes.h> +#include <dirent.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <strings.h> +#include <popt.h> +#include <rma2.h> +#include "nam_interface.h" +#include "nam_memory.h" + + +#define PAYLOAD_SET 2 +#define ADDRESS_SET 4 +#define NODEID_SET 8 +#define CHALLENGE_SET 16 + + +int challenge_set = 0; +RMA2_Nodeid arg_nodeid = 3; +RMA2_VPID arg_vpid = 0; +int arg_verbose = 0; +char *arg_challenge = "123456789101112131"; +int arg_clear_own = 0; +int arg_remove = 0; + +int arg_list = 0; + + +static void parse_opt(int argc, char **argv) +{ + int c; + poptContext optCon; + const char *no_arg; + + struct poptOption optionsTable [] = + { + { "clear-own", 'o',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_NONE, &arg_clear_own, 0, "Clear own allocations", "switch" }, + { "list", 'l',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_NONE, &arg_list, 0, "List all allocations managed by the NM", "flag" }, + { "remove", 'r',POPT_ARGFLAG_SHOW_DEFAULT | POPT_ARG_STRING, &arg_challenge, CHALLENGE_SET, "Clear allocation with challenge on the NAM", "challenge" }, + POPT_AUTOHELP + POPT_TABLEEND + }; + + optCon = poptGetContext(NULL, argc, (const char **) argv, optionsTable, 
0); + + + if (argc < 2) { + poptPrintUsage(optCon, stderr, 0); + exit(1); + } + + + while ((c = poptGetNextOpt(optCon)) >= 0) { + switch (c) { // c = poptOption.val; + case 'v': + { + arg_verbose++; break; + } + case CHALLENGE_SET: + { + challenge_set = 1; arg_remove = 1; break; + } + //default: fprintf(stderr, "unhandled popt value %d\n", c); break; + } + } + + if (c < -1) { /* an error occurred during option processing */ + fprintf(stderr, "%s: %s\n", + poptBadOption(optCon, POPT_BADOPTION_NOALIAS), + poptStrerror(c)); + poptPrintHelp(optCon, stderr, 0); + exit(1); + } + + + no_arg = poptGetArg(optCon); // should return NULL + if (no_arg) { + fprintf(stderr, "%s: %s\n", + no_arg, poptStrerror(POPT_ERROR_BADOPT)); + poptPrintHelp(optCon, stderr, 0); + exit(1); + } + + poptFreeContext(optCon); + +} + +int clear_persistant_allocation(uint64_t challenge) +{ + nm_request_t req; + nam_ext_allocation_t *current; + char f_str[300]; + + nm_connection = tcp_connect(nm_address); + int answer = 0; + + req.challenge = challenge; + req.flag = NM_FREE_PERSISTANT; + + if(tcp_writeall(nm_connection, &req, sizeof(req)) != sizeof(req)) + { + printf("Error requesting to delete persistant allocation with challenge %"PRIu64" from NM!\n", challenge); + goto err; + } + + if(tcp_readall(nm_connection, &answer, sizeof(int)) != sizeof(int)) + { + printf("Error reading answer from NM to delete persistant allocation with challenge %"PRIu64"!\n", challenge); + goto err; + } + + + if(answer) + { + printf("Persistant allocation with challenge %"PRIu64" deleted successfully!\n", challenge); + + } + else + { + printf("Persistant allocation with challenge %"PRIu64" could not be found!\n", challenge); + } + + LIST_FOREACH(current,&pers_ext_alloc_head, ext_allocations) + { + if(current->alloc->challenge == challenge) + { + LIST_REMOVE(current,ext_allocations); + nam_free_ext_allocation(current); + break; + } + } + + //delete the allocation file + sprintf(f_str, "%s/%"PRIu64".alloc",nam_home, challenge); + 
unlink(f_str); + sprintf(f_str, "%s/%"PRIu64".alloc_ext",nam_home, challenge); + unlink(f_str); + + close(nm_connection); + + return 0; + + err: + close(nm_connection); + return 1; +} +int clear_own_allocations() +{ + nm_request_t req; + uint64_t challenge; + nam_ext_allocation_t *current; + nam_ext_allocation_t *ownallocations; + char f_str[300]; + int answer = 0; + int again = 1; + + nm_connection = tcp_connect(nm_address); + + while(again) + { + again = 0; + LIST_FOREACH(current,&pers_ext_alloc_head, ext_allocations) + { + answer = 0; + challenge = current->alloc->challenge; + req.challenge = challenge; + req.flag = NM_FREE_PERSISTANT; + + if(tcp_writeall(nm_connection, &req, sizeof(req)) != sizeof(req)) + { + printf("Error requesting to delete persistant allocation with challenge %"PRIu64" from NM!\n", challenge); + goto err; + } + + if(tcp_readall(nm_connection, &answer, sizeof(int)) != sizeof(int)) + { + printf("Error reading answer from NM to delete persistant allocation with challenge %"PRIu64"!\n", challenge); + goto err; + } + + + if(answer) + { + printf("Persistant allocation with challenge %"PRIu64" deleted successfully!\n", challenge); + + + } + else + { + printf("Persistant allocation with challenge %"PRIu64" could not be found!\n", challenge); + } + + LIST_REMOVE(current,ext_allocations); + nam_free_ext_allocation(current); + + + //delete the allocation file + sprintf(f_str, "%s/%"PRIu64".alloc",nam_home, challenge); + unlink(f_str); + sprintf(f_str, "%s/%"PRIu64".alloc_ext",nam_home, challenge); + unlink(f_str); + again = 1; + break; /* current was freed: restart the scan from the list head */ + } + } + + close(nm_connection); + + return 0; + + err: + close(nm_connection); + return 1; +} +int list_allocations() +{ + int rc = 0; + int n_allocations; + nam_allocation_t *all = NULL; + nam_allocation_t *alloc; + char timebuffer[80]; + struct tm timeinfo; + int i, k = 0; + + nm_request_t req; + size_t bytes; + + req.flag = NM_LIST; + nm_connection = tcp_connect(nm_address); + + if(tcp_writeall(nm_connection, &req, sizeof(req)) != sizeof(req)) + { + printf("Error 
requesting to list allocations from NM!\n"); + goto err; + } + + //read how much + if(tcp_readall(nm_connection, &n_allocations, sizeof(int)) != sizeof(int)) + { + printf("Error reading answer from NM to list allocations!\n"); + rc = 1; + goto err; + } + + bytes = n_allocations * sizeof(nam_allocation_t); + //allocate buffer + all = malloc(bytes); + + if(tcp_readall(nm_connection, all, bytes) != bytes) + { + printf("Error reading allocations from NM!\n"); + rc = 1; + goto err; + } + + for (i = 0; i < n_nams; ++i) + { + printf("Allocations for NAM[%d] %s\n", i, nam[i].name); + printf("%12s %12s %12s \t %19s \t %6s \t %10s \t %9s\n", "Size", "Start", "End", "Created", "Status", "Persistant", "Challenge" ); + + for (k = 0; k < n_allocations; ++k) { + alloc = all + k; + if(alloc->nam_index == i) + { + localtime_r(&alloc->created, &timeinfo); + strftime (timebuffer,80,"%H:%M:%S_%Y-%m-%d",&timeinfo); + + char *status = statustostr(alloc->status); + printf("%12lu %12"PRIu64" %12lu \t %19s \t %6s \t %10d \t %lu\n", alloc->size, alloc->nla, (size_t) alloc->nla+alloc->size-1, timebuffer, status, alloc->persistant, alloc->challenge); + } + } + + printf("\n"); + + } + + + + err: + close(nm_connection); + free(all); + return rc; + + + +} +int main(int argc, char **argv) +{ + + extoll_con_info_t *config_con; + + uint64_t payload; + uint64_t address; + + nam_init(); + + + + //overwrite with args parsed + parse_opt(argc, argv); + + + if(arg_clear_own) + { + printf("Clearing own allocations in NM\n"); + + clear_own_allocations(); + } + if(arg_remove) + { + if(!challenge_set) + { + printf("No challenge specified for remove\n"); + goto error; + } + + payload = strtoull(arg_challenge, NULL, 10); + printf("Clearing allocation with challenge %"PRIu64" in NM\n", payload); + clear_persistant_allocation(payload); + } + + if(arg_list) + { + printf("Listing all allocations of NM\n"); + + list_allocations(); + } + + + + + + printf("Connection closed\n Exiting\n"); + return 0; + + error: + + 
return 1; + + + +} diff --git a/submit.job b/submit.job index 8e1af5e22f27175f7fc3e0c6644c49c5496c7841..8c2419cbef694f9b9f4bc6ccbb6dd6747ee4bd31 100644 --- a/submit.job +++ b/submit.job @@ -20,4 +20,4 @@ source $HOME/nam/libNAM/nam_env.sh export NAM_DEBUG=10 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/nam/libNAM/lib:/direct/Software/Extoll/ASIC2/extoll2/lib bin/nam_admin.bin --clear-all -mpiexec -np 4 -x bin/nam_example.bin --cprs-bm +mpiexec -np 4 -x bin/nam_example.bin --cprs -b $[8*1024] -d