*** OpenPBS_2_3_16/src/resmom/mom_main.c	Wed Dec 5 01:38:45 2001
--- ../BUILD/OpenPBS_2_3_16.working/src/resmom/mom_main.c	Wed Nov 19 09:19:44 2003
***************
*** 177,182 ****
--- 177,186 ----
  #endif /* MOM_CHECKPOINT */
  double wallfactor = 1.00;
  
+ #ifdef TRANSIENT_TMPDIR
+ #include <sys/param.h>	/* MAXPATHLEN, so settmpdir() can bound its copy */
+ extern char tmpdir_basename[MAXPATHLEN];
+ #endif /* TRANSIENT_TMPDIR */
  
  /* Local Data Items */
  
***************
*** 548,553 ****
--- 552,587 ----
  	return ipaddr;
  }
  
+ #ifdef TRANSIENT_TMPDIR
+ /*
+  * settmpdir - handler registered under the name "tmpdir"; records
+  *	"name" in tmpdir_basename, the base directory under which the
+  *	per-job transient directories are created.
+  *
+  *	A missing name, or one that does not fit in tmpdir_basename, is
+  *	rejected and the previous value is kept, rather than overrunning
+  *	the buffer or silently using a truncated path.
+  *
+  *	Always returns 0, as the original handler did.
+  *	NOTE(review): confirm against the return convention of the sibling
+  *	handlers (e.g. setlogevent), which this patch does not show; if 0
+  *	means failure there, the success path should return 1.
+  */
+ static u_long
+ settmpdir(name)
+ 	char	*name;
+ {
+ 	static char	id[] = "settmpdir";
+ 
+ 	if (name == NULL || strlen(name) >= sizeof(tmpdir_basename)) {
+ 		log_err(-1, id, "tmpdir name missing or too long, ignored");
+ 		return 0;
+ 	}
+ 	strcpy(tmpdir_basename, name);	/* length checked above */
+ 	return 0;
+ }
+ #endif /* TRANSIENT_TMPDIR */
+ 
  static u_long
  setlogevent(value)
  	char *value;
***************
*** 758,763 ****
--- 792,800 ----
  	{ "logevent", setlogevent },
  	{ "max_load", setmaxload },
  	{ "prologalarm",prologalarm },
+ #ifdef TRANSIENT_TMPDIR
+ 	{ "tmpdir", settmpdir },
+ #endif /* TRANSIENT_TMPDIR */
  	{ "restricted", restricted },
  	{ "usecp", usecp },
  	{ "wallmult", wallmult },
***************
*** 2183,2188 ****
--- 2220,2229 ----
  	if (gethostname(ret_string, ret_size) == 0)
  		(void)addclient(ret_string);
  
+ #ifdef TRANSIENT_TMPDIR
+ 	settmpdir(TMP_DIR);
+ #endif /* TRANSIENT_TMPDIR */
+ 
  	if (read_config(NULL)) {
  		fprintf(stderr, "%s: config file '%s' failed\n",
  			argv[0], config_file);
*** OpenPBS_2_3_16/src/resmom/start_exec.c	Wed Dec 5 01:38:46 2001
--- ../BUILD/OpenPBS_2_3_16.working/src/resmom/start_exec.c	Wed Nov 19 09:21:08 2003
***************
*** 142,147 ****
--- 142,151 ----
  int mom_reader_go; /* see catchinter() & mom_writer() */
  struct var_table vtable; /* for building up Job's environ */
  
+ #ifdef TRANSIENT_TMPDIR
+ char tmpdir_basename[MAXPATHLEN];
+ #endif /* TRANSIENT_TMPDIR */
+ 
  /* Local Varibles */
  
  static int script_in; /* script file, will be stdin */
***************
*** 162,167 ****
--- 166,174 ----
  	"PBS_TASKNUM",
  	"PBS_MOMPORT",
  	"PBS_NODEFILE"
+ #ifdef TRANSIENT_TMPDIR
+ 	, "TMPDIR"
+ #endif /* TRANSIENT_TMPDIR */
  };
  
  static char *variables_env[NUM_LCL_ENV_VAR];
*************** *** 520,525 **** --- 527,536 ---- struct array_strings *vstrs; struct stat sb; struct sockaddr_in saddr; + #ifdef TRANSIENT_TMPDIR + int rc; + char tmpdir[MAXPATHLEN]; + #endif /* TRANSIENT_TMPDIR */ if ( pjob->ji_numnodes > 1 ) { /* *************** *** 988,993 **** --- 999,1060 ---- fclose(nhow); } + #ifdef TRANSIENT_TMPDIR + /* only when TMPDIR is not set by someone else */ + if ( getenv("TMPDIR") == NULL ) { + *buf=0; + snprintf(tmpdir, sizeof(tmpdir), "%s/%s", tmpdir_basename, pjob->ji_qs.ji_jobid); + rc=stat(tmpdir,&sb); + if (rc) rc=errno; + switch (rc) { + case ENOENT: + break; + case 0: + if( ! S_ISDIR(sb.st_mode)) { + sprintf(log_buffer, "Job transient tmpdir %s exists, but is not a directory", tmpdir); + log_err(errno, id, log_buffer); + starter_return(upfds, downfds, JOB_EXEC_FAIL1, &sjr); + } + break; + default: + sprintf(log_buffer, "Cannot name job tmp directory %s (on stat)", tmpdir); + log_err(errno, id, log_buffer); + starter_return(upfds, downfds, JOB_EXEC_FAIL1, &sjr); + break; + } + + /* only Mother Superior shoudl create the directory */ + if ( pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE ) { + if (mkdir(tmpdir,0755) == -1 ) { + sprintf(log_buffer, "cannot mkdir tmpdir %s", tmpdir); + log_err(errno, id, log_buffer); + starter_return(upfds, downfds, JOB_EXEC_FAIL1, &sjr); + } + if (chown(tmpdir,pjob->ji_qs.ji_un.ji_momt.ji_exuid,pjob->ji_qs.ji_un.ji_momt.ji_exgid) == -1 ) { + sprintf(log_buffer, "cannot chown tmpdir %s to %d:%d", tmpdir, + pjob->ji_qs.ji_un.ji_momt.ji_exuid, + pjob->ji_qs.ji_un.ji_momt.ji_exgid); + log_err(errno, id, log_buffer); + starter_return(upfds, downfds, JOB_EXEC_FAIL1, &sjr); + } + if (chmod(tmpdir,0755) == -1 ) { + sprintf(log_buffer, "cannot chmod tmpdir %s", tmpdir); + log_err(errno, id, log_buffer); + starter_return(upfds, downfds, JOB_EXEC_FAIL1, &sjr); + } + } + + sprintf(log_buffer,"using transient tmpdir %s",tmpdir); + log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, + pjob->ji_qs.ji_jobid, 
log_buffer); + + bld_env_variables(&vtable, variables_else[12], tmpdir); + } else { + *tmpdir=0; /* did not create a unique directory */ + bld_env_variables(&vtable, variables_else[12], getenv("TMPDIR")); + } + #endif /* TRANSIENT_TMPDIR */ + /* specific system related variables */ j = set_mach_vars(pjob, &vtable); *** OpenPBS_2_3_16/src/resmom/catch_child.c Wed Dec 5 01:38:44 2001 --- ../BUILD/OpenPBS_2_3_16.working/src/resmom/catch_child.c Wed Nov 19 09:25:14 2003 *************** *** 83,88 **** --- 83,91 ---- #include #include #include + #ifdef TRANSIENT_TMPDIR + #include + #endif /* TRANSIENT_TMPDIR */ #include #include #include "dis.h" *************** *** 124,129 **** --- 127,135 ---- extern struct connection svr_conn[]; extern int resc_access_perm; extern char *path_home; + #ifdef TRANSIENT_TMPDIR + extern char tmpdir_basename[]; + #endif /* TRANSIENT_TMPDIR */ static void obit_reply A_((int sock)); *************** *** 260,265 **** --- 266,274 ---- char *svrport; char *cookie; unsigned int port; + #ifdef TRANSIENT_TMPDIR + char tmpdir[MAXPATHLEN]; + #endif /* TRANSIENT_TMPDIR */ u_long gettime A_((resource *pres)); u_long getsize A_((resource *pres)); task *task_find A_(( job *pjob, *************** *** 530,535 **** --- 539,562 ---- pjob->ji_flags &= ~MOM_HAS_NODEFILE; } + #ifdef TRANSIENT_TMPDIR + /* this is the set of condition for a transient tmpdir as used at + * creation time in start_exec.c + * we already know we are Mother Superior */ + if ( (getenv("TMPDIR") == NULL) && *tmpdir_basename ) { + snprintf(tmpdir, sizeof(tmpdir), "%s/%s", tmpdir_basename, pjob->ji_qs.ji_jobid); + sprintf(log_buffer,"Removing transient job directory %s",tmpdir); + log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, + pjob->ji_qs.ji_jobid, log_buffer); + + if ( rrmdir(tmpdir) != 0 ) { + (void)sprintf(log_buffer, + "recursive remove of job transient tmpdir %s failed", + tmpdir); + log_err(errno, "recursive (r)rmdir", log_buffer); + } + } + #endif /* TRANSIENT_TMPDIR */ /* Send the 
Job Obiturary Notice to the server */
***************
*** 557,562 ****
--- 584,674 ----
  	if (pjob == 0)
  		exiting_tasks = 0; /* went through all jobs */
  }
+ 
+ #ifdef TRANSIENT_TMPDIR
+ /*
+  * rrmdir - recursively remove the directory "path" and everything below it.
+  *
+  *	Entries are examined with lstat(), so a symbolic link found in the
+  *	tree is unlinked rather than followed.
+  *
+  *	Returns 0 on success.  On failure, returns the errno value of the
+  *	first operation that failed (ENOENT for a null or empty path, ENOMEM
+  *	when the path buffer cannot be grown) and leaves errno set to that
+  *	value.  Removal stops at the first failure, so the tree may be left
+  *	partially removed.
+  */
+ int rrmdir(path)
+     char	*path;
+ {
+ 
+ #define SUBPATHLENCHUNK 2048
+ 
+ 	DIR		*dir;
+ 	struct dirent	*dent;
+ 	struct stat	 sb;
+ 	char		*subpath = NULL;
+ 	char		*p;
+ 	size_t		 subpathlen = 0;
+ 	size_t		 need;
+ 	int		 rc = 0;
+ 
+ 	if (path == NULL || *path == '\0') {
+ 		errno = ENOENT;
+ 		return ENOENT;
+ 	}
+ 	if ((dir = opendir(path)) == NULL)
+ 		return errno;
+ 
+ 	while ((dent = readdir(dir)) != NULL) {
+ 		/* skip "." and "..": following them would loop or leave the tree */
+ 		if (strcmp(dent->d_name, ".") == 0 ||
+ 		    strcmp(dent->d_name, "..") == 0)
+ 			continue;
+ 
+ 		/* room for path + '/' + entry name + terminating NUL */
+ 		need = strlen(path) + strlen(dent->d_name) + 2;
+ 		if (need > subpathlen) {
+ 			/* grow to the next chunk boundary that fits "need" */
+ 			need = (need / SUBPATHLENCHUNK + 1) * SUBPATHLENCHUNK;
+ 			if ((p = realloc(subpath, need)) == NULL) {
+ 				rc = ENOMEM;
+ 				break;
+ 			}
+ 			subpath = p;
+ 			subpathlen = need;
+ 		}
+ 		strcpy(subpath, path);
+ 		strcat(subpath, "/");
+ 		strcat(subpath, dent->d_name);
+ 
+ 		if (lstat(subpath, &sb) != 0) {
+ 			rc = errno;
+ 			break;
+ 		}
+ 
+ 		if (S_ISDIR(sb.st_mode))
+ 			rc = rrmdir(subpath);
+ 		else if (unlink(subpath) != 0)
+ 			rc = errno;
+ 
+ 		if (rc != 0)
+ 			break;
+ 	}
+ 
+ 	/* release the directory stream and the buffer on every exit path */
+ 	(void)closedir(dir);
+ 	free(subpath);
+ 
+ 	if (rc == 0 && rmdir(path) != 0)
+ 		rc = errno;
+ 	if (rc != 0)
+ 		errno = rc;	/* closedir() may have overwritten errno */
+ 	return rc;
+ }
+ #endif /* TRANSIENT_TMPDIR */
+ 
+ 
  /*
   * obit_reply - read and process the reply from the server acknowledging
   *	the job obiturary notice.
*** OpenPBS_2_3_16/Readme.tmpdir	Thu Jan 1 01:00:00 1970
--- ../BUILD/OpenPBS_2_3_16.working/Readme.tmpdir	Wed Nov 19 10:09:22 2003
***************
*** 0 ****
--- 1,95 ----
+ 
+ Readme.tmpdir - Enabling transient temporary directories for jobs
+ ------------------------------------------------------------------------------
+ 
+ The transient-tmpdir patch enables the per-job TMPDIR functionality
+ such as found in the CRAY systems and in PBS Pro version 5.2 and
+ above. This document describes how to enable this patch and how
+ to configure the behaviour of TMPDIR using the pbs_mom configuration
+ file.
+ Although the functionality is the same, the way this patch works and
+ is configured may be different from PBS Pro or CRAY.
+ 
+ 
+ Compilation
+ -----------
+ 
+ The transient-tmpdir patch is enabled by defining the TRANSIENT_TMPDIR
+ symbol when compiling pbs_mom (in src/resmom). When using
+ autoconf, define CFLAGS as follows:
+ 
+     CFLAGS="$CFLAGS -DTRANSIENT_TMPDIR=1"
+     export CFLAGS
+ 
+     ./configure --set-tmpdir=<path> [ other configure options ]
+ 
+ and run "make" and "make install". The default for tmpdir is defined
+ by configure, and set to "/tmp" unless specified otherwise.
+ 
+ 
+ How it works
+ ------------
+ 
+ Transient job directories are only created when a non-null value
+ is set for TMP_DIR (either using configure or by using the
+ "$tmpdir <path>" directive in mom's config file), and the TMPDIR
+ environment variable is not set in the pbs_environment file.
+ 
+ Before the job prologue script (or the job itself) is started,
+ a temporary per-job directory name is generated based on the
+ value of TMP_DIR (as set in configure or using the $tmpdir directive
+ in mom's configuration file) and the job ID. This name is
+ passed to the job in the $TMPDIR environment variable.
+ 
+ The Mother Superior (MS) responsible for the job will actually
+ create the directory; other moms will only define the value for the
+ job and refrain from creating it, although they will ensure that
+ TMPDIR is pointing to a directory and not a (special) file.
+ 
+ This directory is created before the prologue script is run, and
+ TMPDIR will be available to the prologue script as well.
+ 
+ When the job has finished, this directory is removed by Mother Superior
+ after the epilogue script has been run and the PBS_NODEFILE has been
+ removed, just before the job obituary notice is sent to the server.
+ 
+ 
+ Disabling the transient-tmpdir functionality
+ --------------------------------------------
+ 
+ If your copy of (Scalable) OpenPBS has been built with transient-tmpdir
+ functionality, but you would like to disable the creation of transient
+ directories, you can do so by explicitly setting TMPDIR in
+ the pbs_environment file ("TMPDIR=/tmp"). If TMPDIR is set explicitly,
+ no directories will be created (or removed).
+ 
+ 
+ Caveats
+ -------
+ 
+ When jobs are using multiple nodes for execution, and the
+ transient-tmpdir functionality is used, the basename for the temporary
+ directories (TMP_DIR or the $tmpdir directive in the mom config file)
+ must be the same for all moms, and must point to a file system
+ shared by all nodes.
+ 
+ 
+ Changes
+ -------
+ 
+ The transient-tmpdir patch only affects the following files:
+ 
+     src/resmom/mom_main.c
+     src/resmom/start_exec.c
+     src/resmom/catch_child.c
+ 
+ 
+ Acknowledgements
+ ----------------
+ 
+ This patch has been provided by the Scientific Computing project
+ (DataGrid Team) of the Dutch National Institute for Nuclear and
+ High-Energy Physics (NIKHEF), Amsterdam, The Netherlands.
+ David Groep, 2003-11-19.
+ 
+ 