diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..3912109 --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..c2a5f36 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,516 @@ +2008-09-25 Mark Grondona + + * : tag v0.34. + + * preserve-env.c : + Added. + + * auto-affinity.c : + Check current CPU mask in task context as well as in + post opt and abort auto-set of affinity if CPU masks + have changed. This probably means something else is + controlling CPU affinity (or cpusets). + +2008-09-11 Mark Grondona + + * : tag v0.33. + + * cpuset/log.c : + Fix off-by-one use of log level. + + * cpuset/conf-parser.y : + Change message about opening config file from verbose to debug. + + * cpuset/cpuset.c, cpuset/create.c, cpuset/util.c : + Change locking methodology to use global lockfile in /var/lock + instead of using lockfile under /dev/cpuset/slurm. Advisory + locks are dropped if any fd open against a locked file is closed, + not just the original fd. Since libcpuset opens all files under + the /dev/cpuset hierarchy, no file within that fs is usable + as a lockfile for slurm cpuset. + +2008-09-10 Mark Grondona + + * use-env/use-env.c : + Fix generation of SLURM_CMDLINE. (Was accidentally generated + in reverse order). + +2008-08-21 Mark Grondona + + * : tag v0.32. + + * oom-detect.c : + Add 'do_syslog' parameter, which, when true, + sends a message via syslog(3) when any task of + a job step is thought to have been terminated by + the OOM killer. + +2008-08-19 Mark Grondona + + * : tag v0.31. + + * oom-detect.c : + Delay slightly if an OOM killed task is detected. + This should give srun more time to recv the error + message. + +2008-08-04 Mark Grondona + + * : tag v0.30. + + * cpuset/conf-parser.y, cpuset/cpuset.c : + Improve config file parse errors. + + * auto-affinity.c : + Update --auto-affinity=help. + +2008-07-29 Mark Grondona + + * : tag v0.29. + + * auto-affinity.c : + Do not set CPU affinity by default if the number of CPUs + is not a multiple of the number of tasks.
+ + * lib/hostlist.c, lib/hostlist.h, Makefile: + Include hostlist code, used by cpuset PAM module. + + * cpuset/* : Overhaul cpuset support, including new config file + parser, PAM module, and man pages. + +2008-07-22 Mark Grondona + + * : tag v0.28. + + * auto-affinity.c : + Only run spank_init_post_opt() hook on remote side (slurmd). + +2008-07-16 Mark Grondona + + * : tag v0.27. + + * cpuset/README : + Document `tasks' option to --use-cpusets. + + * cpuset/cpuset.c, cpuset/util.c : + Add --use-cpusets=tasks support to constrain tasks to + their own cpusets under the job step cpuset. + +2008-07-16 Mark Grondona + + * : tag v0.26. + + * cpuset/README : + Add documentation for --use-cpusets option. + + * cpuset/cpuset.c, cpuset/util.h, cpuset/util.c, + cpuset/nodemap.c, cpuset/Makefile : + Add spank user option --use-cpusets to optionally allow + per-job-step cpusets, which are created under the overall + job cpuset. + + * auto-affinity.so : + Move check for cpuset to after user options have been + processed, in case cpuset was changed. Open cpuset related + proc files with O_RDONLY instead of O_RDWR. + +2008-07-10 Mark Grondona + + * : tag v0.25. + + * chaos-spankings.spec : + Add cpuset subpackage for SLURM cpuset plugin, + /etc/init.d/slurm-cpuset init script, and + /sbin/cpuset_release_agent binary. + + * cpuset/cpuset.init : + Add initscript to mount /dev/cpuset. + + * cpuset/nodemap.c : + Allocate CPUs from nodes in reverse for best-fit and + worst-fit, but in order for first-fit. + + * cpuset/cpuset.c : + Be sure to call slurm_cpuset_create() early in plugin, + before slurm_cpuset_lock(). + + * auto-affinity.c : + Fix bug in auto-affinity plugin when cpuset filesystem + is not mounted. + + * cpuset/README : + Added. + +2008-07-09 Mark Grondona + + * cpuset.c, util.c, util.h : + Add !mem or !mem-constrain option to disable constraint + of memory nodes. Change "idle-first" options to + !idle-1st, idle-1st=gt, idle-1st=mult, idle-1st=no.
+ + * Makefile : + Add cpuset to subdirs. + + * cpuset/Makefile, cpuset/util.h, cpuset/util.c, + cpuset/nodemap.h, cpuset/nodemap.c, cpuset/cpuset.c, + cpuset/test.c, cpuset/release-agent.c : + Add initial version of SLURM cpuset.so module. + + * Makefile : + Add dependence on lib/fd.o to auto-affinity.so. + + * auto-affinity.c : + Allow auto-affinity to work when running inside a cpuset. + Map CPUs as chosen for CPU affinity back to actual CPUs + available to tasks inside their cpuset. (Plugin should + work the same as before, except that the number of available + CPUs is adjusted to the number of CPUs in the cpuset). + +2008-06-10 Mark Grondona + + * : tag v0.24. + + * Makefile : + auto-affinity.so now needs to link against libslurm. + + * auto-affinity.c : + If SLURM_JOB_CPUS_PER_NODE is not set, fall back to querying + slurm controller for necessary information. This is only + used in exclusive_only mode, and is a temporary solution + until the env var above is set for all SLURM jobs. + +2008-06-10 Mark Grondona + + * : tag v0.23. + + * auto-affinity.c : + Change `exclusive' option to `exclusive_only'. + +2008-06-09 Mark Grondona + + * auto-affinity.c : + Add `exclusive' option to auto-affinity plugin, which, when + used, will disable auto-affinity when the running job does + not have exclusive access to the node. + +2008-05-15 Mark Grondona + + * addr-no-randomize.c : + Added plugin to set ADDR_NO_RANDOMIZE personality on + processes, thus disabling address space randomization. + +2007-08-13 Jim Garlick + + * iorelay/* : New. + +2007-07-27 Mark Grondona + + * pty.c : + Add ability to process window size changes. + + * : tag v0.20. + + * pty.c : Instead of closing stdin/out/err, dup onto /dev/null. + Allow SLURM_PTY_NO_CLOSE_STDIO env variable to disable close + of stdio in tasks != task0. + + * overcommit-memory/overcommit.c (unregister_job) : + Fix bug that caused improper cleanup when running against all steps + for a given jobid, i.e. stepid = -1.
+ + * overcommit-memory/overcommit.c, overcommit-memory/util.c : + Properly report failed job cleanup. + + * : tag v0.21. + +2007-07-27 Mark Grondona + + * pty.c : + Added. New --pty option to srun(1) runs task 0 under a pseudo-tty. + + * Makefile, chaos-spankings.spec : + Build and package pty.so. + +2007-07-03 Mark Grondona + + * chaos-spankings.spec : + Include proper BuildRequires. + + * : tag v0.19. + +2007-02-12 Mark Grondona + + * auto-affinity.c : Don't ignore 1 task/node if CPUs/task is set. + + * overcommit-memory/overcommit-memory.c, + overcommit-memory/overcommit.c, overcommit-memory/overcommit.h : + Also adjust overcommit_ratio when overcommit-memory plugin is + used (mainly for the "no overcommit" case). Reset original + value when the last user exits. + + * : tag v0.18. + +2007-02-02 Mark Grondona + + * : tag v0.17. + +2007-02-02 Mark Grondona + + * overcommit-memory.c, overcommit-memory/overcommit-memory.c, + overcommit-memory/overcommit.h, overcommit-memory/overcommit.c, + overcommit-memory/util.c, lib/fd.c, lib/fd.h : + Move overcommit-memory source into its own dir. + Plugin now uses a shared memory file to track current users + and restores default overcommit policy when the last user + exits. Supply a utility, overcommit-util, to clean up state + of shared memory file, query current users, etc. + + * chaos-spankings.spec : + Updates for changes in overcommit-memory plugin. + +2007-02-02 Mark Grondona + + * auto-affinity.c : Force enable auto-affinity if any user option is + passed to --auto-affinity (except "off" of course). + + * auto-affinity.c : Rename "last_cpu_first" to "reverse." + Add start=N option to begin CPU affinity at CPU [N] instead + of CPU 0. Add shorthands "v" for verbose, "rev" for reverse. + +2007-01-24 Mark Grondona + + * auto-affinity.c : + Added. Set up some sane CPU affinity defaults. + + * use-env/use-env-parser.l : + Expand `~' to $HOME in POSTOP and STRING conditions. + + * : tag v0.16.
+ +2007-01-19 Mark Grondona + + * use-env/use-env-parser.y : + Be sure to not evaluate "matches" keyword when the condition + state is not true. + + * use-env/use-env.c : + Do not report errors from spank_setenv() when overwrite == 0. + + * overcommit-memory.c : + Added. Allow users to change overcommit behavior on nodes + of their job. + + * : tag v0.15. + +2007-01-10 Mark Grondona + + * io-watchdog/* : + Remove io-watchdog code. It is now its own project. + + * use-env/use-env-parser.l, use-env/use-env-parser.y, + README.use-env: + Change fnmatch() function to ``STRING matches PATTERN'' + + * : tag v0.13. + + * use-env/use-env-parser.y : + Allow empty input file. + + * : tag v0.14. + +2006-12-29 Mark Grondona + + * io-watchdog/io-watchdog-interposer.c : + Glob for proper libc using pattern /lib{64,}/libc.so* instead + of explicitly specifying libc filenames. + + * io-watchdog/io-watchdog-interposer.c : + Intercept calls to glibc IO functions _IO_putc and IO_puts. + + * io-watchdog/io-watchdog-interposer.c : + Set ctx.progname even if IO_WATCHDOG_TARGET not set. + +2006-12-28 Mark Grondona + + * io-watchdog/io-watchdog-interposer.c : + Also check for libc.so.6.1 if libc.so.6 is not found. + + * use-env/use-env-parser.y, use-env/use-env-parser.l, + use-env/use-env.c : + Add fnmatch() "function" to use-env config file. + Additional comments in use-env.c. + +2006-12-27 Mark Grondona + + * use-env/use-env.c : + Replace slurm_spank_local_user_init() which was inadvertently + removed earlier. + + * use-env/use-env.c : + Set SLURM_CMDLINE and SLURM_ARGV*/SLURM_ARGC keywords for + use in use-env config files. + + * README.use-env : Update documentation. + +2006-12-26 Mark Grondona + + * use-env/use-env.c, use-env/user-env.h , + use-env/use-env-parser.y, use-env/use-env-parser.l, + use-env/main.c, use-env/test.conf : + Add support for "in task" blocks in use-env config files + that are only parsed from spank_task_init. 
Provide wrappers + for {get,set,unset}env to access job environment in remote + context. + +2006-12-21 Mark Grondona + + * io-watchdog/io-watchdog-interposer.c : + Undefine fwrite_unlocked if it is a #define. Fix for compile + problem. + + * : tag v0.12. + +2006-12-20 Mark Grondona + + * lib/split.c, lib/split.h, lib/list.c, lib/list.h : + Move src files that may be used by multiple plugins into + a lib dir. + + * use-env/split.c, use-env/split.h, + use-env/list.c, use-env/list.h : + Removed. + + * use-env/Makefile : + Use sources from ../lib/. + + * Makefile, chaos-spankings.spec : + Better use of subdirectories. + + * chaos-spankings.spec : + Package tmpdir.so. + + * Makefile, io-watchdog/* : + Initial support for io-watchdog plugin. + +2006-12-13 Mark Grondona + + * tmpdir.c, Makefile : Add toy module that creates and + destroys job-step specific TMPDIR. + +2006-11-30 Mark Grondona + + * use-env/use-env.c : Only run cleanup in local context, + i.e. when !spank_remote(). + + * : tag v0.11. + +2006-11-28 Mark Grondona + + * use-env : + Moved use-env plugin into its own directory. + Complete redesign of use-env parser implemented with lex & yacc. + - Support for double-quoted strings. + - Added support for conditional if/else if/else/endif blocks. + - Added support for expansion of symbols with $ID and ${ID} + constructs. Symbols are use-env keywords, locally defined + symbols, or environment variables (searched in that order). + - Added support for keywords SLURM_NNODES, SLURM_NPROCS, SLURM_JOBID, + SLURM_STEPID for testing attributes of the current job. + - New "set" command for setting parser options (currently only + debuglevel is supported) + - New "dump" command to dump either the current list of "symbols" + "keywords" or both ("all"). + - New "print" command for printing arbitrary strings to stdout. + - New "define" command for defining symbols not exported to + the environment and "undefine" for deleting local symbols. 
+ - See README.use-env for more information. + + * : tag v0.10. + +2006-11-15 Mark Grondona + + * use-env.c, env-override.c : + - Prefer files in /etc/slurm/environment/name instead + of /etc/slurm/env-name.conf (same for ~/.slurm/) + - Always read both system and user "default" file. Apply + user defaults after system defaults so user can override + system settings. + - User default file is always called "default" + - Allow a list of names to be specified to --use-env, e.g. + --use-env=mvapich,test. The settings are applied in order + i.e. test after mvapich. + + * use-env.c : + - Check for slurm_spank_local_user_init support from SPANK, + and if it exists, read environment overrides in that + callback instead of in spank_init and option processing hooks. + + * list.c, list.h, split.c, split.h : Added. + + * README.use-env : Updated documentation for --use-env. + + * : tag v0.9. + +2006-11-11 Mark Grondona + + * use-env.c, env-override.c : + - Fix environment variable value overwrite. + - Allow config files to be included from other files with + the "include" directive. + - Add "unset" directive for unsetting + - Improve environment override file parsing a bit. + + * README.use-env, chaos-spankings.spec : + Add and install README for the use-env plugin. + + * : tag v0.8. + +2006-11-09 Mark Grondona + + * Makefile, use-env.c, env-override.c, env-override.h, + strlcpy.c, strlcpy.h, list.c, list.h, chaos-spankings.spec : + Add --use-env capability for overriding environment variables + in srun before sending environment to the remote job. + + * : tag v0.7. + +2006-10-18 Jim Garlick + + * iotrace.c : Added ability to pass flags to plasticfs log module. + Remove extraneous code. + + * : tag v0.6. + +2006-10-17 Jim Garlick + + * iotrace.c : New --iotrace capability using plasticfs via LD_PRELOAD. + Derived from Mark's system-safe.c. + + * chaos-spankings.spec, Makefile : Add iotrace. + + * : tag v0.5. 
+ +2006-10-09 Mark Grondona + + * system-safe.c, system-safe-preload.c : Add srun option to place + system-safe-preload.so in job's LD_PRELOAD, which replaces + system(3) with a version that calls fork(2) before application's + main(), thus allowing MPI applications to use system(3) on MPI + implementations that might not be fork()-safe. + + * oom-detect.c : Fix "(null)" at end of error message. + + * : tag v0.3. + + * system-safe-preload.c : Fix some stray fprintf's. + + * : tag v0.4. + +2006-07-25 Mark Grondona + + * renice.c : Fix format for verbose message. + + * oom-detect.c : Fix formatting of error message. + +2006-07-21 Mark Grondona + * : Initial version. diff --git a/DISCLAIMER b/DISCLAIMER new file mode 100644 index 0000000..1bb04be --- /dev/null +++ b/DISCLAIMER @@ -0,0 +1,24 @@ +This work was produced at the Lawrence Livermore National Laboratory +(LLNL) under Contract No. DE-AC52-07NA27344 (Contract 44) between +the U.S. Department of Energy (DOE) and Lawrence Livermore National +Security, LLC (LLNS) for the operation of LLNL. + +This work was prepared as an account of work sponsored by an agency of +the United States Government. Neither the United States Government nor +Lawrence Livermore National Security, LLC nor any of their employees, +makes any warranty, express or implied, or assumes any liability or +responsibility for the accuracy, completeness, or usefulness of any +information, apparatus, product, or process disclosed, or represents +that its use would not infringe privately-owned rights. + +Reference herein to any specific commercial products, process, or +services by trade name, trademark, manufacturer or otherwise does +not necessarily constitute or imply its endorsement, recommendation, +or favoring by the United States Government or Lawrence Livermore +National Security, LLC. 
The views and opinions of authors expressed +herein do not necessarily state or reflect those of the United States +Government or Lawrence Livermore National Security, LLC, and shall +not be used for advertising or product endorsement purposes. + +The precise terms and conditions for copying, distribution, and +modification are specified in the file "COPYING". diff --git a/META b/META new file mode 100644 index 0000000..f1b68de --- /dev/null +++ b/META @@ -0,0 +1,4 @@ + Name: chaos-spankings + Version: 0.34 + Release: 1 + Author: Mark Grondona diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bf2cb17 --- /dev/null +++ b/Makefile @@ -0,0 +1,44 @@ + +CFLAGS = -Wall -ggdb + +all: renice.so \ + oom-detect.so \ + system-safe-preload.so system-safe.so \ + iotrace.so \ + tmpdir.so \ + auto-affinity.so \ + pty.so \ + addr-no-randomize.so \ + preserve-env.so \ + subdirs + +SUBDIRS = use-env overcommit-memory cpuset + +.SUFFIXES: .c .o .so + +.c.o: + $(CC) $(CFLAGS) -o $@ -fPIC -c $< +.o.so: + $(CC) -shared -o $*.so $< $(LIBS) + +subdirs: + @for d in $(SUBDIRS); do make -C $$d; done + +system-safe-preload.so : system-safe-preload.o + $(CC) -shared -o $*.so $< -ldl + +auto-affinity.so : auto-affinity.o lib/split.o lib/list.o lib/fd.o + $(CC) -shared -o $*.so auto-affinity.o lib/split.o lib/list.o -lslurm + +preserve-env.so : preserve-env.o lib/list.o + $(CC) -shared -o $*.so preserve-env.o lib/list.o + +pty.so : pty.o + $(CC) -shared -o $*.so $< -lutil + +clean: subdirs-clean + rm -f *.so *.o lib/*.o + +subdirs-clean: + @for d in $(SUBDIRS); do make -C $$d clean; done + diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..ff29cea --- /dev/null +++ b/NEWS @@ -0,0 +1,73 @@ +Version 0.34 (2008-09-25): +- auto-affinity: Fix for using auto-affinity module with jobs using + --use-cpusets=task. The auto-affinity module now checks to make sure + CPU mask has not changed in task context, and if so, silently + does nothing. 
+- preserve-env: New plugin which, when enabled with --preserve-slurm-env + option, will attempt to keep the remote SLURM_* environment variables + the same as in the current context. Useful for invoking + "srun -n1 --pty bash" from within an allocation shell. + +Version 0.33 (2008-09-11): +- Fix for critical locking bug in cpuset plugin. The cpuset plugin + now uses a global lockfile in /var/lock instead of locking files + under /dev/cpuset. +- Fix for generation of SLURM_CMDLINE in use-env plugin. + +Version 0.32 (2008-08-21): +- oom-detect: Optionally log OOM killed jobs via syslog(3), if + the do_syslog parameter is used in plugstack.conf. The syslog + message has the form "slurmd: OOM detected: jobid=JOBID uid=UID" + +Version 0.31 (2008-08-19): +- oom-detect: Delay slightly if an OOM killed process is detected + to give the error message time to make it to srun stderr. + +Version 0.30 (2008-08-04): +- cpuset: Slightly improve config file error messages. +- cpuset: Minor fixes for man pages. +- auto-affinity: Update --auto-affinity=help message. + +Version 0.29 (2008-07-29): +- cpuset: Major overhaul of SLURM cpuset support. Now includes a PAM + module, pam_slurm_cpuset.so, and a global config file in + /etc/slurm/slurm-cpuset.conf. For more information, see the + new manual pages included with the distribution. +- auto-affinity: Do not set CPU affinity by default if the number + of available CPUs is not evenly divisible by the number of tasks. + +Version 0.28 (2008-07-22): +- auto-affinity: Fix error where spank_post_opt hook was incorrectly + run in srun, which caused an immediate error and abort. + +Version 0.27 (2008-07-16): +- cpuset: Expand cpuset support to per-task cpusets via --use-cpusets=tasks. + +Version 0.26 (2008-07-16): +- cpuset: Add support for per-job-step cpusets via the new srun option + '--use-cpusets'. See the README or --use-cpusets=help for more information. 
+- auto-affinity: Delay detection of current cpuset until after user + option processing in the event that user option changed our cpuset. + +Version 0.25 (2008-07-10): +- cpuset: Added cpuset plugin to constrain jobs to number of CPUs + allocated on shared, but not oversubscribed nodes. +- auto-affinity: Make auto-affinity plugin cpuset-aware. CPU affinity + is assigned as if the job were running on a node the size of the + current cpuset. If cpusets are not enabled, the auto-affinity behavior + is unchanged. + +Version 0.24 (2008-06-10): + - auto-affinity: Query SLURM controller for number of CPUs allocated + to the current job in exclusive_only mode if the environment variable + SLURM_JOB_CPUS_PER_NODE is not set. + +Version 0.23 (2008-06-10): + - auto-affinity: Add 'exclusive_only' flag to auto-affinity plugin + to constrain plugin activity to only those jobs that have exclusive + use of the current node. + +(2008-06-10): + - Started NEWS file. + +$Id: NEWS 7811 2008-09-25 22:21:11Z grondo $ diff --git a/README b/README new file mode 100644 index 0000000..b330b27 --- /dev/null +++ b/README @@ -0,0 +1,156 @@ +SLURM spank plugins README +================================== + +This package includes several SLURM spank plugins developed +at LLNL and used on production compute clusters onsite. A few +of these plugins are only valid when used on LLNL's software +stack (oom-detect.so, for example, requires LLNL-specific patches +to track jobs terminated by the OOM killer). However, the +source for all plugins is provided here in the hope that they +might be useful to other plugin developers. The following +is a short description of most of the plugins in this package. + +addr-no-randomize +----------------- + +The addr-no-randomize plugin allows sysadmins to set a default +policy for address space randomization (when supported and +enabled in the Linux kernel), and provides an option for users +to enable/disable randomization on a per-job basis. 
+ +auto-affinity +----------------- + +Automatically assign CPU affinity using best-guess defaults. + +The default behavior of this plugin attempts to accommodate +multi-threaded apps by assigning more than one CPU per task +if the number of tasks running on the node is evenly divisible +into the number of CPUs. Otherwise, CPU affinity is not enabled +unless the cpus_per_task (cpt) option is specified. The default +behavior may be modified using the --auto-affinity options +listed below. Also, the srun(1) --cpu_bind option is processed +after auto-affinity, and thus may be used to override any CPU +affinity settings from this module. + +This plugin should not be used alone on systems using node +sharing. In that case, it should be used along with +the cpuset plugin below (and auto-affinity.so should be listed +*after* cpuset.so in the plugstack.conf). + +cpuset +----------------- + +The cpuset plugin uses Linux cpusets to constrain jobs to the +number of CPUs they have been allocated on nodes. The plugin +is specifically designed for systems sharing nodes and using CPU +scheduling (i.e. using the select/cons_res plugin). The plugin +will not work on systems where CPUs are oversubscribed to jobs +(i.e. strict node sharing without the use of select/cons_res). + +The plugin also has a pam_slurm_cpuset counterpart, which +replaces pam_slurm and serves an identical functionality, +except that user login sessions are constrained to their +currently allocated CPUs on a node. + +The cpuset plugin requires the SGI libbitmask and libcpuset +libraries available from + + http://oss.sgi.com/projects/cpusets + +(See also cpuset/README) + +iorelay +----------------- + +The iorelay plugin is an experimental proof-of-concept plugin +for remounting required filesystems for a parallel job from +the first allocated node to all others. It is meant to reduce +the load on global NFS servers. + +It has not been used in production. 
+ + +iotrace +----------------- + +The iotrace plugin is another experimental plugin which +uses "plasticfs" to log filesystem access on a per-job +basis. + + +oom-detect +----------------- + +The oom-detect plugin detects jobs that have been victims +of the OOM killer using some special code added to the LLNL +Linux kernel. As tasks exit after having been killed by +the OOM killer, a message is printed to the user's stderr +along with some memory information about the task. + +overcommit-memory +----------------- + +The overcommit-memory plugin is an attempt to allow users +to tune global overcommit behavior of the Linux kernel on +a per-job basis. It is currently buggy and thus not used. + +preserve-env +----------------- + +The preserve-env plugin adds an srun option + + --preserve-slurm-env + +which attempts to preserve the current state of all SLURM_* +environment variables in the remotely executed environment. This +is meant solely to be used from an allocation shell with +the syntax + + srun -n1 -N1 --pty --preserve-slurm-env $SHELL + +as a sort of "remote" allocation shell. + +pty +----------------- + +The pty plugin provides the SLURM --pty option, introduced +in slurm-1.3, for slurm-1.2. It isn't fully functional at this +point, but is a good example of a complex feature added solely +from a spank plugin. + + +renice +----------------- + +The renice plugin is the same as the example code in the +spank(8) man page. It provides a new srun option "--renice=VALUE" +which allows users to set the nice value of their remote +tasks (down to a minimum value configured by sysadmin). + +system-safe +------------------ + +The system-safe plugin provides an MPI-safe system(3) +replacement through an LD_PRELOAD library (most of the work +is done in system-safe-preload.c). The preloaded library +interposes a version of system(3) that does not fork. Instead, +the command line is passed through a pipe to a copy of the +program which was pre-forked before MPI_Init(). 
The return +value of the real system() call is passed back through the +pipe and returned to the calling application, for which there +is no noticeable difference with the real system(3). + +use-env +------------------ + +The use-env plugin allows system administrators and users to +modify the environment of SLURM jobs using a set of simple +yet very flexible config files. Environment variables can +be overridden, set only if unset, set based on conditional +syntax, and even defined in a per-task context. The config +files have access to key slurm variables such as SLURM_NNODES, +SLURM_NPROCS, etc., so variables can even be defined differently +depending on the size of the job. + +See README.use-env for further information. diff --git a/README.use-env b/README.use-env new file mode 100644 index 0000000..ab5fdde --- /dev/null +++ b/README.use-env @@ -0,0 +1,343 @@ +The use-env.so plugin for SLURM +============================================================================ + +SYNOPSIS + +The "use-env" spank(8) plugin for SLURM provides a simple facility for +utilizing SLURM to initialize and/or modify the current environment for +users launching jobs through srun(1). When the plugin is enabled in the +spank plugin stack (plugstack.conf by default), it reads environment +overrides from a default config file at srun initialization, and +also allows user-selected environment overrides via the srun option +"--use-env=name." When using --use-env=name, the config file +loaded is from ~/.slurm/environment/ or /etc/slurm/environment/name. +(~/.slurm/env-name.conf or /etc/slurm/env-name.conf is also +supported for backwards compatibility, but these locations are +deprecated and the file in environment/ is preferred) +The format of the config file is described below. + +This plugin also supports generation of a different environment per +task through use of "in task" blocks, which are parsed by slurmd +in task context just before calling exec(). 
See TASK BLOCKS below +for more information. + +DEFAULT CONFIG + +The default config file is read from /etc/slurm/environment/default +and is always used if it exists. A user default is also read +from ~/.slurm/environment/default. Settings in the user file are applied +after the global defaults in /etc/slurm so that user settings can +override system defaults. The default environment settings are +applied before any user-selected environment via the --use-env +option. + +The name of the global default config can be overridden by use of the +"default=" option to the plugin, e.g., with the following line in +plugstack.conf: + + required use-env.so default=mvapich + +would read /etc/slurm/environment/mvapich by default instead of +/etc/slurm/environment/default. The user default file is always +named "default" however. + + +CONFIG FILE FORMAT + +Lines in the use-env config file(s) can have the following format. +A '#' anywhere on the line indicates a comment. Statements +are separated by newlines or semicolons ";". + +Config files can be included from other files with the "include" +statement + + include name + +will include file "name" from the same directory as the file +in which the "include" was invoked. An absolute pathname +may also be specified, e.g.: + + include /etc/slurm/environment/foo + +TASK BLOCKS + +Configuration that should only be applied to remotely executed +tasks may be specified in special "in task" blocks, which +have the form + + in task { statements... } + +This block, if present, will be read by each task in the job +just before exec() is called. This allows the environment +to be tailored for a specific task, for example: + + in task { + if ($SLURM_PROCID == 0) + LD_PRELOAD = "$LD_PRELOAD libdebug.so" + endif + } + +would append libdebug.so to LD_PRELOAD only for task 0 +in the job. The rest of the config file is ignored +by the task. Likewise, these task blocks are ignored +when the config file is parsed by srun (except for +syntax checking). 
+ + +ASSIGNMENT EXPRESSIONS + +The simplest form of expression in the config file is to assign +a new value to an environment variable + + identifier = value + +Where identifier is a valid environment variable of the form +[A-Za-z_][0-9A-Za-z_]* and value can be any arbitrary quoted string +or string literal. Environment variables (and other locally defined +symbols or keywords) can be expanded by the familiar form of +$ID or ${ID}. Variable expansion will occur in both unquoted and +quoted strings. Whitespace outside of quoted strings is ignored. + +Examples of assignment are + + MYENV = foo # Valid: MYENV="foo" + MYENV2 = $MYENV/bar # Valid: MYENV2="foo/bar" + MYENV3 = ${MYENV}bar # Valid: MYENV3="foobar" + MYENV3 = "${MYENV}bar" # Valid: MYENV3="foobar" + MYENV4 = foo bar # Invalid + MYENV5 = "foo bar" # Valid: MYENV5="foo bar" + MYENV6 = "foo \"bar\"" # Valid: MYENV6="foo "bar"" + +There are additional assignment operators that may be used in +assignment expressions as well. These include: + + "|=" Set new value only if identifier was previously unset. + "+=" Prepend value to colon-separated identifier (e.g. PATH) + "=+" Append value to colon-separated identifier (e.g. PATH) + +For example: + + MYENV = foo + MYENV |= bar # Does nothing. MYENV="foo" + + PATH = /usr/bin # PATH = "/usr/bin" + PATH += /bin # PATH = "/bin:/usr/bin" + PATH =+ /usr/local/bin # PATH = "/bin:/usr/bin:/usr/local/bin" + +Note that + + PATH += "/usr/bin" + +is the same as + + PATH = /usr/bin:$PATH + +except that when $PATH is empty, the trailing ":" will not appear +when using the "+=" operator. + + +UNSET EXPRESSION + +Environment variables may also be unset using the simple +unset expression + + unset identifier + +For example: + + unset MYENV + +would unset the environment variable "MYENV" from the current env. + + +AVAILABLE KEYWORDS + +A small set of keywords are available within the config file +which describe current parameters of the executing job. 
These +include + + SLURM_JOBID Current SLURM job id + SLURM_STEPID Current SLURM job step id + SLURM_NNODES Number of nodes in current job + SLURM_NPROCS Number of processes in current job + SLURM_CMDLINE Remote command line for this job + SLURM_ARGC Number of command line arguments + SLURM_ARGV* Command line argument(s) ARGV0-ARGVN + +In task context, the following additional keywords are also available + + SLURM_PROCID Global task id or rank + SLURM_NODEID Global node id + +These are called keywords because their values cannot be overridden +by the user. However, they are referenced just like any other +variable. + +A full list of keywords and their values can be dumped to +stderr with the "dump keywords" command. See the DEBUGGING +section below. + + +DEFINING LOCAL SYMBOLS + +Occasionally it may be desirable to define new variables that are +not exported to the current environment. The "define" keyword is used +for this purpose + + define identifier = value + +works much like the assignment expression, except that the variable +is not exported to the local environment (and thus, not to the job). + +Locally defined variables such as these are undefined with the +"undefine" keyword: + + undefine n + +will delete "n" from the symbol table. + + +CONDITIONAL EXPRESSIONS + +The use-env configuration file supports conditional expressions of +the form + + if (tests) + statements + else if (tests) + statements + else if ... + statements + else + statements + endif + +Where ``tests'' can have combinations of the following forms + + value < value # Numeric comparison only + value > value # Numeric comparison only + value >= value # Numeric comparison only + value <= value # Numeric comparison only + value == value # Numeric or string compare + value != value # Numeric or string compare + value # True if var is not 0 or empty string; + defined var # True if var is defined + S matches P # True if string S matches the glob expression P + + ! 
tests + tests && tests + tests || tests + ( tests ) + + +For example: + + if ($SLURM_NNODES > 100) + MORE_THAN_100_NODES = 1 + else if ($SLURM_NNODES > 50) + MORE_THAN_50_NODES = 1 + else + FIFTY_NODES_OR_LESS = 1 + endif + + if ( "$SLURM_ARGV0" matches "*myapp*") + include env.myapp + endif + + +DEBUGGING + +Other commands that are mainly useful for debugging include: + + print "STRING" Print the value of string to stdout + set debuglevel N Set the debug level for the parser to value N + dump keywords Dump a list of currently defined keywords + dump symbols Dump a list of currently defined local symbols + dump all Dump both of the above + +The use-env plugin also looks for the environment variable: + + SPANK_USE_ENV_DEBUG + +which will increase the verbosity of debug logs for the use-env +parser if non-zero. + + + +EXAMPLES + +/etc/slurm/environment/default: + # + # Include global defaults + include global + # + # Include environment for mvapich + include mvapich + +/etc/slurm/environment/global + # + # If TMPDIR not set, set to /tmp + TMPDIR |= /tmp + # + +/etc/slurm/environment/mvapich + # + # Force MVAPICH timeout to 22 + # + VIADEV_DEFAULT_TIME_OUT=22 + # + # Prepend /usr/lib/mpi/dbg/mvapich-gen2/lib/shared to LD_LIBRARY_PATH + LD_LIBRARY_PATH += /usr/lib/mpi/dbg/mvapich-gen2/lib/shared + + +~/.slurm/environment/mvapich-test + # + # environment for testing new versions of MVAPICH + # + PATH += /home/grondo/mvapich-test/root/bin + LD_LIBRARY_PATH += /home/grondo/mvapich-test/root/lib/shared + +PATH and LD_LIBRARY_PATH can then be adjusted to use the mvapich-test +version with the srun command line: + + srun --use-env=mvapich-test ... 
+ + +Using conditional expressions + +~/.slurm/environment/default + # + # Using different environment variables based on job size + # + + define n = $SLURM_NPROCS + define N = $SLURM_NNODES + + if ($N > 128 || $n > 1024) + include large-env + else if (($N > 16) || ($n > 128)) + include medium-env + else + include small-env + endif + + if (defined $DEBUG) + print "environment setup for $SLURM_JOBID.$SLURM_STEPID complete" + dump keywords + dump symbols + endif + + +Output for this config file for a run with DEBUG set might look like: + +~ > DEBUG=1 srun hostname +environment setup for 4862.4 complete +use-env: default: 18: Dumping keywords +use-env: default: 18: SLURM_STEPID = "4" +use-env: default: 18: SLURM_JOBID = "4862" +use-env: default: 18: SLURM_NPROCS = "16" +use-env: default: 18: SLURM_NNODES = "2" +use-env: default: 19: Dumping symbols +use-env: default: 19: N = "2" +use-env: default: 19: n = "16" + diff --git a/addr-no-randomize.c b/addr-no-randomize.c new file mode 100644 index 0000000..38eedbe --- /dev/null +++ b/addr-no-randomize.c @@ -0,0 +1,114 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +#include +#include +#include + +#include + +/* + * All spank plugins must define this macro for the SLURM plugin loader. + */ +SPANK_PLUGIN(no-randomize, 1); + +#define ADDR_NO_RANDOMIZE 0x0040000 + +static int default_randomize = 0; +static int randomize = -1; + +#define OPT_RANDOMIZE 1 +#define OPT_NO_RANDOMIZE 2 + +static int process_opts (int val, const char *optarg, int remote); + +/* + * Provide options to srun: + */ +struct spank_option spank_options[] = +{ + { "addr-randomize", NULL, + "Enable address space randomization", 0, OPT_RANDOMIZE, + (spank_opt_cb_f) process_opts + }, + { "no-addr-randomize", NULL, + "Disable address space randomization", 0, OPT_NO_RANDOMIZE, + (spank_opt_cb_f) process_opts + }, + SPANK_OPTIONS_TABLE_END +}; + + +/* + * Called from both srun and slurmd. + * + * Parses the plugin arguments: "default_randomize=0" or + * "default_randomize=1" sets the default policy. Any other argument + * is reported with slurm_error() and otherwise ignored. The current + * setting is then reset to the default. Always returns 0. + */ +int slurm_spank_init (spank_t sp, int ac, char **av) +{ + int i; + + for (i = 0; i < ac; i++) { + /* + * Compare the whole 18-character "default_randomize=" prefix. + * The value is read at av[i] + 18, so a shorter match could + * index past the end of a short argument. 
+ */ + if (strncmp ("default_randomize=", av[i], 18) == 0) { + const char *optarg = av[i] + 18; + if (*optarg == '0') + default_randomize = 0; + else if (*optarg == '1') + default_randomize = 1; + else + slurm_error ("no-randomize: Ignoring invalid default value: " + "\"%s\"", av[i]); + } + else { + slurm_error ("no-randomize: Invalid option \"%s\"", av[i]); + } + } + + randomize = default_randomize; + + return (0); +} + +/* + * Option callback shared by --addr-randomize and --no-addr-randomize. + * 'val' selects the setting; any other value restores the default. + * Always returns 0. + */ +static int process_opts (int val, const char *optarg, int remote) +{ + if (val == OPT_RANDOMIZE) + randomize = 1; + else if (val == OPT_NO_RANDOMIZE) + randomize = 0; + else + randomize = default_randomize; + + return (0); +} + +/* + * Apply the selected policy: when randomization is disabled (0), + * call personality() with ADDR_NO_RANDOMIZE and log any failure. + */ +int slurm_spank_task_init (spank_t sp, int ac, char **av) +{ + if (randomize == -1) + randomize = default_randomize; + + slurm_info ("randomize = %d\n", randomize); + + if (randomize == 0 && (personality (ADDR_NO_RANDOMIZE) < 0)) + slurm_error 
("Failed to set personality: %m"); + return 0; +} + diff --git a/auto-affinity.c b/auto-affinity.c new file mode 100644 index 0000000..a0d67b3 --- /dev/null +++ b/auto-affinity.c @@ -0,0 +1,552 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#define __USE_GNU +#include + +#include +#include + +#include "lib/split.h" +#include "lib/fd.h" + +SPANK_PLUGIN(auto-affinity, 1); + +static int ncpus = -1; +static int ntasks = -1; +static int enabled = 1; +static int verbose = 0; +static int reverse = 0; +static int startcpu = 0; +static int requested_cpus_per_task = 0; +static int exclusive_only = 0; + +static cpu_set_t cpus_available; +static int ncpus_available; + +static const char auto_affinity_help [] = +"\ +auto-affinity: Automatically assign CPU affinity using best-guess defaults.\n\ +\n\ +The default behavior attempts to accomodate multi-threaded apps by \n\ +assigning more than one CPU per task if the number of tasks running \n\ +on the node is evenly divisible into the number of CPUs. Otherwise, \n\ +CPU affinity is not enabled unless the cpus_per_task (cpt) option is \n\ +specified. The default behavior may be modified using the \n\ +--auto-affinity options listed below. Also, the srun(1) --cpu_bind option\n\ +is processed after auto-affinity, and thus may be used to override any \n\ +CPU affinity settings from this module.\n\ + \n\ +Option Usage: --auto-affinity=[args...]\n\ + \n\ +where args... is a comma separated list of one or more of the following\n\ + help Display this message.\n\ + v(erbose) Print CPU affinty list for each remote task\n\ + \n\ + off Disable automatic CPU affinity.\n\ + \n\ + start=N Start affinity assignment at CPU [N]. 
If assigning CPUs\n\ + in reverse, start [N] CPUs from the last CPU.\n\ + rev(erse) Allocate last CPU first instead of starting with CPU0.\n\ + cpus_per_task=N Allocate [N] CPUs to each task.\n\ + cpt=N Shorthand for cpus_per_task.\n\n"; + + +static int parse_user_option (int val, const char *optarg, int remote); + +struct spank_option spank_options [] = { + { "auto-affinity", "[args]", + "Automatic, best guess CPU affinity for SMP machines " + "(args=`help' for more info)", + 2, 0, (spank_opt_cb_f) parse_user_option + }, + SPANK_OPTIONS_TABLE_END +}; + +static int str2int (const char *str) +{ + char *p; + long l = strtol (str, &p, 10); + + if (p && (*p != '\0')) + return (-1); + + return ((int) l); +} + +static int parse_option (const char *opt, int *remotep) +{ + if (strcmp (opt, "off") == 0) + enabled = 0; + else if ((strcmp (opt, "reverse") == 0) || (strcmp (opt, "rev") == 0)) + reverse = 1; + else if (strncmp (opt, "cpt=", 4) == 0) { + if ((requested_cpus_per_task = str2int (opt+4)) < 0) + goto fail; + } + else if (strncmp (opt, "cpus_per_task=", 14) == 0) { + if ((requested_cpus_per_task = str2int (opt+14)) < 0) + goto fail; + } + else if (strncmp (opt, "start=", 6) == 0) { + if ((startcpu = str2int (opt+6)) < 0) + goto fail; + } + else if (strcmp (opt, "verbose") == 0 || strcmp (opt, "v") == 0) + verbose = 1; + else if ((strcmp (opt, "help") == 0) && !(*remotep)) { + fprintf (stderr, auto_affinity_help); + exit (0); + } + + return (0); + + fail: + slurm_error ("auto-affinity: Invalid option: `%s'", opt); + return (-1); +} + +static int parse_user_option (int val, const char *arg, int remote) +{ + char *str; + List l; + int rc = 1; + + if (arg == NULL) + return (0); + + l = list_split (",", (str = strdup (arg))); + rc = list_for_each (l, (ListForF) parse_option, &remote); + + list_destroy (l); + free (str); + + return (rc); +} + +static int parse_argv (int ac, char **av, int remote) +{ + int i; + for (i = 0; i < ac; i++) { + if (strcmp (av[i], "off") == 0) + 
enabled = 0; + else if (strcmp (av[i], "exclusive_only") == 0) + exclusive_only = 1; + else + return (-1); + } + return (0); +} + + +/* + * XXX: Since we don't have a good way to determine the number of + * CPUs allocated to this job on this node, we have to query + * the slurm controller (!). + * + * Hopefully this function can be removed in the near future. + * It should only be called when SLURM_JOB_CPUS_PER_NODE is not + * set in the environment. + */ +static int query_ncpus_per_node (spank_t sp) +{ + job_info_msg_t * msg; + uint32_t jobid; + int cpus_per_node = -1; + int i; + + if (spank_get_item (sp, S_JOB_ID, &jobid) != ESPANK_SUCCESS) { + if (verbose) + fprintf (stderr, "auto-affinity: Failed to get my JOBID!\n"); + return (-1); + } + + if (slurm_load_jobs (0, &msg, 0) < 0) { + slurm_error ("auto-affinity: slurm_load_jobs: %m\n"); + return (-1); + } + + for (i = 0; i < msg->record_count; i++) { + job_info_t *j = &msg->job_array[i]; + + if (j->job_id == jobid) { + /* + * XXX: Assume cpus_per_node is the same across the whole job. 
+ */ + cpus_per_node = (int) j->cpus_per_node[0]; + break; + } + } + + slurm_free_job_info_msg (msg); + return (cpus_per_node); +} + + +/* + * Return 1 if job has allocated all CPUs on this node, i.e. the CPU + * count taken from SLURM_JOB_CPUS_PER_NODE (or, when that variable is + * not set, from query_ncpus_per_node()) equals 'ncpus'. + * Returns 0 otherwise, including when the count cannot be determined. + */ +static int job_is_exclusive (spank_t sp) +{ + const char var[] = "SLURM_JOB_CPUS_PER_NODE"; + char val[16]; + int n; + + if (spank_getenv (sp, var, val, sizeof (val)) != ESPANK_SUCCESS) { + if (verbose) + fprintf (stderr, "auto-affinity: Failed to find %s in env\n", + "SLURM_JOB_CPUS_PER_NODE"); + + /* XXX: Now query slurm controller for this information */ + if ((n = query_ncpus_per_node (sp)) < 0) { + fprintf (stderr, "auto-affinity: Unabled to determine ncpus!\n"); + return (0); + } + } + else if ((n = str2int (val)) < 0) { + fprintf (stderr, "auto-affinity: %s=%s invalid\n", + "SLURM_JOB_CPUS_PER_NODE", val); + return (0); + } + + return (n == ncpus); +} + + +/* + * Does nothing unless spank_remote() is true. Otherwise parses the + * plugin arguments, then records the number of online CPUs in 'ncpus' + * and the local task count in 'ntasks'. Returns -1 if any step fails. 
+ */ +int slurm_spank_init (spank_t sp, int ac, char **av) +{ + if (!spank_remote (sp)) + return (0); + + if (parse_argv (ac, av, spank_remote (sp)) < 0) + return (-1); + + /* + * First get total number of online CPUs + */ + if ((ncpus = (int) sysconf (_SC_NPROCESSORS_ONLN)) < 0) { + slurm_error ("Failed to get number of processors: %m\n"); + return (-1); + } + + if (spank_get_item (sp, S_JOB_LOCAL_TASK_COUNT, &ntasks) != ESPANK_SUCCESS) + { + slurm_error ("Failed to get number of local tasks\n"); + return (-1); + } + + return (0); +} + +/* + * Use the slurm_spank_user_init callback to check for exclusivity + * because user options are processed prior to calling here. + * Otherwise, we would not be able to use the `verbose' flag. + */ +int slurm_spank_user_init (spank_t sp, int ac, char **av) +{ + if (!spank_remote (sp)) + return (0); + + if (exclusive_only && !job_is_exclusive (sp)) { + if (verbose) + fprintf (stderr, "auto-affinity: Disabling. 
" + "(job doesn't have exclusive access to this node)\n"); + enabled = 0; + } + + if (exclusive_only && + (ntasks < ncpus_available) && (ncpus_available % ntasks)) { + if (verbose) + fprintf (stderr, "auto-affinity: Disabling. " + "ncpus must be evenly divisible by number of tasks\n"); + enabled = 0; + } + + return (0); +} + +static int cpu_set_count (cpu_set_t *setp) +{ + int i; + int n = 0; + for (i = 0; i < ncpus; i++) { + if (CPU_ISSET (i, setp)) + n++; + } + return (n); +} + +static char * cpuset_to_cstr (cpu_set_t *mask, char *str) +{ + int i; + char *ptr = str; + int entry_made = 0; + + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, mask)) { + int j; + int run = 0; + entry_made = 1; + for (j = i + 1; j < CPU_SETSIZE; j++) { + if (CPU_ISSET(j, mask)) + run++; + else + break; + } + if (!run) + sprintf(ptr, "%d,", i); + else if (run == 1) { + sprintf(ptr, "%d,%d,", i, i + 1); + i++; + } else { + sprintf(ptr, "%d-%d,", i, i + run); + i += run; + } + while (*ptr != 0) + ptr++; + } + } + ptr -= entry_made; + *ptr = 0; + + return str; +} + +static int get_cpus_per_task () +{ + if (requested_cpus_per_task) + return (requested_cpus_per_task); + else if ((ncpus_available % ntasks) == 0) + return (ncpus_available / ntasks); + else + return (1); +} + +/* + * Return the absolute cpu number for relative CPU cpu within + * the available cpus mask 'cpus_available'. + */ +static int mask_to_available (int cpu) +{ + int i; + int j = 0; + for (i = 0; i < ncpus; i++) { + if (CPU_ISSET (i, &cpus_available) && (cpu == j++)) + return (i); + } + slurm_error ("Yikes! 
Couldn't convert CPU%d to available CPU!", cpu); + return (-1); +} + +static int generate_mask (cpu_set_t *setp, int localid) +{ + int i = 0; + int cpu; + int cpus_per_task = get_cpus_per_task (); + + if (cpus_per_task == 1) { + if ((cpu = mask_to_available (localid + startcpu)) < 0) + return (-1); + CPU_SET (cpu, setp); + return (0); + } + + cpu = ((localid * cpus_per_task) + startcpu) % ncpus_available; + + while (i++ < cpus_per_task) { + int bit = mask_to_available (cpu); + if (bit < 0) + return (-1); + CPU_SET (bit, setp); + cpu = (cpu + 1) % ncpus_available; + } + + return (0); +} + +static int generate_mask_reverse (cpu_set_t *setp, int localid) +{ + int i = 0; + int cpu; + int cpus_per_task = get_cpus_per_task (); + int lastcpu = ncpus_available - 1; + + if (cpus_per_task == 1) { + cpu = (lastcpu - (localid + startcpu) % ncpus_available); + if ((cpu = mask_to_available (cpu)) < 0) + return (-1); + CPU_SET (cpu, setp); + return (0); + } + + cpu = lastcpu - (((localid * cpus_per_task) + startcpu) % ncpus_available); + + while (i++ < cpus_per_task) { + int bit = mask_to_available (cpu); + if (bit < 0) + return (-1); + CPU_SET (bit, setp); + cpu = (--cpu >= 0) ? cpu : (ncpus_available - 1); + } + + return (0); +} + +/* + * Set the provided cpu set to the actual CPUs available to the + * current task (which may be restricted by cpusets or other + * mechanism. + * + * Returns the number of cpus set in setp. + * + */ +static int get_cpus_available (cpu_set_t *setp) +{ + if (sched_getaffinity (0, sizeof (cpu_set_t), setp) < 0) { + slurm_error ("auto-affinity: sched_getaffinity: %m"); + return (-1); + } + + return (cpu_set_count (setp)); +} + +int slurm_spank_init_post_opt (spank_t sp, int ac, char **av) +{ + if (!spank_remote (sp)) + return (0); + /* + * Set available cpus mask after user options have been processed, + * in case our cpuset changed. 
+ */ + ncpus_available = get_cpus_available (&cpus_available); + return (0); +} + +int check_task_cpus_available (void) +{ + int n; + + /* + * Check number of available cpus again. If it has + * changed since checking in spank_init_post_opt, + * then abort, because likely something else is adjusting + * the cpu mask (or we are using per-task cpusets) + * and auto-affinity is not warranted. + */ + if ((n = get_cpus_available (&cpus_available)) && + (n != ncpus_available) ) { + if (ncpus_available > 0) { + if (verbose) + fprintf (stderr, "auto-affinity: Not adjusting CPU mask. " + "(task cpu mask adjusted externally)\n"); + return (-1); + } + + ncpus_available = n; + } + + return (0); +} + +/* + * Bind the calling process to the CPU mask generated for its task id. + * + * Returns 0 when the mask was applied or when one of the checks below + * decides not to adjust affinity, and -1 if the task id cannot be read, + * the mask cannot be generated, or sched_setaffinity(2) fails. + */ +int slurm_spank_task_init (spank_t sp, int ac, char **av) +{ + int localid; + cpu_set_t setp[1]; + char buf[4096]; + + if (!enabled) + return (0); + + if (check_task_cpus_available () < 0) + return (0); + + if (ncpus_available <= 1) + return (0); + + if ((ntasks <= 1) && !requested_cpus_per_task) { + if (verbose) + fprintf (stderr, "auto-affinity: Not adjusting CPU mask. " + "(%d task on this node)\n", ntasks); + return (0); + } + + /* + * Do nothing if user is overcommitting resources + */ + if (ntasks > ncpus_available) + return (0); + + /* + * Do nothing by default if number of CPUs is not a multiple + * of the number of tasks + */ + if ((ncpus_available % ntasks) && !requested_cpus_per_task) { + if (verbose) { + fprintf (stderr, "auto-affinity: Not adjusting mask. " + "(%d tasks not evenly divided among %d CPUs)\n", + ntasks, ncpus_available); + fprintf (stderr, "To force, explicity set cpus-per-task\n"); + } + return (0); + } + + /* + * localid selects the CPUs below; do not continue with an + * indeterminate value if the lookup fails. + */ + if (spank_get_item (sp, S_TASK_ID, &localid) != ESPANK_SUCCESS) { + slurm_error ("Failed to get local task id\n"); + return (-1); + } + + if (requested_cpus_per_task > ncpus_available) { + if (localid == 0) + slurm_error ("auto-affinity cpus_per_task=%d > ncpus=%d. 
%s...", + requested_cpus_per_task, ncpus_available, "Ignoring"); + requested_cpus_per_task = 0; + } + + CPU_ZERO (setp); + + /* + * Do not apply an empty or partially filled mask if generation failed. + */ + if ((reverse ? generate_mask_reverse (setp, localid) + : generate_mask (setp, localid)) < 0) { + slurm_error ("auto-affinity: Failed to generate CPU mask for task %d\n", + localid); + return (-1); + } + + if (verbose) + fprintf (stderr, "%s: local task %d: CPUs: %s\n", + "auto-affinity", localid, cpuset_to_cstr (setp, buf)); + + if (sched_setaffinity (getpid (), sizeof (*setp), setp) < 0) { + slurm_error ("Failed to set auto-affinity for task %d\n", localid); + return (-1); + } + + return (0); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/chaos-spankings.spec b/chaos-spankings.spec new file mode 100644 index 0000000..610da57 --- /dev/null +++ b/chaos-spankings.spec @@ -0,0 +1,174 @@ +## +# $Id: chaos-spankings.spec 7813 2008-09-25 23:08:25Z grondo $ +## + +Name: +Version: +Release: + +Summary: SLURM SPANK modules for CHAOS systems +Group: System Environment/Base +License: GPL + +BuildRoot: %{_tmppath}/%{name}-%{version} +Source0: %{name}-%{version}.tgz + +BuildRequires: slurm-devel job bison flex +BuildRequires: libbitmask libcpuset +BuildRequires: pam-devel + +Requires: slurm + +%description +This package contains a set of SLURM SPANK modules for CHAOS clusters. +Currently includes: + - renice.so : add --renice option to srun allowing users to set priority + of job + - oom-detect.so : Detect tasks killed by OOM killer via /proc/oomkilled file. + - system-safe.so : Implement pre-forked system(3) replacement in case MPI + implementation doesn't support fork(2). + - iotrace.so : Enable tracing of IO calls through LD_PRELOAD trick + - use-env.so : Add --use-env flag to srun to override environment + variables for job + - tmpdir.so : Create a job-specific TMPDIR and remove it (as the user) + after the job has exited. + + - auto-affinity.so: + Try to set CPU affinity on jobs using some kind of + presumably sane defaults. Also adds an --auto-affinity + option for tweaking the default behavior. 
+ + - overcommit-memory.so : + Allow users to choose overcommit mode on nodes of + their job. + + - pty.so : Run task 0 of SLURM job under pseudo tty. + +%package cpuset +Summary: Cpuset spank plugin for slurm. +Group: System Environment/Base +Requires: libbitmask libcpuset slurm + +%description cpuset +This package contains a SLURM spank plugin for enabling +the use of cpusets to constrain CPU use of jobs on nodes to +the number of CPUs allocated. This plugin is specifically +designed for systems sharing nodes and using CPU scheduling +(i.e. using the sched/cons_res plugin). Most importantly the +plugin will be harmful when overallocating CPUs on nodes. The +plugin is enabled by adding the line: + + required cpuset.so [options] + +to /etc/slurm/plugstack.conf. + +A PAM module - pam_slurm_cpuset.so - is also provided for +constraining user logins in a similar fashion. For more +information see the slurm-cpuset(8) man page provided with +this package. + + +%prep +%setup + +%build +make CFLAGS="$RPM_OPT_FLAGS" + +%install +rm -rf "$RPM_BUILD_ROOT" +mkdir -p "$RPM_BUILD_ROOT" + +plugins="renice.so \ + oom-detect.so \ + system-safe.so \ + iotrace.so \ + tmpdir.so \ + use-env/use-env.so \ + overcommit-memory/overcommit-memory.so \ + auto-affinity.so \ + preserve-env.so \ + pty.so + " + +libs="system-safe-preload.so" +utilities="overcommit-memory/overcommit-util" + +libdir=$RPM_BUILD_ROOT%{_libdir} +plugindir=${libdir}/slurm +utildir=$RPM_BUILD_ROOT%{_libexecdir}/chaos-spankings/ + +mkdir -p --mode=0755 $plugindir +mkdir -p --mode=0755 $utildir + +cat /dev/null > std-plugins.list +for plugin in $plugins; do + install -m0755 $plugin $plugindir + echo %{_libdir}/slurm/$(basename $plugin) >>std-plugins.list +done + +for lib in $libs; do + install -m0755 $lib $libdir +done + +for utility in $utilities; do + install -m0755 $utility $utildir +done + +# +# cpuset_release_agent goes into /sbin +# +mkdir -p $RPM_BUILD_ROOT/sbin +install -m0755 cpuset/cpuset_release_agent 
$RPM_BUILD_ROOT/sbin +install -m0755 cpuset/cpuset.so $plugindir +mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}/init.d/ +install -m0755 cpuset/cpuset.init \ + $RPM_BUILD_ROOT/%{_sysconfdir}/init.d/slurm-cpuset + +mkdir -p $RPM_BUILD_ROOT/%{_mandir}/man1 +mkdir -p $RPM_BUILD_ROOT/%{_mandir}/man8 +mkdir -p $RPM_BUILD_ROOT/%{_lib}/security + +install -m0755 cpuset/pam_slurm_cpuset.so $RPM_BUILD_ROOT/%{_lib}/security +install -m0644 cpuset/slurm-cpuset.8 cpuset/pam_slurm_cpuset.8 \ + $RPM_BUILD_ROOT/%{_mandir}/man8 +install -m0644 cpuset/use-cpusets.1 \ + $RPM_BUILD_ROOT/%{_mandir}/man1 + +# create /etc/slurm/plugstack.d directory +mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}/slurm/plugstack.conf.d + +# create entry for preserve-env.so +echo " required preserve-env.so" > \ + $RPM_BUILD_ROOT/%{_sysconfdir}/slurm/plugstack.conf.d/99-preserve-env + +%clean +rm -rf "$RPM_BUILD_ROOT" + +%post cpuset +if [ -x /sbin/chkconfig ]; then /sbin/chkconfig --add slurm-cpuset; fi + +%preun cpuset +if [ "$1" = 0 ]; then + if [ -x /sbin/chkconfig ]; then /sbin/chkconfig --del slurm-cpuset; fi +fi + +%files -f std-plugins.list +%defattr(-,root,root,0755) +%doc NEWS ChangeLog README.use-env +/%{_libdir}/*.so +/%{_libexecdir}/chaos-spankings/* +%dir %attr(0755,root,root) %{_sysconfdir}/slurm/plugstack.conf.d +%config(noreplace) %{_sysconfdir}/slurm/plugstack.conf.d/* + +%files cpuset +%defattr(-,root,root,0755) +%doc NEWS ChangeLog cpuset/README +%{_sysconfdir}/init.d/slurm-cpuset +%{_libdir}/slurm/cpuset.so +/%{_lib}/security/pam_slurm_cpuset.so +/sbin/cpuset_release_agent +%{_mandir}/man1/use-cpusets.* +%{_mandir}/man8/pam_slurm_cpuset.* +%{_mandir}/man8/slurm-cpuset.* + + diff --git a/cpuset/Makefile b/cpuset/Makefile new file mode 100644 index 0000000..4279039 --- /dev/null +++ b/cpuset/Makefile @@ -0,0 +1,37 @@ +NAME := cpuset + +FLAGS := -ggdb -Wall -I../lib +SHOPTS := -shared -Wl,--version-script=version.map +LLIBS := -lbitmask -lcpuset -ldl -lfl +OBJS := nodemap.o util.o create.o log.o 
slurm.o \ + conf.o conf-lexer.o conf-parser.o \ + ../lib/fd.o ../lib/list.o ../lib/split.o + +all: $(NAME).so test cpuset_release_agent pam_slurm_cpuset.so + +$(NAME).so: $(OBJS) $(NAME).o + $(CC) $(SHOPTS) -o $(NAME).so $(OBJS) $(NAME).o $(LLIBS) + +test: test.o $(OBJS) + $(CC) -o test $(OBJS) test.o $(LLIBS) + +cpuset_release_agent: release-agent.o $(OBJS) + $(CC) -o cpuset_release_agent $(OBJS) release-agent.o $(LLIBS) + + +pam_slurm_cpuset.so : $(OBJS) pam_slurm_cpuset.o ../lib/hostlist.o + $(CC) -shared -o pam_slurm_cpuset.so $(OBJS) ../lib/hostlist.o \ + pam_slurm_cpuset.o -lbitmask $(LLIBS) -lpam -lpam_misc +.c.o: + $(CC) $(CFLAGS) $(FLAGS) -o $@ -fPIC -c $< + +conf.o : conf-parser.h + +conf-lexer.c : conf-parser.l conf-parser.h + flex -oconf-lexer.c conf-parser.l + +conf-parser.c conf-parser.h : conf-parser.y + bison -d -oconf-parser.c conf-parser.y + +clean: + -rm -f *.o *.so conf-parser.[ch] conf-lexer.c cpuset_release_agent test diff --git a/cpuset/README b/cpuset/README new file mode 100644 index 0000000..5fabc24 --- /dev/null +++ b/cpuset/README @@ -0,0 +1,71 @@ + +INTRODUCTION + +The SLURM 'cpuset' plugin uses Linux cpusets to constrain jobs to +the number of CPUs they have been allocated on nodes. The plugin is +specifically designed for systems sharing nodes and using CPU scheduling +(i.e. using the select/cons_res plugin). The plugin will not work on +systems where CPUs are oversubscribed to jobs (i.e. strict node sharing +without the use of select/cons_res). + +The plugin uses SLURM's spank framework, and thus it is enabled by adding +the following line to /etc/slurm/plugstack.conf: + + required cpuset.so [options] + +where [options] may be supplied to tune module behavior. + +The plugin may also constrain job steps to their own cpusets under +the job cpuset. This may be useful when running multiple job steps +under a single allocation, as the resources of each job step may +be partitioned into separate job steps. 
This functionality is enabled +by the srun user option + + --use-cpusets=[args...] + +Use of the --use-cpusets option for job steps is described below. + + +REQUIREMENTS + +The cpuset plugin of course requires cpuset support. It also uses the +libbitmask and libcpuset libraries from SGI for creating and managing +cpusets. Source for these libraries is available at + + http://oss.sgi.com/projects/cpusets/ + +The cpuset filesystem must also be mounted at runtime in order for +the plugin to be able to query and create cpusets. To mount the cpuset +filesystem, use: + + mount -t cpuset none /dev/cpuset + +The plugin currently assumes that the cpuset filesystem will be available +under /dev/cpuset. + +Included with the cpuset plugin source is a cpusets "release +agent" (release-agent.c) which may optionally be installed as +/sbin/cpuset_release_agent on any nodes using the SLURM cpuset plugin. +This release agent will be run for each SLURM cpuset when the last task +within the cpuset exits, and will free the cpuset immediately (with +proper locking so as not to race with other jobs). This release agent +is optional for a couple of reasons: + + 1. In the current version of Linux for which this plugin was written + (RHEL5), there can only be one release-agent system-wide. We don't + want to interfere with other uses of cpusets if they exist. + + 2. The cpuset plugin removes stale cpusets at startup anyway. So, + the cpuset_release_agent is not a critical component. However, + it is nice to clean up job cpusets as the jobs exit, instead of + waiting until the *next* job is run. Unused cpusets lying around + may be confusing to users and sysadmins. + + +MAN PAGES + +This file is out of date. For up-to-date information see the +man pages provided with this software: slurm-cpuset(8), +use-cpusets(1), and pam_slurm_cpuset(8). 
+ +$Id: README 7653 2008-07-29 22:33:31Z grondo $ diff --git a/cpuset/conf-parser.l b/cpuset/conf-parser.l new file mode 100644 index 0000000..d23b613 --- /dev/null +++ b/cpuset/conf-parser.l @@ -0,0 +1,78 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +%{ +#include +#include +#include + +#include "conf.h" + +#define YYSTYPE char * +#include "conf-parser.h" + +%} + +%option nounput + +%% + +#[^\n]* ; /* Ignore comments. */ +[ \t\r]+ ; /* Ignore whitespace. */ +; { return ';'; } +, { return ','; } +(#.*)?\\?\n { return '\n'; } + +\"[^\"]*\" | +\'[^\']*\' { + yytext [strlen (yytext) - 1] = '\0'; + yylval = strdup (yytext+1); + return STRING; + } + +(fit-)?policy { return POLICY; } +order { return ORDER; } +use-idle | +alloc-idle { return USE_IDLE; } +constrain-mem(s)? 
{ return CONST_MEM; } +kill-orph(an)?s { return KILL_ORPHS; } += { return '='; } + +0 | +no | +No { return FALSE; } +1 | +yes | +Yes { return TRUE; } + +[^=;, \t\r\n]+ { + yylval = strdup (yytext); + return STRING; + } + +%% + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/conf-parser.y b/cpuset/conf-parser.y new file mode 100644 index 0000000..838b0e1 --- /dev/null +++ b/cpuset/conf-parser.y @@ -0,0 +1,199 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + + +%{ +#include +#include +#include +#include + +#include "conf.h" +#include "log.h" + +extern int yylex (); +void yyerror (const char *s); +extern FILE *yyin; + +static int cpuset_conf_line; + +#define YYSTYPE char * +#define YYDEBUG 1 +int yydebug = 0; + +static int cf_policy (const char *); +static int cf_use_idle (const char *); +static int cf_order (const char *); +static int cf_const_mem (int); +static int cf_kill_orphs (int); + +%} + +%token POLICY "policy" +%token USE_IDLE "use-idle" +%token CONST_MEM "constrain-mem" +%token KILL_ORPHS "kill-orphs" +%token ORDER "order" +%token TRUE "true" +%token FALSE "false" +%token STRING "string" + +%error-verbose + +%% + +file : /* empty */ + | file stmts + ; + +stmts : end + | stmt end + | stmts stmt + ; + +stmt : POLICY '=' STRING { if (cf_policy ($3) < 0) YYABORT; } + | USE_IDLE '=' STRING { if (cf_use_idle ($3) < 0) YYABORT; } + | USE_IDLE '=' FALSE { if (cf_use_idle ("no") < 0) YYABORT; } + | USE_IDLE '=' TRUE { if (cf_use_idle ("yes") < 0) YYABORT; } + | CONST_MEM '=' TRUE { if (cf_const_mem (1) < 0) YYABORT; } + | CONST_MEM '=' FALSE { if (cf_const_mem (0) < 0) YYABORT; } + | KILL_ORPHS '=' TRUE { if (cf_kill_orphs (1) < 0) YYABORT; } + | KILL_ORPHS '=' FALSE { if (cf_kill_orphs (0) < 0) YYABORT; } + | ORDER '=' STRING { if (cf_order ($3) < 0) YYABORT; } + +end : '\n' { cpuset_conf_line++; } + | ';' + ; + +%% + +static cpuset_conf_t conf; +static const char * cpuset_conf_filename = NULL; + +void cpuset_conf_debug () +{ + yydebug = 1; +} + +static const char * cf_file () +{ + if (!cpuset_conf_filename) + return ("stdin"); + return (cpuset_conf_filename); +} + +static int cf_line () +{ + return (cpuset_conf_line); +} + +void yyerror (const char *s) +{ + log_err ("%s: %d: %s\n", cf_file (), cf_line (), s); +} + +int cpuset_conf_parse (cpuset_conf_t cf, const char *path) +{ + cpuset_conf_filename = NULL; + + cpuset_conf_set_file (cf, 
path); + + if (strcmp (path, "-") == 0) + yyin = stdin; + else if (!(yyin = fopen (path, "r"))) { + int err = errno; + log_err ("open: %s: %s\n", path, strerror (errno)); + errno = err; + return (-1); + } + + cpuset_conf_filename = path; + cpuset_conf_line = 1; + conf = cf; + + log_debug ("reading config from \"%s\"\n", cf_file ()); + + if (yyparse ()) { + log_err ("%s: %d: parser failed\n", cf_file (), cf_line ()); + errno = 0; + return (-1); + } + + fclose (yyin); + + return (0); +} + +static int cf_policy (const char *name) +{ + log_debug ("%s: %d: Setting allocation policy to %s.\n", + cf_file (), cf_line(), name); + if (cpuset_conf_set_policy_string (conf, name) < 0) + return log_err ("%s: %d: Invalid allocation policy '%s'.\n", + cf_file (), cf_line (), name); + return (0); +} + +static int cf_use_idle (const char *s) +{ + log_debug ("%s: %d: Setting idle node use policy to %s.\n", + cf_file (), cf_line(), s); + if (cpuset_conf_set_alloc_idle_string (conf, s) < 0) + return log_err ("%s: %d: Invalid alloc-idle string '%s'\n", + cf_file (), cf_line (), s); + return (0); +} + +static int cf_order (const char *s) +{ + log_debug ("%s: %d: Setting order to %s.\n", + cf_file (), cf_line (), s); + + if (strcasecmp (s, "reverse") == 0) + return cpuset_conf_set_order (conf, 1); + else if (strcasecmp (s, "normal") == 0) + return cpuset_conf_set_order (conf, 0); + + return log_err ("%s: %d: Invalid setting for order: %s\n", + cf_file (), cf_line (), s); +} + +static int cf_const_mem (int val) +{ + log_debug ("%s: %d: Setting constrain-memsto %s.\n", + cf_file (), cf_line(), val ? "true" : "false"); + return (cpuset_conf_set_constrain_mem (conf, val)); +} + +static int cf_kill_orphs (int val) +{ + log_debug ("%s: %d: Setting kill-orphans to %s.\n", + cf_file (), cf_line(), val ? 
"true" : "false"); + return (cpuset_conf_set_kill_orphans (conf, val)); +} + +/* + * vi: ts=4 sw=4 expandtab + */ + diff --git a/cpuset/conf.c b/cpuset/conf.c new file mode 100644 index 0000000..0aa8b64 --- /dev/null +++ b/cpuset/conf.c @@ -0,0 +1,299 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + +#include +#include +#include +#include + +#include "conf.h" +#include "log.h" + +#include "conf-parser.h" + +static const char * default_config = "/etc/slurm/slurm-cpuset.conf"; + +struct cpuset_conf { + char filename [1024]; + + enum fit_policy policy; + + unsigned filename_valid:1; + unsigned reverse_order:1; + unsigned alloc_idle_nodes:1; + unsigned use_idle_if_multiple:1; + unsigned constrain_mems:1; + unsigned kill_orphans:1; +}; + + +/* + * Accessor routines + */ +enum fit_policy cpuset_conf_policy (cpuset_conf_t conf) +{ + return (conf->policy); +} + +int cpuset_conf_alloc_idle (cpuset_conf_t conf) +{ + return (conf->alloc_idle_nodes); +} + +int cpuset_conf_alloc_idle_gt (cpuset_conf_t conf) +{ + return (conf->alloc_idle_nodes && !conf->use_idle_if_multiple); +} + +int cpuset_conf_alloc_idle_multiple (cpuset_conf_t conf) +{ + return (conf->alloc_idle_nodes && conf->use_idle_if_multiple); +} + +int cpuset_conf_constrain_mem (cpuset_conf_t conf) +{ + return (conf->constrain_mems); +} + +int cpuset_conf_kill_orphans (cpuset_conf_t conf) +{ + return (conf->kill_orphans); +} + +int cpuset_conf_reverse_order (cpuset_conf_t conf) +{ + return (conf->reverse_order); +} + +int cpuset_conf_set_policy (cpuset_conf_t conf, enum fit_policy policy) +{ + if (!conf) + return (-1); + conf->policy = policy; + return (0); +} + +int cpuset_conf_set_policy_string (cpuset_conf_t conf, const char *name) +{ + if (strcmp (name, "best-fit") == 0) + return (cpuset_conf_set_policy (conf, BEST_FIT)); + else if (strcmp (name, "worst-fit") == 0) + return (cpuset_conf_set_policy (conf, WORST_FIT)); + else if (strcmp (name, "first-fit") == 0) + return (cpuset_conf_set_policy (conf, FIRST_FIT)); + else + return (-1); +} + +int cpuset_conf_set_alloc_idle (cpuset_conf_t conf, int alloc_idle) +{ + if (!conf) + return (-1); + conf->alloc_idle_nodes = alloc_idle; + return (0); +} + +int 
cpuset_conf_set_alloc_idle_mode (cpuset_conf_t conf, int multiple_only) +{ + if (!conf) + return (-1); + conf->use_idle_if_multiple = multiple_only; + return (0); +} + +int cpuset_conf_set_alloc_idle_string (cpuset_conf_t conf, const char *s) +{ + if (strcmp (s, "0") == 0 || + strcasecmp (s, "never") == 0 || + strcasecmp (s, "no") == 0) + return (cpuset_conf_set_alloc_idle (conf, 0)); + + if (strcmp (s, "1") == 0 || + strcasecmp (s, "yes") == 0) + return (cpuset_conf_set_alloc_idle (conf, 1)); + + if (strcasecmp (s, "multiple") == 0 || + strcasecmp (s, "mult") == 0) + return (cpuset_conf_set_alloc_idle_mode (conf, 1)); + + if (strcasecmp (s, "gt") == 0 || + strcasecmp (s, "greater") == 0) + return (cpuset_conf_set_alloc_idle_mode (conf, 0)); + + log_err ("Unknown alloc-idle setting \"%s\"\n", s); + + return (-1); +} + +int cpuset_conf_parse_opt (cpuset_conf_t conf, const char *opt) +{ + /* + * First check to see if we're setting a policy + */ + if (cpuset_conf_set_policy_string (conf, opt) == 0) + return (0); + + if (strncmp ("policy=", opt, 7) == 0) { + if (cpuset_conf_set_policy_string (conf, opt + 7) < 0) + return (log_err ("Unknown allocation policy \"%s\"", opt)); + } + + /* + * Next check for new config file via "conf=" + */ + if (strncmp ("conf=", opt, 5) == 0) + return (cpuset_conf_parse (conf, opt + 5)); + + if ((strcmp ("!idle-1st", opt) == 0) || + (strcmp ("no-idle", opt) == 0)) + return (cpuset_conf_set_alloc_idle (conf, 0)); + + if (strncmp ("idle-1st=", opt, 9) == 0) + return (cpuset_conf_set_alloc_idle_string (conf, opt + 9)); + + if (strncmp ("idle-first=", opt, 11) == 0) + return (cpuset_conf_set_alloc_idle_string (conf, opt + 11)); + + if ((strcmp ("!mem", opt) == 0) || + (strcmp ("nomem", opt) == 0) || + (strcmp ("!constrain-mem", opt) == 0)) + return (cpuset_conf_set_constrain_mem (conf, 0)); + + if ((strcmp ("mem", opt) == 0) || + (strcmp ("constrain-mem", opt) == 0)) + return (cpuset_conf_set_constrain_mem (conf, 1)); + + if ((strcmp 
("reverse", opt) == 0) || + (strcmp ("order=reverse", opt) == 0)) + return (cpuset_conf_set_order (conf, 1)); + + if ((strcmp ("order=normal", opt) == 0)) + return (cpuset_conf_set_order (conf, 0)); + + return (log_err ("Unknown option \"%s\"\n", opt)); +} + +int cpuset_conf_set_constrain_mem (cpuset_conf_t conf, int constrain_mem) +{ + if (!conf) + return (-1); + conf->constrain_mems = constrain_mem; + return (0); +} + +int cpuset_conf_set_kill_orphans (cpuset_conf_t conf, int kill_orphans) +{ + if (!conf) + return (-1); + conf->kill_orphans = kill_orphans; + return (0); +} + +int cpuset_conf_set_order (cpuset_conf_t conf, int reverse) +{ + if (!conf) + return (-1); + conf->reverse_order = reverse; + return (0); +} + + +/* + * Create and Destroy: + */ +cpuset_conf_t cpuset_conf_create () +{ + cpuset_conf_t conf = malloc (sizeof (*conf)); + + if (conf == NULL) + return (NULL); + + memset (conf->filename, 0, sizeof (conf->filename)); + conf->filename_valid = 0; + + /* + * Set defaults + */ + conf->policy = BEST_FIT; + conf->reverse_order = 0; + conf->alloc_idle_nodes = 1; + conf->use_idle_if_multiple = 1; + conf->constrain_mems = 1; + conf->kill_orphans = 0; + + return (conf); +} + +void cpuset_conf_destroy (cpuset_conf_t conf) +{ + if (conf) free (conf); +} + + +/* + * Parsing + */ + +static int parse_if_exists (cpuset_conf_t conf, const char *file) +{ + if (access (file, F_OK) < 0) + return (0); + + if (access (file, R_OK) < 0) { + log_err ("File %s exists but is not readable.\n", file); + return (-1); + } + + if (cpuset_conf_parse (conf, file) < 0) + return (-1); + + /* Successfully read config file */ + return (0); +} + +int cpuset_conf_parse_system (cpuset_conf_t conf) +{ + return (parse_if_exists (conf, default_config)); +} + +const char * cpuset_conf_file (cpuset_conf_t conf) +{ + if (!conf->filename_valid) + return (NULL); + return (conf->filename); +} + +void cpuset_conf_set_file (cpuset_conf_t conf, const char *file) +{ + strncpy (conf->filename, file, 
sizeof (conf->filename)); + conf->filename_valid = 1; +} + +/* + * Later, perhaps allow a per-user conf file in ~/.slurm/cpuset.conf... + */ + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/conf.h b/cpuset/conf.h new file mode 100644 index 0000000..bc8b31b --- /dev/null +++ b/cpuset/conf.h @@ -0,0 +1,101 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + + +#ifndef _CPUSET_CONF_H +#define _CPUSET_CONF_H + +typedef struct cpuset_conf * cpuset_conf_t; + +/* + * Valid allocation policies for cpusets + */ +enum fit_policy { + BEST_FIT, + FIRST_FIT, + WORST_FIT, +}; + + +/* + * Accessor routines + */ +enum fit_policy cpuset_conf_policy (cpuset_conf_t conf); + +int cpuset_conf_alloc_idle (cpuset_conf_t conf); + +int cpuset_conf_constrain_mem (cpuset_conf_t conf); + +int cpuset_conf_alloc_idle_gt (cpuset_conf_t conf); + +int cpuset_conf_alloc_idle_multiple (cpuset_conf_t conf); + +int cpuset_conf_kill_orphans (cpuset_conf_t conf); + +int cpuset_conf_reverse_order (cpuset_conf_t conf); + +int cpuset_conf_set_policy (cpuset_conf_t conf, enum fit_policy policy); + +int cpuset_conf_set_alloc_idle (cpuset_conf_t conf, int alloc_idle); + +int cpuset_conf_set_alloc_idle_mode (cpuset_conf_t conf, int multiple_only); + +int cpuset_conf_set_kill_orphans (cpuset_conf_t conf, int kill_orphans); + +int cpuset_conf_set_alloc_idle_string (cpuset_conf_t conf, const char *s); + +int cpuset_conf_set_policy_string (cpuset_conf_t conf, const char *name); + +int cpuset_conf_set_constrain_mem (cpuset_conf_t conf, int constrain_mem); + +int cpuset_conf_set_order (cpuset_conf_t conf, int reverse); +/* + * Create and Destroy: + */ +cpuset_conf_t cpuset_conf_create (); + +void cpuset_conf_destroy (cpuset_conf_t conf); + + +/* + * Parsing + */ + +int cpuset_conf_parse (cpuset_conf_t conf, const char *path); + +int cpuset_conf_parse_system (cpuset_conf_t conf); + +int cpuset_conf_parse_opt (cpuset_conf_t conf, const char *opt); + +/* + * Return filename of last config file parsed + */ +const char *cpuset_conf_file (cpuset_conf_t conf); + +void cpuset_conf_set_file (cpuset_conf_t conf, const char *file); + +#endif +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/cpuset.c b/cpuset/cpuset.c new file mode 100644 index 0000000..7fe6442 --- /dev/null +++ 
b/cpuset/cpuset.c @@ -0,0 +1,493 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + + +#include +#include +#include +#include +#include + +#include + +/* SGI libcpuset */ +#include +#include + +#include "fd.h" +#include "list.h" +#include "split.h" +#include "util.h" +#include "create.h" +#include "conf.h" +#include "log.h" +#include "slurm.h" + +SPANK_PLUGIN (cpuset, 1) + +/* + * Help message for user option + */ +static const char cpuset_help_string [] = +"\ +use-cpusets: Automatically allocate cpusets to each step within a job.\n\ +\n\ +When using the SLURM cpuset.so plugin, the default behavior is to allocate\n\ +one cpuset per job, and run all subsequent job steps within the job cpuset.\n\ +When using --use-cpusets, the cpuset plugin will re-allocate CPUs and\n\ +optionally memory nodes from the job cpuset into a child cpuset for the\n\ +executing job step. 
This allows convenient separation of multiple job steps \n\ +being run in parallel within a single job allocation.\n\ +\n\ +By default, the same allocation options are used for job steps as are\n\ +configured for jobs. These options can be tuned by providing arguments to\n\ +the --use-cpusets option.\n\ +\n\ +Option Usage: --use-cpusets=[args...]\n\ +\n\ +where args... is a comma separated list of one or more of the following\n\ + help Display this message.\n\ + debug Enable verbose debugging messages.\n\ + tasks Additionally constrain tasks to cpusets.\n\ +\n\ + Policy options:\n\ + best-fit Allocate tasks to most full nodes/sockets first.\n\ + worst-fit Allocate tasks to least full nodes/sockets first.\n\ + first-fit Allocate tasks to first free slots found.\n\ + reverse Reverse CPU allocation order (start at last CPU).\n\ + order=normal Normal CPU allocation order (start at first CPU).\n\ + no-idle Do not try to allocate whole idle nodes first.\n\ +\n\ + idle-first=[policy] Use [policy] to allocate idle nodes first, where\n\ + policy is one of:\n\ + gt Allocate idle nodes first if the number of \n\ + tasks is greater than or equal to the size \n\ + of a socket/NUMA node.\n\ + mult Allocate idle nodes first only if the number of\n\ + tasks in the job step is a multiple of the\n\ + size of a socket/NUMA node.\n\ + no Equivalent to no-idle.\n\ +\n\ + nomem Do not also constrain memory to the local nodes of\n\ + the selected CPUs.\n\n"; + +static List user_options = NULL; + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + +static cpuset_conf_t conf = NULL; + +//static int step_cpuset_created = 0; +static int per_task_cpuset = 0; /* --use-cpuset=tasks */ + +static uint32_t jobid; +static uint32_t stepid; +static int step_ncpus = -1; +static int ncpus_per_task = -1; +static int debug_level = 0; +static int user_debug_level = 0; + +static int parse_one_option (const char *opt) +{ + if (strncmp ("debug=", opt, 6) == 0) + debug_level = str2int (opt + 6); + else if (strcmp ("debug", opt) == 0) + debug_level = 1; + else + return (cpuset_conf_parse_opt (conf, opt)); + + return (0); +} + +static int parse_options (int ac, char **av) +{ + int i; + for (i = 0; i < ac; i++) + parse_one_option (av[i]); + return (0); +} + +/* + * XXX: Since we don't have a good way to determine the number of + * CPUs allocated to this job on this node, we have to query + * the slurm controller (!). + * + */ +static int query_ncpus_per_node (spank_t sp, uint32_t jobid) +{ + const char var[] = "SLURM_JOB_CPUS_PER_NODE"; + char val[16]; + job_info_msg_t * msg; + int cpus_per_node = -1; + int i; + + /* + * If SLURM_JOB_CPUS_PER_NODE is set in environment, + * return that value so we don't have to contact SLURM controller. + */ + if (spank_getenv (sp, var, val, sizeof (val)) == ESPANK_SUCCESS) { + cpuset_debug ("SLURM_JOB_CPUS_PER_NODE=%s\n", val); + return (str2int (val)); + } + + /* + * Otherwise, we have to query all jobs and find the right job record. + */ + if (dyn_slurm_load_jobs (&msg) < 0) { + cpuset_error ("slurm_load_jobs: %s\n", slurm_strerror (errno)); + return (-1); + } + + for (i = 0; i < msg->record_count; i++) { + job_info_t *j = &msg->job_array[i]; + + if (j->job_id == jobid) { + /* + * XXX: Assumes cpus_per_node is the same across the whole job. 
+ */ + cpus_per_node = (int) j->cpus_per_node[0]; + break; + } + } + + dyn_slurm_free_job_info_msg (msg); + if (cpus_per_node < 0) + cpuset_error ("Failed to get nCPUs for this node: %s\n", slurm_strerror (errno)); + return (cpus_per_node); +} + +int migrate_job_to_cpuset (uint32_t jobid, uid_t uid, pid_t pid) +{ + int rc; + char path[4096]; + int n = 0; + + cpuset_getcpusetpath (0, path, sizeof (path)); + + if (pid) + cpuset_debug ("Migrate: Moving %d from cpuset %s\n", pid, path); + else + cpuset_debug ("Migrate: Moving from cpuset %s\n", path); + /* + * If we're not under /slurm, prepend user cpuset + */ + if (strncmp (path, "/slurm", 6) != 0) + n = snprintf (path, sizeof (path), "/slurm/%d", uid); + else + n = strlen (path); + + /* + * Now everything happens relative to current cpuset + */ + rc = snprintf (path + n, sizeof (path) - n, "/%u", jobid); + + if (rc < 0 || rc > sizeof (path)) { + cpuset_error ("job%u: snprintf failed: %s\n", jobid, strerror (errno)); + return (-1); + } + + if (pid) + cpuset_debug ("Migrate: Moving %d to cpuset %s\n", pid, path); + else + cpuset_debug ("Migrate: Moving to cpuset %s\n", path); + + if (cpuset_move (pid, path) < 0) + return (-1); + return (0); +} + +static int job_ncpus_per_task (spank_t sp) +{ + const char var[] = "SLURM_CPUS_PER_TASK"; + char val [128]; + + if (ncpus_per_task < 0) { + if (spank_getenv (sp, var, val, sizeof (val)) != ESPANK_SUCCESS) { + //cpuset_error ("getenv (SLURM_CPUS_PER_TASK) failed\n"); + return (-1); + } + ncpus_per_task = str2int(val); + } + return (ncpus_per_task); +} + +static int job_step_ncpus (spank_t sp) +{ + uint32_t ntasks; + + if (spank_get_item (sp, S_JOB_LOCAL_TASK_COUNT, &ntasks) != ESPANK_SUCCESS) + return (-1); + + return (job_ncpus_per_task (sp) * ntasks); +} + +static int log_slurm (const char *msg) +{ + slurm_info ("%s", msg); + return (0); +} + +static int log_stderr (const char *msg) +{ + fprintf (stderr, "%s", msg); + return (0); +} + +int slurm_spank_init (spank_t sp, int 
ac, char *av[]) +{ + int rc; + int lockfd; + uid_t uid; + + if (!spank_remote (sp)) + return (0); + + log_add_dest (1, log_slurm); + + conf = cpuset_conf_create (); + cpuset_conf_parse_system (conf); + + parse_options (ac, av); + + log_update (debug_level, log_slurm); + + /* + * Get jobid + */ + if (spank_get_item (sp, S_JOB_ID, &jobid) != ESPANK_SUCCESS) { + cpuset_error ("Failed to get jobid: %s\n", strerror (errno)); + return (-1); + } + + if (spank_get_item (sp, S_JOB_STEPID, &stepid) != ESPANK_SUCCESS) { + cpuset_error ("Failed to get stepid: %s\n", strerror (errno)); + return (-1); + } + + if (spank_get_item (sp, S_JOB_UID, &uid) != ESPANK_SUCCESS) { + cpuset_error ("Failed to get uid: %m\n", strerror (errno)); + return (-1); + } + + cpuset_debug ("Attempting to create slurm cpuset\n"); + /* + * Try to migrate to existing cpuset for this job. If + * successful, then we're done. + */ + if ((lockfd = slurm_cpuset_create (conf)) < 0) { + cpuset_error ("Failed to create/lock slurm cpuset: %s\n", + strerror (errno)); + return (-1); + } + + if ((rc = migrate_job_to_cpuset (jobid, uid, 0)) != 0) { + /* + * No existing job cpuset on this node, create one: + */ + int ncpus = query_ncpus_per_node (sp, jobid); + + cpuset_debug ("Creating cpuset for job=%d uid=%d ncpus=%d\n", + jobid, uid, ncpus); + + if ((rc = create_cpuset_for_job (conf, jobid, uid, ncpus)) < 0) + goto done; + + if ((rc = migrate_job_to_cpuset (jobid, uid, 0)) < 0) { + log_err ("Failed to migrate jobid %d to cpuset: %s\n", + jobid, strerror (errno)); + goto done; + } + } + + step_ncpus = job_step_ncpus (sp); + +done: + slurm_cpuset_unlock (lockfd); + return (rc); +} + +/* + * User optional per-step cpuset option parsing + * Options are processed *after* slurm_spank_init completes, + * so we have to create the step cpuset within the option + * handler. 
+ */ + +static int set_user_options (int remote) +{ + char *opt; + ListIterator i; + int rc = 0; + + if (user_options == NULL) + return (0); + + i = list_iterator_create (user_options); + while ((opt = list_next (i))) { + if (!remote && (strcmp (opt, "help") == 0)) { + fprintf (stderr, cpuset_help_string); + exit (0); + } + else if (strcmp (opt, "tasks") == 0) + per_task_cpuset = 1; + else if (strncmp ("debug=", opt, 6) == 0) + user_debug_level = str2int (opt + 6); + else if (strcmp ("debug", opt) == 0) + user_debug_level = 1; + else if (parse_one_option (opt) < 0) + rc = -1; + } + /* + * Done with user_options now. + */ + list_destroy (user_options); + return (rc); +} + +static int parse_user_option (int val, const char *optarg, int remote) +{ + int rc = 1; + + log_add_dest (0, log_stderr); + + if (optarg) { + char *str; + str = strdup (optarg); + user_options = list_split (",", str); + free (str); + + /* + * If running 'local' (i.e. in srun), then we may + * not yet have created a cpuset configuration object. + * We'll need this to test-parse options, so create it now. + */ + if (!conf) conf = cpuset_conf_create (); + + if (set_user_options (remote) < 0) + return (-1); + + } + + log_update (user_debug_level, log_stderr); + + if (remote && !spank_symbol_supported ("slurm_spank_init_post_opt")) { + /* + * Must create job step cpuset in option handler unless + * init_post_opt callback exists in this version of SLURM. + */ + int lockfd = slurm_cpuset_lock (); + if (debug_level > 0 || user_debug_level > 0) + print_current_cpuset_info (); + if ((rc = create_cpuset_for_step (conf, stepid, step_ncpus)) < 0) { + /* + * If step cpuset creation failed, ensure we don't try + * to create per-task cpuset. 
+ */ + cpuset_error ("Failed to create cpuset for step %d: %s\n", + stepid, strerror (errno)); + per_task_cpuset = 0; + } + else + rc = migrate_job_to_cpuset (stepid, -1, 0); + slurm_cpuset_unlock (lockfd); + } + + return (rc); +} + +struct spank_option spank_options [] = { + { "use-cpusets", "[args..]", + "Use per-job-step and per-task cpusets. (args=`help' for more info)", + 2, 0, (spank_opt_cb_f) parse_user_option + }, + SPANK_OPTIONS_TABLE_END +}; + +int slurm_spank_init_post_opt (spank_t sp, int ac, char **av) +{ + int lockfd; + int rc; + + if (!spank_remote (sp) || !user_options) + return (0); + + if ((lockfd = slurm_cpuset_lock ()) < 0) + return (-1); + + if (debug_level > 0 || user_debug_level > 0) + print_current_cpuset_info (); + + if ((rc = create_cpuset_for_step (conf, stepid, step_ncpus)) < 0) + per_task_cpuset = 0; + else + rc = migrate_job_to_cpuset (stepid, -1, 0); + + if (debug_level > 0) + print_current_cpuset_info (); + + slurm_cpuset_unlock (lockfd); + + return (rc); +} + +int slurm_spank_task_post_fork (spank_t sp, int ac, char **av) +{ + pid_t task_pid; + int taskid; + int lockfd; + int cpus_per_task; + int rc; + + if (!per_task_cpuset) + return (0); + + if (spank_get_item (sp, S_TASK_ID, &taskid) != ESPANK_SUCCESS) { + cpuset_error ("Failed to get taskid\n"); + return (-1); + } + + if (spank_get_item (sp, S_TASK_PID, &task_pid) != ESPANK_SUCCESS) { + cpuset_error ("Failed to get task pid\n"); + return (-1); + } + + if ((lockfd = slurm_cpuset_lock ()) < 0) + return (-1); + + cpus_per_task = job_ncpus_per_task (sp); + + if ((rc = create_cpuset_for_task (conf, taskid, cpus_per_task)) == 0) + rc = migrate_job_to_cpuset (taskid, -1, task_pid); + + slurm_cpuset_unlock (lockfd); + + return (rc); + +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/cpuset.init b/cpuset/cpuset.init new file mode 100644 index 0000000..6759809 --- /dev/null +++ b/cpuset/cpuset.init @@ -0,0 +1,47 @@ +#!/bin/sh 
###############################################################################
# chkconfig:      12345 01 99
###############################################################################
### BEGIN INIT INFO
# Provides:       slurm-cpuset
# Required-Start: $named $time
# Default-Start:  3 4 5
# Default-Stop:   0 1 2 6
# Description:    Mount /dev/cpuset filesystem
### END INIT INFO
###############################################################################


case "$1" in
  start)
    echo -n "Mounting /dev/cpuset filesystem: "
    mkdir -m 0755 -p /dev/cpuset
    if ! mount -t cpuset none /dev/cpuset; then
        echo "Failed"
        exit 1
    fi

    # Spread slab allocations over all memory nodes
    echo 1 > /dev/cpuset/memory_spread_slab
    echo "Success"
    ;;

  stop)
    # Do nothing
    exit 0
    ;;

  status)
    # Terminate the line and report "not mounted" through the exit
    # status as well, so callers do not have to parse the text.
    if [ -f /dev/cpuset/cpus ]; then
        echo "cpuset filesystem is mounted."
    else
        echo "cpuset filesystem is not mounted."
        exit 3
    fi
    ;;

  *)
    echo "Usage: $0 start|stop|status"
    exit 1
    ;;
esac

exit 0
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + + +#include +#include +#include +#include + +#include "log.h" +#include "conf.h" +#include "create.h" +#include "util.h" +#include "nodemap.h" + +/* + * Return the /dev/cpuset relative path for job, step, or task [id]. + * Basically if we're in / or /slurm, return "/slurm//" + * otherwise return /. + */ +static int job_cpuset_path (uint32_t id, uid_t uid, char *path, int len) +{ + int n; + char buf [64]; + + if (cpuset_getcpusetpath (0, buf, sizeof (buf)) < 0) + return (-1); + + /* + * If we are in root or /slurm cpuset, prepend path to user cpuset + */ + if (strcmp (buf, "/") == 0 || strcmp (buf, "/slurm") == 0) + snprintf (buf, sizeof (buf), "/slurm/%d", uid); + + n = snprintf (path, len, "%s/%u", buf, id); + if ((n < 0) || (n >= len)) + return (-1); + + return (0); +} + +/* + * Return a struct cpuset with cpus set to those in [alloc] and + * memory constrained to local memories if constrain_mems == 1. 
+ */ +static struct cpuset * +do_cpuset_create (cpuset_conf_t cf, const struct bitmask *alloc) +{ + struct cpuset *cp; + struct bitmask *mems; + + if ((cp = cpuset_alloc ()) == NULL) { + cpuset_error ("Failed to alloc job cpuset: %m"); + return (NULL); + } + + if (cpuset_setcpus (cp, alloc) < 0) { + cpuset_error ("Failed to set cpus: %m"); + goto fail1; + } + + if ((mems = bitmask_alloc (cpuset_mems_nbits ())) == NULL) { + cpuset_error ("failed to alloc mems bitmask: %m"); + goto fail1; + } + + if (cpuset_conf_constrain_mem (cf)) { + if (cpuset_localmems (alloc, mems) < 0) { + cpuset_error ("cpuset_localmems failed: %m"); + goto fail2; + } + } else { + if (cpuset_getmems (NULL, mems) < 0) { + cpuset_error ("cpuset_getmems: %m"); + goto fail2; + } + } + + if (cpuset_setmems (cp, mems) < 0) { + cpuset_error ("cpuset_setmems failed: %m"); + goto fail2; + } + + cpuset_set_iopt (cp, "notify_on_release", 1); + + bitmask_free (mems); + return (cp); + +fail2: + bitmask_free (mems); +fail1: + cpuset_free (cp); + return (NULL); +} + +int job_cpuset_exists (uint32_t jobid, uid_t uid) +{ + char path [4096]; + struct cpuset *cp; + int rc; + + if (job_cpuset_path (jobid, uid, path, sizeof (path)) < 0) { + cpuset_error ("Failed to geneerate job cpuset path\n"); + return (0); + } + + cp = cpuset_alloc (); + rc = cpuset_query (cp, path); + cpuset_free (cp); + + return (rc == 0); +} + +/* + * Create a job cpuset for job [jobid] user [uid] with cpus in [alloc] + */ +static int +job_cpuset_create (cpuset_conf_t cf, uint32_t jobid, uid_t uid, + const struct bitmask *alloc) +{ + int rc; + struct cpuset *cp; + char path [4096]; + mode_t oldmask; + + if ((cp = do_cpuset_create (cf, alloc)) < 0) + return (-1); + + if (job_cpuset_path (jobid, uid, path, sizeof (path)) < 0) { + cpuset_error ("Failed to generate job cpuset path: %s\n", + strerror (errno)); + goto out; + } + + oldmask = umask (022); + if (cpuset_create (path, cp) < 0) + cpuset_error ("create [%s]: %s", path, strerror (errno)); 
+ else + rc = 0; + umask (oldmask); + + print_cpuset_info (path, cp); + +out: + cpuset_free (cp); + return (rc); +} + +#if 0 +static struct bitmask * cpuset_cpus_bitmask (const char *name) +{ + struct cpuset *cp = cpuset_alloc (); + struct bitmask *b = NULL; + + if (cpuset_query (cp, name) < 0) { + cpuset_error ("cpuset query %s: %m", name); + goto out; + } + + if ((b = bitmask_alloc (cpumask_size ())) == NULL) { + cpuset_error ("bitmask_alloc: %m"); + goto out; + } + + if (cpuset_getcpus (cp, b) < 0) { + cpuset_error ("Failed to get cpus for cpuset %s: %m", name); + bitmask_free (b); + b = NULL; + } +out: + cpuset_free (cp); + return (b); +} +#endif + +/* + * Create a cpuset for [id] user [uid] with ncpus. + */ +static int +create_cpuset (cpuset_conf_t cf, unsigned int id, uid_t uid, int ncpus) +{ + struct nodemap *map; + struct bitmask *alloc; + int rc = -1; + + if (!(map = nodemap_create (cf, NULL))) + return (-1); + + if ((alloc = nodemap_allocate (map, ncpus)) == NULL) + goto out; + + /* + * Create and/or update user cpuset, under which job cpuset will + * be created. 
+ */ + if ((int) uid >= 0) { + cpuset_debug ("Updating user %d cpuset with %d cpus\n", uid, ncpus); + if (user_cpuset_update (cf, uid, alloc) < 0) { + cpuset_error ("Failed to update user cpuset"); + goto out; + } + } + + if (job_cpuset_create (cf, id, uid, alloc) < 0) + goto out; + + rc = 0; +out: + if (map) + nodemap_destroy (map); + if (alloc) + bitmask_free (alloc); + + if (rc < 0) + log_debug2 ("create_cpuset: id=%u uid=%d ncpus=%d: Failed.\n", + id, uid, ncpus); + + return (rc); +} + +int create_cpuset_for_job (cpuset_conf_t cf, unsigned jobid, uid_t uid, + int ncpus) +{ + return (create_cpuset (cf, jobid, uid, ncpus)); +} + +int create_cpuset_for_step (cpuset_conf_t cf, unsigned int stepid, int ncpus) +{ + return (create_cpuset (cf, stepid, -1, ncpus)); +} + +int create_cpuset_for_task (cpuset_conf_t cf, unsigned int taskid, int ncpus) +{ + return (create_cpuset (cf, taskid, -1, ncpus)); +} + +static int user_cpuset_orphan (uid_t uid, const char *path) +{ + char orphan [1024]; + int n; + n = snprintf (orphan, sizeof (orphan), "/dev/cpuset/slurm/orphan:%d", uid); + if ((n <= 0) || (n > sizeof (orphan))) + return (-1); + if (rename (path, orphan) < 0) + cpuset_error ("Failed to rename %s to %s: %m", path, orphan); + return (0); +} + +static int kill_orphan (const char *name) +{ + struct cpuset_pidlist *pids; + int i; + + if ((pids = cpuset_init_pidlist (name, 0)) == NULL) { + cpuset_error ("cpuset_init_pidlist: %s: %s\n", + name, strerror (errno)); + return (-1); + } + + for (i = 0; i < cpuset_pidlist_length (pids); i++) + kill (cpuset_get_pidlist (pids, i), SIGKILL); + + cpuset_freepidlist (pids); + return (0); +} + +static int user_cpuset_unorphan (uid_t uid, const char *path) +{ + char orphan [1024]; + int n; + n = snprintf (orphan, sizeof (orphan), "/dev/cpuset/slurm/orphan:%d", uid); + if ((n <= 0) || (n > sizeof (orphan))) + return (-1); + cpuset_debug ("rename (%s, %s)\n", orphan, path); + if (rename (orphan, path) < 0) + return (0); + return (1); +} + 
+/* + * If user cpuset does not exist, keep its cpus and mems empty + * They'll be filled in later. + */ +static int user_cpuset_create (const char *path) +{ + int rc = 0; + mode_t oldmask = umask (022); + + if ((mkdir (path, 0755)) < 0 && errno != EEXIST) { + cpuset_error ("mkdir %s: %m", path); + rc = -1; + } + umask (oldmask); + return (rc); +} + + +int +user_cpuset_update (cpuset_conf_t cf, uid_t uid, const struct bitmask *alloc) +{ + int rc = -1; + char path [1024]; + const char *name; + struct bitmask *used; + struct cpuset *cp; + int orphan = 0; + + snprintf (path, sizeof (path), "/dev/cpuset/slurm/%d", uid); + name = cpuset_path_to_name (path); + + /* + * If there is an orphan user login, move it back + * Otherwise, create regular user cpuset if it doesn't + * already exist. + */ + if (!(orphan = user_cpuset_unorphan (uid, path)) + && (user_cpuset_create (path) < 0)) + return (-1); + + cpuset_debug ("Updating user cpuset at %s\n", path); + used = used_cpus_bitmask_path (path, 1); + if (orphan) + bitmask_clearall (used); + if (alloc) + bitmask_or (used, used, alloc); + + if (bitmask_weight (used) == 0) { + /* + * This is an orphaned user cpuset. + * We can't leave it with 0 cpus, and + * we can't leave it allocated under /slurm + * since those cpusets are used for tracking + * in-use cpusets. Instead, just rename the + * current cpuset to an orphans directory. + */ + cpuset_debug ("user_cpuset_orphan: uid=%d\n", uid); + if (cpuset_conf_kill_orphans (cf)) + kill_orphan (name); + else + user_cpuset_orphan (uid, path); + return (0); + } + + if (!(cp = do_cpuset_create (cf, used))) { + bitmask_free (used); + return (-1); + } + +again: + if ((rc = cpuset_modify (name, cp)) < 0) { + /* + * cpuset_modify can potentially return EBUSY. 
+ */ + if (errno == EBUSY || errno == EAGAIN) { + sleep (1); + goto again; + } + cpuset_error ("Failed to modify %s: %m", name); + } + + bitmask_free (used); + cpuset_free (cp); + return (rc); +} + +int update_user_cpusets (cpuset_conf_t cf) +{ + DIR *dirp; + struct dirent *dp; + + if ((dirp = opendir ("/dev/cpuset/slurm")) == NULL) { + cpuset_error ("Unable to open /dev/cpuset/slurm: %m"); + return (-1); + } + + while ((dp = readdir (dirp))) { + int uid; + if ((uid = str2int (dp->d_name)) < 0) + continue; + cpuset_debug ("Checking cpuset for uid %d\n", uid); + user_cpuset_update (cf, uid, NULL); + } + closedir (dirp); + + return (0); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/create.h b/cpuset/create.h new file mode 100644 index 0000000..62ab5de --- /dev/null +++ b/cpuset/create.h @@ -0,0 +1,51 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + + +#ifndef _HAVE_CREATE_H +#define _HAVE_CREATE_H + +#include +#include +#include + +#include "conf.h" + +int job_cpuset_exists (uint32_t jobid, uid_t uid); + +int create_cpuset_for_job (cpuset_conf_t cf, + unsigned int jobid, uid_t uid, int ncpus); + +int create_cpuset_for_step (cpuset_conf_t cf, + unsigned int stepid, int ncpus); + +int create_cpuset_for_task (cpuset_conf_t cf, + unsigned int taskid, int ncpus_per_task); + +int user_cpuset_update (cpuset_conf_t cf, + uid_t uid, const struct bitmask *b); + +int update_user_cpusets (); + +#endif diff --git a/cpuset/log.c b/cpuset/log.c new file mode 100644 index 0000000..36a3fd9 --- /dev/null +++ b/cpuset/log.c @@ -0,0 +1,232 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + + +#include +#include +#include +#include +#include "list.h" +#include "log.h" + +static char facility [64] = "cpuset"; + +struct logger { + int level; + log_f *logfn; +}; + +static List log_list = NULL; + +static struct logger * logger_create (int level, log_f *fn) +{ + struct logger *l = malloc (sizeof (*l)); + + if (l != NULL) { + l->level = level; + l->logfn = fn; + } + + return (l); +} + +void logger_destroy (struct logger *l) +{ + free (l); +} + +int log_add_dest (int level, log_f *fn) +{ + struct logger *l; + + if (log_list == NULL) { + log_list = list_create ((ListDelF) logger_destroy); + } + + if ((l = logger_create (level, fn)) == NULL) + return (-1); + + list_push (log_list, l); + return (0); +} + +int log_set_prefix (const char *prefix) +{ + strncpy (facility, prefix, sizeof (facility)); + return (0); +} + +int find_fn (struct logger *l, log_f *fn) +{ + return (l->logfn == fn); +} + +int log_update (int level, log_f *fn) +{ + struct logger *l = list_find_first (log_list, (ListFindF) find_fn, fn); + + if (l == NULL) + return (-1); + + l->level = level; + return (0); +} + + +void log_cleanup () +{ + list_destroy (log_list); +} + +static int do_log_all (int level, const char *buf) +{ + struct logger *l; + ListIterator i = list_iterator_create (log_list); + + while ((l = list_next (i))) { + if (l->level >= level) + (*l->logfn) (buf); + } + + list_iterator_destroy (i); + return (0); +} + +static void vlog_msg (const char *prefix, int level, const char *format, va_list ap) +{ + char buf[4096]; + char *p; + int n; + int len; + + if (!log_list) + return; + + p = buf; + len = sizeof (buf); + + if (strlen (facility)) { + n = snprintf (p, len, "%s: ", facility); + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } + else { + p += n; + len -= n; + } + } + + /* Add a log level prefix. 
+ */ + if ((len > 0) && prefix) { + n = snprintf (p, len, "%s: ", prefix); + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } + else { + p += n; + len -= n; + } + } + + if ((len > 0) && (format)) { + n = vsnprintf (p, len, format, ap); + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } + else { + p += n; + len -= n; + } + } + + /* Add suffix for truncation if necessary. + */ + if (len <= 0) { + char *q; + const char *suffix = "+"; + q = buf + sizeof (buf) - 1 - strlen (suffix); + p = (p < q) ? p : q; + strcpy (p, suffix); + p += strlen (suffix); + } + + *p = '\0'; + + do_log_all (level, buf); + + return; +} + +int log_err (const char *format, ...) +{ + va_list ap; + va_start (ap, format); + vlog_msg ("Error", -1, format, ap); + va_end (ap); + return (-1); /* So we can do return (log_err (...)) */ +} + +void log_msg (const char *format, ...) +{ + va_list ap; + va_start (ap, format); + vlog_msg (NULL, 0, format, ap); + va_end (ap); + return; +} + +void log_verbose (const char *format, ...) +{ + va_list ap; + va_start (ap, format); + vlog_msg (NULL, 1, format, ap); + va_end (ap); + return; +} + +void log_debug (const char *format, ...) +{ + va_list ap; + va_start (ap, format); + vlog_msg ("Debug", 2, format, ap); + va_end (ap); + return; +} + +void log_debug2 (const char *format, ...) +{ + va_list ap; + va_start (ap, format); + vlog_msg ("Debug", 3, format, ap); + va_end (ap); + return; +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/log.h b/cpuset/log.h new file mode 100644 index 0000000..a8e2b26 --- /dev/null +++ b/cpuset/log.h @@ -0,0 +1,56 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. 
#ifndef _CPUSET_LOG_H
#define _CPUSET_LOG_H

/*
 *  Log levels. Negative values are parenthesized so that the macros
 *   expand safely inside larger expressions.
 */
#define C_LOG_QUIET   (-2)
#define C_LOG_CRIT    (-1)
#define C_LOG_NORMAL  0
#define C_LOG_VERBOSE 1
#define C_LOG_DEBUG   2
#define C_LOG_DEBUG2  3

/*  Signature of a log destination: receives one formatted message. */
typedef int (log_f) (const char *msg);

int log_add_dest (int level, log_f *fn);
int log_update (int level, log_f *fn);
int log_set_prefix (const char *prefix);
void log_cleanup (void);
int log_err (const char *format, ...);
void log_msg (const char *format, ...);
void log_verbose (const char *format, ...);
void log_debug (const char *format, ...);
void log_debug2 (const char *format, ...);

/*
 *  Legacy logging functions
 */
#define cpuset_error(args...)    log_err (args)
#define cpuset_verbose(args...)  log_verbose (args)
#define cpuset_debug(args...)    log_debug (args)
#define cpuset_debug2(args...)   log_debug2 (args)

#endif
/*
 *  Description of one NUMA node on the system.
 *
 *  [usedcpus] is indexed by node-local CPU number (0 .. ncpus-1),
 *   while [localcpus] uses system-wide CPU numbers.
 */
struct node {
    int nodeid;                /* The NUMA node id                        */
    int ncpus;                 /* Total Number of CPUs                    */
    int navail;                /* Number of currently available CPUs      */
    struct bitmask *localcpus; /* Bitmask mapping local CPUs to global    */
    struct bitmask *usedcpus;  /* Bitmask of used cpus (size = ncpus)     */
    struct nodemap *map;       /* Pointer back to the nodemap             */
};

#define ALLOC_IDLE_MULTIPLE 0 /* Allocate idle nodes first if
                                 ntasks is multiple of node size */
#define ALLOC_IDLE_GT       1 /* Allocate idle nodes first if
                                 ntasks is >= node size          */
#define ALLOC_NO_IDLE       2 /* Do not allocate idle nodes first */

/*
 *  Allocation policy flags. These are filled in from the cpuset
 *   configuration by nodemap_policy_update().
 */
struct policy {
    unsigned int reverse:1;
    unsigned int best_fit:1;
    unsigned int first_fit:1;
    unsigned int worst_fit:1;
    unsigned int alloc_idle_first:1;
    unsigned int alloc_idle_multiples_only:1;
};

/*  Policy given to a new nodemap: best fit, idle nodes first. */
static struct policy default_policy = {
    .best_fit = 1,
    .alloc_idle_first = 1,
};

/*
 *  Store the current mapping of CPUs to memory nodes as
 *   well as the currently in-use CPUs.
 */
struct nodemap {
    struct policy policy;     /* Allocation policy: best fit, first fit... */

    int nnodes;               /* Number of NUMA nodes                      */
    int ncpus;                /* Total number of CPUs online               */
    int navail;               /* Total number of CPUs currently available  */
    struct bitmask *usedcpus; /* Bitmask of used CPUs                      */
    struct bitmask *cpus;     /* Bitmask of available CPUs relative to the
                                  current cpuset                           */
    List nodelist;            /* List of nodes in this map                 */
};

+ */ +struct allocation { + int ntasks; /* Number of total tasks to allocate */ + int nleft; /* Number of CPUs left to allocate */ + struct nodemap * map; /* pointer back to nodemap */ + struct bitmask * allocated_cpus; + /* The final bitmask of allocated CPUs */ +}; + +int nodemap_policy_update (struct nodemap *map, cpuset_conf_t cf) +{ + map->policy.best_fit = cpuset_conf_policy (cf) == BEST_FIT; + map->policy.worst_fit = cpuset_conf_policy (cf) == WORST_FIT; + map->policy.first_fit = cpuset_conf_policy (cf) == FIRST_FIT; + map->policy.alloc_idle_first = cpuset_conf_alloc_idle (cf); + map->policy.alloc_idle_multiples_only = + cpuset_conf_alloc_idle_multiple (cf); + map->policy.reverse = cpuset_conf_reverse_order (cf); + return (0); +} + + +static struct bitmask *current_cpuset_cpus () +{ + struct bitmask *cpus; + struct cpuset *cp; + + if ((cp = cpuset_alloc ()) == NULL) { + cpuset_error ("Failed to alloc cpuset: %s\n", strerror (errno)); + return (NULL); + } + + if ((cpus = bitmask_alloc (cpumask_size ())) == NULL) { + cpuset_error ("Failed to alloc bitmask: %s\n", strerror (errno)); + cpuset_free (cp); + return (NULL); + } + + cpuset_query (cp, "."); + cpuset_getcpus (cp, cpus); + cpuset_free (cp); + + return (cpus); +} + +static struct bitmask *used_cpus_bitmask () +{ + return (used_cpus_bitmask_path (NULL, 0)); +} + +static struct node * node_create (struct nodemap *map, int id) +{ + int i, offset; + struct bitmask *mems; + struct node *n = malloc (sizeof (*n)); + + if (n == NULL) + return (NULL); + + + n->map = map; + + n->nodeid = id; + n->ncpus = 0; + n->localcpus = bitmask_alloc (cpumask_size ()); + + /* + * Get the bitmask of local cpus for this node + */ + mems = bitmask_alloc (memmask_size ()); + bitmask_setbit (mems, n->nodeid); + cpuset_localcpus (mems, n->localcpus); + bitmask_free (mems); + + /* + * Now count the number of local CPUs + */ + n->ncpus = bitmask_weight (n->localcpus); + + + /* + * Now set used cpus from node map + */ + n->usedcpus = 
bitmask_alloc (n->ncpus); + + offset = bitmask_first (n->localcpus); + for (i = 0; i < n->ncpus; i++) { + if (bitmask_isbitset (map->usedcpus, offset + i)) + bitmask_setbit (n->usedcpus, i); + } + + n->navail = n->ncpus - bitmask_weight (n->usedcpus); + + cpuset_debug2 ("Done creating node%d with %d/%d CPUs\n", + n->nodeid, n->navail, n->ncpus); + + return (n); +} + +static void node_destroy (struct node *n) +{ + bitmask_free (n->localcpus); + bitmask_free (n->usedcpus); + free (n); +} + + +void nodemap_destroy (struct nodemap *map) +{ + bitmask_free (map->usedcpus); + list_destroy (map->nodelist); + free (map); +} + +static int node_cpus_available (struct nodemap *map, int i) +{ + int rc; + struct bitmask * mems = bitmask_alloc (memmask_size ()); + + if (mems == NULL) + return log_err ("failed to allocate mems mask!!\n"); + + if (cpuset_localmems (map->cpus, mems) < 0) + return log_err ("cpuset_localmems: %s\n", strerror (errno)); + + rc = bitmask_isbitset (mems, i); + + bitmask_free (mems); + return (rc); +} + +struct nodemap * nodemap_create (cpuset_conf_t cf, struct bitmask *used) +{ + int i; + struct nodemap *map = malloc (sizeof (*map)); + + if (map == NULL) + return (NULL); + + map->policy = default_policy; + + map->nodelist = list_create ((ListDelF) node_destroy); + + map->nnodes = memmask_size (); + map->ncpus = cpumask_size (); + + if (used) { + map->usedcpus = bitmask_alloc (bitmask_weight (used)); + bitmask_copy (map->usedcpus, used); + } + else { + map->usedcpus = used_cpus_bitmask (); + } + + if (!map->usedcpus) { + list_destroy (map->nodelist); + free (map); + return (NULL); + } + + map->cpus = current_cpuset_cpus (); + + for (i = 0; i < map->nnodes; i++) { + struct node *n; + + /* + * Don't bother appending this node if none of its CPUs + * are available in the current cpuset + */ + if (!node_cpus_available (map, i)) + continue; + + if ((n = node_create (map, i)) == NULL) { + nodemap_destroy (map); + return (NULL); + } + list_push (map->nodelist, 
n); + } + + map->navail = map->ncpus - bitmask_weight (map->usedcpus); + + log_debug2 ("Created nodemap with %d nodes, %d/%d CPUs\n", + map->nnodes, map->navail, map->ncpus); + + nodemap_policy_update (map, cf); + + return (map); +} + +void print_nodemap (const struct nodemap *map) +{ + struct node *n; + struct bitmask *b; + ListIterator i = list_iterator_create (map->nodelist); + + print_bitmask ("Available CPUs: %s\n", map->cpus); + + b = bitmask_alloc (cpumask_size ()); + bitmask_and (b, map->cpus, map->usedcpus); + + print_bitmask ("Used CPUs: %s\n", b); + bitmask_free (b); + + while ((n = list_next (i))) { + //slurm_info ("Node%d:", n->nodeid); + print_bitmask ("Local CPUs: %s\n", n->localcpus); + print_bitmask ("Used CPUs: %s\n", n->usedcpus); + } + + list_iterator_destroy (i); +} + + +static int find_multiple_of_node_size (struct node *n, int *np) +{ + if (!(*np % n->ncpus) && (n->navail == n->ncpus)) + return (1); + return (0); +} + +static int find_node_lt_size (struct node *n, int *np) +{ + if ((*np >= n->ncpus) && (n->navail == n->ncpus)) + return (1); + return (0); +} + +static int should_allocate_idle_nodes (struct nodemap *m, int count) +{ + ListFindF fn; + + log_debug ("should_allocate_idle_nodes: %d\n", m->policy.alloc_idle_first); + + if (!m->policy.alloc_idle_first) + return (0); + + if (m->policy.alloc_idle_multiples_only) + fn = (ListFindF) find_multiple_of_node_size; + else + fn = (ListFindF) find_node_lt_size; + + if (list_find_first (m->nodelist, fn, &count)) + return (1); + return (0); +} + +static struct allocation * allocation_create (struct nodemap *map, int ntasks) +{ + struct allocation *a = malloc (sizeof (*a)); + + if (a == NULL) + return (NULL); + + a->map = map; + + a->ntasks = a->nleft = ntasks; + a->allocated_cpus = bitmask_alloc (cpumask_size ()); + + return (a); +} + +static void allocation_destroy (struct allocation *a) +{ + free (a); +} + +static int node_cpu_to_global (struct node *n, int cpu) +{ + int firstcpu = 
bitmask_first (n->localcpus); + return (firstcpu + cpu); +} + +static int node_allocate_cpu (struct node *n, int cpu) +{ + int gcpu; + if (bitmask_isbitset (n->usedcpus, cpu)) + return (-1); + + gcpu = node_cpu_to_global (n, cpu); + if (bitmask_isbitset (n->map->usedcpus, gcpu)) + return (-1); + + bitmask_setbit (n->usedcpus, cpu); + n->navail--; + bitmask_setbit (n->map->usedcpus, gcpu); + n->map->navail--; + return (gcpu); +} + +static void allocation_add_cpu (struct allocation *a, int cpu) +{ + bitmask_setbit (a->allocated_cpus, cpu); + a->nleft--; +} + +static int try_alloc (struct node *n, struct allocation *a, int cpu) +{ + int globalcpu = node_allocate_cpu (n, cpu); + + if (globalcpu < 0) /* CPU is in use */ + return (-1); + + cpuset_debug2 ("Node%d: allocated local CPU%d = CPU%d\n", + n->nodeid, cpu, globalcpu); + + allocation_add_cpu (a, globalcpu); + + return (0); +} + +static int node_allocate_n (struct node *n, struct allocation *a, int count) +{ + int nalloc = 0; + int i; + + if (a->nleft == 0) + return (0); + + /* + * Allocate all CPUs left in node if count == -1 + */ + if (count < 0) + count = n->navail; + + cpuset_debug2 ("Allocating %d CPUs from node%d. 
nleft = %d\n", + count, n->nodeid, a->nleft); + + if (!n->map->policy.reverse) { + /* + * Start with first CPU in node + */ + for (i = 0; i < n->ncpus && a->nleft && nalloc < count; i++) { + if (try_alloc (n, a, i) < 0) + continue; + nalloc++; + } + } + else { + /* + * Start with last CPU in node + */ + for (i = n->ncpus - 1; i >= 0 && a->nleft && nalloc < count; i--) { + if (try_alloc (n, a, i) < 0) + continue; + nalloc++; + } + } + + return (nalloc); +} + +static int node_allocate_all (struct node *n, struct allocation *a) +{ + return (node_allocate_n (n, a, -1)); +} + +static int alloc_idle_nodes (struct allocation *a) +{ + ListIterator i; + struct node *n; + int nalloc = 0; + + cpuset_debug ("Attempting to allocate idle nodes\n"); + i = list_iterator_create (a->map->nodelist); + + while ((n = list_next (i)) && (a->nleft > 0)) { + + log_debug2 ("alloc_idle: node%d; avail=%d\n", n->nodeid, n->navail); + if(n->navail == 0) + continue; + + /* + * Ignore this node if we're only allocating multiples + * and the number of tasks left is not a multiple. + */ + if (a->map->policy.alloc_idle_multiples_only + && ((a->nleft % n->navail) != 0)) + continue; + + /* + * Otherwise, allocate whole, idle node. 
+ */ + if ((n->navail == n->ncpus) && (a->nleft >= n->navail)) { + log_debug2 ("Allocating up to %d CPUs from node%d\n", + n->ncpus, n->nodeid); + nalloc += node_allocate_n (n, a, n->ncpus); + } + } + + return (nalloc); +} + +static int node_cmp_free (struct node *n1, struct node *n2) +{ + if (n1->navail == n2->navail) + return (0); + else if (n1->navail < n2->navail) + return (-1); + else + return (1); +} + +static int node_cmp_avail (struct node *n1, struct node *n2) +{ + int rc = node_cmp_free (n1, n2); + return (-rc); +} + +static int node_cmp_nodeid (struct node *n1, struct node *n2) +{ + if (n1->nodeid < n2->nodeid) + return (-1); + else if (n1->nodeid > n2->nodeid) + return (1); + else /* Shouldn't happen, but we'll check anyway */ + return (0); +} + +static int node_cmp_reverse (struct node *n1, struct node *n2) +{ + return (-node_cmp_nodeid (n1, n2)); +} + + +static int do_allocation (struct allocation *a, ListCmpF sort_f) +{ + if (sort_f) + list_sort (a->map->nodelist, sort_f); + list_for_each (a->map->nodelist, (ListForF) node_allocate_all, a); + return (0); +} + +static int allocation_best_fit (struct allocation *a) +{ + ListCmpF fn; + + log_debug ("allocation: best-fit\n"); + /* + * Best fit: + * + * Sort NUMA nodes by amount of CPUs free in ascending + * order, then pack in first-fit mode. + */ + fn = (ListCmpF) node_cmp_free; + + return (do_allocation (a, fn)); +} + +static int allocation_first_fit (struct allocation *a) +{ + log_debug ("allocation: first-fit\n"); + return (do_allocation (a, NULL)); +} + +static int allocation_worst_fit (struct allocation *a) +{ + log_debug ("allocation: worst-fit\n"); + while (a->nleft) { + /* + * For worst-fit, we have to sort by available CPUs in + * desending order, then allocate 1 CPU. Then re-sort, and + * so on. 
+ */ + list_sort (a->map->nodelist, (ListCmpF) node_cmp_avail); + if (node_allocate_n (list_peek (a->map->nodelist), a, 1) < 0) + return (-1); + } + return (0); +} + +struct bitmask * nodemap_allocate (struct nodemap *map, int ncpus) +{ + struct bitmask *allocated; + struct allocation * a; + + log_debug ("nodemap_allocate (ncpus=%d, navail=%d)\n", + ncpus, map->navail); + + if (ncpus > map->navail) { + cpuset_error ("%d CPUs requested, but only %d available\n", + ncpus, map->navail); + return (NULL); + } + + if (!map->policy.reverse) + list_sort (map->nodelist, (ListCmpF) node_cmp_nodeid); + else + list_sort (map->nodelist, (ListCmpF) node_cmp_reverse); + + if ((a = allocation_create (map, ncpus)) == NULL) + return (NULL); + + if (should_allocate_idle_nodes (a->map, ncpus)) + alloc_idle_nodes (a); + + if (a->nleft > 0) { + /* + * Allocate based on policy. + */ + if (a->map->policy.best_fit) + allocation_best_fit (a); + else if (a->map->policy.first_fit) + allocation_first_fit (a); + else if (a->map->policy.worst_fit) + allocation_worst_fit (a); + } + + if (a->nleft > 0) + cpuset_error ("Failed to allocate %d tasks.\n", a->nleft); + + allocated = a->allocated_cpus; + a->allocated_cpus = NULL; + + allocation_destroy (a); + + return (allocated); +} + +const struct bitmask * nodemap_used (struct nodemap *map) +{ + return (map->usedcpus); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/nodemap.h b/cpuset/nodemap.h new file mode 100644 index 0000000..026068d --- /dev/null +++ b/cpuset/nodemap.h @@ -0,0 +1,51 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. 
+ *
+ *  This is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program. If not, see .
+ ****************************************************************************/
+
+
+#ifndef HAVE_NODEMAP_H
+#define HAVE_NODEMAP_H
+
+#include "conf.h"
+
+/*
+ *  Create a nodemap with optional used CPUs bitmask
+ *   if used == NULL, then the nodemap will be initialized
+ *   with the actual utilized CPUs.
+ *
+ *  The allocation policy is taken from [cf]. Returns NULL if the
+ *   map, its used-CPU mask or one of its nodes cannot be allocated.
+ */
+struct nodemap * nodemap_create (cpuset_conf_t cf, struct bitmask *used);
+
+/*
+ *  Re-read the allocation policy of [map] from [cf]. Returns 0.
+ */
+int nodemap_policy_update (struct nodemap *map, cpuset_conf_t cf);
+
+/*
+ *  Free a nodemap created by nodemap_create().
+ */
+void nodemap_destroy (struct nodemap *);
+
+/*
+ *  Print the CPU masks of the map and of each of its nodes.
+ */
+void print_nodemap (const struct nodemap *);
+
+/*
+ *  Allocate ncpus from nodemap
+ *
+ *  Returns a bitmask of the allocated CPUs, or NULL if more CPUs are
+ *   requested than the map has available. The CPUs are also marked
+ *   as used in the map.
+ *  NOTE(review): the returned mask appears to be owned by the caller
+ *   (it is detached from the internal allocation object before that
+ *   object is freed) -- confirm who releases it.
+ */
+struct bitmask * nodemap_allocate (struct nodemap *map, int ncpus);
+
+/*
+ *  Return the map's internal bitmask of used CPUs (not a copy).
+ */
+const struct bitmask * nodemap_used (struct nodemap *map);
+
+
+#endif /* !HAVE_NODEMAP_H */
diff --git a/cpuset/pam_slurm_cpuset.8 b/cpuset/pam_slurm_cpuset.8
new file mode 100644
index 0000000..9a1887f
--- /dev/null
+++ b/cpuset/pam_slurm_cpuset.8
@@ -0,0 +1,81 @@
+
+.TH "PAM_SLURM_CPUSET" "8"
+
+.SH NAME
+pam_slurm_cpuset \- restrict user logins to SLURM cpusets
+
+.SH SYNOPSIS
+\fBpam_slurm_cpuset.so\fR [\fIOPTIONS\fR]...
+
+.SH DESCRIPTION
+.PP
+The \fBpam_slurm_cpuset\fR module may be used to restrict user
+login sessions on compute nodes to only the CPUs which they have
+been allocated by SLURM. It will also deny access to users attempting
+to log in to nodes which they have not been allocated.
Thus, it +should replace \fBpam_slurm.so\fR in the PAM stack. +.PP +Like the \fBpam_slurm\fR module, the \fBpam_slurm_cpuset.so\fR module +should be enabled in the account section of the PAM stack. +.PP +User login session tasks are placed into the \fBuser\fR cpuset created +by the \fBslurm-cpuset\fR(8) utilities. If a \fBuser\fR cpuset doesn't +exist at the time of operation of this module, and the user has one +or more valid SLURM jobs assigned to the current system, then a user +cpuset under + +.B /dev/cpuset/slurm/UID + +will be created with access to all CPUs to which the user has access. +.PP +As jobs begin and are terminated on the node, the set of CPUs in the +user cpuset is automatically adjusted to the union of all job cpusets. +If and when all the user's jobs on the node are complete, and the +user has no CPUs allocated to them, SLURM will either \fBorphan\fR +the user cpuset by renaming it to + +.B /dev/cpuset/slurm/orphan:UID + +or will immediately terminate the user login and clean up the +user cpuset. The method used depends on the \fBkill-orphs\fR +setting in \fBslurm-cpuset.conf\fR. +.PP +For more information about the SLURM cpuset suite and its +operation, see the \fBslurm-cpuset\fR(8) man page. + +.SH OPTIONS +.TP +.BI debug [=level] +Enable verbose module logging via \fBpam_syslog\fR(3). Optionally +a \fIlevel\fR may be specified. +.TP +.BI conf= FILENAME +Read configuration from config file \fIFILENAME\fR. By default, the +configuration is read from /etc/slurm/slurm-cpuset.conf. +.PP +For valid configuration file syntax and options, see the +\fBslurm-cpuset\fR(8) man page. + +.SH "MODULE SERVICES PROVIDED" +.PP +Currently, only the \fBaccount\fR service is supported. + +.SH "RETURN VALUES" +.TP 3n +PAM_SUCCESS +Access was granted. +.TP +PAM_PERM_DENIED +Access was not granted. +.TP +PAM_USER_UNKNOWN +Failed to read \fBPAM_USER\fR or user not in passwd file. +.TP +PAM_SYSTEM_ERR +System or module configuration error. 
+ +.SH "SEE ALSO" +.BR slurm-cpuset (8), +.BR cpuset (4), +.BR pam (8), +.BR pam.d (8) diff --git a/cpuset/pam_slurm_cpuset.c b/cpuset/pam_slurm_cpuset.c new file mode 100644 index 0000000..46d50a2 --- /dev/null +++ b/cpuset/pam_slurm_cpuset.c @@ -0,0 +1,295 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + + + +#include +#include +#include +#include + +#define PAM_SM_ACCOUNT +#include +#include +#include + +#include "create.h" +#include "util.h" +#include "hostlist.h" +#include "slurm.h" +#include "conf.h" +#include "log.h" + +static int create_all_job_cpusets (cpuset_conf_t conf, uid_t uid); +static int migrate_to_user_cpuset (uid_t uid); +static int in_user_cpuset (uid_t uid); + +static pam_handle_t *pam_handle = NULL; + +static const char msg_prefix [] = ""; +static const char msg_suffix [] = "\r"; + +static int debuglevel = 1; + + +static int log_pam_syslog (const char *msg) { + pam_syslog (pam_handle, 0, "%s", msg); + return (0); +} + +static int log_pam_error (const char *msg) { + pam_error (pam_handle, "%s%s%s", msg_prefix, msg, msg_suffix); + return (0); +} + +static int parse_options (cpuset_conf_t conf, int ac, const char **av) +{ + int i; + for (i = 0; i < ac; i++) { + if (strcmp ("debug", av[i]) == 0) + debuglevel++; + else if (strncmp ("debug=", av[i], 6) == 0) + debuglevel = 1 + str2int (av[i] + 6); + else if (cpuset_conf_parse_opt (conf, av[i]) < 0) + return (-1); + } + return (0); +} + +PAM_EXTERN int +pam_sm_acct_mgmt (pam_handle_t *pamh, int flags, int ac, const char **av) +{ + int rc; + int n; + const char *user; + struct passwd *pw; + uid_t uid; + const void **uptr = (const void **) &user; + int lockfd; + + cpuset_conf_t conf = cpuset_conf_create (); + + pam_handle = pamh; + + log_add_dest (debuglevel, log_pam_syslog); + log_add_dest (0, log_pam_error); + log_set_prefix (""); + + if ((rc = pam_get_item (pamh, PAM_USER, uptr)) != PAM_SUCCESS + || user == NULL + || *user == '\0') { + log_err ("get PAM_USER: %s", pam_strerror (pamh, rc)); + return (PAM_USER_UNKNOWN); + } + + if (!(pw = getpwnam (user))) { + log_err ("User (%s) does not exist.", user); + return (PAM_USER_UNKNOWN); + } + + uid = pw->pw_uid; + + if (uid == 0) + return (PAM_SUCCESS); + + /* + * If we're 
already in the user's cpuset, bail early + */ + if (in_user_cpuset (uid)) { + log_msg ("User %s (uid=%d) already in cpuset", user, uid); + return (PAM_SUCCESS); + } + + /* + * Read any configuration: + */ + if (parse_options (conf, ac, av) < 0) + return (PAM_SYSTEM_ERR); + + log_update (debuglevel, log_pam_syslog); + + /* + * If we didn't parse a config file due to "conf=" above, + * then parse the system config. + */ + if (!cpuset_conf_file (conf)) + cpuset_conf_parse_system (conf); + + /* + * Now we have to create cpusets for all running jobs + * on the system for this user, so that they have the + * correct number of CPUs accounted to them upon logging + * in. + */ + + if ((lockfd = slurm_cpuset_create (conf)) < 0) { + log_err ("Unable to initialilze slurm cpuset"); + return (PAM_SYSTEM_ERR); + } + + /* + * create_all_job_cpusets returns the number of CPUs + * the user has allocated on this node (or -1 for failure) + */ + + if ((n = create_all_job_cpusets (conf, uid)) < 0) { + log_err ("Failed to create user cpuset for uid=%d", uid); + slurm_cpuset_unlock (lockfd); + return (PAM_SYSTEM_ERR); + } + else if (n == 0) { + log_err ("Access denied: User %s (uid=%d) has no active SLURM jobs.", + user, uid); + slurm_cpuset_unlock (lockfd); + return (PAM_PERM_DENIED); + } + + if (migrate_to_user_cpuset (uid) < 0) { + log_err ("Failed to create user cpuset for uid=%d", uid); + slurm_cpuset_unlock (lockfd); + return (PAM_SYSTEM_ERR); + } + slurm_cpuset_unlock (lockfd); + + log_msg ("Access granted for user %s (uid=%d) with %d CPUs", + user, uid, n); + + cpuset_conf_destroy (conf); + + return (PAM_SUCCESS); +} + +static int in_user_cpuset (uid_t uid) +{ + char p [1024]; + char q [1024]; + int n; + + if (!cpuset_getcpusetpath (0, p, sizeof (p))) + return (0); + + n = snprintf (q, sizeof (q), "/slurm/%d", uid); + if ((n <= 0) || (n >= sizeof (q))) + return (0); + + return (strncmp (p, q, strlen (q)) == 0); +} + +static int migrate_to_user_cpuset (uid_t uid) +{ + int rc; + 
char path [128]; + + rc = snprintf (path, sizeof (path), "/slurm/%d", uid); + if (rc < 0 || rc > sizeof (path)) + return (-1); + + if (cpuset_move (0, path) < 0) + return (-1); + + return (0); +} + +int hostname_hostid (const char *host, const char *nodes) +{ + int n; + hostlist_t h = hostlist_create (nodes); + + if (!(h = hostlist_create (nodes))) + return (0); + + n = hostlist_find (h, host); + hostlist_destroy (h); + + return (n); +} + +int cpus_on_node (job_info_t *j, int hostid) +{ + int i; + int start = 0; + + for (i = 0; i < j->num_cpu_groups; i++) { + if (hostid >= start && hostid < (start + j->cpu_count_reps[i])) + return (j->cpus_per_node[i]); + else + start += j->cpu_count_reps[i]; + } + + return (0); +} + +int create_all_job_cpusets (cpuset_conf_t conf, uid_t uid) +{ + int i; + char hostname[256]; + char *p; + job_info_msg_t * msg; + int total_cpus = 0; + + if (gethostname (hostname, sizeof (hostname)) < 0) { + return (-1); + } + + if ((p = strchr (hostname, '.'))) + *p = '\0'; + + if (dyn_slurm_load_jobs (&msg) < 0) { + return (-1); + } + + for (i = 0; i < msg->record_count; i++) { + job_info_t *j = &msg->job_array[i]; + int hostid; + int ncpus; + + if ((j->user_id != uid) || (j->job_state != JOB_RUNNING)) + continue; + + if ((hostid = hostname_hostid (hostname, j->nodes)) < 0) + continue; + + if (!(ncpus = cpus_on_node (j, hostid))) { + log_err ("job %u: Failed to find ncpus for this node", j->job_id); + continue; + } + + if (!job_cpuset_exists (j->job_id, j->user_id) && + create_cpuset_for_job (conf, j->job_id, j->user_id, ncpus) < 0) { + log_err ("job %u: Failed to create cpuset: %m", j->job_id); + continue; + } + + total_cpus += ncpus; + } + + dyn_slurm_free_job_info_msg (msg); + + return (total_cpus); +} + +/* + * vi: ts=4 sw=4 expandtab + */ + diff --git a/cpuset/release-agent.c b/cpuset/release-agent.c new file mode 100644 index 0000000..947ea93 --- /dev/null +++ b/cpuset/release-agent.c @@ -0,0 +1,86 @@ 
+/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + + +#include +#include +#include +#include +#include + +#include "util.h" +#include "create.h" +#include "conf.h" +#include "log.h" + +const char cpuset_path[] = "/dev/cpuset"; + +const char * basename (const char *path); +static FILE *fp = NULL; + +static int log_fp (const char *msg) +{ + if (fp) + fprintf (fp, "%s", msg); + return (0); +} + +int main (int ac, char **av) +{ + int lockfd; + char path [4096]; + const char *prog = basename (av[0]); + + cpuset_conf_t conf = cpuset_conf_create (); + + if (ac < 2) { + fprintf (stderr, "Usage: %s cpuset_path\n", prog); + return (1); + } + + fp = fopen ("/var/log/slurm-cpuset.log", "a"); + + log_add_dest (C_LOG_VERBOSE, log_fp); + cpuset_conf_parse_system (conf); /* Ignore errors, we must proceed */ + + snprintf (path, sizeof (path), "%s%s", cpuset_path, av[1]); + + if ((lockfd = slurm_cpuset_create (conf)) < 0) { + log_err ("Failed to lock slurm cpuset: %s\n", strerror (errno)); + exit (1); + } + + 
log_verbose ("Cleaning path %s\n", path); + + update_user_cpusets (conf); + slurm_cpuset_unlock (lockfd); + cpuset_conf_destroy (conf); + fclose (fp); + + return (0); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/slurm-cpuset.8 b/cpuset/slurm-cpuset.8 new file mode 100644 index 0000000..c742fdf --- /dev/null +++ b/cpuset/slurm-cpuset.8 @@ -0,0 +1,378 @@ +.\" $Id: slurm-cpuset.8 7653 2008-07-29 22:33:31Z grondo $ + +.TH slurm-cpuset 8 "SLURM cpuset plugin" + +.SH NAME +slurm-cpuset \- confine SLURM jobs to CPUs using cpusets + +.SH DESCRIPTION +The SLURM \fBcpuset\fR suite enables the use of Linux \fBcpuset\fR(4) +functionality to constrain user jobs and login sessions to the +number of CPUs allocated on compute nodes. The suite consists of a +\fBspank\fR(8) plugin, a \fBPAM\fR module, and a cpuset \fIrelease +agent\fR. Together, these three components may effectively restrict +user access to shared nodes based on actual SLURM allocations. +.PP +The SLURM cpuset components are specifically designed for +systems sharing nodes using CPU scheduling (i.e. using SLURM's +\fIselect/cons_res\fR plugin) These plugins and utilities will not +be effective on systems where CPUs may be oversubscribed to jobs +(e.g. strict node sharing without the use of \fIselect/cons_res\fR). +.PP +For more details see the OPERATION section below. + +.SH SLURM PLUGIN +The core cpuset functionality for SLURM jobs is provided +by a SLURM \fBspank\fR(8) plugin \fBcpuset.so\fR. Since this plugin +uses SLURM's \fBspank\fR(8) framework, it must be enabled +in the plugstack.conf for the system, via the following +line +.nf + + required cpuset.so [options] + +.fi +where \fIoptions\fR are described further in the \fIOPTIONS\fR +section below. +.PP +The slurm cpuset plugin (as well as other SLURM cpuset components) +works on a single node. It knows nothing about the global state of +SLURM, its queues, etc. 
Local CPUs are allocated dynamically to +incoming jobs based on the number of CPUs assigned to the job by +SLURM. The cpuset plugin does not keep any state across jobs, nor +across the nodes of a job. Instead, it uses past created cpusets +to track which CPUs are currently in use, and which are available. +.PP +The SLURM cpuset plugin may also constrain job steps to their +own cpusets under the job cpuset. This may be useful when running +multiple job steps under a single allocation, as the resources of +each job step may be partitioned into separate, non-overlapping +cpusets. This functionality is enabled by the srun user option +.TP +.BI "--use-cpusets="[args...] +.PP +Where the optional arguments in \fIargs\fR modify the cpuset plugin +behavior for job steps and/or tasks. Any plugin option as described +in the OPTIONS section can be specified. + +.SH PAM MODULE +The \fBpam_slurm_cpuset\fR(8) module may be used to restrict user +login sessions on compute nodes to only the CPUs which they have +been allocated by SLURM. If enabled in the PAM stack, it will also +deny access to users attempting to log in to nodes which they +have not been allocated. +.PP +The \fBpam_slurm_cpuset\fR PAM module +uses the same configuration file and algorithms as the SLURM cpuset +plugin, and is further documented in the \fBpam_slurm_cpuset\fR(8) +man page. + +.SH RELEASE AGENT +Included with the SLURM cpuset utilities is a cpuset release-agent +which may optionally be installed into /sbin/cpuset_release_agent +on any nodes using the SLURM cpuset plugin or PAM module. This release +agent will be run for each SLURM cpuset when the last +task within the cpuset exits, and will free the cpuset immediately +(with proper locking so as to not race with other jobs). The release +agent is optional for a couple reasons: +.RS 8 +.TP 3 +1. +Some versions of Linux may only allow a single \fBcpuset_release_agent\fR +and we don't want to interfere with other uses of cpusets if they exist. +.TP +2. 
+The cpuset plugin and PAM modules remove stale cpusets as they initialize +anyway. Therefore \fBcpuset_release_agent\fR is not a critical component +for operation. However, it is nice to clean up job cpusets as jobs exit, +instead of waiting until the next job is run. Unused cpusets lying around +may be confusing to sysadmins and users. + +.SH CONFIGURATION +All SLURM cpuset components will first attempt to read the systemwide +config file at /etc/slurm/slurm-cpuset.conf. This location may be overridden +in the PAM module and SLURM plugin with the \fBconf=\fR parameter. +However, this is not suggested, because there is no way currently +to override the config file location for the cpuset release agent. +.PP +Available configuration parameters that may be set in slurm-cpuset.conf +are: +.TP 8 +\fBpolicy\fR = \fIPOLICY\fR +Set the allocation policy for cpusets to \fIPOLICY\fR. Currently +supported policies include: +.RS +.TP +.B best-fit +Allocate tasks to the most full NUMA nodes first. This is the default. +.TP +.B first-fit +Allocate tasks to nodes in order of node ID. +.TP +.B worst-fit +Allocate tasks to least full nodes first. +.RE + +.TP +\fBorder\fR = [\fInormal\fR|\fIreverse\fR] +Set the allocation order of tasks to CPUs. In \fInormal\fR +mode, tasks are allocated starting with the first available +CPU and in increasing order, while with \fIreverse\fR order, +tasks are allocated starting with the last available CPU. The +default order is \fInormal\fR. +.TP +\fBuse-idle\fR = \fISTRATEGY\fR +The \fBuse-idle\fR parameter indicates when to allocate tasks +to fully idle NUMA nodes first. The default behavior is +to use idle nodes first when the number of tasks is a multiple +of the number of CPUs within a node. Other options include +.RS +.TP 12 +.B mult[iple] +The default. Allocate idle nodes first if number of tasks is a +multiple of the node size. 
+.TP +.B [greater|gt] +Allocate idle nodes first if the number of tasks is \fBgreater\fR +than or equal to the number of CPUs in a node. +.TP +.B [0|no|never] +Do not allocate idle nodes first, no matter the job size. +.TP +.B [1|yes] +Allocate idle nodes first using the default policy. +.RE +.TP +\fBconstrain-mem\fR = \fIBOOLEAN\fR +If set to 1 or yes, constrain memory nodes along with CPUs when +creating cpusets. If set to 0 or no, let all cpusets access all +memory nodes on the system (i.e. do not constrain memory). The +default is yes. +.TP +\fBkill-orphs\fR = \fIBOOLEAN\fR +If set to 1 or yes, kill orphaned user logins, i.e. those logins +for which there are no longer any SLURM jobs running. If 0 or no, +then leave orphan user logins (in a special orphan login cpuset). +The default is no. + +.SH USER OPTIONS + +The \fB--use-cpusets\fR option may be used to override some of +the options above, in addition to providing a couple of extra options. +Currently supported arguments for this option include: +.TP +.B help +Print a short usage message to stderr and exit. +.TP +.B debug +Enable debug messages. +.TP +.BI "debug=" N +Increase debugging verbosity to \fIN\fR. +.TP +.BI "conf=" FILENAME +Read configuration from file \fIFILENAME\fR. Settings in this +config file will override system configuration, as well as options +previously set on the command line. +.TP +.BI "policy=" POLICY +As above, set the allocation policy for cpusets to \fIPOLICY\fR. +For the user option, this only overrides the policy as applied to +job steps and tasks. +.TP +.BI "order=" ORDER +Set allocation order to \fInormal\fR or \fIreverse\fR. +.TP +.B reverse +Same as \fBorder=\fR\fIreverse\fR. +.TP +.B best-fit | worst-fit | first-fit +Shortcut for \fBpolicy\fR=\fIPOLICY\fR. +.TP +.BI "idle-first=" WHEN +As above, set \fIWHEN\fR to allocate idle nodes first. +.TP +.BI "no-idle" +Same as \fBidle-first\fR=\fIno\fR. +.TP +.B mem | constrain-mem +Constrain memory as well as CPUs. 
Same as \fBconstrain-mem\fR = \fIyes\fR +in the config file. +.TP +.B nomem | !constrain-mem +Do not constrain memory. +.TP +.B tasks +Also constrain individual tasks to cpusets. + +.SH OPERATION +All SLURM cpusets for jobs and login sessions are created +under the /slurm cpuset hierarchy, and require that the +cpuset filesystem be mounted under /dev/cpuset (an init script +is provided for this purpose). +.PP +The first level of cpusets +created under the /slurm directory are UID cpusets. Each +user with a job or login to the current node will have +a cpuset under +.nf + + \fB/slurm/UID\fR + +.fi +which will contain the set of +all CPUs that user is allowed to use on the system. Processes +which are part of a login session are contained within this +cpuset, and thus have access to all CPUs which the user has +been allocated. +.PP +Under each UID cpuset will be one cpuset per active job. +These cpusets are named with the JOBID, and thus fall +under the path +.nf + + \fB/slurm/UID/JOBID\fR + +.fi +The CPUs allocated to the JOBID cpusets will obviously +be a subset of the UID cpuset. +.PP +Finally, if the user requests per-job-step or per-task +cpusets, these cpusets will fall under the JOBID cpuset, +and will of course be a subset of the job cpuset. Thus, +the final cpuset path for a task would be: +.nf + + \fB/slurm/UID/JOBID/STEPID/TASKID\fR + +.fi +where there would be N TASKID cpusets for an N task job. +.PP +As cpusets are created by the SLURM cpuset utilities, +the \fBnotify_on_release\fR flag is set. This causes +the cpuset release agent at /sbin/cpuset_release_agent +to be called after the last task exits from the cpuset. +The SLURM cpuset version of \fBcpuset_release_agent\fR takes +care of removing the cpuset and releasing CPUs for use +if necessary. Use of the release agent is optional, however, +because the SLURM cpuset utilities will also try to +free unused cpusets on demand. 
+.PP +The general algorithm the SLURM cpuset utilities use for +allocating a new JOB cpuset is as follows: +.PP +.RS 2 +.TP 3 +1. +Lock SLURM cpuset at /dev/cpuset/slurm. +.TP +2. +Clean up current slurm cpuset hierarchy by removing all unused cpusets, +and ensuring user cpusets (/slurm/UID) are up to date. +.TP +3. +Check for an existing cpuset for this job in /slurm/UID/JOBID. If +it exists, go directly to step 8. +.TP +4. +Scan the slurm cpuset hierarchy and gather the list of currently +used CPUs. This is the union of all active user cpusets, which are +in turn the union of all active user job cpusets. +.TP +5. +Abort if the number of CPUs assigned to the starting job is greater +than the number of available CPUs. +.TP +6. +Assign CPUs and optionally memory nodes based on the currently +configured policy. (See the CONFIGURATION section for valid policies.) +.TP +7. +Create new cpuset under /dev/cpuset/slurm/UID/JOBID, updating +the user cpuset if necessary with newly allocated cpus. +.TP +8. +Migrate job to cpuset /dev/cpuset/slurm/UID/JOBID. +.TP +9. +Unlock SLURM cpuset at /dev/cpuset/slurm. +.RE +.PP + +.SH EXAMPLES +Default allocation policy, job sizes 2 cpus, 1 cpu, 1 cpu, 4 cpus: +.nf + + cpuset: /slurm/6885/69946: 2 cpus [0-1], 1 mem [0] + cpuset: /slurm/6885/69947: 1 cpu [2], 1 mem [1] + cpuset: /slurm/6885/69948: 1 cpu [3], 1 mem [1] + cpuset: /slurm/6885/69950: 4 cpus [4-7], 2 mems [2-3] + +.fi +Same as above with order = reverse. 
+.nf + + cpuset: /slurm/6885/69954: 2 cpus [6-7], 1 mem [3] + cpuset: /slurm/6885/69955: 1 cpu [5], 1 mem [2] + cpuset: /slurm/6885/69956: 1 cpu [4], 1 mem [2] + cpuset: /slurm/6885/69957: 4 cpus [0-3], 2 mems [0-1] + +.fi +use-idle = never, policy = worst-fit: job sizes 1, 1, 1, 4, 1 +.nf + + cpuset: /slurm/6885/69976: 1 cpu [0], 1 mem [0] + cpuset: /slurm/6885/69977: 1 cpu [2], 1 mem [1] + cpuset: /slurm/6885/69978: 1 cpu [4], 1 mem [2] + cpuset: /slurm/6885/69979: 4 cpus [1,3,6-7], 3 mems [0-1,3] + cpuset: /slurm/6885/69980: 1 cpu [5], 1 mem [2] + +.fi +policy = first-fit: job sizes 1, 1, 1, 4, 1 +Note that 4 cpu job is allocated to idle nodes first. +.nf + + cpuset: /slurm/6885/69985: 1 cpu [0], 1 mem [0] + cpuset: /slurm/6885/69986: 1 cpu [1], 1 mem [0] + cpuset: /slurm/6885/69987: 1 cpu [2], 1 mem [1] + cpuset: /slurm/6885/69988: 4 cpus [4-7], 2 mems [2-3] + cpuset: /slurm/6885/69989: 1 cpu [3], 1 mem [1] + +.fi +Using cpusets for multiple job steps under an allocate of 1 node +with 8 cpus. 
+ +.nf + + > srun --use-cpusets=debug -n1 sleep 100 & + + cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] + cpuset: /slurm/6885/69993/0: 1 cpu [0], 1 mem [0] + + > srun --use-cpusets=debug -n2 sleep 100 & + + cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] + cpuset: /slurm/6885/69993/1: 2 cpus [2-3], 1 mem [1] + +.fi +Use of --use-cpusets=tasks + +.nf + + > srun --use-cpusets=debug,tasks -n4 sleep 100 + + cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] + cpuset: /slurm/6885/69993/2: 4 cpus [0-3], 2 mems [0-1] + cpuset: /slurm/6885/69993/2/0: 1 cpu [0], 1 mem [0] + cpuset: /slurm/6885/69993/2/1: 1 cpu [1], 1 mem [0] + cpuset: /slurm/6885/69993/2/2: 1 cpu [2], 1 mem [1] + cpuset: /slurm/6885/69993/2/3: 1 cpu [3], 1 mem [1] +.fi + +.SH AUTHOR +Mark Grondona + +.SH "SEE ALSO" +.BR use-cpusets (1), +.BR pam_slurm_cpuset (8), +.BR spank (8), +.BR cpuset (4) diff --git a/cpuset/slurm.c b/cpuset/slurm.c new file mode 100644 index 0000000..eb107b4 --- /dev/null +++ b/cpuset/slurm.c @@ -0,0 +1,114 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + + +#include +#include "slurm.h" +#include "log.h" +/* + * Handle for libslurm.so + * + * We open libslurm.so via dlopen () in order to pass the + * flag RTDL_GLOBAL so that subsequently loaded modules have + * access to libslurm symbols. This is pretty much only needed + * for dynamically loaded modules that would otherwise be + * linked against libslurm. + * + */ +static void * slurm_h = NULL; + + +static int dyn_slurm_open () +{ + if (slurm_h) + return (0); + if (!(slurm_h = dlopen("libslurm.so", RTLD_NOW|RTLD_GLOBAL))) { + log_err ("Unable to dlopen libslurm: %s\n", dlerror ()); + return (-1); + } + return (0); +} + +/* + * Wrapper for SLURM API function slurm_load_jobs () + */ +int dyn_slurm_load_jobs (job_info_msg_t **msgp) +{ + static int (*load_jobs) (time_t, job_info_msg_t **) = NULL; + + dyn_slurm_open (); + + if (!load_jobs && !(load_jobs = dlsym (slurm_h, "slurm_load_jobs"))) { + log_err ("Unable to resolve slurm_load_jobs\n"); + return -1; + } + + return load_jobs ((time_t) NULL, msgp); +} + +/* + * Wrapper for SLURM API function slurm_strerror () + */ +char * dyn_slurm_strerror (int errnum) +{ + static char * (*f) (int) = NULL; + + dyn_slurm_open (); + + if (!f && !(f = dlsym (slurm_h, "slurm_strerror"))) { + log_err ("Unable to resolve slurm_strerror\n"); + return "unknown error"; + } + + return f (errnum); +} + + +/* + * Wrapper for slurm_free_job_info_msg () + */ +void dyn_slurm_free_job_info_msg (job_info_msg_t *msg) +{ + static void (*free_msg) (job_info_msg_t *) = NULL; + + dyn_slurm_open (); + + if (!free_msg && !(free_msg = dlsym (slurm_h, "slurm_free_job_info_msg"))) { + log_err ("Unable to resolve slurm_free_job...\n"); + return; + } + + free_msg (msg); + + return; +} + +void dyn_slurm_close () +{ + if (slurm_h) dlclose (slurm_h); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/slurm.h b/cpuset/slurm.h new file mode 100644 index 0000000..80db18a 
--- /dev/null +++ b/cpuset/slurm.h @@ -0,0 +1,36 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + + +#ifndef _HAVE_DYN_SLURM_H +#define _HAVE_DYN_SLURM_H + +#include + +int dyn_slurm_load_jobs (job_info_msg_t **msgp); +char * dyn_slurm_strerror (int errnum); +void dyn_slurm_free_job_info_msg (job_info_msg_t *msg); +void dyn_slurm_close (); + +#endif diff --git a/cpuset/test.c b/cpuset/test.c new file mode 100644 index 0000000..a1a6c49 --- /dev/null +++ b/cpuset/test.c @@ -0,0 +1,90 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. 
+ * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + + +#include +#include +#include +#include + +#include "nodemap.h" +#include "util.h" +#include "conf.h" +#include "log.h" + +static int log_stderr (const char *msg) +{ + fprintf (stderr, "%s", msg); return 0; +} + +int main (int ac, char **av) +{ + cpuset_conf_t conf; + struct bitmask * b; + struct nodemap * map; + int n = str2int (av[1]); + + log_add_dest (4, log_stderr); + + conf = cpuset_conf_create (); + //cpuset_conf_debug (); + + if (cpuset_conf_parse_system (conf) < 0) + exit (1); + + if (ac < 2) + exit (1); + + if (av[1] == NULL || ((n = str2int (av[1])) <= 0)) { + fprintf (stderr, "Usage: %s NCPUS\n", av[0]); + exit (1); + } + + fprintf (stdout, "Faking a job with %d CPUs\n", n); + + if ((map = nodemap_create (conf, NULL)) == NULL) { + fprintf (stderr, "Failed to create nodemap\n"); + exit (1); + } + + print_nodemap (map); + + if (!(b = nodemap_allocate (map, n))) { + fprintf (stderr, "Failed to allocate %d tasks in nodemap\n", n); + exit (1); + } + + print_bitmask ("Used CPUs: %s\n", nodemap_used (map)); + + nodemap_destroy (map); + + cpuset_conf_destroy (conf); + + exit (0); + +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/use-cpusets.1 b/cpuset/use-cpusets.1 new file mode 100644 index 0000000..354a260 --- /dev/null +++ b/cpuset/use-cpusets.1 @@ -0,0 +1,114 
@@ +.TH use-cpusets 1 "user options for SLURM cpuset plugin" + +.SH NAME +use-cpusets \- user options for SLURM cpuset plugin + +.SH SYNOPSIS +\fB--use-cpusets=\fR[\fIargs\fR]... + +.SH DESCRIPTION +The \fB--use-cpusets\fR option is added to \fBsrun\fR(1) +by the SLURM cpuset plugin, which is described fully +in the \fBslurm-cpuset\fR(8) manpage. This option allows +users to request that job steps and optionally individual +tasks be contained within cpusets under a SLURM job cpuset. +This may be useful when running multiple job steps under +an allocation, as the resources of each job step may be +partitioned into separate cpus and/or memory nodes. + +.SH OPTIONS +The \fB--use-cpusets\fR option may be used to override some of +the SLURM cpuset defaults and system configuration. Additionally, +some extra options are provided. +.PP +Used alone, the \fB--use-cpusets\fR option enables per-job-step +cpusets for the spawned tasks. Options that change policies +and behavior of the SLURM cpuset plugin may be specified with an +optional list of comma-separated arguments to the \fB--use-cpusets\fR +option, e.g. + +.BI "--use-cpusets=" debug,tasks + +.PP +Currently supported arguments for this option include: +.TP +.B help +Print a short usage message to stderr and exit. +.TP +.B debug +Enable debug messages. +.TP +.BI "debug=" N +Increase debugging verbosity to \fIN\fR. +.TP +.BI "conf=" FILENAME +Read configuration from file \fIFILENAME\fR. Settings in this +config file will override system configuration, as well as options +previously set on the command line. +.TP +.BI "policy=" POLICY +Set the allocation policy for cpusets to \fIPOLICY\fR, as described +in \fBslurm-cpuset\fR(8). +For the user option, this only overrides the policy as applied to +job steps and tasks. +.TP +.BI "order=" ORDER +Set allocation order to \fInormal\fR or \fIreverse\fR. +.TP +.B reverse +Same as \fBorder=\fR\fIreverse\fR. +.TP +.B best-fit | worst-fit | first-fit +Shortcut for \fBpolicy\fR=\fIPOLICY\fR. 
+.TP +.BI "idle-first=" WHEN +As above, set \fIWHEN\fR to allocate idle nodes first. +.TP +.BI "no-idle" +Same as \fBidle-first\fR=\fIno\fR. +.TP +.B mem | constrain-mem +Constrain memory as well as CPUs. Same as \fBconstrain-mem\fR = \fIyes\fR +in the config file. +.TP +.B nomem | !constrain-mem +Do not constrain memory. +.TP +.B tasks +Also constrain individual tasks to cpusets. + +.SH EXAMPLES +Using cpusets for multiple job steps under an allocate of 1 node +with 8 cpus. + +.nf + + > srun --use-cpusets=debug -n1 sleep 100 & + + cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] + cpuset: /slurm/6885/69993/0: 1 cpu [0], 1 mem [0] + + > srun --use-cpusets=debug -n2 sleep 100 & + + cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] + cpuset: /slurm/6885/69993/1: 2 cpus [2-3], 1 mem [1] + +.fi +Use of --use-cpusets=tasks + +.nf + + > srun --use-cpusets=debug,tasks -n4 sleep 100 + + cpuset: /slurm/6885/69993: 8 cpus [0-7], 4 mems [0-3] + cpuset: /slurm/6885/69993/2: 4 cpus [0-3], 2 mems [0-1] + cpuset: /slurm/6885/69993/2/0: 1 cpu [0], 1 mem [0] + cpuset: /slurm/6885/69993/2/1: 1 cpu [1], 1 mem [0] + cpuset: /slurm/6885/69993/2/2: 1 cpu [2], 1 mem [1] + cpuset: /slurm/6885/69993/2/3: 1 cpu [3], 1 mem [1] +.fi +.SH AUTHOR +Mark Grondona + +.SH "SEE ALSO" +.BR slurm-cpuset (8), +.BR cpuset (4) diff --git a/cpuset/util.c b/cpuset/util.c new file mode 100644 index 0000000..c419f8d --- /dev/null +++ b/cpuset/util.c @@ -0,0 +1,464 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. 
+ * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +#include +#include +#include +#define __USE_GNU 1 +#include +#include +#include +#include +#include + +#include +#include + +#include "fd.h" +#include "util.h" +#include "nodemap.h" +#include "create.h" +#include "slurm.h" +#include "log.h" + +/* + * Path to base SLURM cpuset, which contains all other cpusets. 
+ */ +static const char slurm_cpuset[] = "/dev/cpuset/slurm"; + +void print_bitmask (const char *fmt, const struct bitmask *b) +{ + char buf [16]; + bitmask_displaylist (buf, sizeof (buf), b); + log_msg (fmt, buf); +} + +static struct cpuset * get_cpuset (const char *path) +{ + struct cpuset *cpuset = NULL; + + if (!(cpuset = cpuset_alloc ())) + return (NULL); + + if (cpuset_query (cpuset, path) < 0) { + cpuset_free (cpuset); + return (NULL); + } + + return (cpuset); +} + +int cpumask_size (void) +{ + struct cpuset *cp; + static int totalcpus = -1; + if (totalcpus < 0) { + cp = get_cpuset ("/"); + totalcpus = cpuset_cpus_weight (cp); + cpuset_free (cp); + } + return (totalcpus); +} + +int memmask_size (void) +{ + struct cpuset *cp; + static int totalmems = -1; + if (totalmems < 0) { + cp = get_cpuset ("/"); + totalmems = cpuset_mems_weight (cp); + cpuset_free (cp); + } + return (totalmems); +} + +void print_cpuset_info (const char *path, struct cpuset *cp) +{ + char cstr [16]; + char mstr [16]; + struct bitmask *cpus, *mems; + int ncpus, nmems; + + ncpus = cpuset_cpus_weight (cp); + nmems = cpuset_mems_weight (cp); + + cpus = bitmask_alloc (cpumask_size ()); + mems = bitmask_alloc (memmask_size ()); + + cpuset_getcpus (cp, cpus); + cpuset_getmems (cp, mems); + + bitmask_displaylist (cstr, sizeof (cstr), cpus); + bitmask_displaylist (mstr, sizeof (mstr), mems); + + cpuset_verbose ("%s: %d cpu%s [%s], %d mem%s [%s]\n", + path, + ncpus, (ncpus == 1 ? "" : "s"), cstr, + nmems, (nmems == 1 ? 
"" : "s"), mstr); + + bitmask_free (cpus); + bitmask_free (mems); +} + +void print_current_cpuset_info () +{ + char path [4096]; + struct cpuset *cp = cpuset_alloc (); + + cpuset_getcpusetpath (0, path, sizeof (path)); + cpuset_query (cp, path); + + print_cpuset_info (path, cp); + + cpuset_free (cp); +} + +static int current_cpuset_path (char *path, int len) +{ + if (len < 12) + return (-1); + + strncpy (path, "/dev/cpuset", len); + + if (!cpuset_getcpusetpath (0, path + 11, len - 11)) + return (-1); + + if (strcmp (path, "/dev/cpuset/") == 0) { + /* + * If we are in the root cpuset, pretend we're in /slurm instead. + */ + strncat (path, "slurm", len); + } + + return (0); +} + +const char * cpuset_path_to_name (const char *path) +{ + return (path + 11); +} + +struct bitmask *used_cpus_bitmask_path (char *path, int clearall) +{ + char buf [4096]; + const char *current; + struct bitmask *b, *used; + DIR *dirp; + struct dirent *dp; + struct cpuset *cp; + + if (path == NULL) { + path = buf; + if (current_cpuset_path (buf, sizeof (buf)) < 0) { + cpuset_error ("Unable to get current cpuset path: %m"); + return (NULL); + } + cpuset_debug ("used_cpus_bitmask_path (%s)\n", path); + } + + if ((dirp = opendir (path)) == NULL) { + cpuset_error ("Couldn't open %s: %m", path); + return NULL; + } + + if ((cp = cpuset_alloc ()) == NULL) { + cpuset_error ("Couldn't alloc cpuset: %m"); + return (NULL); + } + + current = cpuset_path_to_name (path); + + b = bitmask_alloc (cpumask_size ()); + used = bitmask_alloc (cpumask_size ()); + + if (!clearall) { + /* + * First, set all CPUs not in this cpuset as used + */ + cpuset_query (cp, current); + cpuset_getcpus (cp, used); + bitmask_complement (used, used); + } + + while ((dp = readdir (dirp))) { + char name [4096]; + + if (*dp->d_name == '.') + continue; + + /* + * Skip any orphans + */ + if (strncmp (dp->d_name, "orphan:", 7) == 0) + continue; + + /* + * Generate cpuset name relative to /dev/cpuset + */ + snprintf (name, sizeof (name), 
"%s/%s", current, dp->d_name); + if (cpuset_query (cp, name) < 0) + continue; + + if (cpuset_getcpus (cp, b) < 0) + cpuset_error ("Failed to get CPUs for %s: %m", name); + + used = bitmask_or (used, b, used); + } + closedir (dirp); + + bitmask_free (b); + return (used); +} + +int slurm_jobid_is_valid (int jobid) +{ + static job_info_msg_t *msg = NULL; + int i; + + cpuset_debug ("slurm_jobid_is_valid (%d)\n", jobid); + + if (msg == NULL) + dyn_slurm_load_jobs (&msg); + else if (jobid == -1) { + dyn_slurm_free_job_info_msg (msg); + return (0); + } + + for (i = 0; i < msg->record_count; i++) { + job_info_t *j = &msg->job_array[i]; + + if (j->job_id == jobid && j->job_state == JOB_RUNNING) + return (1); + } + + return (0); +} + +int cpuset_ntasks (const char *path) +{ + struct cpuset_pidlist *pids; + int n; + + if ((pids = cpuset_init_pidlist (path, 0)) == NULL) { + cpuset_error ("cpuset_init_pidlist %s: %m", path); + return (-1); + } + + n = cpuset_pidlist_length (pids); + + cpuset_freepidlist (pids); + + return (n); +} + +int slurm_cpuset_clean_path (const char *path) +{ + int userid; + int jobid; + int stepid; + const char *name = cpuset_path_to_name (path); + + if (sscanf (name, "/slurm/%d/%d/%d", &userid, &jobid, &stepid) == 2) { + /* + * We only destroy jobid cpusets when the owner uid + * cpuset is also empty. This is because the jobid + * cpusets are used for accounting the CPUs in the + * uid cpuset. + */ + char user_cpuset [128]; + snprintf (user_cpuset, sizeof (user_cpuset), "/slurm/%d", userid); + if ((cpuset_ntasks (user_cpuset) > 0) && + slurm_jobid_is_valid (jobid)) + return (0); + } + + rmdir (path); + return (0); +} + +int slurm_cpuset_clean (cpuset_conf_t cf) +{ + struct cpuset_fts_tree *fts; + const struct cpuset_fts_entry *entry; + + if (!(fts = cpuset_fts_open ("/slurm"))) + return (-1); + /* + * Reverse cpuset fts tree so that child cpusets + * are returned before parents. 
This is important + * because a cpuset can seemingly only be removed + * after all its children have been removed. + */ + + cpuset_fts_reverse (fts); + + while ((entry = cpuset_fts_read (fts))) { + const char *name = cpuset_fts_get_path (entry); + + + if (strcmp (name, "/slurm") != 0) { + char path [4096]; + snprintf (path, sizeof (path), "/dev/cpuset%s", name); + cpuset_debug ("clean: %s\n", name); + slurm_cpuset_clean_path (path); + } + } + + cpuset_fts_close (fts); + + update_user_cpusets (cf); + + return (0); +} + +static int do_cpuset_lock (const char *name) +{ + int fd; + char path [1024]; + + /* + * We can't just any files under the cpuset for advisory locking as + * we used to do. Recall that the advisory lock is dropped for the + * process if _any_ open file descriptor for the locked file is closed, + * Since libcpuset opens and closes *all* files under all our cpusets, + * we instead use a more typical lockfile under /var/lock. + */ + snprintf (path, sizeof (path), "/var/lock/%s-cpuset", name); + +again: + if ((fd = open (path, O_RDWR|O_CREAT|O_NOFOLLOW, 0644)) < 0) { + static int first = 1; + if (errno == EEXIST && first) { /* A symlink */ + unlink (path); + first = 0; + goto again; + } + log_err ("Open of lockfile [%s] failed: %s\n", path, strerror (errno)); + return (-1); + } + if (fd_get_writew_lock (fd) < 0) { + close (fd); + return (-1); + } + return (fd); +} + +static int do_cpuset_unlock (int fd) +{ + if (fd < 0) + return (-1); + /*if (fd_release_lock (fd) < 0) + return (-1); */ + return (close (fd)); +} + +int slurm_cpuset_lock (void) +{ + return (do_cpuset_lock ("/slurm")); +} + +int slurm_cpuset_unlock (int fd) +{ + return (do_cpuset_unlock (fd)); +} + +/* + * Create slurm cpuset if necessary and return + * with lock held. 
+ */ +static int create_and_lock_cpuset_dir (cpuset_conf_t cf, const char *name) +{ + char path [1024] = "/dev/cpuset"; + struct cpuset *cp; + int fd; + mode_t oldmask = umask (022); + + strncat (path, name, sizeof (path)); + + cpuset_debug2 ("create_and_lock_cpuset_dir (%s)\n", name); + + /* + * First grab cpuset lock from /var/lock: + */ + if ((fd = do_cpuset_lock (name)) < 0) { + cpuset_error ("Failed to lock %s: %m", path); + return (-1); + } + + if ((mkdir (path, 0755)) < 0) { + /* If mkdir fails with EEXIST, then slurm cpuset already + * exists and we can simply return lockfd after ensuring + * the cpuset is "clean" + */ + umask (oldmask); + if (errno == EEXIST) { + slurm_cpuset_clean (cf); + return (fd); + } + else { + cpuset_error ("mkdir %s: %m", path); + return (-1); + } + } + umask (oldmask); + + /* + * Initialize SLURM cpuset with all CPUs and MEMs: + */ + cp = cpuset_alloc (); + if (cpuset_query (cp, "/") < 0) { + cpuset_error ("Failed to query root cpuset: %m"); + return (-1); + } + + cpuset_debug2 ("modifying %s cpuset\n", name); + + if (cpuset_modify (name, cp) < 0) { + cpuset_error ("Failed to modify %s cpuset: %m", name); + return (-1); + } + + cpuset_free (cp); + + return (fd); +} + +int slurm_cpuset_create (cpuset_conf_t cf) +{ + return (create_and_lock_cpuset_dir (cf, "/slurm")); +} + +int str2int (const char *str) +{ + char *p; + long l = strtol (str, &p, 10); + + if (p && (*p != '\0')) + return (-1); + + return ((int) l); +} +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/cpuset/util.h b/cpuset/util.h new file mode 100644 index 0000000..174a6b0 --- /dev/null +++ b/cpuset/util.h @@ -0,0 +1,64 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. 
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ ****************************************************************************/
+
+#ifndef _HAVE_CPUSET_UTIL_H
+#define _HAVE_CPUSET_UTIL_H
+
+/*
+ * NOTE(review): the operands of the five system includes below were lost
+ * when this patch was extracted; they are kept exactly as found.
+ */
+#include
+
+#include
+#include
+#include
+#include
+
+#include "fd.h"
+#include "conf.h"
+
+/*
+ * Number of CPUs, and of memory nodes, in the root cpuset "/".
+ * Each value is computed on first use and cached.
+ */
+int cpumask_size (void);
+int memmask_size (void);
+
+/*
+ * Take or release the lock for the /slurm cpuset. The lock is a
+ * write-locked file under /var/lock; slurm_cpuset_lock () returns its
+ * descriptor (or -1), which slurm_cpuset_unlock () closes.
+ */
+int slurm_cpuset_lock (void);
+int slurm_cpuset_unlock (int fd);
+
+/*
+ * NOTE(review): not defined in util.c as seen here; presumably the
+ * per-user counterparts of the two functions above -- confirm.
+ */
+int user_cpuset_lock (uid_t uid);
+void user_cpuset_unlock (int fd);
+
+/*
+ * Log one line with the CPU and memory node counts and lists of a
+ * cpuset: the caller's current cpuset, or [cp] labelled with [path].
+ */
+void print_current_cpuset_info ();
+void print_cpuset_info (const char *path, struct cpuset *cp);
+
+/*
+ * Log bitmask [b] rendered as a list, using format [fmt], which takes
+ * the list as its single %s argument.
+ */
+void print_bitmask (const char * fmt, const struct bitmask *b);
+
+/*
+ * Return a new bitmask of the CPUs used by the child cpusets of the
+ * directory [path] (the current cpuset if NULL). Unless [clearall] is
+ * nonzero, CPUs outside of [path] itself are marked used as well.
+ * Returns NULL on failure.
+ */
+struct bitmask *used_cpus_bitmask_path (char *path, int clearall);
+
+/*
+ * slurm_cpuset_create (): create the /slurm cpuset if needed and
+ * return with its lock held (lock descriptor, or -1 on failure).
+ * slurm_cpuset_clean_path (): try to remove the cpuset directory
+ * [path]; always returns 0.
+ */
+int slurm_cpuset_create (cpuset_conf_t conf);
+int slurm_cpuset_clean_path (const char *path);
+
+/*
+ * Convert a decimal string to int; returns -1 if characters follow
+ * the number.
+ */
+int str2int (const char *str);
+
+/*
+ * Skip the 11 character "/dev/cpuset" prefix of [path]; the result
+ * points into [path].
+ */
+const char * cpuset_path_to_name (const char *path);
+#endif
+
+/*
+ * vi: ts=4 sw=4 expandtab
+ */
diff --git a/cpuset/version.map b/cpuset/version.map
new file mode 100644
index 0000000..e234ff4
--- /dev/null
+++ b/cpuset/version.map
@@ -0,0 +1,9 @@
+{ global:
+    plugin_name;
+    plugin_type;
+    plugin_version;
+    spank*;
+    slurm_spank*;
+  local:
+    *;
+};
diff --git a/iorelay/Makefile b/iorelay/Makefile
new file mode 100644
index 0000000..f0c78b7
--- /dev/null
+++ b/iorelay/Makefile
@@ -0,0 +1,13 @@
+# Builds the iorelay plugin as a shared object, iorelay.so.
+CFLAGS = -Wall -ggdb
+
+all: iorelay.so
+
+.SUFFIXES: .c .o .so
+
+# Suffix rules: compile each .c into a position-independent object,
+# then link that object into a shared object of the same name.
+.c.o:
+	$(CC) $(CFLAGS) -o $@ -fPIC -c $<
+.o.so:
+	$(CC) -shared -o $*.so $< $(LIBS)
+
+clean:
+	rm -f *.so *.o
diff --git a/iorelay/iorelay-bind-nfs.sh b/iorelay/iorelay-bind-nfs.sh
new file mode 100755
index 0000000..3a4cb51
--- /dev/null
+++ b/iorelay/iorelay-bind-nfs.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+###############################################################################
+#
+# Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC.
+# Produced at Lawrence Livermore National Laboratory.
+# Written by Jim Garlick .
+#
+# UCRL-CODE-235358
+#
+# This file is part of chaos-spankings, a set of spank plugins for SLURM.
+#
+# This is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+# +############################################################################### +# +# iorelay-bind-nfs - bind directories from mntpt over all nfs mounted +# file systems +# +# Run as root in private namespace +# +declare -r prog=iorelay-bind-nfs + +die () +{ + echo "$prog: $1" >&2 + exit 1 +} +warn () +{ + echo "$prog: $1" >&2 +} +usage () +{ + echo "Usage: $prog -m mntpt" + exit 1 +} +listnfs () +{ + local src dst typ opts a1 a2 + + cat /proc/mounts | while read src dst typ opts a1 a2; do + [ ${typ} = nfs ] && echo ${dst} + fi + done +} + +[ -n "$SLURM_NODELIST" ] || die "SLURM_NODELIST is not set" +relayhost=$(echo $SLURM_NODELIST | glob-hosts -n1) +[ "$(hostname)" = "$relayhost" ] && exit 0 # silently exit if relayhost + +uopt=0 +mntpt="" +while getopts "m:" opt; do + case ${opt} in + m) mntpt=${OPTARG} ;; + *) usage ;; + esac +done +shift $((${OPTIND} - 1)) +[ $# = 0 ] || usage +[ -n "$mntpt" ] || usage +[ -d $mntpt ] || die "not a directory: $mntpt" + +count=0 +for dir in $(listnfs); do + if [ -d ${mntpt}/${dir} ]; then + mount --bind ${mntpt}/${dir} ${dir} || warn "bind ${dir} failed" + count=$(($count+1)) + fi +done +warn "relayed $count file systems" + +exit 0 diff --git a/iorelay/iorelay-mount-nodezero.sh b/iorelay/iorelay-mount-nodezero.sh new file mode 100755 index 0000000..62eac65 --- /dev/null +++ b/iorelay/iorelay-mount-nodezero.sh @@ -0,0 +1,81 @@ +#!/bin/bash +############################################################################### +# +# Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. +# Produced at Lawrence Livermore National Laboratory. +# Written by Jim Garlick . +# +# UCRL-CODE-235358 +# +# This file is part of chaos-spankings, a set of spank plugins for SLURM. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
#
# iorelay-mount-nodezero - mount / from first slurm node on /mnt
#
# Run as root in private namespace.
#
declare -r prog=iorelay-mount-nodezero
declare -r sshcmd=/usr/libexec/iorelay-mrsh-sshfs-wrap

# Print "prog: message" on stderr and exit with status 1.
die ()
{
    echo "$prog: $1" >&2
    exit 1
}

# Print "prog: message" on stderr and keep going.
warn ()
{
    echo "$prog: $1" >&2
}

# Print the usage line and exit with status 1.
usage ()
{
    echo "Usage: $prog -m mntpt -u username"
    exit 1
}


[ -n "$SLURM_NODELIST" ] || die "SLURM_NODELIST is not set"
# Quote the node list: it normally contains brackets (e.g. foo[1-4]) that
# would otherwise be subject to pathname expansion.
relayhost=$(echo "$SLURM_NODELIST" | glob-hosts -n1)
[ -n "$relayhost" ] || die "could not determine relayhost"
[ "$(hostname)" = "$relayhost" ] && exit 0  # silently exit if relayhost

mntpt=""
username=""
while getopts "u:m:" opt; do
    case ${opt} in
        m)  mntpt=${OPTARG} ;;
        u)  username=${OPTARG} ;;
        *)  usage ;;
    esac
done
shift $((OPTIND - 1))
[ $# = 0 ] || usage
[ -n "$mntpt" ] || usage
[ -d "$mntpt" ] || die "not a directory: $mntpt"
[ -n "$username" ] || usage
uid=$(id -u "$username" 2>/dev/null) || die "no such user: $username"
[ "$uid" != 0 ] || die "sshfs as root is unsupported"

grep -q sshfs /proc/mounts && die "sshfs is already mounted"

# sshfs takes its source as [user@]host:[dir]; the colon is mandatory.
remote="${username}@${relayhost}:/"

# NOTE: work around missing -n option in sshfs/fusermount
# /etc/mtab is moved aside for the duration of the mount and put back
# afterwards whether or not sshfs succeeded.
mv -f /etc/mtab /etc/mtab-iorelay || die "failed to back up /etc/mtab"
sshfs -o ssh_command="${sshcmd}" "${remote}" "${mntpt}"
result=$?
mv -f /etc/mtab-iorelay /etc/mtab || warn "failed to restore /etc/mtab"
[ $result = 0 ] || die "sshfs mount ${remote} ${mntpt} failed"

exit 0
#
# sshfs-mrsh-wrap - wrapper for mrsh for sshfs usage
#
declare -r prog=iorelay-sshfs-mrsh-wrap

# Print "prog: message" on stderr and exit with status 1.
die () {
    echo "$prog: $1" >&2
    exit 1
}

# Start empty so values cannot leak in from the caller's environment.
user=""
host=""

# Expected args:
#   -x -a -oClearAllForwardings=yes -2 user@host -s sftp
# We ignore everything except user@host arg.  If several arguments contain
# an '@', the last one wins.
for arg in "$@"; do
    case "$arg" in
        *@*)
            user=${arg%%@*}         # text before the first '@'
            host=${arg#*@}          # text after the first '@' ...
            host=${host%%@*}        # ... up to the next '@', if any
            ;;
    esac
done

[ -n "$user" ] && [ -n "$host" ] || die "no user@host arg"

exec /usr/bin/mrsh -l "$user" "$host" /usr/libexec/openssh/sftp-server
# NOTE(review): a non-interactive bash normally exits by itself when exec
# fails, so this line is presumably only reached if `execfail' is set.
die "failed to exec mrsh"
# NOTREACHED
+ ****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * All spank plugins must define this macro for the SLURM plugin loader. + */ +SPANK_PLUGIN(iorelay, 1) + +#define IORELAY_ENABLE 1 + +/* Usage: iorelay-mount-nodezero -u user -m mntpt */ +#define MOUNT_SCRIPT "/usr/libexec/iorelay-mount-nodezero" + +/* Usage: iorelay-bind-nfs -m mntpt */ +#define BIND_SCRIPT "/usr/libexec/iorelay-bind-nfs" + +static int enabled = 0; + +static int _opt_process (int val, const char *optarg, int remote); + +/* + * Provide a --iorelay option to srun: + */ +struct spank_option spank_options[] = +{ + { "iorelay", NULL, "Enable NFS I/O relaying.", + 1, IORELAY_ENABLE, + (spank_opt_cb_f) _opt_process + }, + SPANK_OPTIONS_TABLE_END +}; + +/* + * Called from both srun and slurmd. + */ +int slurm_spank_init (spank_t sp, int ac, char **av) +{ + char cmd[256]; + struct passwd *pw; + uid_t uid; + + if (!enabled || !spank_remote (sp)) + return (0); + + spank_get_item (sp, S_JOB_UID, &uid); + pw = getpwuid (uid); + if (!pw) { + slurm_error ("Error looking up uid in /etc/passwd"); + return (-1); + } + + /* Unshare file namespace. This means only this process and its children + * will see the following mounts, and when this process and its children + * terminate, the mounts go away automatically. + */ + if (unshare (CLONE_NEWNS) < 0) { + slurm_error ("unshare CLONE_NEWNS: %m"); + return (-1); + } + + /* Mount node zero root on /mnt using sshfs. + * Script has no effect on node zero. + */ + snprintf (cmd, sizeof(cmd), "%s -u %s -m /mnt", MOUNT_SCRIPT, pw->pw_name); + if (system (cmd) != 0) { + slurm_error ("Error running `%s': %m", cmd); + return (-1); + } + + /* Bind NFS-mounted directories now mirrored in /mnt via sshfs + * over their NFS mount points. + * Script has no effect on node zero. 
+ */ + snprintf (cmd, sizeof(cmd), "%s -m /mnt", BIND_SCRIPT); + if (system (cmd) != 0) { + slurm_error ("Error running `%s': %m", cmd); + return (-1); + } + + return (0); +} + +/* + * Called from both srun and slurmd. + */ +int slurm_spank_exit (spank_t sp, int ac, char **av) +{ + /* Do nothing here as mounts in private namespace will take care of + * themselves. + */ + return (0); +} + +static int _opt_process (int val, const char *optarg, int remote) +{ + switch (val) { + case IORELAY_ENABLE: + enabled = 1; + break; + default: + slurm_error ("Ignoring unknown iorelay option value %d\n", val); + break; + } + + return (0); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/iotrace.c b/iotrace.c new file mode 100644 index 0000000..8b154e6 --- /dev/null +++ b/iotrace.c @@ -0,0 +1,126 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +#include +#include +#include +#include +#include + +#include + +/* + * All spank plugins must define this macro for the SLURM plugin loader. 
+ */ +SPANK_PLUGIN(iotrace, 1) + +#define IOTRACE_ENABLE 1 + +static int enabled = 0; +static char *flags = ""; + +static int _opt_process (int val, const char *optarg, int remote); + +/* + * Provide a --iotrace option to srun: + */ +struct spank_option spank_options[] = +{ + { "iotrace", "[flags]", "Enable application I/O tracing.", + 2, IOTRACE_ENABLE, + (spank_opt_cb_f) _opt_process + }, + SPANK_OPTIONS_TABLE_END +}; + + +static void _iotrace_label(spank_t sp, char *buf, int len) +{ + char hostname[128], *p; + uint32_t taskid = 0; + spank_err_t rc; + + rc = spank_get_item (sp, S_TASK_GLOBAL_ID, &taskid); + if (rc != ESPANK_SUCCESS) + slurm_error ("iotrace: error fetching taskid: %d", rc); + + if (gethostname (hostname, sizeof (hostname)) == 0) { + hostname[sizeof(hostname) - 1] = '\0'; + if ((p = strchr (hostname, '.'))) + *p = '\0'; + } else + strncpy (hostname, "unknown", sizeof(hostname)); + + snprintf (buf, len, "iotrace-%d@%s", taskid, hostname); +} + +int slurm_spank_task_init (spank_t sp, int ac, char **av) +{ + char nbuf [4096], obuf [4096]; + char label [64]; + const char *preload = "libplasticfs.so"; + + if (!enabled) + return (0); + + /* append to LD_PRELOAD (with a space) */ + if (spank_getenv (sp, "LD_PRELOAD", obuf, sizeof (obuf)) == ESPANK_SUCCESS) + snprintf (nbuf, sizeof (nbuf), "%s %s", obuf, preload); + else + strncpy (nbuf, preload, strlen (preload)); + if (spank_setenv (sp, "LD_PRELOAD", nbuf, 1) != ESPANK_SUCCESS) + slurm_error ("Failed to set LD_PRELOAD=%s\n", nbuf); + + /* prepend to PLASTICFS (with a pipe) */ + _iotrace_label (sp, label, sizeof (label)); + if (spank_getenv (sp, "PLASTICFS", obuf, sizeof (obuf)) == ESPANK_SUCCESS) + snprintf (nbuf, sizeof (nbuf), "log - %s %s | %s", label, flags, obuf); + else + snprintf (nbuf, sizeof (nbuf), "log - %s %s", label, flags); + + if (spank_setenv (sp, "PLASTICFS", nbuf, 1) != ESPANK_SUCCESS) + slurm_error ("Failed to set PLASTICFS=%s\n", nbuf); + + return (0); +} + +static int _opt_process 
(int val, const char *optarg, int remote) +{ + switch (val) { + case IOTRACE_ENABLE: + enabled = 1; + if (optarg) + flags = optarg; + break; + default: + slurm_error ("Ignoring unknown iotrace option value %d\n", val); + break; + } + + return (0); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/lib/fd.c b/lib/fd.c new file mode 100644 index 0000000..99f2270 --- /dev/null +++ b/lib/fd.c @@ -0,0 +1,273 @@ +/***************************************************************************** + * $Id: fd.c 412 2003-06-03 21:31:19Z achu $ + ***************************************************************************** + * This file is part of the Munge Uid 'N' Gid Emporium (MUNGE). + * For details, see . + * UCRL-CODE-2003-???. + * + * Copyright (C) 2001-2003 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Chris Dunlap . + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA. + ***************************************************************************** + * Refer to "fd.h" for documentation on public functions. 
static int _fd_get_lock (int fd, int cmd, int type);
static pid_t _fd_test_lock (int fd, int type);


/*
 *  Add FD_CLOEXEC to the descriptor flags of [fd], keeping whatever other
 *  descriptor flags are already set.
 *  Returns 0 on success, or -1 on error.
 */
int
fd_set_close_on_exec (int fd)
{
    int fval;

    assert (fd >= 0);

    if ((fval = fcntl (fd, F_GETFD, 0)) < 0)
        return (-1);
    if (fcntl (fd, F_SETFD, fval | FD_CLOEXEC) < 0)
        return (-1);
    return (0);
}


/*
 *  Add O_NONBLOCK to the file status flags of [fd].
 *  Returns 0 on success, or -1 on error.
 */
int
fd_set_nonblocking (int fd)
{
    int fval;

    assert (fd >= 0);

    if ((fval = fcntl (fd, F_GETFL, 0)) < 0)
        return (-1);
    if (fcntl (fd, F_SETFL, fval | O_NONBLOCK) < 0)
        return (-1);
    return (0);
}


/*  Whole-file read lock, non-blocking (F_SETLK).  */
int
fd_get_read_lock (int fd)
{
    return (_fd_get_lock (fd, F_SETLK, F_RDLCK));
}


/*  Whole-file read lock, blocking (F_SETLKW).  */
int
fd_get_readw_lock (int fd)
{
    return (_fd_get_lock (fd, F_SETLKW, F_RDLCK));
}


/*  Whole-file write lock, non-blocking (F_SETLK).  */
int
fd_get_write_lock (int fd)
{
    return (_fd_get_lock (fd, F_SETLK, F_WRLCK));
}


/*  Whole-file write lock, blocking (F_SETLKW).  */
int
fd_get_writew_lock (int fd)
{
    return (_fd_get_lock (fd, F_SETLKW, F_WRLCK));
}


/*  Release the whole-file lock (F_UNLCK via F_SETLK).  */
int
fd_release_lock (int fd)
{
    return (_fd_get_lock (fd, F_SETLK, F_UNLCK));
}


/*  Pid of a lock holder that would block a read lock, 0 if none, -1 on error.  */
pid_t
fd_is_read_lock_blocked (int fd)
{
    return (_fd_test_lock (fd, F_RDLCK));
}


/*  Pid of a lock holder that would block a write lock, 0 if none, -1 on error.  */
pid_t
fd_is_write_lock_blocked (int fd)
{
    return (_fd_test_lock (fd, F_WRLCK));
}


/*
 *  Issue fcntl() command [cmd] for a lock of [type] starting at offset 0
 *  with length 0, i.e. covering the whole file.
 *  Returns the fcntl() result.
 */
static int
_fd_get_lock (int fd, int cmd, int type)
{
    struct flock lock;

    assert (fd >= 0);

    lock.l_type = type;
    lock.l_start = 0;
    lock.l_whence = SEEK_SET;
    lock.l_len = 0;

    return (fcntl (fd, cmd, &lock));
}


/*
 *  Ask via F_GETLK whether a whole-file lock of [type] could be placed.
 *  Returns 0 if it could (fcntl reports F_UNLCK), the pid reported in
 *  l_pid if not, or -1 on error.
 */
static pid_t
_fd_test_lock (int fd, int type)
{
    struct flock lock;

    assert (fd >= 0);

    lock.l_type = type;
    lock.l_start = 0;
    lock.l_whence = SEEK_SET;
    lock.l_len = 0;

    if (fcntl (fd, F_GETLK, &lock) < 0)
        return (-1);
    if (lock.l_type == F_UNLCK)
        return (0);
    return (lock.l_pid);
}
/*
 *  Read from [fd] into [buf] until [n] bytes have arrived or EOF is hit,
 *  retrying reads interrupted by a signal.
 *  Returns the number of bytes stored (less than [n] only at EOF),
 *  or -1 on error.
 */
ssize_t
fd_read_n (int fd, void *buf, size_t n)
{
    size_t nleft;
    ssize_t nread;
    unsigned char *p;

    p = buf;
    nleft = n;
    while (nleft > 0) {
        if ((nread = read (fd, p, nleft)) < 0) {
            if (errno == EINTR)
                continue;
            else
                return (-1);
        }
        else if (nread == 0) {          /* EOF */
            break;
        }
        nleft -= nread;
        p += nread;
    }
    return (n - nleft);
}


/*
 *  Write all [n] bytes of [buf] to [fd], retrying writes interrupted by a
 *  signal and continuing after short writes.
 *  Returns [n] on success, or -1 on error.
 */
ssize_t
fd_write_n (int fd, void *buf, size_t n)
{
    size_t nleft;
    ssize_t nwritten;
    unsigned char *p;

    p = buf;
    nleft = n;
    while (nleft > 0) {
        if ((nwritten = write (fd, p, nleft)) < 0) {
            if (errno == EINTR)
                continue;
            else
                return (-1);
        }
        nleft -= nwritten;
        p += nwritten;
    }
    return (n);
}


/*
 *  Read one byte at a time from [fd] into [buf] until a newline has been
 *  stored, [maxlen]-1 bytes have been stored, or EOF is reached.
 *  Returns the number of bytes stored and NUL-terminates [buf];
 *  returns 0 on EOF with nothing read and -1 on error (in both of those
 *  cases [buf] is not terminated).  A [maxlen] of 0 leaves no room for the
 *  terminator and is rejected with -1/EINVAL instead of writing to [buf].
 */
ssize_t
fd_read_line (int fd, void *buf, size_t maxlen)
{
    ssize_t n, rc;
    unsigned char c, *p;

    if (maxlen == 0) {
        errno = EINVAL;
        return (-1);
    }

    n = 0;
    p = buf;
    while (n < (ssize_t) maxlen - 1) {  /* reserve space for NUL-termination */

        if ((rc = read (fd, &c, 1)) == 1) {
            n++;
            *p++ = c;
            if (c == '\n')
                break;                  /* store newline, like fgets() */
        }
        else if (rc == 0) {
            if (n == 0)                 /* EOF, no data read */
                return (0);
            else                        /* EOF, some data read */
                break;
        }
        else {
            if (errno == EINTR)
                continue;
            return (-1);
        }
    }

    *p = '\0';                          /* NUL-terminate, like fgets() */
    return (n);
}

/*
 *  Following added by Mike Haskell
 *
 *  Like fd_read_n(), but also stops as soon as the data received so far
 *  contains a NUL byte.  Everything the final read() returned is kept, so
 *  bytes after the NUL may be present in [buf].
 *  Returns the number of bytes stored, or -1 on error.
 */
ssize_t
fd_null_read_n (int fd, void *buf, size_t n)
{
    size_t nleft;
    ssize_t nread;
    unsigned char *p;

    p = (unsigned char *) buf;
    nleft = n;
    while (nleft > 0) {
        if ((nread = read (fd, p, nleft)) < 0) {
            if (errno == EINTR)
                continue;
            else
                return (-1);
        }
        else if (nread == 0) {          /* EOF */
            break;
        }
        nleft -= nread;
        /* Earlier chunks were already found NUL-free (otherwise the loop
         * would have ended), so only the new bytes need scanning. */
        if (memchr (p, '\0', (size_t) nread) != NULL)
            break;
        p += nread;
    }
    return (n - nleft);
}
int fd_set_close_on_exec (int fd);
/*
 *  Sets the file descriptor [fd] to be closed on exec().
 *  Returns 0 on success, or -1 on error.
 */

int fd_set_nonblocking (int fd);
/*
 *  Sets the file descriptor [fd] for non-blocking I/O.
 *  Returns 0 on success, or -1 on error.
 */

int fd_get_read_lock (int fd);
/*
 *  Obtain a read lock on the file specified by [fd].
 *  Returns 0 on success, or -1 if prevented from obtaining the lock.
 */

int fd_get_readw_lock (int fd);
/*
 *  Obtain a read lock on the file specified by [fd],
 *    blocking until one becomes available.
 *  Returns 0 on success, or -1 on error.
 */

int fd_get_write_lock (int fd);
/*
 *  Obtain a write lock on the file specified by [fd].
 *  Returns 0 on success, or -1 if prevented from obtaining the lock.
 */

int fd_get_writew_lock (int fd);
/*
 *  Obtain a write lock on the file specified by [fd],
 *    blocking until one becomes available.
 *  Returns 0 on success, or -1 on error.
 */

int fd_release_lock (int fd);
/*
 *  Release a lock held on the file specified by [fd].
 *  Returns 0 on success, or -1 on error.
 */

pid_t fd_is_read_lock_blocked (int fd);
/*
 *  Checks to see if a lock exists on [fd] that would block a request for a
 *    read-lock (i.e., if a write-lock is already being held on the file).
 *  Returns the pid of the process holding the lock, 0 if no lock exists,
 *    or -1 on error.
 */

pid_t fd_is_write_lock_blocked (int fd);
/*
 *  Checks to see if a lock exists on [fd] that would block a request for a
 *    write-lock (i.e., if any lock is already being held on the file).
 *  Returns the pid of the process holding the lock, 0 if no lock exists,
 *    or -1 on error.
 */

ssize_t fd_read_n (int fd, void *buf, size_t n);
/*
 *  Reads up to [n] bytes from [fd] into [buf], retrying interrupted reads
 *    and stopping early only at EOF.
 *  Returns the number of bytes read (0 if EOF was hit before any data),
 *    or -1 on error.
 */

ssize_t fd_write_n (int fd, void *buf, size_t n);
/*
 *  Writes [n] bytes from [buf] to [fd], retrying interrupted and short
 *    writes until everything has been written.
 *  Returns [n] on success, or -1 on error.
 */

ssize_t fd_read_line (int fd, void *buf, size_t maxlen);
/*
 *  Reads at most [maxlen-1] bytes up to a newline from [fd] into [buf].
 *  When at least one byte was read, [buf] is NUL-terminated and contains
 *    the newline if it was encountered within [maxlen-1] bytes.
 *  Returns the number of bytes read (not counting the NUL), 0 on EOF with
 *    no data read, or -1 on error; [buf] is not NUL-terminated in the
 *    latter two cases.
 *  Callers should pass [maxlen] >= 1 so there is room for the terminator.
 */

ssize_t fd_null_read_n (int fd, void *buf, size_t maxlen);
/*
 *  Reads up to [maxlen] bytes from [fd] into [buf].
 *  Returns the number of bytes read, 0 on EOF, or -1 on error.
 *  Differs from fd_read_n() in that it checks for the presence of
 *    a NUL byte in the data read so far and stops reading once one is
 *    found; bytes that arrived after the NUL in the same read are kept.
 *  Added by Mike Haskell
 */
/*
 * lsd_fatal_error : fatal error macro
 *
 * With WITH_LSD_FATAL_ERROR_FUNC the application supplies the function;
 * otherwise, unless a macro of that name already exists, the default
 * prints file, line, message and strerror(errno) to stderr.
 */
#ifdef WITH_LSD_FATAL_ERROR_FUNC
#  undef lsd_fatal_error
   extern void lsd_fatal_error(char *file, int line, char *mesg);
#else /* !WITH_LSD_FATAL_ERROR_FUNC */
#  ifndef lsd_fatal_error
#    define lsd_fatal_error(file, line, mesg)                                \
       do {                                                                  \
           fprintf(stderr, "ERROR: [%s:%d] %s: %s\n",                        \
                   file, line, mesg, strerror(errno));                       \
       } while (0)
#  endif /* !lsd_fatal_error */
#endif /* !WITH_LSD_FATAL_ERROR_FUNC */

/*
 * lsd_nomem_error
 *
 * With WITH_LSD_NOMEM_ERROR_FUNC the application supplies the function;
 * otherwise the default simply evaluates to NULL.
 */
#ifdef WITH_LSD_NOMEM_ERROR_FUNC
#  undef lsd_nomem_error
   extern void * lsd_nomem_error(char *file, int line, char *mesg);
#else /* !WITH_LSD_NOMEM_ERROR_FUNC */
#  ifndef lsd_nomem_error
#    define lsd_nomem_error(file, line, mesg) (NULL)
#  endif /* !lsd_nomem_error */
#endif /* !WITH_LSD_NOMEM_ERROR_FUNC */

/*
 * OOM helper function
 *  Automatically call lsd_nomem_error with appropriate args
 *  and set errno to ENOMEM
 *
 * NOTE: this macro *returns* from the enclosing function with the value
 * of lsd_nomem_error(), so it is only usable where that value is a valid
 * return value.
 */
#define out_of_memory(mesg)                                                  \
    do {                                                                     \
        errno = ENOMEM;                                                      \
        return(lsd_nomem_error(__FILE__, __LINE__, mesg));                   \
    } while (0)

/*
 * Some constants and tunables:
 */

/* number of elements to allocate when extending the hostlist array */
#define HOSTLIST_CHUNK    16

/* max host range: anything larger will be assumed to be an error */
#define MAX_RANGE    16384    /* 16K Hosts */

/* max host suffix value
 * (parenthesized so the shift survives being used inside an expression) */
#define MAX_HOST_SUFFIX (1<<25)

/* max number of ranges that will be processed between brackets */
#define MAX_RANGES   10240    /* 10K Ranges */
/* size of internal hostname buffer (+ some slop), hostnames will probably
 * be truncated if longer than MAXHOSTNAMELEN */
#ifndef MAXHOSTNAMELEN
#define MAXHOSTNAMELEN    64
#endif

/* max size of internal hostrange buffer */
#define MAXHOSTRANGELEN 1024

/* ----[ Internal Data Structures ]---- */

/* hostname type: A convenience structure used in parsing single hostnames.
 * Built by hostname_create() and released by hostname_destroy(), which
 * frees both `hostname' and `prefix'. */
struct hostname_components {
    char *hostname;         /* cache of initialized hostname        */
    char *prefix;           /* hostname prefix                      */
    unsigned long num;      /* numeric suffix                       */

    /* string representation of numeric suffix
     * points into `hostname' (not separately allocated);
     * NULL when the name has no valid numeric suffix */
    char *suffix;
};

typedef struct hostname_components *hostname_t;

/* hostrange type: A single prefix with `hi' and `lo' numeric suffix values */
struct hostrange_components {
    char *prefix;        /* alphanumeric prefix: */

    /* beginning (lo) and end (hi) of suffix range */
    unsigned long lo, hi;

    /* width of numeric output format
     * (pad with zeros up to this width) */
    int width;

    /* If singlehost is 1, `lo' and `hi' are invalid */
    unsigned singlehost:1;
};

typedef struct hostrange_components *hostrange_t;

/* The hostlist type: An array based list of hostrange_t's */
struct hostlist {
#ifndef NDEBUG
/* sanity marker checked by LOCK_HOSTLIST(); 57005 is 0xDEAD */
#define HOSTLIST_MAGIC    57005
    int magic;
#endif
    /* NOTE(review): this member is guarded with `#if WITH_PTHREADS' while
     * the mutex_*() macros are selected with `#ifdef WITH_PTHREADS'; the
     * two disagree if WITH_PTHREADS is ever defined as 0 */
#if    WITH_PTHREADS
    pthread_mutex_t mutex;
#endif                /* WITH_PTHREADS */

    /* current number of elements available in array */
    int size;

    /* current number of ranges stored in array */
    int nranges;

    /* current number of hosts stored in hostlist */
    int nhosts;

    /* pointer to hostrange array */
    hostrange_t *hr;

    /* list of iterators */
    struct hostlist_iterator *ilist;

};


/* a hostset is a wrapper around a hostlist */
struct hostset {
    hostlist_t hl;
};

/* cursor over a hostlist; iterators for one list are chained via `next' */
struct hostlist_iterator {
#ifndef NDEBUG
    int magic;
#endif
    /* hostlist we are traversing */
    hostlist_t hl;

    /* current index of iterator in hl->hr[] */
    int idx;

    /* current hostrange object in list hl, i.e. hl->hr[idx] */
    hostrange_t hr;

    /* current depth we've traversed into range hr */
    int depth;

    /* next ptr for lists of iterators */
    struct hostlist_iterator *next;
};
/* ---- ---- */

/* ------[ static function prototypes ]------ */

/* general utility helpers */
static void _error(char *file, int line, char *mesg, ...);
static char * _next_tok(char *, char **);
static int    _zero_padded(unsigned long, int);
static int    _width_equiv(unsigned long, int *, unsigned long, int *);

/* hostname_t helpers */
static int           host_prefix_end(const char *);
static hostname_t    hostname_create(const char *);
static void          hostname_destroy(hostname_t);
static int           hostname_suffix_is_valid(hostname_t);
static int           hostname_suffix_width(hostname_t);

/* hostrange_t helpers */
static hostrange_t   hostrange_new(void);
static hostrange_t   hostrange_create_single(const char *);
static hostrange_t   hostrange_create(char *, unsigned long, unsigned long, int);
static unsigned long hostrange_count(hostrange_t);
static hostrange_t   hostrange_copy(hostrange_t);
static void          hostrange_destroy(hostrange_t);
static hostrange_t   hostrange_delete_host(hostrange_t, unsigned long);
static int           hostrange_cmp(hostrange_t, hostrange_t);
static int           hostrange_prefix_cmp(hostrange_t, hostrange_t);
static int           hostrange_within_range(hostrange_t, hostrange_t);
static int           hostrange_width_combine(hostrange_t, hostrange_t);
static int           hostrange_empty(hostrange_t);
static char *        hostrange_pop(hostrange_t);
static char *        hostrange_shift(hostrange_t);
static int           hostrange_join(hostrange_t, hostrange_t);
static hostrange_t   hostrange_intersect(hostrange_t, hostrange_t);
static int           hostrange_hn_within(hostrange_t, hostname_t);
static size_t        hostrange_to_string(hostrange_t hr, size_t, char *, char *);
static size_t        hostrange_numstr(hostrange_t, size_t, char *);

/* hostlist_t helpers */
static hostlist_t  hostlist_new(void);
static hostlist_t _hostlist_create_bracketed(const char *, char *, char *);
static int         hostlist_resize(hostlist_t, size_t);
static int         hostlist_expand(hostlist_t);
static int         hostlist_push_range(hostlist_t, hostrange_t);
static int         hostlist_push_hr(hostlist_t, char *, unsigned long,
                                    unsigned long, int);
static int         hostlist_insert_range(hostlist_t, hostrange_t, int);
static void        hostlist_delete_range(hostlist_t, int n);
static void        hostlist_coalesce(hostlist_t hl);
static void        hostlist_collapse(hostlist_t hl);
static hostlist_t _hostlist_create(const char *, char *, char *);
static void        hostlist_shift_iterators(hostlist_t, int, int, int);
static int        _attempt_range_join(hostlist_t, int);
static int        _is_bracket_needed(hostlist_t, int);

/* iterator helpers */
static hostlist_iterator_t hostlist_iterator_new(void);
static void               _iterator_advance(hostlist_iterator_t);
static void               _iterator_advance_range(hostlist_iterator_t);

/* hostset helpers */
static int hostset_find_host(hostset_t, const char *);

/* ------[ macros ]------ */

/*
 * Thin wrappers around the pthread mutex calls.  With WITH_PTHREADS, a
 * non-zero result is stored in errno, reported through lsd_fatal_error()
 * and followed by abort().  Without it, all four expand to nothing.
 */
#ifdef WITH_PTHREADS
#  define mutex_init(mutex)                                                  \
     do {                                                                    \
        int e = pthread_mutex_init(mutex, NULL);                             \
        if (e) {                                                             \
            errno = e;                                                       \
            lsd_fatal_error(__FILE__, __LINE__, "hostlist mutex init:");     \
            abort();                                                         \
        }                                                                    \
     } while (0)

#  define mutex_lock(mutex)                                                  \
     do {                                                                    \
        int e = pthread_mutex_lock(mutex);                                   \
        if (e) {                                                             \
           errno = e;                                                        \
           lsd_fatal_error(__FILE__, __LINE__, "hostlist mutex lock:");      \
           abort();                                                          \
        }                                                                    \
     } while (0)

#  define mutex_unlock(mutex)                                                \
     do {                                                                    \
        int e = pthread_mutex_unlock(mutex);                                 \
        if (e) {                                                             \
            errno = e;                                                       \
            lsd_fatal_error(__FILE__, __LINE__, "hostlist mutex unlock:");   \
            abort();                                                         \
        }                                                                    \
     } while (0)

#  define mutex_destroy(mutex)                                               \
     do {                                                                    \
        int e = pthread_mutex_destroy(mutex);                                \
        if (e) {                                                             \
            errno = e;                                                       \
            lsd_fatal_error(__FILE__, __LINE__, "hostlist mutex destroy:");  \
            abort();                                                         \
        }                                                                    \
     } while (0)

#else                /* !WITH_PTHREADS */

#  define mutex_init(mutex)
#  define mutex_lock(mutex)
#  define mutex_unlock(mutex)
#  define mutex_destroy(mutex)

#endif                /* WITH_PTHREADS */
/*
 * Lock hostlist [_hl] and, in debug builds, check its magic number.
 * The argument is parenthesized everywhere so any pointer expression
 * may be passed.
 */
#define LOCK_HOSTLIST(_hl)                                                   \
      do {                                                                   \
          assert((_hl) != NULL);                                             \
          mutex_lock(&(_hl)->mutex);                                         \
          assert((_hl)->magic == HOSTLIST_MAGIC);                            \
      } while (0)

/* Unlock hostlist [_hl]. */
#define UNLOCK_HOSTLIST(_hl)                                                 \
      do {                                                                   \
          mutex_unlock(&(_hl)->mutex);                                       \
      } while (0)

/* Set errno to [_errno] and return [_rc] from the enclosing function. */
#define seterrno_ret(_errno, _rc)                                            \
      do {                                                                   \
          errno = (_errno);                                                  \
          return (_rc);                                                      \
      } while (0)

/* ------[ Function Definitions ]------ */

/* ----[ general utility functions ]---- */


/*
 *  Varargs capable error reporting via lsd_fatal_error()
 *
 *  Formats [msg] and its arguments into a local buffer (longer messages
 *  are truncated) and hands the result to lsd_fatal_error() together
 *  with [file] and [line].
 */
static void _error(char *file, int line, char *msg, ...)
{
    va_list ap;
    char    buf[1024];
    int     len;

    va_start(ap, msg);
    len = vsnprintf(buf, sizeof(buf), msg, ap);
    va_end(ap);

    /* force termination if the output was truncated or formatting failed */
    if ((len < 0) || ((size_t) len >= sizeof(buf)))
        buf[sizeof(buf) - 1] = '\0';

    lsd_fatal_error(file, line, buf);
}
/*
 * Helper for _next_tok(): [tok] is the start of the current token and
 * *str the separator it currently ends at.
 *
 * If a _single_ opening bracket exists between tok and *str (an opening
 * bracket with no closing one yet), push *str past the first closing
 * bracket, provided no further opening bracket comes before it.
 * Returns 1 if *str was moved, 0 otherwise.
 */
static int _advance_past_brackets (char *tok, char **str)
{
    if (    memchr(tok, '[', *str - tok) != NULL
         && memchr(tok, ']', *str - tok) == NULL ) {
        char *q = strchr(*str, ']');
        if (q && memchr(*str, '[', q - *str) == NULL) {
            *str = q + 1;
            return (1);
        }
    }

    return 0;
}

/*
 * Helper function for host list string parsing routines
 * Returns a pointer to the next token; additionally advance *str
 * to the next separator.  Returns NULL when no token is left.
 * The input string is modified: separators following the token are
 * overwritten with NUL bytes.
 *
 * next_tok was taken directly from pdsh courtesy of Jim Garlick.
 * (with modifications to support bracketed hostlists, i.e.:
 *  xxx[xx,xx,xx] is a single token)
 *
 */
static char * _next_tok(char *sep, char **str)
{
    char *tok;

    /* push str past any leading separators */
    while (**str != '\0' && strchr(sep, **str) != NULL)
        (*str)++;

    if (**str == '\0')
        return NULL;

    /* assign token ptr */
    tok = *str;

    /*
     * Advance str past any separators, but if a separator occurs between
     *  brackets, e.g. foo[0-3,5], then advance str past closing brackets and
     *  try again.
     */
    do {
        /* push str past token and leave pointing to first separator */
        while (**str != '\0' && strchr(sep, **str) == NULL)
            (*str)++;
    } while (_advance_past_brackets (tok, str));

    /* nullify consecutive separators and push str beyond them */
    while (**str != '\0' && strchr(sep, **str) != NULL)
        *(*str)++ = '\0';

    return tok;
}


/* return the number of zeros needed to pad "num" to "width"
 */
static int _zero_padded(unsigned long num, int width)
{
    int n = 1;
    while (num /= 10L)
        n++;
    return width > n ? width - n : 0;
}

/* test whether two format `width' parameters are "equivalent"
 * The width arguments "wn" and "wm" for integers "n" and "m"
 * are equivalent if:
 *
 *  o  *wn == *wm  OR
 *
 *  o  applying the same format width (either *wn or *wm) to both of
 *     'n' and 'm' will not change the zero padding of *either* 'm' nor 'n'.
 *
 *  If this function returns 1 (or true), the appropriate width value
 *  (either '*wm' or '*wn') will have been adjusted such that both format
 *  widths are equivalent.  Returns 0 if the widths cannot be reconciled.
 */
static int _width_equiv(unsigned long n, int *wn, unsigned long m, int *wm)
{
    int npad, nmpad, mpad, mnpad;

    /* compare the widths themselves, not the addresses holding them */
    if (*wn == *wm)
        return 1;

    npad  = _zero_padded(n, *wn);    /* n's padding at its own width */
    nmpad = _zero_padded(n, *wm);    /* n's padding at m's width     */
    mpad  = _zero_padded(m, *wm);    /* m's padding at its own width */
    mnpad = _zero_padded(m, *wn);    /* m's padding at n's width     */

    /* neither width can be applied to the other number unchanged */
    if (npad != nmpad && mpad != mnpad)
        return 0;

    if (npad != nmpad) {
        /* only m tolerates the other width: adopt n's width */
        *wm = *wn;
    } else {
        /* n tolerates m's width: adopt it */
        *wn = *wm;
    }
    return 1;
}


/* ----[ hostname_t functions ]---- */

/*
 * return the location of the last char in the hostname prefix
 * (-1 if the name is empty or consists only of digits)
 */
static int host_prefix_end(const char *hostname)
{
    int idx = (int) strlen(hostname) - 1;

    /* <ctype.h> functions need an unsigned char value */
    while (idx >= 0 && isdigit((unsigned char) hostname[idx]))
        idx--;
    return idx;
}
return hn; +} + +/* free a hostname object + */ +static void hostname_destroy(hostname_t hn) +{ + if (hn == NULL) + return; + hn->suffix = NULL; + if (hn->hostname) + free(hn->hostname); + if (hn->prefix) + free(hn->prefix); + free(hn); +} + +/* return true if the hostname has a valid numeric suffix + */ +static int hostname_suffix_is_valid(hostname_t hn) +{ + return hn->suffix != NULL; +} + +/* return the width (in characters) of the numeric part of the hostname + */ +static int hostname_suffix_width(hostname_t hn) +{ + assert(hn->suffix != NULL); + return (int) strlen(hn->suffix); +} + + +/* ----[ hostrange_t functions ]---- */ + +/* allocate a new hostrange object + */ +static hostrange_t hostrange_new(void) +{ + hostrange_t new = (hostrange_t) malloc(sizeof(*new)); + if (!new) + out_of_memory("hostrange create"); + return new; +} + +/* Create a hostrange_t containing a single host without a valid suffix + * hr->prefix will represent the entire hostname. + */ +static hostrange_t hostrange_create_single(const char *prefix) +{ + hostrange_t new; + + assert(prefix != NULL); + + if ((new = hostrange_new()) == NULL) + goto error1; + + if ((new->prefix = strdup(prefix)) == NULL) + goto error2; + + new->singlehost = 1; + new->lo = 0L; + new->hi = 0L; + new->width = 0; + + return new; + + error2: + free(new); + error1: + out_of_memory("hostrange create single"); +} + + +/* Create a hostrange object with a prefix, hi, lo, and format width + */ +static hostrange_t +hostrange_create(char *prefix, unsigned long lo, unsigned long hi, int width) +{ + hostrange_t new; + + assert(prefix != NULL); + + if ((new = hostrange_new()) == NULL) + goto error1; + + if ((new->prefix = strdup(prefix)) == NULL) + goto error2; + + new->lo = lo; + new->hi = hi; + new->width = width; + + new->singlehost = 0; + + return new; + + error2: + free(new); + error1: + out_of_memory("hostrange create"); +} + + +/* Return the number of hosts stored in the hostrange object + */ +static unsigned long 
hostrange_count(hostrange_t hr) +{ + assert(hr != NULL); + if (hr->singlehost) + return 1; + else + return hr->hi - hr->lo + 1; +} + +/* Copy a hostrange object + */ +static hostrange_t hostrange_copy(hostrange_t hr) +{ + assert(hr != NULL); + + if (hr->singlehost) + return hostrange_create_single(hr->prefix); + else + return hostrange_create(hr->prefix, hr->lo, hr->hi, + hr->width); +} + + +/* free memory allocated by the hostrange object + */ +static void hostrange_destroy(hostrange_t hr) +{ + if (hr == NULL) + return; + if (hr->prefix) + free(hr->prefix); + free(hr); +} + +/* hostrange_delete_host() deletes a specific host from the range. + * If the range is split into two, the greater range is returned, + * and `hi' of the lesser range is adjusted accordingly. If the + * highest or lowest host is deleted from a range, NULL is returned + * and the hostrange hr is adjusted properly. + */ +static hostrange_t hostrange_delete_host(hostrange_t hr, unsigned long n) +{ + hostrange_t new = NULL; + + assert(hr != NULL); + assert(n >= hr->lo && n <= hr->hi); + + if (n == hr->lo) + hr->lo++; + else if (n == hr->hi) + hr->hi--; + else { + if (!(new = hostrange_copy(hr))) + out_of_memory("hostrange copy"); + hr->hi = n - 1; + new->lo = n + 1; + } + + return new; +} + +/* hostrange_cmp() is used to sort hostrange objects. It will + * sort based on the following (in order): + * o result of strcmp on prefixes + * o if widths are compatible, then: + * sort based on lowest suffix in range + * else + * sort based on width */ +static int hostrange_cmp(hostrange_t h1, hostrange_t h2) +{ + int retval; + + assert(h1 != NULL); + assert(h2 != NULL); + + if ((retval = hostrange_prefix_cmp(h1, h2)) == 0) + retval = hostrange_width_combine(h1, h2) ? + h1->lo - h2->lo : h1->width - h2->width; + + return retval; +} + + +/* compare the prefixes of two hostrange objects. + * returns: + * < 0 if h1 prefix is less than h2 OR h1 == NULL. 
+ * + * 0 if h1's prefix and h2's prefix match, + * UNLESS, either h1 or h2 (NOT both) do not have a valid suffix. + * + * > 0 if h1's prefix is greater than h2's OR h2 == NULL. */ +static int hostrange_prefix_cmp(hostrange_t h1, hostrange_t h2) +{ + int retval; + if (h1 == NULL) + return 1; + if (h2 == NULL) + return -1; + + retval = strcmp(h1->prefix, h2->prefix); + return retval == 0 ? h2->singlehost - h1->singlehost : retval; +} + +/* returns true if h1 and h2 would be included in the same bracketed hostlist. + * h1 and h2 will be in the same bracketed list iff: + * + * 1. h1 and h2 have same prefix + * 2. neither h1 nor h2 are singlet hosts (i.e. invalid suffix) + * + * (XXX: Should incompatible widths be placed in the same bracketed list? + * There's no good reason not to, except maybe aesthetics) + */ +static int hostrange_within_range(hostrange_t h1, hostrange_t h2) +{ + if (hostrange_prefix_cmp(h1, h2) == 0) + return h1->singlehost || h2->singlehost ? 0 : 1; + else + return 0; +} + + +/* compare two hostrange objects to determine if they are width + * compatible, returns: + * 1 if widths can safely be combined + * 0 if widths cannot be safely combined + */ +static int hostrange_width_combine(hostrange_t h0, hostrange_t h1) +{ + assert(h0 != NULL); + assert(h1 != NULL); + + return _width_equiv(h0->lo, &h0->width, h1->lo, &h1->width); +} + + +/* Return true if hostrange hr contains no hosts, i.e. hi < lo + */ +static int hostrange_empty(hostrange_t hr) +{ + assert(hr != NULL); + return ((hr->hi < hr->lo) || (hr->hi == (unsigned long) -1)); +} + +/* return the string representation of the last host in hostrange hr + * and remove that host from the range (i.e. 
decrement hi if possible) + * + * Returns NULL if malloc fails OR there are no more hosts left + */ +static char *hostrange_pop(hostrange_t hr) +{ + size_t size = 0; + char *host = NULL; + + assert(hr != NULL); + + if (hr->singlehost) { + hr->lo++; /* effectively set count == 0 */ + host = strdup(hr->prefix); + } else if (hostrange_count(hr) > 0) { + size = strlen(hr->prefix) + hr->width + 16; + if (!(host = (char *) malloc(size * sizeof(char)))) + out_of_memory("hostrange pop"); + snprintf(host, size, "%s%0*lu", hr->prefix, + hr->width, hr->hi--); + } + + return host; +} + +/* Same as hostrange_pop(), but remove host from start of range */ +static char *hostrange_shift(hostrange_t hr) +{ + size_t size = 0; + char *host = NULL; + + assert(hr != NULL); + + if (hr->singlehost) { + hr->lo++; + if (!(host = strdup(hr->prefix))) + out_of_memory("hostrange shift"); + } else if (hostrange_count(hr) > 0) { + size = strlen(hr->prefix) + hr->width + 16; + if (!(host = (char *) malloc(size * sizeof(char)))) + out_of_memory("hostrange shift"); + snprintf(host, size, "%s%0*lu", hr->prefix, + hr->width, hr->lo++); + } + + return host; +} + + +/* join two hostrange objects. + * + * returns: + * + * -1 if ranges do not overlap (including incompatible zero padding) + * 0 if ranges join perfectly + * >0 number of hosts that were duplicated in h1 and h2 + * + * h2 will be coalesced into h1 if rc >= 0 + * + * it is assumed that h1->lo <= h2->lo, i.e. 
hr1 <= hr2 + * + */ +static int hostrange_join(hostrange_t h1, hostrange_t h2) +{ + int duplicated = -1; + + assert(h1 != NULL); + assert(h2 != NULL); + assert(hostrange_cmp(h1, h2) <= 0); + + if (hostrange_prefix_cmp(h1, h2) == 0 && + hostrange_width_combine(h1, h2)) { + + if (h1->singlehost && h2->singlehost) { /* matching singlets */ + duplicated = 1; + } else if (h1->hi == h2->lo - 1) { /* perfect join */ + h1->hi = h2->hi; + duplicated = 0; + } else if (h1->hi >= h2->lo) { /* some duplication */ + if (h1->hi < h2->hi) { + duplicated = h1->hi - h2->lo + 1; + h1->hi = h2->hi; + } else + duplicated = hostrange_count(h2); + } + } + + return duplicated; +} + +/* hostrange intersect returns the intersection (common hosts) + * of hostrange objects h1 and h2. If there is no intersection, + * NULL is returned. + * + * It is assumed that h1 <= h2 (i.e. h1->lo <= h2->lo) + */ +static hostrange_t hostrange_intersect(hostrange_t h1, hostrange_t h2) +{ + hostrange_t new = NULL; + + assert(h1 != NULL); + assert(h2 != NULL); + + if (h1->singlehost || h2->singlehost) + return NULL; + + assert(hostrange_cmp(h1, h2) <= 0); + + if ((hostrange_prefix_cmp(h1, h2) == 0) + && (h1->hi > h2->lo) + && (hostrange_width_combine(h1, h2))) { + + if (!(new = hostrange_copy(h1))) + return NULL; + new->lo = h2->lo; + new->hi = h2->hi < h1->hi ? h2->hi : h1->hi; + } + + return new; +} + +/* return 1 if hostname hn is within the hostrange hr + * 0 if not. + */ +static int hostrange_hn_within(hostrange_t hr, hostname_t hn) +{ + if (hr->singlehost) { + /* + * If the current hostrange [hr] is a `singlehost' (no valid + * numeric suffix (lo and hi)), then the hostrange [hr] + * stores just one host with name == hr->prefix. + * + * Thus the full hostname in [hn] must match hr->prefix, in + * which case we return true. Otherwise, there is no + * possibility that [hn] matches [hr]. 
+ */ + if (strcmp (hn->hostname, hr->prefix) == 0) + return 1; + else + return 0; + } + + /* + * Now we know [hr] is not a "singlehost", so hostname + * better have a valid numeric suffix, or there is no + * way we can match + */ + if (!hostname_suffix_is_valid (hn)) + return 0; + + /* + * If hostrange and hostname prefixes don't match, then + * there is no way the hostname falls within the range [hr]. + */ + if (strcmp(hr->prefix, hn->prefix) != 0) + return 0; + + /* + * Finally, check whether [hn], with a valid numeric suffix, + * falls within the range of [hr]. + */ + if (hn->num <= hr->hi && hn->num >= hr->lo) { + int width = hostname_suffix_width(hn); + int num = hn->num; + return (_width_equiv(hr->lo, &hr->width, num, &width)); + } + + return 0; +} + + +/* copy a string representation of the hostrange hr into buffer buf, + * writing at most n chars including NUL termination + */ +static size_t +hostrange_to_string(hostrange_t hr, size_t n, char *buf, char *separator) +{ + unsigned long i; + int truncated = 0; + int len = 0; + char sep = separator == NULL ? ',' : separator[0]; + + if (n == 0) + return 0; + + if (hr->singlehost) + return snprintf(buf, n, "%s", hr->prefix); + + for (i = hr->lo; i <= hr->hi; i++) { + size_t m = (n - len) <= n ? n - len : 0; /* check for < 0 */ + int ret = snprintf(buf + len, m, "%s%0*lu", + hr->prefix, hr->width, i); + if (ret < 0 || ret >= m) { + len = n; + truncated = 1; + break; + } + len+=ret; + buf[len++] = sep; + } + + if (truncated) { + buf[n-1] = '\0'; + return -1; + } else { + /* back up over final separator */ + buf[--len] = '\0'; + return len; + } +} + +/* Place the string representation of the numeric part of hostrange into buf + * writing at most n chars including NUL termination. 
+ */ +static size_t hostrange_numstr(hostrange_t hr, size_t n, char *buf) +{ + int len = 0; + + assert(buf != NULL); + + if (hr->singlehost || n == 0) + return 0; + + len = snprintf(buf, n, "%0*lu", hr->width, hr->lo); + + if ((len >= 0) && (len < n) && (hr->lo < hr->hi)) { + int len2 = snprintf(buf+len, n-len, "-%0*lu", hr->width, hr->hi); + if (len2 < 0) + len = -1; + else + len += len2; + } + + return len; +} + + +/* ----[ hostlist functions ]---- */ + +/* Create a new hostlist object. + * Returns an empty hostlist, or NULL if memory allocation fails. + */ +static hostlist_t hostlist_new(void) +{ + int i; + hostlist_t new = (hostlist_t) malloc(sizeof(*new)); + if (!new) + goto fail1; + + assert(new->magic = HOSTLIST_MAGIC); + mutex_init(&new->mutex); + + new->hr = (hostrange_t *) malloc(HOSTLIST_CHUNK * sizeof(hostrange_t)); + if (!new->hr) + goto fail2; + + /* set entries in hostrange array to NULL */ + for (i = 0; i < HOSTLIST_CHUNK; i++) + new->hr[i] = NULL; + + new->size = HOSTLIST_CHUNK; + new->nranges = 0; + new->nhosts = 0; + new->ilist = NULL; + return new; + + fail2: + free(new); + fail1: + out_of_memory("hostlist_create"); +} + + +/* Resize the internal array used to store the list of hostrange objects. 
+ * + * returns 1 for a successful resize, + * 0 if call to _realloc fails + * + * It is assumed that the caller has the hostlist hl locked + */ +static int hostlist_resize(hostlist_t hl, size_t newsize) +{ + int i; + size_t oldsize; + assert(hl != NULL); + assert(hl->magic == HOSTLIST_MAGIC); + oldsize = hl->size; + hl->size = newsize; + hl->hr = realloc((void *) hl->hr, hl->size*sizeof(hostrange_t)); + if (!(hl->hr)) + return 0; + + for (i = oldsize; i < newsize; i++) + hl->hr[i] = NULL; + + return 1; +} + +/* Resize hostlist by one HOSTLIST_CHUNK + * Assumes that hostlist hl is locked by caller + */ +static int hostlist_expand(hostlist_t hl) +{ + if (!hostlist_resize(hl, hl->size + HOSTLIST_CHUNK)) + return 0; + else + return 1; +} + +/* Push a hostrange object onto hostlist hl + * Returns the number of hosts successfully pushed onto hl + * or -1 if there was an error allocating memory + */ +static int hostlist_push_range(hostlist_t hl, hostrange_t hr) +{ + hostrange_t tail; + int retval; + + assert(hr != NULL); + LOCK_HOSTLIST(hl); + + tail = (hl->nranges > 0) ? 
hl->hr[hl->nranges-1] : hl->hr[0]; + + if (hl->size == hl->nranges && !hostlist_expand(hl)) + goto error; + + if (hl->nranges > 0 + && hostrange_prefix_cmp(tail, hr) == 0 + && tail->hi == hr->lo - 1 + && hostrange_width_combine(tail, hr)) { + tail->hi = hr->hi; + } else { + if ((hl->hr[hl->nranges++] = hostrange_copy(hr)) == NULL) + goto error; + } + + retval = hl->nhosts += hostrange_count(hr); + + UNLOCK_HOSTLIST(hl); + + return retval; + + error: + UNLOCK_HOSTLIST(hl); + return -1; +} + + + +/* Same as hostlist_push_range() above, but prefix, lo, hi, and width + * are passed as args + */ +static int +hostlist_push_hr(hostlist_t hl, char *prefix, unsigned long lo, + unsigned long hi, int width) +{ + hostrange_t hr = hostrange_create(prefix, lo, hi, width); + int retval = hostlist_push_range(hl, hr); + hostrange_destroy(hr); + return retval; +} + +/* Insert a range object hr into position n of the hostlist hl + * Assumes that hl->mutex is already held by calling process + */ +static int hostlist_insert_range(hostlist_t hl, hostrange_t hr, int n) +{ + int i; + hostrange_t tmp; + hostlist_iterator_t hli; + + assert(hl != NULL); + assert(hl->magic == HOSTLIST_MAGIC); + assert(hr != NULL); + + if (n > hl->nranges) + return 0; + + if (hl->size == hl->nranges && !hostlist_expand(hl)) + return 0; + + /* copy new hostrange into slot "n" in array */ + tmp = hl->hr[n]; + hl->hr[n] = hostrange_copy(hr); + + /* push remaining hostrange entries up */ + for (i = n + 1; i < hl->nranges + 1; i++) { + hostrange_t last = hl->hr[i]; + hl->hr[i] = tmp; + tmp = last; + } + hl->nranges++; + + /* adjust hostlist iterators if needed */ + for (hli = hl->ilist; hli; hli = hli->next) { + if (hli->idx >= n) + hli->hr = hli->hl->hr[++hli->idx]; + } + + return 1; +} + +/* Delete the range at position n in the range array + * Assumes the hostlist lock is already held. 
+ */ +static void hostlist_delete_range(hostlist_t hl, int n) +{ + int i; + hostrange_t old; + + assert(hl != NULL); + assert(hl->magic == HOSTLIST_MAGIC); + assert(n < hl->nranges && n >= 0); + + old = hl->hr[n]; + for (i = n; i < hl->nranges - 1; i++) + hl->hr[i] = hl->hr[i + 1]; + hl->nranges--; + hl->hr[hl->nranges] = NULL; + hostlist_shift_iterators(hl, n, 0, 1); + + /* XXX caller responsible for adjusting nhosts */ + /* hl->nhosts -= hostrange_count(old) */ + + hostrange_destroy(old); +} + +#if WANT_RECKLESS_HOSTRANGE_EXPANSION + +/* The reckless hostrange expansion function. + * See comment in hostlist.h:hostlist_create() for more info on + * the different choices for hostlist notation. + */ +hostlist_t _hostlist_create(const char *hostlist, char *sep, char *r_op) +{ + char *str, *orig; + char *tok, *cur; + int high, low, fmt = 0; + char prefix[256] = ""; + int pos = 0; + int error = 0; + char range_op = r_op[0];/* XXX support > 1 char range ops in future? */ + + hostlist_t new = hostlist_new(); + + orig = str = strdup(hostlist); + + /* return an empty list if an empty string was passed in */ + if (str == NULL || strlen(str) == 0) + goto done; + + /* Use hostlist_create_bracketed if we see "[" */ + if (strchr(str, '[') != NULL) + return _hostlist_create_bracketed(hostlist, sep, r_op); + + while ((tok = _next_tok(sep, &str)) != NULL) { + + /* save the current string for error messages */ + cur = tok; + + high = low = 0; + + /* find end of alpha part + * do this by finding last occurence of range_op in str */ + pos = strlen(tok) - 1; + if (strstr(tok, r_op) != '\0') { + while (pos >= 0 && (char) tok[pos] != range_op) + pos--; + } + + /* now back up past any digits */ + while (pos >= 0 && isdigit((char) tok[--pos])) {;} + + /* Check for valid x-y range (x must be a digit) + * Reset pos if the range is not valid */ + if (!isdigit((char) tok[++pos])) + pos = strlen(tok) - 1; + + /* create prefix string + * if prefix will be zero length, but prefix already exists 
+ * use the previous prefix and fmt + */ + if ((pos > 0) || (prefix[0] == '\0')) { + memcpy(prefix, tok, (size_t) pos * sizeof(char)); + prefix[pos] = '\0'; + + /* push pointer past prefix */ + tok += pos; + + /* count number of digits for ouput fmt */ + for (fmt = 0; isdigit(tok[fmt]); ++fmt) {;} + + if (fmt == 0) + error = 1; + + } else + tok += pos; + + /* get lower bound */ + low = strtoul(tok, (char **) &tok, 10); + + if (*tok == range_op) { /* now get range upper bound */ + /* push pointer past range op */ + ++tok; + + /* find length of alpha part */ + for (pos = 0; tok[pos] && !isdigit(tok[pos]); ++pos) {;} + + /* alpha part must match prefix or error + * this could mean we've got something like "rtr1-a2" + * so just record an error + */ + if (pos > 0) { + if (pos != strlen(prefix) || + strncmp(prefix, tok, pos) != 0) + error = 1; + } + + if (*tok != '\0') + tok += pos; + + /* make sure we have digits to the end */ + for (pos = 0; tok[pos] && isdigit((char) tok[pos]); ++pos) {;} + + if (pos > 0) { /* we have digits to process */ + high = strtoul(tok, (char **) &tok, 10); + } else { /* bad boy, no digits */ + error = 1; + } + + if ((low > high) || (high - low > MAX_RANGE)) + error = 1; + + } else { /* single value */ + high = 0; /* special case, ugh. */ + } + + /* error if: + * 1. we are not at end of string + * 2. 
upper bound equals lower bound + */ + if (*tok != '\0' || high == low) + error = 1; + + if (error) { /* assume this is not a range on any error */ + hostlist_push_host(new, cur); + } else { + if (high < low) + high = low; + hostlist_push_hr(new, prefix, low, high, fmt); + } + + error = 0; + } + + done: + free(orig); + + return new; +} + +#else /* !WANT_RECKLESS_HOSTRANGE_EXPANSION */ + +hostlist_t _hostlist_create(const char *hostlist, char *sep, char *r_op) +{ + return _hostlist_create_bracketed(hostlist, sep, r_op); +} + +#endif /* WANT_RECKLESS_HOSTRANGE_EXPANSION */ + +struct _range { + unsigned long lo, hi; + int width; +}; + +/* Grab a single range from str + * returns 1 if str contained a valid number or range, + * 0 if conversion of str to a range failed. + */ +static int _parse_single_range(const char *str, struct _range *range) +{ + char *p, *q; + char *orig = strdup(str); + if (!orig) + seterrno_ret(ENOMEM, 0); + + if ((p = strchr(str, '-'))) { + *p++ = '\0'; + if (*p == '-') /* do NOT allow negative numbers */ + goto error; + } + range->lo = strtoul(str, &q, 10); + if (q == str) + goto error; + + range->hi = (p && *p) ? strtoul(p, &q, 10) : range->lo; + + if (q == p || *q != '\0') + goto error; + + if (range->lo > range->hi) + goto error; + + if (range->hi - range->lo + 1 > MAX_RANGE ) { + _error(__FILE__, __LINE__, "Too many hosts in range `%s'", orig); + free(orig); + seterrno_ret(ERANGE, 0); + } + + free(orig); + range->width = strlen(str); + return 1; + + error: + _error(__FILE__, __LINE__, "Invalid range: `%s'", orig); + free(orig); + seterrno_ret(EINVAL, 0); +} + + +/* + * Convert 'str' containing comma separated digits and ranges into an array + * of struct _range types (max 'len' elements). + * + * Return number of ranges created, or -1 on error. 
+ */ +static int _parse_range_list(char *str, struct _range *ranges, int len) +{ + char *p; + int count = 0; + + while (str) { + if (count == len) + return -1; + if ((p = strchr(str, ','))) + *p++ = '\0'; + if (!_parse_single_range(str, &ranges[count++])) + return -1; + str = p; + } + return count; +} + +static void +_push_range_list(hostlist_t hl, char *pfx, struct _range *rng, + int n) +{ + int i; + for (i = 0; i < n; i++) { + hostlist_push_hr(hl, pfx, rng->lo, rng->hi, rng->width); + rng++; + } +} + +static void +_push_range_list_with_suffix(hostlist_t hl, char *pfx, char *sfx, + struct _range *rng, int n) +{ + int i; + unsigned long j; + for (i = 0; i < n; i++) { + for (j = rng->lo; j <= rng->hi; j++) { + char host[4096]; + hostrange_t hr; + snprintf (host, 4096, "%s%0*lu%s", pfx, rng->width, j, sfx); + hr = hostrange_create_single (host); + hostlist_push_range (hl, hr); + /* + * hr is copied in hostlist_push_range. Need to free here. + */ + hostrange_destroy (hr); + } + rng++; + } +} + +/* + * Create a hostlist from a string with brackets '[' ']' to aid + * detection of ranges and compressed lists + */ +static hostlist_t +_hostlist_create_bracketed(const char *hostlist, char *sep, char *r_op) +{ + hostlist_t new = hostlist_new(); + struct _range ranges[MAX_RANGES]; + int nr, err; + char *p, *tok, *str, *orig; + char cur_tok[1024]; + + if (hostlist == NULL) + return new; + + if (!(orig = str = strdup(hostlist))) { + hostlist_destroy(new); + return NULL; + } + + while ((tok = _next_tok(sep, &str)) != NULL) { + strncpy(cur_tok, tok, 1024); + + if ((p = strchr(tok, '[')) != NULL) { + char *q, *prefix = tok; + *p++ = '\0'; + + if ((q = strchr(p, ']'))) { + *q = '\0'; + nr = _parse_range_list(p, ranges, MAX_RANGES); + if (nr < 0) + goto error; + + if (*(++q) != '\0') + _push_range_list_with_suffix (new, prefix, q, ranges, nr); + else + _push_range_list(new, prefix, ranges, nr); + + + } else + hostlist_push_host(new, cur_tok); + + } else + hostlist_push_host(new, 
cur_tok); + } + + free(orig); + return new; + + error: + err = errno; + hostlist_destroy(new); + free(orig); + seterrno_ret(err, NULL); +} + + + +hostlist_t hostlist_create(const char *str) +{ + return _hostlist_create(str, "\t, ", "-"); +} + + +hostlist_t hostlist_copy(const hostlist_t hl) +{ + int i; + hostlist_t new; + + if (hl == NULL) + return NULL; + + LOCK_HOSTLIST(hl); + if (!(new = hostlist_new())) + goto done; + + new->nranges = hl->nranges; + new->nhosts = hl->nhosts; + if (new->nranges > new->size) + hostlist_resize(new, new->nranges); + + for (i = 0; i < hl->nranges; i++) + new->hr[i] = hostrange_copy(hl->hr[i]); + + done: + UNLOCK_HOSTLIST(hl); + return new; +} + + +void hostlist_destroy(hostlist_t hl) +{ + int i; + if (hl == NULL) + return; + LOCK_HOSTLIST(hl); + while (hl->ilist) { + mutex_unlock(&hl->mutex); + hostlist_iterator_destroy(hl->ilist); + mutex_lock(&hl->mutex); + } + for (i = 0; i < hl->nranges; i++) + hostrange_destroy(hl->hr[i]); + free(hl->hr); + assert(hl->magic = 0x1); + UNLOCK_HOSTLIST(hl); + mutex_destroy(&hl->mutex); + free(hl); +} + + +int hostlist_push(hostlist_t hl, const char *hosts) +{ + hostlist_t new; + int retval; + if (hosts == NULL) + return 0; + new = hostlist_create(hosts); + if (!new) + return 0; + mutex_lock(&new->mutex); + retval = new->nhosts; + mutex_unlock(&new->mutex); + hostlist_push_list(hl, new); + hostlist_destroy(new); + return retval; +} + +int hostlist_push_host(hostlist_t hl, const char *str) +{ + hostrange_t hr; + hostname_t hn; + + if (str == NULL) + return 0; + + hn = hostname_create(str); + + if (hostname_suffix_is_valid(hn)) { + hr = hostrange_create(hn->prefix, hn->num, hn->num, + hostname_suffix_width(hn)); + } else + hr = hostrange_create_single(str); + + hostlist_push_range(hl, hr); + + hostrange_destroy(hr); + hostname_destroy(hn); + + return 1; +} + +int hostlist_push_list(hostlist_t h1, hostlist_t h2) +{ + int i, n = 0; + + if (h2 == NULL) + return 0; + + LOCK_HOSTLIST(h2); + + for (i = 0; 
i < h2->nranges; i++) + n += hostlist_push_range(h1, h2->hr[i]); + + UNLOCK_HOSTLIST(h2); + + return n; +} + + +char *hostlist_pop(hostlist_t hl) +{ + char *host = NULL; + + LOCK_HOSTLIST(hl); + if (hl->nhosts > 0) { + hostrange_t hr = hl->hr[hl->nranges - 1]; + host = hostrange_pop(hr); + hl->nhosts--; + if (hostrange_empty(hr)) { + hostrange_destroy(hl->hr[--hl->nranges]); + hl->hr[hl->nranges] = NULL; + } + } + UNLOCK_HOSTLIST(hl); + return host; +} + +/* find all iterators affected by a shift (or deletion) at + * hl->hr[idx], depth, with the deletion of n ranges */ +static void +hostlist_shift_iterators(hostlist_t hl, int idx, int depth, int n) +{ + hostlist_iterator_t i; + for (i = hl->ilist; i; i = i->next) { + if (n == 0) { + if (i->idx == idx && i->depth >= depth) + i->depth = i->depth > -1 ? i->depth - 1 : -1; + } else { + if (i->idx >= idx) { + if ((i->idx -= n) >= 0) + i->hr = i->hl->hr[i->idx]; + else + hostlist_iterator_reset(i); + } + } + } +} + +char *hostlist_shift(hostlist_t hl) +{ + char *host = NULL; + + LOCK_HOSTLIST(hl); + + if (hl->nhosts > 0) { + hostrange_t hr = hl->hr[0]; + + host = hostrange_shift(hr); + hl->nhosts--; + + if (hostrange_empty(hr)) { + hostlist_delete_range(hl, 0); + /* hl->nranges--; */ + } else + hostlist_shift_iterators(hl, 0, 0, 0); + } + + UNLOCK_HOSTLIST(hl); + + return host; +} + + +char *hostlist_pop_range(hostlist_t hl) +{ + int i; + char buf[MAXHOSTRANGELEN + 1]; + hostlist_t hltmp; + hostrange_t tail; + + LOCK_HOSTLIST(hl); + if (hl->nranges < 1 || !(hltmp = hostlist_new())) { + UNLOCK_HOSTLIST(hl); + return NULL; + } + + i = hl->nranges - 2; + tail = hl->hr[hl->nranges - 1]; + while (i >= 0 && hostrange_within_range(tail, hl->hr[i])) + i--; + + for (i++; i < hl->nranges; i++) { + hostlist_push_range(hltmp, hl->hr[i]); + hostrange_destroy(hl->hr[i]); + hl->hr[i] = NULL; + } + hl->nhosts -= hltmp->nhosts; + hl->nranges -= hltmp->nranges; + + UNLOCK_HOSTLIST(hl); + hostlist_ranged_string(hltmp, MAXHOSTRANGELEN, 
buf); + hostlist_destroy(hltmp); + return strdup(buf); +} + + +char *hostlist_shift_range(hostlist_t hl) +{ + int i; + char buf[1024]; + hostlist_t hltmp = hostlist_new(); + if (!hltmp) + return NULL; + + LOCK_HOSTLIST(hl); + + if (hl->nranges == 0) { + hostlist_destroy(hltmp); + UNLOCK_HOSTLIST(hl); + return NULL; + } + + i = 0; + do { + hostlist_push_range(hltmp, hl->hr[i]); + hostrange_destroy(hl->hr[i]); + } while ( (++i < hl->nranges) + && hostrange_within_range(hltmp->hr[0], hl->hr[i]) ); + + hostlist_shift_iterators(hl, i, 0, hltmp->nranges); + + /* shift rest of ranges back in hl */ + for (; i < hl->nranges; i++) { + hl->hr[i - hltmp->nranges] = hl->hr[i]; + hl->hr[i] = NULL; + } + hl->nhosts -= hltmp->nhosts; + hl->nranges -= hltmp->nranges; + + UNLOCK_HOSTLIST(hl); + + hostlist_ranged_string(hltmp, 1024, buf); + hostlist_destroy(hltmp); + + return strdup(buf); +} + +/* XXX: Note: efficiency improvements needed */ +int hostlist_delete(hostlist_t hl, const char *hosts) +{ + int n = 0; + char *hostname = NULL; + hostlist_t hltmp; + + if (!(hltmp = hostlist_create(hosts))) + seterrno_ret(EINVAL, 0); + + while ((hostname = hostlist_pop(hltmp)) != NULL) { + n += hostlist_delete_host(hl, hostname); + free(hostname); + } + hostlist_destroy(hltmp); + + return n; +} + + +/* XXX watch out! poor implementation follows! (fix it at some point) */ +int hostlist_delete_host(hostlist_t hl, const char *hostname) +{ + int n = hostlist_find(hl, hostname); + if (n >= 0) + hostlist_delete_nth(hl, n); + return n >= 0 ? 
1 : 0; +} + + +static char * +_hostrange_string(hostrange_t hr, int depth) +{ + char buf[MAXHOSTNAMELEN + 16]; + int len = snprintf(buf, MAXHOSTNAMELEN + 15, "%s", hr->prefix); + + if (!hr->singlehost) + snprintf(buf+len, MAXHOSTNAMELEN+15 - len, "%0*lu", + hr->width, hr->lo + depth); + return strdup(buf); +} + +char * hostlist_nth(hostlist_t hl, int n) +{ + char *host = NULL; + int i, count; + + LOCK_HOSTLIST(hl); + count = 0; + for (i = 0; i < hl->nranges; i++) { + int num_in_range = hostrange_count(hl->hr[i]); + + if (n <= (num_in_range - 1 + count)) { + host = _hostrange_string(hl->hr[i], n - count); + break; + } else + count += num_in_range; + } + + UNLOCK_HOSTLIST(hl); + + return host; +} + + +int hostlist_delete_nth(hostlist_t hl, int n) +{ + int i, count; + + LOCK_HOSTLIST(hl); + assert(n >= 0 && n <= hl->nhosts); + + count = 0; + + for (i = 0; i < hl->nranges; i++) { + int num_in_range = hostrange_count(hl->hr[i]); + hostrange_t hr = hl->hr[i]; + + if (n <= (num_in_range - 1 + count)) { + unsigned long num = hr->lo + n - count; + hostrange_t new; + + if (hr->singlehost) { /* this wasn't a range */ + hostlist_delete_range(hl, i); + } else if ((new = hostrange_delete_host(hr, num))) { + hostlist_insert_range(hl, new, i + 1); + hostrange_destroy(new); + } else if (hostrange_empty(hr)) + hostlist_delete_range(hl, i); + + goto done; + } else + count += num_in_range; + + } + + done: + UNLOCK_HOSTLIST(hl); + hl->nhosts--; + return 1; +} + +int hostlist_count(hostlist_t hl) +{ + int retval; + LOCK_HOSTLIST(hl); + retval = hl->nhosts; + UNLOCK_HOSTLIST(hl); + return retval; +} + +int hostlist_find(hostlist_t hl, const char *hostname) +{ + int i, count, ret = -1; + hostname_t hn; + + if (!hostname) + return -1; + + hn = hostname_create(hostname); + + LOCK_HOSTLIST(hl); + + for (i = 0, count = 0; i < hl->nranges; i++) { + if (hostrange_hn_within(hl->hr[i], hn)) { + if (hostname_suffix_is_valid(hn) && !hl->hr[i]->singlehost) + ret = count + hn->num - hl->hr[i]->lo; + 
else + ret = count; + goto done; + } else + count += hostrange_count(hl->hr[i]); + } + + done: + UNLOCK_HOSTLIST(hl); + hostname_destroy(hn); + return ret; +} + +/* hostrange compare with void * arguments to allow use with + * libc qsort() + */ +int _cmp(const void *hr1, const void *hr2) +{ + hostrange_t *h1 = (hostrange_t *) hr1; + hostrange_t *h2 = (hostrange_t *) hr2; + return hostrange_cmp((hostrange_t) * h1, (hostrange_t) * h2); +} + + +void hostlist_sort(hostlist_t hl) +{ + hostlist_iterator_t i; + LOCK_HOSTLIST(hl); + + if (hl->nranges <= 1) { + UNLOCK_HOSTLIST(hl); + return; + } + + qsort(hl->hr, hl->nranges, sizeof(hostrange_t), &_cmp); + + /* reset all iterators */ + for (i = hl->ilist; i; i = i->next) + hostlist_iterator_reset(i); + + UNLOCK_HOSTLIST(hl); + + hostlist_coalesce(hl); + +} + + +/* search through hostlist for ranges that can be collapsed + * does =not= delete any hosts + */ +static void hostlist_collapse(hostlist_t hl) +{ + int i; + + LOCK_HOSTLIST(hl); + for (i = hl->nranges - 1; i > 0; i--) { + hostrange_t hprev = hl->hr[i - 1]; + hostrange_t hnext = hl->hr[i]; + + if (hostrange_prefix_cmp(hprev, hnext) == 0 && + hprev->hi == hnext->lo - 1 && + hostrange_width_combine(hprev, hnext)) { + hprev->hi = hnext->hi; + hostlist_delete_range(hl, i); + } + } + UNLOCK_HOSTLIST(hl); +} + +/* search through hostlist (hl) for intersecting ranges + * split up duplicates and coalesce ranges where possible + */ +static void hostlist_coalesce(hostlist_t hl) +{ + int i, j; + hostrange_t new; + + LOCK_HOSTLIST(hl); + + for (i = hl->nranges - 1; i > 0; i--) { + + new = hostrange_intersect(hl->hr[i - 1], hl->hr[i]); + + if (new) { + hostrange_t hprev = hl->hr[i - 1]; + hostrange_t hnext = hl->hr[i]; + j = i; + + if (new->hi < hprev->hi) + hnext->hi = hprev->hi; + + hprev->hi = new->lo; + hnext->lo = new->hi; + + if (hostrange_empty(hprev)) + hostlist_delete_range(hl, i); + + while (new->lo <= new->hi) { + hostrange_t hr = hostrange_create( new->prefix, + 
new->lo, new->lo, + new->width ); + + if (new->lo > hprev->hi) + hostlist_insert_range(hl, hr, j++); + + if (new->lo < hnext->lo) + hostlist_insert_range(hl, hr, j++); + + hostrange_destroy(hr); + + new->lo++; + } + i = hl->nranges; + hostrange_destroy(new); + } + } + UNLOCK_HOSTLIST(hl); + + hostlist_collapse(hl); + +} + +/* attempt to join ranges at loc and loc-1 in a hostlist */ +/* delete duplicates, return the number of hosts deleted */ +/* assumes that the hostlist hl has been locked by caller */ +/* returns -1 if no range join occurred */ +static int _attempt_range_join(hostlist_t hl, int loc) +{ + int ndup; + assert(hl != NULL); + assert(hl->magic == HOSTLIST_MAGIC); + assert(loc > 0); + assert(loc < hl->nranges); + ndup = hostrange_join(hl->hr[loc - 1], hl->hr[loc]); + if (ndup >= 0) { + hostlist_delete_range(hl, loc); + hl->nhosts -= ndup; + } + return ndup; +} + +void hostlist_uniq(hostlist_t hl) +{ + int i = 1; + hostlist_iterator_t hli; + LOCK_HOSTLIST(hl); + if (hl->nranges <= 1) { + UNLOCK_HOSTLIST(hl); + return; + } + qsort(hl->hr, hl->nranges, sizeof(hostrange_t), &_cmp); + + while (i < hl->nranges) { + if (_attempt_range_join(hl, i) < 0) /* No range join occurred */ + i++; + } + + /* reset all iterators */ + for (hli = hl->ilist; hli; hli = hli->next) + hostlist_iterator_reset(hli); + + UNLOCK_HOSTLIST(hl); +} + + +ssize_t hostlist_deranged_string(hostlist_t hl, size_t n, char *buf) +{ + int i; + int len = 0; + int truncated = 0; + + LOCK_HOSTLIST(hl); + for (i = 0; i < hl->nranges; i++) { + size_t m = (n - len) <= n ? n - len : 0; + int ret = hostrange_to_string(hl->hr[i], m, buf + len, ","); + if (ret < 0 || ret > m) { + len = n; + truncated = 1; + break; + } + len+=ret; + buf[len++] = ','; + } + UNLOCK_HOSTLIST(hl); + + buf[len > 0 ? --len : 0] = '\0'; + if (len == n) + truncated = 1; + + return truncated ? 
-1 : len; +} + +/* return true if a bracket is needed for the range at i in hostlist hl */ +static int _is_bracket_needed(hostlist_t hl, int i) +{ + hostrange_t h1 = hl->hr[i]; + hostrange_t h2 = i < hl->nranges - 1 ? hl->hr[i + 1] : NULL; + return hostrange_count(h1) > 1 || hostrange_within_range(h1, h2); +} + +/* write the next bracketed hostlist, i.e. prefix[n-m,k,...] + * into buf, writing at most n chars including the terminating '\0' + * + * leaves start pointing to one past last range object in bracketed list, + * and returns the number of bytes written into buf. + * + * Assumes hostlist is locked. + */ +static int +_get_bracketed_list(hostlist_t hl, int *start, const size_t n, char *buf) +{ + hostrange_t *hr = hl->hr; + int i = *start; + int m, len = 0; + int bracket_needed = _is_bracket_needed(hl, i); + + len = snprintf(buf, n, "%s", hr[i]->prefix); + + if ((len < 0) || (len > n)) + return n; /* truncated, buffer filled */ + + if (bracket_needed && len < n && len >= 0) + buf[len++] = '['; + + do { + m = (n - len) <= n ? n - len : 0; + len += hostrange_numstr(hr[i], m, buf + len); + if (len >= n) + break; + if (bracket_needed) /* Only need commas inside brackets */ + buf[len++] = ','; + } while (++i < hl->nranges && hostrange_within_range(hr[i], hr[i-1])); + + if (bracket_needed && len < n && len > 0) { + + /* Add trailing bracket (change trailing "," from above to "]" */ + buf[len - 1] = ']'; + + /* NUL terminate for safety, but do not add terminator to len */ + buf[len] = '\0'; + + } else if (len >= n) { + if (n > 0) + buf[n-1] = '\0'; + + } else { + /* If len is > 0, NUL terminate (but do not add to len) */ + buf[len > 0 ? 
len : 0] = '\0'; + } + + *start = i; + return len; +} + +ssize_t hostlist_ranged_string(hostlist_t hl, size_t n, char *buf) +{ + int i = 0; + int len = 0; + int truncated = 0; + + LOCK_HOSTLIST(hl); + while (i < hl->nranges && len < n) { + len += _get_bracketed_list(hl, &i, n - len, buf + len); + if ((len > 0) && (len < n) && (i < hl->nranges)) + buf[len++] = ','; + } + UNLOCK_HOSTLIST(hl); + + /* NUL terminate */ + if (len >= n) { + truncated = 1; + if (n > 0) + buf[n-1] = '\0'; + } else + buf[len > 0 ? len : 0] = '\0'; + + return truncated ? -1 : len; +} + +/* ----[ hostlist iterator functions ]---- */ + +static hostlist_iterator_t hostlist_iterator_new(void) +{ + hostlist_iterator_t i = (hostlist_iterator_t) malloc(sizeof(*i)); + if (!i) + return NULL; + i->hl = NULL; + i->hr = NULL; + i->idx = 0; + i->depth = -1; + i->next = i; + assert(i->magic = HOSTLIST_MAGIC); + return i; +} + +hostlist_iterator_t hostlist_iterator_create(hostlist_t hl) +{ + hostlist_iterator_t i; + + if (!(i = hostlist_iterator_new())) + out_of_memory("hostlist_iterator_create"); + + LOCK_HOSTLIST(hl); + i->hl = hl; + i->hr = hl->hr[0]; + i->next = hl->ilist; + hl->ilist = i; + UNLOCK_HOSTLIST(hl); + return i; +} + +hostlist_iterator_t hostset_iterator_create(hostset_t set) +{ + return hostlist_iterator_create(set->hl); +} + +void hostlist_iterator_reset(hostlist_iterator_t i) +{ + assert(i != NULL); + assert(i->magic == HOSTLIST_MAGIC); + i->idx = 0; + i->hr = i->hl->hr[0]; + i->depth = -1; + return; +} + +void hostlist_iterator_destroy(hostlist_iterator_t i) +{ + hostlist_iterator_t *pi; + if (i == NULL) + return; + assert(i != NULL); + assert(i->magic == HOSTLIST_MAGIC); + LOCK_HOSTLIST(i->hl); + for (pi = &i->hl->ilist; *pi; pi = &(*pi)->next) { + assert((*pi)->magic == HOSTLIST_MAGIC); + if (*pi == i) { + *pi = (*pi)->next; + break; + } + } + UNLOCK_HOSTLIST(i->hl); + assert(i->magic = 0x1); + free(i); +} + +static void _iterator_advance(hostlist_iterator_t i) +{ + assert(i != NULL); 
+ assert(i->magic == HOSTLIST_MAGIC); + if (i->idx > i->hl->nranges - 1) + return; + if (++(i->depth) > (i->hr->hi - i->hr->lo)) { + i->depth = 0; + i->hr = i->hl->hr[++i->idx]; + } +} + +/* advance iterator to end of current range (meaning within "[" "]") + * i.e. advance iterator past all range objects that could be represented + * in on bracketed hostlist. + */ +static void _iterator_advance_range(hostlist_iterator_t i) +{ + int nr, j; + hostrange_t *hr; + assert(i != NULL); + assert(i->magic == HOSTLIST_MAGIC); + + nr = i->hl->nranges; + hr = i->hl->hr; + j = i->idx; + if (++i->depth > 0) { + while (++j < nr && hostrange_within_range(i->hr, hr[j])) {;} + i->idx = j; + i->hr = i->hl->hr[i->idx]; + i->depth = 0; + } +} + +char *hostlist_next(hostlist_iterator_t i) +{ + char *buf = NULL; + char suffix[16]; + int len = 0; + assert(i != NULL); + assert(i->magic == HOSTLIST_MAGIC); + LOCK_HOSTLIST(i->hl); + _iterator_advance(i); + + if (i->idx > i->hl->nranges - 1) { + UNLOCK_HOSTLIST(i->hl); + return NULL; + } + + suffix[0] = '\0'; + + if (!i->hr->singlehost) + snprintf (suffix, 15, "%0*lu", i->hr->width, i->hr->lo + i->depth); + + len = strlen (i->hr->prefix) + strlen (suffix) + 1; + if (!(buf = malloc (len))) + out_of_memory("hostlist_next"); + + buf[0] = '\0'; + strcat (buf, i->hr->prefix); + strcat (buf, suffix); + + UNLOCK_HOSTLIST(i->hl); + return (buf); +} + +char *hostlist_next_range(hostlist_iterator_t i) +{ + char buf[MAXHOSTRANGELEN + 1]; + int j; + + assert(i != NULL); + assert(i->magic == HOSTLIST_MAGIC); + LOCK_HOSTLIST(i->hl); + + _iterator_advance_range(i); + + if (i->idx > i->hl->nranges - 1) { + UNLOCK_HOSTLIST(i->hl); + return NULL; + } + + j = i->idx; + _get_bracketed_list(i->hl, &j, MAXHOSTRANGELEN, buf); + + UNLOCK_HOSTLIST(i->hl); + + return strdup(buf); +} + +int hostlist_remove(hostlist_iterator_t i) +{ + hostrange_t new; + assert(i != NULL); + assert(i->magic == HOSTLIST_MAGIC); + LOCK_HOSTLIST(i->hl); + new = hostrange_delete_host(i->hr, 
i->hr->lo + i->depth); + if (new) { + hostlist_insert_range(i->hl, new, i->idx + 1); + hostrange_destroy(new); + i->hr = i->hl->hr[++i->idx]; + i->depth = -1; + } else if (hostrange_empty(i->hr)) { + hostlist_delete_range(i->hl, i->idx); + } else + i->depth--; + + i->hl->nhosts--; + UNLOCK_HOSTLIST(i->hl); + + return 1; +} + +/* ----[ hostset functions ]---- */ + +hostset_t hostset_create(const char *hostlist) +{ + hostset_t new; + + if (!(new = (hostset_t) malloc(sizeof(*new)))) + goto error1; + + if (!(new->hl = hostlist_create(hostlist))) + goto error2; + + hostlist_uniq(new->hl); + return new; + + error2: + free(new); + error1: + return NULL; +} + +hostset_t hostset_copy(const hostset_t set) +{ + hostset_t new; + if (!(new = (hostset_t) malloc(sizeof(*new)))) + goto error1; + + if (!(new->hl = hostlist_copy(set->hl))) + goto error2; + + return new; + error2: + free(new); + error1: + return NULL; +} + +void hostset_destroy(hostset_t set) +{ + if (set == NULL) + return; + hostlist_destroy(set->hl); + free(set); +} + +/* inserts a single range object into a hostset + * Assumes that the set->hl lock is already held + * Updates hl->nhosts + */ +static int hostset_insert_range(hostset_t set, hostrange_t hr) +{ + int i = 0; + int inserted = 0; + int nhosts = 0; + int ndups = 0; + hostlist_t hl; + + hl = set->hl; + + if (hl->size == hl->nranges && !hostlist_expand(hl)) + return 0; + + nhosts = hostrange_count(hr); + + for (i = 0; i < hl->nranges; i++) { + if (hostrange_cmp(hr, hl->hr[i]) <= 0) { + + if ((ndups = hostrange_join(hr, hl->hr[i])) >= 0) + hostlist_delete_range(hl, i); + else if (ndups < 0) + ndups = 0; + + hostlist_insert_range(hl, hr, i); + + /* now attempt to join hr[i] and hr[i-1] */ + if (i > 0) { + int m; + if ((m = _attempt_range_join(hl, i)) > 0) + ndups += m; + } + hl->nhosts += nhosts - ndups; + inserted = 1; + break; + } + } + + if (inserted == 0) { + hl->hr[hl->nranges++] = hostrange_copy(hr); + hl->nhosts += nhosts; + if (hl->nranges > 1) { + if 
((ndups = _attempt_range_join(hl, hl->nranges - 1)) <= 0) + ndups = 0; + } + } + + /* + * Return the number of unique hosts inserted + */ + return nhosts - ndups; +} + +int hostset_insert(hostset_t set, const char *hosts) +{ + int i, n = 0; + hostlist_t hl = hostlist_create(hosts); + if (!hl) + return 0; + + hostlist_uniq(hl); + LOCK_HOSTLIST(set->hl); + for (i = 0; i < hl->nranges; i++) + n += hostset_insert_range(set, hl->hr[i]); + UNLOCK_HOSTLIST(set->hl); + hostlist_destroy(hl); + return n; +} + + +/* linear search through N ranges for hostname "host" + * */ +static int hostset_find_host(hostset_t set, const char *host) +{ + int i; + int retval = 0; + hostname_t hn; + LOCK_HOSTLIST(set->hl); + hn = hostname_create(host); + for (i = 0; i < set->hl->nranges; i++) { + if (hostrange_hn_within(set->hl->hr[i], hn)) { + retval = 1; + goto done; + } + } + done: + UNLOCK_HOSTLIST(set->hl); + hostname_destroy(hn); + return retval; +} + +int hostset_within(hostset_t set, const char *hosts) +{ + int nhosts, nfound; + hostlist_t hl; + char *hostname; + + assert(set->hl->magic == HOSTLIST_MAGIC); + + if (!(hl = hostlist_create(hosts))) + return (0); + + nhosts = hostlist_count(hl); + nfound = 0; + + while ((hostname = hostlist_pop(hl)) != NULL) { + nfound += hostset_find_host(set, hostname); + free(hostname); + } + + hostlist_destroy(hl); + + return (nhosts == nfound); +} + +int hostset_delete(hostset_t set, const char *hosts) +{ + return hostlist_delete(set->hl, hosts); +} + +int hostset_delete_host(hostset_t set, const char *hostname) +{ + return hostlist_delete_host(set->hl, hostname); +} + +char *hostset_shift(hostset_t set) +{ + return hostlist_shift(set->hl); +} + +char *hostset_pop(hostset_t set) +{ + return hostlist_pop(set->hl); +} + +char *hostset_shift_range(hostset_t set) +{ + return hostlist_shift_range(set->hl); +} + +char *hostset_pop_range(hostset_t set) +{ + return hostlist_pop_range(set->hl); +} + +int hostset_count(hostset_t set) +{ + return 
hostlist_count(set->hl); +} + +ssize_t hostset_ranged_string(hostset_t set, size_t n, char *buf) +{ + return hostlist_ranged_string(set->hl, n, buf); +} + +ssize_t hostset_deranged_string(hostset_t set, size_t n, char *buf) +{ + return hostlist_deranged_string(set->hl, n, buf); +} + +#if TEST_MAIN + +int hostlist_nranges(hostlist_t hl) +{ + return hl->nranges; +} + +int hostset_nranges(hostset_t set) +{ + return set->hl->nranges; +} + +/* test iterator functionality on the list of hosts represented + * by list + */ +int iterator_test(char *list) +{ + int j; + char buf[1024]; + hostlist_t hl = hostlist_create(list); + hostset_t set = hostset_create(list); + + hostlist_iterator_t i = hostlist_iterator_create(hl); + hostlist_iterator_t seti = hostset_iterator_create(set); + hostlist_iterator_t i2 = hostlist_iterator_create(hl); + char *host; + + + hostlist_ranged_string(hl, 1024, buf); + printf("iterator_test: hl = `%s' passed in `%s'\n", buf, list); + host = hostlist_next(i); + printf("first host in list hl = `%s'\n", host); + free(host); + + /* forge ahead three hosts with i2 */ + for (j = 0; j < 4; j++) { + host = hostlist_next(i2); + free(host); + } + + host = hostlist_shift(hl); + printf("result of shift(hl) = `%s'\n", host); + free(host); + host = hostlist_next(i); + printf("next host in list hl = `%s'\n", host); + free(host); + host = hostlist_next(i2); + printf("next host for i2 = `%s'\n", host); + free(host); + + hostlist_iterator_destroy(i); + + hostlist_destroy(hl); + hostset_destroy(set); + return 1; +} + +int main(int ac, char **av) +{ + char buf[1024000]; + int i; + char *str; + + hostlist_t hl1, hl2, hl3; + hostset_t set, set1; + hostlist_iterator_t iter, iter2; + + if (!(hl1 = hostlist_create(ac > 1 ? av[1] : NULL))) + perror("hostlist_create"); + if (!(set = hostset_create(ac > 1 ? 
av[1] : NULL))) + perror("hostlist_create"); + + hl3 = hostlist_create("f[0-5]"); + hostlist_delete(hl3, "f[1-3]"); + hostlist_ranged_string(hl3, 102400, buf); + printf("after delete = `%s'\n", buf); + hostlist_destroy(hl3); + + for (i = 2; i < ac; i++) { + hostlist_push(hl1, av[i]); + hostset_insert(set, av[i]); + } + + hostlist_ranged_string(hl1, 102400, buf); + printf("ranged = `%s'\n", buf); + + iterator_test(buf); + + hostlist_deranged_string(hl1, 10240, buf); + printf("deranged = `%s'\n", buf); + + hostset_ranged_string(set, 1024, buf); + printf("hostset = `%s'\n", buf); + + hostlist_sort(hl1); + hostlist_ranged_string(hl1, 1024, buf); + printf("sorted = `%s'\n", buf); + + hostlist_uniq(hl1); + hostlist_ranged_string(hl1, 1024, buf); + printf("uniqed = `%s'\n", buf); + + hl2 = hostlist_copy(hl1); + printf("pop_range: "); + while ((str = hostlist_pop_range(hl2))) { + printf("`%s' ", str); + free(str); + } + hostlist_destroy(hl2); + printf("\n"); + + hl2 = hostlist_copy(hl1); + printf("shift_range: "); + while ((str = hostlist_shift_range(hl2))) { + printf("`%s' ", str); + free(str); + } + hostlist_destroy(hl2); + printf("\n"); + + iter = hostset_iterator_create(set); + iter2 = hostset_iterator_create(set); + hostlist_iterator_destroy(iter2); + + printf("next: "); + while ((str = hostlist_next(iter))) { + printf("`%s' ", str); + free(str); + } + printf("\n"); + + hostlist_iterator_reset(iter); + printf("next_range: "); + while ((str = hostlist_next_range(iter))) { + printf("`%s' ", str); + free(str); + } + printf("\n"); + + printf("nranges = %d\n", hostset_nranges(set)); + + hostset_ranged_string(set, 1024, buf); + printf("set = %s\n", buf); + + hostset_destroy(set); + hostlist_destroy(hl1); + return 0; +} + +#endif /* TEST_MAIN */ + +/* + * vi: tabstop=4 shiftwidth=4 expandtab + */ diff --git a/lib/hostlist.h b/lib/hostlist.h new file mode 100644 index 0000000..d87d6ef --- /dev/null +++ b/lib/hostlist.h @@ -0,0 +1,419 @@ 
+/*****************************************************************************\ + * $Id: hostlist.h 7428 2008-05-23 16:08:31Z grondo $ + ***************************************************************************** + * Copyright (C) 2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Mark Grondona + * UCRL-CODE-2002-040. + * + * This file is part of SLURM, a resource management program. + * For details, see . + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ + +#ifndef _HOSTLIST_H +#define _HOSTLIST_H + +#include + +/* Notes: + * + * If WITH_LSD_FATAL_ERROR_FUNC is defined, the linker will expect to + * find an external lsd_fatal_error(file,line,mesg) function. By default, + * lsd_fatal_error(file,line,mesg) is a macro definition that outputs an + * error message to stderr. This macro may be redefined to invoke another + * routine instead. e.g.: + * + * #define lsd_fatal_error(file,line,mesg) \ + * error("%s:%d %s\n",file,line,mesg); + * + * If WITH_LSD_NOMEM_ERROR_FUNC is defined, the linker will expect to + * find an external lsd_nomem_error(file,line,mesg) function. 
By default, + * lsd_nomem_error(file,line,mesg) is a macro definition that returns NULL. + * This macro may be redefined to invoke another routine instead. + * + * If WITH_PTHREADS is defined, these routines will be thread-safe. + * + */ + +/* The hostlist opaque data type + * + * A hostlist is a list of hostnames optimized for a prefixXXXX style + * naming convention, where XXXX is a decimal, numeric suffix. + */ +typedef struct hostlist * hostlist_t; + +/* A hostset is a special case of a hostlist. It: + * + * 1. never contains duplicates + * 2. is always sorted + * (Note: sort occurs first on alphanumeric prefix -- where prefix + * matches, numeric suffixes will be sorted *by value*) + */ +typedef struct hostset * hostset_t; + +/* The hostlist iterator type (may be used with a hostset as well) + * used for non-destructive access to hostlist members. + * + */ +typedef struct hostlist_iterator * hostlist_iterator_t; + +/* ----[ hostlist_t functions: ]---- */ + +/* ----[ hostlist creation and destruction ]---- */ + +/* + * hostlist_create(): + * + * Create a new hostlist from a string representation. + * + * The string representation (str) may contain one or more hostnames or + * bracketed hostlists separated by either `,' or whitespace. A bracketed + * hostlist is denoted by a common prefix followed by a list of numeric + * ranges contained within brackets: e.g. "tux[0-5,12,20-25]" + * + * Note: if this module is compiled with WANT_RECKLESS_HOSTRANGE_EXPANSION + * defined, a much looser interpretation of host ranges is used. + * Reckless hostrange expansion allows all of the following (in addition to + * bracketed hostlists): + * + * o tux0-5,tux12,tux20-25 + * o tux0-tux5,tux12,tux20-tux25 + * o tux0-5,12,20-25 + * + * If str is NULL, an empty hostlist is created and returned. + * + * If the create fails, hostlist_create() returns NULL. 
+ * + * The returned hostlist must be freed with hostlist_destroy() + * + */ +hostlist_t hostlist_create(const char *hostlist); + +/* hostlist_copy(): + * + * Allocate a copy of a hostlist object. Returned hostlist must be freed + * with hostlist_destroy. + */ +hostlist_t hostlist_copy(const hostlist_t hl); + +/* hostlist_destroy(): + * + * Destroy a hostlist object. Frees all memory allocated to the hostlist. + */ +void hostlist_destroy(hostlist_t hl); + + +/* ----[ hostlist list operations ]---- */ + +/* hostlist_push(): + * + * push a string representation of hostnames onto a hostlist. + * + * The hosts argument may take the same form as in hostlist_create() + * + * Returns the number of hostnames inserted into the list, + * or 0 on failure. + */ +int hostlist_push(hostlist_t hl, const char *hosts); + + +/* hostlist_push_host(): + * + * Push a single host onto the hostlist hl. + * This function is more efficient than hostlist_push() for a single + * hostname, since the argument does not need to be checked for ranges. + * + * return value is 1 for success, 0 for failure. + */ +int hostlist_push_host(hostlist_t hl, const char *host); + + +/* hostlist_push_list(): + * + * Push a hostlist (hl2) onto another list (hl1) + * + * Returns 1 for success, 0 for failure. + * + */ +int hostlist_push_list(hostlist_t hl1, hostlist_t hl2); + + +/* hostlist_pop(): + * + * Returns the string representation of the last host pushed onto the list + * or NULL if hostlist is empty or there was an error allocating memory. + * The host is removed from the hostlist. + * + * Note: Caller is responsible for freeing the returned memory. + */ +char * hostlist_pop(hostlist_t hl); + + +char * hostlist_nth(hostlist_t hl, int n); + +/* hostlist_shift(): + * + * Returns the string representation of the first host in the hostlist + * or NULL if the hostlist is empty or there was an error allocating memory. + * The host is removed from the hostlist. 
+ * + * Note: Caller is responsible for freeing the returned memory. + */ +char * hostlist_shift(hostlist_t hl); + + +/* hostlist_pop_range(): + * + * Pop the last bracketed list of hosts of the hostlist hl. + * Returns the string representation in bracketed list form. + * All hosts associated with the returned list are removed + * from hl. + * + * Caller is responsible for freeing returned memory + */ +char * hostlist_pop_range(hostlist_t hl); + +/* hostlist_shift_range(): + * + * Shift the first bracketed hostlist (improperly: range) off the + * hostlist hl. Returns the string representation in bracketed list + * form. All hosts associated with the list are removed from the + * hostlist. + * + * Caller is responsible for freeing returned memory. + */ +char * hostlist_shift_range(hostlist_t hl); + + +/* hostlist_find(): + * + * Searches hostlist hl for the first host matching hostname + * and returns position in list if found. + * + * Returns -1 if host is not found. + * + */ +int hostlist_find(hostlist_t hl, const char *hostname); + +/* hostlist_delete(): + * + * Deletes all hosts in the list represented by `hosts' + * + * Returns the number of hosts successfully deleted + */ +int hostlist_delete(hostlist_t hl, const char *hosts); + + +/* hostlist_delete_host(): + * + * Deletes the first host that matches `hostname' from the hostlist hl. + * Note: "hostname" argument cannot contain a range of hosts + * (see hostlist_delete() for this functionality.) + * + * Returns 1 if successful, 0 if hostname is not found in list. + */ +int hostlist_delete_host(hostlist_t hl, const char *hostname); + + +/* hostlist_delete_nth(): + * + * Deletes the host from position n in the hostlist. + * + * Returns 1 if successful 0 on error. + * + */ +int hostlist_delete_nth(hostlist_t hl, int n); + + +/* hostlist_count(): + * + * Return the number of hosts in hostlist hl. + */ +int hostlist_count(hostlist_t hl); + +/* hostlist_is_empty(): return true if hostlist is empty. 
*/ +#define hostlist_is_empty(__hl) ( hostlist_count(__hl) == 0 ) + +/* ----[ Other hostlist operations ]---- */ + +/* hostlist_sort(): + * + * Sort the hostlist hl. + * + */ +void hostlist_sort(hostlist_t hl); + +/* hostlist_uniq(): + * + * Sort the hostlist hl and remove duplicate entries. + * + */ +void hostlist_uniq(hostlist_t hl); + + +/* ----[ hostlist print functions ]---- */ + +/* hostlist_ranged_string(): + * + * Write the string representation of the hostlist hl into buf, + * writing at most n chars. Returns the number of bytes written, + * or -1 if truncation occurred. + * + * The result will be NULL terminated. + * + * hostlist_ranged_string() will write a bracketed hostlist representation + * where possible. + */ +ssize_t hostlist_ranged_string(hostlist_t hl, size_t n, char *buf); +ssize_t hostset_ranged_string(hostset_t hs, size_t n, char *buf); + +/* hostlist_deranged_string(): + * + * Writes the string representation of the hostlist hl into buf, + * writing at most n chars. Returns the number of bytes written, + * or -1 if truncation occurred. + * + * hostlist_deranged_string() will not attempt to write a bracketed + * hostlist representation. Every hostname will be explicitly written. + */ +ssize_t hostlist_deranged_string(hostlist_t hl, size_t n, char *buf); +ssize_t hostset_deranged_string(hostset_t hs, size_t n, char *buf); + + +/* ----[ hostlist utility functions ]---- */ + + +/* hostlist_nranges(): + * + * Return the number of ranges currently held in hostlist hl. + */ +int hostlist_nranges(hostlist_t hl); + + +/* ----[ hostlist iterator functions ]---- */ + +/* hostlist_iterator_create(): + * + * Creates and returns a hostlist iterator used for non destructive + * access to a hostlist or hostset. Returns NULL on failure. + */ +hostlist_iterator_t hostlist_iterator_create(hostlist_t hl); + +/* hostset_iterator_create(): + * + * Same as hostlist_iterator_create(), but creates a hostlist_iterator + * from a hostset. 
+ */ +hostlist_iterator_t hostset_iterator_create(hostset_t set); + +/* hostlist_iterator_destroy(): + * + * Destroys a hostlist iterator. + */ +void hostlist_iterator_destroy(hostlist_iterator_t i); + +/* hostlist_iterator_reset(): + * + * Reset an iterator to the beginning of the list. + */ +void hostlist_iterator_reset(hostlist_iterator_t i); + +/* hostlist_next(): + * + * Returns a pointer to the next hostname on the hostlist + * or NULL at the end of the list + * + * The caller is responsible for freeing the returned memory. + */ +char * hostlist_next(hostlist_iterator_t i); + + +/* hostlist_next_range(): + * + * Returns the next bracketed hostlist or NULL if the iterator i is + * at the end of the list. + * + * The caller is responsible for freeing the returned memory. + * + */ +char * hostlist_next_range(hostlist_iterator_t i); + + +/* hostlist_remove(): + * Removes the last host returned by hostlist iterator i + * + * Returns 1 for success, 0 for failure. + */ +int hostlist_remove(hostlist_iterator_t i); + + +/* ----[ hostset operations ]---- */ + +/* hostset_create(): + * + * Create a new hostset object from a string representation of a list of + * hosts. See hostlist_create() for valid hostlist forms. + */ +hostset_t hostset_create(const char *hostlist); + +/* hostset_copy(): + * + * Copy a hostset object. Returned set must be freed with hostset_destroy(). + */ +hostset_t hostset_copy(hostset_t set); + +/* hostset_destroy(): + */ +void hostset_destroy(hostset_t set); + +/* hostset_insert(): + * Add a host or list of hosts into hostset "set." + * + * Returns number of hosts successfully added to "set" + * (insertion of a duplicate is not considered successful) + */ +int hostset_insert(hostset_t set, const char *hosts); + +/* hostset_delete(): + * Delete a host or list of hosts from hostset "set." + * Returns number of hosts deleted from set. 
+ */ +int hostset_delete(hostset_t set, const char *hosts); + +/* hostset_within(): + * Return 1 if all hosts specified by "hosts" are within the hostset "set" + * Return 0 if any host in "hosts" is not in the hostset "set" + */ +int hostset_within(hostset_t set, const char *hosts); + +/* hostset_shift(): + * hostset equivalent to hostlist_shift() + */ +char * hostset_shift(hostset_t set); + +/* hostset_shift_range(): + * hostset equivalent to hostlist_shift_range() + */ +char * hostset_shift_range(hostset_t set); + +/* hostset_count(): + * Count the number of hosts currently in hostset + */ +int hostset_count(hostset_t set); + + +#endif /* !_HOSTLIST_H */ diff --git a/lib/list.c b/lib/list.c new file mode 100644 index 0000000..de4de72 --- /dev/null +++ b/lib/list.c @@ -0,0 +1,835 @@ +/***************************************************************************** + * $Id$ + ***************************************************************************** + * $LSDId: list.c 3709 2006-11-29 00:51:22Z dun $ + ***************************************************************************** + * Copyright (C) 2001-2002 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Chris Dunlap . + * + * This file is from LSD-Tools, the LLNL Software Development Toolbox. + * + * LSD-Tools is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * LSD-Tools is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with LSD-Tools; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + ***************************************************************************** + * Refer to "list.h" for documentation on public functions. + *****************************************************************************/ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif /* HAVE_CONFIG_H */ + +#ifdef WITH_PTHREADS +# include +#endif /* WITH_PTHREADS */ + +#include +#include +#include +#include +#include "list.h" + + +/********************* + * lsd_fatal_error * + *********************/ + +#ifdef WITH_LSD_FATAL_ERROR_FUNC +# undef lsd_fatal_error + extern void lsd_fatal_error(char *file, int line, char *mesg); +#else /* !WITH_LSD_FATAL_ERROR_FUNC */ +# ifndef lsd_fatal_error +# include +# include +# include +# define lsd_fatal_error(file, line, mesg) \ + do { \ + fprintf(stderr, "ERROR: [%s:%d] %s: %s\n", \ + file, line, mesg, strerror(errno)); \ + } while (0) +# endif /* !lsd_fatal_error */ +#endif /* !WITH_LSD_FATAL_ERROR_FUNC */ + + +/********************* + * lsd_nomem_error * + *********************/ + +#ifdef WITH_LSD_NOMEM_ERROR_FUNC +# undef lsd_nomem_error + extern void * lsd_nomem_error(char *file, int line, char *mesg); +#else /* !WITH_LSD_NOMEM_ERROR_FUNC */ +# ifndef lsd_nomem_error +# define lsd_nomem_error(file, line, mesg) (NULL) +# endif /* !lsd_nomem_error */ +#endif /* !WITH_LSD_NOMEM_ERROR_FUNC */ + + +/*************** + * Constants * + ***************/ + +#define LIST_ALLOC 32 +#define LIST_MAGIC 0xDEADBEEF + + +/**************** + * Data Types * + ****************/ + +struct listNode { + void *data; /* node's data */ + struct listNode *next; /* next node in list */ +}; + +struct listIterator { + struct list *list; /* the list being iterated */ + struct listNode *pos; /* the next node to be iterated */ + struct listNode **prev; /* 
addr of 'next' ptr to prv It node */ + struct listIterator *iNext; /* iterator chain for list_destroy() */ +#ifndef NDEBUG + unsigned int magic; /* sentinel for asserting validity */ +#endif /* !NDEBUG */ +}; + +struct list { + struct listNode *head; /* head of the list */ + struct listNode **tail; /* addr of last node's 'next' ptr */ + struct listIterator *iNext; /* iterator chain for list_destroy() */ + ListDelF fDel; /* function to delete node data */ + int count; /* number of nodes in list */ +#ifdef WITH_PTHREADS + pthread_mutex_t mutex; /* mutex to protect access to list */ +#endif /* WITH_PTHREADS */ +#ifndef NDEBUG + unsigned int magic; /* sentinel for asserting validity */ +#endif /* !NDEBUG */ +}; + +typedef struct listNode * ListNode; + + +/**************** + * Prototypes * + ****************/ + +static void * list_node_create (List l, ListNode *pp, void *x); +static void * list_node_destroy (List l, ListNode *pp); +static List list_alloc (void); +static void list_free (List l); +static ListNode list_node_alloc (void); +static void list_node_free (ListNode p); +static ListIterator list_iterator_alloc (void); +static void list_iterator_free (ListIterator i); +static void * list_alloc_aux (int size, void *pfreelist); +static void list_free_aux (void *x, void *pfreelist); + + +/*************** + * Variables * + ***************/ + +static List list_free_lists = NULL; +static ListNode list_free_nodes = NULL; +static ListIterator list_free_iterators = NULL; + +#ifdef WITH_PTHREADS +static pthread_mutex_t list_free_lock = PTHREAD_MUTEX_INITIALIZER; +#endif /* WITH_PTHREADS */ + + +/************ + * Macros * + ************/ + +#ifdef WITH_PTHREADS + +# define list_mutex_init(mutex) \ + do { \ + int e = pthread_mutex_init(mutex, NULL); \ + if (e != 0) { \ + errno = e; \ + lsd_fatal_error(__FILE__, __LINE__, "list mutex init"); \ + abort(); \ + } \ + } while (0) + +# define list_mutex_lock(mutex) \ + do { \ + int e = pthread_mutex_lock(mutex); \ + if (e != 0) { \ 
+ errno = e; \ + lsd_fatal_error(__FILE__, __LINE__, "list mutex lock"); \ + abort(); \ + } \ + } while (0) + +# define list_mutex_unlock(mutex) \ + do { \ + int e = pthread_mutex_unlock(mutex); \ + if (e != 0) { \ + errno = e; \ + lsd_fatal_error(__FILE__, __LINE__, "list mutex unlock"); \ + abort(); \ + } \ + } while (0) + +# define list_mutex_destroy(mutex) \ + do { \ + int e = pthread_mutex_destroy(mutex); \ + if (e != 0) { \ + errno = e; \ + lsd_fatal_error(__FILE__, __LINE__, "list mutex destroy"); \ + abort(); \ + } \ + } while (0) + +# ifndef NDEBUG + static int list_mutex_is_locked (pthread_mutex_t *mutex); +# endif /* !NDEBUG */ + +#else /* !WITH_PTHREADS */ + +# define list_mutex_init(mutex) +# define list_mutex_lock(mutex) +# define list_mutex_unlock(mutex) +# define list_mutex_destroy(mutex) +# define list_mutex_is_locked(mutex) (1) + +#endif /* !WITH_PTHREADS */ + + +/*************** + * Functions * + ***************/ + +List +list_create (ListDelF f) +{ + List l; + + if (!(l = list_alloc())) + return(lsd_nomem_error(__FILE__, __LINE__, "list create")); + l->head = NULL; + l->tail = &l->head; + l->iNext = NULL; + l->fDel = f; + l->count = 0; + list_mutex_init(&l->mutex); + assert(l->magic = LIST_MAGIC); /* set magic via assert abuse */ + return(l); +} + + +void +list_destroy (List l) +{ + ListIterator i, iTmp; + ListNode p, pTmp; + + assert(l != NULL); + list_mutex_lock(&l->mutex); + assert(l->magic == LIST_MAGIC); + i = l->iNext; + while (i) { + assert(i->magic == LIST_MAGIC); + iTmp = i->iNext; + assert(i->magic = ~LIST_MAGIC); /* clear magic via assert abuse */ + list_iterator_free(i); + i = iTmp; + } + p = l->head; + while (p) { + pTmp = p->next; + if (p->data && l->fDel) + l->fDel(p->data); + list_node_free(p); + p = pTmp; + } + assert(l->magic = ~LIST_MAGIC); /* clear magic via assert abuse */ + list_mutex_unlock(&l->mutex); + list_mutex_destroy(&l->mutex); + list_free(l); + return; +} + + +int +list_is_empty (List l) +{ + int n; + + assert(l != 
void
list_sort (List l, ListCmpF f)
{
/*  Sorts list [l] in place into ascending order according to [f] by
 *    relinking nodes (an insertion sort); no nodes are allocated or freed.
 *  Every iterator attached to the list is reset to the head afterwards.
 *  Note: Time complexity O(n^2).
 */
    ListNode *pp, *ppPrev, *ppPos, pTmp;
    ListIterator i;

    assert(l != NULL);
    assert(f != NULL);
    list_mutex_lock(&l->mutex);
    assert(l->magic == LIST_MAGIC);
    if (l->count > 1) {
        /*  [pp] is the address of the link to the node being examined;
         *    [ppPrev] is the address of the link to the node before it.
         */
        ppPrev = &l->head;
        pp = &(*ppPrev)->next;
        while (*pp) {
            if (f((*pp)->data, (*ppPrev)->data) < 0) {
                /*  Node is out of order: scan from the head for the first
                 *    node that compares greater.  Equal items are skipped
                 *    (>= 0), so the node lands after them.  The scan stops
                 *    no later than the predecessor, which compares greater.
                 */
                ppPos = &l->head;
                while (f((*pp)->data, (*ppPos)->data) >= 0)
                    ppPos = &(*ppPos)->next;
                /*  Unlink the node from [*pp] and splice it in at [*ppPos];
                 *    [*pp] then refers to the next unexamined node.
                 */
                pTmp = (*pp)->next;
                (*pp)->next = *ppPos;
                *ppPos = *pp;
                *pp = pTmp;
                /*  If the node was inserted directly in front of the
                 *    predecessor, the link at [ppPrev] now points to the
                 *    moved node; step forward so [ppPrev] again addresses
                 *    the link to the predecessor.
                 */
                if (ppPrev == ppPos)
                    ppPrev = &(*ppPrev)->next;
            }
            else {
                /*  Already in order: advance both cursors.  */
                ppPrev = pp;
                pp = &(*pp)->next;
            }
        }
        /*  [*pp] is NULL here, so [pp] addresses the last "next" ptr.  */
        l->tail = pp;

        /*  Node order has changed; restart every iterator at the head.  */
        for (i=l->iNext; i; i=i->iNext) {
            assert(i->magic == LIST_MAGIC);
            i->pos = i->list->head;
            i->prev = &i->list->head;
        }
    }
    list_mutex_unlock(&l->mutex);
    return;
}
l->head->data : NULL; + list_mutex_unlock(&l->mutex); + return(v); +} + + +void * +list_enqueue (List l, void *x) +{ + void *v; + + assert(l != NULL); + assert(x != NULL); + list_mutex_lock(&l->mutex); + assert(l->magic == LIST_MAGIC); + v = list_node_create(l, l->tail, x); + list_mutex_unlock(&l->mutex); + return(v); +} + + +void * +list_dequeue (List l) +{ + void *v; + + assert(l != NULL); + list_mutex_lock(&l->mutex); + assert(l->magic == LIST_MAGIC); + v = list_node_destroy(l, &l->head); + list_mutex_unlock(&l->mutex); + return(v); +} + + +ListIterator +list_iterator_create (List l) +{ + ListIterator i; + + assert(l != NULL); + if (!(i = list_iterator_alloc())) + return(lsd_nomem_error(__FILE__, __LINE__, "list iterator create")); + i->list = l; + list_mutex_lock(&l->mutex); + assert(l->magic == LIST_MAGIC); + i->pos = l->head; + i->prev = &l->head; + i->iNext = l->iNext; + l->iNext = i; + assert(i->magic = LIST_MAGIC); /* set magic via assert abuse */ + list_mutex_unlock(&l->mutex); + return(i); +} + + +void +list_iterator_reset (ListIterator i) +{ + assert(i != NULL); + assert(i->magic == LIST_MAGIC); + list_mutex_lock(&i->list->mutex); + assert(i->list->magic == LIST_MAGIC); + i->pos = i->list->head; + i->prev = &i->list->head; + list_mutex_unlock(&i->list->mutex); + return; +} + + +void +list_iterator_destroy (ListIterator i) +{ + ListIterator *pi; + + assert(i != NULL); + assert(i->magic == LIST_MAGIC); + list_mutex_lock(&i->list->mutex); + assert(i->list->magic == LIST_MAGIC); + for (pi=&i->list->iNext; *pi; pi=&(*pi)->iNext) { + assert((*pi)->magic == LIST_MAGIC); + if (*pi == i) { + *pi = (*pi)->iNext; + break; + } + } + list_mutex_unlock(&i->list->mutex); + assert(i->magic = ~LIST_MAGIC); /* clear magic via assert abuse */ + list_iterator_free(i); + return; +} + + +void * +list_next (ListIterator i) +{ + ListNode p; + + assert(i != NULL); + assert(i->magic == LIST_MAGIC); + list_mutex_lock(&i->list->mutex); + assert(i->list->magic == LIST_MAGIC); + if 
((p = i->pos)) + i->pos = p->next; + if (*i->prev != p) + i->prev = &(*i->prev)->next; + list_mutex_unlock(&i->list->mutex); + return(p ? p->data : NULL); +} + + +void * +list_insert (ListIterator i, void *x) +{ + void *v; + + assert(i != NULL); + assert(x != NULL); + assert(i->magic == LIST_MAGIC); + list_mutex_lock(&i->list->mutex); + assert(i->list->magic == LIST_MAGIC); + v = list_node_create(i->list, i->prev, x); + list_mutex_unlock(&i->list->mutex); + return(v); +} + + +void * +list_find (ListIterator i, ListFindF f, void *key) +{ + void *v; + + assert(i != NULL); + assert(f != NULL); + assert(i->magic == LIST_MAGIC); + while ((v=list_next(i)) && !f(v,key)) {;} + return(v); +} + + +void * +list_remove (ListIterator i) +{ + void *v = NULL; + + assert(i != NULL); + assert(i->magic == LIST_MAGIC); + list_mutex_lock(&i->list->mutex); + assert(i->list->magic == LIST_MAGIC); + if (*i->prev != i->pos) + v = list_node_destroy(i->list, i->prev); + list_mutex_unlock(&i->list->mutex); + return(v); +} + + +int +list_delete (ListIterator i) +{ + void *v; + + assert(i != NULL); + assert(i->magic == LIST_MAGIC); + if ((v = list_remove(i))) { + if (i->list->fDel) + i->list->fDel(v); + return(1); + } + return(0); +} + + +static void * +list_node_create (List l, ListNode *pp, void *x) +{ +/* Inserts data pointed to by [x] into list [l] after [pp], + * the address of the previous node's "next" ptr. + * Returns a ptr to data [x], or NULL if insertion fails. + * This routine assumes the list is already locked upon entry. 
static void *
list_node_destroy (List l, ListNode *pp)
{
/*  Removes the node pointed to by [*pp] from list [l],
 *    where [pp] is the address of the previous node's "next" ptr.
 *  Returns the data ptr associated with list item being removed,
 *    or NULL if [*pp] points to the NULL element.
 *  The node itself is returned to the node freelist; the data is not freed.
 *  This routine assumes the list is already locked upon entry.
 */
    void *v;
    ListNode p;
    ListIterator i;

    assert(l != NULL);
    assert(l->magic == LIST_MAGIC);
    assert(list_mutex_is_locked(&l->mutex));
    assert(pp != NULL);
    if (!(p = *pp))
        return(NULL);
    v = p->data;
    /*  Unlink [p]; if it was the last node, the tail becomes [pp].  */
    if (!(*pp = p->next))
        l->tail = pp;
    l->count--;
    /*  Repair every iterator that still refers to the departing node.  */
    for (i=l->iNext; i; i=i->iNext) {
        assert(i->magic == LIST_MAGIC);
        /*  Iterator is positioned on [p]: move it to [p]'s successor.  */
        if (i->pos == p)
            i->pos = p->next, i->prev = pp;
        /*  Iterator's prev link lives inside [p]: redirect it to [pp].  */
        else if (i->prev == &p->next)
            i->prev = pp;
        assert((i->pos == *i->prev) || (i->pos == (*i->prev)->next));
    }
    list_node_free(p);
    return(v);
}
static int
list_mutex_is_locked (pthread_mutex_t *mutex)
{
/*  Returns true if the mutex is locked; o/w, returns false.
 *  The mutex state is left unchanged: if the probe below acquires the
 *    mutex (meaning it was free), it is released again before returning.
 *    Previously a successful probe left the mutex locked, so the
 *    predicate had a side effect and a second call reported "locked".
 */
    int rc;

    assert(mutex != NULL);
    rc = pthread_mutex_trylock(mutex);
    if (rc == 0) {
        /*  The probe acquired the mutex, so it was not locked; undo.  */
        (void) pthread_mutex_unlock(mutex);
        return(0);
    }
    return(rc == EBUSY ? 1 : 0);
}
This macro may be redefined to invoke another + * routine instead. + * + * If WITH_LSD_NOMEM_ERROR_FUNC is defined, the linker will expect to + * find an external lsd_nomem_error(file,line,mesg) function. By default, + * lsd_nomem_error(file,line,mesg) is a macro definition that returns NULL. + * This macro may be redefined to invoke another routine instead. + * + * If WITH_PTHREADS is defined, these routines will be thread-safe. + */ + + +/**************** + * Data Types * + ****************/ + +typedef struct list * List; +/* + * List opaque data type. + */ + +typedef struct listIterator * ListIterator; +/* + * List Iterator opaque data type. + */ + +typedef void (*ListDelF) (void *x); +/* + * Function prototype to deallocate data stored in a list. + * This function is responsible for freeing all memory associated + * with an item, including all subordinate items (if applicable). + */ + +typedef int (*ListCmpF) (void *x, void *y); +/* + * Function prototype for comparing two items in a list. + * Returns less-than-zero if (xy). + */ + +typedef int (*ListFindF) (void *x, void *key); +/* + * Function prototype for matching items in a list. + * Returns non-zero if (x==key); o/w returns zero. + */ + +typedef int (*ListForF) (void *x, void *arg); +/* + * Function prototype for operating on each item in a list. + * Returns less-than-zero on error. + */ + + +/******************************* + * General-Purpose Functions * + *******************************/ + +List list_create (ListDelF f); +/* + * Creates and returns a new empty list, or lsd_nomem_error() on failure. + * The deletion function [f] is used to deallocate memory used by items + * in the list; if this is NULL, memory associated with these items + * will not be freed when the list is destroyed. + * Note: Abandoning a list without calling list_destroy() will result + * in a memory leak. 
+ */ + +void list_destroy (List l); +/* + * Destroys list [l], freeing memory used for list iterators and the + * list itself; if a deletion function was specified when the list + * was created, it will be called for each item in the list. + */ + +int list_is_empty (List l); +/* + * Returns non-zero if list [l] is empty; o/w returns zero. + */ + +int list_count (List l); +/* + * Returns the number of items in list [l]. + */ + + +/*************************** + * List Access Functions * + ***************************/ + +void * list_append (List l, void *x); +/* + * Inserts data [x] at the end of list [l]. + * Returns the data's ptr, or lsd_nomem_error() if insertion failed. + */ + +void * list_prepend (List l, void *x); +/* + * Inserts data [x] at the beginning of list [l]. + * Returns the data's ptr, or lsd_nomem_error() if insertion failed. + */ + +void * list_find_first (List l, ListFindF f, void *key); +/* + * Traverses list [l] using [f] to match each item with [key]. + * Returns a ptr to the first item for which the function [f] + * returns non-zero, or NULL if no such item is found. + * Note: This function differs from list_find() in that it does not require + * a list iterator; it should only be used when all list items are known + * to be unique (according to the function [f]). + */ + +int list_delete_all (List l, ListFindF f, void *key); +/* + * Traverses list [l] using [f] to match each item with [key]. + * Removes all items from the list for which the function [f] returns + * non-zero; if a deletion function was specified when the list was + * created, it will be called to deallocate each item being removed. + * Returns a count of the number of items removed from the list. + */ + +int list_for_each (List l, ListForF f, void *arg); +/* + * For each item in list [l], invokes the function [f] with [arg]. + * Returns a count of the number of items on which [f] was invoked. 
+ * If [f] returns <0 for a given item, the iteration is aborted and the + * function returns the negative of that item's position in the list. + */ + +void list_sort (List l, ListCmpF f); +/* + * Sorts list [l] into ascending order according to the function [f]. + * Note: Sorting a list resets all iterators associated with the list. + * Note: The sort algorithm is stable. + */ + + +/**************************** + * Stack Access Functions * + ****************************/ + +void * list_push (List l, void *x); +/* + * Pushes data [x] onto the top of stack [l]. + * Returns the data's ptr, or lsd_nomem_error() if insertion failed. + */ + +void * list_pop (List l); +/* + * Pops the data item at the top of the stack [l]. + * Returns the data's ptr, or NULL if the stack is empty. + */ + +void * list_peek (List l); +/* + * Peeks at the data item at the top of the stack (or head of the queue) [l]. + * Returns the data's ptr, or NULL if the stack (or queue) is empty. + * Note: The item is not removed from the list. + */ + + +/**************************** + * Queue Access Functions * + ****************************/ + +void * list_enqueue (List l, void *x); +/* + * Enqueues data [x] at the tail of queue [l]. + * Returns the data's ptr, or lsd_nomem_error() if insertion failed. + */ + +void * list_dequeue (List l); +/* + * Dequeues the data item at the head of the queue [l]. + * Returns the data's ptr, or NULL if the queue is empty. + */ + + +/***************************** + * List Iterator Functions * + *****************************/ + +ListIterator list_iterator_create (List l); +/* + * Creates and returns a list iterator for non-destructively traversing + * list [l], or lsd_nomem_error() on failure. + */ + +void list_iterator_reset (ListIterator i); +/* + * Resets the list iterator [i] to start traversal at the beginning + * of the list. 
+ */ + +void list_iterator_destroy (ListIterator i); +/* + * Destroys the list iterator [i]; list iterators not explicitly destroyed + * in this manner will be destroyed when the list is deallocated via + * list_destroy(). + */ + +void * list_next (ListIterator i); +/* + * Returns a ptr to the next item's data, + * or NULL once the end of the list is reached. + * Example: i=list_iterator_create(l); while ((x=list_next(i))) {...} + */ + +void * list_insert (ListIterator i, void *x); +/* + * Inserts data [x] immediately before the last item returned via list + * iterator [i]; once the list iterator reaches the end of the list, + * insertion is made at the list's end. + * Returns the data's ptr, or lsd_nomem_error() if insertion failed. + */ + +void * list_find (ListIterator i, ListFindF f, void *key); +/* + * Traverses the list from the point of the list iterator [i] + * using [f] to match each item with [key]. + * Returns a ptr to the next item for which the function [f] + * returns non-zero, or NULL once the end of the list is reached. + * Example: list_iterator_reset(i); while ((x=list_find(i,f,k))) {...} + */ + +void * list_remove (ListIterator i); +/* + * Removes from the list the last item returned via list iterator [i] + * and returns the data's ptr. + * Note: The client is responsible for freeing the returned data. + */ + +int list_delete (ListIterator i); +/* + * Removes from the list the last item returned via list iterator [i]; + * if a deletion function was specified when the list was created, + * it will be called to deallocate the item being removed. + * Returns a count of the number of items removed from the list + * (ie, '1' if the item was removed, and '0' otherwise).
+ */ + + +#endif /* !LSD_LIST_H */ diff --git a/lib/split.c b/lib/split.c new file mode 100644 index 0000000..e85b704 --- /dev/null +++ b/lib/split.c @@ -0,0 +1,149 @@ +/*****************************************************************************\ + * $Id: split.c 1042 2006-03-30 20:55:59Z grondo $ + ***************************************************************************** + * Copyright (C) 2006 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Jim Garlick . + * UCRL-CODE-2003-005. + * + * This file is part of Pdsh, a parallel remote shell program. + * For details, see . + * + * Pdsh is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Pdsh; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +\*****************************************************************************/ + +#if HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include "split.h" + +/* + * Helper function for list_split(). Extract tokens from str. + * Return a pointer to the next token; at the same time, advance + * *str to point to the next separator. 
+ * sep (IN) string containing list of separator characters + * str (IN) double-pointer to string containing tokens and separators + * RETURN next token + */ +static char *_next_tok(char *sep, char **str) +{ + char *tok; + + /* push str past any leading separators */ + while (**str != '\0' && strchr(sep, **str) != NULL) + (*str)++; + + if (**str == '\0') + return NULL; + + /* assign token pointer */ + tok = *str; + + /* push str past token and leave pointing to first separator */ + while (**str != '\0' && strchr(sep, **str) == NULL) + (*str)++; + + /* nullify consecutive separators and push str beyond them */ + while (**str != '\0' && strchr(sep, **str) != NULL) + *(*str)++ = '\0'; + + return tok; +} + +/* + * Given a list of separators and a string, generate a list + * sep (IN) string containing separater characters + * str (IN) string containing tokens and separators + * RETURN new list containing all tokens + */ +List list_split(char *sep, char *str) +{ + List new = list_create((ListDelF) free); + char *tok; + + if (sep == NULL) + sep = " \t"; + + while ((tok = _next_tok(sep, &str)) != NULL) { + if (strlen(tok) > 0) + list_append(new, strdup(tok)); + } + + return new; +} + +List list_split_append (List l, char *sep, char *str) +{ + char *tok; + + if (sep == NULL) + sep = " \t"; + + while ((tok = _next_tok(sep, &str)) != NULL) { + if (strlen(tok) > 0) + list_append(l, strdup(tok)); + } + + return l; +} + +int list_join (char *result, size_t len, const char *sep, List l) +{ + char *str = NULL; + int n = 0; + int truncated = 0; + ListIterator i; + + memset (result, 0, len); + + if (list_count(l) == 0) + return (0); + + i = list_iterator_create(l); + while ((str = list_next(i))) { + int count; + + if (!truncated) { + count = snprintf(result + n, len - n, "%s%s", str, sep); + + if ((count >= (len - n)) || (count < 0)) + truncated = 1; + else + n += count; + } + else + n += strlen (str) + strlen (sep); + } + list_iterator_destroy(i); + + if (truncated) + result [len - 
1] = '\0'; + else { + /* + * Delete final separator + */ + result[strlen(result) - strlen(sep)] = '\0'; + } + + return (n); +} + +/* vi: ts=4 sw=4 expandtab + */ + diff --git a/lib/split.h b/lib/split.h new file mode 100644 index 0000000..6201ea4 --- /dev/null +++ b/lib/split.h @@ -0,0 +1,35 @@ +/*****************************************************************************\ + * $Id$ + ***************************************************************************** + * Copyright (C) 2006 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Jim Garlick . + * UCRL-CODE-2003-005. + * + * This file is part of Pdsh, a parallel remote shell program. + * For details, see . + * + * Pdsh is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * Pdsh is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Pdsh; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+\*****************************************************************************/ +#ifndef _SPLIT_H +#define _SPLIT_H + +#include "list.h" + +List list_split (char *sep, char *str); +List list_split_append (List l, char *sep, char *str); +int list_join (char *result, size_t len, const char *sep, List l); + +#endif /* !_SPLIT_H */ diff --git a/oom-detect.c b/oom-detect.c new file mode 100644 index 0000000..512765b --- /dev/null +++ b/oom-detect.c @@ -0,0 +1,315 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +/*############################################################################ + * $Id$ + *############################################################################ + * + * SLURM spank plugin to detect tasks killed by OOM killer using CHAOS + * kernel /proc/oomkilled file. + * + * Requires SGI Job container-based process tracking. 
+ * + *############################################################################ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + +SPANK_PLUGIN(oom-detect, 1) + +typedef jid_t (*getjid_f) (pid_t pid); + +static int do_syslog = 0; + +static void * libjob = NULL; +static getjid_f getjid = NULL; +static jid_t jid = (jid_t) -1; +static uint32_t ntasks = (uint32_t) -1; + + +int slurm_spank_init (spank_t sp, int ac, char *av[]) +{ + if (!spank_remote (sp)) + return (0); + + if (ac > 0 && strcmp (av[0], "do_syslog") == 0) + do_syslog = 1; + + if (!(libjob = dlopen ("libjob.so", RTLD_LAZY))) { + slurm_error ("Failed to open libjob.so: %s", dlerror ()); + return (-1); + } + + if (!(getjid = (getjid_f) dlsym (libjob, "job_getjid"))) { + slurm_error ("Failed to resolve job_getjid in libjob: %s", + dlerror ()); + return (-1); + } + + /* + * spank_init runs after slurm job container has been created, so + * now determine our jid. 
+ */ + if ((jid = (*getjid) (getpid ())) == (jid_t) -1) + slurm_info ("Failed to get job container id"); + + if (spank_get_item (sp, S_JOB_LOCAL_TASK_COUNT, &ntasks)) { + slurm_error ("spank_get_item (S_JOB_LOCAL_TASK_COUNT) failed."); + /* must be at least one task */ + ntasks = 1; + } + + return (0); +} + +#define OOMKILLED_FILENAME "/proc/oomkilled" + +struct oomkilled_data { + uint64_t jobid; + pid_t pid; + long vmsize; + long rss; + char comm[16]; +}; + +static int _parse_oomkilled (struct oomkilled_data *data, size_t size) +{ + char buf [4096]; + char *bufptr; + char *line; + ssize_t len; + int count = 0; + int fd = -1; + int rv = -1; + + assert(data && size); + + if (access (OOMKILLED_FILENAME, R_OK) < 0) { + goto cleanup; + } + + if ((fd = open (OOMKILLED_FILENAME, O_RDONLY)) < 0) { + goto cleanup; + } + + memset(buf, '\0', sizeof (buf)); + if ((len = read (fd, buf, sizeof (buf))) < 0) { + goto cleanup; + } + + if (!len) + return 0; + + + line = strtok_r (buf, "\n", &bufptr); + do { + struct oomkilled_data *d; + + if (count >= size) { + errno = ENOSPC; + goto cleanup; + } + + d = &data[count]; + memset (d, 0, sizeof (*d)); + if (sscanf (line, "%lu %d %ld %ld %15c", + &d->jobid, &d->pid, &d->vmsize, &d->rss, d->comm) != 5) { + goto cleanup; + } + count++; + + } while ((line = strtok_r (NULL, "\n", &bufptr))); + + rv = count; +cleanup: + close(fd); + return rv; +} + + +int oomkilled_pids (jid_t jid, struct oomkilled_data *d, size_t len) +{ + struct oomkilled_data data [64]; + int count; + int i; + int index = 0; + + if ((count = _parse_oomkilled (data, 64)) < 0) { + return -1; + } + + for (i = 0; i < count; i++) { + if ((jid_t) data[i].jobid == jid) { + if (index >= len) { + errno = ENOSPC; + return -1; + } + d[index++] = data[i]; + } + } + + return (index); +} + +static int pid_reported (pid_t pid) +{ + static pid_t pids[64]; + static int initialized = 0; + int i = 0; + + if (!initialized) { + memset (pids, 0, sizeof (pids)); + initialized = 1; + } + + for 
(i = 0; i < 64; i++) { + if (pids[i] == 0) { + pids[i] = pid; + return (0); + } + if (pids[i] == pid) + return (1); + } + + return (0); +} + +static void print_oomkilled_error (struct oomkilled_data *d, int taskid) +{ + char buf [256]; + const size_t siz = sizeof (buf); + int len = 0; + + memset (buf, 0, sizeof (buf)); + + if (d->vmsize) { + if ((len = snprintf (buf, siz, " VmSize: %ldM", d->vmsize/1024)) < 0) + len = 0; + } + if (d->rss) + len += snprintf (buf+len, siz - len, " RSS: %ldM", d->rss/1024); + + if ((len >= siz)) { + buf [siz - 2] = '+'; + buf [siz - 1] = '\0'; + } + + if (taskid >= 0) { + slurm_error ("task%d: [%s] terminated by OOM killer.", taskid, d->comm); + if (d->vmsize || d->rss) + slurm_error ("task%d:%s", taskid, buf); + } else { + slurm_error ("pid %ld: [%s] %s terminated by OOM killer.\n", + d->pid, d->comm, "(task id unknown)"); + if (d->vmsize || d->rss) + slurm_error ("pid %ld:%s", d->pid, buf); + } + return; +} + +static void send_syslog_oom_msg (spank_t sp) +{ + uint32_t jobid; + uint32_t stepid; + uid_t uid; + + if ((spank_get_item (sp, S_JOB_ID, &jobid) != ESPANK_SUCCESS) || + (spank_get_item (sp, S_JOB_STEPID, &stepid) != ESPANK_SUCCESS) || + (spank_get_item (sp, S_JOB_UID, &uid) != ESPANK_SUCCESS)) { + slurm_error ("Failed to get jobid, stepid, or uid for syslog msg."); + return; + } + + openlog ("slurmd", 0, LOG_USER); + syslog (LOG_WARNING, "OOM detected: jobid=%u.%u uid=%u", jobid, stepid, uid); + closelog (); + slurm_verbose ("Sent OOM message via syslog for this job."); + +} + +int slurm_spank_task_exit (spank_t sp, int ac, char *av[]) +{ + static int nexited = 0; + struct oomkilled_data killed [16]; + int n; + int i; + + if ((jid == (jid_t) -1) || (ntasks == (uint32_t) -1)) + return (0); + + ++nexited; + + /* + * As each task exits, report to user if any processes + * were terminated by OOM killer + */ + if (!(n = oomkilled_pids (jid, killed, 16))) + return (0); + + for (i = 0; i < n; i++) { + struct oomkilled_data *d = 
&killed[i]; + uint32_t taskid; + + if (pid_reported (d->pid)) + continue; + + spank_get_item (sp, S_JOB_PID_TO_GLOBAL_ID, d->pid, &taskid); + + print_oomkilled_error (d, taskid); + } + + if (nexited == ntasks) { + if (do_syslog) + send_syslog_oom_msg (sp); + /* + * If we got here, then we printed one or more OOM killed message + * to user's stderr. Delay a bit here to make it more likely + * that the user gets the message. + */ + sleep (2); + } + return (0); +} + +int slurm_spank_exit (spank_t sp, int ac, char *av[]) +{ + return (0); +} + +/* + * vi:tabstop=4 shiftwidth=4 expandtab + */ diff --git a/overcommit-memory/Makefile b/overcommit-memory/Makefile new file mode 100644 index 0000000..a065cc9 --- /dev/null +++ b/overcommit-memory/Makefile @@ -0,0 +1,17 @@ +SHOPTS := -shared +OBJS := overcommit-memory.o overcommit.o ../lib/fd.o + +all: overcommit-memory.so overcommit-util + +overcommit-memory.so : $(OBJS) + $(CC) $(SHOPTS) -o overcommit-memory.so $(OBJS) + +overcommit-util : util.o overcommit.o ../lib/fd.o + $(CC) -o overcommit-util util.o overcommit.o ../lib/fd.o -lpthread + +.c.o : + $(CC) -ggdb -I../lib -Wall $(CFLAGS) -o $@ -fPIC -c $< + + +clean: + rm -f *.o *.so overcommit-util diff --git a/overcommit-memory/overcommit-memory.c b/overcommit-memory/overcommit-memory.c new file mode 100644 index 0000000..41200ec --- /dev/null +++ b/overcommit-memory/overcommit-memory.c @@ -0,0 +1,220 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. 
+ * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "overcommit.h" + +SPANK_PLUGIN (overcommit, 1); + +const char env_flag [] = "SPANK_OVERCOMMIT_MEMORY_FLAG"; + +static int jobid; +static int stepid; +static int overcommit_ratio = 100; +static overcommit_shared_ctx_t ctx = NULL; + +static int overcommit_opt_process (int val, const char *arg, int remote); + +struct spank_option spank_options [] = +{ + { "overcommit-memory", "[m]", + "Choose memory overcommit mode [m] (always|off|on) for all nodes of job.", + 1, 0, + (spank_opt_cb_f) overcommit_opt_process + }, + SPANK_OPTIONS_TABLE_END +}; + +static int set_overcommit_policy (int val) +{ + ctx = overcommit_shared_ctx_create (jobid, stepid); + + if (ctx == NULL) + return (-1); + + if (overcommit_in_use (ctx, val)) { + slurm_error ("overcommit-memory: Cannot set desired mode on this node"); + overcommit_shared_ctx_destroy (ctx); + } + else if (overcommit_memory_set_current_state (val) < 0) + slurm_error ("overcommit-memory: Failed to set overcommit = %d", val); + else if (overcommit_ratio_set (overcommit_ratio) < 0) + slurm_error ("overcommit-memory: Failed to set overcommit_ratio to %d\n", + overcommit_ratio); + + return (0); +} + +static int strnmatch (const 
char *src, int n, ...) +{ + int i = 0; + int rc = 0; + va_list ap; + + va_start (ap, n); + + while ((i++ < n) && !(rc = (strcmp (src, va_arg (ap, char *)) == 0))) {;} + + va_end (ap); + + return (rc); +} + +static int overcommit_opt_process (int val, const char *arg, int remote) +{ + int overcommit_mode = 0; + + if (strnmatch (arg, 4, "off", "no", "never", "2")) + overcommit_mode = 2; + else if (strnmatch (arg, 2, "always", "1")) + overcommit_mode = 1; + else if (strnmatch (arg, 2, "on", "yes", "0")) + overcommit_mode = 0; + else { + slurm_error ("--overcommit-memory: invalid argument: %s", arg); + return (-1); + } + + if (!remote) { + /* Need to set a flag in environment so slurmd knows that a + * command line option is called and won't apply any environment + * options. + */ + setenv ("SPANK_OVERCOMMIT_MEMORY_FLAG", "1", 1); + return (0); + } + + if (set_overcommit_policy (overcommit_mode) < 0) + return (-1); + + return (0); +} + +static int check_env (spank_t sp, int remote) +{ + char buf [64]; + const char var[] = "SLURM_OVERCOMMIT_MEMORY"; + + /* If env_flag is set in environment, ignore options set from + * environment since command line option should override + */ + if (spank_getenv (sp, env_flag, buf, sizeof (buf)) == ESPANK_SUCCESS) { spank_unsetenv (sp, env_flag); + return (0); + } + + if (spank_getenv (sp, var, buf, sizeof (buf)) == ESPANK_SUCCESS) { + if (overcommit_opt_process (0, buf, remote) < 0) { + slurm_error ("Environment setting %s=%s invalid", var, buf); + return (-1); + } + } + + return (0); +} + +static int str2int (const char *str) +{ + char *p; + long l = strtol (str, &p, 10); + + if (p && (*p != '\0')) + return (-1); + + return ((int) l); +} + +int parse_options (int ac, char **av) +{ + int i; + int retval = 0; + + for (i = 0; i < ac; i++) { + if (strncmp ("ratio=", av[i], 6) == 0) { + char *ratio = av[i] + 6; + if ((overcommit_ratio = str2int (ratio)) < 0) { + slurm_error ("overcommit-memory: Invalid ratio = %s\n", ratio); + retval = -1; 
+ } + } + else { + slurm_error ("overcommit-memory: Invalid option %s\n", av[i]); + retval = -1; + } + } + + return (retval); +} + +int slurm_spank_init (spank_t sp, int ac, char **av) +{ + if (parse_options (ac, av) < 0) + return (-1); + + if (!spank_remote (sp)) { + if (check_env (sp, 0) < 0) + return (-1); + return (0); + } + + /* + * Set jobid and stepid from spank_init. Options are processed + * *after* spank_init, but the option handler does not have access + * to the spank_t handle. + */ + spank_get_item (sp, S_JOB_ID, &jobid); + spank_get_item (sp, S_JOB_STEPID, &stepid); + + if (check_env (sp, 1) < 0) + return (-1); + + return (0); +} + + +int slurm_spank_exit (spank_t sp, int ac, char **av) +{ + if (!spank_remote (sp) || !ctx) + return (0); + + overcommit_shared_ctx_unregister (ctx); + + return (0); +} + + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/overcommit-memory/overcommit.c b/overcommit-memory/overcommit.c new file mode 100644 index 0000000..eb556a1 --- /dev/null +++ b/overcommit-memory/overcommit.c @@ -0,0 +1,383 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "overcommit.h" +#include "fd.h" + +static const char shared_filename [] = "/tmp/spank-overcommit-memory"; +static const char overcommit_file [] = "/proc/sys/vm/overcommit_memory"; +static const char overcommit_ratio_file [] = "/proc/sys/vm/overcommit_ratio"; + +struct overcommit_job_info { + int jobid; + int stepid; + int used; +}; + +struct overcommit_shared_info { + sem_t sem; + int initialized; + int overcommit_value; + int previous_overcommit_ratio; + int nusers; + struct overcommit_job_info users [64]; +}; + +struct overcommit_shared_context { + int fd; + int jobid; + int stepid; + struct overcommit_shared_info *shared; +}; + +static int +unregister_job (overcommit_shared_ctx_t ctx) +{ + int i; + int maxn = sizeof (ctx->shared->users) / sizeof (int); + + for (i = 0; i < maxn; i++) { + struct overcommit_job_info *j = &ctx->shared->users[i]; + + if ((j->jobid == ctx->jobid) + && ((ctx->stepid < 0) || (j->stepid == ctx->stepid))) { + memset (j, 0, sizeof (*j)); + ctx->shared->nusers--; + return (0); + } + } + + return (-1); +} + +static int register_job (overcommit_shared_ctx_t ctx) +{ + int i; + int maxn = sizeof (ctx->shared->users) / sizeof (int); + + for (i = 0; i < maxn; i++) { + struct overcommit_job_info *j = &ctx->shared->users[i]; + if (!j->used) { + j->used = 1; + j->jobid = ctx->jobid; + j->stepid = ctx->stepid; + ctx->shared->nusers++; + return (0); + } + } + + return (-1); +} + +static int overcommit_shared_file_initialized (overcommit_shared_ctx_t ctx) +{ + struct stat st; + + if (fstat (ctx->fd, &st) < 0) { + fprintf (stderr, "fstat (%s): %s\n", shared_filename, strerror (errno)); + return (-1); + } + + if (st.st_uid != geteuid ()) { + fprintf 
(stderr, "Bad owner on %s: uid=%d\n", + shared_filename, st.st_uid); + return (-1); + } + + if (st.st_size == sizeof (*ctx->shared)) + return (1); + + return (0); +} + +static int overcommit_shared_info_init (overcommit_shared_ctx_t ctx) +{ + int len = sizeof (*ctx->shared); + int initialized; + + if (ctx->fd < 0) { + fprintf (stderr, "ctx->fd < 0!\n"); + return (-1); + } + if (fd_get_write_lock (ctx->fd) < 0) + fprintf (stderr, "Failed to get write lock: %s\n", strerror (errno)); + + if (fd_set_close_on_exec (ctx->fd)) + fprintf (stderr, "fd_set_close_on_exec(): %s\n", strerror (errno)); + + if ((initialized = overcommit_shared_file_initialized (ctx)) < 0) + return (-1); + + if (!initialized) + ftruncate (ctx->fd, len); + + ctx->shared = mmap (0, len, PROT_READ|PROT_WRITE, MAP_SHARED, ctx->fd, 0); + + if (ctx->shared == MAP_FAILED) { + fprintf (stderr, "mmap (%s): %s\n", shared_filename, strerror (errno)); + return (-1); + } + + if (!initialized) { + memset (ctx->shared, 0, len); + + if (sem_init (&ctx->shared->sem, 1, 1) < 0) { + fprintf (stderr, "sem_init: %s\n", strerror (errno)); + return (-1); + } + } + + if (fd_release_lock (ctx->fd) < 0) + fprintf (stderr, "Failed to release file lock: %s\n", strerror (errno)); + + return (0); +} + +overcommit_shared_ctx_t overcommit_shared_ctx_attach () +{ + overcommit_shared_ctx_t ctx = malloc (sizeof (*ctx)); + + memset (ctx, 0, sizeof (*ctx)); + ctx->jobid = ctx->stepid = -1; + + if ((ctx->fd = open (shared_filename, O_RDWR)) < 0) + return (NULL); + + if (overcommit_shared_info_init (ctx) < 0) { + overcommit_shared_ctx_destroy (ctx); + return (NULL); + } + + sem_wait (&ctx->shared->sem); + + return (ctx); +} + +overcommit_shared_ctx_t +overcommit_shared_ctx_create (int jobid, int stepid) +{ + int flags = O_RDWR | O_CREAT | O_EXCL; + + overcommit_shared_ctx_t ctx = malloc (sizeof (*ctx)); + + if (!ctx) + return (NULL); + + memset (ctx, 0, sizeof (*ctx)); + ctx->jobid = jobid; + ctx->stepid = stepid; + + if ((ctx->fd = 
open (shared_filename, flags, 0600)) < 0) { + if ((errno != EEXIST) + || ((ctx->fd = open (shared_filename, O_RDWR)) < 0)) { + fprintf (stderr, "Failed to open overcommit shared info: %s", + strerror (errno)); + overcommit_shared_ctx_destroy (ctx); + return (NULL); + } + } + + + if (overcommit_shared_info_init (ctx) < 0) { + overcommit_shared_ctx_destroy (ctx); + return (0); + } + + sem_wait (&ctx->shared->sem); + + return (ctx); +} + + +int overcommit_shared_cleanup (int jobid, int stepid) +{ + int rc = 0; + overcommit_shared_ctx_t ctx; + + if ((ctx = overcommit_shared_ctx_create (jobid, stepid))) { + rc = unregister_job (ctx); + overcommit_shared_ctx_destroy (ctx); + } else if (overcommit_memory_get_current_state () != 0) { + overcommit_memory_set_current_state (0); + } + return (rc); +} + +int overcommit_force_cleanup () +{ + if ((unlink (shared_filename) < 0) && (errno != ENOENT)) + fprintf (stderr, "Failed to remove %s: %s\n", shared_filename, + strerror (errno)); + if (overcommit_memory_get_current_state () != 0) { + return (overcommit_memory_set_current_state (0)); + overcommit_ratio_set (50); /* XXX: Need a way to set default!! 
*/ + } + return (0); +} + +void overcommit_shared_ctx_destroy (overcommit_shared_ctx_t ctx) +{ + if (ctx->shared->nusers == 0) { + unlink (shared_filename); + if (overcommit_memory_get_current_state () != 0) + overcommit_memory_set_current_state (0); + overcommit_ratio_set (ctx->shared->previous_overcommit_ratio); + } + sem_post (&ctx->shared->sem); + munmap (ctx->shared, sizeof (*ctx->shared)); + close (ctx->fd); + free (ctx); +} + +void overcommit_shared_ctx_unregister (overcommit_shared_ctx_t ctx) +{ + sem_wait (&ctx->shared->sem); + unregister_job (ctx); + overcommit_shared_ctx_destroy (ctx); +} + +int overcommit_shared_list_users () +{ + overcommit_shared_ctx_t ctx; + int i; + int maxn = sizeof (ctx->shared->users) / sizeof (int); + + if (!(ctx = overcommit_shared_ctx_attach ()) || ctx->shared->nusers == 0) { + fprintf (stdout, "No users currently using overcommit-memory\n"); + return (0); + } + + fprintf (stdout, "%d users of overcommit-memory on this node:\n", + ctx->shared->nusers); + + for (i = 0; i < maxn; i++) { + struct overcommit_job_info *j = &ctx->shared->users[i]; + if (j->used) + fprintf (stdout, "%d.%d\n", j->jobid, j->stepid); + } + fprintf (stdout, "\n"); + fprintf (stdout, "Current setting = %d\n", ctx->shared->overcommit_value); + fprintf (stdout, "Current ratio = %d\n", overcommit_ratio_get ()); + fprintf (stdout, "Previous ratio = %d\n", + ctx->shared->previous_overcommit_ratio); + + + overcommit_shared_ctx_destroy (ctx); + return (0); +} + +int overcommit_in_use (overcommit_shared_ctx_t ctx, int value) +{ + int rc = 0; + if ((ctx->shared->nusers > 0) && (ctx->shared->overcommit_value != value)) + rc = 1; + else { + if (!ctx->shared->nusers) { + ctx->shared->overcommit_value = value; + ctx->shared->previous_overcommit_ratio = overcommit_ratio_get (); + } + register_job (ctx); + } + sem_post (&ctx->shared->sem); + + return (rc); +} + +int overcommit_memory_get_current_state () +{ + int val = -1; + FILE *fp; + + if (!(fp = fopen 
(overcommit_file, "r"))) + return (-1); + + fscanf (fp, "%d", &val); + + fclose (fp); + + return (val); +} + +int overcommit_memory_set_current_state (int val) +{ + FILE *fp; + + if (val > 2 || val < 0) + return (-1); + + if (!(fp = fopen (overcommit_file, "w"))) { + fprintf (stderr, "open (%s): %s\n", overcommit_file, strerror (errno)); + return (-1); + } + + fprintf (fp, "%d\n", val); + + fclose (fp); + + return (0); +} + +int overcommit_ratio_set (int val) +{ + FILE *fp; + + if (!(fp = fopen (overcommit_ratio_file, "w"))) + return (-1); + + fprintf (fp, "%d\n", val); + + fclose (fp); + + return (0); +} + +int overcommit_ratio_get () +{ + int val = -1; + FILE *fp; + + if (!(fp = fopen (overcommit_ratio_file, "r"))) + return (-1); + + fscanf (fp, "%d", &val); + + fclose (fp); + + return (val); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/overcommit-memory/overcommit.h b/overcommit-memory/overcommit.h new file mode 100644 index 0000000..0341980 --- /dev/null +++ b/overcommit-memory/overcommit.h @@ -0,0 +1,47 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
#ifndef _HAVE_OVERCOMMIT_H
#define _HAVE_OVERCOMMIT_H

/*
 * Opaque handle to the shared overcommit state of a node.
 */
typedef struct overcommit_shared_context * overcommit_shared_ctx_t;

/*
 * Create a context for job [jobid], step [stepid].
 * Returns NULL on failure.
 */
overcommit_shared_ctx_t overcommit_shared_ctx_create (int jobid, int stepid);

/*
 * Release a context.  The unregister variant first removes the
 * context's job from the shared state.
 */
void overcommit_shared_ctx_destroy (overcommit_shared_ctx_t ctx);
void overcommit_shared_ctx_unregister (overcommit_shared_ctx_t ctx);

/*
 * Returns nonzero if the node is already in use with a mode other than
 * [value]; otherwise registers the context's job and returns 0.
 */
int overcommit_in_use (overcommit_shared_ctx_t ctx, int value);

/*
 * Print the jobs currently registered on this node to stdout.
 */
int overcommit_shared_list_users (void);

/*
 * Remove the state of one job, or all state on the node.
 */
int overcommit_shared_cleanup (int jobid, int stepid);
int overcommit_force_cleanup (void);

/*
 * Read / write the overcommit mode.  Getter returns -1 on failure.
 */
int overcommit_memory_get_current_state (void);
int overcommit_memory_set_current_state (int value);

/*
 * Read / write the overcommit ratio.  Getter returns -1 on failure.
 */
int overcommit_ratio_get (void);
int overcommit_ratio_set (int value);

#endif /* !_HAVE_OVERCOMMIT_H */
+ ****************************************************************************/ + +#include +#include +#include +#include + +#include "overcommit.h" + +char *prog = NULL; + +static int cleanup = 0; +static int list_users = 0; +static int force_reset = 0; +static int jobid = -1; + +#define __GNU_SOURCE +#include + +struct option opt_table [] = { + { "help", 0, NULL, 'h' }, + { "cleanup", 0, NULL, 'c' }, + { "list-users", 0, NULL, 'l' }, + { "force-reset", 0, NULL, 'f' }, + { "jobid", 1, NULL, 'j' }, + { NULL, 0, NULL, 0 } +}; + +const char opt_string[] = "hclfj:"; + +#define USAGE "\ +Usage: %s [OPTONS]\n\ + -h, --help Display this message\n\ + -l, --list-users List current jobs using overcommit-memory plugin.\n\ + -c, --cleanup Cleanup any overcommit-memory usage by a SLURM job.\n\ + SLURM_JOBID and SLURM_STEPID should be set in current\n\ + environment. Removes shared memory file and resets\n\ + overcommit_memory to default if no more references\n\ + to overcommit-memory exist.\n\ + -f, --force-reset Force total cleanup of overcommit-memory state. 
Reset\n\ + overcommit_memory setting to default and remove\n\ + overcommit shared file.\n\ + -j, --jobid=ID Specify SLURM jobid to clean up after if SLURM_JOBID\n\ + not set in environment\n" + +static int get_env_int (const char *var); +static int str2int (const char *str); +static int parse_cmdline (int ac, char **av); +static void log_fatal (char *fmt, ...); + +int main (int ac, char *av[]) +{ + int stepid = -1; + + parse_cmdline (ac, av); + + if (jobid < 0) + jobid = get_env_int ("SLURM_JOBID"); + if (stepid < 0) + stepid = get_env_int ("SLURM_STEPID"); + + if (cleanup && jobid < 0) + log_fatal ("--cleanup requires SLURM_JOBID in environment\n"); + + if (!cleanup && !list_users && !force_reset) + log_fatal ("Specify one of --cleanup, --force-reset, or --list-users.\n"); + + if (list_users) + overcommit_shared_list_users (); + + if (force_reset) { + if (overcommit_force_cleanup () < 0) + return (1); + printf ("Successfuly reset overcommit-memory state\n"); + } + else if (cleanup) { + /* + * If overcommit_shared_cleanup returns < 0, this probably just + * means that the jobid.stepid is not in the shared memory state. 
/*
 * Convert the decimal string [str] to an int.
 *
 * Returns the value, or -1 if [str] is NULL or empty, contains anything
 * other than one complete number, or does not fit in an int.  Callers
 * treat any negative result as invalid.
 */
static int str2int (const char *str)
{
    char *end = NULL;
    long l;

    if ((str == NULL) || (*str == '\0'))
        return (-1);

    l = strtol (str, &end, 10);

    /* No digits consumed, or trailing garbage after the number. */
    if ((end == str) || (*end != '\0'))
        return (-1);

    /* Reject values that would change when narrowed to int. */
    if ((long) (int) l != l)
        return (-1);

    return ((int) l);
}
+ * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +/**************************************************************************** + * + * preserve-env.so + * + * This SLURM spank plugin will preserve all SLURM_* environment + * variables from srun's invoking shell to the remote node or nodes + * on which the command specified by srun is invoked. The main purpose + * is to preserve the environment from a SLURM allocation shell + * (e.g. salloc), onto a remote "login" shell spawned with + * + * srun -n1 --pty $SHELL. + * + * Normally, SLURM environment variables would be reset in the + * remote shell, but when using --preserve-slurm-env, they will + * remain essentially the same as in the shell spawned by salloc. 
/*
 *  Copy env var entry in [entry] into buffer [var] of size [len],
 *   NUL terminating at the first '='. Furthermore, if [valp] is
 *   non-NULL, set [valp] to point to the first character after the
 *   nullified '=' inside [var].
 *
 *  At most [len] - 1 bytes are copied and [var] is always NUL
 *   terminated.  If [entry] has no '=' (or it was cut off), [valp] is
 *   set to an empty string inside [var].
 *
 *  Returns 0 on success, -1 if [entry] is NULL, the buffer is unusable,
 *   or [entry] had to be truncated to fit.
 */
static int get_env_var (const char *entry, char *var, int len, char **valp)
{
    size_t n;
    char *eq;
    int rc = 0;

    if ((var == NULL) || (len <= 0))
        return (-1);

    memset (var, 0, len);

    /* Never leave the caller's value pointer unset. */
    if (valp)
        *valp = var;

    if (entry == NULL)
        return (-1);

    n = strlen (entry);
    if (n >= (size_t) len) {
        n = (size_t) len - 1;
        rc = -1;
    }
    memcpy (var, entry, n);

    /*
     *  Split at the first '=' only; any further '=' belongs to the value.
     */
    if ((eq = memchr (var, '=', n))) {
        *eq = '\0';
        if (valp)
            *valp = eq + 1;
    }
    else if (valp)
        *valp = var + n;

    return (rc);
}
+ */ +static int preserve_slurm_var (const char *entry) +{ + char *val; + char var [1024]; + char newvar [1024]; + int n; + int len = sizeof (var) - 1; + + get_env_var (entry, var, len, &val); + + n = snprintf (newvar, len, "save_%s", var); + + if (n < 0 || n >= len) { + fprintf (stderr, "Variable name %s too long to copy!\n", var); + return (-1); + } + + if (setenv (newvar, val, 1) < 0) { + fprintf (stderr, "Failed to set %s=%s: %s\n", + newvar, val, strerror (errno)); + return (-1); + } + return (0); +} + +extern char **environ; + +int slurm_spank_local_user_init (spank_t sp, int ac, char **av) +{ + char **p = environ; + + if (!enabled) + return (0); + + while (*p != NULL) { + /* + * Preserve SLURM environment variables + * (except for those we know we don't need) + */ + if (strncmp (*p, "SLURM_", 6) == 0 && + strncmp (*p, "SLURM_RLIMIT", 12) != 0 && + strncmp (*p, "SLURM_UMASK", 11) != 0 && + strncmp (*p, "SLURM_PRIO", 10) != 0 && + preserve_slurm_var (*p) < 0) + return (-1); + ++p; + } + + return (0); +} + +int slurm_spank_task_init (spank_t sp, int ac, char **av) +{ + List l; + const char **env; + char *entry; + char var [64]; + char *val; + + if (!enabled) + return (0); + + /* + * The following routine unsets all SLURM_* and MPIRUN_* + * environment variables, and resets the saved variables + * in save_*. We are careful not to walk the env array + * at the same time as adding and removing variables, so + * we instead use the list 'l' to hold environment entries + * for the next operation. + * + * The first step accumulates and removes all unwanted variables, + * then the second step resets the saved variables. 
+ */ + l = list_create (NULL); + + if (spank_get_item (sp, S_JOB_ENV, &env) != ESPANK_SUCCESS) { + fprintf (stderr, "Failed to get job environment!\n"); + return (-1); + } + + /* + * First collect all env vars to unset + */ + while (*env != NULL) { + if (strncmp (*env, "SLURM_", 6) == 0 || + strncmp (*env, "MPIRUN_", 7) == 0) + list_push (l, strdup (*env)); + ++env; + } + + while ((entry = list_pop (l))) { + get_env_var (entry, var, sizeof (var), &val); + spank_unsetenv (sp, var); + free (entry); + } + + /* + * Now search for saved SLURM env vars to reset + */ + + if (spank_get_item (sp, S_JOB_ENV, &env) != ESPANK_SUCCESS) { + fprintf (stderr, "Failed to get job environment!\n"); + return (-1); + } + + while (*env != NULL) { + if (strncmp (*env, "save_SLURM_", 11) == 0) + list_push (l, strdup (*env)); + env++; + } + + while ((entry = list_pop (l))) { + get_env_var (entry, var, sizeof (var), &val); + + if (spank_setenv (sp, var + 5, val, 1) != ESPANK_SUCCESS) { + fprintf (stderr, "spank_setenv (%s) failed\n", var + 5); + } + + /* + * Now unset the unneeded save_* var + */ + spank_unsetenv (sp, var); + + free (entry); + } + + list_destroy (l); + + return (0); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/pty.c b/pty.c new file mode 100644 index 0000000..fd4451a --- /dev/null +++ b/pty.c @@ -0,0 +1,565 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +/* + * Hack to run task 0 under a pty for a slurm job. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + + +#include + +SPANK_PLUGIN (pty, 1) + +/* + * Globals: + */ +static int do_pty = 0; +static int master = -1; +static int listenfd = -1; +static pid_t pid; +static struct termios termdefaults; + +static int pty_opt_process (int val, const char *optarg, int remote); + +struct spank_option spank_options[] = +{ + { "pty", NULL, + "Allocate a pty for rank 0. Must also specify -u." 
+ " (Use of --pty implies --output=0)", + 0, 0, (spank_opt_cb_f) pty_opt_process + }, + SPANK_OPTIONS_TABLE_END +}; + + +struct pty_winsz { + unsigned rows; + unsigned cols; +}; + +static void pty_winsz_pack (struct pty_winsz *w) +{ + w->rows = htonl (w->rows); + w->cols = htonl (w->cols); +} + +static void pty_winsz_unpack (struct pty_winsz *w) +{ + w->rows = ntohl (w->rows); + w->cols = ntohl (w->cols); +} + +static int pty_opt_process (int val, const char *optarg, int remote) +{ + do_pty = 1; + return (0); +} + +void process_pty () +{ + unsigned char buf [4096]; + int len; + + if ((len = read (master, buf, sizeof (buf))) < 0) { + if (errno == EAGAIN) + return; + if (errno == EIO) /* Why do we get this sometimes */ + return; + slurm_error ("read (pty master): %m\n"); + exit (1); + } + else if (len == 0) { + close (STDOUT_FILENO); + close (master); + master = -1; + return; + } + + write (STDOUT_FILENO, buf, len); +} + +void process_stdin () +{ + unsigned char buf [4096]; + int len; + + if ((len = read (STDIN_FILENO, buf, sizeof (buf))) < 0) { + slurm_error ("stdin read: %m\n"); + exit (1); + } + else if (len == 0) { + close (STDOUT_FILENO); + master = -1; + return; + } + + write (master, buf, len); +} + +void check_for_slave_exit () +{ + int status = 0; + + if (waitpid (pid, &status, WNOHANG) <= 0) + return; + + if (WIFEXITED (status)) + exit (status); +} + +static int fd_set_nonblocking (int fd) +{ + int fval; + + assert (fd >= 0); + + if ((fval = fcntl (fd, F_GETFL, 0)) < 0) + return (-1); + if (fcntl (fd, F_SETFL, fval | O_NONBLOCK) < 0) + return (-1); + return (0); +} + +static int get_winsize (spank_t sp, struct winsize *wsp) +{ + char val [64]; + + memset (wsp, 0, sizeof (*wsp)); + + if (spank_getenv (sp, "SLURM_PTY_WIN_ROW", val, 64) == ESPANK_SUCCESS) { + spank_unsetenv (sp, "SLURM_PTY_WIN_ROW"); + wsp->ws_row = atoi (val); + } + + if (spank_getenv (sp, "SLURM_PTY_WIN_COL", val, 64) == ESPANK_SUCCESS) { + spank_unsetenv (sp, "SLURM_PTY_WIN_COL"); + 
wsp->ws_col = atoi (val); + } + + if (!wsp->ws_row && !wsp->ws_col) + return (0); + return (1); +} + +int pty_connect_back (spank_t sp) +{ + char ip [64], port [16]; + struct sockaddr_in addr; + int s; + + int rc = spank_getenv (sp, "SLURM_LAUNCH_NODE_IPADDR", ip, 64); + if (rc != ESPANK_SUCCESS) { + slurm_error ("failed to read SLURM_NODE_IPADDR in env!"); + return (-1); + } + + if (spank_getenv (sp, "SLURM_PTY_PORT", port, 16) != ESPANK_SUCCESS) { + slurm_error ("failed to read SLURM_PTY_PORT in env!"); + return (-1); + } + + addr.sin_family = AF_INET; + inet_aton (ip, &addr.sin_addr); + addr.sin_port = htons (atoi (port)); + + + if ((s = socket (AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { + slurm_error ("pty: socket: %m"); + return (-1); + } + + + if (connect (s, (struct sockaddr *) &addr, sizeof (addr)) < 0) { + slurm_error ("pty: connect: %m"); + close (s); + return (-1); + } + + return (s); +} + +static int write_pty_winsize (int fd, struct winsize *ws) +{ + int len; + struct pty_winsz winsz; + + winsz.rows = ws->ws_row; + winsz.cols = ws->ws_col; + + pty_winsz_pack (&winsz); + + if ((len = write (fd, &winsz, sizeof (winsz))) < 0) { + slurm_error ("write_pty_winsz: %m"); + return (-1); + } + + return (len); +} + +static int read_pty_winsize (int fd, struct winsize *ws) +{ + struct pty_winsz winsz; + int len; + + if ((len = read (fd, &winsz, sizeof (winsz))) < 0) { + slurm_error ("read_pty_winsz: %m"); + return (-1); + } + + if (len == 0) { + slurm_error ("read_pty_winsz: Remote closed connection."); + return (-1); + } + + pty_winsz_unpack (&winsz); + + memset (ws, 0, sizeof (*ws)); + + ws->ws_col = winsz.cols; + ws->ws_row = winsz.rows; + + return (0); +} + +static void process_winsz_event (int fd, int master) +{ + struct winsize ws; + + if (read_pty_winsize (fd, &ws) < 0) + return; + + ioctl (master, TIOCSWINSZ, &ws); + kill (0, SIGWINCH); +} + +static int no_close_stdio (spank_t sp) +{ + char val [64]; + const char var[] = "SLURM_PTY_NO_CLOSE_STDIO"; + + if 
(spank_getenv (sp, var, val, 64) == ESPANK_SUCCESS) + return (1); + return 0; +} + +static void close_stdio (void) +{ + int devnull; + + if ((devnull = open ("/dev/null", O_RDWR)) < 0) { + slurm_error ("Failed to open /dev/null: %m"); + } + else { + dup2 (devnull, STDOUT_FILENO); + dup2 (devnull, STDIN_FILENO); + dup2 (devnull, STDERR_FILENO); + close (devnull); + } +} + +int slurm_spank_task_init (spank_t sp, int ac, char **av) +{ + int taskid; + int rfd; + struct winsize ws; + struct winsize *wsp = NULL; + + if (!do_pty) + return (0); + + spank_get_item (sp, S_TASK_GLOBAL_ID, &taskid); + + if (taskid != 0) { + if (!no_close_stdio (sp)) + close_stdio (); + return (0); + } + + if ((rfd = pty_connect_back (sp)) < 0) { + slurm_error ("Failed to connect back to pty server"); + } + + if (get_winsize (sp, &ws)) + wsp = &ws; + + if ((pid = forkpty (&master, NULL, NULL, wsp)) < 0) { + slurm_error ("Failed to allocate a pty for rank 0: %m\n"); + return (0); + } + else if (pid == 0) { + /* Child. 
Continue with SLURM code */ + return (0); + } + + /* Parent: process data from client */ + + while (1) { + struct pollfd fds[3]; + int rc; + int nfds = 2; + + fd_set_nonblocking (master); + fd_set_nonblocking (STDIN_FILENO); + + fds[0].fd = master; + fds[1].fd = STDIN_FILENO; + fds[0].events = POLLIN | POLLERR; + fds[1].events = POLLIN | POLLERR; + + if (rfd >= 0) { + fd_set_nonblocking (rfd); + fds[2].fd = rfd; + fds[2].events = POLLIN | POLLERR; + nfds++; + } + + + if ((rc = poll (fds, 3, -1)) < 0) { + slurm_error ("poll: %m\n"); + exit (1); + } + + if (fds[0].revents & POLLERR) { + check_for_slave_exit (); + continue; + } + + if (fds[0].revents & POLLIN) + process_pty (); + + if (fds[1].revents & POLLIN) + process_stdin (); + + if (fds[2].revents & POLLIN) + process_winsz_event (rfd, master); + + check_for_slave_exit (); + } + + return (0); +} + +static void pty_restore (void) +{ + /* STDIN is probably closed by now */ + if (tcsetattr (STDOUT_FILENO, TCSANOW, &termdefaults) < 0) + fprintf (stderr, "tcsetattr: %s\n", strerror (errno)); +} + +static int set_winsize (spank_t sp) +{ + struct winsize ws; + char buf[64]; + ioctl (STDIN_FILENO, TIOCGWINSZ, &ws); + + snprintf (buf, sizeof (buf), "%d", ws.ws_row); + setenv ("SLURM_PTY_WIN_ROW", buf, 1); + + snprintf (buf, sizeof (buf), "%d", ws.ws_col); + setenv ("SLURM_PTY_WIN_COL", buf, 1); + + return (0); +} + +static void sigset_sigwinch (sigset_t *pset) +{ + sigemptyset (pset); + sigaddset (pset, SIGWINCH); +} + +static int notify_winsize_change (int fd) +{ + struct winsize ws; + ioctl (STDOUT_FILENO, TIOCGWINSZ, &ws); + write_pty_winsize (fd, &ws); + return (0); +} + +/* + * Detect when a window size change event occurs. 
+ */
+/* Written from the signal handler, hence volatile sig_atomic_t */
+static volatile sig_atomic_t winch = 0;
+static void handle_sigwinch (int sig)
+{
+    winch = 1;
+    signal (SIGWINCH, handle_sigwinch);
+}
+
+/*
+ *  Thread body: accept one connection on `listenfd', then forward a
+ *   window-size update on it each time SIGWINCH has been seen.
+ */
+static void * pty_thread (void *arg)
+{
+    int fd;
+    sigset_t set;
+
+    sigset_sigwinch (&set);
+    pthread_sigmask (SIG_UNBLOCK, &set, NULL);
+
+    signal (SIGWINCH, handle_sigwinch);
+
+    if ((fd = accept (listenfd, NULL, NULL)) < 0) {
+        slurm_error ("pty: accept: %m");
+        return NULL;
+    }
+
+    for (;;) {
+        /* Sleep until interrupted by a signal */
+        poll (NULL, 0, -1);
+        if (!winch)
+            continue;
+        /*
+         *  Clear the flag before notifying so that a SIGWINCH arriving
+         *   during the notification is not lost.
+         */
+        winch = 0;
+        if (notify_winsize_change (fd) < 0)
+            return NULL;
+    }
+
+    return (NULL);
+}
+
+/*
+ *  Bind `sockfd' to an ephemeral port on the wildcard address.
+ *   Returns the port in host byte order, or -1 on failure.
+ */
+static int bind_wild (int sockfd)
+{
+    socklen_t len;
+    struct sockaddr_in sin;
+
+    memset(&sin, 0, sizeof(sin));
+    sin.sin_family = AF_INET;
+    sin.sin_addr.s_addr = htonl(INADDR_ANY);
+    sin.sin_port = htons(0);    /* bind ephemeral port */
+
+    if (bind (sockfd, (struct sockaddr *) &sin, sizeof(sin)) < 0) {
+        slurm_error ("bind: %m\n");
+        return (-1);
+    }
+    len = sizeof(sin);
+    if (getsockname(sockfd, (struct sockaddr *) &sin, &len) < 0)
+        return (-1);
+    return ntohs(sin.sin_port);
+
+}
+
+/*
+ *  Create a listening TCP socket on an ephemeral port.
+ *   On success stores the socket in *fd and the port in *port and
+ *   returns 0. On failure the socket is closed and -1 is returned.
+ *
+ *  Ports above 32767 do not fit a signed short; the bit pattern is
+ *   kept and set_pty_env () prints it back as unsigned.
+ */
+static int do_listen (int *fd, short *port)
+{
+    int rc, val, bound;
+
+    if ((*fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
+        return -1;
+
+    val = 1;
+    rc = setsockopt(*fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int));
+    /* setsockopt(2) reports failure with -1 (the old test was rc > 0) */
+    if (rc < 0) {
+        goto cleanup;
+    }
+
+    /* Do not store bind_wild's -1 failure value as a port number */
+    if ((bound = bind_wild (*fd)) < 0)
+        goto cleanup;
+    *port = (short) bound;
+
+    if ((rc = listen(*fd, 16)) < 0) {
+        slurm_error ("listen: %m");
+        goto cleanup;
+    }
+
+    return (0);
+
+cleanup:
+    close (*fd);
+    return (-1);
+}
+
+/*
+ *  Export the listen port as SLURM_PTY_PORT.
+ */
+static void set_pty_env (short port)
+{
+    char buf [64];
+
+    /* %hu expects an unsigned short; convert explicitly */
+    snprintf (buf, sizeof (buf), "%hu", (unsigned short) port);
+    setenv ("SLURM_PTY_PORT", buf, 1);
+}
+
+/*
+ *  Open the listen socket, export its port and start the detached
+ *   window-size thread.
+ */
+static int pty_thread_create (spank_t sp)
+{
+    short port;
+    int err;
+    pthread_attr_t attr;
+    pthread_t tid;
+
+    if (do_listen (&listenfd, &port) < 0) {
+        slurm_error ("Unable to create pty listen port: %m");
+        return (-1);
+    }
+    set_pty_env (port);
+
+    pthread_attr_init (&attr);
+    pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
+    err = pthread_create
(&tid, &attr, &pty_thread, NULL); + pthread_attr_destroy (&attr); + if (err) + return (-1); + return (0); +} + + +static void block_sigwinch (void) +{ + sigset_t set; + sigset_sigwinch (&set); + pthread_sigmask (SIG_BLOCK, &set, NULL); +} + +int slurm_spank_local_user_init (spank_t sp, int ac, char **av) +{ + struct termios term; + int fd = STDIN_FILENO; + + if (!do_pty) + return (0); + + + /* Save terminal settings for restore */ + tcgetattr (fd, &termdefaults); + tcgetattr (fd, &term); + /* Set raw mode on local tty */ + cfmakeraw (&term); + tcsetattr (fd, TCSANOW, &term); + atexit (&pty_restore); + + set_winsize (sp); + + block_sigwinch (); + + pty_thread_create (sp); + + return (0); +} diff --git a/renice.c b/renice.c new file mode 100644 index 0000000..8e69722 --- /dev/null +++ b/renice.c @@ -0,0 +1,190 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include + +/* + * All spank plugins must define this macro for the SLURM plugin loader. + */ +SPANK_PLUGIN(renice, 1) + +#define PRIO_ENV_VAR "SLURM_RENICE" +#define PRIO_NOT_SET 42 + +/* + * Minimum allowable value for priority. May be set globally + * via plugin option min_prio= + */ +static int min_prio = -20; +static int default_prio = 0; + +static int prio = PRIO_NOT_SET; + +static int _renice_opt_process (int val, const char *optarg, int remote); +static int _str2prio (const char *str, int *p2int); +static int _check_env (spank_t sp); + +/* + * Provide a --renice=[prio] option to srun: + */ +struct spank_option spank_options[] = +{ + { "renice", "[prio]", "Re-nice job tasks to priority [prio].", 1, 0, + (spank_opt_cb_f) _renice_opt_process + }, + SPANK_OPTIONS_TABLE_END +}; + + +/* + * Called from both srun and slurmd. + */ +int slurm_spank_init (spank_t sp, int ac, char **av) +{ + int i; + + for (i = 0; i < ac; i++) { + if (strncmp ("min_prio=", av[i], 9) == 0) { + const char *optarg = av[i] + 9; + if (_str2prio (optarg, &min_prio) < 0) + slurm_error ("Ignoring invalid min_prio value \"%s\"", av[i]); + } + else if (strncmp ("default=", av[i], 8) == 0) { + const char *optarg = av[i] + 8; + if (_str2prio (optarg, &default_prio) < 0) + slurm_error ("renice: Ignoring invalid default value \"%s\"", + av[i]); + } + else { + slurm_error ("renice: Invalid option \"%s\"", av[i]); + } + } + + if (!spank_remote (sp)) + slurm_verbose ("renice: min_prio = %d", min_prio); + + return (0); +} + + +int slurm_spank_task_post_fork (spank_t sp, int ac, char **av) +{ + pid_t pid; + int taskid; + + /* + * Use default priority if prio not set by command line or env var + */ + if ((prio == PRIO_NOT_SET) && (_check_env (sp) < 0)) + prio = default_prio; + + if (prio < min_prio) + prio = min_prio; + + spank_get_item (sp, S_TASK_GLOBAL_ID, 
&taskid);
+    spank_get_item (sp, S_TASK_PID, &pid);
+
+    /*
+     * No need to do any thing if priority is system default
+     */
+    if (prio == getpriority (PRIO_PROCESS, (int) pid))
+        return (0);
+
+    /* pid_t has no printf conversion of its own; print it as long */
+    slurm_verbose ("re-nicing task%d pid %ld to %d\n",
+                   taskid, (long) pid, prio);
+
+    if (setpriority (PRIO_PROCESS, (int) pid, (int) prio) < 0) {
+        slurm_error ("setpriority: %m");
+        return (-1);
+    }
+
+    return (0);
+}
+
+/*
+ *  Option callback for --renice=[prio]: parse `optarg' into the global
+ *   `prio'. Returns 0 on success, -1 on a missing or invalid argument.
+ *   A value below min_prio is only reported here.
+ */
+static int _renice_opt_process (int val, const char *optarg, int remote)
+{
+    if (optarg == NULL) {
+        slurm_error ("--renice: invalid argument!");
+        return (-1);
+    }
+
+    if (_str2prio (optarg, &prio) < 0) {
+        slurm_error ("Bad value for --renice: \"%s\"\n", optarg);
+        return (-1);
+    }
+
+    if (prio < min_prio)
+        slurm_error ("--renice=%d not allowed, will use min=%d",
+                prio, min_prio);
+
+    return (0);
+}
+
+/*
+ *  Convert `str' to a priority in the range [-20, 20].
+ *   Stores the result in *p2int and returns 0 on success. Returns -1
+ *   and leaves *p2int untouched if `str' is empty, has trailing
+ *   characters, or is out of range.
+ */
+static int _str2prio (const char *str, int *p2int)
+{
+    long int l;
+    char *p;
+
+    l = strtol (str, &p, 10);
+    /* p == str: no digits were consumed (e.g. empty string) */
+    if ((p == str) || (*p != '\0') || (l < -20) || (l > 20))
+        return (-1);
+
+    *p2int = (int) l;
+
+    return (0);
+}
+
+/*
+ *  Take the priority from the job's SLURM_RENICE variable, if set.
+ *   Returns 0 when `prio' was set from the environment, -1 otherwise.
+ */
+static int _check_env (spank_t sp)
+{
+    /*
+     *  See if SLURM_RENICE env var is set by user
+     */
+    char val [1024];
+
+    if (spank_getenv (sp, PRIO_ENV_VAR, val, sizeof (val)) != ESPANK_SUCCESS)
+        return (-1);
+
+    if (_str2prio (val, &prio) < 0) {
+        slurm_error ("Bad value for %s: \"%s\".\n", PRIO_ENV_VAR, val);
+        return (-1);
+    }
+
+    if (prio < min_prio) {
+        slurm_error ("%s=%d not allowed, using min=%d",
+                PRIO_ENV_VAR, prio, min_prio);
+    }
+
+    return (0);
+}
+
+
+/*
+ * vi: ts=4 sw=4 expandtab
+ */
diff --git a/system-safe-preload.c b/system-safe-preload.c
new file mode 100644
index 0000000..bb9373e
--- /dev/null
+++ b/system-safe-preload.c
@@ -0,0 +1,343 @@
+/*****************************************************************************
+ *
+ * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory.
+ * Written by Mark Grondona .
+ * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +/* + * safe-system.so : Making system(3) safe for MPI jobs everywhere. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char **environ; + +typedef int (*system_f) (const char * cmd); + +static void * libc_handle; +static system_f real_system; + +static int client_fd = -1; +static int server_fd = -1; + +static int write_n (int fd, const void *buf, size_t n) +{ + size_t nleft; + ssize_t nwritten; + unsigned const char *p; + + p = buf; + nleft = n; + while (nleft > 0) { + if ((nwritten = write (fd, p, nleft)) < 0) { + if (errno == EINTR) + continue; + else + return (-1); + } + nleft -= nwritten; + p += nwritten; + } + return (n); +} + +static int read_n (int fd, void *buf, size_t n) +{ + size_t nleft; + ssize_t nread; + unsigned char *p; + + p = buf; + nleft = n; + while (nleft > 0) { + if ((nread = read (fd, p, nleft)) < 0) { + if (errno == EINTR) + continue; + else + return (-1); + } + else if (nread == 0) { /* EOF */ + break; + } + nleft -= nread; + p += nread; + } + return (n - nleft); +} + + +static int create_socketpair (void) +{ + int pfds[2]; + + if (socketpair (AF_UNIX, SOCK_STREAM, 0, pfds) < 0) 
{
+        fprintf (stderr, "systemsafe: socketpair failed: %s\n", strerror (errno));
+        return (-1);
+    }
+
+    client_fd = pfds[0];
+    server_fd = pfds[1];
+
+    fcntl (client_fd, F_SETFD, FD_CLOEXEC);
+    fcntl (server_fd, F_SETFD, FD_CLOEXEC);
+
+    return (0);
+}
+
+/*
+ *  Read one length-prefixed string from `fd' into a freshly allocated,
+ *   NUL-terminated buffer stored in *bufp (caller frees).
+ *
+ *  Returns the string length on success (0 with *bufp != NULL for an
+ *   empty string), 0 with *bufp == NULL on EOF, and -1 with
+ *   *bufp == NULL on error.
+ */
+static int read_string (int fd, char **bufp)
+{
+    int len = 0;
+    int rc;
+
+    *bufp = NULL;
+
+    /*
+     *  Read string length
+     */
+    if ((rc = read_n (fd, &len, sizeof (int))) < 0) {
+        fprintf (stderr, "systemsafe: read_string: %s\n", strerror (errno));
+        return (-1);
+    }
+
+    if (rc == 0)
+        return (0);
+
+    /* A truncated or negative length header cannot be trusted */
+    if ((rc != (int) sizeof (int)) || (len < 0)) {
+        errno = EINVAL;
+        fprintf (stderr, "systemsafe: read_string: bad length header\n");
+        return (-1);
+    }
+
+    if ((*bufp = malloc ((size_t) len + 1)) == NULL) {
+        fprintf (stderr, "systemsafe: read_string: malloc (%d): %s\n",
+                 len, strerror (errno));
+        return (-1);
+    }
+
+    if ((rc = read_n (fd, *bufp, len)) < 0) {
+        fprintf (stderr, "systemsafe: read_string: %s\n", strerror (errno));
+        free (*bufp);
+        *bufp = NULL;
+        return (-1);
+    }
+
+    /* Peer went away mid-string: treat as EOF, and do not leak */
+    if (rc < len) {
+        free (*bufp);
+        *bufp = NULL;
+        return (0);
+    }
+
+    (*bufp) [len] = '\0';
+
+    return (len);
+}
+
+/*
+ *  Write `str' to `fd' as an int length followed by the bytes (no
+ *   terminating NUL). A NULL `str' is sent as the empty string.
+ *   Returns the number of string bytes written, or -1 on error.
+ */
+static int write_string (int fd, const char *str)
+{
+    int len;
+    int rc;
+
+    if (str == NULL)
+        str = "";
+    len = strlen (str);
+
+    if (write_n (fd, &len, sizeof (int)) < 0) {
+        fprintf (stderr, "systemsafe: write: %s\n", strerror (errno));
+        return (-1);
+    }
+
+    rc = write_n (fd, str, len);
+
+    return (rc);
+}
+
+/*
+ *  Free a NULL-terminated environment array and all of its entries.
+ *   A NULL `env' is a no-op.
+ */
+void free_env (char **env)
+{
+    int i = 0;
+
+    if (env == NULL)
+        return;
+    while (env [i])
+        free (env [i++]);
+    free (env);
+    return;
+}
+
+/*
+ *  Read an environment (int count, then that many strings) from `fd'
+ *   into a newly allocated NULL-terminated array stored in *envp.
+ *   The value of any LD_PRELOAD entry is cut off after the `='.
+ *
+ *  Returns 0 on success, -1 on error (with *envp set to NULL).
+ */
+int read_env (int fd, char ***envp)
+{
+    int envc = 0;
+    int rc;
+    int i;
+
+    *envp = NULL;
+
+    if ((rc = read_n (fd, &envc, sizeof (int))) < 0) {
+        fprintf (stderr, "systemsafe: read_env: %s\n", strerror (errno));
+        return (-1);
+    }
+
+    if ((rc != (int) sizeof (int)) || (envc < 0)) {
+        errno = EINVAL;
+        fprintf (stderr, "systemsafe: read_env: bad entry count\n");
+        return (-1);
+    }
+
+    /*
+     *  Zero-filled so the array is NULL-terminated at every point and
+     *   free_env () is safe on a partially read environment.
+     */
+    if (!(*envp = calloc ((size_t) envc + 1, sizeof (**envp)))) {
+        fprintf (stderr, "systemsafe: read_env: malloc: %s\n", strerror (errno));
+        return (-1);
+    }
+
+    for (i = 0; i < envc; i++) {
+        char *entry;
+        if ((read_string (fd, &entry) < 0) || (entry == NULL)) {
+            fprintf (stderr, "systemsafe: %s\n", strerror (errno));
+            free_env (*envp);
+            *envp = NULL;
+            return (-1);
+        }
+
+        /*
+         *  Compare all 11 characters of "LD_PRELOAD=" so entry[11] is
+         *   known to lie inside the string before it is overwritten.
+         */
+        if (strncmp ("LD_PRELOAD=", entry, 11) == 0)
+            entry [11] = '\0';
+
+        (*envp)[i] = entry;
+    }
+
+    (*envp)[envc] = NULL;
+
+    return (0);
+}
+
+/*
+ *  Serve one request on `fd': read the command, working directory and
+ *   environment, run the command with the real system(3) and write
+ *   its int status back. Exits the server process on EOF or on any
+ *   read error.
+ */
+static void handle_system_request (int fd)
+{
+    char *cmd, *path, **env, **oldenv;
+    int rc;
+
+    if ((rc = read_string (fd, &cmd)) < 0) {
+        fprintf (stderr, "systemsafe: read cmd: %s\n", strerror (errno));
+        exit (0);
+    }
+
+    if (rc == 0)  /* EOF, time to exit */
+        exit (0);
+
+    /* A NULL path means the stream ended in the middle of a request */
+    if ((read_string (fd, &path) < 0) || (path == NULL)) {
+        fprintf (stderr, "systemsafe: read path: %s\n", strerror (errno));
+        exit (0);
+    }
+
+    if (read_env (fd, &env) < 0) {
+        fprintf (stderr, "systemsafe: read env: %s\n", strerror (errno));
+        exit (0);
+    }
+
+    if (chdir (path) < 0)
+        fprintf (stderr, "systemsafe: Failed to chdir to %s: %s\n",
+                 path, strerror (errno));
+
+    /* Run the command under the client's environment */
+    oldenv = environ;
+    environ = env;
+
+    rc = (*real_system) (cmd);
+
+    write_n (fd, &rc, sizeof (int));
+
+    environ = oldenv;
+    free_env (env);
+    free (cmd);
+    free (path);
+
+    return;
+}
+
+/*
+ *  Server process main loop: signal readiness with one byte, then
+ *   serve requests until handle_system_request () exits.
+ */
+static void system_server (void)
+{
+    char c = 0;
+    close (client_fd);
+    write (server_fd, &c, 1);
+    for (;;)
+        handle_system_request (server_fd);
+    return;
+}
+
+/*
+ *  Create the socketpair and fork the server process.
+ *   Returns 0 once the server has signalled readiness, -1 on failure.
+ */
+static int create_system_server (void)
+{
+    pid_t pid;
+    char c;
+
+    if (create_socketpair () < 0)
+        return (-1);
+
+    if ((pid = fork ()) < 0)
+        return (-1);
+
+    if (pid == 0) {
+        system_server ();
+        exit (0);
+    }
+
+    close (server_fd);
+
+    /*
+     * Wait for system_server setup to complete
+     */
+    if (read (client_fd, &c, 1) != 1)
+        return (-1);
+
+    return (0);
+}
+
+/*
+ *  Send the current environment over `fd': an int count followed by
+ *   each entry as a length-prefixed string.
+ *   Returns 0 on success, -1 if any write fails.
+ */
+static int write_env (int fd)
+{
+    int i, envc = 0;
+
+    while (environ[envc])
+        envc++;
+
+    /* write_n retries short writes, unlike a bare write(2) */
+    if (write_n (fd, &envc, sizeof (int)) < 0)
+        return (-1);
+
+    for (i = 0; i < envc; i++) {
+        if (write_string (fd, environ [i]) < 0)
+            return (-1);
+    }
+
+    return (0);
+}
+
+/*
+ *  Replacement for system(3): forward the command, the current
+ *   working directory and the environment to the server process and
+ *   return the status it reports.
+ *
+ *  Returns -1 if `cmd' is NULL (errno = EINVAL) or if the request
+ *   cannot be sent or no complete status is received.
+ */
+int system (const char *cmd)
+{
+    int rc;
+    int n;
+    char path [4096];
+    const char *cwd;
+
+    if (cmd == NULL) {
+        errno = EINVAL;
+        return (-1);
+    }
+
+    /*
+     *  getcwd(3) returns NULL on failure (e.g. cwd was removed);
+     *   fall back to "." so the server stays where it is.
+     */
+    if ((cwd = getcwd (path, sizeof (path))) == NULL)
+        cwd = ".";
+
+    if ((write_string (client_fd, cmd) < 0)
+        || (write_string (client_fd, cwd) < 0)
+        || (write_env (client_fd) < 0)) {
+        fprintf (stderr, "system: failed to send request to server: %s\n",
+                 strerror (errno));
+        return (-1);
+    }
+
+    /*
+     *  A short read (including EOF when the server died) leaves rc
+     *   unset, so anything but a full int is a failure.
+     */
+    n = read_n (client_fd, &rc, sizeof (int));
+    if (n != (int) sizeof (int)) {
+        if (n >= 0)
+            errno = EIO;
+        fprintf (stderr, "system: failed to read status from server: %s\n",
+                 strerror (errno));
+        return (-1);
+    }
+
+    return (rc);
+}
+
+void __attribute__ ((constructor)) fork_safe_init
(void) +{ + if ((libc_handle = dlopen ("libc.so.6", RTLD_LAZY)) == NULL) { + exit (1); + } + + if ((real_system = dlsym (libc_handle, "system")) == NULL) + exit (2); + + create_system_server (); + + return; +} + + +/* + * vi: ts=4 sw=4 expandtab + */ + diff --git a/system-safe.c b/system-safe.c new file mode 100644 index 0000000..af07096 --- /dev/null +++ b/system-safe.c @@ -0,0 +1,123 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +#include +#include +#include + +#include + +/* + * All spank plugins must define this macro for the SLURM plugin loader. 
+ */
+SPANK_PLUGIN(system-safe, 1)
+
+#define SYSTEM_SAFE_ENABLE  0x0
+#define SYSTEM_SAFE_DISABLE 0x1
+
+/*
+ *  Disabled by default
+ */
+static int enabled = 0;
+static int opt_enable = 0;
+static int opt_disable = 0;
+
+static int _opt_process (int val, const char *optarg, int remote);
+
+/*
+ *  Provide --system-safe and --no-system-safe options to srun:
+ */
+struct spank_option spank_options[] =
+{
+    { "system-safe", NULL, "Replace system(3) with version safe for MPI.",
+      0, SYSTEM_SAFE_ENABLE,
+      (spank_opt_cb_f) _opt_process
+    },
+    { "no-system-safe", NULL, "Disable system(3) replacement.",
+      0, SYSTEM_SAFE_DISABLE,
+      (spank_opt_cb_f) _opt_process
+    },
+    SPANK_OPTIONS_TABLE_END
+};
+
+
+/*
+ *  Called from both srun and slurmd.
+ *
+ *  In the remote context only, plugin arguments starting with
+ *   "enabled" / "disabled" set the default for the preload.
+ */
+int slurm_spank_init (spank_t sp, int ac, char **av)
+{
+    int i;
+
+    if (!spank_remote (sp))
+        return (0);
+
+    for (i = 0; i < ac; i++) {
+        if (strncmp ("enabled", av[i], 7) == 0) {
+            enabled = 1;
+        }
+        else if (strncmp ("disabled", av[i], 8) == 0) {
+            enabled = 0;
+        }
+        else {
+            slurm_error ("system-safe: Invalid option \"%s\"", av[i]);
+        }
+    }
+
+    return (0);
+}
+
+/*
+ *  Append system-safe-preload.so to the job's LD_PRELOAD unless the
+ *   feature is disabled. Always returns 0; failures are only logged.
+ */
+int slurm_spank_user_init (spank_t sp, int ac, char **av)
+{
+    char cur [4096];
+    char buf [4096];
+    int n;
+    const char *preload = "system-safe-preload.so";
+
+    if (opt_disable || (!enabled && !opt_enable))
+        return (0);
+
+    /*
+     *  Build the new value in a second buffer: snprintf with the
+     *   destination also used as a source is undefined, and the old
+     *   strncpy (buf, preload, strlen (preload)) left buf unterminated.
+     */
+    if ((spank_getenv (sp, "LD_PRELOAD", cur, sizeof (cur)) == ESPANK_SUCCESS)
+        && (cur [0] != '\0'))
+        n = snprintf (buf, sizeof (buf), "%s %s", cur, preload);
+    else
+        n = snprintf (buf, sizeof (buf), "%s", preload);
+
+    if ((n < 0) || ((size_t) n >= sizeof (buf))) {
+        slurm_error ("system-safe: LD_PRELOAD value too long, not set");
+        return (0);
+    }
+
+    if (spank_setenv (sp, "LD_PRELOAD", buf, 1) != ESPANK_SUCCESS)
+        slurm_error ("Failed to set LD_PRELOAD=%s\n", buf);
+
+    return (0);
+}
+
+/*
+ *  Option callback shared by --system-safe and --no-system-safe;
+ *   `val' is the SYSTEM_SAFE_* value from the option table.
+ */
+static int _opt_process (int val, const char *optarg, int remote)
+{
+    if (val == SYSTEM_SAFE_ENABLE)
+        opt_enable = 1;
+    else
+        opt_disable = 1;    /* was 0, which made --no-system-safe a no-op */
+
+    return (0);
+}
+
+
+
+/*
+ * vi: ts=4 sw=4 expandtab
+ */
diff --git a/tmpdir.c b/tmpdir.c
new file mode 100644
index 0000000..c3bad13
--- /dev/null
+++ b/tmpdir.c
@@ -0,0 +1,111 @@
+/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +#include +#include +#include +#include + +SPANK_PLUGIN (tmpdir, 1); + +/* + * Create job-specific TMPDIR. + * Called from srun after allocation before launch. + * Does the equivalent of TMPDIR=${TMPDIR-/tmp}/$SLURM_JOBID.$SLURM_STEPID + */ +int slurm_spank_local_user_init (spank_t sp, int ac, char **av) +{ + uint32_t jobid, stepid; + const char *tmpdir; + char buf [1024]; + int n; + + if (spank_get_item (sp, S_JOB_ID, &jobid) != ESPANK_SUCCESS) { + slurm_error ("Failed to get jobid from SLURM"); + return (-1); + } + + if (spank_get_item (sp, S_JOB_STEPID, &stepid) != ESPANK_SUCCESS) { + slurm_error ("Failed to get job step id from SLURM"); + return (-1); + } + + if (!(tmpdir = getenv ("TMPDIR"))) + tmpdir = "/tmp"; + + n = snprintf (buf, sizeof (buf), "%s/%u.%u", tmpdir, jobid, stepid); + + if ((n < 0) || (n > sizeof (buf) - 1)) { + slurm_error ("TMPDIR = \"%s\" too large. 
Aborting", tmpdir);
+        return (-1);
+    }
+
+    if (setenv ("TMPDIR", buf, 1) < 0) {
+        slurm_error ("setenv (TMPDIR, \"%s\"): %m", buf);
+        return (-1);
+    }
+
+    return (0);
+}
+
+/*
+ *  ``rm -rf TMPDIR'' *as user* after job tasks have exited
+ *
+ *  Runs in the remote context only. Returns 0 on success (or when not
+ *   remote), -1 if TMPDIR or the job uid cannot be determined, if
+ *   TMPDIR cannot be quoted safely, or if the command fails.
+ */
+int slurm_spank_exit (spank_t sp, int ac, char **av)
+{
+    const char sudo [] = "/usr/bin/sudo -u";
+    const char rm []   = "/bin/rm -rf";
+    char tmp [1024];
+    char cmd [4096];
+    const char *p;
+    int n;
+    int status;
+    uid_t uid = (uid_t) -1;
+
+    if (!spank_remote (sp))
+        return (0);
+
+    if (spank_getenv (sp, "TMPDIR", tmp, sizeof (tmp)) != ESPANK_SUCCESS) {
+        slurm_error ("Unable to remove TMPDIR at exit!");
+        return (-1);
+    }
+
+    if (spank_get_item (sp, S_JOB_UID, &uid) != ESPANK_SUCCESS) {
+        slurm_error ("tmpdir: Unable to get job's user id");
+        return (-1);
+    }
+
+    /*
+     *  TMPDIR comes from the job's environment and ends up on a shell
+     *   command line. It is passed inside single quotes below, so the
+     *   only character that could break out of the quoting is a single
+     *   quote itself: refuse such values (and empty ones).
+     */
+    if (tmp [0] == '\0') {
+        slurm_error ("tmpdir: TMPDIR is empty, nothing to remove");
+        return (-1);
+    }
+    for (p = tmp; *p != '\0'; p++) {
+        if (*p == '\'') {
+            slurm_error ("tmpdir: Refusing to remove TMPDIR containing a quote");
+            return (-1);
+        }
+    }
+
+    /* "--" keeps a TMPDIR starting with '-' from being read as an option */
+    n = snprintf (cmd, sizeof (cmd), "%s \\#%u %s -- '%s'",
+                  sudo, (unsigned int) uid, rm, tmp);
+
+    if ((n < 0) || ((size_t) n >= sizeof (cmd))) {
+        slurm_error ("Unable to remove TMPDIR at exit!");
+        return (-1);
+    }
+
+    if ((status = system (cmd)) != 0) {
+        slurm_error ("\"%s\" exited with status=0x%04x\n", cmd, status);
+        return (-1);
+    }
+
+    return (0);
+}
diff --git a/use-env/Makefile b/use-env/Makefile
new file mode 100644
index 0000000..7043884
--- /dev/null
+++ b/use-env/Makefile
@@ -0,0 +1,27 @@
+
+OBJS   := lex.yy.o use-env-parser.o ../lib/list.o log_msg.o ../lib/split.o
+HDRS   := use-env.h ../lib/list.h ../lib/split.h log_msg.h use-env-parser.h
+SHOPTS := -shared -Wl,--version-script=version.map
+
+all: use-env.so test
+
+use-env.so : $(OBJS) use-env.o
+	$(CC) $(SHOPTS) -o use-env.so $(OBJS) use-env.o
+
+test: $(OBJS) main.o
+	$(CC) -ggdb -o test $(OBJS) main.o
+
+check: test
+	./test -f test.conf
+
+.c.o :
+	$(CC) -ggdb -I../lib -Wall $(CFLAGS) -o $@ -fPIC -c $<
+
+use-env-parser.c use-env-parser.h : use-env-parser.y
+	bison -d -o use-env-parser.c $<
+
+lex.yy.c : use-env-parser.l use-env-parser.h
+	lex $<
+
+clean:
+	rm -f test *.o use-env-parser.[ch] lex.yy.c *.so
diff --git a/use-env/log_msg.c
b/use-env/log_msg.c
new file mode 100644
index 0000000..25ba6cb
--- /dev/null
+++ b/use-env/log_msg.c
@@ -0,0 +1,241 @@
+/*****************************************************************************
+ *
+ * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory.
+ * Written by Mark Grondona .
+ *
+ * UCRL-CODE-235358
+ *
+ * This file is part of chaos-spankings, a set of spank plugins for SLURM.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ ****************************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+
+#include "use-env.h"
+
+/*
+ *  Logging state shared by all log_* functions in this file.
+ */
+struct log_ctx {
+    int quiet;
+    int verbose;
+    char *prefix;       /* heap copy owned by this module, or NULL */
+};
+
+/*
+ *  prefix starts out NULL rather than "": log_msg_fini () frees it,
+ *   and a string literal must never reach free(3).
+ */
+static struct log_ctx log_ctx = { 0, 0, NULL };
+
+/*
+ *  Set the facility prefix for log messages (a copy of `prefix' is
+ *   kept; NULL leaves the current prefix unchanged).
+ *   Returns 0 on success, -1 if the copy cannot be allocated.
+ */
+int log_msg_init (const char *prefix)
+{
+    if (prefix) {
+        char *copy = strdup (prefix);
+        if (copy == NULL)
+            return (-1);
+        /* Release any prefix from an earlier call */
+        free (log_ctx.prefix);
+        log_ctx.prefix = copy;
+    }
+    return (0);
+}
+
+/*
+ *  Release the prefix; safe to call more than once.
+ */
+void log_msg_fini ()
+{
+    free (log_ctx.prefix);
+    log_ctx.prefix = NULL;
+}
+
+/*
+ *  Raise the verbosity by one; returns the previous level.
+ */
+int log_msg_verbose ()
+{
+    return (log_ctx.verbose++);
+}
+
+/*
+ *  Set the verbosity to `level'; returns the new level.
+ */
+int log_msg_set_verbose (int level)
+{
+    return (log_ctx.verbose = level);
+}
+
+/*
+ *  Raise the quiet count by one; returns the previous count.
+ */
+int log_msg_quiet ()
+{
+    return (log_ctx.quiet++);
+}
+
+
+static void
+vlog_msg (const char *prefix, int use_basename, const char *format, va_list ap)
+{
+    char buf[4096];
+    char *p;
+    int n;
+    int len;
+
+    p = buf;
+    len = sizeof (buf);
+
+    /* Prefix output with facility name.
+ */ + if (log_ctx.prefix && (*log_ctx.prefix != '\0')) { + n = snprintf (buf, len, "%s: ", log_ctx.prefix); + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } + else { + p += n; + len -= n; + } + } + + /* Add a log level prefix. + */ + if ((len > 0) && (prefix)) { + n = snprintf (p, len, "%s: ", prefix); + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } + else { + p += n; + len -= n; + } + } + + /* Add file and line number information + */ + if (len > 0 && (lex_file () != NULL)) { + char *file = strdup (lex_file ()); + char *name = use_basename ? basename (file) : file; + + n = snprintf (p, len, "%s: %d: ", name, lex_line()); + + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } + else { + p += n; + len -= n; + } + free (file); + } + + if ((len > 0) && (format)) { + n = vsnprintf (p, len, format, ap); + if ((n < 0) || (n >= len)) { + p += len - 1; + len = 0; + } + else { + p += n; + len -= n; + } + } + + /* Add suffix for truncation if necessary. + */ + if (len <= 0) { + char *q; + const char *suffix = "+"; + q = buf + sizeof (buf) - 1 - strlen (suffix); + p = (p < q) ? p : q; + strcpy (p, suffix); + p += strlen (suffix); + } + + *p = '\0'; + + fprintf (stderr, "%s", buf); + + return; +} + + +int log_err (const char *format, ...) +{ + va_list ap; + + if (log_ctx.quiet) + return (-1); + + va_start (ap, format); + vlog_msg ("Error", 0, format, ap); + va_end (ap); + return (-1); +} + +void log_msg (const char *format, ...) +{ + va_list ap; + + if (log_ctx.quiet) + return; + + va_start (ap, format); + vlog_msg (NULL, 1, format, ap); + va_end (ap); + return; +} + +void log_verbose (const char *format, ...) +{ + va_list ap; + + if (log_ctx.quiet || !log_ctx.verbose) + return; + + va_start (ap, format); + vlog_msg (NULL, 1, format, ap); + va_end (ap); + return; +} + +void log_debug (const char *format, ...) 
+{ + va_list ap; + + if ((log_ctx.quiet) || (log_ctx.verbose < 2)) + return; + + va_start (ap, format); + vlog_msg (NULL, 1, format, ap); + va_end (ap); + return; +} + +void log_debug2 (const char *format, ...) +{ + va_list ap; + + if ((log_ctx.quiet) || (log_ctx.verbose < 3)) + return; + + va_start (ap, format); + vlog_msg (NULL, 1, format, ap); + va_end (ap); + return; +} + +void log_debug3 (const char *format, ...) +{ + va_list ap; + + if ((log_ctx.quiet) || (log_ctx.verbose < 4)) + return; + + va_start (ap, format); + vlog_msg (NULL, 1, format, ap); + va_end (ap); + return; +} + + + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/use-env/log_msg.h b/use-env/log_msg.h new file mode 100644 index 0000000..04ed6e2 --- /dev/null +++ b/use-env/log_msg.h @@ -0,0 +1,41 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + +#ifndef _LOG_MSG_H +#define _LOG_MSG_H + +int log_msg_init (const char *prefix); +void log_msg_fini (); + +int log_msg_verbose (); +int log_msg_set_verbose (int level); +int log_msg_quiet (); +int log_err (const char *format, ...); +void log_msg (const char *format, ...); +void log_verbose (const char *format, ...); +void log_debug (const char *format, ...); +void log_debug2 (const char *format, ...); +void log_debug3 (const char *format, ...); + +#endif /* !_LOG_MSG_H */ diff --git a/use-env/main.c b/use-env/main.c new file mode 100644 index 0000000..2851e7c --- /dev/null +++ b/use-env/main.c @@ -0,0 +1,92 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + +#include +#include + +#include "use-env.h" +#include "log_msg.h" + +extern int yydebug; +static char *run_as_task = NULL; + +int get_options (int ac, char **av, char **ppath, char **nnodes, char **nprocs) +{ + int c; + + while ((c = getopt (ac, av, "dvt:f:n:N:")) >= 0) { + switch (c) { + case 'd' : + yydebug = 1; + break; + case 'v': + log_msg_verbose (); + break; + case 'f': + *ppath = optarg; + break; + case 'n': + *nprocs = optarg; + break; + case 'N': + *nnodes = optarg; + break; + case 't': + run_as_task = optarg; + break; + case '?' : + default: + exit (1); + } + } + return (0); +} + + +int main (int ac, char **av) +{ + int rc = 0; + char *filename = NULL; + char *nnodes = "0"; + char *nprocs = "0"; + + log_msg_init ("use-env"); + + get_options (ac, av, &filename, &nnodes, &nprocs); + + keyword_define ("SLURM_NNODES", nnodes); + keyword_define ("SLURM_NPROCS", nprocs); + + if (run_as_task) { + keyword_define ("SLURM_PROCID", run_as_task); + keyword_define ("SLURM_NODEID", "0"); + } + + use_env_parser_init (run_as_task != NULL); + rc = use_env_parse (filename); + use_env_parser_fini (); + log_msg_fini (); + + return (rc); +} diff --git a/use-env/test.conf b/use-env/test.conf new file mode 100644 index 0000000..349b136 --- /dev/null +++ b/use-env/test.conf @@ -0,0 +1,79 @@ +# Test file for use-env parser + +# Comment + # Comment + # Comment # +FOO = 1 # Comment +FOO=2# + +A = 1 +A |= 2 + + B = 3 + +C = 1;D=1; + +if ($A == 2) + print "ERROR |= didn't seem to work" +endif + +PATH += /foo/bin + +PATH = "${PATH}:/usr/local/bin" + +print "$PATH" + +if (($A == 1) && ($B >= 3)) + C = 10 +else if ($A == 1) + print "ERROR else if fallthrough not working" +else + print "ERROR else fallthrough not working" +endif + +EMPTY = "" +EMPTY = + +print "EMPTY = \"$EMPTY\"" + +unset EMPTY + +define n = ${EMPTY}$SLURM_NPROCS +define N = $SLURM_NNODES + +define x = 101 +define y = 10 + +if ($x < 100) 
+ print "ERROR: x not < 100" +else if ($x < 200) + if ($y > 1) + # + else if ($y > 5) + print "ERROR: nested else if fallthrough failed" + else + print "ERROR: nested else fallthrough failed" + endif +else + print "ERROR: else fallthrough failed" +endif + +include test.conf.include + +undefine n + +dump all + + +set debuglevel 3 + +in task { + print "In task $SLURM_PROCID"; + if (defined $LD_PRELOAD) + LD_PRELOAD = "$LD_PRELOAD libfoo.so" + else + LD_PRELOAD = libfoo.so + endif +} + +print ~/bin diff --git a/use-env/test.conf.include b/use-env/test.conf.include new file mode 100644 index 0000000..f8e6a3d --- /dev/null +++ b/use-env/test.conf.include @@ -0,0 +1,3 @@ + +print "Included file" + diff --git a/use-env/use-env-parser.l b/use-env/use-env-parser.l new file mode 100644 index 0000000..9275075 --- /dev/null +++ b/use-env/use-env-parser.l @@ -0,0 +1,906 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/
+
+%{
+/*
+ * NOTE(review): the names of the seven system headers were stripped from
+ *  this patch text.  The list below is reconstructed from what the scanner
+ *  and its support code call (assert, isalpha, errno, dirname, FILE,
+ *  malloc/getenv, str*); the original order is unknown -- confirm upstream.
+ */
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "use-env.h"
+#include "use-env-parser.h"
+#include "list.h"
+#include "log_msg.h"
+
+/*
+ * String accumulator shared by the STR, STR2 and POSTOP conditions.
+ *  `s' always points at the terminating NUL of the text held in `buf'.
+ */
+static char *s;
+static char buf [4096];
+
+/* Set once per accumulation when input had to be dropped for lack of room */
+static int buf_truncated = 0;
+
+/*
+ * True if we've returned an item in POSTOP condition
+ */
+static int postop_got_item = 0;
+
+/* Must agree with the definition in use-env-parser.y */
+extern void yyerror (const char *);
+
+/*
+ * Empty the accumulator.  The whole buffer is zeroed so the text stays
+ *  NUL terminated no matter where writing stops.
+ */
+static void buf_reset (void)
+{
+    memset (s = buf, 0, sizeof (buf));
+    buf_truncated = 0;
+}
+
+/*
+ * Append one character, always leaving room for the terminating NUL.
+ *  Overflow is reported once and the excess input is discarded instead
+ *  of being written past the end of `buf'.
+ */
+static void buf_putc (char c)
+{
+    if (s < buf + sizeof (buf) - 1) {
+        *s++ = c;
+        return;
+    }
+    if (!buf_truncated) {
+        buf_truncated = 1;
+        log_err ("string too long, truncated to %d bytes\n",
+                 (int) sizeof (buf) - 1);
+    }
+}
+
+/*
+ * Append a NUL terminated string with the same bounds check as buf_putc().
+ */
+static void buf_puts (const char *str)
+{
+    while (*str != '\0')
+        buf_putc (*str++);
+}
+
+/*
+ * Macro for entering POSTOP start condition:
+ * - Initialize buf and string pointer `s'
+ * - reset postop_got_item to 0
+ */
+#define BEGIN_POSTOP \
+    do { \
+        buf_reset (); \
+        BEGIN (POSTOP); \
+        postop_got_item = 0; \
+    } while (0)
+
+/*
+ * Initialize string buffer and begin STR condition.
+ */
+#define BEGIN_STR \
+    do { \
+        buf_reset (); \
+        BEGIN (STR); \
+    } while (0)
+
+/*
+ * Place a bracketed identifier ${id} item into yylval.item
+ */
+#define GET_BRACKETED_ITEM \
+    do { \
+        yytext [strlen(yytext) - 1] = '\0'; /* Nullify closing brace */ \
+        yylval.item = lex_item_create (yytext+2, TYPE_SYM); \
+    } while (0)
+
+
+
+%}
+
+%option noyywrap
+
+digit [0-9]
+alpha [a-zA-Z]
+alnum [0-9a-zA-Z]
+ident [0-9a-zA-Z_]
+id [_a-zA-Z][0-9a-zA-Z_]*
+p [\)<>;=+\n \t#]
+
+/*
+ * Start conditions:
+ *   STR     double-quoted string opened in the INITIAL condition
+ *   STR2    double-quoted string opened while in POSTOP
+ *   POSTOP  text following an operator, `print' or `include'
+ *
+ * NOTE(review): the <...> start-condition prefixes of the rule groups in
+ *  the rules section (and the end-of-file rule) were stripped from this
+ *  patch text.  They are reconstructed from what each group does:
+ *  its BEGIN targets and the state its closing quote returns to.
+ *  Confirm against upstream.
+ */
+%x STR STR2 POSTOP
+
+%%
+
+[ \t]+          ; /* Ignore whitespace */
+#[^\n]*         ; /* Ignore comments */
+
+\n              lex_line_increment (); return '\n';
+
+\"              BEGIN_STR;
+
+dump            return DUMP;
+define          return DEF;
+undefine        return UNDEF;
+set             return SET;
+unset           return UNSET;
+if              return IF;
+else            return ELSE;
+endif           return ENDIF;
+defined         return DEFINED;
+"in task"       return IN_TASK;
+match(es)?      return MATCH;
+
+print           BEGIN_POSTOP; return PRINT;
+include         BEGIN_POSTOP; return INCLUDE;
+"|="            BEGIN_POSTOP; return COND_SET;
+"+="            BEGIN_POSTOP; return PREPEND;
+"=+"            BEGIN_POSTOP; return APPEND;
+"="             BEGIN_POSTOP; return '=';
+","             return ',';
+"!"             return '!';
+"("             return '(';
+")"             return ')';
+"{"             return '{';
+"}"             return '}';
+";"             return ';';
+"<"             return LT;
+">"             return GT;
+"=="            return EQ;
+"<="            return LE;
+">="            return GE;
+"!="            return NE;
+"&&"            return AND;
+"||"            return OR;
+
+
+[0-9]+/{p}      { yylval.item = lex_item_create (yytext, TYPE_INT); return ITEM; }
+
+{ident}+        { yylval.item = lex_item_create (yytext, TYPE_STR); return ITEM; }
+
+\${id}          { yylval.item = lex_item_create (yytext+1, TYPE_SYM); return ITEM;}
+
+\$\{{id}\}      { GET_BRACKETED_ITEM; return ITEM; }
+
+
+<POSTOP>{
+    \"          { BEGIN (STR2); }
+
+    (\n|;)      {
+        BEGIN INITIAL;
+        unput (*yytext); /* Return the newline or ; to the stream */
+        if (strlen (buf) || !postop_got_item) {
+            yylval.item = lex_item_create (buf, TYPE_STR);
+            return ITEM;
+        }
+    }
+
+    [ \t]+      {
+        if (strlen (buf)) { /* Don't return an empty string separated by ws */
+            postop_got_item = 1;
+            yylval.item = lex_item_create (buf, TYPE_STR);
+            buf_reset ();
+            return ITEM;
+        }
+    }
+
+    #[^\n]*     ; /* Skip comments */
+
+    \\\         { buf_putc (' '); /* backslash-space is a literal space */ }
+}
+
+<STR>{
+    \"          {
+        BEGIN INITIAL;
+        yylval.item = lex_item_create (buf, TYPE_STR);
+        return ITEM;
+    }
+}
+
+<STR2>{
+    \"          {
+        postop_got_item = 1;
+        *s = '\0';
+        yylval.item = lex_item_create (buf, TYPE_STR);
+        buf_reset ();
+        BEGIN POSTOP;
+        return ITEM;
+    }
+}
+
+<STR,STR2>{
+    \n          {
+        log_err ("Unterminated double-quoted string?\n");
+        lex_line_increment ();
+        BEGIN (INITIAL);
+    }
+}
+
+<STR,STR2,POSTOP>{
+    ~           {
+        const char *home;
+        /* Only a leading ~ is expanded, and only if HOME is set */
+        if ((s == buf) && (home = getenv ("HOME")))
+            buf_puts (home);
+        else
+            buf_putc ('~');
+    }
+
+    \${id}      {
+        const struct sym *m = sym (yytext+1);
+        if (m)
+            buf_puts (m->string);
+    }
+    \$\{{id}\}  {
+        const struct sym *m;
+        yytext[strlen(yytext)-1] = '\0'; /* Nullify closing brace */
+        if ((m = sym (yytext+2)))
+            buf_puts (m->string);
+    }
+    \\\$        { buf_putc ('$'); /* was \\$, i.e. a backslash at end of line */ }
+    \\n         { buf_putc ('\n'); }
+    \\t         { buf_putc ('\t'); }
+    \\r         { buf_putc ('\r'); }
+    \\\"        { buf_putc ('\"'); }
+    .           { buf_putc (*yytext); }
+}
+
+
+<<EOF>> {
+    if (!lex_include_pop ())
+        yyterminate ();
+}
+
+%%
+
+
+/****************************************************************************
+ * Data Types
+ ****************************************************************************/
+
+/*
+ * One open input file: its stream, the path it was opened under, the
+ *  current line number and the flex buffer reading from it.
+ */
+struct file_info {
+    FILE * fp;
+    char * path;
+    int line;
+    YY_BUFFER_STATE yybuf;
+};
+
+
+/****************************************************************************
+ * Static Globals
+ ****************************************************************************/
+
+/* Stack of files suspended by `include'; `current' is the one being read */
+static List includes = NULL;
+static struct file_info *current;
+
+/*
+ * Three-level symbol table. I know, overly complex - but it is actually
+ * pretty simple.
+ *
+ * Keywords (stored in the keytab) have the highest precedence and
+ * cannot be overridden by the config file nor environment.
+ *
+ * Local symbols defined by the user using the "define" command are
+ * stored in the symtab. These have higher precedence than environment
+ * variables, and can be updated and changed by the user with
+ * subsequent ``define'' invocations.
+ *
+ * The envtab contains cached environment variable "symbol" records
+ * for later destruction.
+ */
+static List keytab = NULL;
+static List symtab = NULL;
+static List envtab = NULL;
+
+/* Pool of lex_item records handed out by lex_item_create () */
+static List itemcache = NULL;
+
+/****************************************************************************
+ * Include file functions
+ ****************************************************************************/
+
+/*
+ * Release a file_info: its path, its stream (unless it is stdin) and its
+ *  flex buffer.  Accepts NULL.
+ */
+static void file_info_destroy (struct file_info *f)
+{
+    if (f == NULL)
+        return;
+    free (f->path);
+    if (f->fp && f->fp != stdin)
+        fclose (f->fp);
+    if (f->yybuf)
+        yy_delete_buffer (f->yybuf);
+    free (f);
+}
+
+/*
+ * Open `path' (or stdin when path is NULL) and wrap it in a new flex
+ *  buffer.  Returns NULL after logging if the file cannot be opened, or
+ *  silently if memory runs out.
+ */
+static struct file_info * file_info_create (const char *path)
+{
+    /* calloc: the record must be zeroed, and checked *before* any use */
+    struct file_info *f = calloc (1, sizeof (*f));
+
+    if (f == NULL)
+        return (NULL);
+
+    f->line = 1;
+
+    if (path == NULL) {
+        f->path = strdup ("stdin");
+        f->fp = stdin;
+    } else {
+        f->path = strdup (path);
+
+        if ((f->fp = fopen (path, "r")) == NULL) {
+            if (current)
+                log_err ("failed to include \"%s\"\n", path);
+            else
+                log_err ("Failed to open %s: %s\n", path, strerror (errno));
+
+            file_info_destroy (f);
+            return (NULL);
+        }
+    }
+
+    /* find_f () and lex_file () rely on a valid path string */
+    if (f->path == NULL) {
+        file_info_destroy (f);
+        return (NULL);
+    }
+
+    f->yybuf = yy_create_buffer (f->fp, YY_BUF_SIZE);
+
+    return (f);
+}
+
+/*
+ * Make `f' the file the scanner reads from.
+ */
+static int lex_switch_buffer (struct file_info *f)
+{
+    yyin = f->fp;
+    yy_switch_to_buffer (f->yybuf);
+    current = f;
+    return (0);
+}
+
+/* List match callback: true when f was opened under path `file' */
+static int find_f (struct file_info *f, char *file)
+{
+    return (strcmp (f->path, file) == 0);
+}
+
+/*
+ * Start scanning `path' (stdin if NULL).  Returns 0, or -1 on failure.
+ */
+int lex_file_init (const char *path)
+{
+    struct file_info *f = file_info_create (path);
+
+    if (f == NULL)
+        return (-1);
+
+    lex_switch_buffer (f);
+
+    return (0);
+}
+
+/* Path of the file being scanned, or NULL when there is none */
+const char * lex_file ()
+{
+    if (!current)
+        return (NULL);
+    return (current->path);
+}
+
+/* Current line of the file being scanned, or 0 when there is none */
+int lex_line ()
+{
+    if (!current)
+        return (0);
+    return (current->line);
+}
+
+/* Advance the line counter; returns the line number *before* the bump */
+int lex_line_increment ()
+{
+    if (!current)
+        return (0);
+    return (current->line++);
+}
+
+/*
+ * Resolve `include' relative to the directory of the including file
+ *  `path' ("." when reading stdin); absolute names are used as given.
+ *  The result is always written to buf[len] and buf is returned, so the
+ *  caller owns nothing.  Returns NULL if the result does not fit or
+ *  memory runs out.
+ */
+static char * full_path (const char *path, const char *include,
+                         char *buf, size_t len)
+{
+    int n;
+
+    if (include[0] == '/')
+        n = snprintf (buf, len, "%s", include);
+    else if (strcmp ("stdin", path) == 0)
+        n = snprintf (buf, len, "./%s", include);
+    else {
+        /* dirname(3) may modify its argument: work on a private copy */
+        char *p = strdup (path);
+
+        if (p == NULL)
+            return (NULL);
+        n = snprintf (buf, len, "%s/%s", dirname (p), include);
+        free (p);
+    }
+
+    if ((n < 0) || ((size_t) n >= len)) {
+        log_err ("path of included file \"%s\" is too long\n", include);
+        return (NULL);
+    }
+
+    return (buf);
+}
+
+
+/*
+ * Suspend the current file and continue scanning in `include'.
+ *  Returns 0 on success and -1 if the file cannot be opened, is already
+ *  being read, or includes are nested too deep.
+ */
+int lex_include_push (const char *include)
+{
+    struct file_info *f;
+    char buf [4096];
+    char *path;
+
+    assert (include != NULL);
+    assert (current != NULL);
+
+    /*
+     * Decrement line counter for this file so that error messages
+     * correspond to the line that the include is on.
+     */
+    current->line--;
+
+    path = full_path (current->path, include, buf, sizeof (buf));
+
+    if ((path == NULL) || !(f = file_info_create (path)))
+        return (-1);
+
+    if (!includes)
+        includes = list_create ((ListDelF) file_info_destroy);
+
+    /*
+     * `current' is not on the include stack yet, so test it explicitly:
+     *  otherwise a file including itself is only caught one level late.
+     */
+    if ((strcmp (current->path, f->path) == 0)
+       || list_find_first (includes, (ListFindF) find_f, f->path)) {
+        log_err ("Recursively included file\n");
+        file_info_destroy (f);
+        return (-1);
+    }
+
+    if (list_count (includes) > 20) {
+        log_err ("include files nested too deep\n");
+        file_info_destroy (f);
+        return (-1);
+    }
+    log_verbose ("including file %s\n", f->path);
+
+    /* Remember where the including file stopped */
+    current->fp = yyin;
+    current->yybuf = YY_CURRENT_BUFFER;
+
+    list_push (includes, current);
+
+    lex_switch_buffer (f);
+
+    return (0);
+}
+
+/*
+ * Called at end of file: resume the including file, if any.
+ *  Returns 1 if scanning continues in a parent file, 0 if input is done.
+ */
+int lex_include_pop ()
+{
+    struct file_info *f, *tmp = current;
+
+    if (!includes)
+        return (0);
+
+    assert (current);
+
+    if (!(f = list_pop (includes)))
+        return (0);
+
+    lex_switch_buffer (f);
+
+    /*
+     * Re-increment line counter when popping back to original file.
+     */
+    current->line++;
+
+    log_verbose ("popping back to file %s\n", current->path);
+
+    file_info_destroy (tmp);
+
+    return (1);
+}
+
+
+/****************************************************************************
+ * Lex Item Functions
+ ****************************************************************************/
+
+
+/*
+ * Free what an item owns and zero it.  `str' is owned only for a
+ *  TYPE_SYM item whose symbol was not found (see lex_item_create ());
+ *  otherwise it aliases `name' or the symbol's string.
+ */
+static void lex_item_clear (struct lex_item *i)
+{
+    if ((i->type == TYPE_SYM) && (i->val.sym == NULL) && i->str)
+        free (i->str);
+    free (i->name);
+    memset (i, 0, sizeof (*i));
+}
+
+static void lex_item_destroy (struct lex_item *i)
+{
+    lex_item_clear (i);
+    free (i);
+}
+
+/* List match callback: true for items not currently handed out */
+static int item_unused (struct lex_item *i, void *arg)
+{
+    return (i->used == 0);
+}
+
+static struct lex_item * item_cache_find_unused ()
+{
+    if (itemcache == NULL)
+        itemcache = list_create ((ListDelF) lex_item_destroy);
+
+    return (list_find_first (itemcache, (ListFindF) item_unused, NULL));
+}
+
+/*
+ * Get a zeroed item from the cache, growing the cache when every item
+ *  is in use.  Returns NULL if memory runs out.
+ */
+static struct lex_item * lex_item_alloc ()
+{
+    struct lex_item *i = item_cache_find_unused ();
+
+    if (i == NULL) {
+        log_debug3 ("allocated new lex_item\n");
+        if ((i = calloc (1, sizeof (*i))) == NULL)
+            return (NULL);
+        list_append (itemcache, i);
+    } else
+        log_debug3 ("pulled lex_item off cache with %d items\n",
+                    list_count (itemcache));
+
+    i->used = 1;
+
+    return (i);
+}
+
+/*
+ * Build an item for the token text `name'.
+ *
+ *  TYPE_STR  value is the text itself
+ *  TYPE_INT  value is the text converted to a number
+ *  TYPE_SYM  `name' is looked up with sym (); an unknown symbol yields
+ *            an item whose string value is ""
+ *
+ * The item belongs to the item cache and stays valid until
+ *  lex_item_cache_clear ().  Returns NULL if no item could be allocated.
+ */
+struct lex_item * lex_item_create (char *name, int type)
+{
+    struct lex_item *i = lex_item_alloc ();
+
+    if (i == NULL)
+        return (NULL);
+
+    i->name = strdup (name);
+    i->str = i->name;
+    i->type = type;
+
+    if (type == TYPE_STR)
+        i->val.str = i->name;
+    else if (type == TYPE_INT)
+        i->val.num = (int) strtol (name, NULL, 10);
+    else if (type == TYPE_SYM) {
+        if ((i->val.sym = sym (name)))
+            i->str = i->val.sym->string;
+        else
+            i->str = strdup ("");
+    }
+
+    log_debug2 ("creating item \"%s\"\n", name);
+
+    return (i);
+}
+
+/* List iteration callback: return a used item to the pool */
+static int item_clear (struct lex_item *i, void *arg)
+{
+    if (i->used) {
+        lex_item_clear (i);
+        i->used = 0;
+    }
+    return (0);
+}
+
+/*
+ * Recycle every item handed out so far.  Pointers obtained from
+ *  lex_item_create () are stale after this call.
+ */
+void lex_item_cache_clear ()
+{
+    if (itemcache == NULL)
+        return;
+
+    log_debug3 ("clearing %d items in cache\n", list_count (itemcache));
+
+    list_for_each (itemcache, (ListForF) item_clear, NULL);
+}
+
+/*
+ * True if the item is an integer literal or a symbol with integer value.
+ */
+int item_type_int (struct lex_item *i)
+{
+    if (i->type == TYPE_INT)
+        return (1);
+    if ((i->type == TYPE_SYM) && i->val.sym && (i->val.sym->type == SYM_INT))
+        return (1);
+    return (0);
+}
+
+/*
+ * Integer value of an item.  The item must satisfy item_type_int ().
+ */
+int item_val (struct lex_item *item)
+{
+    assert (item_type_int (item));
+
+    if (item->type == TYPE_INT)
+        return (item->val.num);
+
+    if (item->type == TYPE_SYM)
+        return (item->val.sym->val);
+
+    return (0);
+}
+
+char * item_str (struct lex_item *item)
+{
+    return (item->str);
+}
+
+int item_strcmp (struct lex_item *x, struct lex_item *y)
+{
+    return (strcmp (item_str (x), item_str (y)));
+}
+
+/* Printable form of a comparison token, for log messages */
+static const char * cmp_str (int cmp)
+{
+    switch (cmp) {
+    case LT: return "<";
+    case GT: return ">";
+    case LE: return "<=";
+    case GE: return ">=";
+    case EQ: return "==";
+    case NE: return "!=";
+    }
+    return ("??");
+}
+
+/*
+ * Evaluate `x cmp y'.  Ordering comparisons need two integer items;
+ *  EQ and NE compare as integers when both items are integers and as
+ *  strings otherwise.  Returns 1 or 0, or -1 (after logging) when the
+ *  comparison is not valid.
+ */
+int item_cmp (int cmp, struct lex_item *x, struct lex_item *y)
+{
+    int rv = -1;
+    int both_int = (item_type_int (x) && item_type_int (y));
+
+    switch (cmp) {
+    case LT:
+        if (both_int)
+            rv = (item_val (x) < item_val (y));
+        break;
+    case GT:
+        if (both_int)
+            rv = (item_val (x) > item_val (y));
+        break;
+    case LE:
+        if (both_int)
+            rv = (item_val (x) <= item_val (y));
+        break;
+    case GE:
+        if (both_int)
+            rv = (item_val (x) >= item_val (y));
+        break;
+    case EQ:
+        if (both_int)
+            rv = (item_val (x) == item_val (y));
+        else
+            rv = (item_strcmp (x, y) == 0);
+        break;
+    case NE:
+        /*
+         * Go through item_val (): for a symbol item val.num is not the
+         *  number, the union holds the sym pointer instead.
+         */
+        if (both_int)
+            rv = (item_val (x) != item_val (y));
+        else
+            rv = (item_strcmp (x, y) != 0);
+        break;
+    default:
+        log_err ("Invalid comparitor %d\n", cmp);
+    }
+
+    if (rv < 0)
+        log_err ("Invalid comparison: `%s'(=%s) %s `%s'(=%s)\n",
+                 x->name, item_str (x), cmp_str (cmp),
+                 y->name, item_str (y));
+    else
+        log_debug ("testing: (`%s'(=%s) %s `%s'(=%s)) = %s\n",
+                   x->name, item_str (x), cmp_str (cmp),
+                   y->name, item_str (y), (rv ? "true":"false"));
+
+    return (rv);
+}
+
+/*
+ * True if str matches [a-zA-Z_][a-zA-Z0-9_]*.  NULL and "" are invalid.
+ */
+int is_valid_identifier (const char *str)
+{
+    const char *p;
+
+    if (!str)
+        return (0);
+
+    /*
+     * First character must be [a-zA-Z_]
+     *  (ctype functions need a value representable as unsigned char)
+     */
+    if (!(isalpha ((unsigned char) str[0]) || str[0] == '_'))
+        return (0);
+
+    for (p = str + 1; *p != '\0'; p++) {
+        if (!(isalnum ((unsigned char) *p) || *p == '_'))
+            return (0);
+    }
+
+    return (1);
+}
+
+
+/****************************************************************************
+ * Symbol functions
+ ****************************************************************************/
+
+/* List match callback: true when the symbol is called `name' */
+int sym_find (struct sym *s, char *name)
+{
+    return (strcmp (s->name, name) == 0);
+}
+
+void sym_destroy (struct sym *s)
+{
+    if (s == NULL)
+        return;
+
+    free (s->name);
+    free (s->string);
+    free (s);
+}
+
+/*
+ * Give the symbol a new value.  The symbol is typed SYM_INT when the
+ *  whole of a non-empty `value' converts to a number that fits an int,
+ *  SYM_STR (with val = -1) otherwise.  Returns 0, or -1 if memory runs
+ *  out, in which case the symbol keeps its old value.
+ */
+static int sym_reset_value (struct sym *s, const char *value)
+{
+    long val;
+    char *end = NULL;
+    char *copy = strdup (value);
+
+    if (copy == NULL)
+        return (-1);
+
+    /* Copy first, free second: `value' may point at s->string */
+    free (s->string);
+    s->string = copy;
+    s->type = SYM_STR;
+    s->val = -1;
+
+    errno = 0;
+    val = strtol (copy, &end, 10);
+
+    /* end == copy means no digits were seen, e.g. for "" */
+    if ((end != copy) && (*end == '\0')
+       && (errno != ERANGE) && ((long) (int) val == val)) {
+        s->type = SYM_INT;
+        s->val = (int) val;
+    }
+
+    return (0);
+}
+
+/*
+ * Allocate a symbol.  Returns NULL if memory runs out.
+ */
+struct sym * sym_create (const char *name, const char *value)
+{
+    struct sym *s = calloc (1, sizeof (*s));
+
+    if (s == NULL)
+        return (NULL);
+
+    if (!(s->name = strdup (name)) || (sym_reset_value (s, value) < 0)) {
+        sym_destroy (s);
+        return (NULL);
+    }
+
+    return (s);
+}
+
+static struct sym * sym_lookup (List l, char *s)
+{
+    if (l == NULL)
+        return (NULL);
+    return (list_find_first (l, (ListFindF) sym_find, s));
+}
+
+/*
+ * Remove a user-defined symbol.  Returns what list_delete_all () reports.
+ */
+int sym_delete (char *name)
+{
+    int rc = 0;
+
+    log_verbose ("undef \"%s\"\n", name);
+
+    if (symtab)
+        rc = list_delete_all (symtab, (ListFindF) sym_find, name);
+
+    return (rc);
+}
+
+/*
+ * Drop the cached records of environment variable `name'.
+ */
+int env_cache_delete (char *name)
+{
+    int rc = 0;
+    if (envtab)
+        rc = list_delete_all (envtab, (ListFindF) sym_find, name);
+
+    return (rc);
+}
+
+/*
+ * Define (or redefine) a keyword.  Returns the symbol or NULL.
+ */
+const struct sym * keyword_define (char *name, const char *value)
+{
+    struct sym *s;
+
+    if (!keytab)
+        keytab = list_create ((ListDelF) sym_destroy);
+    else
+        list_delete_all (keytab, (ListFindF) sym_find, name);
+
+    if ((s = sym_create (name, value)))
+        list_prepend (keytab, s);
+
+    return (s);
+}
+
+/*
+ * Define or update a user symbol.  Returns the symbol, or NULL if `name'
+ *  is a keyword or the symbol could not be created.
+ */
+const struct sym * sym_define (char *name, const char *value)
+{
+    struct sym *s;
+
+    /*
+     * Do not override a keyword with a symbol
+     */
+    if (sym_lookup (keytab, name))
+        return (NULL);
+
+    if (!symtab)
+        symtab = list_create ((ListDelF) sym_destroy);
+
+    if ((s = sym_lookup (symtab, name)))
+        sym_reset_value (s, value);
+    else if ((s = sym_create (name, value)))
+        list_prepend (symtab, s);
+
+    return (s);
+}
+
+/* Record an environment variable in envtab so it is freed later */
+static const struct sym * env_sym_create (char *name, const char *value)
+{
+    struct sym *s = NULL;
+
+    if (envtab == NULL)
+        envtab = list_create ((ListDelF) sym_destroy);
+
+    if ((s = sym_create (name, value)))
+        list_prepend (envtab, s);
+    else
+        log_err ("Failed to create env symbol \"%s\". Out of memory?", name);
+
+    return (s);
+}
+
+/*
+ * Look `name' up as keyword, then as user symbol, then in the
+ *  environment (via xgetenv ()).  Returns NULL if it is none of these.
+ */
+const struct sym * sym (char *name)
+{
+    const char *rv;
+    const struct sym *s;
+
+    if ((s = sym_lookup (keytab, name)))
+        return (s);
+
+    if ((s = sym_lookup (symtab, name)))
+        return (s);
+
+    if ((rv = xgetenv (name)))
+        return (env_sym_create (name, rv));
+
+    return (NULL);
+}
+
+void symtab_destroy ()
+{
+    if (symtab) {
+        list_destroy (symtab);
+        symtab = NULL;
+    }
+
+    if (envtab) {
+        list_destroy (envtab);
+        envtab = NULL;
+    }
+}
+
+void keytab_destroy ()
+{
+    if (keytab) {
+        list_destroy (keytab);
+        keytab = NULL;
+    }
+}
+
+int print_sym (struct sym *s, void *arg)
+{
+    log_msg (" %s = \"%s\"\n", s->name, s->string);
+    return (0);
+}
+
+void dump_symbols (void)
+{
+    log_msg ("Dumping symbols\n");
+    /* The table only exists once something has been defined */
+    if (symtab)
+        list_for_each (symtab, (ListForF) print_sym, NULL);
+}
+
+void dump_keywords (void)
+{
+    log_msg ("Dumping keywords\n");
+    if (keytab)
+        list_for_each (keytab, (ListForF) print_sym, NULL);
+}
+
+/****************************************************************************
+ * Initialization and Cleanup
+ ****************************************************************************/
+
+/*
+ * Release everything the scanner holds except the keyword table:
+ *  symbols, cached items, suspended include files and the current file.
+ */
+void lex_fini ()
+{
+    symtab_destroy ();
+
+    if (itemcache) {
+        list_destroy (itemcache);
+        itemcache = NULL;
+    }
+
+    if (includes) {
+        list_destroy (includes);
+        includes = NULL;
+    }
+
+    file_info_destroy (current);
+    current = NULL;
+}
+
+/*
+ * vi: ts=4 sw=4 expandtab
+ */
diff --git a/use-env/use-env-parser.y b/use-env/use-env-parser.y
new file mode 100644
index 0000000..f1fbbea
--- /dev/null
+++ b/use-env/use-env-parser.y
@@ -0,0 +1,676 @@
+/*****************************************************************************
+ *
+ * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC.
+ * Produced at Lawrence Livermore National Laboratory.
+ * Written by Mark Grondona .
+ *
+ * UCRL-CODE-235358
+ *
+ * This file is part of chaos-spankings, a set of spank plugins for SLURM.
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ ****************************************************************************/
+
+%{
+/*
+ * NOTE(review): the header names on the eight bare #include lines below
+ *  were lost from this patch text and cannot all be inferred from the
+ *  code -- restore them from upstream before building.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "use-env.h"
+#include "log_msg.h"
+#include "list.h"
+
+/* Parser tracing is compiled in; it is switched on by setting yydebug */
+#define YYDEBUG 1
+int yydebug = 0;
+
+extern int yylex ();
+void yyerror (const char *);
+
+/*
+ * Set parser options from config file
+ */
+static int set_parser_option (const char *option, struct lex_item *x);
+static int define_symbol (char *name, struct lex_item *x);
+
+/*
+ * Environment manipulation functions
+ */
+static int env_var_set (char *name, char *val, int op);
+static int env_var_unset (char *name);
+
+/*
+ * Condition functions
+ */
+static int condition_push_if (int val);
+static int condition_push_else_if (int val);
+static int condition_push_else ();
+static int condition_pop_endif ();
+static int condition_pop ();
+static int condition ();
+
+/*
+ * Special in-task block
+ */
+static int in_task_begin ();
+static int in_task_end ();
+
+/*
+ * Item tests
+ */
+static int do_fnmatch (struct lex_item *x, struct lex_item *y);
+static int item_defined (struct lex_item *i);
+static int cmp_items (int cmp, struct lex_item *x, struct lex_item *y);
+static int test_item (struct lex_item *i);
+static int include_file (char *name);
+static void dump_item (char *name);
+
+
+/*
+ * Parser-wide state: whether the file is evaluated in task context, and
+ *  the optional environment callbacks with the argument passed to them.
+ */
+struct parser_ctx {
+    int in_task;
+    struct use_env_ops *ops;
+    void *arg;
+};
+
+static struct parser_ctx ctx = { 0, NULL, NULL };
+
+
+%}
+
+/* Semantic values: `val' for test/operator results, `item' for tokens */
+%union {
+    int val;
+    struct lex_item *item;
+}
+
+%token APPEND
+%token PREPEND
+%token UNSET
+%token INCLUDE
+%token COND_SET
+%token IF
+%token ELSE
+%token ENDIF
+%token AND
+%token OR
+%token DEFINED
+%token PRINT
+%token SET
+%token DEF
+%token UNDEF
+%token DUMP
+%token IN_TASK
+%token MATCH
+/*
+ * NOTE(review): the <...> type tags on the next %token and on the %type
+ *  line appear to have been stripped from this patch text.  The grammar
+ *  actions use ITEM values as struct lex_item pointers and the test, tests,
+ *  cmp and op values as ints, so presumably <item> and <val> -- confirm.
+ */
+%token ITEM
+%token EQ LT GT LE GE NE
+
+%type test tests cmp op
+
+%left EQ LT GT LE GE NE AND OR '!'
+
+%%
+
+stmts   : /* empty */
+        | stmt                  { lex_item_cache_clear (); }
+        | stmts stmt            { lex_item_cache_clear (); }
+        ;
+
+stmt    : stmt_end
+        | expr stmt_end
+        | if_stmt stmt_end
+        | in_task stmt_end
+        | print stmt_end
+        | error stmt_end
+        | INCLUDE ITEM '\n'     { if (include_file ($2->name) < 0) YYABORT; }
+        ;
+
+stmt_end: '\n'
+        | ';'
+        ;
+
+print   : PRINT ITEM            { if (condition()) printf ("%s\n", item_str ($2)); }
+        ;
+
+if_stmt : IF '(' tests ')'      { if (condition_push_if ($3) < 0) YYABORT; }
+          '\n'
+          stmts if_tail
+        ;
+
+if_tail : ENDIF                 { condition_pop_endif (); }
+
+        | ELSE '\n'             { if (condition_push_else () < 0) YYABORT; }
+          stmts ENDIF           { condition_pop_endif (); }
+
+        | ELSE IF               { condition_pop (); }
+          '(' tests ')'         { if (condition_push_else_if ($5) < 0) YYABORT; }
+          '\n'
+          stmts if_tail
+        ;
+
+in_task : IN_TASK               { in_task_begin (); }
+          block                 { in_task_end (); }
+        | IN_TASK '\n'          { in_task_begin (); }
+          block                 { in_task_end (); }
+
+block   : '{' stmts '}'
+
+tests   : tests AND test        { $$ = ($1 && $3); }
+        | tests OR test         { $$ = ($1 || $3); }
+        | test
+        ;
+
+test    : ITEM cmp ITEM         { if (($$ = cmp_items ($2, $1, $3)) < 0) YYABORT; }
+        | DEFINED ITEM          { if (($$ = item_defined ($2)) < 0) YYABORT; }
+        | ITEM                  { if (($$ = test_item ($1)) < 0) YYABORT; }
+        | '(' test ')'          { $$ = $2; }
+        | '!' test              { if (condition ()) $$ = !($2); else $$ = 0; }
+        | ITEM MATCH ITEM       { if (($$ = do_fnmatch ($3, $1)) < 0) YYABORT; }
+
+
+expr    : ITEM op ITEM          { env_var_set ($1->name, item_str ($3), $2); }
+        | UNSET ITEM            { env_var_unset ($2->name); }
+        | SET ITEM ITEM         { set_parser_option ($2->name, $3); }
+        | DUMP ITEM             { dump_item ($2->name); }
+        | DEF ITEM '=' ITEM     { if (define_symbol ($2->name, $4) < 0) YYABORT; }
+        | UNDEF ITEM            { if (condition ()) sym_delete ($2->name); }
+        ;
+
+op      : '='                   { $$ = '='; }
+        | COND_SET              { $$ = COND_SET; }
+        | APPEND                { $$ = APPEND; }
+        | PREPEND               { $$ = PREPEND; }
+        ;
+
+cmp     : EQ                    { $$ = EQ; }
+        | LT                    { $$ = LT; }
+        | GT                    { $$ = GT; }
+        | LE                    { $$ = LE; }
+        | GE                    { $$ = GE; }
+        | NE                    { $$ = NE; }
+        ;
+
+%%
+
+/* Parser error hook: forward the message to the log */
+void yyerror (const char *msg)
+{
+    log_err ("%s\n", msg);
+}
+
+/****************************************************************************
+ * Data Types
+ ****************************************************************************/
+
+/*
+ * One level of the conditional stack.
+ *  val       this block is being evaluated
+ *  fallthru  a branch of the if/else chain opened *inside* this block
+ *            was already taken, so its remaining branches are skipped
+ */
+struct cond {
+    unsigned int val:1;
+    unsigned int fallthru:1;
+};
+
+/****************************************************************************
+ * Global static variables
+ ****************************************************************************/
+
+static List cond_stack = NULL;
+
+/****************************************************************************
+ * Includes
+ ****************************************************************************/
+
+/* Process `include NAME' unless the enclosing block is being skipped */
+static int include_file (char *name)
+{
+    if (condition ())
+        return (lex_include_push (name));
+
+    return (0);
+}
+
+/****************************************************************************
+ * Item tests
+ ****************************************************************************/
+
+/*
+ * Shell-pattern match: x is the pattern, y the string.  Returns 1 or 0;
+ *  always 0 while the enclosing block is being skipped.
+ */
+static int do_fnmatch (struct lex_item *x, struct lex_item *y)
+{
+    log_debug ("fnmatch (\"%s\", \"%s\")\n", item_str (x), item_str (y));
+    if (condition ())
+        return (fnmatch (item_str (x), item_str (y), 0) == 0);
+    return (0);
+}
+
+/*
+ * `defined $sym' test.  Returns -1 if the operand is not a symbol.
+ */
+static int item_defined (struct lex_item *i)
+{
+    if (i->type != TYPE_SYM) {
+        log_err ("use of `defined' keyword on non-symbol \"%s\"\n", i->name);
+        return (-1);
+    }
+
+    if (condition ())
+        return (i->val.sym != NULL);
+    else
+        return (0);
+}
+
+/*
+ * Truth value of a lone item in a test.
+ */
+static int test_item (struct lex_item *i)
+{
+    int rc = 0;
+
+    if (!condition ())
+        return (0);
+
+    /*
+     * Return 0 unless item is INT and non-zero, or
+     * item string is not empty.
+     */
+    if (item_type_int (i))
+        rc = (item_val (i) != 0);
+    else {
+        char *p = item_str (i);
+        rc = p ? strlen (p) > 0 : 0;
+    }
+
+    return (rc);
+}
+
+/* item_cmp () wrapper that yields 0 while the block is being skipped */
+static int cmp_items (int cmp, struct lex_item *x, struct lex_item *y)
+{
+    if (condition () == 0)
+        return (0);
+
+    return (item_cmp (cmp, x, y));
+}
+
+/****************************************************************************
+ * Set parser options
+ ****************************************************************************/
+
+/*
+ * Handle `set OPTION VALUE'.  Only "debuglevel" (integer) is known.
+ *  Returns 0, or -1 for an unknown option or a bad value.
+ */
+static int set_parser_option (const char *option, struct lex_item *x)
+{
+    if (condition() == 0)
+        return (0);
+
+    if (strcmp (option, "debuglevel") == 0) {
+
+        if (!item_type_int (x)) {
+            log_err ("Invalid value in \"set debuglevel %s\"\n", item_str (x));
+            return (-1);
+        }
+
+        log_msg_set_verbose (item_val (x));
+
+        log_verbose ("set debuglevel %d\n", item_val (x));
+    }
+    else {
+        log_err ("Unknown option \"%s\" to set keyword\n", option);
+        return (-1);
+    }
+
+    return (0);
+}
+
+/*
+ * Handle `dump WHAT'.  WHAT is compared over its own length only, so any
+ *  prefix of "symbols", "keywords" or "all" is accepted.
+ */
+static void dump_item (char *name)
+{
+    if (condition() == 0)
+        return;
+
+    if (strncmp (name, "symbols", strlen (name)) == 0)
+        dump_symbols ();
+    else if (strncmp (name, "keywords", strlen (name)) == 0)
+        dump_keywords ();
+    else if (strncmp (name, "all", strlen (name)) == 0) {
+        dump_keywords ();
+        dump_symbols ();
+    }
+    else
+        log_err ("Invalid argument \"%s\" to `dump' command\n", name);
+
+    return;
+}
+
+/*
+ * Handle `define NAME = VALUE'.  Returns -1 for an invalid identifier,
+ *  otherwise 1 if the symbol was defined and 0 if not (block skipped, or
+ *  sym_define () refused).
+ */
+static int define_symbol (char *name, struct lex_item *x)
+{
+    if (!is_valid_identifier (name)) {
+        log_err ("Unable to define invalid identifier \"%s\"\n", name);
+        return (-1);
+    }
+
+    if (condition() == 0)
+        return (0);
+
+    log_verbose ("define %s = \"%s\"\n", name, item_str (x));
+
+    return (sym_define (name, item_str (x)) != NULL);
+}
+
+/****************************************************************************
+ * Environment manipulation
+ ****************************************************************************/
+
+/*
+ * The x*env () wrappers use the callbacks installed with
+ *  use_env_set_operations () when present and fall back to the process
+ *  environment otherwise.  Each wrapper tests the callback it is about
+ *  to call.
+ */
+const char * xgetenv (const char *name)
+{
+    if (ctx.ops && ctx.ops->getenv)
+        return ((*ctx.ops->getenv) (ctx.arg, name));
+    else
+        return (getenv (name));
+}
+
+int xunsetenv (const char *name)
+{
+    if (ctx.ops && ctx.ops->unsetenv)
+        return ((*ctx.ops->unsetenv) (ctx.arg, name));
+    else
+        return (unsetenv (name));
+}
+
+int xsetenv (const char *name, const char *value, int overwrite)
+{
+    if (ctx.ops && ctx.ops->setenv)
+        return ((*ctx.ops->setenv) (ctx.arg, name, value, overwrite));
+    else
+        return (setenv (name, value, overwrite));
+}
+
+/*
+ * Join `orig' and `val' with ':' in buf[size]; val goes last when
+ *  `append' is set, first otherwise.  An empty `orig' yields just val.
+ *  Returns buf, or NULL if the result does not fit.
+ */
+static char * env_var_add (char *buf, size_t size,
+                           const char *orig, const char *val, int append)
+{
+    if (strlen (val) >= size)
+        return (NULL);
+
+    if (strlen (orig) == 0)
+        return (strcpy (buf, val));
+
+    if ((strlen (val) + strlen (orig) + 2) > size)
+        return (NULL);
+
+    if (append)
+        snprintf (buf, size, "%s:%s", orig, val);
+    else
+        snprintf (buf, size, "%s:%s", val, orig);
+
+    return (buf);
+}
+
+/*
+ * Handle `unset NAME'.
+ */
+static int env_var_unset (char *name)
+{
+    if (!is_valid_identifier (name))
+        return (log_err ("Invalid identifier \"%s\" in unset\n", name));
+
+    if (condition () == 0)
+        return (0);
+
+    log_verbose ("unsetenv (%s)\n", name);
+
+    /*
+     * Delete any references to this value in the local env_cache
+     */
+    env_cache_delete (name);
+
+    if (xunsetenv (name) < 0)
+        return ((log_err ("unsetenv (%s): %s\n", name, strerror (errno))));
+
+    return (0);
+}
+
+
+/*
+ * Handle `NAME op VALUE' where op is '=', COND_SET (|=, set only if not
+ *  already set), APPEND (=+) or PREPEND (+=).
+ */
+static int env_var_set (char *name, char *val, int op)
+{
+    char buf [4096];
+    const char *orig = NULL;
+    char *newval = val;
+    int overwrite = 1;
+
+    if (!is_valid_identifier (name))
+        return (log_err ("Invalid identifier \"%s\" in expression\n", name));
+
+    if (condition () == 0)
+        return (0);
+
+    if (op == COND_SET)
+        overwrite = 0;
+
+    if (((op == APPEND) || (op == PREPEND)) && (orig = xgetenv (name))) {
+        newval = env_var_add (buf, sizeof (buf), orig, val, op == APPEND);
+        /* Never hand a NULL value to the log or to setenv */
+        if (newval == NULL)
+            return (log_err ("Value of \"%s\" too long in expression\n",
+                             name));
+    }
+
+    /*
+     * Delete any references to this value in the local env_cache
+     */
+    env_cache_delete (name);
+
+    log_verbose ("setenv (%s, \"%s\", overwrite=%d)\n",
+                 name, newval, overwrite);
+
+    return (xsetenv (name, newval, overwrite));
+}
+
+
+/****************************************************************************
+ * Conditional stack
+ ****************************************************************************/
+
+static struct cond * cond_create (int v)
+{
+    struct cond *c = malloc (sizeof (*c));
+
+    if (c == NULL)
+        return (NULL);
+
+    /* val is a 1-bit field: normalize, or any even `v' would read as 0 */
+    c->val = (v != 0);
+    c->fallthru = 0;
+    return (c);
+}
+
+static void cond_destroy (struct cond *c)
+{
+    free (c);
+}
+
+/* Innermost condition, or NULL when the stack is missing or empty */
+static struct cond * cond_top (void)
+{
+    if (cond_stack == NULL)
+        return (NULL);
+    return (list_peek (cond_stack));
+}
+
+static void condition_fallthru_set ()
+{
+    struct cond *c = cond_top ();
+    if (c)
+        c->fallthru = 1;
+}
+
+static void condition_fallthru_clear ()
+{
+    struct cond *c = cond_top ();
+    if (c)
+        c->fallthru = 0;
+}
+
+static int condition_fallthru ()
+{
+    struct cond *c = cond_top ();
+    return (c ? c->fallthru : 0);
+}
+
+/*
+ * True if statements at the current nesting level are to be evaluated.
+ *  Nothing is evaluated when there is no condition stack.
+ */
+int condition ()
+{
+    struct cond *c = cond_top ();
+    /*
+     * The current condition value must be true
+     * AND fallthru must NOT be set in order
+     * to evaluate expressions.
+     *
+     * If fallthru is set this means we are in
+     * the middle of evaluating an if/else(if)*
+     * within this block, and the true condition
+     * was already evaluated. Thus we no longer need
+     * to evaluate expressions in else if's for this
+     * block.
+     */
+    if (c == NULL)
+        return (0);
+    return (c->val && !c->fallthru);
+}
+
+/*
+ * Open a new nesting level.  Returns the (normalized) value pushed, or
+ *  -1 if the level could not be created.
+ */
+static int condition_push_val (int val)
+{
+    struct cond *c;
+
+    if ((cond_stack == NULL) || !(c = cond_create (val)))
+        return (-1);
+
+    log_debug2 ("Pushing new condition %d\n", val);
+
+    list_push (cond_stack, c);
+
+    return (c->val);
+}
+
+void condition_init ()
+{
+    cond_stack = list_create ((ListDelF) cond_destroy);
+
+    /*
+     * Initial condition is false if we're in task context.
+     */
+    if (ctx.in_task)
+        condition_push_val (0);
+    else
+        condition_push_val (1);
+}
+
+void condition_fini ()
+{
+    if (cond_stack) {
+        list_destroy (cond_stack);
+        cond_stack = NULL;
+    }
+}
+
+/*
+ * Close the innermost nesting level and return its value.
+ */
+static int condition_pop ()
+{
+    int rv;
+    struct cond *c;
+
+
+    if ((cond_stack == NULL) || !(c = list_pop (cond_stack)))
+        return (log_err ("else/endif without if"));
+
+    log_debug2 ("Popped old condition %d\n", c->val);
+
+    rv = c->val;
+    cond_destroy (c);
+
+    return (rv);
+}
+
+static int condition_pop_endif ()
+{
+    int rv = condition_pop ();
+    /*
+     * endif resets fallthru state
+     */
+    condition_fallthru_clear ();
+    return (rv);
+}
+
+static int condition_push_if (int val)
+{
+    /*
+     * If this `if' statement is true, then update current
+     * fall thru state to true so we fall through any
+     * subsequent else statements (and don't evaluate
+     * any else if expressions).
+     */
+    if (val)
+        condition_fallthru_set ();
+
+    return (condition_push_val (val));
+}
+
+static int condition_push_else ()
+{
+    /* Close the preceding if / else-if branch; its value is not needed */
+    (void) condition_pop ();
+
+    /*
+     * The else branch runs only if the enclosing block is being
+     *  evaluated and no earlier branch of this chain was taken.
+     *  condition () tests both, since fallthru is part of it.
+     */
+    return (condition_push_val (condition () ? 1 : 0));
+}
+
+static int condition_push_else_if (int val)
+{
+    if (!condition () || condition_fallthru ())
+        val = 0;
+    else if (val != 0)
+        condition_fallthru_set ();
+
+    return (condition_push_val (val));
+}
+
+/****************************************************************************
+ * In-task support
+ ****************************************************************************/
+
+/* An `in task' block is evaluated only when parsing in task context */
+static int in_task_begin (void)
+{
+    log_debug ("Found `in task' block: in_task = %d\n", ctx.in_task);
+    return condition_push_val (ctx.in_task);
+}
+
+static int in_task_end (void)
+{
+    return condition_pop ();
+}
+
+
+/****************************************************************************
+ * Initialization and Cleanup
+ ****************************************************************************/
+
+void use_env_parser_init (int in_task)
+{
+    ctx.in_task = in_task;
+    /*
+     * Keytab created on-demand
+     */
+    condition_init ();
+}
+
+void use_env_set_operations (struct use_env_ops *ops, void *arg)
+{
+    ctx.ops = ops;
+    ctx.arg = arg;
+}
+
+/*
+ * Parse `filename' (stdin when NULL).  Returns 0 on success, -1 if the
+ *  file cannot be opened or does not parse.  Scanner state is released
+ *  in both cases so that a failed parse leaks nothing and cannot leave
+ *  stale include state behind for the next call.
+ */
+int use_env_parse (const char *filename)
+{
+    /* Never pass NULL to a %s conversion */
+    const char *name = filename ? filename : "stdin";
+    int rc = 0;
+
+    if (lex_file_init (filename) < 0) {
+        log_err ("Failed to open config file %s\n", name);
+        return (-1);
+    }
+
+    if (yyparse ()) {
+        log_err ("%s: Parser failed.\n", name);
+        rc = -1;
+    }
+
+    lex_fini ();
+
+    return (rc);
+}
+
+void use_env_parser_fini ()
+{
+    condition_fini ();
+    keytab_destroy ();
+}
+
+/*
+ * vi: ts=4 sw=4 expandtab
+ */
diff --git a/use-env/use-env.c b/use-env/use-env.c
new file mode 100644
index 0000000..d91d15b
--- /dev/null
+++
b/use-env/use-env.c @@ -0,0 +1,460 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + ****************************************************************************/ + +#include +#include +#include +#include +#include + +#include + +#include "use-env.h" +#include "list.h" +#include "split.h" +#include "log_msg.h" + +#define NO_SEARCH_SYSTEM 1<<0 +#define NO_SEARCH_USER 1<<1 + +SPANK_PLUGIN(use-env, 1) + +/**************************************************************************** + * Static Variables + ****************************************************************************/ + +static int local_user_cb_supported = 0; /* 1 if spank_local_user is avail*/ + +static int disable_in_task = 0; /* Don't run in task if nonzero */ +static char * default_name = "default"; /* Name of system default file */ +static List env_list = NULL; /* Global list of files to read */ + + +/**************************************************************************** + * Wrappers for spank environment manipulation + ****************************************************************************/ 
+ +static int use_env_setenv (spank_t, const char *, const char *, int); +static int use_env_unsetenv (spank_t, const char *); +static const char *use_env_getenv (spank_t, const char *); + +static struct use_env_ops spank_env_ops = { + (getenv_f) use_env_getenv, + (setenv_f) use_env_setenv, + (unsetenv_f) use_env_unsetenv +}; + +/**************************************************************************** + * SPANK Options + ****************************************************************************/ + +static int use_env_opt_process (int val, char *optarg, int remote); + +struct spank_option spank_options[] = +{ + { "use-env", "[name]", + "Read env from ~/.slurm/environment/[name] or " + "/etc/slurm/environment/[name]", 1, 0, + (spank_opt_cb_f) use_env_opt_process + }, + SPANK_OPTIONS_TABLE_END +}; + +/**************************************************************************** + * Forward Declarations + ****************************************************************************/ + +static int check_local_user_symbol (); +static int use_env_debuglevel (); +static int process_args (int ac, char **av); +static char * env_override_file_search (char *, size_t, const char *, int); +static int do_env_override (const char *path, spank_t sp); +static int define_all_keywords (spank_t sp); + +/**************************************************************************** + * SPANK Functions + ****************************************************************************/ + +/* + * slurm_spank_init is called as root in slurmd, but I don't + * think this matters in this case because all we do here + * is initialize the parser and search for default environment + * override files. Maybe later this can be duplicated in + * slurm_spank_user_init for safety. 
+ */ +int slurm_spank_init (spank_t sp, int ac, char **av) +{ + char buf [4096]; + size_t len = sizeof (buf); + + check_local_user_symbol (); + + if (process_args (ac, av) < 0) + return (-1); + + env_list = list_create ((ListDelF) free); + + /* + * Set environment access functions to spank versions + * if we're running remotely. + */ + if (spank_remote (sp)) + use_env_set_operations (&spank_env_ops, sp); + + /* + * Check for default files in the following order: + * /etc/slurm/environment/default || /etc/slurm/env-default.conf + * ~/.slurm/environment/default || ~/.slurm/env-default.conf + */ + if (env_override_file_search (buf, len, default_name, NO_SEARCH_USER)) + list_append (env_list, strdup (buf)); + + /* + * Always use name "default" for user default environment + */ + if (env_override_file_search (buf, len, "default", NO_SEARCH_SYSTEM)) + list_append (env_list, strdup (buf)); + + /* + * Initialize logging and parser: + */ + log_msg_init ("use-env"); + use_env_parser_init (spank_remote (sp)); + log_msg_set_verbose (use_env_debuglevel ()); + + /* + * if we don't have the local_user callback, then we have + * to instantiate the default environment here. + */ + if (!local_user_cb_supported && !spank_remote (sp)) { + list_for_each (env_list, (ListForF) do_env_override, NULL); + list_destroy (env_list); + } + + return (0); +} + +int slurm_spank_local_user_init (spank_t sp, int ac, char **av) +{ + if (define_all_keywords (sp) < 0) + return (-1); + + list_for_each (env_list, (ListForF) do_env_override, NULL); + list_destroy (env_list); + + return (0); +} + +/* + * Task callback: define the SLURM_* keywords and apply every file + * queued on env_list using the spank environment operations. + * Returns 0 on success (or when disabled), -1 if keyword + * definition fails. + */ +int slurm_spank_task_init (spank_t sp, int ac, char **av) +{ + /* + * Honor the `disable_in_task' plugin argument recorded by + * process_args (); without this check the option has no effect. + */ + if (disable_in_task) + return (0); + + /* + * Reset operations to make sure the right spank handle is + * available. 
+ */ + use_env_set_operations (&spank_env_ops, sp); + + if (define_all_keywords (sp) < 0) + return (-1); + + list_for_each (env_list, (ListForF) do_env_override, (void *) sp); + list_destroy (env_list); + return (0); +} + +int slurm_spank_exit (spank_t sp, int ac, char **av) +{ + use_env_parser_fini (); + log_msg_fini (); + return (0); +} + +/**************************************************************************** + * Static Functions + ****************************************************************************/ + +static int check_local_user_symbol () +{ + int (*sym_supported) (const char *); + + if ( (sym_supported = dlsym (NULL, "spank_symbol_supported")) + && (*sym_supported) ("slurm_spank_local_user_init")) + local_user_cb_supported = 1; + else + slurm_debug3 ("use-env: slurm_spank_local_user_init not supported"); + + return (0); +} + +static int use_env_debuglevel () +{ + const char *val; + int rv = 0; + + if ((val = xgetenv ("SPANK_USE_ENV_DEBUG"))) { + char *p; + long n = strtol (val, &p, 10); + if (p && (*p == '\0')) + rv = n; + else + slurm_error ("Invalid value %s for SPANK_USE_ENV_DEBUG", val); + } + + return (rv); +} + +static char * +env_override_file_search (char *path, size_t len, const char *name, int flags) +{ + const char *home; + int check_user = !(flags & NO_SEARCH_USER); + int check_sys = !(flags & NO_SEARCH_SYSTEM); + + if (check_user && (home = xgetenv ("HOME"))) { + snprintf (path, len, "%s/.slurm/environment/%s", home, name); + if (access (path, R_OK) >= 0) + return (path); + snprintf (path, len, "%s/.slurm/env-%s.conf", home, name); + if (access (path, R_OK) >= 0) { + return (path); + } + } + + if (check_sys) { + snprintf (path, len, "/etc/slurm/environment/%s", name); + if (access (path, R_OK) >= 0) + return (path); + snprintf (path, len, "/etc/slurm/env-%s.conf", name); + if (access (path, R_OK) >= 0) + return (path); + } + + return (NULL); +} + +static int do_env_override (const char *path, spank_t sp) +{ + slurm_verbose 
("use_env_parse (%s)", path); + + if (use_env_parse (path) < 0) { + slurm_error ("--use-env: Errors reading %s\n", path); + return (-1); + } + return (0); +} + +static int path_cmp (char *x, char *y) +{ + return (strcmp (x, y) == 0); +} + +static int check_and_append_env_opt (char *name, List l) +{ + int rc = 0; + char buf [4096]; + size_t len = sizeof (buf); + + if (!env_override_file_search (buf, len, name, 0)) { + slurm_error ("use-env: Unable to find env override file \"%s\"", name); + return (-1); + } + + /* + * If we don't have the local_user callback, then we have + * to call do_env_override immediately + */ + if (!local_user_cb_supported) + rc = do_env_override (buf, NULL); + else if (!list_find_first (env_list, (ListFindF) path_cmp, buf)) + list_append (env_list, strdup (buf)); + + return (rc); +} + +/* + * Option callback for --use-env: split the comma-separated `optarg' + * and hand each name to check_and_append_env_opt (). + * Returns 0 on success, -1 if `optarg' is NULL or list_for_each () + * returns a negative value. + */ +static int use_env_opt_process (int val, char *optarg, int remote) +{ + List l; + int rc; + + if (optarg == NULL) { + slurm_error ("--use-env: Invalid argument"); + return (-1); + } + + l = list_split (",", optarg); + rc = list_for_each (l, (ListForF) check_and_append_env_opt, env_list); + + /* + * Destroy the split list on the error path as well, so it + * is not leaked when one of the names fails. + */ + list_destroy (l); + + return ((rc < 0) ? -1 : 0); +} + +static int +define_use_env_keyword (spank_t sp, char *name, spank_item_t item) +{ + int n; + int val; + char buf [64]; + + if (spank_get_item (sp, item, &val) != ESPANK_SUCCESS) { + slurm_error ("use-env: spank_get_item failed for %s\n", name); + return (-1); + } + + n = snprintf (buf, sizeof (buf), "%u", val); + + if ((n < 0) || (n >= sizeof (buf))) { + slurm_error ("use-env: value of %s too large for buffer\n", name); + return (-1); + } + + if (keyword_define (name, buf) == NULL) + return (-1); + + return (0); +} + +static int set_argv_keywords (spank_t sp) +{ + char cmdline [4096]; + char buf [64]; + const char **av; + int ac; + int i; + int n; + + if (spank_get_item (sp, S_JOB_ARGV, &ac, &av) != ESPANK_SUCCESS) { + slurm_error ("use-env: spank_get_item failed for argv"); + return (-1); + } + + n = snprintf (buf, sizeof 
(buf), "%d", ac); + + if ((n < 0) || (n >= sizeof (buf))) { + slurm_error ("use-env: value of ARGC too large"); + return (-1); + } + + keyword_define ("SLURM_ARGC", buf); + + memset (cmdline, 0, sizeof (cmdline)); + + for (i = 0; i < ac; i++) { + snprintf (buf, sizeof (buf), "SLURM_ARGV%d", i); + keyword_define (buf, av[i]); + + /* + * n is the length cmdline would have after the separator. + * The separator is only written together with an argument + * that fits: appending a bare " " for every skipped argument + * could otherwise run past the end of cmdline. + */ + n = strlen (cmdline); + if (n != 0) + n++; + + if (sizeof (cmdline) > (n + strlen (av[i]) + 1)) { + if (n != 0) + strcat (cmdline, " "); + strcat (cmdline, av[i]); + } + } + + keyword_define ("SLURM_CMDLINE", cmdline); + + return (0); +} + +static int define_all_keywords (spank_t sp) +{ + /* + * These keywords are only accessible from this context + */ + if (define_use_env_keyword (sp, "SLURM_NNODES", S_JOB_NNODES) < 0) + return (-1); + if (define_use_env_keyword (sp, "SLURM_NPROCS", S_JOB_TOTAL_TASK_COUNT) < 0) + return (-1); + if (define_use_env_keyword (sp, "SLURM_JOBID", S_JOB_ID) < 0) + return (-1); + if (define_use_env_keyword (sp, "SLURM_STEPID", S_JOB_STEPID) < 0) + return (-1); + + if (set_argv_keywords (sp) < 0) + return (-1); + + if (!spank_remote (sp)) + return (0); + + if (define_use_env_keyword (sp, "SLURM_PROCID", S_TASK_GLOBAL_ID) < 0) + return (-1); + if (define_use_env_keyword (sp, "SLURM_LOCALID", S_TASK_ID) < 0) + return (-1); + if (define_use_env_keyword (sp, "SLURM_NODEID", S_JOB_NODEID) < 0) + return (-1); + + return (0); +} + +static int process_args (int ac, char **av) +{ + int i; + for (i = 0; i < ac; i++) { + if (strncmp ("default=", av[i], 8) == 0) + default_name = av[i] + 8; + else if (strcmp ("disable_in_task", av[i]) == 0) + disable_in_task = 1; + else { + slurm_error ("use-env: Invalid option \"%s\"", av[i]); + return (-1); + } + } + + return (0); +} + +/**************************************************************************** + * Environment manipulation wrappers + ****************************************************************************/ + +static const char *use_env_getenv (spank_t sp, const char *name) +{ + static 
char buf [4096]; + + memset (buf, 0, sizeof (buf)); + + if (spank_getenv (sp, name, buf, sizeof (buf)) != ESPANK_SUCCESS) + return (NULL); + + return (buf); +} + +static int use_env_unsetenv (spank_t sp, const char *name) +{ + if (spank_unsetenv (sp, name) != ESPANK_SUCCESS) + return (-1); + return (0); +} + + +static int use_env_setenv (spank_t sp, const char *name, const char *val, + int overwrite) +{ + if (spank_setenv (sp, name, val, overwrite) != ESPANK_SUCCESS && overwrite) + return (-1); + return (0); +} + +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/use-env/use-env.h b/use-env/use-env.h new file mode 100644 index 0000000..ee1b068 --- /dev/null +++ b/use-env/use-env.h @@ -0,0 +1,123 @@ +/***************************************************************************** + * + * Copyright (C) 2007-2008 Lawrence Livermore National Security, LLC. + * Produced at Lawrence Livermore National Laboratory. + * Written by Mark Grondona . + * + * UCRL-CODE-235358 + * + * This file is part of chaos-spankings, a set of spank plugins for SLURM. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ ****************************************************************************/ + +#ifndef _USE_ENV_H +#define _USE_ENV_H + +enum { TYPE_STR, TYPE_INT, TYPE_SYM }; +enum { SYM_INT, SYM_STR }; + +struct lex_item { + int used; /* Is item still used (for item cache) */ + char * name; /* Name of item */ + int type; /* Type of item (int, string, symbol) */ + char * str; /* String representation of item */ + + union { /* Union of different item types */ + int num; + char *str; + const struct sym *sym; + } val; +}; + +struct sym { + char * name; /* Name of symbol */ + int type; /* Type of symbol (INT || STRING) */ + int val; /* Value if type is INT */ + char * string; /* String representation */ +}; + +typedef char * (*getenv_f) (void *arg, const char *name); +typedef int (*unsetenv_f) (void *arg, const char *name); +typedef int (*setenv_f) (void *arg, const char *name, + const char *value, int overwrite); + +struct use_env_ops { + getenv_f getenv; + setenv_f setenv; + unsetenv_f unsetenv; +}; + +/* + * Environment manipulation + */ +const char * xgetenv (const char *name); +int xunsetenv (const char *name); +int xsetenv (const char *name, const char *value, int overwrite); + + +/* + * Parser operations: + * use_env_parser_init () takes nonzero `in_task' when running in + * task context; the prototype matches its definition and callers. + */ +void use_env_parser_init (int in_task); +void use_env_set_operations (struct use_env_ops *ops, void *arg); +int use_env_parse (const char *filename); +void use_env_parser_fini (); + +/* + * Lexer cleanup + */ +void lex_fini (); + +/* + * lex_item functions + */ +void lex_item_cache_clear (); +struct lex_item * lex_item_create (char *name, int type); +int is_valid_identifier (const char *s); + +int item_cmp (int cmp, struct lex_item *x, struct lex_item *y); +int item_strcmp (struct lex_item *x, struct lex_item *y); +char * item_str (struct lex_item *item); +int item_val (struct lex_item *item); +int item_type_int (struct lex_item *i); + +/* + * symbol lookup and definition functions + */ +const struct sym * sym (char *name); +const struct sym * sym_define (char *name, const 
char *value); +const struct sym * keyword_define (char *name, const char *value); +int sym_delete (char *name); +int env_cache_delete (char *name); +void symtab_destroy (); +void keytab_destroy (); +void dump_keywords (); +void dump_symbols (); + +/* + * include file functions + */ +int lex_file_init (const char *file); +int lex_include_push (const char *include); +int lex_include_pop (); + +const char *lex_file (); +int lex_line (); +int lex_line_increment (); + +#endif +/* + * vi: ts=4 sw=4 expandtab + */ diff --git a/use-env/version.map b/use-env/version.map new file mode 100644 index 0000000..e234ff4 --- /dev/null +++ b/use-env/version.map @@ -0,0 +1,9 @@ +{ global: + plugin_name; + plugin_type; + plugin_version; + spank*; + slurm_spank*; + local: + *; +};