Perl Style Summery... Following the Perl style guide, * all capatial identifiers are reserved for special meaning with Perl. * Functions and local variables are all lowercase. * Module's persistent variables are capitalized. * Identifiers with multiple words are separated by an underscore. * don't use mixed capitals without underscores. (you wouldn't like reading this book without spaces, either) ------------------------------------------------------------------------------- Versions perl -V # all %Config defines perl -V:archname # the architucture to use for installation perl '-V:install.*' # the installation directories perl -MTk -le 'print $Tk::VERSION' perl -MCGI -le 'print $CGI::VERSION' perl -MFile::Find -le 'print $File::Find::VERSION' NOTE: -l turns on end of line auto handler (chomp input, add \n to output) pmdesc -v -w -s # print all perl modules and version numbers # perl cookbook recipe 12.19 example 12-3 Debugger perl -d:ptkdb myscript.pl ------------------------------------------------------------------------------- Shell Polyglot... if run as a shell script, re-run using perl =======8<--------CUT HERE---------- #!/usr/bin/perl eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if $running_under_some_shell; =======8<--------CUT HERE---------- ------------------------------------------------------------------------------- General Program -- Inline Manual, and Option handling. This gets perl to read the comments at the top of the script to use as the script documentation and manual. Keeping it all together. NOTE: I no longer use "Pod::Usage" for inscript manuals as it is not installed by default on Redhat 8 or Fedora 31. It is a shame, but self-reading scripts are more portible. =======8<--------CUT HERE---------- #!/usr/bin/perl # # program [options] file... # # Quick Description of program # # Options: # --help Print this help # ### # # Full and extensive description of program. # ##### # # Programmers Notes # # Anthony Thyssen (September 2005) # use strict; use FindBin; my $PROGNAME = $FindBin::Script; sub Error { print STDERR "$PROGNAME: ", @_, "\n"; exit 1; } sub Usage { print STDERR "$PROGNAME: ", @_, "\n" if @_; @ARGV = ( "$FindBin::Bin/$PROGNAME" ); # locate script file while( <> ) { next if 1 .. 2; last if /^###/; last unless /^#/; s/^#$//; s/^# //; last if /^$/; print STDERR "Usage: " if 3 .. 3; print STDERR; } print STDERR "For Options use --help\n"; exit 10; } sub Help { @ARGV = ( "$FindBin::Bin/$PROGNAME" ); # locate script file while( <> ) { next if $. == 1; last if /^###/; last unless /^#/; s/^#$//; s/^# //; print STDERR; } exit 10; } sub Doc { @ARGV = ( "$FindBin::Bin/$PROGNAME" ); # locate script file while( <> ) { next if $. == 1; last if /^#####/; last unless /^#/; s/^#$//; s/^# //; print STDERR; } exit 10; } my ($debug,$force,$verbose) = map(0, 0..10); # initialise flags # Option handler OPTION: # Multi-switch option handling while( @ARGV && $ARGV[0] =~ s/^-(?=.)// ) { $_ = shift; { m/^$/ && do { next }; # next argument m/^-$/ && do { last }; # End of options m/^-?help$/ && Help; # Usage Help '-?' m/^-?doc$/ && Doc; # Print help manual comments s/^d// && do { $debug++; redo }; # \ s/^f// && do { $force++; redo }; # > multi-switch options s/^v// && do { $verbose++; redo }; # / Usage( "Unknown Option \"-$_\"" ); } continue { next OPTION }; last OPTION; } #------------------ # Expanded Argument handling, notes and type examples # # This method allows you to have command line options in the forms... # GNU long options: --help --debug # Multi-switch options: -dvr # Option with Argument: -nNAME OR -n NAME # With Optional Argument: -i.bak OR -i # Numerical Arguments -x1y2z3 OR -x 1 -y 2 -z 3 # Sort like numerical ranges: -k20,30 # tar-like option args: -fbs filename blocks skip # # Developed from the Perl Camel Book v3, page 122 # Programmers Note: # Within the inner option block... # "next" is equivelent to a "next OPTION" # "last" is equivelent to a "last OPTION" # "redo" means look for more multi-switch options (inner block) # # initialise flags my ( $debug,$force,$verbose,$name,$back $start,$end,$file,$blocks,$skip) = map(0, 0..10); OPTION: # Multi-switch option handling while( @ARGV && $ARGV[0] =~ s/^-(?=.)// ) { $_ = shift; { m/^$/ && do { next }; # Next option m/^-$/ && do { last }; # End of options '--' m/^\?/ && do { Usage }; # Usage Help '-?' m/^-help$/ && Usage( -verbose => 1); # quick help (synopsis) m/^-manual$/ && Usage( -verbose => 2); # inline manual m/^-debug$/ && do { $debug++; next }; # whole --word switches s/^d// && do { $debug++; redo }; # \ s/^f// && do { $force++; redo }; # > multi-switch options s/^v// && do { $verbose++; redo }; # / s/^n// && do { $name = $_ || shift; next }; # "-nARG" OR "-n ARG" s/^i// && do { $back = length() ? $_ : ".bak"; next }; # "-i.sfx" OR "-i" # "-x1y2z3" OR "-x 1 -y 2 -z 3" s/^x// && do { $x = length() ? ( s/^\d+// ? $& : 0 ) : shift; redo }; s/^y// && do { $y = length() ? ( s/^\d+// ? $& : 0 ) : shift; redo }; s/^z// && do { $z = length() ? ( s/^\d+// ? $& : 0 ) : shift; redo }; s/^k(\d+)// && do { $start = $1; redo }; # Numerical range arguments s/^,(\d+)// && do { $end = $1; redo }; # EG: -k10,20 s/^f// && do { $file = shift; redo }; # Tar-like option arguments s/^b// && do { $blocks = shift; redo }; # Not that I like this style! s/^s// && do { $skip = shift; redo }; # EG: -fbs filename blocks skip Usage( "$PROGNAME: Unknown Option \"-$_\"" ); } continue { next OPTION }; last OPTION; } Usage( "$PROGNAME: Too Few Arguments" ) unless @ARGV; my $file = shift; my $template = shift || "default_template.txt"; Usage( "$PROGNAME: Too Many Arguments" ) if @ARGV; =======8<--------CUT HERE---------- ------------------------------------------------------------------------- Perl Pod Usage Method I don't use this any more, as POD is no longer a standard install! Now I use comments, using the exact same technique I use for Shell Scripts. =======8<--------CUT HERE---------- #!/usr/bin/perl =head1 NAME program - A program with pod documention =head1 SYNOPSIS program [options] file... Options: -f Switch or Flag -a arg Option with agument -d|--debug Turn on debugging --help Quick Help (synopsis) --manual Program Manual =head1 DESCRIPTION Program documention in the body of the document. =head1 AUTHOR Anthony Thyssen 3 December 2002 =cut use strict; use FindBin; my $PROGNAME = $FindBin::Script; # Perl Pod Usage Method sub Usage { eval { use Pod::Usage; pod2usage ("@_", "-exitval", 10 ); }; } ARGUMENT: # Multi-switch option handling while( @ARGV && $ARGV[0] =~ s/^-(?=.)// ) { $_ = shift; { m/^$/ && do { next }; # next argument m/^-$/ && do { last }; # End of options m/^\?/ && do { Usage }; # Usage Help m/^-help$/ && &Usage( -verbose => 1); # quick help (synopsis) m/^-doc$/ && &Usage( -verbose => 2); # output the entire manual m/^-manual$/ && &Usage( -verbose => 2); # output the entire manual # ... Usage( "Unknown Option \"-$_\"\n" ); } continue { next ARGUMENT }; last ARGUMENT; } =======8<--------CUT HERE---------- ------------------------------------------------------------------------------- Indented HERE Here here Document Shell cut and paste documents (very simple) NOTE: The reason for this is that you can get get differences with the EOL handling if the code is simply pasted directly on the comman line, depending on where it was copied from. See "shell/file.txt" for more details on the "Copy-N-Paste Problem..." . echo ' |Line One | Line Two | Line Three |Line Four ' | perl -ne 's/^\s*\|// && print' > t In Perl Scripts (using a herefile() subroutine) # Remove the indent of a here file... # And strip any # comments found in the here file # Adjust to suit you here file requirements of your program... sub herefile { my $string = shift; $string =~ s/^\n//s; # remove any pure blank lines $string =~ s/^[ \t]*#.*\n//gm; # completely remove full line comments $string =~ s/#.*//gm; # remove end-of-line comments $string =~ s/^[ \t]+\| ?//gm; # remove the indent part of the line $string =~ s/[ \t]+$//gm; # remove any extra end-of-line spaces return $string; } (my $prog = $0) =~ s/^.*\///; sub Usage { die @_, herefile(" | Usage: $prog [-options] file... | -d Output extra debugging information # -e Obsolete Option, do not report to the user "); } print herefile( <<'EOF' ); | yes the indent to the left is removed, | and the type of indent can changed to suit data # You can even add comment lines into the here file! | you can print this # but don't print this EOF General purpose here-file fixer (found on web)... It looks to see whether each line begins with a common substring ($leader), and if so, strips that substring off. Otherwise, it takes the amount of leading whitespace found on the first line and removes that much off each subsequent line. Does not handle mix of command lines and marked lines, as above sub herefixer { local $_ = shift; my ($white, $leader); # common whitespace and common leading string if (/^\s*(?:([^\w\s]+)(\s*).*\n)(?:\s*\1\2?.*\n)+$/) { ($white, $leader) = ($2, quotemeta($1)); } else { ($white, $leader) = (/^(\s+)/, ''); } s/^\s*?$leader(?:$white)?//gm; return $_; } It also does not allow the use of here file comments ------------------------------------------------------------------------------- Tricks # Auto flush STDOUT and STDERR select((select(STDOUT), $| = 1)[$[]); select((select(STDERR), $| = 1)[$[]); # Test a string ($foo) against multiple values $foo eq 'bar' || $foo eq 'baz' || $foo eq 'boo' # verbose and redundant $foo=~/^(bar|baz|boo)$/o # using a RE (slow) grep{$_ eq $foo} qw(bar baz boo) # grep of array %check=map{$_=>1}qw(bar baz boo); $check{$foo} # using a hash (fast) {map{$_=>1}qw(bar baz boo)}->{$foo} # inline hash lookup # tricky splits (NOTE: a 'g' flag or '()' is required) ($num) = /\d+/g; # assign just the matched string ('g' needed) ($num) = /(\d+)/; # assign just the matched string ('()' needed) ($a, $b, $c) = "123 4 56" =~ /\d+/g; # three matched strings ($b) = "123 4 56" =~ /\d+ (\d+) \d+/; # just the middle number ($a, $b, $c) = "123 4 56" =~ /(\d)+/g; # the last digit in each number! ($a, $b, $c) = /(\w+) (\w+) (\w+)/; # three words only ($before, $a, $b, $c, $after) = split(/(\w+) (\w+) (\w+)/, $_, 2); # Assigned to varible, if variable is not true $option ||= "default_value"; # Assign from a cache, otherwise look it up and cache it too $uid = $user{$user} ||= getpwnam($user); # make a backup of all the listed files (multiple copy) perl -p -i.bak -e '' # Array to Hash (for 'value exists' lookup of array elements) %hash = @array; # array into a hash undefined values. @hash{@array} = (1) x @array; # array to a hash of true values %hash = map { $_, 1 } @array; # Copy and Modify an Array (which version is better?) # Remember normally "for" or "map" will modify the actual array given!!! # But assigning makes a copy of it first before it is modified. for( @new = @old ) { s/bad/good/g }; map { s/bad/good/g } ( @new = @old ); # Constant Variables (read only) $PI = 3; # This can be modified! $PI++; # That is, this would work when it shouldn't *PI = \3.1415927; # This can not be modified! print "PI = $PI\n"; # subroutine constant sub PI() { 3.1415927 } # approximaton sub PI() { 4 * atan2(1,1) } # will be re-calculated multiple times!!!! sub PI() { $PI_cache ||= 4 * atan2(1,1) } # unless you cache it. print "PI = ", PI, "\n"; # This will calculate only once! but works like a subroutine constant use constant PI => 4 * atan2(1,1); # calculated ONCE only RE grep matching against an array of values given @words = gw(alt1 alt2 alt3); Slowly check each word, one word at a time if ( grep { /^$word$/ } @words ) { ... fi Or use RE alturnatives (assuming words are well defined) $" = '|'; if ( $word ~= /^(@words)$/ ) { ... } Count and remove matches from an array @a = qw( a b cd e fg h i ); print scalar @a, ": @a\n"; $count=0; @a = grep( !(/../ && ++$count), @a ); print "$count matches removed\n"; print scalar @a, ": @a\n"; WARNING $host = "machine.localdomain."; $host =~ /\Q$host\E\b/; Will NOT match as $host does not end in an alphanumeric (\w) character. That is because \b only works NEXT to a \w character!!!! In this case $host ends in '.' which is not a word boundary at the end of the string. Big NO NO -- Using local in looping block. for ( 1 .. 100 ) { solution local(@array) local(@array); ===> for ( 1 .. 100 ) { ... undef @array; ... ... } } The above will cause you to have 100 @arrays before the end of the loop. Alturnative is to use "my" instead of "local". ------------------------------------------------------------------------------- Regular Expressions, Regex... WARNING: Perl can execute code in regular expresions (often multiple times!) EG: /....(?{..perl code..})..../; Multi-line string Regular Expresions.. /./ will NOT match "\n", but /[\000-\377]/ will, as will /./s /^/ matches start of string, but /^/m will match after "\n" /\A/ only matchs start of string /\z/ only matches end of string /\Z/ may match before a final newline character in the string /$/ normally matches as /\Z/. EG: end of string or a final newline. ?re? Matches true ONCE ONLY for life of program or until reset() called. EG: match first time only while in a loop. /[\0-\xff]/ will also match ANY character in a string. These modifers modify this behaviour.... /s make '.' also match new lines (EG: /./ really becomes ALL characters) /m make '^' and '$' match near newline characters, not just near the ends of string Debuging, and seeing exactly what perl is doing... see "Programming Perl v3" p213 (using embeded prints) and p195 for RE debuging Expanding Tabs (simply) 1 while s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e; Compress multiple blank lines to one blank line (This is not easy!) perl -ne 'if (/\S/) { print; $i=0 } else {print unless $i; $i=1; }' Or inverting the search (print paragraphs) perl -ne 'print if /\S/../^\s*$/' NOTE: simplier methods exist using sed, and vim macros see file "shell/script.hints" Remove C Comments (using minimal RE expandsion - perl v5) $program =~ s{ /\* .*? \*/ }[]gsx; Efficent double quotes match (with quote escape) This uses ++ which will never backtrack to try a smaller string as we know their is no quote to match in the smaller string /"(?:[^"\\]++|\\.)*+"/ Remove surounding spaces in one RE The '/g' is needed to match twice, it is slower than separate REs s/^\s+│\s+$//g; Removing extra spaces (multi-line) This removes all spaces at the start and end of lines in a multi-line string. The final newline will be removed, as will all blank lines, but as '$' is a zero length match before a newline, the end of line newlines will not be removed (just the last one). $string =~ s/^\s+│\s+$//gm; Lowercase and capitalise a title/author line s/(? a_b c_d perl can however use a loop to solve the problem echo "a b c d" | perl -pe 'while( s/(\S) (\S)/${1}_$2/g ) {}' # => a_b_c_d Using zero-width look-ahead and look-behind https://www.regular-expressions.info/lookaround.html Using positive match look-behind and positive match lookahead echo "a b c d" | perl -pe 's/(*plb:\S)\s(*pla:\S)/_/g' # => a_b_c_d using negative match look-behind and negative match lookahead echo "a b c d" | perl -pe 's/(*nlb:\s)\s(*nla:\s)/_/g' # => a_b_c_d_ NOTE: \K will not do this. Like sed it will only match twice! This find words NOT ending in 's' /\b\w+(*nlb:s)\b/ this is not the same as /\b\w+[^s]\b/ which will OVER match "John's" and "you say" ------------------------------------------------------------------------------- Mysteries of the "comma" operator # NOTE Parentheses play a very important role # Also the ',' is of lower precedance (less important) than '=' $scalar = 'a', 'b', 'c'; # scalar; $scalar = 'a' $scalar = ('a', 'b', 'c'); # scalar; $scalar = 'c' ($scalar) = ('a', 'b', 'c'); # list; $scalar = 'a' @array = 'a', 'b', 'c'; # scalar; @array = ('a') @array = ('a', 'b', 'c'); # list; @array = ('a', 'b', 'c') ------------------------------------------------------------------------------- Scalar/List Contex in functions sub A { return ('a', 'b', 'c'); } sub B { # This is DIFFERENT to others my @array = ('a', 'b', 'c'); return @array; } sub C { my @array = ('a', 'b', 'c'); return @array[0..$#array]; } sub D { my @array = ('a', 'b', 'c'); return ( ( @array ) ); } $a = A(); # $a = ('a', 'b', 'c') => $a = 'c' # Note comma operator ($b) = A(); # ($b) = ('a', 'b', 'c') => $b = 'a' $c = B(); # $c = @array => $c = 3 ($d) = B(); # ($d) = @array => $d = 'a' $e = C(); # $e = 'c' # C() behaves like A()!!! ($f) = C(); # $f = 'a' # D() will also act like A()!!! Of course a function can ask what context it is in... This is of course equivenelt to A(), C() and D() sub E { my @array = ('a', 'b', 'c'); return wantarray ? @array : $array[0]; } ------------------------------------------------------------------------------- Word finding optimizations On a large string (like a whole file) Where $_ = "whole_file_of_words" Comparing hash keys is faster than a RE of the words EG: &word{ qw( and or then last next ) } @file = grep { defined $word{lc($_) } split; is 3 times faster than $word = "(and|or|then|last|next|........)" s/ $word / /ig; Using minimal matching s/\b(\w*?aaa\w*?) / /ig; is faster than normal longest matching s/\b(\w*aaa\w*) / /ig; for very short matches on very long strings (whole files in memory) Extreme care is advised on the sub-strings. Word boundary is slower than a plain space s/ (and|or|then|last|next) / /ig; is faster than s/\b(and|or|then|last|next) //ig; due to complexity of the match, BUT the /g does NOT work properly without the \b (it will skip words)!! ------------------------------------------------------------------------------- Remove duplicates from an array If you don't care but the order, convert the array into a hash.. Making each element a key. my %hash = map { $_, 1 } @array; my @unique = keys %hash; %hash = (); A hash & array can be used to only accept first seen, store info about the element in the hash, while the array keeps track of the key order. A Module solution is uniq() from List::MoreUtils In perl 5.010 you can use 'smart match' operator print "The array contains $item" if $item ~~ @array; print "The hash contains $item" if $item ~~ @hash; ------------------------------------------------------------------------------- Signals Handling Signals and print statments In C doing any sort of IO in a interupt (like a printf) is a BAD IDEA. As few clib functions are re-entrent. However in perl 5.8.1 and on all signals are deferred to be handled between Perl OPs. As such using print within a signal handler in perl is okay. ------ # # Interupt handler for Critical Sections # # We don't want to be left with a incomplete data file. So we only note that a # interupt occured, but return to the task at hand afterward. We then exit # when it is safe to do so. Critical Sections should be kept as small as # posible, and enter and exit frequently. Also watch for any posible blocking # actions performed during the critical period. # my $interupted = 0; sub interupt { $interupted = 1; } # We were interupted? sub set_sig_handler { # turn handler on/off my( $turn_on ) = @_; die "INTERUPT CAUGHT DURING CRITICAL PERIOD -- EXITING NOW\n" if $interupted; $SIG{'HUP'} = $SIG{'QUIT'} = $SIG{'INT'} = $SIG{'TERM'} = $turn_on ? 'interupt' : 'DEFAULT'; } set_sig_handler(1); # ... critical section ... # ... clean up ... set_sig_handler(0); ------ Signals and END {} blocks NOTE: While perl END {} blocks and module DESTROY {} blocks are called by perl when the program exits, they are NOT called when the program is interupted!!! If this is required, then you need to set up an interupt handler to actually call the "exit()" funtion. This can be done in a number of ways.... use sigtrap(die normal-signals) OR... sub set_signal { my($func) = @_; $SIG{'INT'} = $func; $SIG{'QUIT'} = $func; $SIG{'HUP'} = $func; $SIG{'TERM'} = $func; } sub Interputed { print STDERR "Interupted, doing cleanup\n"; system("rm -f ".mail_dir."/*$new"); exit 10; } set_signal( \&Interupted ); ------------------------------------------------------------------------------- Automatic SU if ( $> == 0 ) { print "$prog : You must be user $db_user \n" ; print "-- performing automatic su \n" ; { exec 'su', '-',$db_user,'-c', "'".join("' '","$db_home/bin/$prog",@ARGV)."'"; } print STDERR "Unable to su to $db_user -- exiting \n"; exit 0 ; }elsif ( (getpwnam($db_user))[2] != $> ) { # im not duty -- BAD print STDERR "You must be $db_user to run this script \n" ; exit 0 ; } ------------------------------------------------------------------------------- Auto Background (perl 5) exit 0 if fork; # basic background use POSIX qw(setsid); setsid(); # disassociate from terminal etc. Extra Discussion... Should he also run setgid(), and then either close filehandles 1-3 (stdin, stdout, stderr)? But we are not trying to turn the whole thing into a 'daemon' - we are just trying to "background" it so that shell returns a prompt. As such the 'setsid' is just a way to avoid having shell's SIGSTOP etc. get in the way. Truely disassociated GUI apps are very rare - most will die horribly when window manager exits when user logs out. ------------------------------------------------------------------------------- Date and Time conversion in perl For good rundown of all the modules for manipulating Dates and Times in perl (dated 2003 but still good) The Many Dates and Times of Perl http://www.perl.com/pub/2003/03/13/datetime.html formatted time (ISO standard) &time_format(999523563) sub time_format { my @d = localtime(shift); # time as a 9 element array $d[4] ++; # adjust month $d[5] += 1900; # adjust year to 4 digits return( sprintf( "%04d-%02d-%02d %02d:%02d:%02d", reverse @d[0..5] ) ); } time in english #require "ctime.pl"; # perl 4 method (newline in ctime()) use Time::localtime; $time = 999523563; $date = &ctime($time); $date =~ s/\s*\b[A-Z]{3}\b\s*/ /; # remove timezone and any linefeed print $date, "\n"; --- time delta (time periods) Modules (not everywhere) # report time in years, days, hours, etc (never months or weeks) # only reporting two values EG: days and hours, or minutes and secs use Time::Duration; print "Runtime ", duration(time - $^T), "\n"; print "Program has been running for... ", time_delta_rough( time - $^T ), "\n"; # An approximate version (only needs to be rough at longer scales) sub time_delta_rough { # convert a time in seconds to a rough idea of how far in the future my $secs = shift; my $mins = int(($secs/60)%60); # mins in hour (integer) my $hours = ($secs/3600)%24; # hours in day (floating point) my $days = $secs/86400; my $weeks = $days/7; my $months = $days/30.5; # it gets VERY rough from here my $years = $days/365.25; return ( $years > 0.95 ? sprintf("%.2f years",$years) : $weeks > 8.5 ? sprintf("%.2f months",$months) : $weeks > 3 ? sprintf("%.2f weeks",$weeks) : $days >= 1 ? sprintf("%d days %.2f hrs", $days, $hours) : $hours >= 1 ? sprintf("%d:%02d hrs", $hours, $mins) : sprintf("%d mins", $mins) ); } sub time_delta_exact { # convert a time in seconds to more human readble string my $secs = shift; my $days = int ($secs/86400); $secs %= 86400; my $hours = int ($secs/3600); $secs %= 3600; my $mins = int ($secs/60); $secs %= 60; return ( $days > 0 ? sprintf("%d days %d:%02d",$days, $hours, $mins) ; $hours > 0 ? sprintf("%d:%02d", $hours, $mins) : $mins > 0 ? "$mins mins $secs secs" : "only $secs seconds" ); } sub time_delta_long_exact { # Calculating in exact Days, Months, Years is much harder as both of these # are of variable length, The actual delta then depends on the actual dates # involved and so we need to calculate delta outselves. # # A simple method may be to subtract based on localtime units. # EG: convert the two times into year/month/day, then do the subtraction # in those units with appropriate pass along but even this has problems. # So use Data::Calc module (the "perl-Date-Calc" package) # my ($t1, $t2) = @_; my $secs = $t1 - $t2; if ( $secs < 86400 * 31 ) { # Delta is less than a month.. # Using normal time_delta_exact() my $days = int ($secs/86400); $secs %= 86400; my $hours = int ($secs/3600); $secs %= 3600; my $mins = int ($secs/60); $secs %= 60; return ( $days > 0 ? sprintf("%d days %d:%02d",$days, $hours, $mins) ; $hours > 0 ? sprintf("%d:%02d", $hours, $mins) : $mins > 0 ? "$mins mins $secs secs" : "only $secs seconds" ); } else { # Delta is more than a month - don't worry about hours! # my @d1 = localtime($t1); # my @d2 = localtime($t1); # my $years = $d1[0] - $d2[0]; # my $months = $d1[1] - $d2[1]; # my $days = $d1[2] - $d2[2]; # # Negative wrap-around handling # $month--, $days = $d1[7] - $d2[7] if $days < 0; # not correct # $year--, $months += 12 if $months < 0; use Date::Calc; # this does the job correctly my($years, $months, $days) = N_Delta_YMD(@d1[0..2],@d2[0..2]); return( "$years yrs, $months mnths, $days days" ); } } date --> time #repuire "timelocal.pl"; # perl 4 use Time::Local; # perl 5 @d = ( 18, 9, 1998 ); # date = 18 September 1998 $d[1] --; # adjust month to 0-11 $d[2] -= 1900 if $d[2] > 1900; # adjust year 2001 -> 101 $time = timelocal( 0,0,0, @d ); # midnight on that day Date Parse Module use Date::Parse; my $dt = str2time('25/02/1990 23:48:00'); $dt += 6 * 60; # + 6 mins Date Manipulation module See http://search.cpan.org/~sbeck/DateManip-5.44/Manip.pod This is a very large and monolithic module. WARNING: It also changed from v5 to v6 to try and make it work better Watch if other modules requires its functionality. use Date::Manip; $date = ParseDate("25/02/1990 23:48:00"); $delta = ParseDateDelta("+ 6 minutes"); $new = DateCalc($date,$delta); # or alternate/shorthand: # $new =DateCalc("25/02/1990 23:48:00","+ 6 minutes"); # then output however you like. print &UnixDate($new,"It is now %T on %b %e, %Y."); ------------------------------------------------------------------------------- Progress Reporting.... # --- progress of data read from file --- $PROGRESS = 1; # Clear Progress report lines $B = `tput el`; # terminfo: clear to end of line #my $B = `tput ed`; # terminfo: clear to end of display #my $B = `tput dl 1`; # terminfo: delete line #my $B = (" "x(`tput cols`||80) . "\r"; # blank spaces & return -- fallback #my $B = (" "x79) . "\r"; # hardcode 80 columns #print "-"x100, "\r", $B, "x\n"; exit; # DEBUG print STDERR "Main Processing Loop...\n" if PROGRESS; # process start time my $start_time = time; # Setup progress report my $progress = ''; # clear progress line my $progress_total = -s $data; # data file size my( $progress_done, $progress_last ) = (0,0); open(DATA, "$data") || die("Unable to read \"$data\" : $!\n"); while( ) { $progress_done += length if $PROGRESS; #... # to output some info, clear progress info, output, restore progress print STDERR "${B}" if $PROGRESS; print "report some info\n"; print STDERR $progress if $PROGRESS; #... } continue { if( $PROGRESS ) { if ( $last_time != time ) { # update once a second (optional) $last_time = time; $progress = progress($progress_total,$progress_done,$start_time); print STDERR $B," Working...", $progress, "\r"; } } } print STDERR $B if $PROGRESS; warn("Assertion Failure: Progress count ($progress_total) ". "does not equal progress target ($progress_done)!") unless $progress_total == $progress_done; my $process_time = time_delta(time - $start_time); printf "Processed %d lines in 4process_time\n", $.; # ..... # Generate the progress report -- for main loop only { # Progress is on the numbers given to this sub-routine { sub progress { { my ($tot,$curr,$stime) = @_; { return '' unless $tot && $curr; { { $curr /= $tot; # how much of the run have we completed { my $run = (time - $stime); # time we have been running { $tot = $run / $curr; # total time of run start to finish { my $left = $tot - $run; # time left to finish { { if ( $tot > 86400*2.5 ) { # multi-day report { # status report in days/hours/minutes { return sprintf( "%4.1f%% / %dd%02d:%02d => %dd%02d:%02d + %dd%02d:%02d", 100*$curr, $tot /86400, $tot /3600%24, $tot /60%60, $run /86400, $run /3600%24, $run /60%60, $left /86400, $left /3600%24, $left /60%60 ); } elsif ( $tot > 2400 ) { # more then 40 minutes, less 2.5 days # status report in hours/minutes return sprintf( "%4.1f%% / %d:%02d => %d:%02d + %d:%02d", 100*$curr, $tot /3600, $tot /60%60 ); # $tot %60, $run /3600, $run /60%60, # $run %60, $left /3600, $left /60%60, # $left %60 ); } else { # status report in minutes/secones return sprintf( "%4.1f%% %d:%02d + %d:%02d => %d:%02d", 100*$curr, $tot /60, $tot %60, $run /60, $run %60, $left /60, $left %60 ); } } ---- sub time_delta { # convert a time in seconds to more human readble string # See other time_delta() reports elsewhere in this file my $secs = shift; my $days = int ($secs/86400); $secs %= 86400; my $hours = int ($secs/3600); $secs %= 3600; my $mins = int ($secs/60); $secs %= 60; return ( $days > 0 ? "$days days $hours:" . sprintf("%02d",$mins) : $hours > 0 ? "$hours hrs $mins mins" : $mins > 0 ? "$mins mins $secs secs" : "only $secs seconds" ); } ---- # Convert bytes to a more human form # See also "humanize" in "info/shell/general.txt" # Remove space between number and ISO units to allow "sort -h" to sort it! # sub humanize_size { my $x = shift; for my $x_unit ( ('bytes', 'kB', 'MB', 'GB', 'TB') ) { return sprintf( "%3.1f %s", $x, $x_unit ) if $x < 800.0; $x /= 1024.0; } } print humanize_size(325000), "\n"; # -> 317.4 kB ------------------------------------------------------------------------------- Sort methods and techniques By numerical hash contents largest first for ( sort {$hash{$b}<=>$hash{$a}} keys %hash ) { ... } Case insenitive sorting sub case_insensitive { "\U$a" cmp "\U$b"; } Numerically sub numerically { $a <=> $b; } Sort Associated array by value not key sub byvalue { $value{$a} <=> $value{$b} } foreach key ( sort byvalue keys %value ) { BODY; } Sort by value then by by key sub val_key { $second{$a} <=> $second{$b} || $a cmp $b } Sort a hierarchal (dot) naming scheme -- Marc Horowitz (maybe ripe for a Schwartzian Transform - see below) IE: paths, newsgroups... sub depthfirst { $aa = $a."/~"; $aa =~ s|/|!|og; $bb = $b."/~"; $bb =~ s|/|!|og; $aa cmp $bb; } Print associative array by value (quickly) This will create a plain array with the value before the name then print the sorted list output. This is very fast. $mask = "%04d %s"; for (@arr) { push (@idx, sprintf($mask, (/^\s+\((\d+)\)\s+(\S+)/))) } print @arr[ sort {$idx[$a] cmp $idx[$b]} 0 .. $#idx ]; undef @idx NOTE: in the above that you CAN sort without a function (directly) Sorting by a computable field. (also known as the "Schwartzian Transform", after Randal Schwartz) The problem with sorting with a computable field is that you could end up computing the field at least 2 or more times in a sort function! The following using a map, to extract, pre-compute and store the sorting values, before sorting them. Afterwards it returns the original element removing the sorting values. In this problem we extract the first number in the given set of strings. NOTE: you start at the bottom line and work your way up. @sorted_fields = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ /(\d+)/, $_ ] } @fields; Another example with a multi-field password file... print map { $_->[0] } # print the original line sort { $a->[1] <=> $b->[1] # first by gid || $a->[2] <=> $b->[2] # then by uid || $a->[3] cmp $b->[3] # and by login (should not be needed) } map { [ $_, (split /:/)[3,2,0] ] } # array: line then sort fields `cat /etc/passwd`; # read password file The sorting field(s) can be either the first, or later field in the generated arrays. Or you could make it part of the data structure that is being sorted. You can read his own notes on this 'transform' in UnixReview Column 64 (May 2006) http://www.stonehenge.com/merlyn/UnixReview/col64.html OR on basic sorting methods in perl in http://www.stonehenge.com/merlyn/UnixReview/col06.html ------------------------------------------------------------------------------- Micro sleep. One method is to use the select() timeout select(undef,undef,undef,.01); You could also set an alarm... (see perl/functions/alarm.pl) ------------------------------------------------------------------------------- Random selections from an array (shuffle) # create array of numbers to shuffle my($i, @number ); for( $i=1; $i<=$NUMBER; $i++ ) { push(@number, $i); } print "number list = ", join(",", @number), "\n"; # create the randomized array by removing elements from a number list # srand($$^time); # randomize random number generator (if desired) while( @number > 0 ) { # while we have a number to be picked push(@randomized, splice(@number, rand(@number), 1) ); # random pick } print "random list = ", join(",", @randomized), "\n"; --- # shuffle array in place... (Perl Cookbook Recipe 4.17) # NOTE: requires a real array argument due to function prototype # sub fisher_yates_shuffle(\@) { my $array = shift; my $i; for ($i = @$array; --$i; ) { my $j = int rand ($i+1); next if $i == $j; @$array[$i,$j] = @$array[$j,$i]; } } fisher_yates_shuffle @array; ------------------------------------------------------------------------------- Incrementing a string using your own rules Perl's auto-increment of strings is limited to specific strings This method defines your own. inc($) increase a single character EG: 3 -> 4 roll($) roll a string a chars EG: 9999 -> 0000 while( <> ) { s/(.*)([0..8])([9]*)$/ $1 . inc($2) . roll($3) /e; print "$_\n"; } The first (.*) makes the RE work faster by ignoring start chars WARNING: 99999 will not increment but 099999 will ------------------------------------------------------------------------------- Format handling You can turn off page breaks the same way it does internally when it notices the lack of a top-of-form format. Just set $- to a huge number. HOWEVER this will result in the top of form NEVER being printed. Correct way is to let the first write happen then assign $- so it can never reach zero again. select(FILEHANDLE); foreach i ( @array ) { ....; write; # write top-of-form and the other lines $- = 99; # form always has 99 lines left - never end the page } ------------------------------------------------------------------------------- Outputing elements in columns Example 5 rows of data. Note the newline statement is BEFORE the element printing { print "Title: " # indent for the first line my( $i, $s ) = ( " "x8, ", " ); # indent and seperator my( $c, $e ) = ( 5, -1 ); # columns per row, element countdown foreach ( @elements ) { print "$s" if $e > 0; # separator if not end-of-line print "\n$i" if $e == 0; # end-of-line and indent $e = $c if $e <= 0; # reset element countdown print "%10s", $_; $e--; # format and print element } print "\n"; } In many cases this general loop can be simplified. For example if no special indent is needed on first line. ... example to be added ... If no special seperator is needed, and it can be used as part of indent. { print "Title:" # no return from previous line my( $c, $e ) = (5); # this many elements per row foreach ( @elements ) { print("\n "),$e=$c unless $e; print " %10s", $_; $e--; # format and print element } print "\n"; } ------------------------------------------------------------------------------- Text Formating and Word Wrapping Perl Text Format (using the standard Text::Wrap module) #!/usr/bin/perl use Text::Wrap; $Text::Wrap::columns = 78; # default: 76 $Text::Wrap::huge = 'overflow'; $_ = 'words' print(Text::Wrap::wrap(' ', ' ', $_),"\n") This one works like "fmt", reading files in paragraph, joining lines and wrapping them. (Perl Cookbook, page 28) #!/bin/perl use Text::Wrap qw(&wrap $columns, $huge); $columns=72; $huge='overflow'; ($/, $\) = ( '', "\n\n"); # read and output mutli-line paragraphs while(<>) { s/\s*\n\s*/ /g; # remove newlines and indents print wrap('', '', $_); # format paragraph } Wrapping using perl 'format' (does not compress inter-word spaces, may need more experimentation) use FileHandle; STDOUT->formline("^" . ("<"x72)."~~\n", @text); You could use a shell command like "fmt" or "fold". But these are not always installed on a machine. This uses "xargs" to do the job, as it is more likely to be available on all systems (portible). WARNING: be sure @text is untainted. system("echo '@text' | xargs -s 70 echo ' '"); See "shell/script.hints" ------------------------------------------------------------------------------- tr and variables problem The tr command will not accept variables, the following is a hack to allow this. This does not solve the delimiter problem however. eval "\$string =~ tr/$chars/$replacement_chars/"; ------------------------------------------------------------------------------- convert a bit vector into a list of intergers $low = -1; $high = -1; $range_cnt = 0; $printed = 0; for $i (0..($bitmap_size-1)) { if (vec($bitmap_ptr, $i, 1) == 1) { if ($low+$range_cnt == $i) { $range_cnt++; } elsif ($range_cnt > 2) { print "..", ($low+$range_cnt-1), ", $i"; $range_cnt = 1; $low = $i; } elsif ($range_cnt == 2) { print ", " if $printed; print $low+1, ", $i"; $printed = 1; $low = $i; $range_cnt = 1; } else { print ", " if $printed; print "$i"; $printed = 1; $low = $i; $range_cnt = 1; } $high = $i; } } if ($high != $low) { if ($range_cnt > 2) { print ".."; } elsif ($range_cnt == 2) { print ", " if $printed; } print "$high"; } print "\n"; dgross@rchland.vnet.ibm.com (Dave Gross) ------------------------------------------------------------------------------- Indirect function calls -- function ptrs sub foo() { print "foo( ", join(", ", @_), " )\n"; } $function = "foo"; # function expression &$function( "arg1", "arg2" ); # indirect call NOTE in version 4 $function can NOT be replaced with an expression though it can in version 5 ------------------------------------------------------------------------------- Command Exit Checks Beware the $! is not reset by the call to system. To be on the safe side you should do: $! = 0; $exit = system('foo'); die "$0: foo: $! (exit $exit)\n" if $!; Also system and `` blocks signals SIGINT and SIGQUIT so such signals will NOT normally effect the main process, only the system call. You need to check $? to see if it was terminated by a signal. system() itself actually returns the program exit status (unlike $?) The $? is not simply the exit status as per the shell, but the full 16 bit value returned by wait(). $?>>8 (High Bits) The actual exit value. $?&255 (Low Bits) Unusual exit due to signal or core dump $?&128 specifies if a core dump was produced $?&127 Gives what signal (if any) terminated the process If /bin/sh is used to run the command (one argument to system() and meta characters (space etc) is present) then the shell will typically print an error message to stderr, $! will not be set, $? >> 8 is the exit status, and signal status will not be recoverable. ------------------------------------------------------------------------------- Set System Limits in perl... =======8<-------- require 'syscall.ph'; require 'sys/resource.ph'; # note h2ph doesn't always win on this one # -- hand editing may be necessary # Arrange so no core files are generated $coresize = pack("i2",0,0); syscall(&SYS_setrlimit, &RLIMIT_CORE, $coresize); # Make stack size large $stacksize = pack("i2",1024*1024*4,1024*1024*4); syscall(&SYS_setrlimit, &RLIMIT_STACK, $stacksize); =======8<-------- ------------------------------------------------------------------------------- User Accounts and perl... The following is dependant on the nsswitch and Solaris systems... getpwent() and shadow password as root The getpwent() function will return the users password to root IF * users password in located in "/etc/passwd" -- fat chance * user is listed in the /etc/shadow file and perl version is >5.005_57 * it was called on the NIS+ server && user is in the NIS+ password file and you are authorized to see that password. Only in these cases will the getpwent() perl function return the users encrypted login password. This is a real pain. Especially as perl does not have access to the system librarys shadow database getsd* functions. getpwnam(user) The getpwnam(user) will never return the users password, but will let you know if this user is actually a valid login user of this machine. EG: user is in /etc/passwd or the appropriate netgroups access to the NIS+ Login Group restrictions... A user which can not login due to some login groups restrictions (EG: NetGroups under NIS, or a LDAP authenticated login group) will NOT be listed by ANY of the getpw* functions. In other words a user which was disabled due to group access, may not be listed, dependant on the nsswitch settings (EG: "compact" setting or LDAP authenticated search restrictions) The alturnative is to always list all potential users (via nss), even if they are NOT in the right login group for this machine. Then if nessary, reject those with denied login by group access manually yourself. This however mean that unless your perl script is "pam" smart, it can NOT determine if a user still present in the password database, but is denied login access (via group), should have their home cleaned up and deleted without some other external indication of the users "final deletion". ------------------------------------------------------------------------------- Randomise srand() Randomising the srand function can be very difficult. The seed may not change quickly, or never change in a programs life time, same have only have a limited number of start seeds (0 to 60,000 for process ids). And combineing them may still be limited. Quick (limited) choices... * Current time (won't change within same `tick' time() * Using the process id of the current process $$ * Time and process id time() ^ $$ * Using the process id of a sub-shell (always different) `echo \$\$` Randomise on gziped process list ( from apache 1.3.3 dbmmanage) sub randomise { my $psf; for (qw(-xlwwa -le)) { `ps $_ 2>/dev/null`; $psf = $_, last unless $?; } srand (unpack("%L*", `ps $psf | gzip -f`)); } Alternatively Use the `cksum' on the same source. this is faster but may not be availble on all systems. sub randomise { my $psf; for (qw(-xlwwa -le)) { `ps $_ 2>/dev/null`; # test option $psf = $_, last unless $?; } srand ( `ps $psf | cksum` ); } WARNING: process list generation could be slow! Especially on a machine using a remote password list (like LDAP or NIS+), and network problems hit. Another major problem is that the ps options vary from machine to machine which results in the need for option checking making it even slower. Also output does not change much as some parts giz archive start is a constant. Also note that perl5.005 and later automatically randomises the random number generator Albert Cahalan suggests you use the following list of "ps" options. -le -xlwwa (putting POSIX-standard -le first) -elwwa (valid for both BSD and UNIX) xlwwa -le (removing the initial "-" kills a warning on some systems) ------------------------------------------------------------------------------- Password Encryption in Perl Specifically the 'salted' password encryption generally you would do something like... Initialization... @salt_set = ('a'..'z', 'A'..'Z', '0'..'9', '.', '/'); $salt_size = scalar @salt_set; # should be 64 characters! Method 1... # From Example in Perl 4 Camel Book # The salt for today is seleted by the traditional method sub gen_salt { my($passwd) = @_; my($perturb1,$perturb2,$week); # perturb the salt with start of input passwd ($perturb1,$perturb2) = unpack("C2", $passwd); $week = time() / (60*60*24*7) + $perturb1 + $perturb2; return( $salt_set[ $week % $salt_size ] . $salt_set[ time() % $salt_size ] ); } crypt( $passwd, gen_salt($passwd) ); Method 2... # Extracted from dbmmanage in Apatche 1.3.3 distribution # randomise the salt for all strings. sub gen_salt { join('', map($salt_set[rand $salt_size], 1..2) ); } crypt( $passwd, gen_salt() ); Other Techniques... Generate a random password from $logname # 8 character randomised passwd # method: encrypt the logname then grab LAST 8 chars $passwd = substr( crypt( $logname, gen_salt() ), -8, 8 ); # Substitute characters which could be misinterperted # EG: characters O0Q all look simular, and dot may be missed $passwd =~ tr|0OQ./+1Il^#;|XYZabc234rst|; ------------------------------------------------------------------------------- Vgrind entry for perl programs PERL|perl|Perl:\ :pb=^\d?(sub|package)\d\p\d:\ :bb={:be=}:cb=#:ce=$:sb=":se=\e":lb=':\ :le=\e':tl:\ :id=_:\ :kw=\ if for foreach unless until while continue else elsif \ do eval require \ die exit \ defined delete reset \ goto last redo next dump \ local undef return \ write format \ sub package NOTE: things like $#, $', s#/foo##, and $foo'bar confuse vgrind ------------------------------------------------------------------------------- Suid Vulnerability (v5.002) Suid Perlscripts using suidperl or sperl are insecure due to a race condition on some systems. The program does not relinquish its root privileges properly. Patch available or get and install 5.003 or a C wrapper can be used. ftp://coombs.anu.edu.au/pub/perl/src/fixsperl-0 -------------------------------------------------------------------------------