sum1 # sum1 - print column sums
sum1 #   input:  rows of numbers
sum1 #   output: sum of each column
sum1 #     missing entries are treated as zeros
sum1 
sum1     { for (i = 1; i <= NF; i++)
sum1           sum[i] += $i
sum1       if (NF > maxfld)        # remember the widest row seen
sum1           maxfld = NF
sum1     }
sum1 END { for (i = 1; i <= maxfld; i++) {
sum1           printf("%g", sum[i])
sum1           if (i < maxfld)
sum1               printf("\t")
sum1           else
sum1               printf("\n")
sum1       }
sum1     }
sum2 # sum2 - print column sums
sum2 #     check that each line has the same number of fields
sum2 #     as line one
sum2 
sum2 NR==1 { nfld = NF }
sum2       { for (i = 1; i <= NF; i++)
sum2             sum[i] += $i
sum2         if (NF != nfld)
sum2             print "line " NR " has " NF " entries, not " nfld
sum2       }
sum2 END   { for (i = 1; i <= nfld; i++)
sum2             printf("%g%s", sum[i], i < nfld ? "\t" : "\n")
sum2       }
sum3 # sum3 - print sums of numeric columns
sum3 #     input:  rows of integers and strings
sum3 #     output: sums of numeric columns
sum3 #       assumes every line has same layout
sum3 
sum3 NR==1 { nfld = NF
sum3         for (i = 1; i <= NF; i++)
sum3             numcol[i] = isnum($i)   # line one decides which columns are numeric
sum3       }
sum3 
sum3       { for (i = 1; i <= NF; i++)
sum3             if (numcol[i])
sum3                 sum[i] += $i
sum3       }
sum3 
sum3 END   { for (i = 1; i <= nfld; i++) {
sum3             if (numcol[i])
sum3                 printf("%g", sum[i])
sum3             else
sum3                 printf("--")
sum3             printf(i < nfld ? "\t" : "\n")
sum3         }
sum3       }
sum3 
sum3 function isnum(n) { return n ~ /^[+-]?[0-9]+$/ }
3-4.ans # 3-4.ans - total field 2 for each distinct value of field 1
3-4.ans #   output: name and total, piped through sort
3-4.ans     { total[$1] += $2 }
3-4.ans END { for (x in total) print x, total[x] | "sort" }
percent # percent
percent #   input:  a column of nonnegative numbers
percent #   output: each number and its percentage of the total
percent 
percent     { x[NR] = $1; sum += $1 }
percent 
percent END { if (sum != 0)      # nothing is printed when the total is zero
percent           for (i = 1; i <= NR; i++)
percent               printf("%10.2f %5.1f\n", x[i], 100*x[i]/sum)
percent     }
histogram # histogram
histogram #   input:  numbers between 0 and 100
histogram #   output: histogram of deciles
histogram 
histogram     { x[int($1/10)]++ }
histogram 
histogram END { for (i = 0; i < 10; i++)
histogram           printf(" %2d - %2d: %3d %s\n",
histogram                10*i, 10*i+9, x[i], rep(x[i],"*"))
histogram       # label padded to the width of the decile labels so counts line up
histogram       printf("100:      %3d %s\n", x[10], rep(x[10],"*"))
histogram     }
histogram 
histogram function rep(n,s,   t) {  # return string of n s's
histogram     while (n-- > 0)
histogram         t = t s
histogram     return t
histogram }
hist.sh # hist.sh - feed 200 random integers in 0..100 to the histogram program
hist.sh awk '
hist.sh # generate random integers
hist.sh BEGIN { for (i = 1; i <= 200; i++)
hist.sh             print int(101*rand())
hist.sh       }
hist.sh ' |
hist.sh awk -f histogram
histans1.awk # histans1.awk - histogram of deciles with scaled bars
histans1.awk #   bars are scaled down so the longest has at most MAXSTARS stars
histans1.awk 
histans1.awk     { x[int($1/10)]++ }
histans1.awk END { max = MAXSTARS = 25    # counts up to MAXSTARS are not scaled
histans1.awk       for (i = 0; i <= 10; i++)
histans1.awk           if (x[i] > max)
histans1.awk               max = x[i]
histans1.awk       for (i = 0; i <= 10; i++)
histans1.awk           y[i] = x[i]/max * MAXSTARS
histans1.awk       for (i = 0; i < 10; i++)
histans1.awk           printf(" %2d - %2d: %3d %s\n",
histans1.awk                10*i, 10*i+9, x[i], rep(y[i],"*"))
histans1.awk       # label padded to the width of the decile labels so counts line up
histans1.awk       printf("100:      %3d %s\n", x[10], rep(y[10],"*"))
histans1.awk     }
histans1.awk 
histans1.awk function rep(n,s,   t) {  # return string of n s's
histans1.awk     while (n-- > 0)
histans1.awk         t = t s
histans1.awk     return t
histans1.awk }
sumcomma # sumcomma - add up numbers containing commas
sumcomma 
sumcomma     { gsub(/,/, ""); sum += $0 }
sumcomma END { print sum }
addcomma # addcomma - put commas in numbers
addcomma #   input:  a number per line
addcomma #   output: the input number followed by
addcomma #      the number with commas and two decimal places
addcomma 
addcomma     { printf("%-12s %20s\n", $0, addcomma($0)) }
addcomma 
addcomma function addcomma(x,   num) {
addcomma     if (x < 0)
addcomma         return "-" addcomma(-x)
addcomma     num = sprintf("%.2f", x)   # num is dddddd.dd
addcomma     # insert a comma before the last 3-digit group that is followed by , or .
addcomma     while (num ~ /[0-9][0-9][0-9][0-9]/)
addcomma         sub(/[0-9][0-9][0-9][,.]/, ",&", num)
addcomma     return num
addcomma }
addcomma.ans # addcomma.ans - sum integers whose commas are correctly placed
addcomma.ans #   lines in any other format are reported and not added
addcomma.ans /^[+-]?[0-9][0-9]?[0-9]?(,[0-9][0-9][0-9])*$/ {
addcomma.ans         gsub(/,/, "")
addcomma.ans         sum += $0
addcomma.ans         next
addcomma.ans }
addcomma.ans       { print "bad format:", $0 }
addcomma.ans END   { print sum }
addcomma.ans2 # addcomma.ans2 - as addcomma.ans, but a decimal fraction is also accepted
addcomma.ans2 /^[+-]?[0-9][0-9]?[0-9]?(,[0-9][0-9][0-9])*([.][0-9]*)?$/ {
addcomma.ans2         gsub(/,/, "")
addcomma.ans2         sum += $0
addcomma.ans2         next
addcomma.ans2 }
addcomma.ans2       { print "bad format:", $0}
addcomma.ans2 END   { print sum }
datecvt # date convert - convert mmddyy into yymmdd in $1
datecvt 
datecvt { $1 = substr($1,5,2) substr($1,1,2) substr($1,3,2); print }
date.data 013042 mary's birthday
date.data 032772 mark's birthday
date.data 052470 anniversary
date.data 061209 mother's birthday
date.data 110175 elizabeth's birthday
daynum # daynum - convert year, month, day in $1, $2, $3 to a day number
daynum function daynum(y, m, d,    days, i, n) {   # 1 == Jan 1, 1901
daynum     split("31 28 31 30 31 30 31 31 30 31 30 31", days)
daynum     # 365 days a year, plus one for each leap year
daynum     n = (y-1901) * 365 + int((y-1901)/4)
daynum     if (y % 4 == 0)   # leap year from 1901 to 2099
daynum         days[2]++
daynum     for (i = 1; i < m; i++)
daynum         n += days[i]
daynum     return n + d
daynum }
daynum     { print daynum($1, $2, $3) }
nm.output file.o:
nm.output 00000c80 T _addroot
nm.output 00000b30 T _checkdev
nm.output 00000a3c T _checkdupl
nm.output          U _chown
nm.output          U _client
nm.output          U _close
nm.output funmount.o:
nm.output 00000000 T _funmount
nm.output          U cerror
nm.format # nm.format - add filename to each nm output line
nm.format 
nm.format NF == 1 { file = $1 }
nm.format NF == 2 { print file, $1, $2 }
nm.format NF == 3 { print file, $2, $3 }
prchecks # prchecks - print formatted checks
prchecks #   input:  number \t amount \t payee
prchecks #   output: eight lines of text for preprinted check forms
prchecks 
prchecks BEGIN {
prchecks     FS = "\t"
prchecks     dashes = sp45 = sprintf("%45s", " ")
prchecks     gsub(/ /, "-", dashes)        # to protect the payee
prchecks     "date" | getline date         # get today's date
prchecks     split(date, d, " ")
prchecks     date = d[2] " " d[3] ", " d[6]
prchecks     initnum()    # set up tables for number conversion
prchecks }
prchecks NF != 3 || $2 >= 1000000 {        # illegal data
prchecks     printf("\nline %d illegal:\n%s\n\nVOID\nVOID\n\n\n", NR, $0)
prchecks     next                          # no check printed
prchecks }
prchecks {   printf("\n")                  # nothing on line 1
prchecks     printf("%s%s\n", sp45, $1)    # number, indented 45 spaces
prchecks     printf("%s%s\n", sp45, date)  # date, indented 45 spaces
prchecks     amt = sprintf("%.2f", $2)     # formatted amount
prchecks     printf("Pay to %45.45s $%s\n", $3 dashes, amt)  # line 4
prchecks     printf("the sum of %s\n", numtowords(amt))      # line 5
prchecks     printf("\n\n\n")              # lines 6, 7 and 8
prchecks }
prchecks 
prchecks function numtowords(n,   cents, dols) { # n has 2 decimal places
prchecks     cents = substr(n, length(n)-1, 2)
prchecks     dols = substr(n, 1, length(n)-3)
prchecks     if (dols == 0)
prchecks         return "zero dollars and " cents " cents exactly"
prchecks     return intowords(dols) " dollars and " cents " cents exactly"
prchecks }
prchecks 
prchecks function intowords(n) {
prchecks     n = int(n)
prchecks     if (n >= 1000)
prchecks         return intowords(n/1000) " thousand " intowords(n%1000)
prchecks     if (n >= 100)
prchecks         return intowords(n/100) " hundred " intowords(n%100)
prchecks     if (n >= 20)
prchecks         return tens[int(n/10)] " " intowords(n%10)
prchecks     return nums[n]
prchecks }
prchecks 
prchecks function initnum() {
prchecks     split("one two three four five six seven eight nine " \
prchecks           "ten eleven twelve thirteen fourteen fifteen " \
prchecks           "sixteen seventeen eighteen nineteen", nums, " ")
prchecks     split("ten twenty thirty forty fifty sixty " \
prchecks           "seventy eighty ninety", tens, " ")
prchecks }
checkfix.ans # prchecks - print formatted checks
checkfix.ans #   input:  number \t amount \t payee
checkfix.ans #   output: eight lines of text for preprinted check forms
checkfix.ans 
checkfix.ans BEGIN {
checkfix.ans     FS = "\t"
checkfix.ans     dashes = sp45 = sprintf("%45s", " ")
checkfix.ans     gsub(/ /, "-", dashes)        # to protect the payee
checkfix.ans     "date" | getline date         # get today's date
checkfix.ans     split(date, d, " ")
checkfix.ans     date = d[2] ". " d[3] ", " d[6]
checkfix.ans     initnum()    # set up tables for number conversion
checkfix.ans }
checkfix.ans NF != 3 {
checkfix.ans     printf("\nrec %d has %d fields:\n|%s|\n\nVOID\nVOID\n\n\n",
checkfix.ans         NR, NF, $0)
checkfix.ans     next
checkfix.ans }
checkfix.ans {   printf("\n")                  # nothing on line 1
checkfix.ans     printf("%s%s\n", sp45, $1)    # number, indented 45 spaces
checkfix.ans     printf("%s%s\n", sp45, date)  # date, indented 45 spaces
checkfix.ans     amt = sprintf("%.2f", $2)     # formatted amount
checkfix.ans     printf("Pay to %45.45s $%s\n", $3 dashes, amt)  # line 4
checkfix.ans     printf("the sum of %s\n", numtowords(amt))      # line 5
checkfix.ans     printf("\n\n\n")              # lines 6, 7 and 8
checkfix.ans }
checkfix.ans function numtowords(n,   cents, dols, s) { # n has 2 decimal places
checkfix.ans     cents = substr(n, length(n)-1, 2)
checkfix.ans     dols = substr(n, 1, length(n)-3)
checkfix.ans     if (dols == 0)
checkfix.ans         s = "zero dollars and " cents " cents exactly"
checkfix.ans     else
checkfix.ans         s = intowords(dols) " dollars and " cents " cents exactly"
checkfix.ans     sub(/^one dollars/, "one dollar", s)   # singular for exactly one
checkfix.ans     gsub(/ +/, " ", s)                     # squeeze runs of blanks
checkfix.ans     return s
checkfix.ans }
checkfix.ans function intowords(n) {
checkfix.ans     n = int(n)
checkfix.ans     if (n >= 1000000)
checkfix.ans         return("VOID")
checkfix.ans     if (n >= 1000)
checkfix.ans         return intowords(n/1000) " thousand " intowords(n%1000)
checkfix.ans     if (n >= 100)
checkfix.ans         return intowords(n/100) " hundred " intowords(n%100)
checkfix.ans     if (n >= 20)
checkfix.ans         return tens[int(n/10)] " " intowords(n%10)
checkfix.ans     return nums[n]
checkfix.ans }
checkfix.ans function initnum() {
checkfix.ans     split("one two three four five six seven eight nine " \
checkfix.ans           "ten eleven twelve thirteen fourteen fifteen " \
checkfix.ans           "sixteen seventeen eighteen nineteen", nums, " ")
checkfix.ans     split("ten twenty thirty forty fifty sixty " \
checkfix.ans           "seventy eighty ninety", tens, " ")
checkfix.ans }
colcheck # colcheck - check consistency of columns
colcheck #   input:  rows of numbers and strings
colcheck #   output: lines whose format differs from first line
colcheck 
colcheck NR == 1 {
colcheck     nfld = NF
colcheck     for (i = 1; i <= NF; i++)
colcheck        type[i] = isnum($i)
colcheck }
colcheck {   if (NF != nfld)
colcheck        printf("line %d has %d fields instead of %d\n",
colcheck           NR, NF, nfld)
colcheck     for (i = 1; i <= NF; i++)
colcheck        if (isnum($i) != type[i])
colcheck           printf("field %d in line %d differs from line 1\n",
colcheck              i, NR)
colcheck }
colcheck 
colcheck function isnum(n) { return n ~ /^[+-]?[0-9]+$/ }
p12check # p12check - check input for alternating .P1/.P2 delimiters
p12check 
p12check /^\.P1/ { if (p != 0)
p12check               print ".P1 after .P1, line", NR
p12check           p = 1
p12check         }
p12check /^\.P2/ { if (p != 1)
p12check               print ".P2 with no preceding .P1, line", NR
p12check           p = 0
p12check         }
p12check END     { if (p != 0) print "missing .P2 at end" }
delim.ans # delim.ans - check pairing of delimiters aa/bb, cc/dd, ee/ff
delim.ans #   p holds the closing delimiter currently expected, or "" if none
delim.ans BEGIN {
delim.ans     expects["aa"] = "bb"
delim.ans     expects["cc"] = "dd"
delim.ans     expects["ee"] = "ff"
delim.ans }
delim.ans /^(aa|cc|ee)/ {
delim.ans     if (p != "")
delim.ans         print "line", NR, ": expected " p
delim.ans     p = expects[substr($0, 1, 2)]
delim.ans }
delim.ans /^(bb|dd|ff)/ {
delim.ans     x = substr($0, 1, 2)
delim.ans     if (p != x) {
delim.ans         print "line", NR, ": saw " x
delim.ans         if (p)
delim.ans             print ", expected", p
delim.ans     }
delim.ans     p = ""
delim.ans }
delim.ans END {
delim.ans     if (p != "")
delim.ans         print "at end, missing", p
delim.ans }
passwd # passwd - check password file
passwd 
passwd BEGIN {
passwd     FS = ":" }
passwd NF != 7 {
passwd     printf("line %d, does not have 7 fields: %s\n", NR, $0) }
passwd $1 ~ /[^A-Za-z0-9]/ {
passwd     printf("line %d, nonalphanumeric user id: %s\n", NR, $0) }
passwd $2 == "" {
passwd     printf("line %d, no password: %s\n", NR, $0) }
passwd $3 ~ /[^0-9]/ {
passwd     printf("line %d, nonnumeric user id: %s\n", NR, $0) }
passwd $4 ~ /[^0-9]/ {
passwd     printf("line %d, nonnumeric group id: %s\n", NR, $0) }
passwd $6 !~ /^\// {
passwd     printf("line %d, invalid login directory: %s\n", NR, $0) }
checkgen.data NF != 7	does not have 7 fields
checkgen.data $1 ~ /[^A-Za-z0-9]/	nonalphanumeric user id
checkgen.data $2 == ""	no password
checkgen # checkgen - generate data-checking program
checkgen #     input:  expressions of the form: pattern tabs message
checkgen #     output: program to print message when pattern matches
checkgen 
checkgen BEGIN { FS = "\t+" }
checkgen { printf("%s {\n\tprintf(\"line %%d, %s: %%s\\n\",NR,$0) }\n",
checkgen       $1, $2)
checkgen }
valid.ans # valid.ans - checkgen variant: a line starting with = is copied
valid.ans #   to the output without the =; fields are separated by one tab
valid.ans BEGIN { FS = "\t" }
valid.ans /^=/  { print substr($0, 2); next }
valid.ans       { printf("%s {\n\tprintf(\"line %%d, %s: %%s\\n\",NR,$0) }\n",
valid.ans             $1, $2)
valid.ans       }
compat # compat - check if awk program uses new built-in names
compat 
compat BEGIN { asplit("close system atan2 sin cos rand srand " \
compat                "match sub gsub", fcns)
compat         asplit("ARGC ARGV FNR RSTART RLENGTH SUBSEP", vars)
compat         asplit("do delete function return", keys)
compat       }
compat 
compat       { line = $0 }
compat 
compat /"/   { gsub(/"([^"]|\\")*"/, "", line) }     # remove strings,
compat /\//  { gsub(/\/([^\/]|\\\/)+\//, "", line) } # reg exprs,
compat /#/   { sub(/#.*/, "", line) }                # and comments
compat 
compat       { n = split(line, x, "[^A-Za-z0-9_]+")  # into words
compat         for (i = 1; i <= n; i++) {
compat             if (x[i] in fcns)
compat                 warn(x[i] " is now a built-in function")
compat             if (x[i] in vars)
compat                 warn(x[i] " is now a built-in variable")
compat             if (x[i] in keys)
compat                 warn(x[i] " is now a keyword")
compat         }
compat       }
compat 
compat # n, i and temp are locals so the main rule's n and i are never clobbered
compat function asplit(str, arr,   n, i, temp) {  # make an assoc array from str
compat     n = split(str, temp)
compat     for (i = 1; i <= n; i++)
compat         arr[temp[i]]++
compat     return n
compat }
compat 
compat function warn(s) {
compat     sub(/^[ \t]*/, "")    # strip leading blanks from $0 before echoing it
compat     printf("file %s, line %d: %s\n\t%s\n", FILENAME, FNR, s, $0)
compat }
bundle # bundle - combine multiple files into one
bundle 
bundle { print FILENAME, $0 }
unbundle # unbundle - unpack a bundle into separate files
unbundle #   input:  lines of the form "filename text"
unbundle #   output: text appended to the file named by $1
unbundle 
unbundle NF == 0    { next }    # no file name on this line: nothing to write
unbundle $1 != prev { close(prev); prev = $1 }
unbundle            { sp = index($0, " ")
unbundle              # a line holding only the file name (its trailing blank
unbundle              # stripped) stands for an empty line in that file
unbundle              text = sp ? substr($0, sp + 1) : ""
unbundle              print text >$1
unbundle            }
addr.1 Adam Smith
addr.1 1234 Wall St., Apt. 5C
addr.1 New York, NY 10021
addr.1 212 555-4321
addr.1 
addr.1 David W. Copperfield
addr.1 221 Dickens Lane
addr.1 Monterey, CA 93940
addr.1 408 555-0041
addr.1 work phone 408 555-6532
addr.1 Mary, birthday January 30
addr.1 
addr.1 Canadian Consulate
addr.1 555 Fifth Ave
addr.1 New York, NY
addr.1 212 586-2400
ny1.awk # ny1.awk - print blank-line-separated records that contain New York
ny1.awk BEGIN { RS = "" }
ny1.awk /New York/
ny2.awk # ny2.awk - as ny1.awk, with a blank line after each record printed
ny2.awk BEGIN { RS = ""; ORS = "\n\n" }
ny2.awk /New York/
smith.awk # smith.awk - records are paragraphs, fields are lines
smith.awk BEGIN { RS = ""; FS = "\n" }
smith.awk $1 ~ /Smith$/ { print $1, $4 }  # name, phone
msort.sh # pipeline to sort address list by last names
msort.sh 
msort.sh awk '
msort.sh BEGIN { RS = ""; FS = "\n" }
msort.sh       { printf("%s!!#", x[split($1, x, " ")])
msort.sh         for (i = 1; i <= NF; i++)
msort.sh             printf("%s%s", $i, i < NF ? "!!#" : "\n")
msort.sh       }
msort.sh ' |
msort.sh sort |
msort.sh awk '
msort.sh BEGIN { FS = "!!#" }
msort.sh       { for (i = 2; i <= NF; i++)
msort.sh             printf("%s\n", $i)
msort.sh         printf("\n")
msort.sh       }
msort.sh '
addr.2 accountant
addr.2 Adam Smith
addr.2 1234 Wall St., Apt. 5C
addr.2 New York, NY 10021
addr.2 
addr.2 doctor - ophthalmologist
addr.2 Dr. Will Seymour
addr.2 798 Maple Blvd.
addr.2 Berkeley Heights, NJ 07922
addr.2 
addr.2 lawyer
addr.2 David W. Copperfield
addr.2 221 Dickens Lane
addr.2 Monterey, CA 93940
addr.2 
addr.2 doctor - pediatrician
addr.2 Dr. Susan Mark
addr.2 600 Mountain Avenue
addr.2 Murray Hill, NJ 07974
doctors1.awk # doctors1.awk - print from each doctor header line through the next empty line
doctors1.awk /^doctor/, /^$/
doctors2.awk # doctors2.awk - as doctors1.awk, but the header line itself is not printed
doctors2.awk /^doctor/ { p = 1; next }
doctors2.awk p == 1
doctors2.awk /^$/      { p = 0; next }
checks.data check	1021
checks.data to	Champagne Unlimited
checks.data amount	123.10
checks.data date	1/1/87
checks.data 
checks.data deposit
checks.data amount	500.00
checks.data date	1/1/87
checks.data 
checks.data check	1022
checks.data date	1/2/87
checks.data amount	45.10
checks.data to	Getwell Drug Store
checks.data tax	medical
checks.data 
checks.data check	1023
checks.data amount	125.00
checks.data to	International Travel
checks.data date	1/3/87
checks.data 
checks.data amount	50.00
checks.data to	Carnegie Hall
checks.data date	1/3/87
checks.data check	1024
checks.data tax	charitable contribution
checks.data 
checks.data to	American Express
checks.data check	1025
checks.data amount	75.75
checks.data date	1/5/87
check1 # check1 - print total deposits and checks
check1 
check1 /^check/   { ck = 1; next }
check1 /^deposit/ { dep = 1; next }
check1 /^amount/  { amt = $2; next }
check1 /^$/       { addup() }
check1 
check1 END        { addup()      # the last record has no empty line after it
check1              printf("deposits $%.2f, checks $%.2f\n",
check1                  deposits, checks)
check1            }
check1 
check1 function addup() {
check1     if (ck)
check1         checks += amt
check1     else if (dep)
check1         deposits += amt
check1     ck = dep = amt = 0
check1 }
check2 # check2 - print total deposits and checks
check2 
check2 BEGIN { RS = ""; FS = "\n" }
check2 /(^|\n)deposit/ { deposits += field("amount"); next }
check2 /(^|\n)check/   { checks += field("amount"); next }
check2 END   { printf("deposits $%.2f, checks $%.2f\n",
check2             deposits, checks)
check2       }
check2 
check2 # field - return the value on the line of this record whose name is given;
check2 #   each line is name, tab, value
check2 function field(name,   i,f) {
check2     for (i = 1; i <= NF; i++) {
check2         split($i, f, "\t")
check2         if (f[1] == name)
check2             return f[2]
check2     }
check2     printf("error: no field %s in record\n%s\n", name, $0)
check2 }
check3 # check3 - print check information
check3 
check3 BEGIN { RS = ""; FS = "\n" }
check3 /(^|\n)check/ {
check3     for (i = 1; i <= NF; i++) {
check3         split($i, f, "\t")
check3         val[f[1]] = f[2]
check3     }
check3     printf("%8s %5d %8s %s\n",
check3         val["date"],
check3         val["check"],
check3         sprintf("$%.2f", val["amount"]),
check3         val["to"])
check3     for (i in val)        # forget this record before reading the next
check3         delete val[i]
check3 }