Links & files

Awk reference and resources	Example files
gnu.org	`mail-list`
oreilly effective awk programming	`inventory-shipped`

📑 Functions#

Built-in Functions#

Calling Built-in Functions#

function(arg1,… argn)

The function’s parameters are evaluated completely before the function’s call is performed.

Numeric Functions#

atan2(y, x)	return the arctangent of y / x in radians
cos(x)	return the cosine of x , with x in radians
exp(x)	return the exponential of x ( e ^ x )
int(x)	return the integer part of x, truncated toward zero (int(3.9) is 3, int(-3.9) is -3)
log(x)	return the natural logarithm of x
rand()	return a random number uniformly distributed between zero and one (it can be zero but is never one)
sin(x)	return the sine of x , with x in radians.
sqrt(x)	return the positive square root of x
srand([x])	set the starting point for generating random numbers to the value x

awk '# roll --- simulate one throw of an n-sided die (result in 1..n).
function roll(n)
{
    return int(rand() * n) + 1
}

# For every input record, throw three six-sided dice
# and report the combined score.
{
    total = roll(6) + roll(6) + roll(6)
    printf("%d points\n", total)
}'

String-Manipulation Functions#

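asort(source[,dest[,how]])	sort the values of source and return the number of elements; if dest is specified, source is left unchanged and the sorted result is stored in dest
asorti(source[,dest[,how]])	same as asort(), but sort the indices of source instead of its values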

awk 'BEGIN {
    a["last"] = "de"
    a["first"] = "sac"
    a["middle"] = "cul"
    # Before sorting: for-in visits the elements in an unspecified order.
    for (i in a)
        printf "%s\n", a[i]
    # asort() replaces the indices with 1..n and returns n.
    n = asort(a)
    # Walk the indices numerically: for-in would not guarantee
    # that the sorted values come out in sorted order.
    for (i = 1; i <= n; i++)
        printf "%s\n", a[i]
}'

sac
cul
de
cul
de
sac

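gensub(regexp,replacement,how[,target])	return a copy of target (default $0) with matches of regexp replaced by replacement: all of them if how begins with g or G, otherwise only match number how; target itself is not modified
gsub(regexp,replacement[,target])	replace all matches of regexp in target (default $0) with replacement and return the number of substitutions made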

awk 'BEGIN {
    # Swap the two space-separated words: \\1 and \\2 in the
    # replacement refer to the first and second parenthesized groups.
    text = "abc def"
    swapped = gensub(/(.+) (.+)/, "\\2 \\1", "g", text)
    print swapped
}'

def abc

echo 'a b c a d e' | awk '{
    # how = 1: replace only the first match
    print gensub(/a/, "AA", 1, $0)
    # how = 2: replace only the second match
    print gensub(/a/, "AA", 2, $0)
    # how = "g": replace every match
    print gensub(/a/, "AA", "g", $0)
}'

AA b c a d e
a b c AA d e
AA b c AA d e

index(in,find)	return the position of find in string in
length([string])	return the number of characters in string

awk 'BEGIN {
    haystack = "guillaume"
    # index() returns the 1-based position of the first occurrence.
    pos_aume = index(haystack, "aume")
    pos_gui = index(haystack, "gui")
    print pos_aume
    print pos_gui
}'

awk 'length($1) > 8' mail-list

match(string,regexp[,array])

search string for the longest, leftmost substring matched by regexp and return the character position at which that substring begins (0 if there is no match)

echo 'FIND ru+n
My program runs
but not very quickly
FIND guisam
JF+KM
This line is property of Reality Engineering Co.
guisam was here.' | awk '
# A "FIND" record sets the regexp used for the records that follow.
$1 == "FIND" {
    regex = $2
    next
}

# Every other record: report where the current regexp first matches.
{
    where = match($0, regex)
    if (where)
        print "Match of", regex, "found at", where, "in", $0
}'

Match of ru+n found at 12 in My program runs
Match of guisam found at 1 in guisam was here.

echo "guillaume blablaba samblahblah" | awk '
# With a third argument, match() stores the text of each
# parenthesized group in arr[1], arr[2], ...
{
    if (match($0, /(gui).*(sam).*/, arr))
        print arr[1]arr[2]
}'

guisam

echo "guillaume blablaba samsamsam blahblah" | awk '
# arr[n, "start"] and arr[n, "length"] give the position and
# size of the text matched by the n-th parenthesized group.
{
    if (match($0, /(gui).*(sam).*/, arr)) {
        print arr[1]arr[2]
        print arr[1, "start"], arr[1, "length"]
        print arr[2, "start"], arr[2, "length"]
    }
}'

guisam
1 3
26 3

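patsplit(string,array[,fieldpat[,seps]])	divide string into pieces matched by fieldpat and store the pieces in array and the separator strings in the seps array
split(string,array[,fieldsep[,seps]])	divide string into pieces separated by fieldsep and store the pieces in array and the separator strings in the seps array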

awk 'BEGIN {
    # split() returns the number of pieces stored in a[1] .. a[n].
    n = split("cul-de_sac", a, /-|_/, seps)
    # Iterate numerically: for-in visits elements in an unspecified
    # order, so it would not guarantee the listing below.
    for (i = 1; i <= n; i++)
        printf "%s -> %s\n", "a["i"]", a[i]
    # seps[j] is the separator found between a[j] and a[j+1],
    # so there are n - 1 of them here.
    for (j = 1; j < n; j++)
        printf "%s -> %s\n", "seps["j"]", seps[j]
}'

a[1] -> cul
a[2] -> de
a[3] -> sac
seps[1] -> -
seps[2] -> _

sprintf(format,expression1,…)

return (without printing) the string that printf would have printed out

awk 'BEGIN {
    # sprintf() builds the string; nothing is printed until print runs.
    approx = 22/7
    pival = sprintf("pi = %.2f (approx.)", approx)
    print pival
}'

pi = 3.14 (approx.)

strtonum(str)

examine str and return its numeric value

echo 0x17 | awk '{ value = strtonum($1); printf("%.2f\n", value) }'

23.00

sub(regexp,replacement[,target])

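replace only the first (leftmost, longest) match of regexp in target (default $0) with replacement and return the number of substitutions made (0 or 1)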

awk 'BEGIN {
    str = "guillaume, guillaume, everywhere"
    # sub() edits str in place and returns how many substitutions it made.
    count = sub(/llaume/, "sam", str)
    print count
    print str
    # In the replacement, & stands for the text that was matched.
    sub("guisam", "& was here", str)
    print str
}'

1
guisam, guillaume, everywhere
guisam was here, guillaume, everywhere

substr(string,start[,length])

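return the substring of string that is length characters long and starts at character number start (the first character is number 1); without length, return everything from start to the end of string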

awk 'BEGIN {
    str = "abcdefghi"
    head = substr(str, 1, 3)    # first three characters
    tail = substr(str, 6)       # character 6 through the end
    print head "DE" tail
}'

abcDEfghi

tolower(string)	each uppercase character in the string replaced with lowercase character
toupper(string)	each lowercase character in the string replaced with uppercase character

Input/Output Functions#

close(filename[,how])	close the file filename for input or output
fflush([filename])	flush any buffered output
system(command)	execute the operating system command command

Interactive Versus Noninteractive Buffering#

awk '{print $1$2}'
1 2
12
gui sam
guisam

awk '{print $1$2}' | cat
1 2
gui sam
12
guisam

Time Functions#

mktime(datespec)

turn datespec into a timestamp

awk 'BEGIN {
    # datespec has the form "YYYY MM DD HH MM SS"
    stamp = mktime("2020 02 27 13 31 31")
    print stamp
}'

1582806691

strftime([format[,timestamp[,utc-flag]]])

format the time timestamp based on the format string

Default format is PROCINFO["strftime"]

"%a %b %e %H:%M:%S %Z %Y"

awk 'BEGIN {
    # No arguments: current time in the default format.
    print strftime()
    # Explicit format and timestamp; the third argument 1 selects UTC.
    print strftime("%F %T %Z", "1582733718", 1)
}'

Thu Feb 27 01:38:47 CET 2020
2020-02-26 16:15:18 GMT

systime()

current time as the number of seconds since the system epoch

awk 'BEGIN {
    now = systime()
    fmt = PROCINFO["strftime"]
    # Same timestamp, first in local time and then in UTC.
    print strftime(fmt, now)
    print strftime(fmt, now, 1)
}'

Thu Feb 27 01:48:27 CET 2020
Thu Feb 27 00:48:27 GMT 2020

Getting Type Information#

isarray(x)

return a true value if x is an array

String-Translation Functions#

bindtextdomain(directory[,domain])	set the directory for message translation files
dcgettext(string[,domain[,category]])	return the translation of string in text domain domain for locale category category
dcngettext(string1,string2,number[,domain[,category]])	return the plural form used for number of the translation of string1 and string2 in text domain domain for locale category category

User-Defined Functions#

Function Definition Syntax#

function name([parameter-list])

{

body-of-function

}

By convention, local variables are declared as extra parameters: place some extra space between the real arguments and the local variables in the parameter list.

echo -e "test\ntest" | awk '
# myprint --- print num followed by a newline.
function myprint(num)
{
    printf("%s\n", num)
}

# Call the function once for every input record.
{ myprint($0) }'

awk '
# delarray --- delete every element of the array a.
# i is a local variable (declared as an extra parameter).
function delarray(a,    i)
{
    for (i in a)
        delete a[i]
}

BEGIN {
    a[1] = "un"
    a[2] = "deux"
    # Iterate numerically: for-in visits elements in an unspecified
    # order, so it would not guarantee that a[1] is listed before a[2].
    for (j = 1; j <= 2; j++)
        printf "%s -> %s\n", "a["j"]", a[j]
    delarray(a)
    # The array is empty now, so this prints an empty line.
    print a[1]
}'

a[1] -> un
a[2] -> deux

echo "Guisam" | awk '
# rev --- return str reversed, built recursively:
# the reversed tail followed by the first character.
function rev(str,    head, tail)
{
    if (str == "")
        return ""
    head = substr(str, 1, 1)
    tail = substr(str, 2)
    return (rev(tail) head)
}

{ print rev($0) }'

masiuG

# awk version of C ctime(3) function
awk '
# ctime --- format the timestamp ts in the style of ctime(3).
# format is a local variable (declared as an extra parameter).
function ctime(ts,    format)
{
    format = "%a %b %e %H:%M:%S %Z %Y"
    # use current time as default
    if (ts == 0) {
        ts = systime()
    }
    return strftime(format, ts)
}

BEGIN { print ctime() }'

Fri Feb 28 20:40:27 CET 2020

Calling User-Defined Functions#

Controlling variable scope#

There is no way to make a variable local to a { … } block in awk, but you can make a variable local to a function.

i is a global variable

awk '
# bar --- counts with i, which is not declared as a parameter,
# so the loop overwrites the global i.
function bar()
{
    i = 0
    while (i < 3) {
        print "bar'\''s i=" i
        i++
    }
}

# foo --- assigns the global i as well, then calls bar.
function foo(j)
{
    i = 1 + j
    print "foo'\''s i=" i
    bar()
    print "foo'\''s i=" i
}

BEGIN {
    i = 10
    print "top'\''s i=" i
    foo(0)
    print "top'\''s i=" i
}'

top's i=10
foo's i=1
bar's i=0
bar's i=1
bar's i=2
foo's i=3
top's i=3

i is a local variable

awk '
# bar --- i is declared as an extra parameter, so it is local to bar.
function bar(    i)
{
    for (i = 0; i <= 2; i++) {
        print "bar'\''s i=" i
    }
}

# foo --- the i assigned here is local to foo as well.
function foo(j,    i)
{
    i = 1 + j
    print "foo'\''s i=" i
    bar()
    print "foo'\''s i=" i
}

BEGIN {
    i = 10
    print "top'\''s i=" i
    foo(0)
    print "top'\''s i=" i
}'

top's i=10
foo's i=1
bar's i=0
bar's i=1
bar's i=2
foo's i=1
top's i=10

Passing function arguments by value or by reference#

If the argument is an array variable, then it is passed by reference. Otherwise, the argument is passed by value.

by reference

awk '
# myfunc --- print element 1 of the array argument, overwrite it,
# and print it again. The assignment changes the array of the caller.
function myfunc(array)
{
    print array[1]
    array[1] = "zzz"
    print array[1]
}

BEGIN {
    foo[1] = "bar"
    myfunc(foo)
    # Shows the value stored by myfunc.
    print foo[1]
}'

bar
zzz
zzz

by value

awk '
# myfunc --- print the scalar argument, overwrite the local copy,
# and print it again. The variable of the caller is not affected.
function myfunc(str)
{
    print str
    str = "zzz"
    print str
}

BEGIN {
    foo = "bar"
    myfunc(foo)
    # Still shows the original value.
    print foo
}'

bar
zzz
bar

The return Statement#

return [expression]

echo "1 5 23 8 16
44 3 5 2 8 26
256 291 1396 2962 100
-6 467 998 1101
99385 11 0 225" | awk '
# maxelt --- return the largest value stored in the array vec.
# i and ret are local variables; ret stays "" when vec is empty.
function maxelt(vec,    i, ret)
{
    for (i in vec) {
        # Skip values that do not beat the current maximum.
        if (ret != "" && !(vec[i] > ret))
            continue
        ret = vec[i]
    }
    return ret
}

# Store every field of every record, indexed by (record, field).
{
    for (col = 1; col <= NF; col++)
        nums[NR, col] = $col
}

END {
    print maxelt(nums)
}'

Indirect Function Calls#

You can specify the name of the function to call as a string variable, and then call the function.

the_func = "name"

result = @the_func()

file class_data1

Biology_101 sum average data: 87.0 92.4 78.5 94.9
Chemistry_305 sum average data: 75.2 98.3 94.7 88.2
English_401 sum average data: 100.0 95.6 87.1 93.4

file class_data2

Biology_101 sum average sort rsort data: 87.0 92.4 78.5 94.9
Chemistry_305 sum average sort rsort data: 75.2 98.3 94.7 88.2
English_401 sum average sort rsort data: 100.0 95.6 87.1 93.4

file indirectcall.awk

# indirectcall.awk --- Demonstrate indirect function calls
# average --- return the average of the values in fields $first - $last
#
# total and i are local variables. The accumulator is not called "sum"
# because a function of that name exists in this file and POSIX does not
# allow a parameter to share its name with a function.
# Returns "" (zero in numeric context) when the range is empty, instead of
# stopping the program with a division by zero.
function average(first, last,    total, i)
{
    if (last < first)
        return ""

    total = 0
    for (i = first; i <= last; i++)
        total += $i
    return total / (last - first + 1)
}
# sum --- add up the values in fields $first - $last and return the total
# (ret and i are local variables)
function sum(first, last,    ret, i)
{
    ret = 0
    i = first
    while (i <= last) {
        ret = ret + $i
        i++
    }
    return ret
}
# For each record, print the class name and the requested statistics.
# Expected record layout:  class_name function-name... data: value...
{
    class_name = $1
    gsub(/_/, " ", class_name) # Replace _ with spaces

    # Find the first data field. start is reset for every record so that a
    # record without a "data:" marker cannot reuse the previous value.
    start = 0
    for (i = 1; i <= NF; i++) {
        if ($i == "data:") {
            start = i + 1
            break
        }
    }

    if (start == 0) {
        # Without the marker there is nothing to compute. Scanning for it
        # past NF would never end, since missing fields compare as "".
        printf("%s:%d: no \"data:\" field, record skipped\n",
               FILENAME, FNR) > "/dev/stderr"
        next
    }

    printf("%s:\n", class_name)
    # Fields 2 .. start-2 name the functions to call; each one is called
    # indirectly on the data fields start .. NF.
    for (i = 2; i < start - 1; i++) {
        the_function = $i
        printf("\t%s: <%s>\n", $i, @the_function(start, NF) "")
    }
    print ""
}

file quicksort.awk

# quicksort.awk --- Quicksort algorithm, with user-supplied
# comparison function
# quicksort --- C.A.R. Hoare's quicksort algorithm. See Wikipedia
# or almost any algorithms or computer science text.
#
# Sorts data[left] .. data[right] in place. less_than holds the name of
# the comparison function, which is called indirectly with two elements.
# i and last are local variables.
function quicksort(data, left, right, less_than,    i, last)
{
    # Do nothing if the range contains fewer than two elements.
    if (left >= right) {
        return
    }

    # Move the middle element to the front; it serves as the pivot.
    quicksort_swap(data, left, int((left + right) / 2))

    # Partition: collect in left+1 .. last every element for which
    # the comparison against the pivot succeeds.
    last = left
    i = left
    while (++i <= right) {
        if (@less_than(data[i], data[left])) {
            quicksort_swap(data, ++last, i)
        }
    }

    # Put the pivot between the two partitions, then sort each of them.
    quicksort_swap(data, left, last)
    quicksort(data, left, last - 1, less_than)
    quicksort(data, last + 1, right, less_than)
}

# quicksort_swap --- exchange data[i] and data[j] in place
# (helper function for quicksort, should really be inline; temp is local)
function quicksort_swap(data, i, j,    temp)
{
    temp = data[j]
    data[j] = data[i]
    data[i] = temp
}

# num_lt --- numeric less-than: return 1 if left < right, otherwise 0.
# Adding zero forces both operands to be compared as numbers.
function num_lt(left, right)
{
    if ((0 + left) < (0 + right))
        return 1
    return 0
}

# num_ge --- numeric greater-than-or-equal: return 1 if left >= right,
# otherwise 0. Adding zero forces both operands to be compared as numbers.
function num_ge(left, right)
{
    if ((0 + left) >= (0 + right))
        return 1
    return 0
}

# do_sort --- sort fields $first - $last of the current record with the
# comparison function named by `compare' and return them as one
# space-separated string. data, i and retval are local variables.
function do_sort(first, last, compare,    data, i, retval)
{
    delete data

    # Copy the requested fields into data[1] .. data[i].
    i = 0
    while (first <= last)
        data[++i] = $(first++)

    quicksort(data, 1, i, compare)

    # Join the sorted elements with single spaces.
    retval = data[1]
    for (i = 2; i in data; i++) {
        retval = retval " " data[i]
    }
    return retval
}

# sort --- return fields $first - $last as a string sorted in ascending
# numeric order (do_sort with the "num_lt" comparison)
function sort(first, last,    sorted)
{
    sorted = do_sort(first, last, "num_lt")
    return sorted
}

# rsort --- return fields $first - $last as a string sorted in descending
# numeric order (do_sort with the "num_ge" comparison)
function rsort(first, last,    sorted)
{
    sorted = do_sort(first, last, "num_ge")
    return sorted
}

awk -f indirectcall.awk class_data1
Biology 101:
        sum: <352.8>
        average: <88.2>

Chemistry 305:
        sum: <356.4>
        average: <89.1>

English 401:
        sum: <376.1>
        average: <94.025>

awk -f indirectcall.awk -f quicksort.awk class_data2
Biology 101:
        sum: <352.8>
        average: <88.2>
        sort: <78.5 87.0 92.4 94.9>
        rsort: <94.9 92.4 87.0 78.5>

Chemistry 305:
        sum: <356.4>
        average: <89.1>
        sort: <75.2 88.2 94.7 98.3>
        rsort: <98.3 94.7 88.2 75.2>

English 401:
        sum: <376.1>
        average: <94.025>
        sort: <87.1 93.4 95.6 100.0>
        rsort: <100.0 95.6 93.4 87.1>

📑 Functions

Contents