Links & files
Awk reference and resources |
Example files |
Functions#
Built-in Functions#
Calling Built-in Functions#
function(arg1, …, argn)
The function's parameters are evaluated completely before the function's call is performed.
Numeric Functions#
atan2(y, x) |
return the arctangent of y / x in radians |
cos(x) |
return the cosine of x , with x in radians |
exp(x) |
return the exponential of x ( e ^ x ) |
int(x) |
return the nearest integer to x located between x and zero, i.e. x truncated toward zero (int(3.9) is 3, int(-3.9) is -3) |
log(x) |
return the natural logarithm of x |
rand() |
return a random number, uniformly distributed between 0 (inclusive) and 1 (exclusive) |
sin(x) |
return the sine of x , with x in radians. |
sqrt(x) |
return the positive square root of x |
srand([x]) |
set the starting point for generating random numbers to the value x |
awk '# Function to roll a simulated die.
function roll(n) { return 1 + int(rand() * n) }
# Roll 3 six-sided dice and
# print total number of points.
{
printf("%d points\n", roll(6) + roll(6) + roll(6))
}'
String-Manipulation Functions#
asort(source[,dest[,how]]) |
sorts the values of source |
asorti(source[,dest[,how]]) |
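same as asort(), but sorts the indices of source instead of its values; for both functions, if dest is specified, source is first duplicated into dest, which is then sorted while source is left unchanged |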
awk 'BEGIN {
a["last"] = "de"
a["first"] = "sac"
a["middle"] = "cul"
for (i in a)
printf "%s\n", a[i]
asort(a)
for (i in a)
printf "%s\n", a[i]
}'
sac
cul
de
cul
de
sac
gensub(regexp,replacement,how[,target]) |
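replace matches of regexp with replacement as selected by how ("g" or "G" for all matches, a number n for only the n-th match) and return the modified string; target itself is left unchanged |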
gsub(regexp,replacement[,target]) |
replace all matches of regexp with replacement |
awk 'BEGIN {
a = "abc def"
b = gensub(/(.+) (.+)/, "\\2 \\1", "g", a)
print b
}'
def abc
echo 'a b c a d e' | awk '{
print gensub(/a/, "AA", 1)
print gensub(/a/, "AA", 2)
print gensub(/a/, "AA", "g")
}'
AA b c a d e
a b c AA d e
AA b c AA d e
index(in,find) |
return the position of find in string in |
length([string]) |
return the number of characters in string |
awk 'BEGIN {
str = "guillaume"
print index(str, "aume")
print index(str, "gui")
}'
awk 'length($1) > 8 { print }' mail-list
match(string,regexp[,array]) |
search string for the longest, leftmost substring matched by regexp and return the character position where it starts (0 if there is no match) |
echo 'FIND ru+n
My program runs
but not very quickly
FIND guisam
JF+KM
This line is property of Reality Engineering Co.
guisam was here.' | awk '{ if ($1 == "FIND")
regex = $2
else {
where = match($0, regex)
if (where != 0)
print "Match of", regex, "found at", where, "in", $0
}}'
Match of ru+n found at 12 in My program runs
Match of guisam found at 1 in guisam was here.
echo "guillaume blablaba samblahblah" | awk '
match($0, /(gui).*(sam).*/, arr) { print arr[1]arr[2] }'
guisam
echo "guillaume blablaba samsamsam blahblah" | awk '
match($0, /(gui).*(sam).*/, arr) {
print arr[1]arr[2]
print arr[1, "start"], arr[1, "length"]
print arr[2, "start"], arr[2, "length"]
}'
guisam
1 3
20 3
patsplit(string,array[,fieldpat[,seps]]) |
divide string into pieces defined by fieldpat or fieldsep and store the |
split(string,array[,fieldsep[,seps]]) |
pieces in array and the separator strings in the seps array |
awk 'BEGIN {
split("cul-de_sac", a, /-|_/, seps)
for (i in a)
printf "%s -> %s\n", "a["i"]", a[i]
for (j in seps)
printf "%s -> %s\n", "seps["j"]", seps[j]
}'
a[1] -> cul
a[2] -> de
a[3] -> sac
seps[1] -> -
seps[2] -> _
sprintf(format,expression1,…) |
return (without printing) the string that printf would have printed out |
awk 'BEGIN {
pival = sprintf("pi = %.2f (approx.)", 22/7)
print pival
}'
pi = 3.14 (approx.)
strtonum(str) |
examine str and return its numeric value |
echo 0x17 | awk '{ printf "%.2f\n", strtonum($1) }'
23.00
sub(regexp,replacement[,target]) |
replace only the first (leftmost, longest) match of regexp with replacement, and return the number of substitutions made (0 or 1) |
awk 'BEGIN {
str = "guillaume, guillaume, everywhere"
print sub(/llaume/, "sam", str)
print str
sub("guisam", "& was here", str)
print str
}'
1
guisam, guillaume, everywhere
guisam was here, guillaume, everywhere
substr(string,start[,length]) |
return a length long substring, starting at character number start |
awk 'BEGIN {
str = "abcdefghi"
print substr(str, 1, 3) "DE" substr(str, 6)
}'
abcDEfghi
tolower(string) |
each uppercase character in the string replaced with lowercase character |
toupper(string) |
each lowercase character in the string replaced with uppercase character |
Input/Output Functions#
close(filename[,how]) |
close the file filename for input or output |
fflush([filename]) |
flush any buffered output |
system(command) |
execute the operating system command command |
Interactive Versus Noninteractive Buffering#
awk '{print $1$2}'
1 2
12
gui sam
guisam
awk '{print $1$2}' | cat
1 2
gui sam
12
guisam
Time Functions#
mktime(datespec) |
turn datespec into a timestamp |
awk 'BEGIN { t = mktime("2020 02 27 13 31 31")
print t }'
1582806691
strftime([format[,timestamp[,utc-flag]]]) |
format the time timestamp based on the format string |
default format: "%a %b %e %H:%M:%S %Z %Y"
awk 'BEGIN {
now = strftime()
old = strftime("%F %T %Z", "1582733718", 1)
print now
print old
}'
Thu Feb 27 01:38:47 CET 2020
2020-02-26 16:15:18 GMT
systime() |
current time as the number of seconds since the system epoch |
awk 'BEGIN {
now = systime()
print strftime(PROCINFO["strftime"], now)
print strftime(PROCINFO["strftime"], now, 1)
}'
Thu Feb 27 01:48:27 CET 2020
Thu Feb 27 00:48:27 GMT 2020
Getting Type Information#
isarray(x) |
return a true value if x is an array |
String-Translation Functions#
bindtextdomain(directory[,domain]) |
set the directory for message translation files
|
dcgettext(string[,domain[,category]]) |
return the translation of string in text
domain domain for locale category category
|
dcngettext(string1,string2,number[,domain[,category]]) |
return the plural form used for number of the
translation of string1 and string2 in text
domain domain for locale category category
|
User-Defined Functions#
Function Definition Syntax#
Place some extra space between the arguments and the local variables.
echo -e "test\ntest" | awk '
function myprint(num)
{
printf "%s\n", num
}
{
myprint($0)
}'
awk '
function delarray(a, i)
{
for (i in a)
delete a[i]
}
BEGIN {
a[1] = "un"
a[2] = "deux"
for (j in a)
printf "%s -> %s\n", "a["j"]", a[j]
delarray(a)
print a[1] }'
a[1] -> un
a[2] -> deux
echo "Guisam" | awk '
function rev(str) {
if (str == "")
return ""
return (rev(substr(str, 2)) substr(str, 1, 1))
}
{ print rev($0) }'
masiuG
# awk version of C ctime(3) function
awk 'function ctime(ts, format)
{
format = "%a %b %e %H:%M:%S %Z %Y"
if (ts == 0)
ts = systime()
# use current time as default
return strftime(format, ts)
}
BEGIN { print ctime()
}'
Fri Feb 28 20:40:27 CET 2020
Calling User-Defined Functions#
Controlling variable scope#
There is no way to make a variable local to a { … } block in awk, but you can make a variable local to a function.
i is a global variable
awk 'function bar()
{
for (i = 0; i < 3; i++)
print "bar'\''s i=" i
}
function foo(j)
{
i = j + 1
print "foo'\''s i=" i
bar()
print "foo'\''s i=" i
}
BEGIN {
i = 10
print "top'\''s i=" i
foo(0)
print "top'\''s i=" i
}'
top's i=10
foo's i=1
bar's i=0
bar's i=1
bar's i=2
foo's i=3
top's i=3
i is a local variable
awk 'function bar( i)
{
for (i = 0; i < 3; i++)
print "bar'\''s i=" i
}
function foo(j, i)
{
i = j + 1
print "foo'\''s i=" i
bar()
print "foo'\''s i=" i
}
BEGIN {
i = 10
print "top'\''s i=" i
foo(0)
print "top'\''s i=" i
}'
top's i=10
foo's i=1
bar's i=0
bar's i=1
bar's i=2
foo's i=1
top's i=10
Passing function arguments by value or by reference#
If the argument is an array variable, then it is passed by reference. Otherwise, the argument is passed by value.
by reference
awk 'function myfunc(array)
{
print array[1]
array[1] = "zzz"
print array[1]
}
BEGIN {
foo[1] = "bar"
myfunc(foo)
print foo[1]
}'
bar
zzz
zzz
by value
awk 'function myfunc(str)
{
print str
str = "zzz"
print str
}
BEGIN {
foo = "bar"
myfunc(foo)
print foo }'
bar
zzz
bar
The return Statement#
return [expression]
echo "1 5 23 8 16
44 3 5 2 8 26
256 291 1396 2962 100
-6 467 998 1101
99385 11 0 225" | awk '
function maxelt(vec, i, ret)
{
for (i in vec) {
if (ret == "" || vec[i] > ret)
ret = vec[i]
}
return ret
} {
for(i = 1; i <= NF; i++)
nums[NR, i] = $i
}
END {
print maxelt(nums)
}'
99385
Indirect Function Calls#
You can specify the name of the function to call as a string variable, and then call the function.
file
class_data1
Biology_101 sum average data: 87.0 92.4 78.5 94.9
Chemistry_305 sum average data: 75.2 98.3 94.7 88.2
English_401 sum average data: 100.0 95.6 87.1 93.4
file
class_data2
Biology_101 sum average sort rsort data: 87.0 92.4 78.5 94.9
Chemistry_305 sum average sort rsort data: 75.2 98.3 94.7 88.2
English_401 sum average sort rsort data: 100.0 95.6 87.1 93.4
file
indirectcall.awk
# indirectcall.awk --- Demonstrate indirect function calls
# average --- return the average of the values in fields $first - $last
#
# Parameters:
#   first, last - field numbers delimiting the inclusive range to average
# Locals (extra parameters, not meant to be passed by callers):
#   total - running sum of the field values
#   i     - current field number
#
# The accumulator is called `total' rather than `sum' because this file
# also defines a function named sum(): POSIX forbids a parameter from
# sharing its name with a function, and gawk --posix enforces that.
# Callers are expected to pass first <= last; with last == first - 1 the
# divisor below is zero.
function average(first, last,    total, i)
{
    total = 0
    for (i = first; i <= last; i++)
        total += $i
    return total / (last - first + 1)
}
# sum --- add up the values of fields $first through $last (inclusive)
#         of the current record and return the result
# acc and pos are locals; callers pass only first and last.
function sum(first, last,    acc, pos)
{
    acc = 0
    pos = first
    while (pos <= last) {
        acc += $pos
        pos++
    }
    return acc
}
# For each record, print the class name and the requested statistics.
#
# Expected record layout:
#   <class_name> <function name>... data: <value>...
# Every field between the class name and the "data:" marker names a
# function, which is called indirectly on the fields after the marker.
{
    # Find the first data field.  Reset `start' for every record so that
    # a value left over from a previous record is never reused.
    start = 0
    for (i = 1; i <= NF; i++) {
        if ($i == "data:") {
            start = i + 1
            break
        }
    }

    # Without the marker the loop below would never find its stop
    # condition and would try to call ordinary field text (or the empty
    # string) as a function, which is a fatal error.  Skip such records;
    # blank lines are skipped silently, anything else is reported.
    if (start == 0) {
        if (NF > 0)
            printf("%s:%d: no \"data:\" field, record skipped\n",
                   FILENAME, FNR) > "/dev/stderr"
        next
    }

    class_name = $1
    gsub(/_/, " ", class_name)  # Replace _ with spaces

    printf("%s:\n", class_name)
    # Fields 2 .. (marker - 1) hold the function names; the marker itself
    # sits at field start - 1.
    for (i = 2; i < start - 1; i++) {
        the_function = $i
        # Concatenating "" makes the result print as a string.
        printf("\t%s: <%s>\n", $i, @the_function(start, NF) "")
    }
    print ""
}
file
quicksort.awk
# quicksort.awk --- Quicksort algorithm, with user-supplied
# comparison function
# quicksort --- sort data[left..right] in place with C.A.R. Hoare's
#       quicksort algorithm.  See Wikipedia or almost any algorithms or
#       computer science text.
# less_than holds the NAME of a comparison function; it is called
# indirectly as less_than(element, pivot), and every element for which
# it returns true is moved in front of the pivot.
# pos and boundary are locals.
function quicksort(data, left, right, less_than,    pos, boundary)
{
    # A range with fewer than two elements needs no work.
    if (left < right) {
        # Use the middle element as pivot and park it at the front.
        quicksort_swap(data, left, int((left + right) / 2))

        # data[left+1 .. boundary] collects the elements that go before
        # the pivot.
        boundary = left
        for (pos = left + 1; pos <= right; pos++) {
            if (@less_than(data[pos], data[left]))
                quicksort_swap(data, ++boundary, pos)
        }

        # Drop the pivot between the two partitions, then sort each.
        quicksort_swap(data, left, boundary)
        quicksort(data, left, boundary - 1, less_than)
        quicksort(data, boundary + 1, right, less_than)
    }
}
# quicksort_swap --- exchange data[i] and data[j]; helper for quicksort
#                    that should really be inline.  held is a local.
function quicksort_swap(data, i, j,    held)
{
    held = data[j]
    data[j] = data[i]
    data[i] = held
}
# num_lt --- numeric "less than" test: true when left < right once both
#            operands have been coerced to numbers
function num_lt(left, right)
{
    # Adding 0 forces a numeric rather than a string comparison.
    return ((right + 0) > (left + 0))
}
# num_ge --- numeric "greater than or equal to" test: true when
#            left >= right once both operands have been coerced to numbers
function num_ge(left, right)
{
    # Adding 0 forces a numeric rather than a string comparison.
    return ((right + 0) <= (left + 0))
}
# do_sort --- copy fields $first through $last of the current record into
#             an array, sort it with quicksort() using the comparison
#             function whose name is given in `compare', and return the
#             sorted values joined by single spaces
# items, pos and joined are locals.
function do_sort(first, last, compare,    items, pos, joined)
{
    delete items    # start from an empty array

    # Gather the requested fields as items[1], items[2], ...
    pos = 0
    while (first <= last) {
        items[++pos] = $first
        first++
    }

    quicksort(items, 1, pos, compare)

    # Build the space-separated result string.
    joined = items[1]
    for (pos = 2; pos in items; pos++)
        joined = joined " " items[pos]
    return joined
}
# sort --- sort the data in ascending order and return it as a string
#
# first, last - field numbers of the current record delimiting the data.
# Delegates to do_sort() with the comparison function name "num_lt".
function sort(first, last)
{
return do_sort(first, last, "num_lt")
}
# rsort --- sort the data in descending order and return it as a string
#
# first, last - field numbers of the current record delimiting the data.
# Delegates to do_sort() with the comparison function name "num_ge".
function rsort(first, last)
{
return do_sort(first, last, "num_ge")
}
awk -f indirectcall.awk class_data1
Biology 101:
sum: <352.8>
average: <88.2>
Chemistry 305:
sum: <356.4>
average: <89.1>
English 401:
sum: <376.1>
average: <94.025>
awk -f indirectcall.awk -f quicksort.awk class_data2
Biology 101:
sum: <352.8>
average: <88.2>
sort: <78.5 87.0 92.4 94.9>
rsort: <94.9 92.4 87.0 78.5>
Chemistry 305:
sum: <356.4>
average: <89.1>
sort: <75.2 88.2 94.7 98.3>
rsort: <98.3 94.7 88.2 75.2>
English 401:
sum: <376.1>
average: <94.025>
sort: <87.1 93.4 95.6 100.0>
rsort: <100.0 95.6 93.4 87.1>