♥️ Awk is awesome#
Format command output#
ᐅ
# Keep the header line of df plus every filesystem whose device path starts with /dev.
df -PTh | awk 'NR == 1 || /^\/dev/'
Sys. de fichiers Type Taille Utilisé Dispo Uti% Monté sur
/dev/dm-0 btrfs 237G 114G 122G 49% /
/dev/dm-0 btrfs 237G 114G 122G 49% /home
/dev/nvme0n1p2 ext4 974M 317M 591M 35% /boot
/dev/nvme0n1p1 vfat 599M 35M 565M 6% /boot/efi
ᐅ
# Compact df report: device, filesystem type, available space and mount point.
df -PTh | awk '
BEGIN {
    row = "%-18s %-6s %-6s %s\n"
}
NR == 1 {
    # Header: reuse words from the header line of df, then underline them.
    # The field numbers fit the multi-word French header of the sample output.
    printf "\n%s %s %-10s %-7s%-7s%-8s\n", $1, $2, $3, $4, $7, $9
    printf row, "-----------------", "----", "-----", "-----"
    next
}
# Data rows: only filesystems backed by a /dev device.
/^\/dev/ {
    printf row, $1, $2, $5, $7
}'
Sys. de fichiers Type Dispo Monté
----------------- ---- ----- -----
/dev/dm-0 btrfs 122G /
/dev/dm-0 btrfs 122G /home
/dev/nvme0n1p2 ext4 591M /boot
/dev/nvme0n1p1 vfat 565M /boot/efi
ᐅ
# Print the IPv4 address of wlp1s0 without its /prefix-length suffix.
ip a s wlp1s0 | awk '/inet / { addr = $2; sub(/\/.*$/, "", addr); print addr }'
192.168.94.15
Parse logs#
#!/usr/bin/env bash
# vim: ai ts=4 sts=4 sw=4
#
# Report Varnish hits per known bot/crawler from the NCSA access logs.
#
# Reads "$VARNISHLOG" and its first rotation "$VARNISHLOG.1" and prints:
#   - hits and share of all log lines for each bot listed in BOTS
#     (only lines whose status field is below 400 are credited to a bot),
#   - an "Others" row: lines that were not credited to any listed bot,
#   - totals of requests with status < 400 and of errors,
#   - a per-status breakdown of the errors.
# Rows are ordered by hit count through PROCINFO["sorted_in"], a gawk
# feature; with another awk the order of the rows is unspecified.
set -eu
set -o pipefail

# Declare and assign separately so that the exit status of a command
# substitution is not masked by typeset (ShellCheck SC2155).
HLINE=$(printf -- '-%0.s' {1..29})    # a rule made of 29 dashes
typeset -r HLINE
typeset -r VARNISHLOG=/var/log/varnish/varnishncsa.log
#typeset -r VARNISHLOG=/root/vlogstest.log

# Timestamp of the first line of the rotated log, i.e. the start of the
# reported period; substr() drops the first character of field 4
# (presumably the "[" that opens the NCSA timestamp).
# NOTE(review): the glyph in front of "since" (and of "Web Crawlers" below)
# looks like an emoji damaged by a character-encoding mix-up -- restore the
# intended one.
SINCE=$(awk '{print "๐ since", substr($4,2)}' <(head -n 1 "$VARNISHLOG".1))
#SINCE=$(awk '{print "๐ since", substr($4,2)}' <(head -n 1 "$VARNISHLOG"))
typeset -r SINCE

# Substrings identifying the known bots, one per line.
typeset -r BOTS="Svix-Webhooks
ClaudeBot
fdroidswh-git
Ai2Bot-Dolma
GitHub-Hookshot
curl/7.58.0
enea-prod
grnet-prod
unidue-prod
zenodo
DotBot
Blackbox
ChatGPT-User
GPTBot
Bytespider
Turnitin
Barkrowler
Sogou
fossology
check_http
facebookexternalhit
DataForSeoBot
meta-externalagent
Amazonbot
Googlebot
AhrefsBot
GoogleOther
ImagesiftBot
bingbot
SemanticScholarBot
PetalBot
SemrushBot
DuckDuckGo
Applebot
YandexBot
YandexRenderResourcesBot
Baiduspider"

# a little awk and ♥️ in this cruel world
awk -v bots="$BOTS" \
    -v hline="$HLINE" \
    -v since="$SINCE" '
# Percentage of all input lines represented by n. Yields 0 when no line was
# read, so empty logs do not abort the report with a division by zero.
function pct(n) {
    return NR ? n / NR * 100 : 0
}

BEGIN {
    # Start the totals at 0 so they print as "0" rather than as an empty string.
    others = 0
    requests = 0

    # One counter per known bot; split() on " " also splits on newlines.
    split(bots, bot, " ")
    for (i in bot)
        counters[bot[i]] = 0

    # Titles carrying an emoji use a 28-column label, plain rows 29
    # (presumably because the emoji is drawn two columns wide).
    format = "%-29s %s\n"
    printf format, hline, hline
    format = "%-28s %s\n"
    printf format, "๐ Web Crawlers", since
    format = "%-29s %s\n"
    printf format, hline, hline

    # This three-column format stays in effect for the bot rows in END.
    format = "%-29s %-14s %s\n"
    printf format, "", "Hits", "Ratio"
    printf format, "", hline, ""
}

{
    credited = 0
    if ($9 < 400) {
        requests++
        # Bot names are literal substrings, not regular expressions:
        # index() keeps the dots of "curl/7.58.0" from matching any character.
        for (name in counters) {
            if (index($0, name)) {
                counters[name]++
                credited = 1
            }
        }
    } else {
        errors[$9]++
    }
    if (!credited)
        others++
}

END {
    # sort by bots name
    #PROCINFO["sorted_in"] = "@ind_str_asc"
    # sort by hits
    PROCINFO["sorted_in"] = "@val_num_desc"

    "date" | getline date
    close("date")

    for (name in counters) {
        if (counters[name] > 0)
            printf format, name, counters[name], pct(counters[name])
    }
    printf format, "Others", others, pct(others)
    printf format, hline, hline, ""

    format = "%-28s %-14s %s\n"
    printf format, "✅ Requests", requests, pct(requests)
    printf format, hline, hline, ""
    printf format, "🚨 Errors", NR - requests, pct(NR - requests)
    printf format, hline, hline, ""

    format = "%-29s %-14s %s\n"
    for (status in errors)
        printf format, status, errors[status], pct(errors[status])

    printf "\nReported on %s.\n", date
}' "$VARNISHLOG"{,.1}
#}' "$VARNISHLOG"
exit 0
root@moma:~# top-known-bot-varnish-hits.sh
----------------------------- -----------------------------
๐ Web Crawlers ๐ since 22/Sep/2025:07:29:34
----------------------------- -----------------------------
Hits Ratio
-----------------------------
grnet-prod 7651762 43.9396
enea-prod 5548836 31.8637
Svix-Webhooks 957266 5.49702
curl/7.58.0 207631 1.1923
Amazonbot 176931 1.01601
Applebot 106025 0.608839
bingbot 36173 0.20772
DataForSeoBot 35006 0.201019
GoogleOther 29211 0.167742
check_http 28097 0.161345
GPTBot 26919 0.15458
Googlebot 19606 0.112586
PetalBot 12706 0.0729631
fossology 8820 0.0506481
SemrushBot 7374 0.0423446
zenodo 7014 0.0402773
SemanticScholarBot 5111 0.0293495
Blackbox 4671 0.0268228
Turnitin 2544 0.0146087
Sogou 2045 0.0117432
AhrefsBot 1190 0.00683347
ChatGPT-User 338 0.00194094
YandexRenderResourcesBot 97 0.000557014
DuckDuckGo 70 0.000401969
YandexBot 60 0.000344545
unidue-prod 51 0.000292863
GitHub-Hookshot 36 0.000206727
DotBot 19 0.000109106
Others 2541192 14.5926
----------------------------- -----------------------------
✅ Requests 17032724 97.8089
----------------------------- -----------------------------
🚨 Errors 381559 2.19107
----------------------------- -----------------------------
404 347717 1.99673
400 19445 0.111661
403 5585 0.0320714
429 4290 0.024635
401 3330 0.0191222
502 1162 0.00667268
504 29 0.00016653
500 1 5.74241e-06
Reported on Tue Sep 23 09:41:05 UTC 2025.
root@moma:~# awk '
# Count the lines mentioning GPTBot per first field (the client address in the sample output).
/GPTBot/ { hits[$1]++ }
END {
    row = "| %-16s | %-6s |\n"
    printf row, "GPTBot addr", "Hits"
    printf row, "---", "---"
    for (addr in hits)
        printf row, addr, hits[addr]
}' /var/log/varnish/varnishncsa.log
| GPTBot addr | Hits |
| --- | --- |
| 4.227.36.72 | 3 |
| 4.227.36.120 | 1 |
| 4.227.36.0 | 2 |
| 20.171.207.65 | 1 |
| 20.171.207.68 | 1 |
| 4.227.36.127 | 2 |
| 20.171.207.89 | 1 |
| 4.227.36.16 | 2 |
| 20.171.207.191 | 47022 |