โ™ฅ๏ธ Awk is awesome

โ™ฅ๏ธ Awk is awesome#

Format command output#

 แ… awk 'NR<=1 || /^\/dev/' <(df -PTh)
Sys. de fichiers Type     Taille Utilisรฉ Dispo Uti% Montรฉ sur
/dev/dm-0        btrfs      237G    114G  122G  49% /
/dev/dm-0        btrfs      237G    114G  122G  49% /home
/dev/nvme0n1p2   ext4       974M    317M  591M  35% /boot
/dev/nvme0n1p1   vfat       599M     35M  565M   6% /boot/efi
 แ… awk '{
if ( NR == 1 )
printf "\n%s %s %-10s %-7s%-7s%-8s\n",$1,$2,$3,$4,$7,$9
if ( NR == 1 )
printf "%-18s %-6s %-6s %s\n","-----------------","----","-----","-----"
if ( NR > 1 && /^\/dev/ )
printf "%-18s %-6s %-6s %s\n",$1,$2,$5,$7
}' <(df -PTh)

Sys. de fichiers   Type   Dispo  Monté
-----------------  ----   -----  -----
/dev/dm-0          btrfs  122G   /
/dev/dm-0          btrfs  122G   /home
/dev/nvme0n1p2     ext4   591M   /boot
/dev/nvme0n1p1     vfat   565M   /boot/efi
แ… awk '/inet /{split($2,a,"/");print a[1]}' <(ip a s wlp1s0)
192.168.94.15

Parse logs#

#!/usr/bin/env bash
# vim: ai ts=4 sts=4 sw=4
#
# Summarise Varnish NCSA access logs: hits per known crawler/client, total
# successful requests (status < 400) and a per-status breakdown of the rest.
# Reads the current log and its first rotation (".1") when they are readable.
# Requires gawk: the END block relies on PROCINFO["sorted_in"] for ordering.

set -eu
set -o pipefail

# Assign first, mark read-only afterwards: "typeset -r VAR=$(cmd)" would
# return the status of typeset and hide a failure of cmd from "set -e".
HLINE=$(printf -- '-%0.s' {1..29})
typeset -r HLINE
typeset -r VARNISHLOG=/var/log/varnish/varnishncsa.log
#typeset -r VARNISHLOG=/root/vlogstest.log

# Keep only the logs that can actually be read, current log first and the
# rotated one last (the order in which they were passed to awk before).
logs=()
for log in "$VARNISHLOG" "$VARNISHLOG.1"; do
    if [[ -r "$log" ]]; then
        logs+=("$log")
    fi
done
if (( ${#logs[@]} == 0 )); then
    printf '%s: no readable log: %s or %s.1\n' \
        "${0##*/}" "$VARNISHLOG" "$VARNISHLOG" >&2
    exit 1
fi

# The report period starts at the first entry of the oldest available log.
# Field 4 looks like "[22/Sep/2025:07:29:34"; substr() drops the bracket.
typeset -r OLDEST_LOG=${logs[${#logs[@]}-1]}
SINCE=$(awk 'NR == 1 { print "📆 since", substr($4, 2); exit }' "$OLDEST_LOG")
typeset -r SINCE

# Substrings identifying known crawlers and clients, whitespace separated.
typeset -r BOTS="Svix-Webhooks
    ClaudeBot
    fdroidswh-git
    Ai2Bot-Dolma
    GitHub-Hookshot
    curl/7.58.0
    enea-prod
    grnet-prod
    unidue-prod
    zenodo
    DotBot
    Blackbox
    ChatGPT-User
    GPTBot
    Bytespider
    Turnitin
    Barkrowler
    Sogou
    fossology
    check_http
    facebookexternalhit
    DataForSeoBot
    meta-externalagent
    Amazonbot
    Googlebot
    AhrefsBot
    GoogleOther
    ImagesiftBot
    bingbot
    SemanticScholarBot
    PetalBot
    SemrushBot
    DuckDuckGo
    Applebot
    YandexBot
    YandexRenderResourcesBot
    Baiduspider"

# a little awk and ♥️ in this cruel world
awk -v bots="$BOTS" \
    -v hline="$HLINE" \
    -v since="$SINCE" '
# Share of all input lines as a percentage; 0 when nothing was read, so an
# empty log cannot cause a fatal division by zero in END.
function pct(n) {
    return NR ? n / NR * 100 : 0
}
BEGIN {
    rule_fmt  = "%-29s %s\n"
    row_fmt   = "%-29s %-14s %s\n"
    # Rows starting with an emoji pad one column less: the emoji is a single
    # character that occupies two terminal cells.
    title_fmt = "%-28s %s\n"
    emoji_fmt = "%-28s %-14s %s\n"

    # Start every known name at zero, and the scalar totals too, so that a
    # count of zero is printed as 0 rather than as an empty string.
    n = split(bots, bot, " ")
    for (k = 1; k <= n; k++)
        counters[bot[k]] = 0
    others = 0
    requests = 0

    printf rule_fmt, hline, hline
    # NOTE(review): emoji restored from mis-encoded bytes; the globe is
    # assumed to be U+1F310 - confirm against the original script.
    printf title_fmt, "🌐 Web Crawlers", since
    printf rule_fmt, hline, hline
    printf row_fmt, "", "Hits", "Ratio"
    printf row_fmt, "", hline, ""
}
{
    # Field 9 is the HTTP status; anything below 400 counts as a success.
    ok = ($9 < 400)
    known = 0
    if (ok) {
        for (name in counters) {
            # index() matches the name literally; used as a regex, the dots
            # in "curl/7.58.0" would match any character.
            if (index($0, name)) {
                counters[name]++
                known = 1
            }
        }
    }
    # "Others" is every line not credited to a known name, which includes
    # all responses with status >= 400 (they are never credited above).
    if (!known)
        others++
    if (ok)
        requests++
    else
        errors[$9]++
}
END {
    # sort by bots name
    #PROCINFO["sorted_in"] = "@ind_str_asc"
    # sort by hits (also applies to the error breakdown below)
    PROCINFO["sorted_in"] = "@val_num_desc"
    "date" | getline date
    close("date")

    for (name in counters) {
        if (counters[name] > 0)
            printf row_fmt, name, counters[name], pct(counters[name])
    }
    printf row_fmt, "Others", others, pct(others)
    printf row_fmt, hline, hline, ""
    printf emoji_fmt, "✅ Requests", requests, pct(requests)
    printf emoji_fmt, hline, hline, ""
    printf emoji_fmt, "🚨 Errors", NR - requests, pct(NR - requests)
    printf emoji_fmt, hline, hline, ""
    for (status in errors)
        printf row_fmt, status, errors[status], pct(errors[status])
    printf "\nReported on %s.\n", date
}' "${logs[@]}"

exit 0
root@moma:~# top-known-bot-varnish-hits.sh
----------------------------- -----------------------------
๐ŸŒ Web Crawlers               ๐Ÿ“† since 22/Sep/2025:07:29:34
----------------------------- -----------------------------
                              Hits           Ratio
                              -----------------------------
grnet-prod                    7651762        43.9396
enea-prod                     5548836        31.8637
Svix-Webhooks                 957266         5.49702
curl/7.58.0                   207631         1.1923
Amazonbot                     176931         1.01601
Applebot                      106025         0.608839
bingbot                       36173          0.20772
DataForSeoBot                 35006          0.201019
GoogleOther                   29211          0.167742
check_http                    28097          0.161345
GPTBot                        26919          0.15458
Googlebot                     19606          0.112586
PetalBot                      12706          0.0729631
fossology                     8820           0.0506481
SemrushBot                    7374           0.0423446
zenodo                        7014           0.0402773
SemanticScholarBot            5111           0.0293495
Blackbox                      4671           0.0268228
Turnitin                      2544           0.0146087
Sogou                         2045           0.0117432
AhrefsBot                     1190           0.00683347
ChatGPT-User                  338            0.00194094
YandexRenderResourcesBot      97             0.000557014
DuckDuckGo                    70             0.000401969
YandexBot                     60             0.000344545
unidue-prod                   51             0.000292863
GitHub-Hookshot               36             0.000206727
DotBot                        19             0.000109106
Others                        2541192        14.5926
----------------------------- -----------------------------
✅ Requests                   17032724       97.8089
----------------------------- -----------------------------
🚨 Errors                     381559         2.19107
----------------------------- -----------------------------
404                           347717         1.99673
400                           19445          0.111661
403                           5585           0.0320714
429                           4290           0.024635
401                           3330           0.0191222
502                           1162           0.00667268
504                           29             0.00016653
500                           1              5.74241e-06

Reported on Tue Sep 23 09:41:05 UTC 2025.
root@moma:~# awk 'BEGIN{format="| %-16s | %-6s |\n"}
/GPTBot/{counters[$1]+=1}
END{printf format, "GPTBot addr", "Hits"
printf format, "---", "---"
for (i in counters){printf format, i, counters[i]}}' /var/log/varnish/varnishncsa.log
| GPTBot addr      | Hits   |
| ---              | ---    |
| 4.227.36.72      | 3      |
| 4.227.36.120     | 1      |
| 4.227.36.0       | 2      |
| 20.171.207.65    | 1      |
| 20.171.207.68    | 1      |
| 4.227.36.127     | 2      |
| 20.171.207.89    | 1      |
| 4.227.36.16      | 2      |
| 20.171.207.191   | 47022  |