#!/bin/bash
#
# conf_regen hook, installed as:
#   /usr/share/yunohost/hooks/conf_regen/97-nginx_rebots-block
#
# Appends two nginx `if` blocks to the pending nginx security.conf.inc so that
# requests whose User-Agent matches a legacy-browser pattern or a known
# AI-crawler name are answered with HTTP 403.
#
# Arguments:
#   $1 - regen action; anything other than "pre" is a no-op
#   $4 - pending directory of this hook; the nginx file is looked up at
#        <pending_dir>/../nginx/etc/nginx/conf.d/security.conf.inc
#   ($2 and $3 are not used here.)
# Returns:
#   0 when there is nothing to do or the rules were appended; non-zero if
#   writing to the configuration file fails.
set -euo pipefail

#######################################
# Append the bot-blocking rules to the pending nginx security include.
# Arguments: same as the script itself ($1 action, $4 pending directory).
# Outputs:   appends to security.conf.inc; nothing on stdout.
# Returns:   0 on success or no-op, non-zero if the append fails.
#######################################
main() {
  local action=${1:-}
  local pending_dir=${4:-}
  local nginx_conf="${pending_dir}/../nginx/etc/nginx/conf.d/security.conf.inc"
  # First line of the appended snippet; doubles as the "already applied" marker.
  local marker='# Some really bad bot with legacy user agent'

  [[ "$action" == "pre" ]] || return 0
  # Without a pending directory the path above would resolve from "/".
  [[ -n "$pending_dir" ]] || return 0
  [[ -e "$nginx_conf" ]] || return 0

  # Idempotence: running the hook twice against the same file must not
  # duplicate the rules.
  if grep -qxF -- "$marker" "$nginx_conf"; then
    return 0
  fi

  # Quoted delimiter: the heredoc body is written verbatim, so what follows is
  # exactly the text nginx will read (no shell expansion, no double escaping).
  #
  # NOTE(review): the Firefox/FxiOS alternative "[0-9]{1,2}|1[1-2][0-9]|130"
  # covers 0-99 and 110-130 but not 100-109 - looks like an oversight
  # ("1[0-2][0-9]" would close the gap); left unchanged pending confirmation.
  cat <<'EOF' >>"$nginx_conf"
# Some really bad bot with legacy user agent
if ($http_user_agent ~* "(iPod|MSIE|Trident/|Presto/|PPC Mac OS X|Gecko/\d{4}-|C(?:riOS|hrome)/(?:\d{1,2}|1[0-1]\d|12[0-4])\.|F(?:irefox|xiOS)/(?:[0-9]{1,2}|1[1-2][0-9]|130)\.|Version/(?:[4-9]|1[0-6]).*Safari/)") {
return 403;
}
# List from https://github.com/ai-robots-txt/ai.robots.txt/blob/main/nginx-block-ai-bots.conf
if ($http_user_agent ~* "(AI2Bot|Ai2Bot\-Dolma|aiHitBot|Amazonbot|anthropic\-ai|Applebot|Applebot\-Extended|Brightbot\ 1\.0|Bytespider|CCBot|ChatGPT\-User|Claude\-Web|ClaudeBot|cohere\-ai|cohere\-training\-data\-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google\-Extended|GoogleOther|GoogleOther\-Image|GoogleOther\-Video|GPTBot|iaskspider/2\.0|ICC\-Crawler|ImagesiftBot|img2dataset|imgproxy|ISSCyberRiskCrawler|Kangaroo\ Bot|meta\-externalagent|Meta\-ExternalAgent|meta\-externalfetcher|Meta\-ExternalFetcher|NovaAct|OAI\-SearchBot|omgili|omgilibot|Operator|PanguBot|Perplexity\-User|PerplexityBot|PetalBot|Scrapy|SemrushBot\-OCOB|SemrushBot\-SWA|Sidetrade\ indexer\ bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio\-Extended|YouBot)") {
return 403;
}
EOF
}

main "$@"