From 5af2e66ab7ebe3f5f8f76e17c16b933073f6bed6 Mon Sep 17 00:00:00 2001
From: marshmallow
Date: Sun, 5 May 2024 09:29:55 +1000
Subject: [PATCH] feat: add robots.txt

SQUASH: use EndBug/add-and-commit
SQUASH: document cron time
more descriptive job name
---
 .github/workflows/robots.txt.yml | 38 +++++++++++++
 public/robots.txt                | 95 ++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 .github/workflows/robots.txt.yml
 create mode 100644 public/robots.txt

diff --git a/.github/workflows/robots.txt.yml b/.github/workflows/robots.txt.yml
new file mode 100644
index 0000000..f814daf
--- /dev/null
+++ b/.github/workflows/robots.txt.yml
@@ -0,0 +1,38 @@
+# Regenerates public/robots.txt from the Dark Visitors API.
+name: "Update robots.txt"
+on:
+  schedule:
+    - cron: "0 0 * * 6"  # At 00:00 UTC on Saturday
+  workflow_dispatch:
+
+jobs:
+  update:
+    name: "Update robots.txt"
+
+    runs-on: ubuntu-latest
+
+    # Write access lets the commit step push the regenerated file.
+    permissions:
+      contents: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # --fail makes curl exit non-zero on an HTTP error, so the job stops
+      # before an API error response can be committed as robots.txt.
+      - name: Pull latest robots
+        run: |
+          curl --fail -X POST https://api.darkvisitors.com/robots-txts \
+            -H "Authorization: Bearer ${{ secrets.DarkVisitorsBearer }}" \
+            -H "Content-Type: application/json" \
+            --data-raw '{"agent_types": ["AI Assistant", "AI Data Scraper", "AI Search Crawler", "Undocumented AI Agent"]}' \
+            --output ./public/robots.txt
+
+      - name: Commit changes
+        uses: EndBug/add-and-commit@v9
+        with:
+          message: "chore: generate robots.txt"
+          default_author: github_actions
+          push: true
+
diff --git a/public/robots.txt b/public/robots.txt
new file mode 100644
index 0000000..bfb7ac5
--- /dev/null
+++ b/public/robots.txt
@@ -0,0 +1,95 @@
+# AI Search Crawler
+# https://darkvisitors.com/agents/amazonbot
+
+User-agent: Amazonbot
+Disallow: /
+
+# Undocumented AI Agent
+# https://darkvisitors.com/agents/anthropic-ai
+
+User-agent: anthropic-ai
+Disallow: /
+
+# AI Search Crawler
+# https://darkvisitors.com/agents/applebot
+
+User-agent: Applebot
+Disallow: /
+
+# AI Data Scraper
+# https://darkvisitors.com/agents/bytespider
+
+User-agent: Bytespider
+Disallow: /
+
+# AI Data Scraper
+# https://darkvisitors.com/agents/ccbot
+
+User-agent: CCBot
+Disallow: /
+
+# AI Assistant
+# https://darkvisitors.com/agents/chatgpt-user
+
+User-agent: ChatGPT-User
+Disallow: /
+
+# Undocumented AI Agent
+# https://darkvisitors.com/agents/claude-web
+
+User-agent: Claude-Web
+Disallow: /
+
+# Undocumented AI Agent
+# https://darkvisitors.com/agents/claudebot
+
+User-agent: ClaudeBot
+Disallow: /
+
+# Undocumented AI Agent
+# https://darkvisitors.com/agents/cohere-ai
+
+User-agent: cohere-ai
+Disallow: /
+
+# AI Data Scraper
+# https://darkvisitors.com/agents/diffbot
+
+User-agent: Diffbot
+Disallow: /
+
+# AI Data Scraper
+# https://darkvisitors.com/agents/facebookbot
+
+User-agent: FacebookBot
+Disallow: /
+
+# AI Data Scraper
+# https://darkvisitors.com/agents/google-extended
+
+User-agent: Google-Extended
+Disallow: /
+
+# AI Data Scraper
+# https://darkvisitors.com/agents/gptbot
+
+User-agent: GPTBot
+Disallow: /
+
+# AI Data Scraper
+# https://darkvisitors.com/agents/omgili
+
+User-agent: omgili
+Disallow: /
+
+# AI Search Crawler
+# https://darkvisitors.com/agents/perplexitybot
+
+User-agent: PerplexityBot
+Disallow: /
+
+# AI Search Crawler
+# https://darkvisitors.com/agents/youbot
+
+User-agent: YouBot
+Disallow: /
\ No newline at end of file