# ===========================
# Robots.txt for https://insightspsychology.org/
# ===========================
# Keep ONE directive per line. Parsers read this file line by line, and
# everything after a "#" on a line is treated as a comment.

# ===========================
# Default rules: apply to every crawler that has no group of its own below
# ===========================
# No blank lines inside this group: some legacy parsers treat a blank line
# as the end of the group.
User-agent: *
# Block common unnecessary pages, like admin and login, to save crawl budget
Disallow: /wp-admin/
Disallow: /myai-
Disallow: /cgi-bin/
Disallow: /trackback/
Disallow: /search/
Disallow: /rss/
Disallow: /comments/
Disallow: /comments/feed
Disallow: /*/trackback/
Disallow: /feed/$
Disallow: /*/feed/$
Disallow: /*/feed/rss/$
Disallow: /*/feed/
Disallow: /*/comments/
Disallow: /?p=
Disallow: /archives/
Disallow: /tag/
Disallow: /wp-*
Disallow: /login/
Disallow: /*.inc$
# Allow specific pages. Under longest-match precedence (RFC 9309) these
# longer rules override the broader "Disallow: /wp-*" and "/wp-admin/".
Allow: /wp-admin/admin-ajax.php
Allow: /wp-includes/
Allow: /wp-content/plugins/
Allow: /wp-content/themes/
Allow: /wp-content/uploads/
# Block URLs with common tracking parameters (like UTM)
Disallow: /*?utm_*
Disallow: /*?ref=*
Disallow: /*?sessionid=*
# Block unnecessary query parameters that don't contribute to valuable content
Disallow: /*?sort=*
Disallow: /*?filter=*
Disallow: /*?order=*
Disallow: /*?page=*
Disallow: /*?search=*
# Block category and author archive pages (tag archives are blocked by
# "Disallow: /tag/" earlier in this group)
Disallow: /category/
Disallow: /author/
# Exceptions to "Disallow: /*?utm_*": these three Allow rules are longer, so
# they take precedence and URLs starting their query string with utm_source,
# utm_medium or utm_campaign stay crawlable. Other ?utm_* parameters remain
# blocked.
# NOTE(review): this largely cancels the UTM block above - confirm which of
# the two behaviours is actually wanted.
Allow: /*?utm_source=*
Allow: /*?utm_medium=*
Allow: /*?utm_campaign=*
# Block unnecessary AJAX requests or dynamic pages
Disallow: /ajax/
Disallow: /dynamic/
# Prevent crawling of temporary directories or files
Disallow: /tmp/
Disallow: /cache/
Disallow: /logs/

# ===========================
# Sitemap files
# ===========================
Sitemap: https://insightspsychology.org/sitemap_index.xml
Sitemap: https://insightspsychology.org/post-sitemap.xml
Sitemap: https://insightspsychology.org/page-sitemap.xml

# ===========================
# Allow specific bots (search engines, essential crawlers)
# ===========================
# NOTE(review): a crawler obeys only the most specific group that names it,
# so every bot listed in the "Allow: /" groups below ignores ALL Disallow
# rules of the "User-agent: *" group. Remove a bot from these lists if it is
# supposed to respect the default rules.
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Googlebot-Video
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
User-agent: Sogou
User-agent: Exabot
User-agent: facebot
User-agent: ia_archiver
User-agent: Applebot
Allow: /

# ===========================
# Advanced Search Engine Crawlers (Good to Allow)
# ===========================
User-agent: AdsBot-Google
User-agent: APIs-Google
User-agent: Google-InspectionTool
User-agent: Storebot-Google
User-agent: Pinterestbot
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: WhatsApp
User-agent: TelegramBot
User-agent: FacebookExternalHit
User-agent: Snapchat
User-agent: Redditbot
User-agent: Baiduspider-image
User-agent: YandexImages
User-agent: YandexMobileBot
User-agent: YandexDirect
Allow: /

# ===========================
# SEO Tools, Scrapers & Competitive Intelligence Bots
# ===========================
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: Moz
Allow: /

# Paginated listings are excluded for this crawler. Rule paths must begin
# with "/", so the pattern is spelled out for the site root and for any
# sub-path.
User-agent: Screaming Frog SEO Spider
Allow: /
Disallow: /page/
Disallow: /*/page/

User-agent: SiteAuditBot
User-agent: SEOkicks-Robot
User-agent: Seoscanners
User-agent: RankActiveLinkBot
User-agent: RankFlex
User-agent: LinkdexBot
User-agent: Lipperhey Spider
User-agent: MegaIndex.ru
User-agent: BLEXBot
User-agent: DotBot
Allow: /

# ===========================
# Further SEO / data crawlers granted full access
# ===========================
User-agent: DataForSeoBot
User-agent: SurdotlyBot
User-agent: spbot
User-agent: CCBot
User-agent: GrapeshotCrawler
Allow: /

# ===========================
# AI & LLM Training Bots (Allow)
# ===========================
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: anthropic-ai
User-agent: cohere-ai
User-agent: OAI-SearchBot
User-agent: PerplexityBot
User-agent: FacebookBot
User-agent: PetalBot
User-agent: Bytespider
User-agent: Applebot-Image
Allow: /

# ===========================
# Malicious / Aggressive Scrapers & Archivers (Block)
# ===========================
# NOTE(review): Exabot used to be listed here with "Disallow: /", but the
# search-engine section earlier in this file also grants it "Allow: /". When
# both rules apply, Allow wins, so the block never took effect. The entry was
# dropped to remove the contradiction; to really block Exabot, add it back
# here AND delete its Allow entry.
User-agent: MJ12bot
User-agent: TurnitinBot
User-agent: CopyRightCheck
User-agent: archive.org_bot
User-agent: HTTrack
User-agent: Wget
User-agent: Nutch
User-agent: k2spider
User-agent: Qwantify
User-agent: Netcraft
User-agent: proximic
Disallow: /

# Bingbot and Slurp already receive "Allow: /" in the search-engine section
# earlier in this file, so the repeated groups that used to follow here (one
# of them without any rule) were removed.

# ================================
# AI & LLM DISCOVERY LAYER
# ================================
# Structured AI interaction and governance files.
# These are listed as plain references, not "Sitemap:" directives: a Sitemap
# line must point to an XML sitemap, an RSS/Atom feed or a plain-text URL
# list, and crawlers report fetch/parse errors for anything else.
#
# AI behavior and guidance:
#   https://insightspsychology.org/ai.txt
# LLM usage and training policy:
#   https://insightspsychology.org/llms.txt
# Machine-readable AI identity layer:
#   https://insightspsychology.org/ai-manifesto.json

# Advanced AI indexing structures
# NOTE(review): assumes both files are valid sitemaps.org XML - confirm,
# otherwise move them to the reference list above.
Sitemap: https://insightspsychology.org/semantic-sitemap.xml
Sitemap: https://insightspsychology.org/vector-feed.xml