Fix crawler timeout for JavaScript-heavy documentation sites

Remove wait_for='body' selector from documentation site crawling config. The body element exists immediately in HTML, causing unnecessary timeouts for JavaScript-rendered content. Now relies on domcontentloaded event and delay_before_return_html for proper JavaScript execution.
2025-08-20 21:55:58 +03:00 · 2025-08-20 21:55:58 +03:00 · 8792a1b0dd
commit 8792a1b0dd
parent 26a933288f
2 changed files with 0 additions and 2 deletions
--- a/python/src/server/services/crawling/strategies/batch.py
+++ b/python/src/server/services/crawling/strategies/batch.py
@ -91,7 +91,6 @@ class BatchCrawlStrategy:
                cache_mode=CacheMode.BYPASS,
                stream=True,  # Enable streaming for faster parallel processing
                markdown_generator=self.markdown_generator,
-                wait_for="body",  # Simple selector for batch
                wait_until=settings.get("CRAWL_WAIT_STRATEGY", "domcontentloaded"),
                page_timeout=int(settings.get("CRAWL_PAGE_TIMEOUT", "30000")),
                delay_before_return_html=float(settings.get("CRAWL_DELAY_BEFORE_HTML", "1.0")),
--- a/python/src/server/services/crawling/strategies/recursive.py
+++ b/python/src/server/services/crawling/strategies/recursive.py
@ -94,7 +94,6 @@ class RecursiveCrawlStrategy:
                cache_mode=CacheMode.BYPASS,
                stream=True,  # Enable streaming for faster parallel processing
                markdown_generator=self.markdown_generator,
-                wait_for='body',
                wait_until=settings.get("CRAWL_WAIT_STRATEGY", "domcontentloaded"),
                page_timeout=int(settings.get("CRAWL_PAGE_TIMEOUT", "30000")),
                delay_before_return_html=float(settings.get("CRAWL_DELAY_BEFORE_HTML", "1.0")),