Fix crawler timeout for JavaScript-heavy documentation sites

Remove wait_for='body' selector from documentation site crawling config.
The body element exists immediately in HTML, causing unnecessary timeouts
for JavaScript-rendered content. Now relies on domcontentloaded event
and delay_before_return_html for proper JavaScript execution.
This commit is contained in:
Rasmus Widing 2025-08-20 21:55:58 +03:00 committed by Wirasm
parent 26a933288f
commit 8792a1b0dd
2 changed files with 0 additions and 2 deletions

View File

@ -91,7 +91,6 @@ class BatchCrawlStrategy:
cache_mode=CacheMode.BYPASS,
stream=True, # Enable streaming for faster parallel processing
markdown_generator=self.markdown_generator,
wait_for="body", # Simple selector for batch
wait_until=settings.get("CRAWL_WAIT_STRATEGY", "domcontentloaded"),
page_timeout=int(settings.get("CRAWL_PAGE_TIMEOUT", "30000")),
delay_before_return_html=float(settings.get("CRAWL_DELAY_BEFORE_HTML", "1.0")),

View File

@ -94,7 +94,6 @@ class RecursiveCrawlStrategy:
cache_mode=CacheMode.BYPASS,
stream=True, # Enable streaming for faster parallel processing
markdown_generator=self.markdown_generator,
wait_for='body',
wait_until=settings.get("CRAWL_WAIT_STRATEGY", "domcontentloaded"),
page_timeout=int(settings.get("CRAWL_PAGE_TIMEOUT", "30000")),
delay_before_return_html=float(settings.get("CRAWL_DELAY_BEFORE_HTML", "1.0")),