Skip to content

Instantly share code, notes, and snippets.

@aravindkarnam
Created March 26, 2025 07:20
Show Gist options
  • Save aravindkarnam/879a16df48442f3e526a6896ec8bc6ae to your computer and use it in GitHub Desktop.
Save aravindkarnam/879a16df48442f3e526a6896ec8bc6ae to your computer and use it in GitHub Desktop.
Crawl LinkedIn pages with crawl4ai
import asyncio
from pathlib import Path
from pprint import pprint

from crawl4ai import (
    CrawlerRunConfig,
    BrowserConfig,
    AsyncWebCrawler,
    DefaultMarkdownGenerator,
    BM25ContentFilter,
)
# Browser settings shared by every crawl in this script.
# The persistent profile directory is resolved relative to the current user's
# home directory instead of a hard-coded absolute path, so the script also
# works on machines other than the original author's.
# NOTE(review): the profile is presumably expected to already hold a
# logged-in LinkedIn session -- confirm before relying on it.
browser_config = BrowserConfig(
    headless=False,
    user_data_dir=str(Path.home() / ".crawl4ai" / "profiles" / "linkedin"),
    use_managed_browser=True,
)
async def getProfile():
    """Crawl one LinkedIn profile page and pretty-print its filtered markdown.

    Opens an ``AsyncWebCrawler`` configured with the module-level
    ``browser_config``, fetches the hard-coded profile URL, and prints the
    ``fit_markdown`` produced by a ``DefaultMarkdownGenerator`` whose content
    filter is a ``BM25ContentFilter`` driven by a fixed user query.

    Raises:
        RuntimeError: If the crawl result reports that it did not succeed.
    """
    run_config = CrawlerRunConfig(
        # Wait before the HTML is captured (presumably seconds -- see the
        # crawl4ai CrawlerRunConfig docs).
        delay_before_return_html=3,
        # Scan the whole page, pausing between scroll steps.
        scan_full_page=True,
        scroll_delay=1,
        markdown_generator=DefaultMarkdownGenerator(
            content_filter=BM25ContentFilter(
                user_query="who is aravind karnam? what are his professional qualifications and work experience?"
            )
        ),
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            "https://www.linkedin.com/in/aravindkarnam/",
            config=run_config,
        )

        # Fail loudly with the crawler's own message rather than tripping
        # over a missing markdown attribute further down.
        if not result.success:
            raise RuntimeError(f"Crawl failed: {result.error_message}")

        pprint(result.markdown.fit_markdown)
# Run the crawl only when executed as a script, so importing this module
# does not launch a browser as a side effect.
if __name__ == "__main__":
    asyncio.run(getProfile())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment