The site uses the CSS class `title` inside a card element. This selector works on the current layout (2024‑06) but may need tweaking if Ullu redesigns the page. """ soup = BeautifulSoup(html, "lxml") titles = set()
# -------------------------------------------------------------- # CORE LOGIC # -------------------------------------------------------------- def _load_cache() -> List[str] | None: """Return cached titles if file exists and is fresh, else None.""" if not CACHE_FILE.is_file(): return None mtime = CACHE_FILE.stat().st_mtime if time.time() - mtime > CACHE_TTL_SECONDS: return None try: return json.loads(CACHE_FILE.read_text(encoding="utf-8")) except Exception: return None
# Each card looks like <div class="show-card"> … <h3 class="title">XYZ</h3> … for h3 in soup.select("h3.title"): title = h3.get_text(strip=True) if title: titles.add(title) all ullu web series name
import requests from bs4 import BeautifulSoup
def _save_cache(titles: List[str]) -> None:
    """Persist titles to the JSON cache.

    The payload is written to a sibling temporary file and then moved over
    ``CACHE_FILE`` so an interrupted write does not leave a truncated
    cache behind.  The cache directory is created if it does not exist.

    Parameters
    ----------
    titles : List[str]
        Series titles to store; serialised as a UTF-8 JSON array.
    """
    payload = json.dumps(titles, ensure_ascii=False, indent=2)

    CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)

    # Write next to the target so the final rename stays on one filesystem.
    tmp_file = CACHE_FILE.with_name(CACHE_FILE.name + ".tmp")
    tmp_file.write_text(payload, encoding="utf-8")
    tmp_file.replace(CACHE_FILE)


# NOTE: The site uses the CSS class `title` inside a card element.
Returns ------- List[str] Alphabetically sorted, duplicate‑free series titles. """ if not force_refresh: cached = _load_cache() if cached is not None: return cached
pip install requests beautifulsoup4 lxml #!/usr/bin/env python3 """ Ullu – fetch all series titles. """ if not force_refresh: cached = _load_cache() if
all_titles: Set[str] = set() page_url = requests.compat.urljoin(BASE_URL, CATALOGUE_PATH)