import datetime
import os
import re
import time
from typing import List, Optional

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup

from nebula_rss import NebulaVideo


class NebulaLoader:
    """Log in to nebula.app with a headless Firefox and scrape the "My shows" page.

    The constructor starts a Firefox WebDriver session; call :meth:`close`
    (or use the instance as a context manager) to shut the browser down.
    """

    # Text shown on the page when the video list did not load.
    _FOLLOWER_ERROR_RE = re.compile("You aren't following any creators yet.*")
    # Number of polling attempts granted after each (re)load of the page.
    _POLL_ATTEMPTS = 5
    # Upper bound on page reloads so a persistent message cannot loop forever.
    _MAX_RELOADS = 5
    # Pause between two inspections of the page source, in seconds.
    _POLL_INTERVAL = 2

    def __init__(
        self, username: str, password: str, driver_path: Optional[os.PathLike] = None
    ):
        """Store the credentials and start a headless Firefox session.

        Args:
            username: Value typed into the login form's ``email`` field.
            password: Value typed into the login form's ``password`` field.
            driver_path: Optional path to the driver executable handed to
                ``Service``. When falsy, ``service=None`` is passed to
                ``webdriver.Firefox``.
        """
        self.username = username
        self.password = password

        service = None
        if driver_path:
            # The original code *called* driver_path, which fails for the
            # annotated PathLike/str values. A callable is still honoured so
            # that any caller relying on the old behaviour keeps working.
            if callable(driver_path):
                driver_path = driver_path()
            service = Service(os.fspath(driver_path))

        options = Options()
        # Newer Selenium 4 releases no longer honour `options.headless = True`;
        # the command-line flag works regardless of the Selenium version.
        options.add_argument('-headless')

        self.driver = webdriver.Firefox(service=service, options=options)
        self.driver.implicitly_wait(10)  # seconds

    def close(self) -> None:
        """Quit the WebDriver session started by the constructor."""
        self.driver.quit()

    def __enter__(self) -> 'NebulaLoader':
        """Return the loader itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Quit the browser when the ``with`` block ends."""
        self.close()

    @staticmethod
    def _parse_anchor(anchor) -> NebulaVideo:
        """Build a ``NebulaVideo`` from one video anchor of the parsed page.

        The element following ``anchor`` is expected to contain at least two
        ``<a>`` tags; the second one holds two ``<div>`` elements: the title,
        and a details block with a ``<span>`` (creator) and a ``<time>`` whose
        ``datetime`` attribute is an ISO-8601 timestamp.

        Args:
            anchor: A BeautifulSoup ``<a>`` tag whose ``href`` is a site path.

        Returns:
            The video described by the anchor and its sibling element.
        """
        info_div = anchor.next_sibling
        details_anchor = info_div.find_all('a')[1]
        title_div, details_div = details_anchor.find_all('div')[:2]

        creator = details_div.find('span').string
        release_text = details_div.find('time').get('datetime')
        # fromisoformat() on older Pythons rejects a trailing 'Z', so spell
        # the UTC offset out explicitly.
        release_date = datetime.datetime.fromisoformat(
            release_text.replace('Z', '+00:00')
        )

        return NebulaVideo(
            title=title_div.string,
            creator=creator,
            url='https://nebula.app' + anchor.get('href'),
            release_at=release_date,
        )

    def load(self) -> List[NebulaVideo]:
        """Log in, open "My shows" and return the videos listed there.

        The page source is inspected every ``_POLL_INTERVAL`` seconds until
        video anchors appear or the attempts run out. If the page shows the
        "You aren't following any creators yet" message it is reloaded, at
        most ``_MAX_RELOADS`` times.

        Returns:
            One ``NebulaVideo`` per video anchor found; an empty list when
            none showed up within the allowed attempts.
        """
        self.driver.get('https://nebula.app/login')

        username_input = '//*[@name="email"]'
        password_input = '//*[@name="password"]'
        login_submit = '//*[@id="NebulaApp"]/div[2]/div[2]/div[1]/div/form/button'

        self.driver.find_element(By.XPATH, username_input).send_keys(self.username)
        self.driver.find_element(By.XPATH, password_input).send_keys(self.password)
        self.driver.find_element(By.XPATH, login_submit).click()

        delay = 3
        wait = WebDriverWait(self.driver, delay)

        # Wait for the "My shows" link, then follow it.
        myshows = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'menu a[href|="/myshows"]')
            )
        )
        myshows.click()

        # Wait until the target page has rendered a heading.
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'h2')))

        video_links = []
        count_remaining = self._POLL_ATTEMPTS
        reloads_remaining = self._MAX_RELOADS

        while not video_links and count_remaining > 0:
            time.sleep(self._POLL_INTERVAL)
            soup = BeautifulSoup(self.driver.page_source, features="lxml")

            follower_error = any(
                p.find(string=self._FOLLOWER_ERROR_RE) for p in soup.find_all('p')
            )
            if follower_error:
                if reloads_remaining <= 0:
                    # The message persists across reloads; give up instead of
                    # refreshing forever.
                    break
                reloads_remaining -= 1
                print('Error loading videos, reloading page')
                self.driver.refresh()
                count_remaining = self._POLL_ATTEMPTS
            else:
                # href=True skips anchors without an href attribute, which
                # would otherwise make .startswith() fail on None.
                video_links = [
                    a
                    for a in soup.find_all('a', href=True)
                    if a['href'].startswith('/videos/') and a.get('aria-hidden')
                ]
            count_remaining -= 1

        return [NebulaLoader._parse_anchor(v) for v in video_links]