# nebula-rss/nebula_rss/nebula_loader.py

import os
import re
import time
from typing import Optional

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup

from nebula_rss import NebulaVideo


class NebulaLoader:
    """Log in to nebula.app with a headless Firefox and collect video links.

    The constructor starts a Firefox WebDriver session; :meth:`load` drives
    the login form, opens the "My shows" page and gathers every anchor whose
    ``href`` starts with ``/videos/``.

    The browser session holds an OS process, so call :meth:`close` when done
    or use the instance as a context manager::

        with NebulaLoader(username, password) as loader:
            loader.load()
    """

    # Seconds to wait (explicitly) for post-login navigation elements.
    _NAVIGATION_TIMEOUT = 3
    # Seconds to sleep before each inspection of the page source.
    _POLL_INTERVAL = 2
    # Polling budget granted to a freshly (re)loaded page.
    _POLL_ATTEMPTS = 5
    # Upper bound on page reloads; without it a page that keeps showing the
    # "not following" message would make load() spin forever.
    _MAX_RELOADS = 5
    # Text shown in a <p> when the page reports that no creators are
    # followed; load() reacts to it by reloading the page.
    _FOLLOWER_ERROR_RE = re.compile("You aren't following any creators yet.*")

    def __init__(
        self,
        username: str,
        password: str,
        driver_path: Optional[os.PathLike] = None
    ):
        """Store the credentials and start a headless Firefox session.

        Args:
            username: Value typed into the login form's ``email`` field.
            password: Value typed into the login form's ``password`` field.
            driver_path: Optional path to the driver executable handed to
                the Selenium ``Service``. When omitted (or empty) no
                ``Service`` is built and ``None`` is passed to Selenium.
        """
        self.username = username
        self.password = password
        service = None
        if driver_path:
            # A path is data, not a callable: convert str/PathLike to the
            # plain string form the Service constructor expects.
            service = Service(os.fspath(driver_path))
        options = Options()
        # The ``headless`` property is deprecated/removed in Selenium 4;
        # passing the browser argument works across Selenium 4 releases.
        options.add_argument('-headless')
        self.driver = webdriver.Firefox(service=service, options=options)
        # NOTE(review): an implicit wait is combined with explicit
        # WebDriverWait calls in load(); Selenium's docs advise against
        # mixing the two -- confirm this is intended.
        self.driver.implicitly_wait(10)  # seconds

    def __enter__(self):
        """Return the loader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser on leaving the ``with`` block."""
        self.close()

    def close(self):
        """Quit the WebDriver session; safe to call more than once."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            # Drop the reference first so a failing quit() is not retried.
            self.driver = None
            driver.quit()

    def load(self):
        """Log in, open "My shows" and print the collected video links.

        The links are ``<a>`` elements (as parsed by BeautifulSoup) whose
        ``href`` starts with ``/videos/``. An empty list is printed when no
        link shows up within the polling/reload budget.
        """
        self._login()
        self._open_my_shows()
        video_links = self._collect_video_links()
        print(video_links)

    def _login(self):
        """Open the login page, fill in the credentials and submit."""
        self.driver.get('https://nebula.app/login')

        username_input = '//*[@name="email"]'
        password_input = '//*[@name="password"]'
        # NOTE(review): absolute, position-based XPath -- it depends on the
        # exact page layout and will break if the markup changes.
        login_submit = '//*[@id="NebulaApp"]/div[2]/div[2]/div[1]/div/form/button'

        self.driver.find_element(By.XPATH, username_input).send_keys(self.username)
        self.driver.find_element(By.XPATH, password_input).send_keys(self.password)
        self.driver.find_element(By.XPATH, login_submit).click()

    def _open_my_shows(self):
        """Click the "My shows" menu link and wait for a heading to appear."""
        wait = WebDriverWait(self.driver, self._NAVIGATION_TIMEOUT)
        # Wait for the "My shows" link before clicking it.
        myshows = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'menu a[href|="/myshows"]')
            )
        )
        myshows.click()
        # Block until an <h2> is present on the page that follows.
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'h2')))

    def _collect_video_links(self):
        """Poll the page until video links appear; return them (or ``[]``).

        When the page shows the "not following any creators" message it is
        reloaded, at most ``_MAX_RELOADS`` times, and polling starts over.
        """
        reloads_remaining = self._MAX_RELOADS
        polls_remaining = self._POLL_ATTEMPTS
        while polls_remaining > 0:
            time.sleep(self._POLL_INTERVAL)
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            soup = BeautifulSoup(self.driver.page_source, 'html.parser')
            follower_error = any(
                p.find(string=self._FOLLOWER_ERROR_RE)
                for p in soup.find_all('p')
            )
            if follower_error:
                if reloads_remaining <= 0:
                    break
                reloads_remaining -= 1
                print('Error loading videos, reloading page')
                self.driver.refresh()
                # Give the reloaded page a fresh polling budget.
                polls_remaining = self._POLL_ATTEMPTS
            else:
                # href=True skips anchors without an href attribute, which
                # would otherwise yield None and crash on .startswith().
                video_links = [
                    a for a in soup.find_all('a', href=True)
                    if a['href'].startswith('/videos/')
                ]
                if video_links:
                    return video_links
            polls_remaining -= 1
        return []