nebula-rss/nebula_rss/nebula_loader.py
2022-01-11 15:51:04 +01:00

95 lines
3.5 KiB
Python
Executable File

import datetime
import logging
import os
import re
import time
from typing import List, Optional
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.remote.remote_connection import LOGGER
from bs4 import BeautifulSoup
from nebula_rss import NebulaVideo
class NebulaLoader:
    """Log in to nebula.app with a headless Firefox and scrape "My shows".

    The loader owns a Selenium Firefox driver for its whole lifetime. Call
    :meth:`close` (or use the instance as a context manager) when done so the
    browser process is shut down.
    """

    # Seconds to wait for the "My shows" link / page heading after login.
    PAGE_WAIT_SECONDS = 3
    # Seconds to sleep before each inspection of the rendered page.
    POLL_INTERVAL_SECONDS = 2
    # Number of inspections allowed (after each page load) before giving up.
    MAX_POLLS = 5
    # Upper bound on page reloads triggered by the "not following" message,
    # so an account that really follows nobody cannot loop forever.
    MAX_RELOADS = 5

    # Text shown by the page when it reports no followed creators.
    _NO_CREATORS_RE = re.compile("You aren't following any creators yet.*")

    def __init__(
        self,
        username: str,
        password: str,
        driver_path: Optional[os.PathLike] = None
    ):
        """Store the credentials and start a headless Firefox driver.

        Args:
            username: Value typed into the login form's ``email`` field.
            password: Value typed into the login form's ``password`` field.
            driver_path: Optional path handed to selenium's ``Service``; when
                omitted, no explicit service is passed to the driver.
        """
        self.username = username
        self.password = password
        # Silence selenium's remote-connection logger.
        LOGGER.setLevel(logging.FATAL)
        service = None
        if driver_path:
            service = Service(driver_path)
        options = Options()
        options.headless = True
        self.driver = webdriver.Firefox(
            service=service,
            options=options,
            log_path=os.devnull,
            service_log_path=os.devnull)
        self.driver.implicitly_wait(10)  # seconds

    def __enter__(self):
        """Return the loader itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Shut the browser down when leaving the ``with`` block."""
        self.close()

    def close(self) -> None:
        """Quit the browser driver; safe to call more than once."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            # Clear the reference first so a second call is a no-op.
            self.driver = None
            driver.quit()

    @staticmethod
    def _is_video_anchor(anchor) -> bool:
        """Tell whether *anchor* is one of the video links to collect.

        An anchor qualifies when its ``href`` starts with ``/videos/`` and it
        carries a truthy ``aria-hidden`` attribute. Anchors without an
        ``href`` are rejected instead of raising.
        """
        href = anchor.get('href')
        return bool(
            href
            and href.startswith('/videos/')
            and anchor.get('aria-hidden')
        )

    @staticmethod
    def _parse_anchor(anchor) -> "NebulaVideo":
        """Build a ``NebulaVideo`` from a video anchor and its next sibling.

        The sibling element is expected to contain at least two ``<a>`` tags;
        the second one holds two ``<div>`` elements: the title, then a
        details block with a ``<span>`` (creator) and a ``<time>`` element
        whose ``datetime`` attribute is an ISO timestamp.
        """
        info_div = anchor.next_sibling
        details_anchor = info_div.find_all('a')[1]
        title_div, details_div = details_anchor.find_all('div')[:2]
        release_text = details_div.find('time').get('datetime')
        # fromisoformat() is given an explicit offset in place of a "Z" suffix.
        release_date = datetime.datetime.fromisoformat(
            release_text.replace('Z', '+00:00'))
        return NebulaVideo(
            title=title_div.string,
            creator=details_div.find('span').string,
            url='https://nebula.app' + anchor.get('href'),
            release_at=release_date
        )

    def _login(self) -> None:
        """Open the login page, fill in the credentials and submit the form."""
        self.driver.get('https://nebula.app/login')
        username_input = '//*[@name="email"]'
        password_input = '//*[@name="password"]'
        login_submit = '//*[@id="NebulaApp"]/div[2]/div[2]/div[1]/div/form/button'
        self.driver.find_element(By.XPATH, username_input).send_keys(self.username)
        self.driver.find_element(By.XPATH, password_input).send_keys(self.password)
        self.driver.find_element(By.XPATH, login_submit).click()

    def _open_my_shows(self) -> None:
        """Click the "My shows" menu link and wait for an ``<h2>`` to appear."""
        wait = WebDriverWait(self.driver, self.PAGE_WAIT_SECONDS)
        my_shows_link = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, 'menu a[href|="/myshows"]')))
        my_shows_link.click()
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'h2')))

    def _collect_video_anchors(self) -> list:
        """Poll the rendered page until video anchors show up.

        The page is inspected up to ``MAX_POLLS`` times per page load. When
        the "not following any creators" message is present the page is
        refreshed and the poll budget restarts, at most ``MAX_RELOADS``
        times. Returns the matching anchors, or an empty list when the
        budget is exhausted.
        """
        polls_left = self.MAX_POLLS
        reloads_left = self.MAX_RELOADS
        while polls_left > 0:
            time.sleep(self.POLL_INTERVAL_SECONDS)
            soup = BeautifulSoup(self.driver.page_source, features="lxml")
            shows_no_creators = any(
                p.find(string=self._NO_CREATORS_RE)
                for p in soup.find_all('p')
            )
            if shows_no_creators:
                if reloads_left <= 0:
                    # The message survived every reload: stop retrying.
                    break
                reloads_left -= 1
                print('Error loading videos, reloading page')
                self.driver.refresh()
                polls_left = self.MAX_POLLS
            else:
                anchors = [
                    a for a in soup.find_all('a') if self._is_video_anchor(a)
                ]
                if anchors:
                    return anchors
            polls_left -= 1
        return []

    def load(self) -> List["NebulaVideo"]:
        """Log in, open "My shows" and return the videos found there.

        Returns:
            One ``NebulaVideo`` per video anchor found on the page; an empty
            list when no video anchors appeared within the retry budget.
        """
        self._login()
        self._open_my_shows()
        return [
            NebulaLoader._parse_anchor(anchor)
            for anchor in self._collect_video_anchors()
        ]