2022-01-11 14:17:33 +00:00
|
|
|
import datetime
|
2022-01-11 14:51:04 +00:00
|
|
|
import logging
|
2022-01-11 13:09:24 +00:00
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import time
|
2022-01-11 14:17:33 +00:00
|
|
|
from typing import List, Optional
|
2022-01-11 13:09:24 +00:00
|
|
|
|
|
|
|
from selenium import webdriver
|
|
|
|
from selenium.webdriver.common.by import By
|
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
|
|
from selenium.webdriver.firefox.options import Options
|
|
|
|
from selenium.webdriver.firefox.service import Service
|
|
|
|
from selenium.webdriver.support.wait import WebDriverWait
|
2022-01-11 14:51:04 +00:00
|
|
|
from selenium.webdriver.remote.remote_connection import LOGGER
|
2022-01-11 13:09:24 +00:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
from nebula_rss import NebulaVideo
|
|
|
|
|
|
|
|
|
|
|
|
class NebulaLoader:
    """Scrape the "My shows" feed of nebula.app with a headless Firefox.

    The loader logs in with the given credentials, opens the "My shows"
    page and converts every video link found there into a ``NebulaVideo``.

    The underlying browser is started in ``__init__``. Call :meth:`close`
    (or use the instance as a context manager) to shut it down again::

        with NebulaLoader(user, password) as loader:
            videos = loader.load()
    """

    # Seconds to wait for the "My shows" link / page heading after login.
    LOGIN_WAIT_SECONDS = 3
    # Seconds to sleep before each inspection of the rendered page.
    POLL_INTERVAL_SECONDS = 2
    # Number of inspections without any video link before giving up.
    MAX_POLLS = 5
    # Upper bound on page reloads triggered by the "not following" message,
    # so a persistent message cannot keep load() spinning forever.
    MAX_RELOADS = 10

    # Message the page shows when the feed failed to load (or is empty).
    _FOLLOWER_ERROR_RE = re.compile("You aren't following any creators yet.*")

    def __init__(
        self,
        username: str,
        password: str,
        driver_path: Optional[os.PathLike] = None
    ):
        """Store the credentials and start a headless Firefox session.

        Args:
            username: Value typed into the login form's ``email`` field.
            password: Value typed into the login form's ``password`` field.
            driver_path: Optional path handed to selenium's Firefox
                ``Service``; when omitted no explicit service is passed.
        """
        self.username = username
        self.password = password

        # Only fatal messages from selenium's remote-connection logger.
        LOGGER.setLevel(logging.FATAL)

        service = Service(driver_path) if driver_path else None

        options = Options()
        options.headless = True

        self.driver = webdriver.Firefox(
            service=service,
            options=options,
            log_path=os.devnull,
            service_log_path=os.devnull)
        self.driver.implicitly_wait(10)  # seconds

    def close(self) -> None:
        """Quit the browser session if one is open; safe to call repeatedly."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            self.driver = None
            driver.quit()

    def __enter__(self):
        """Return the loader itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Shut the browser down when the ``with`` block ends."""
        self.close()

    @staticmethod
    def _parse_release_date(release_text: str) -> datetime.datetime:
        """Parse an ISO-8601 timestamp, accepting a trailing ``Z`` for UTC."""
        # fromisoformat() on older Python versions rejects the "Z" suffix,
        # so spell the UTC offset out explicitly.
        return datetime.datetime.fromisoformat(release_text.replace('Z', '+00:00'))

    @staticmethod
    def _is_video_anchor(anchor) -> bool:
        """Tell whether *anchor* is one of the feed's video links.

        A video link has an ``href`` starting with ``/videos/`` and a
        non-empty ``aria-hidden`` attribute. Anchors without any ``href``
        are simply not video links (they must not raise).
        """
        href = anchor.get('href') or ''
        return href.startswith('/videos/') and bool(anchor.get('aria-hidden'))

    @classmethod
    def _has_follower_error(cls, soup) -> bool:
        """Tell whether any ``<p>`` contains the "not following" message."""
        return any(
            p.find(string=cls._FOLLOWER_ERROR_RE) for p in soup.find_all('p')
        )

    @staticmethod
    def _parse_anchor(anchor) -> "NebulaVideo":
        """Build a ``NebulaVideo`` from a video link of the "My shows" page.

        NOTE(review): relies on the current page markup - the anchor's next
        sibling must contain at least two links, the second of which holds a
        title ``div`` followed by a details ``div`` with a ``span`` (creator)
        and a ``time`` element carrying a ``datetime`` attribute. Any change
        to that markup surfaces as an IndexError/AttributeError here.
        """
        info_div = anchor.next_sibling
        details_anchor = info_div.find_all('a')[1]
        title_div, details_div = details_anchor.find_all('div')[:2]

        release_text = details_div.find('time').get('datetime')

        return NebulaVideo(
            title=title_div.string,
            creator=details_div.find('span').string,
            url='https://nebula.app' + anchor.get('href'),
            release_at=NebulaLoader._parse_release_date(release_text)
        )

    def load(self) -> List["NebulaVideo"]:
        """Log in, open "My shows" and return the videos listed there.

        Returns:
            One ``NebulaVideo`` per video link found. The list is empty when
            no link showed up within ``MAX_POLLS`` inspections, or when the
            "not following" message persisted through ``MAX_RELOADS`` reloads.

        Raises:
            Whatever selenium raises when the login form, the "My shows"
            link or the page heading cannot be found in time.
        """
        self.driver.get('https://nebula.app/login')

        username_input = '//*[@name="email"]'
        password_input = '//*[@name="password"]'
        login_submit = '//*[@id="NebulaApp"]/div[2]/div[2]/div[1]/div/form/button'

        self.driver.find_element(By.XPATH, username_input).send_keys(self.username)
        self.driver.find_element(By.XPATH, password_input).send_keys(self.password)
        self.driver.find_element(By.XPATH, login_submit).click()

        # wait for "My shows" link
        wait = WebDriverWait(self.driver, self.LOGIN_WAIT_SECONDS)
        myshows = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'menu a[href|="/myshows"]')))
        myshows.click()
        # Block until the target page has rendered a heading.
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'h2')))

        video_links = []
        polls_left = self.MAX_POLLS
        reloads_left = self.MAX_RELOADS
        while not video_links and polls_left > 0:
            # Give the page time to render before inspecting it.
            time.sleep(self.POLL_INTERVAL_SECONDS)
            soup = BeautifulSoup(self.driver.page_source, features="lxml")

            if self._has_follower_error(soup):
                if reloads_left <= 0:
                    # The message will not go away; stop instead of looping.
                    break
                reloads_left -= 1
                print('Error loading videos, reloading page')
                self.driver.refresh()
                # A fresh page gets a fresh polling budget.
                polls_left = self.MAX_POLLS
                continue

            video_links = [a for a in soup.find_all('a') if self._is_video_anchor(a)]
            polls_left -= 1

        return [NebulaLoader._parse_anchor(v) for v in video_links]
|