parsing video divs
This commit is contained in:
parent
d2bf250fbf
commit
eae1b8e1cf
@ -1,7 +1,8 @@
|
|||||||
|
import datetime
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from typing import Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
@ -30,9 +31,25 @@ class NebulaLoader:
|
|||||||
options.headless = True
|
options.headless = True
|
||||||
self.driver = webdriver.Firefox(service=service, options=options)
|
self.driver = webdriver.Firefox(service=service, options=options)
|
||||||
self.driver.implicitly_wait(10) # seconds
|
self.driver.implicitly_wait(10) # seconds
|
||||||
#NebulaVideo('', '', '', None)
|
|
||||||
|
|
||||||
def load(self):
|
@staticmethod
def _parse_anchor(anchor) -> NebulaVideo:
    """Build a ``NebulaVideo`` from one video anchor element.

    The element following ``anchor`` is expected to hold the video's
    metadata: its second ``<a>`` contains two ``<div>`` elements, the
    first with the title and the second with the creator (``<span>``)
    and the release timestamp (``<time datetime="...">``).

    Args:
        anchor: The anchor element linking to the video. ``load`` passes
            items taken from ``soup.find_all('a')``; its ``href`` is
            appended to the site root to form the video URL.

    Returns:
        A ``NebulaVideo`` populated with title, creator, absolute URL
        and release datetime.

    Raises:
        IndexError: If the expected second ``<a>`` or second ``<div>``
            is missing from the metadata element.
    """
    metadata = anchor.next_sibling
    # The second link inside the metadata element carries the details.
    details_link = metadata.find_all('a')[1]

    inner_divs = details_link.find_all('div')
    title_node, details_node = inner_divs[0], inner_divs[1]

    creator_name = details_node.find('span').string
    timestamp = details_node.find('time').get('datetime')
    # Rewrite a trailing 'Z' as an explicit UTC offset before parsing;
    # presumably needed because fromisoformat() on Python < 3.11 rejects 'Z'.
    released = datetime.datetime.fromisoformat(
        timestamp.replace('Z', '+00:00')
    )

    video_title = title_node.string
    video_url = 'https://nebula.app' + anchor.get('href')
    return NebulaVideo(
        title=video_title,
        creator=creator_name,
        url=video_url,
        release_at=released,
    )
|
||||||
|
|
||||||
|
def load(self) -> List[NebulaVideo]:
|
||||||
self.driver.get('https://nebula.app/login')
|
self.driver.get('https://nebula.app/login')
|
||||||
|
|
||||||
username_input = '//*[@name="email"]'
|
username_input = '//*[@name="email"]'
|
||||||
@ -55,7 +72,7 @@ class NebulaLoader:
|
|||||||
follower_error_re = re.compile("You aren't following any creators yet.*")
|
follower_error_re = re.compile("You aren't following any creators yet.*")
|
||||||
while not video_links and count_remaining > 0:
|
while not video_links and count_remaining > 0:
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
soup = BeautifulSoup(self.driver.page_source)
|
soup = BeautifulSoup(self.driver.page_source, features="lxml")
|
||||||
follower_error = False
|
follower_error = False
|
||||||
follower_error = [p for p in soup.find_all('p') if p.find(string=follower_error_re)]
|
follower_error = [p for p in soup.find_all('p') if p.find(string=follower_error_re)]
|
||||||
if follower_error:
|
if follower_error:
|
||||||
@ -63,7 +80,7 @@ class NebulaLoader:
|
|||||||
self.driver.refresh()
|
self.driver.refresh()
|
||||||
count_remaining = 5
|
count_remaining = 5
|
||||||
else:
|
else:
|
||||||
video_links = [a for a in soup.find_all('a') if a.get('href').startswith('/videos/')]
|
all_anchors = soup.find_all('a')
|
||||||
|
video_links = [a for a in all_anchors if a.get('href').startswith('/videos/') and a.get('aria-hidden')]
|
||||||
count_remaining -= 1
|
count_remaining -= 1
|
||||||
#v = nebula_video.NebulaVideo('', '', '', None)
|
return [NebulaLoader._parse_anchor(v) for v in video_links]
|
||||||
print(video_links)
|
|
||||||
|
Loading…
Reference in New Issue
Block a user