parsing test

This commit is contained in:
Max Nuding 2022-01-11 15:51:04 +01:00
parent eae1b8e1cf
commit 9b6639ad3c
Signed by: phlaym
GPG Key ID: A06651BAB6777237
5 changed files with 66 additions and 43 deletions

View File

@ -1,4 +1,5 @@
import datetime import datetime
import logging
import os import os
import re import re
import time import time
@ -10,6 +11,7 @@ from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.remote.remote_connection import LOGGER
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from nebula_rss import NebulaVideo from nebula_rss import NebulaVideo
@ -24,12 +26,18 @@ class NebulaLoader:
): ):
self.username = username self.username = username
self.password = password self.password = password
LOGGER.setLevel(logging.FATAL)
service = None service = None
if driver_path: if driver_path:
service = Service(driver_path()) service = Service(driver_path)
options = Options() options = Options()
options.headless = True options.headless = True
self.driver = webdriver.Firefox(service=service, options=options) self.driver = webdriver.Firefox(
service=service,
options=options,
log_path=os.devnull,
service_log_path=os.devnull)
self.driver.implicitly_wait(10) # seconds self.driver.implicitly_wait(10) # seconds
@staticmethod @staticmethod

View File

@ -1,6 +1,7 @@
from dataclasses import dataclass from dataclasses import dataclass
import datetime import datetime
@dataclass @dataclass
class NebulaVideo: class NebulaVideo:
"""Defines a video on Nebula""" """Defines a video on Nebula"""

78
poetry.lock generated
View File

@ -128,6 +128,14 @@ category = "main"
optional = false optional = false
python-versions = ">=3.5" python-versions = ">=3.5"
[[package]]
name = "iniconfig"
version = "1.1.1"
description = "iniconfig: brain-dead simple config-ini parsing"
category = "dev"
optional = false
python-versions = "*"
[[package]] [[package]]
name = "lxml" name = "lxml"
version = "4.7.1" version = "4.7.1"
@ -142,14 +150,6 @@ html5 = ["html5lib"]
htmlsoup = ["beautifulsoup4"] htmlsoup = ["beautifulsoup4"]
source = ["Cython (>=0.29.7)"] source = ["Cython (>=0.29.7)"]
[[package]]
name = "more-itertools"
version = "8.12.0"
description = "More routines for operating on iterables, beyond itertools"
category = "dev"
optional = false
python-versions = ">=3.5"
[[package]] [[package]]
name = "outcome" name = "outcome"
version = "1.1.0" version = "1.1.0"
@ -174,14 +174,15 @@ pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
[[package]] [[package]]
name = "pluggy" name = "pluggy"
version = "0.13.1" version = "1.0.0"
description = "plugin and hook calling mechanisms for python" description = "plugin and hook calling mechanisms for python"
category = "dev" category = "dev"
optional = false optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" python-versions = ">=3.6"
[package.extras] [package.extras]
dev = ["pre-commit", "tox"] dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
[[package]] [[package]]
name = "py" name = "py"
@ -228,24 +229,23 @@ diagrams = ["jinja2", "railroad-diagrams"]
[[package]] [[package]]
name = "pytest" name = "pytest"
version = "5.4.3" version = "6.2.5"
description = "pytest: simple powerful testing with Python" description = "pytest: simple powerful testing with Python"
category = "dev" category = "dev"
optional = false optional = false
python-versions = ">=3.5" python-versions = ">=3.6"
[package.dependencies] [package.dependencies]
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
attrs = ">=17.4.0" attrs = ">=19.2.0"
colorama = {version = "*", markers = "sys_platform == \"win32\""} colorama = {version = "*", markers = "sys_platform == \"win32\""}
more-itertools = ">=4.0.0" iniconfig = "*"
packaging = "*" packaging = "*"
pluggy = ">=0.12,<1.0" pluggy = ">=0.12,<2.0"
py = ">=1.5.0" py = ">=1.8.2"
wcwidth = "*" toml = "*"
[package.extras] [package.extras]
checkqa-mypy = ["mypy (==v0.761)"]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
[[package]] [[package]]
@ -304,6 +304,14 @@ category = "main"
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
[[package]]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
category = "dev"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]] [[package]]
name = "trio" name = "trio"
version = "0.19.0" version = "0.19.0"
@ -353,14 +361,6 @@ brotli = ["brotlipy (>=0.6.0)"]
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[[package]]
name = "wcwidth"
version = "0.2.5"
description = "Measures the displayed width of unicode strings in a terminal"
category = "dev"
optional = false
python-versions = "*"
[[package]] [[package]]
name = "wsproto" name = "wsproto"
version = "1.0.0" version = "1.0.0"
@ -375,7 +375,7 @@ h11 = ">=0.9.0,<1"
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "62f191b8a92cdb5ce1da2df6d85f052f1a6db3f07b9420183693ced1941cdd4f" content-hash = "feac0aede04b3bbace7c3b2a6c9f0240e6ee1210abd4b598083952fcaa185e20"
[metadata.files] [metadata.files]
async-generator = [ async-generator = [
@ -490,6 +490,10 @@ idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
] ]
iniconfig = [
{file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
]
lxml = [ lxml = [
{file = "lxml-4.7.1-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:d546431636edb1d6a608b348dd58cc9841b81f4116745857b6cb9f8dadb2725f"}, {file = "lxml-4.7.1-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:d546431636edb1d6a608b348dd58cc9841b81f4116745857b6cb9f8dadb2725f"},
{file = "lxml-4.7.1-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6308062534323f0d3edb4e702a0e26a76ca9e0e23ff99be5d82750772df32a9e"}, {file = "lxml-4.7.1-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6308062534323f0d3edb4e702a0e26a76ca9e0e23ff99be5d82750772df32a9e"},
@ -552,10 +556,6 @@ lxml = [
{file = "lxml-4.7.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:447d5009d6b5447b2f237395d0018901dcc673f7d9f82ba26c1b9f9c3b444b60"}, {file = "lxml-4.7.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:447d5009d6b5447b2f237395d0018901dcc673f7d9f82ba26c1b9f9c3b444b60"},
{file = "lxml-4.7.1.tar.gz", hash = "sha256:a1613838aa6b89af4ba10a0f3a972836128801ed008078f8c1244e65958f1b24"}, {file = "lxml-4.7.1.tar.gz", hash = "sha256:a1613838aa6b89af4ba10a0f3a972836128801ed008078f8c1244e65958f1b24"},
] ]
more-itertools = [
{file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"},
{file = "more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"},
]
outcome = [ outcome = [
{file = "outcome-1.1.0-py2.py3-none-any.whl", hash = "sha256:c7dd9375cfd3c12db9801d080a3b63d4b0a261aa996c4c13152380587288d958"}, {file = "outcome-1.1.0-py2.py3-none-any.whl", hash = "sha256:c7dd9375cfd3c12db9801d080a3b63d4b0a261aa996c4c13152380587288d958"},
{file = "outcome-1.1.0.tar.gz", hash = "sha256:e862f01d4e626e63e8f92c38d1f8d5546d3f9cce989263c521b2e7990d186967"}, {file = "outcome-1.1.0.tar.gz", hash = "sha256:e862f01d4e626e63e8f92c38d1f8d5546d3f9cce989263c521b2e7990d186967"},
@ -565,8 +565,8 @@ packaging = [
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
] ]
pluggy = [ pluggy = [
{file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
{file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
] ]
py = [ py = [
{file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
@ -585,8 +585,8 @@ pyparsing = [
{file = "pyparsing-3.0.6.tar.gz", hash = "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81"}, {file = "pyparsing-3.0.6.tar.gz", hash = "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81"},
] ]
pytest = [ pytest = [
{file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"},
{file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"},
] ]
python-dateutil = [ python-dateutil = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
@ -611,6 +611,10 @@ soupsieve = [
{file = "soupsieve-2.3.1-py3-none-any.whl", hash = "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb"}, {file = "soupsieve-2.3.1-py3-none-any.whl", hash = "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb"},
{file = "soupsieve-2.3.1.tar.gz", hash = "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"}, {file = "soupsieve-2.3.1.tar.gz", hash = "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"},
] ]
toml = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
trio = [ trio = [
{file = "trio-0.19.0-py3-none-any.whl", hash = "sha256:c27c231e66336183c484fbfe080fa6cc954149366c15dc21db8b7290081ec7b8"}, {file = "trio-0.19.0-py3-none-any.whl", hash = "sha256:c27c231e66336183c484fbfe080fa6cc954149366c15dc21db8b7290081ec7b8"},
{file = "trio-0.19.0.tar.gz", hash = "sha256:895e318e5ec5e8cea9f60b473b6edb95b215e82d99556a03eb2d20c5e027efe1"}, {file = "trio-0.19.0.tar.gz", hash = "sha256:895e318e5ec5e8cea9f60b473b6edb95b215e82d99556a03eb2d20c5e027efe1"},
@ -623,10 +627,6 @@ urllib3 = [
{file = "urllib3-1.26.8-py2.py3-none-any.whl", hash = "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed"}, {file = "urllib3-1.26.8-py2.py3-none-any.whl", hash = "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed"},
{file = "urllib3-1.26.8.tar.gz", hash = "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"}, {file = "urllib3-1.26.8.tar.gz", hash = "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c"},
] ]
wcwidth = [
{file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"},
{file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"},
]
wsproto = [ wsproto = [
{file = "wsproto-1.0.0-py3-none-any.whl", hash = "sha256:d8345d1808dd599b5ffb352c25a367adb6157e664e140dbecba3f9bc007edb9f"}, {file = "wsproto-1.0.0-py3-none-any.whl", hash = "sha256:d8345d1808dd599b5ffb352c25a367adb6157e664e140dbecba3f9bc007edb9f"},
{file = "wsproto-1.0.0.tar.gz", hash = "sha256:868776f8456997ad0d9720f7322b746bbe9193751b5b290b7f924659377c8c38"}, {file = "wsproto-1.0.0.tar.gz", hash = "sha256:868776f8456997ad0d9720f7322b746bbe9193751b5b290b7f924659377c8c38"},

View File

@ -12,7 +12,7 @@ lxml = "^4.7.1"
bs4 = "^0.0.1" bs4 = "^0.0.1"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
pytest = "^5.2" pytest = "^6.2"
[build-system] [build-system]
requires = ["poetry-core>=1.0.0"] requires = ["poetry-core>=1.0.0"]

View File

@ -1,5 +1,19 @@
from nebula_rss import __version__ import datetime
from nebula_rss import __version__, NebulaLoader
from bs4 import BeautifulSoup
def test_version(): def test_version():
assert __version__ == '0.1.0' assert __version__ == '0.1.0'
def test_video_parsing():
example_div = '<div class="css-14ccti3"><a aria-hidden="true" tabindex="-1" class="css-cvinzg" href="/videos/hai-the-bug-that-created-free-public-wifi-networks-that-didnt-work"><div class="css-1ei3f96"><svg viewBox="0 0 2000 1125" class="css-hnyg4"></svg><div class="css-1i7faai"><picture class="css-0"><source srcset="https://images.watchnebula.com/p/dfbf8ec1-0573-41b0-a69e-ed773b32bfd4.jpeg?height=240&amp; 426w, https://images.watchnebula.com/p/dfbf8ec1-0573-41b0-a69e-ed773b32bfd4.jpeg?height=720&amp; 1280w, https://images.watchnebula.com/p/dfbf8ec1-0573-41b0-a69e-ed773b32bfd4.jpeg?height=1080&amp; 1920w" sizes="(max-width: 414px) 100vw, (max-width: 768px) and (min-width: 415px) 50vw, 440px" type="image/jpeg"><img src="https://images.watchnebula.com/p/dfbf8ec1-0573-41b0-a69e-ed773b32bfd4.jpeg?height=720&amp;" alt="The Bug That Created “Free Public Wifi” Networks That Didnt Work thumbnail" width="2000" height="1125" class="css-1u2fze8"></picture><div class="css-zazjdf"><span class="css-y4yfni">Video length:</span><time datetime="PT5M3S">5:03</time></div></div></div></a><div class="css-xzijep"><a class="css-bu30ts" href="/hai"><picture class="css-0"><source srcset="https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.webp?width=16&amp; 16w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.webp?width=32&amp; 32w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.webp?width=64&amp; 64w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.webp?width=128&amp; 128w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.webp?width=256&amp; 256w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.webp?width=512&amp; 512w" sizes="36px" type="image/webp"><source srcset="https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.jpeg?width=16&amp; 16w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.jpeg?width=32&amp; 32w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.jpeg?width=64&amp; 64w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.jpeg?width=128&amp; 128w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.jpeg?width=256&amp; 256w, https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.jpeg?width=512&amp; 512w" sizes="36px" type="image/jpeg"><img src="https://images.watchnebula.com/p/ba3e3488-6c9a-4bb1-a3f7-9ccc05352868.jpeg?width=64&amp;" alt="Half as Interesting avatar" width="2000" height="2000" class="css-izq1dd"></picture></a><a class="css-1eqy4pw" href="/videos/hai-the-bug-that-created-free-public-wifi-networks-that-didnt-work"><div class="css-1njioi">The Bug That Created “Free Public Wifi” Networks That Didnt Work</div><div class="css-4e1m5a"><span>Half as Interesting</span><span class="css-1go4ftc">•</span><span class="css-y4yfni">Video published:</span><time datetime="2022-01-06T15:39:39.000Z">5 days ago</time></div></a></div></div>' # noqa
soup = BeautifulSoup(example_div, features='lxml')
anchor = soup.div.a
video = NebulaLoader._parse_anchor(anchor)
assert video.title == 'The Bug That Created “Free Public Wifi” Networks That Didnt Work'
assert video.creator == 'Half as Interesting'
assert video.url == 'https://nebula.app/videos/hai-the-bug-that-created-free-public-wifi-networks-that-didnt-work'
assert video.release_at == datetime.datetime(2022, 1, 6, 15, 39, 39, tzinfo=datetime.timezone.utc)
print(video)