Fix #24, refactor URL detection
This commit is contained in:
parent
9bbcc843c2
commit
68aade4f1f
@ -1,7 +1,5 @@
|
|||||||
HASHTAG_FILTER = ichlausche,music,musik,nowplaying,tunetuesday,nowlistening
|
HASHTAG_FILTER = ichlausche,music,musik,nowplaying,tunetuesday,nowlistening
|
||||||
URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com,songwhip.com
|
|
||||||
YOUTUBE_API_KEY = CHANGE_ME
|
YOUTUBE_API_KEY = CHANGE_ME
|
||||||
YOUTUBE_DISABLE = false
|
|
||||||
ODESLI_API_KEY = CHANGE_ME
|
ODESLI_API_KEY = CHANGE_ME
|
||||||
MASTODON_INSTANCE = 'metalhead.club'
|
MASTODON_INSTANCE = 'metalhead.club'
|
||||||
BASE_URL = 'https://moshingmammut.phlaym.net'
|
BASE_URL = 'https://moshingmammut.phlaym.net'
|
||||||
|
11
README.md
11
README.md
@ -11,8 +11,8 @@ Having a quick overview over what is being posted can be a great way to discover
|
|||||||
|
|
||||||
This is fairly simple from a technical point of view! metalhead.club's local timeline is being watched using the
|
This is fairly simple from a technical point of view! metalhead.club's local timeline is being watched using the
|
||||||
Mastodon Streaming API over a Websocket. Every time a new post arrives, it is checked if it contains any music by
|
Mastodon Streaming API over a Websocket. Every time a new post arrives, it is checked if it contains any music by
|
||||||
checking included hashtags and URLs. A list of tags and URLs can be found in [the configuration](.env.EXAMPLE).
|
checking included hashtags and URLs. A list of tags can be found in [the configuration](.env.EXAMPLE).
|
||||||
Additionally, lins to YouTube are queried, if they are music or other videos using the YouTube API.
|
Additionally, links are checked for music by looking them up on https://song.link: a link counts as music if song.link finds info on it.
|
||||||
|
|
||||||
If a post passes this check it is saved to a SQLite database.
|
If a post passes this check it is saved to a SQLite database.
|
||||||
|
|
||||||
@ -93,11 +93,12 @@ and set your `User`, `Group`, `ExecStart` and `WorkingDirectory` accordingly.
|
|||||||
|
|
||||||
#### On your development machine
|
#### On your development machine
|
||||||
|
|
||||||
Copy `.env.EXAMPLE` to `.env` and add your `YOUTUBE_API_KEY`.
|
Copy `.env.EXAMPLE` to `.env` and add your `YOUTUBE_API_KEY` and `ODESLI_API_KEY`.
|
||||||
To obtain one follow [YouTube's guide](https://developers.google.com/youtube/registering_an_application) to create an
|
To obtain a YouTube API key, follow [YouTube's guide](https://developers.google.com/youtube/registering_an_application) to create an
|
||||||
_API key_.
|
_API key_.
|
||||||
If `YOUTUBE_API_KEY` is unset, all YouTube videos will be assumed to contain music links.
|
If `YOUTUBE_API_KEY` is unset, no playlist will be updated.
|
||||||
If this is unwanted, set `YOUTUBE_DISABLE` to `true`).
|
|
||||||
|
If `ODESLI_API_KEY` is unset, your rate limit to the song.link API will be lower.
|
||||||
|
|
||||||
Run `npm run build` and copy the output folder, usually `build` to `$APP_DIR` on your server.
|
Run `npm run build` and copy the output folder, usually `build` to `$APP_DIR` on your server.
|
||||||
|
|
||||||
|
903
package-lock.json
generated
903
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "moshing-mammut",
|
"name": "moshing-mammut",
|
||||||
"version": "1.1.0",
|
"version": "1.3.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"license": "LGPL-3.0-or-later",
|
"license": "LGPL-3.0-or-later",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
@ -1,89 +1,17 @@
|
|||||||
import {
|
import { HASHTAG_FILTER, MASTODON_INSTANCE, ODESLI_API_KEY } from '$env/static/private';
|
||||||
HASHTAG_FILTER,
|
|
||||||
MASTODON_INSTANCE,
|
|
||||||
ODESLI_API_KEY,
|
|
||||||
URL_FILTER,
|
|
||||||
YOUTUBE_API_KEY,
|
|
||||||
YOUTUBE_DISABLE
|
|
||||||
} from '$env/static/private';
|
|
||||||
import { log } from '$lib/log';
|
import { log } from '$lib/log';
|
||||||
import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
|
import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
|
||||||
import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
|
import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
|
||||||
import { getPosts, savePost } from '$lib/server/db';
|
import { getPosts, savePost } from '$lib/server/db';
|
||||||
import { createFeed, saveAtomFeed } from '$lib/server/rss';
|
import { createFeed, saveAtomFeed } from '$lib/server/rss';
|
||||||
import { sleep } from '$lib/sleep';
|
import { sleep } from '$lib/sleep';
|
||||||
import { isTruthy } from '$lib/truthyString';
|
|
||||||
import { WebSocket } from 'ws';
|
import { WebSocket } from 'ws';
|
||||||
|
|
||||||
const YOUTUBE_REGEX = new RegExp(
|
|
||||||
/https?:\/\/(www\.)?youtu((be.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/gm
|
|
||||||
);
|
|
||||||
|
|
||||||
const URL_REGEX = new RegExp(/href="(?<postUrl>[^>]+?)" target="_blank"/gm);
|
const URL_REGEX = new RegExp(/href="(?<postUrl>[^>]+?)" target="_blank"/gm);
|
||||||
|
|
||||||
export class TimelineReader {
|
export class TimelineReader {
|
||||||
private static _instance: TimelineReader;
|
private static _instance: TimelineReader;
|
||||||
|
|
||||||
private static async isMusicVideo(videoId: string) {
|
|
||||||
if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
|
|
||||||
// Assume that it *is* a music link when no YT API key is provided
|
|
||||||
// If it should assumed to not be YOUTUBE_DISABLE needs to be set to something truthy
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
const searchParams = new URLSearchParams([
|
|
||||||
['part', 'snippet'],
|
|
||||||
['id', videoId],
|
|
||||||
['key', YOUTUBE_API_KEY]
|
|
||||||
]);
|
|
||||||
const youtubeVideoUrl = new URL(`https://www.googleapis.com/youtube/v3/videos?${searchParams}`);
|
|
||||||
const resp = await fetch(youtubeVideoUrl);
|
|
||||||
const respObj = await resp.json();
|
|
||||||
if (!respObj.items.length) {
|
|
||||||
log.warn('Could not find video with id', videoId);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const item = respObj.items[0];
|
|
||||||
if (item.tags?.includes('music')) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const categorySearchParams = new URLSearchParams([
|
|
||||||
['part', 'snippet'],
|
|
||||||
['id', item.categoryId],
|
|
||||||
['key', YOUTUBE_API_KEY]
|
|
||||||
]);
|
|
||||||
const youtubeCategoryUrl = new URL(
|
|
||||||
`https://www.googleapis.com/youtube/v3/videoCategories?${categorySearchParams}`
|
|
||||||
);
|
|
||||||
const categoryTitle: string = await fetch(youtubeCategoryUrl)
|
|
||||||
.then((r) => r.json())
|
|
||||||
.then((r) => r.items[0]?.title);
|
|
||||||
return categoryTitle === 'Music';
|
|
||||||
}
|
|
||||||
|
|
||||||
private static async checkYoutubeMatches(postContent: string): Promise<string | null> {
|
|
||||||
if (isTruthy(YOUTUBE_DISABLE)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
const matches = postContent.matchAll(YOUTUBE_REGEX);
|
|
||||||
for (const match of matches) {
|
|
||||||
if (match === undefined || match.groups === undefined) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const videoId = match.groups.videoId.toString();
|
|
||||||
try {
|
|
||||||
const isMusic = await TimelineReader.isMusicVideo(videoId);
|
|
||||||
if (isMusic) {
|
|
||||||
return match[0];
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
log.error('Could not check if', videoId, 'is a music video', e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static async getSongInfo(url: URL, remainingTries = 6): Promise<SongInfo | null> {
|
private static async getSongInfo(url: URL, remainingTries = 6): Promise<SongInfo | null> {
|
||||||
if (remainingTries === 0) {
|
if (remainingTries === 0) {
|
||||||
log.error('No tries remaining. Lookup failed!');
|
log.error('No tries remaining. Lookup failed!');
|
||||||
@ -109,16 +37,18 @@ export class TimelineReader {
|
|||||||
if (response.status === 429) {
|
if (response.status === 429) {
|
||||||
throw new Error('Rate limit reached', { cause: 429 });
|
throw new Error('Rate limit reached', { cause: 429 });
|
||||||
}
|
}
|
||||||
return response.json().then((odesliInfo: OdesliResponse) => {
|
const odesliInfo: OdesliResponse = await response.json();
|
||||||
const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
|
if (!odesliInfo || !odesliInfo.entitiesByUniqueId || !odesliInfo.entityUniqueId) {
|
||||||
const platform: Platform = 'youtube';
|
return null;
|
||||||
return {
|
}
|
||||||
...info,
|
const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
|
||||||
pageUrl: odesliInfo.pageUrl,
|
const platform: Platform = 'youtube';
|
||||||
youtubeUrl: odesliInfo.linksByPlatform[platform]?.url,
|
return {
|
||||||
postedUrl: url.toString()
|
...info,
|
||||||
} as SongInfo;
|
pageUrl: odesliInfo.pageUrl,
|
||||||
});
|
youtubeUrl: odesliInfo.linksByPlatform[platform]?.url,
|
||||||
|
postedUrl: url.toString()
|
||||||
|
} as SongInfo;
|
||||||
});
|
});
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if (e instanceof Error && e.cause === 429) {
|
if (e instanceof Error && e.cause === 429) {
|
||||||
@ -131,24 +61,6 @@ export class TimelineReader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static async getUrlFromPreviewCard(post: Post): Promise<string | undefined> {
|
|
||||||
return undefined;
|
|
||||||
// Currently disabled, because it seems to always be null, even after re-fetching the post from Mastodon
|
|
||||||
/*
|
|
||||||
if (post.card) {
|
|
||||||
return post.card?.url;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
const status: Post = await (
|
|
||||||
await fetch(`https://${MASTODON_INSTANCE}/api/v1/statuses/${post.id}`)
|
|
||||||
).json();
|
|
||||||
return status.card?.url;
|
|
||||||
} catch (e) {
|
|
||||||
log.error(`Could not fetch status ${post.url}`, e);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
private startWebsocket() {
|
private startWebsocket() {
|
||||||
const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
|
const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
|
||||||
socket.onopen = () => {
|
socket.onopen = () => {
|
||||||
@ -165,74 +77,27 @@ export class TimelineReader {
|
|||||||
const hashttags: string[] = HASHTAG_FILTER.split(',');
|
const hashttags: string[] = HASHTAG_FILTER.split(',');
|
||||||
const found_tags: Tag[] = post.tags.filter((t: Tag) => hashttags.includes(t.name));
|
const found_tags: Tag[] = post.tags.filter((t: Tag) => hashttags.includes(t.name));
|
||||||
|
|
||||||
const urls: string[] = URL_FILTER.split(',');
|
const urlMatches = post.content.matchAll(URL_REGEX);
|
||||||
const found_urls = urls.filter((t) => post.content.includes(t));
|
|
||||||
// If we don't have any tags or non-youtube urls, check youtube
|
|
||||||
// YT is handled separately, because it requires an API call and therefore is slower
|
|
||||||
if (found_urls.length === 0 && found_tags.length === 0) {
|
|
||||||
const youtubeUrl = await TimelineReader.checkYoutubeMatches(post.content);
|
|
||||||
if (youtubeUrl === null) {
|
|
||||||
log.log('Ignoring post', post.url);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
log.debug('Found YT URL', youtubeUrl, found_urls, found_urls.length);
|
|
||||||
} else {
|
|
||||||
log.debug('Found URLs and/or tags:', found_urls, found_tags);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Change URL detection above to use this regex.
|
|
||||||
// Looks like we're stuck with regex for now instead of using preview cards.
|
|
||||||
// Might as well use it to find URLs. Could also use this for YouTube: If Odesli finds something, it's a song,
|
|
||||||
// if not, ignore it. No need to consult the YT API and give those links a special handling
|
|
||||||
const musicUrls: URL[] = [];
|
|
||||||
const musicUrl = await TimelineReader.getUrlFromPreviewCard(post);
|
|
||||||
if (musicUrl) {
|
|
||||||
try {
|
|
||||||
musicUrls.push(new URL(musicUrl));
|
|
||||||
} catch (e) {
|
|
||||||
log.error(
|
|
||||||
'URL received from preview card does not seem to be a valid URL',
|
|
||||||
musicUrl,
|
|
||||||
e
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const urlMatches = post.content.matchAll(URL_REGEX);
|
|
||||||
for (const match of urlMatches) {
|
|
||||||
if (match === undefined || match.groups === undefined) {
|
|
||||||
console.warn(
|
|
||||||
'Match listed in allMatches, but either it or its groups are undefined',
|
|
||||||
match
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const urlMatch = match.groups.postUrl.toString();
|
|
||||||
let url: URL;
|
|
||||||
try {
|
|
||||||
url = new URL(urlMatch);
|
|
||||||
} catch (e) {
|
|
||||||
log.error('URL found via Regex does not seem to be a valud url', urlMatch, e);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check *all* found url and let odesli determine if it is music or not
|
|
||||||
musicUrls.push(url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const songs: SongInfo[] = [];
|
const songs: SongInfo[] = [];
|
||||||
log.debug(`Checking ${musicUrls.length} URLs if they contain song data`);
|
for (const match of urlMatches) {
|
||||||
for (const url of musicUrls) {
|
if (match === undefined || match.groups === undefined) {
|
||||||
let hostname: string | null = null;
|
log.warn(
|
||||||
try {
|
'Match listed in allMatches, but either it or its groups are undefined',
|
||||||
hostname = new URL(url).hostname;
|
match
|
||||||
} catch (e) {
|
);
|
||||||
log.error(`Could not check hostname for URL ${url}`, e);
|
|
||||||
}
|
|
||||||
if (hostname === 'songwhip.com') {
|
|
||||||
// TODO: Implement checking the songwhip API
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
const urlMatch = match.groups.postUrl.toString();
|
||||||
|
let url: URL;
|
||||||
|
try {
|
||||||
|
url = new URL(urlMatch);
|
||||||
|
} catch (e) {
|
||||||
|
log.error('URL found via Regex does not seem to be a valud url', urlMatch, e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check *all* found url and let odesli determine if it is music or not
|
||||||
|
log.debug(`Checking ${url} if it contains song data`);
|
||||||
const info = await TimelineReader.getSongInfo(url);
|
const info = await TimelineReader.getSongInfo(url);
|
||||||
log.debug(`Found song info for ${url}?`, info);
|
log.debug(`Found song info for ${url}?`, info);
|
||||||
if (info) {
|
if (info) {
|
||||||
@ -240,6 +105,13 @@ export class TimelineReader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If no song info was found for any URL and the post has no matching hashtags, ignore the post
|
||||||
|
// YT links need no separate handling: every URL found in the post is passed to getSongInfo above
|
||||||
|
if (songs.length === 0 && found_tags.length === 0) {
|
||||||
|
log.log('Ignoring post', post.url);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
await savePost(post, songs);
|
await savePost(post, songs);
|
||||||
log.debug('Saved post', post.url);
|
log.debug('Saved post', post.url);
|
||||||
|
|
||||||
|
@ -187,7 +187,7 @@
|
|||||||
}
|
}
|
||||||
.post {
|
.post {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
max-width: 600px;
|
max-width: min(800px, 80vw);
|
||||||
margin-bottom: 1em;
|
margin-bottom: 1em;
|
||||||
border-bottom: 1px solid var(--color-border);
|
border-bottom: 1px solid var(--color-border);
|
||||||
padding: 1em;
|
padding: 1em;
|
||||||
|
Loading…
Reference in New Issue
Block a user