Fix #24, refactor URL detection
This commit is contained in:
parent
9bbcc843c2
commit
68aade4f1f
@ -1,7 +1,5 @@
|
||||
HASHTAG_FILTER = ichlausche,music,musik,nowplaying,tunetuesday,nowlistening
|
||||
URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com,songwhip.com
|
||||
YOUTUBE_API_KEY = CHANGE_ME
|
||||
YOUTUBE_DISABLE = false
|
||||
ODESLI_API_KEY = CHANGE_ME
|
||||
MASTODON_INSTANCE = 'metalhead.club'
|
||||
BASE_URL = 'https://moshingmammut.phlaym.net'
|
||||
|
11
README.md
11
README.md
@ -11,8 +11,8 @@ Having a quick overview over what is being posted can be a great way to discover
|
||||
|
||||
This is fairly simple from a technical point of view! metalhead.club's local timeline is being watched using the
|
||||
Mastodon Streaming API over a Websocket. Every time a new post arrives, it is checked if it contains any music by
|
||||
checking included hashtags and URLs. A list of tags and URLs can be found in [the configuration](.env.EXAMPLE).
|
||||
Additionally, links to YouTube are checked using the YouTube API to determine whether they are music or other videos.
|
||||
checking included hashtags and URLs. A list of tags can be found in [the configuration](.env.EXAMPLE).
|
||||
Additionally, links are vetted as music by checking whether https://song.link finds info on them.
|
||||
|
||||
If a post passes this check it is saved to a SQLite database.
|
||||
|
||||
@ -93,11 +93,12 @@ and set your `User`, `Group`, `ExecStart` and `WorkingDirectory` accordingly.
|
||||
|
||||
#### On your development machine
|
||||
|
||||
Copy `.env.EXAMPLE` to `.env` and add your `YOUTUBE_API_KEY`.
|
||||
Copy `.env.EXAMPLE` to `.env` and add your `YOUTUBE_API_KEY` and `ODESLI_API_KEY`.
|
||||
To obtain the YouTube key, follow [YouTube's guide](https://developers.google.com/youtube/registering_an_application) to create an
|
||||
_API key_.
|
||||
If `YOUTUBE_API_KEY` is unset, all YouTube videos will be assumed to contain music links.
|
||||
If this is unwanted, set `YOUTUBE_DISABLE` to `true`.
|
||||
If `YOUTUBE_API_KEY` is unset, no playlist will be updated.
|
||||
|
||||
If `ODESLI_API_KEY` is unset, your rate limit to the song.link API will be lower.
|
||||
|
||||
Run `npm run build` and copy the output folder, usually `build`, to `$APP_DIR` on your server.
|
||||
|
||||
|
903
package-lock.json
generated
903
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "moshing-mammut",
|
||||
"version": "1.1.0",
|
||||
"version": "1.3.0",
|
||||
"private": true,
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"scripts": {
|
||||
|
@ -1,89 +1,17 @@
|
||||
import {
|
||||
HASHTAG_FILTER,
|
||||
MASTODON_INSTANCE,
|
||||
ODESLI_API_KEY,
|
||||
URL_FILTER,
|
||||
YOUTUBE_API_KEY,
|
||||
YOUTUBE_DISABLE
|
||||
} from '$env/static/private';
|
||||
import { HASHTAG_FILTER, MASTODON_INSTANCE, ODESLI_API_KEY } from '$env/static/private';
|
||||
import { log } from '$lib/log';
|
||||
import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
|
||||
import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
|
||||
import { getPosts, savePost } from '$lib/server/db';
|
||||
import { createFeed, saveAtomFeed } from '$lib/server/rss';
|
||||
import { sleep } from '$lib/sleep';
|
||||
import { isTruthy } from '$lib/truthyString';
|
||||
import { WebSocket } from 'ws';
|
||||
|
||||
const YOUTUBE_REGEX = new RegExp(
|
||||
/https?:\/\/(www\.)?youtu((be.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/gm
|
||||
);
|
||||
|
||||
const URL_REGEX = new RegExp(/href="(?<postUrl>[^>]+?)" target="_blank"/gm);
|
||||
|
||||
export class TimelineReader {
|
||||
private static _instance: TimelineReader;
|
||||
|
||||
private static async isMusicVideo(videoId: string) {
|
||||
if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
|
||||
// Assume that it *is* a music link when no YT API key is provided
|
||||
// If it should be assumed *not* to be music instead, YOUTUBE_DISABLE needs to be set to something truthy
|
||||
return true;
|
||||
}
|
||||
const searchParams = new URLSearchParams([
|
||||
['part', 'snippet'],
|
||||
['id', videoId],
|
||||
['key', YOUTUBE_API_KEY]
|
||||
]);
|
||||
const youtubeVideoUrl = new URL(`https://www.googleapis.com/youtube/v3/videos?${searchParams}`);
|
||||
const resp = await fetch(youtubeVideoUrl);
|
||||
const respObj = await resp.json();
|
||||
if (!respObj.items.length) {
|
||||
log.warn('Could not find video with id', videoId);
|
||||
return false;
|
||||
}
|
||||
|
||||
const item = respObj.items[0];
|
||||
if (item.tags?.includes('music')) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const categorySearchParams = new URLSearchParams([
|
||||
['part', 'snippet'],
|
||||
['id', item.categoryId],
|
||||
['key', YOUTUBE_API_KEY]
|
||||
]);
|
||||
const youtubeCategoryUrl = new URL(
|
||||
`https://www.googleapis.com/youtube/v3/videoCategories?${categorySearchParams}`
|
||||
);
|
||||
const categoryTitle: string = await fetch(youtubeCategoryUrl)
|
||||
.then((r) => r.json())
|
||||
.then((r) => r.items[0]?.title);
|
||||
return categoryTitle === 'Music';
|
||||
}
|
||||
|
||||
private static async checkYoutubeMatches(postContent: string): Promise<string | null> {
|
||||
if (isTruthy(YOUTUBE_DISABLE)) {
|
||||
return null;
|
||||
}
|
||||
const matches = postContent.matchAll(YOUTUBE_REGEX);
|
||||
for (const match of matches) {
|
||||
if (match === undefined || match.groups === undefined) {
|
||||
continue;
|
||||
}
|
||||
const videoId = match.groups.videoId.toString();
|
||||
try {
|
||||
const isMusic = await TimelineReader.isMusicVideo(videoId);
|
||||
if (isMusic) {
|
||||
return match[0];
|
||||
}
|
||||
} catch (e) {
|
||||
log.error('Could not check if', videoId, 'is a music video', e);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static async getSongInfo(url: URL, remainingTries = 6): Promise<SongInfo | null> {
|
||||
if (remainingTries === 0) {
|
||||
log.error('No tries remaining. Lookup failed!');
|
||||
@ -109,7 +37,10 @@ export class TimelineReader {
|
||||
if (response.status === 429) {
|
||||
throw new Error('Rate limit reached', { cause: 429 });
|
||||
}
|
||||
return response.json().then((odesliInfo: OdesliResponse) => {
|
||||
const odesliInfo: OdesliResponse = await response.json();
|
||||
if (!odesliInfo || !odesliInfo.entitiesByUniqueId || !odesliInfo.entityUniqueId) {
|
||||
return null;
|
||||
}
|
||||
const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
|
||||
const platform: Platform = 'youtube';
|
||||
return {
|
||||
@ -119,7 +50,6 @@ export class TimelineReader {
|
||||
postedUrl: url.toString()
|
||||
} as SongInfo;
|
||||
});
|
||||
});
|
||||
} catch (e) {
|
||||
if (e instanceof Error && e.cause === 429) {
|
||||
log.warn('song.link rate limit reached. Trying again in 10 seconds');
|
||||
@ -131,24 +61,6 @@ export class TimelineReader {
|
||||
}
|
||||
}
|
||||
|
||||
private static async getUrlFromPreviewCard(post: Post): Promise<string | undefined> {
|
||||
return undefined;
|
||||
// Currently disabled, because it seems to always be null, even after re-fetching the post from Mastodon
|
||||
/*
|
||||
if (post.card) {
|
||||
return post.card?.url;
|
||||
}
|
||||
try {
|
||||
const status: Post = await (
|
||||
await fetch(`https://${MASTODON_INSTANCE}/api/v1/statuses/${post.id}`)
|
||||
).json();
|
||||
return status.card?.url;
|
||||
} catch (e) {
|
||||
log.error(`Could not fetch status ${post.url}`, e);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
private startWebsocket() {
|
||||
const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
|
||||
socket.onopen = () => {
|
||||
@ -165,42 +77,11 @@ export class TimelineReader {
|
||||
const hashttags: string[] = HASHTAG_FILTER.split(',');
|
||||
const found_tags: Tag[] = post.tags.filter((t: Tag) => hashttags.includes(t.name));
|
||||
|
||||
const urls: string[] = URL_FILTER.split(',');
|
||||
const found_urls = urls.filter((t) => post.content.includes(t));
|
||||
// If we don't have any tags or non-youtube urls, check youtube
|
||||
// YT is handled separately, because it requires an API call and therefore is slower
|
||||
if (found_urls.length === 0 && found_tags.length === 0) {
|
||||
const youtubeUrl = await TimelineReader.checkYoutubeMatches(post.content);
|
||||
if (youtubeUrl === null) {
|
||||
log.log('Ignoring post', post.url);
|
||||
return;
|
||||
}
|
||||
log.debug('Found YT URL', youtubeUrl, found_urls, found_urls.length);
|
||||
} else {
|
||||
log.debug('Found URLs and/or tags:', found_urls, found_tags);
|
||||
}
|
||||
|
||||
// TODO: Change URL detection above to use this regex.
|
||||
// Looks like we're stuck with regex for now instead of using preview cards.
|
||||
// Might as well use it to find URLs. Could also use this for YouTube: If Odesli finds something, it's a song,
|
||||
// if not, ignore it. No need to consult the YT API and give those links a special handling
|
||||
const musicUrls: URL[] = [];
|
||||
const musicUrl = await TimelineReader.getUrlFromPreviewCard(post);
|
||||
if (musicUrl) {
|
||||
try {
|
||||
musicUrls.push(new URL(musicUrl));
|
||||
} catch (e) {
|
||||
log.error(
|
||||
'URL received from preview card does not seem to be a valid URL',
|
||||
musicUrl,
|
||||
e
|
||||
);
|
||||
}
|
||||
} else {
|
||||
const urlMatches = post.content.matchAll(URL_REGEX);
|
||||
const songs: SongInfo[] = [];
|
||||
for (const match of urlMatches) {
|
||||
if (match === undefined || match.groups === undefined) {
|
||||
console.warn(
|
||||
log.warn(
|
||||
'Match listed in allMatches, but either it or its groups are undefined',
|
||||
match
|
||||
);
|
||||
@ -216,23 +97,7 @@ export class TimelineReader {
|
||||
}
|
||||
|
||||
// Check *all* found URLs and let Odesli determine whether each one is music or not
|
||||
musicUrls.push(url);
|
||||
}
|
||||
}
|
||||
|
||||
const songs: SongInfo[] = [];
|
||||
log.debug(`Checking ${musicUrls.length} URLs if they contain song data`);
|
||||
for (const url of musicUrls) {
|
||||
let hostname: string | null = null;
|
||||
try {
|
||||
hostname = new URL(url).hostname;
|
||||
} catch (e) {
|
||||
log.error(`Could not check hostname for URL ${url}`, e);
|
||||
}
|
||||
if (hostname === 'songwhip.com') {
|
||||
// TODO: Implement checking the songwhip API
|
||||
continue;
|
||||
}
|
||||
log.debug(`Checking ${url} if it contains song data`);
|
||||
const info = await TimelineReader.getSongInfo(url);
|
||||
log.debug(`Found song info for ${url}?`, info);
|
||||
if (info) {
|
||||
@ -240,6 +105,13 @@ export class TimelineReader {
|
||||
}
|
||||
}
|
||||
|
||||
// If the post has neither a matching hashtag nor a URL for which song info was found, ignore it.
|
||||
// YouTube links get no special handling here: they are vetted through the song lookup like any other URL.
|
||||
if (songs.length === 0 && found_tags.length === 0) {
|
||||
log.log('Ignoring post', post.url);
|
||||
return;
|
||||
}
|
||||
|
||||
await savePost(post, songs);
|
||||
log.debug('Saved post', post.url);
|
||||
|
||||
|
@ -187,7 +187,7 @@
|
||||
}
|
||||
.post {
|
||||
width: 100%;
|
||||
max-width: 600px;
|
||||
max-width: min(800px, 80vw);
|
||||
margin-bottom: 1em;
|
||||
border-bottom: 1px solid var(--color-border);
|
||||
padding: 1em;
|
||||
|
Loading…
Reference in New Issue
Block a user