Extract song info from odesli (song.link)

This commit is contained in:
2023-04-22 08:50:17 +02:00
parent 45eeb550b3
commit b62936ed54
6 changed files with 309 additions and 13 deletions

View File

@ -1,13 +1,16 @@
import {
HASHTAG_FILTER,
MASTODON_INSTANCE,
ODESLI_API_KEY,
URL_FILTER,
YOUTUBE_API_KEY,
YOUTUBE_DISABLE
} from '$env/static/private';
import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
import { getPosts, savePost } from '$lib/server/db';
import { createFeed, saveAtomFeed } from '$lib/server/rss';
import { sleep } from '$lib/sleep';
import { isTruthy } from '$lib/truthyString';
import { WebSocket } from 'ws';
@ -15,11 +18,13 @@ const YOUTUBE_REGEX = new RegExp(
/https?:\/\/(www\.)?youtu((be.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/gm
);
const URL_REGEX = new RegExp(/href="(?<postUrl>[^>]+?)" target="_blank"/gm);
export class TimelineReader {
private static _instance: TimelineReader;
private static async isMusicVideo(videoId: string) {
if (YOUTUBE_API_KEY === undefined) {
if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
// Assume that it *is* a music link when no YT API key is provided
// If YouTube links should instead be assumed *not* to be music, YOUTUBE_DISABLE needs to be set to something truthy
return true;
@ -56,9 +61,9 @@ export class TimelineReader {
return categoryTitle === 'Music';
}
private static async checkYoutubeMatches(postContent: string): Promise<boolean> {
private static async checkYoutubeMatches(postContent: string): Promise<string | null> {
if (isTruthy(YOUTUBE_DISABLE)) {
return false;
return null;
}
const matches = postContent.matchAll(YOUTUBE_REGEX);
for (const match of matches) {
@ -69,18 +74,93 @@ export class TimelineReader {
try {
const isMusic = await TimelineReader.isMusicVideo(videoId);
if (isMusic) {
return true;
return match[0];
}
} catch (e) {
console.error('Could not check if', videoId, 'is a music video', e);
}
}
return false;
return null;
}
/**
 * Looks up song metadata for a music URL via the Odesli (song.link) API.
 *
 * The lookup is best-effort: every failure is logged and reported as `null`
 * instead of being thrown, so callers only have to check for `null`.
 *
 * @param url - Link to a song/album page on a streaming platform.
 * @param remainingTries - How many attempts are left. Each HTTP 429 answer
 *   consumes one attempt after waiting 10 seconds.
 * @returns The entity Odesli resolved the URL to, extended with the song.link
 *   `pageUrl` and (if Odesli knows one) the `youtubeUrl`; `null` if the URL is
 *   invalid, points to songwhip.com, the request fails, or no tries remain.
 */
private static async getSongInfo(
  url: string,
  remainingTries: number = 6
): Promise<SongInfo | null> {
  // `<=` instead of `===` so a negative argument cannot cause endless retries
  if (remainingTries <= 0) {
    console.error('No tries remaining. Lookup failed!');
    return null;
  }

  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch (e) {
    console.error(`Could not construct URL ${url}`, e);
    return null;
  }

  if (hostname === 'songwhip.com') {
    // song.link doesn't support songwhip links and songwhip themselves will provide metadata if you pass in a
    // Apple Music/Spotify/etc link, but won't when provided with their own link, so no way to extract song info
    // except maybe scraping their HTML
    return null;
  }

  const odesliParams = new URLSearchParams();
  odesliParams.append('url', url);
  odesliParams.append('userCountry', 'DE');
  odesliParams.append('songIfSingle', 'true');
  // The API key is optional; the placeholder value counts as "not configured"
  if (ODESLI_API_KEY && ODESLI_API_KEY !== 'CHANGE_ME') {
    odesliParams.append('key', ODESLI_API_KEY);
  }
  const odesliApiUrl = `https://api.song.link/v1-alpha.1/links?${odesliParams}`;

  try {
    // Everything is awaited inside the try block. Returning the un-awaited
    // promise would let rejections bypass the catch below entirely.
    const response = await fetch(odesliApiUrl);

    if (response.status === 429) {
      console.warn('song.link rate limit reached. Trying again in 10 seconds');
      await sleep(10_000);
      return await this.getSongInfo(url, remainingTries - 1);
    }

    if (!response.ok) {
      // Error answers do not carry the entity data read below
      console.error(
        `Failed to load ${url} info from song.link: HTTP ${response.status} ${response.statusText}`
      );
      return null;
    }

    const odesliInfo: OdesliResponse = await response.json();
    const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
    const platform: Platform = 'youtube';
    return {
      ...info,
      pageUrl: odesliInfo.pageUrl,
      youtubeUrl: odesliInfo.linksByPlatform[platform]?.url
    } as SongInfo;
  } catch (e) {
    // Network errors, invalid JSON or an unexpected response shape end up here
    console.error(`Failed to load ${url} info from song.link`, e);
    return null;
  }
}
/**
 * Would return the URL of the post's preview card, but the lookup is
 * currently switched off, so this always resolves to `undefined`.
 *
 * @param post - The post whose preview card would be inspected (unused for now).
 * @returns Always `undefined` while the implementation below stays disabled.
 */
private static async getUrlFromPreviewCard(post: Post): Promise<string | undefined> {
  // Currently disabled, because it seems to always be null, even after re-fetching the post from Mastodon
  /*
  if (post.card) {
    return post.card?.url;
  }
  try {
    const status: Post = await (
      await fetch(`https://${MASTODON_INSTANCE}/api/v1/statuses/${post.id}`)
    ).json();
    return status.card?.url;
  } catch (e) {
    console.error(`Could not fetch status ${post.url}`, e);
  }
  */
  return undefined;
}
private startWebsocket() {
const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
socket.onopen = () => {
console.log('Connected to WS');
socket.send('{ "type": "subscribe", "stream": "public:local"}');
};
socket.onmessage = async (event) => {
@ -95,17 +175,69 @@ export class TimelineReader {
const urls: string[] = URL_FILTER.split(',');
const found_urls = urls.filter((t) => post.content.includes(t));
const urlsToCheck: string[] = [];
// If we don't have any tags or non-youtube urls, check youtube
// YT is handled separately, because it requires an API call and therefore is slower
if (
found_urls.length === 0 &&
found_tags.length === 0 &&
!(await TimelineReader.checkYoutubeMatches(post.content))
) {
return;
if (found_urls.length === 0 && found_tags.length === 0) {
const youtubeUrl = await TimelineReader.checkYoutubeMatches(post.content);
if (youtubeUrl === null) {
console.log('Ignoring post', post.url);
return;
}
urlsToCheck.push(youtubeUrl);
console.log('Found YT URL', youtubeUrl, found_urls, found_urls.length);
}
// TODO: Change URL detection above to use this regex.
// Looks like we're stuck with regex for now instead of using preview cards.
// Might as well use it to find URLs. Could also use this for YouTube: If Odesli finds something, it's a song,
// if not, ignore it. No need to consult the YT API and give those links a special handling
const musicUrls: string[] = [];
const musicUrl = await TimelineReader.getUrlFromPreviewCard(post);
if (musicUrl) {
musicUrls.push(musicUrl);
} else {
const urlMatches = post.content.matchAll(URL_REGEX);
for (const match of urlMatches) {
if (match === undefined || match.groups === undefined) {
continue;
}
const urlMatch = match.groups.postUrl.toString();
const musicUrl = urls.find((u) => urlMatch.includes(u));
if (musicUrl) {
musicUrls.push(urlMatch);
}
}
}
for (const url of musicUrls) {
let hostname: string | null = null;
try {
hostname = new URL(url).hostname;
} catch (e) {
console.error(`Could not check hostname for URL ${url}`, e);
}
if (hostname === 'songwhip.com') {
// TODO: Implement checking the songwhip API
continue;
}
const info = await TimelineReader.getSongInfo(url);
if (info) {
console.info(
'Got song info for',
post.url,
url,
info.artistName,
info.title,
info.thumbnailUrl,
info.pageUrl,
info.youtubeUrl
);
}
}
await savePost(post);
const posts = await getPosts(null, null, 100);
await saveAtomFeed(createFeed(posts));
} catch (e) {