diff --git a/.env.EXAMPLE b/.env.EXAMPLE
index d792037..5d58b81 100644
--- a/.env.EXAMPLE
+++ b/.env.EXAMPLE
@@ -1,7 +1,8 @@
 HASHTAG_FILTER = ichlausche,music,musik,nowplaying,tunetuesday,nowlistening
-URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com
+URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com,songwhip.com
 YOUTUBE_API_KEY = CHANGE_ME
 YOUTUBE_DISABLE = false
+ODESLI_API_KEY = CHANGE_ME
 MASTODON_INSTANCE = 'metalhead.club'
 BASE_URL = 'https://moshingmammut.phlaym.net'
 VERBOSE = false
diff --git a/src/lib/mastodon/response.ts b/src/lib/mastodon/response.ts
index bee9da9..c73dd53 100644
--- a/src/lib/mastodon/response.ts
+++ b/src/lib/mastodon/response.ts
@@ -10,6 +10,16 @@ export interface Post {
 	url: string;
 	content: string;
 	account: Account;
+	card?: PreviewCard;
+}
+
+export interface PreviewCard {
+	url: string;
+	title: string;
+	image?: string;
+	blurhash?: string;
+	width: number;
+	height: number;
 }
 
 export interface Tag {
diff --git a/src/lib/odesliResponse.ts b/src/lib/odesliResponse.ts
new file mode 100644
index 0000000..e500adf
--- /dev/null
+++ b/src/lib/odesliResponse.ts
@@ -0,0 +1,143 @@
+export type SongInfo = {
+	pageUrl: string;
+	youtubeUrl?: string;
+	type: 'song' | 'album';
+	title?: string;
+	artistName?: string;
+	thumbnailUrl?: string;
+};
+
+export type SongwhipReponse = {
+	type: 'track' | 'album';
+	name: string;
+	image?: string;
+	url: string;
+};
+
+export type OdesliResponse = {
+	/**
+	 * The unique ID for the input entity that was supplied in the request. The
+	 * data for this entity, such as title, artistName, etc. will be found in
+	 * an object at `entitiesByUniqueId[entityUniqueId]`
+	 */
+	entityUniqueId: string;
+
+	/**
+	 * The userCountry query param that was supplied in the request. It signals
+	 * the country/availability we use to query the streaming platforms. Defaults
+	 * to 'US' if no userCountry supplied in the request.
+	 *
+	 * NOTE: As a fallback, our service may respond with matches that were found
+	 * in a locale other than the userCountry supplied
+	 */
+	userCountry: string;
+
+	/**
+	 * A URL that will render the Songlink page for this entity
+	 */
+	pageUrl: string;
+
+	/**
+	 * A collection of objects. Each key is a platform, and each value is an
+	 * object that contains data for linking to the match
+	 */
+	linksByPlatform: {
+		/**
+		 * Each key in `linksByPlatform` is a Platform. A Platform will exist here
+		 * only if there is a match found. E.g. if there is no YouTube match found,
+		 * then neither `youtube` or `youtubeMusic` properties will exist here
+		 */
+		[k in Platform]?: {
+			/**
+			 * The unique ID for this entity. Use it to look up data about this entity
+			 * at `entitiesByUniqueId[entityUniqueId]`
+			 */
+			entityUniqueId: string;
+
+			/**
+			 * The URL for this match
+			 */
+			url: string;
+
+			/**
+			 * The native app URI that can be used on mobile devices to open this
+			 * entity directly in the native app
+			 */
+			nativeAppUriMobile?: string;
+
+			/**
+			 * The native app URI that can be used on desktop devices to open this
+			 * entity directly in the native app
+			 */
+			nativeAppUriDesktop?: string;
+		};
+	};
+
+	// A collection of objects. Each key is a unique identifier for a streaming
+	// entity, and each value is an object that contains data for that entity,
+	// such as `title`, `artistName`, `thumbnailUrl`, etc.
+	entitiesByUniqueId: {
+		[entityUniqueId: string]: {
+			// This is the unique identifier on the streaming platform/API provider
+			id: string;
+
+			type: 'song' | 'album';
+
+			title?: string;
+			artistName?: string;
+			thumbnailUrl?: string;
+			thumbnailWidth?: number;
+			thumbnailHeight?: number;
+
+			// The API provider that powered this match. Useful if you'd like to use
+			// this entity's data to query the API directly
+			apiProvider: APIProvider;
+
+			// An array of platforms that are "powered" by this entity. E.g. an entity
+			// from Apple Music will generally have a `platforms` array of
+			// `["appleMusic", "itunes"]` since both those platforms/links are derived
+			// from this single entity
+			platforms: Platform[];
+		};
+	};
+};
+
+export type Platform =
+	| 'spotify'
+	| 'itunes'
+	| 'appleMusic'
+	| 'youtube'
+	| 'youtubeMusic'
+	| 'google'
+	| 'googleStore'
+	| 'pandora'
+	| 'deezer'
+	| 'tidal'
+	| 'amazonStore'
+	| 'amazonMusic'
+	| 'soundcloud'
+	| 'napster'
+	| 'yandex'
+	| 'spinrilla'
+	| 'audius'
+	| 'audiomack'
+	| 'anghami'
+	| 'boomplay';
+
+export type APIProvider =
+	| 'spotify'
+	| 'itunes'
+	| 'youtube'
+	| 'google'
+	| 'pandora'
+	| 'deezer'
+	| 'tidal'
+	| 'amazon'
+	| 'soundcloud'
+	| 'napster'
+	| 'yandex'
+	| 'spinrilla'
+	| 'audius'
+	| 'audiomack'
+	| 'anghami'
+	| 'boomplay';
diff --git a/src/lib/server/db.ts b/src/lib/server/db.ts
index 20ff258..0a4e0c2 100644
--- a/src/lib/server/db.ts
+++ b/src/lib/server/db.ts
@@ -240,6 +240,11 @@ export async function savePost(post: Post): Promise<undefined> {
 				return;
 			}
 
+			if (!post.tags.length) {
+				resolve(undefined);
+				return;
+			}
+
 			db.parallelize(() => {
 				let remaining = post.tags.length;
 				for (const tag of post.tags) {
diff --git a/src/lib/server/timeline.ts b/src/lib/server/timeline.ts
index 23defa6..7f78fe5 100644
--- a/src/lib/server/timeline.ts
+++ b/src/lib/server/timeline.ts
@@ -1,13 +1,16 @@
 import {
 	HASHTAG_FILTER,
 	MASTODON_INSTANCE,
+	ODESLI_API_KEY,
 	URL_FILTER,
 	YOUTUBE_API_KEY,
 	YOUTUBE_DISABLE
 } from '$env/static/private';
 import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
+import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
 import { getPosts, savePost } from '$lib/server/db';
 import { createFeed, saveAtomFeed } from '$lib/server/rss';
+import { sleep } from '$lib/sleep';
 import { isTruthy } from '$lib/truthyString';
 import { WebSocket } from 'ws';
 
@@ -15,11 +18,13 @@ const YOUTUBE_REGEX = new RegExp(
 	/https?:\/\/(www\.)?youtu((be.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/gm
 );
 
+const URL_REGEX = new RegExp(/href="(?<postUrl>[^>]+?)" target="_blank"/gm);
+
 export class TimelineReader {
 	private static _instance: TimelineReader;
 
 	private static async isMusicVideo(videoId: string) {
-		if (YOUTUBE_API_KEY === undefined) {
+		if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
 			// Assume that it *is* a music link when no YT API key is provided
 			// If it should assumed to not be YOUTUBE_DISABLE needs to be set to something truthy
 			return true;
@@ -56,9 +61,9 @@ export class TimelineReader {
 		return categoryTitle === 'Music';
 	}
 
-	private static async checkYoutubeMatches(postContent: string): Promise<boolean> {
+	private static async checkYoutubeMatches(postContent: string): Promise<string | null> {
 		if (isTruthy(YOUTUBE_DISABLE)) {
-			return false;
+			return null;
 		}
 		const matches = postContent.matchAll(YOUTUBE_REGEX);
 		for (const match of matches) {
@@ -69,18 +74,110 @@ export class TimelineReader {
 			try {
 				const isMusic = await TimelineReader.isMusicVideo(videoId);
 				if (isMusic) {
-					return true;
+					return match[0];
 				}
 			} catch (e) {
 				console.error('Could not check if', videoId, 'is a music video', e);
 			}
 		}
-		return false;
+		return null;
+	}
+
+	/**
+	 * Looks up metadata for a music link via the song.link (Odesli) API.
+	 *
+	 * If the API answers with HTTP 429 (rate limit), the lookup is retried after 10 seconds,
+	 * until `remainingTries` is used up.
+	 *
+	 * @param url Link to a song or album, e.g. on Spotify or Apple Music
+	 * @param remainingTries Number of attempts that are left before giving up
+	 * @returns The song info or `null` if the URL is unsupported or the lookup failed
+	 */
+	private static async getSongInfo(
+		url: string,
+		remainingTries: number = 6
+	): Promise<SongInfo | null> {
+		if (remainingTries === 0) {
+			console.error('No tries remaining. Lookup failed!');
+			return null;
+		}
+		let hostname: string;
+		try {
+			hostname = new URL(url).hostname;
+		} catch (e) {
+			console.error(`Could not construct URL ${url}`, e);
+			return null;
+		}
+		if (hostname === 'songwhip.com') {
+			// song.link doesn't support songwhip links and songwhip themselves will provide metadata if you pass in a
+			// Apple Music/Spotify/etc link, but won't when provided with their own link, so no way to extract song info
+			// except maybe scraping their HTML
+			return null;
+		}
+
+		const odesliParams = new URLSearchParams();
+		odesliParams.append('url', url);
+		odesliParams.append('userCountry', 'DE');
+		odesliParams.append('songIfSingle', 'true');
+		if (ODESLI_API_KEY && ODESLI_API_KEY !== 'CHANGE_ME') {
+			odesliParams.append('key', ODESLI_API_KEY);
+		}
+		const odesliApiUrl = `https://api.song.link/v1-alpha.1/links?${odesliParams}`;
+		try {
+			// The request has to be awaited inside of the try block. Returning the pending promise would
+			// let rejections (including the rate limit error) bypass the catch block and the retry
+			const response = await fetch(odesliApiUrl);
+			if (response.status === 429) {
+				throw new Error('Rate limit reached', { cause: 429 });
+			}
+			if (!response.ok) {
+				throw new Error(`song.link responded with HTTP status ${response.status}`);
+			}
+			const odesliInfo: OdesliResponse = await response.json();
+			const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
+			if (!info) {
+				console.error(`song.link returned no entity for ${url}`);
+				return null;
+			}
+			const platform: Platform = 'youtube';
+			return {
+				...info,
+				pageUrl: odesliInfo.pageUrl,
+				youtubeUrl: odesliInfo.linksByPlatform[platform]?.url
+			} as SongInfo;
+		} catch (e) {
+			if (e instanceof Error && e.cause === 429) {
+				console.warn('song.link rate limit reached. Trying again in 10 seconds');
+				await sleep(10_000);
+				return await TimelineReader.getSongInfo(url, remainingTries - 1);
+			}
+			console.error(`Failed to load ${url} info from song.link`, e);
+			return null;
+		}
+	}
+
+	private static async getUrlFromPreviewCard(post: Post): Promise<string | undefined> {
+		return undefined;
+		// Currently disabled, because it seems to always be null, even after re-fetching the post from Mastodon
+		/*
+		if (post.card) {
+			return post.card?.url;
+		}
+		try {
+			const status: Post = await (
+				await fetch(`https://${MASTODON_INSTANCE}/api/v1/statuses/${post.id}`)
+			).json();
+			return status.card?.url;
+		} catch (e) {
+			console.error(`Could not fetch status ${post.url}`, e);
+		}
+		*/
 	}
 
 	private startWebsocket() {
 		const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
 		socket.onopen = () => {
+			console.log('Connected to WS');
 			socket.send('{ "type": "subscribe", "stream": "public:local"}');
 		};
 		socket.onmessage = async (event) => {
@@ -95,17 +192,69 @@ export class TimelineReader {
 
 				const urls: string[] = URL_FILTER.split(',');
 				const found_urls = urls.filter((t) => post.content.includes(t));
-
+				const urlsToCheck: string[] = [];
 				// If we don't have any tags or non-youtube urls, check youtube
 				// YT is handled separately, because it requires an API call and therefore is slower
-				if (
-					found_urls.length === 0 &&
-					found_tags.length === 0 &&
-					!(await TimelineReader.checkYoutubeMatches(post.content))
-				) {
-					return;
+				if (found_urls.length === 0 && found_tags.length === 0) {
+					const youtubeUrl = await TimelineReader.checkYoutubeMatches(post.content);
+					if (youtubeUrl === null) {
+						console.log('Ignoring post', post.url);
+						return;
+					}
+					urlsToCheck.push(youtubeUrl);
+					console.log('Found YT URL', youtubeUrl, found_urls, found_urls.length);
 				}
+
+				// TODO: Change URL detection above to use this regex.
+				// Looks like we're stuck with regex for now instead of using preview cards.
+				// Might as well use it to find URLs. Could also use this for YouTube: If Odesli finds something, it's a song,
+				// if not, ignore it. No need to consult the YT API and give those links a special handling
+				const musicUrls: string[] = [];
+				const musicUrl = await TimelineReader.getUrlFromPreviewCard(post);
+				if (musicUrl) {
+					musicUrls.push(musicUrl);
+				} else {
+					const urlMatches = post.content.matchAll(URL_REGEX);
+					for (const match of urlMatches) {
+						if (match === undefined || match.groups === undefined) {
+							continue;
+						}
+						const urlMatch = match.groups.postUrl.toString();
+						const musicUrl = urls.find((u) => urlMatch.includes(u));
+						if (musicUrl) {
+							musicUrls.push(urlMatch);
+						}
+					}
+				}
+
+				for (const url of musicUrls) {
+					let hostname: string | null = null;
+					try {
+						hostname = new URL(url).hostname;
+					} catch (e) {
+						console.error(`Could not check hostname for URL ${url}`, e);
+					}
+					if (hostname === 'songwhip.com') {
+						// TODO: Implement checking the songwhip API
+						continue;
+					}
+					const info = await TimelineReader.getSongInfo(url);
+					if (info) {
+						console.info(
+							'Got song info for',
+							post.url,
+							url,
+							info.artistName,
+							info.title,
+							info.thumbnailUrl,
+							info.pageUrl,
+							info.youtubeUrl
+						);
+					}
+				}
+
 				await savePost(post);
+
 				const posts = await getPosts(null, null, 100);
 				await saveAtomFeed(createFeed(posts));
 			} catch (e) {
diff --git a/src/lib/sleep.ts b/src/lib/sleep.ts
new file mode 100644
index 0000000..3c701f9
--- /dev/null
+++ b/src/lib/sleep.ts
@@ -0,0 +1,5 @@
+export function sleep(timeInMs: number): Promise<void> {
+	return new Promise((resolve) => {
+		setTimeout(resolve, timeInMs);
+	});
+}