Extract song info from odesli (song.link)

2023-04-22 08:50:17 +02:00 · 2023-04-22 08:50:17 +02:00 · b62936ed54
commit b62936ed54
parent 45eeb550b3
6 changed files with 309 additions and 13 deletions
--- a/.env.EXAMPLE
+++ b/.env.EXAMPLE
@ -1,7 +1,8 @@
 HASHTAG_FILTER = ichlausche,music,musik,nowplaying,tunetuesday,nowlistening
-URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com
+URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com,songwhip.com
 YOUTUBE_API_KEY = CHANGE_ME
 YOUTUBE_DISABLE = false
+ODESLI_API_KEY = CHANGE_ME
 MASTODON_INSTANCE = 'metalhead.club'
 BASE_URL = 'https://moshingmammut.phlaym.net'
 VERBOSE = false
--- a/src/lib/mastodon/response.ts
+++ b/src/lib/mastodon/response.ts
@ -10,6 +10,16 @@ export interface Post {
  url: string;
  content: string;
  account: Account;
+  card?: PreviewCard;
+}
+
+/**
+ * Preview card that a `Post` may carry in its optional `card` property.
+ * NOTE(review): presumably mirrors the `card` object of a Mastodon status — confirm against the API docs.
+ */
+export interface PreviewCard {
+  url: string;
+  title: string;
+  image?: string;
+  blurhash?: string;
+  width: number;
+  height: number;
 }

 export interface Tag {
--- a/src/lib/odesliResponse.ts
+++ b/src/lib/odesliResponse.ts
@ -0,0 +1,143 @@
+/**
+ * Song/album details produced by the song.link lookup in the timeline reader:
+ * the fields of the looked-up Odesli entity plus the song.link page URL and,
+ * if one was matched, the YouTube URL.
+ */
+export type SongInfo = {
+  /** Copied from `OdesliResponse.pageUrl` */
+  pageUrl: string;
+  /** URL of the `youtube` entry in `OdesliResponse.linksByPlatform`, when present */
+  youtubeUrl?: string;
+  type: 'song' | 'album';
+  title?: string;
+  artistName?: string;
+  thumbnailUrl?: string;
+};
+
+/**
+ * NOTE(review): presumably the shape of a songwhip.com API response — nothing in
+ * the visible code uses it yet (the songwhip lookup is still a TODO); confirm the
+ * fields against their API before relying on it.
+ * NOTE(review): the name is missing an "s" ("Reponse"); renaming it means touching every importer.
+ */
+export type SongwhipReponse = {
+  type: 'track' | 'album';
+  name: string;
+  image?: string;
+  url: string;
+};
+
+/**
+ * Response body of the song.link (Odesli) `links` endpoint.
+ */
+export type OdesliResponse = {
+  /**
+   * The unique ID for the input entity that was supplied in the request. The
+   * data for this entity, such as title, artistName, etc. will be found in
+   * an object at `entitiesByUniqueId[entityUniqueId]`
+   */
+  entityUniqueId: string;
+
+  /**
+   * The userCountry query param that was supplied in the request. It signals
+   * the country/availability we use to query the streaming platforms. Defaults
+   * to 'US' if no userCountry supplied in the request.
+   *
+   * NOTE: As a fallback, our service may respond with matches that were found
+   * in a locale other than the userCountry supplied
+   */
+  userCountry: string;
+
+  /**
+   * A URL that will render the Songlink page for this entity
+   */
+  pageUrl: string;
+
+  /**
+   * A collection of objects. Each key is a platform, and each value is an
+   * object that contains data for linking to the match
+   */
+  linksByPlatform: {
+    /**
+     * Each key in `linksByPlatform` is a Platform. A Platform will exist here
+     * only if there is a match found. E.g. if there is no YouTube match found,
+     * then neither `youtube` nor `youtubeMusic` properties will exist here.
+     * The keys are therefore optional: check for `undefined` before use.
+     */
+    [k in Platform]?: {
+      /**
+       * The unique ID for this entity. Use it to look up data about this entity
+       * at `entitiesByUniqueId[entityUniqueId]`
+       */
+      entityUniqueId: string;
+
+      /**
+       * The URL for this match
+       */
+      url: string;
+
+      /**
+       * The native app URI that can be used on mobile devices to open this
+       * entity directly in the native app
+       */
+      nativeAppUriMobile?: string;
+
+      /**
+       * The native app URI that can be used on desktop devices to open this
+       * entity directly in the native app
+       */
+      nativeAppUriDesktop?: string;
+    };
+  };
+
+  /**
+   * A collection of objects. Each key is a unique identifier for a streaming
+   * entity, and each value is an object that contains data for that entity,
+   * such as `title`, `artistName`, `thumbnailUrl`, etc.
+   */
+  entitiesByUniqueId: {
+    [entityUniqueId: string]: {
+      /**
+       * This is the unique identifier on the streaming platform/API provider
+       */
+      id: string;
+
+      type: 'song' | 'album';
+
+      title?: string;
+      artistName?: string;
+      thumbnailUrl?: string;
+      thumbnailWidth?: number;
+      thumbnailHeight?: number;
+
+      /**
+       * The API provider that powered this match. Useful if you'd like to use
+       * this entity's data to query the API directly
+       */
+      apiProvider: APIProvider;
+
+      /**
+       * An array of platforms that are "powered" by this entity. E.g. an entity
+       * from Apple Music will generally have a `platforms` array of
+       * `["appleMusic", "itunes"]` since both those platforms/links are derived
+       * from this single entity
+       */
+      platforms: Platform[];
+    };
+  };
+};
+
+/**
+ * Platform identifiers: the possible keys of `OdesliResponse.linksByPlatform`
+ * and the element type of an entity's `platforms` array.
+ */
+export type Platform =
+  | 'spotify'
+  | 'itunes'
+  | 'appleMusic'
+  | 'youtube'
+  | 'youtubeMusic'
+  | 'google'
+  | 'googleStore'
+  | 'pandora'
+  | 'deezer'
+  | 'tidal'
+  | 'amazonStore'
+  | 'amazonMusic'
+  | 'soundcloud'
+  | 'napster'
+  | 'yandex'
+  | 'spinrilla'
+  | 'audius'
+  | 'audiomack'
+  | 'anghami'
+  | 'boomplay';
+
+/**
+ * Possible values of the `apiProvider` field of an entity in
+ * `OdesliResponse.entitiesByUniqueId`.
+ */
+export type APIProvider =
+  | 'spotify'
+  | 'itunes'
+  | 'youtube'
+  | 'google'
+  | 'pandora'
+  | 'deezer'
+  | 'tidal'
+  | 'amazon'
+  | 'soundcloud'
+  | 'napster'
+  | 'yandex'
+  | 'spinrilla'
+  | 'audius'
+  | 'audiomack'
+  | 'anghami'
+  | 'boomplay';
--- a/src/lib/server/db.ts
+++ b/src/lib/server/db.ts
@ -240,6 +240,11 @@ export async function savePost(post: Post): Promise<undefined> {
              return;
            }

+            if (!post.tags.length) {
+              resolve(undefined);
+              return;
+            }
+
            db.parallelize(() => {
              let remaining = post.tags.length;
              for (const tag of post.tags) {
--- a/src/lib/server/timeline.ts
+++ b/src/lib/server/timeline.ts
@ -1,13 +1,16 @@
 import {
  HASHTAG_FILTER,
  MASTODON_INSTANCE,
+  ODESLI_API_KEY,
  URL_FILTER,
  YOUTUBE_API_KEY,
  YOUTUBE_DISABLE
 } from '$env/static/private';
 import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
+import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
 import { getPosts, savePost } from '$lib/server/db';
 import { createFeed, saveAtomFeed } from '$lib/server/rss';
+import { sleep } from '$lib/sleep';
 import { isTruthy } from '$lib/truthyString';
 import { WebSocket } from 'ws';

@ -15,11 +18,13 @@ const YOUTUBE_REGEX = new RegExp(
  /https?:\/\/(www\.)?youtu((be.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/gm
 );

+// Captures, as the named group `postUrl`, the value of every `href="…"` attribute that is
+// directly followed by ` target="_blank"`. The value may not contain a `>` character.
+const URL_REGEX = new RegExp(/href="(?<postUrl>[^>]+?)" target="_blank"/gm);
+
 export class TimelineReader {
  private static _instance: TimelineReader;

  private static async isMusicVideo(videoId: string) {
-    if (YOUTUBE_API_KEY === undefined) {
+    if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
      // Assume that it *is* a music link when no YT API key is provided
      // If such links should instead be treated as non-music, set YOUTUBE_DISABLE to something truthy
      return true;
@ -56,9 +61,9 @@ export class TimelineReader {
    return categoryTitle === 'Music';
  }

-  private static async checkYoutubeMatches(postContent: string): Promise<boolean> {
+  private static async checkYoutubeMatches(postContent: string): Promise<string | null> {
    if (isTruthy(YOUTUBE_DISABLE)) {
-      return false;
+      return null;
    }
    const matches = postContent.matchAll(YOUTUBE_REGEX);
    for (const match of matches) {
@ -69,18 +74,93 @@ export class TimelineReader {
      try {
        const isMusic = await TimelineReader.isMusicVideo(videoId);
        if (isMusic) {
-          return true;
+          return match[0];
        }
      } catch (e) {
        console.error('Could not check if', videoId, 'is a music video', e);
      }
    }
-    return false;
+    return null;
+  }
+
+  /**
+   * Looks up song/album metadata for a music URL via the song.link (Odesli) API.
+   *
+   * songwhip.com URLs are never looked up. When the API answers with HTTP 429 the
+   * lookup waits 10 seconds and tries again until `remainingTries` is used up.
+   *
+   * @param url The music link to look up
+   * @param remainingTries How many attempts (including this one) are still allowed
+   * @returns The song info, or `null` if the URL is invalid or unsupported, or the lookup failed
+   */
+  private static async getSongInfo(
+    url: string,
+    remainingTries: number = 6
+  ): Promise<SongInfo | null> {
+    if (remainingTries <= 0) {
+      console.error('No tries remaining. Lookup failed!');
+      return null;
+    }
+    let hostname: string;
+    try {
+      hostname = new URL(url).hostname;
+    } catch (e) {
+      console.error(`Could not construct URL ${url}`, e);
+      return null;
+    }
+    if (hostname === 'songwhip.com') {
+      // song.link doesn't support songwhip links and songwhip themselves will provide metadata if you pass in a
+      // Apple Music/Spotify/etc link, but won't when provided with their own link, so no way to extract song info
+      // except maybe scraping their HTML
+      return null;
+    }
+
+    const odesliParams = new URLSearchParams();
+    odesliParams.append('url', url);
+    odesliParams.append('userCountry', 'DE');
+    odesliParams.append('songIfSingle', 'true');
+    if (ODESLI_API_KEY && ODESLI_API_KEY !== 'CHANGE_ME') {
+      odesliParams.append('key', ODESLI_API_KEY);
+    }
+    const odesliApiUrl = `https://api.song.link/v1-alpha.1/links?${odesliParams}`;
+    try {
+      // Await inside the `try`: a promise that is merely returned rejects after this function
+      // has already left the block, so the retry handling below would never run
+      const response = await fetch(odesliApiUrl);
+      if (response.status === 429) {
+        throw new Error('Rate limit reached', { cause: 429 });
+      }
+      if (!response.ok) {
+        throw new Error(`Unexpected HTTP status ${response.status}`);
+      }
+      const odesliInfo: OdesliResponse = await response.json();
+      // The body is unvalidated JSON, so don't trust that the promised entity is really there
+      const info = odesliInfo.entitiesByUniqueId?.[odesliInfo.entityUniqueId];
+      if (!info) {
+        console.error(`song.link response for ${url} does not contain the requested entity`);
+        return null;
+      }
+      const platform: Platform = 'youtube';
+      return {
+        ...info,
+        pageUrl: odesliInfo.pageUrl,
+        youtubeUrl: odesliInfo.linksByPlatform?.[platform]?.url
+      };
+    } catch (e) {
+      if (e instanceof Error && e.cause === 429) {
+        console.warn('song.link rate limit reached. Trying again in 10 seconds');
+        await sleep(10_000);
+        return await TimelineReader.getSongInfo(url, remainingTries - 1);
+      }
+      console.error(`Failed to load ${url} info from song.link`, e);
+      return null;
+    }
+  }
+
+  /**
+   * Intended to return the URL of the post's preview card.
+   * Currently a stub that always resolves to `undefined`; the lookup itself is
+   * kept below as commented-out code.
+   *
+   * @param post The post whose preview card would be inspected (unused while disabled)
+   * @returns Always `undefined` for now
+   */
+  private static async getUrlFromPreviewCard(post: Post): Promise<string | undefined> {
+    return undefined;
+    // Currently disabled, because the card seems to always be null, even after re-fetching the post from Mastodon
+    /*
+    if (post.card) {
+      return post.card?.url;
+    }
+    try {
+      const status: Post = await (
+        await fetch(`https://${MASTODON_INSTANCE}/api/v1/statuses/${post.id}`)
+      ).json();
+      return status.card?.url;
+    } catch (e) {
+      console.error(`Could not fetch status ${post.url}`, e);
+    }
+    */
   }

  private startWebsocket() {
    const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
    socket.onopen = () => {
+      console.log('Connected to WS');
      socket.send('{ "type": "subscribe", "stream": "public:local"}');
    };
    socket.onmessage = async (event) => {
@ -95,17 +175,69 @@ export class TimelineReader {

        const urls: string[] = URL_FILTER.split(',');
        const found_urls = urls.filter((t) => post.content.includes(t));
-
+        const urlsToCheck: string[] = [];
        // If we don't have any tags or non-youtube urls, check youtube
        // YT is handled separately, because it requires an API call and therefore is slower
-        if (
-          found_urls.length === 0 &&
-          found_tags.length === 0 &&
-          !(await TimelineReader.checkYoutubeMatches(post.content))
-        ) {
-          return;
+        if (found_urls.length === 0 && found_tags.length === 0) {
+          const youtubeUrl = await TimelineReader.checkYoutubeMatches(post.content);
+          if (youtubeUrl === null) {
+            console.log('Ignoring post', post.url);
+            return;
+          }
+          urlsToCheck.push(youtubeUrl);
+          console.log('Found YT URL', youtubeUrl, found_urls, found_urls.length);
        }
+
+        // TODO: Change URL detection above to use this regex.
+        // Looks like we're stuck with regex for now instead of using preview cards.
+        // Might as well use it to find URLs. Could also use this for YouTube: If Odesli finds something, it's a song,
+        // if not, ignore it. No need to consult the YT API and give those links a special handling
+        const musicUrls: string[] = [];
+        const musicUrl = await TimelineReader.getUrlFromPreviewCard(post);
+        if (musicUrl) {
+          musicUrls.push(musicUrl);
+        } else {
+          const urlMatches = post.content.matchAll(URL_REGEX);
+          for (const match of urlMatches) {
+            if (match === undefined || match.groups === undefined) {
+              continue;
+            }
+            const urlMatch = match.groups.postUrl.toString();
+            const musicUrl = urls.find((u) => urlMatch.includes(u));
+            if (musicUrl) {
+              musicUrls.push(urlMatch);
+            }
+          }
+        }
+
+        for (const url of musicUrls) {
+          let hostname: string | null = null;
+          try {
+            hostname = new URL(url).hostname;
+          } catch (e) {
+            console.error(`Could not check hostname for URL ${url}`, e);
+          }
+          if (hostname === 'songwhip.com') {
+            // TODO: Implement checking the songwhip API
+            continue;
+          }
+          const info = await TimelineReader.getSongInfo(url);
+          if (info) {
+            console.info(
+              'Got song info for',
+              post.url,
+              url,
+              info.artistName,
+              info.title,
+              info.thumbnailUrl,
+              info.pageUrl,
+              info.youtubeUrl
+            );
+          }
+        }
+
        await savePost(post);
+
        const posts = await getPosts(null, null, 100);
        await saveAtomFeed(createFeed(posts));
      } catch (e) {
--- a/src/lib/sleep.ts
+++ b/src/lib/sleep.ts
@ -0,0 +1,5 @@
+/** Returns a promise that resolves (with no value) once `timeInMs` milliseconds have elapsed. */
+export function sleep(timeInMs: number): Promise<undefined> {
+  // Hand the promise's own resolver straight to the timer; nothing ever rejects it
+  return new Promise<undefined>((done) => setTimeout(done, timeInMs));
+}