Extract song info from odesli (song.link)

2023-04-22 08:50:17 +02:00 · 2023-04-22 08:50:17 +02:00 · b62936ed54
commit b62936ed54
parent 45eeb550b3
6 changed files with 309 additions and 13 deletions
--- a/.env.EXAMPLE
+++ b/.env.EXAMPLE
@ -1,7 +1,8 @@
 HASHTAG_FILTER = ichlausche,music,musik,nowplaying,tunetuesday,nowlistening
-URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com
+URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com,songwhip.com
 YOUTUBE_API_KEY = CHANGE_ME
 YOUTUBE_DISABLE = false
 ODESLI_API_KEY = CHANGE_ME
 MASTODON_INSTANCE = 'metalhead.club'
 BASE_URL = 'https://moshingmammut.phlaym.net'
 VERBOSE = false
--- a/src/lib/mastodon/response.ts
+++ b/src/lib/mastodon/response.ts
@ -10,6 +10,16 @@ export interface Post {
  url: string;
  content: string;
  account: Account;
  card?: PreviewCard;
 }
 /**
  * Link preview that may be attached to a post (see `Post.card`).
  * NOTE(review): presumably mirrors the preview-card entity returned by the
  * Mastodon API - confirm against the API documentation.
  */
 export interface PreviewCard {
  /** Address of the linked resource. */
  url: string;
  /** Title of the card - presumably the linked page's title; confirm. */
  title: string;
  /** Dimensions - presumably those of the preview image, in pixels; confirm. */
  width: number;
  height: number;
  /** Optional preview image - presumably a URL; confirm. */
  image?: string;
  /** Optional placeholder hash - presumably a BlurHash of `image`; confirm. */
  blurhash?: string;
 }
 export interface Tag {
--- a/src/lib/odesliResponse.ts
+++ b/src/lib/odesliResponse.ts
@ -0,0 +1,143 @@
 /**
  * Condensed description of a song or album.
  * Carries the entity metadata fields (`type`, `title`, `artistName`,
  * `thumbnailUrl`) together with two links to the entity.
  */
 export type SongInfo = {
  /** Whether the entity is a single song or a whole album. */
  type: 'song' | 'album';
  /** Title of the song/album, when known. */
  title?: string;
  /** Name of the performing artist, when known. */
  artistName?: string;
  /** Address of a cover/thumbnail image, when known. */
  thumbnailUrl?: string;
  /** URL of the song.link page for this entity. */
  pageUrl: string;
  /** URL of the matching YouTube entry, when one exists. */
  youtubeUrl?: string;
 };
 /**
  * Shape of a Songwhip lookup result.
  * NOTE(review): not referenced by any code visible in this change; the field
  * meanings below are presumed from their names - confirm against the Songwhip API.
  * NOTE(review): the exported name is misspelled ("Reponse"); it is kept as-is
  * so existing imports keep working.
  */
 export type SongwhipReponse = {
  /** Kind of entity: a single track or an album. */
  type: 'track' | 'album';
  /** Display name of the track/album. */
  name: string;
  /** Address of the entity - presumably its Songwhip page; confirm. */
  url: string;
  /** Optional artwork - presumably an image URL; confirm. */
  image?: string;
 };
 /**
  * Response body of the Odesli (song.link) links lookup.
  */
 export type OdesliResponse = {
  /**
   * The unique ID for the input entity that was supplied in the request. The
   * data for this entity, such as title, artistName, etc. will be found in
   * an object at `entitiesByUniqueId[entityUniqueId]`
   */
  entityUniqueId: string;
  /**
   * The userCountry query param that was supplied in the request. It signals
   * the country/availability we use to query the streaming platforms. Defaults
   * to 'US' if no userCountry supplied in the request.
   *
   * NOTE: As a fallback, our service may respond with matches that were found
   * in a locale other than the userCountry supplied
   */
  userCountry: string;
  /**
   * A URL that will render the Songlink page for this entity
   */
  pageUrl: string;
  /**
   * A collection of objects. Each key is a platform, and each value is an
   * object that contains data for linking to the match
   */
  linksByPlatform: {
    /**
     * Each key in `linksByPlatform` is a Platform. A Platform will exist here
     * only if there is a match found. E.g. if there is no YouTube match found,
     * then neither `youtube` or `youtubeMusic` properties will exist here.
     *
     * Every key is therefore optional: consumers must handle a missing platform.
     */
    [k in Platform]?: {
      /**
       * The unique ID for this entity. Use it to look up data about this entity
       * at `entitiesByUniqueId[entityUniqueId]`
       */
      entityUniqueId: string;
      /**
       * The URL for this match
       */
      url: string;
      /**
       * The native app URI that can be used on mobile devices to open this
       * entity directly in the native app
       */
      nativeAppUriMobile?: string;
      /**
       * The native app URI that can be used on desktop devices to open this
       * entity directly in the native app
       */
      nativeAppUriDesktop?: string;
    };
  };
  /**
   * A collection of objects. Each key is a unique identifier for a streaming
   * entity, and each value is an object that contains data for that entity,
   * such as `title`, `artistName`, `thumbnailUrl`, etc.
   */
  entitiesByUniqueId: {
    [entityUniqueId: string]: {
      /** This is the unique identifier on the streaming platform/API provider */
      id: string;
      type: 'song' | 'album';
      title?: string;
      artistName?: string;
      thumbnailUrl?: string;
      thumbnailWidth?: number;
      thumbnailHeight?: number;
      /**
       * The API provider that powered this match. Useful if you'd like to use
       * this entity's data to query the API directly
       */
      apiProvider: APIProvider;
      /**
       * An array of platforms that are "powered" by this entity. E.g. an entity
       * from Apple Music will generally have a `platforms` array of
       * `["appleMusic", "itunes"]` since both those platforms/links are derived
       * from this single entity
       */
      platforms: Platform[];
    };
  };
 };
 /**
  * Identifiers of the platforms a match can be linked on.
  * Used as the keys of `OdesliResponse.linksByPlatform` and as the element
  * type of each entity's `platforms` array.
  */
 export type Platform =
  | 'spotify'
  | 'itunes'
  | 'appleMusic'
  | 'youtube'
  | 'youtubeMusic'
  | 'google'
  | 'googleStore'
  | 'pandora'
  | 'deezer'
  | 'tidal'
  | 'amazonStore'
  | 'amazonMusic'
  | 'soundcloud'
  | 'napster'
  | 'yandex'
  | 'spinrilla'
  | 'audius'
  | 'audiomack'
  | 'anghami'
  | 'boomplay';
 /**
  * Identifiers of the API providers that can power a match.
  * Type of the `apiProvider` field of the entries in
  * `OdesliResponse.entitiesByUniqueId`.
  */
 export type APIProvider =
  | 'spotify'
  | 'itunes'
  | 'youtube'
  | 'google'
  | 'pandora'
  | 'deezer'
  | 'tidal'
  | 'amazon'
  | 'soundcloud'
  | 'napster'
  | 'yandex'
  | 'spinrilla'
  | 'audius'
  | 'audiomack'
  | 'anghami'
  | 'boomplay';
--- a/src/lib/server/db.ts
+++ b/src/lib/server/db.ts
@ -240,6 +240,11 @@ export async function savePost(post: Post): Promise<undefined> {
              return;
            }
            if (!post.tags.length) {
              resolve(undefined);
              return;
            }
            db.parallelize(() => {
              let remaining = post.tags.length;
              for (const tag of post.tags) {
--- a/src/lib/server/timeline.ts
+++ b/src/lib/server/timeline.ts
@ -1,13 +1,16 @@
 import {
  HASHTAG_FILTER,
  MASTODON_INSTANCE,
  ODESLI_API_KEY,
  URL_FILTER,
  YOUTUBE_API_KEY,
  YOUTUBE_DISABLE
 } from '$env/static/private';
 import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
 import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
 import { getPosts, savePost } from '$lib/server/db';
 import { createFeed, saveAtomFeed } from '$lib/server/rss';
 import { sleep } from '$lib/sleep';
 import { isTruthy } from '$lib/truthyString';
 import { WebSocket } from 'ws';
@ -15,11 +18,13 @@ const YOUTUBE_REGEX = new RegExp(
  /https?:\/\/(www\.)?youtu((be.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/gm
 );
 // Finds every `href="..."` attribute that is directly followed by ` target="_blank"`
 // and exposes the link address as the named capture group `postUrl`.
 const URL_REGEX = /href="(?<postUrl>[^>]+?)" target="_blank"/gm;
 export class TimelineReader {
  private static _instance: TimelineReader;
  private static async isMusicVideo(videoId: string) {
-    if (YOUTUBE_API_KEY === undefined) {
+    if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
      // Assume that it *is* a music link when no YT API key is provided
      // If such links should instead be assumed not to be music, YOUTUBE_DISABLE needs to be set to something truthy
      return true;
@ -56,9 +61,9 @@ export class TimelineReader {
    return categoryTitle === 'Music';
  }
-  private static async checkYoutubeMatches(postContent: string): Promise<boolean> {
+  private static async checkYoutubeMatches(postContent: string): Promise<string | null> {
    if (isTruthy(YOUTUBE_DISABLE)) {
-      return false;
+      return null;
    }
    const matches = postContent.matchAll(YOUTUBE_REGEX);
    for (const match of matches) {
@ -69,18 +74,93 @@ export class TimelineReader {
      try {
        const isMusic = await TimelineReader.isMusicVideo(videoId);
        if (isMusic) {
-          return true;
+          return match[0];
        }
      } catch (e) {
        console.error('Could not check if', videoId, 'is a music video', e);
      }
    }
-    return false;
+    return null;
  }
  /**
   * Looks up metadata for a music link via the song.link (Odesli) links API.
   *
   * The lookup is made with `userCountry=DE` and `songIfSingle=true`; the API key
   * is only sent when `ODESLI_API_KEY` is set to something other than the
   * `CHANGE_ME` placeholder. When the API answers with HTTP 429 the call waits
   * 10 seconds and retries, consuming one of `remainingTries`.
   *
   * This method never rejects: every failure is logged and reported as `null`,
   * so a failed lookup cannot abort the caller's processing of a post.
   *
   * @param url The link to look up.
   * @param remainingTries How many attempts may still be made.
   * @returns The entity metadata plus the song.link page URL and, when matched,
   *          the YouTube URL; `null` if the URL is invalid, points to
   *          songwhip.com, the lookup failed, or no tries are left.
   */
  private static async getSongInfo(
    url: string,
    remainingTries: number = 6
  ): Promise<SongInfo | null> {
    // `<=` rather than `===` so a negative value cannot cause endless retries
    if (remainingTries <= 0) {
      console.error('No tries remaining. Lookup failed!');
      return null;
    }

    let hostname: string;
    try {
      hostname = new URL(url).hostname;
    } catch (e) {
      console.error(`Could not construct URL ${url}`, e);
      return null;
    }
    if (hostname === 'songwhip.com') {
      // song.link doesn't support songwhip links and songwhip themselves will provide metadata if you pass in a
      // Apple Music/Spotify/etc link, but won't when provided with their own link, so no way to extract song info
      // except maybe scraping their HTML
      return null;
    }

    const odesliParams = new URLSearchParams();
    odesliParams.append('url', url);
    odesliParams.append('userCountry', 'DE');
    odesliParams.append('songIfSingle', 'true');
    if (ODESLI_API_KEY && ODESLI_API_KEY !== 'CHANGE_ME') {
      odesliParams.append('key', ODESLI_API_KEY);
    }
    const odesliApiUrl = `https://api.song.link/v1-alpha.1/links?${odesliParams}`;

    try {
      // Everything is awaited *inside* the try block. Returning the un-awaited promise would let
      // rejections bypass the catch below, so the rate-limit retry would never run.
      const response = await fetch(odesliApiUrl);
      if (response.status === 429) {
        throw new Error('Rate limit reached', { cause: 429 });
      }
      if (!response.ok) {
        // Error responses do not have the OdesliResponse shape; don't try to interpret them
        throw new Error(`song.link responded with HTTP status ${response.status}`);
      }
      const odesliInfo: OdesliResponse = await response.json();
      const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
      const platform: Platform = 'youtube';
      return {
        ...info,
        pageUrl: odesliInfo.pageUrl,
        // A platform key only exists if a match was found on that platform
        youtubeUrl: odesliInfo.linksByPlatform[platform]?.url
      } as SongInfo;
    } catch (e) {
      if (e instanceof Error && e.cause === 429) {
        console.warn('song.link rate limit reached. Trying again in 10 seconds');
        await sleep(10_000);
        return await TimelineReader.getSongInfo(url, remainingTries - 1);
      }
      console.error(`Failed to load ${url} info from song.link`, e);
      return null;
    }
  }
  /**
   * Intended to return the URL of a post's preview card.
   *
   * Currently a stub: it always resolves to `undefined` and does not read `post`.
   * The disabled implementation is kept in the block comment below.
   *
   * @param post The post whose preview card would be inspected.
   * @returns Always `undefined` at the moment.
   */
  private static async getUrlFromPreviewCard(post: Post): Promise<string | undefined> {
    return undefined;
    // Currently disabled, because the card seems to always be null, even after re-fetching the post from Mastodon
    /*
    if (post.card) {
      return post.card?.url;
    }
    try {
      const status: Post = await (
        await fetch(`https://${MASTODON_INSTANCE}/api/v1/statuses/${post.id}`)
      ).json();
      return status.card?.url;
    } catch (e) {
      console.error(`Could not fetch status ${post.url}`, e);
    }
    */
  }
  private startWebsocket() {
    const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
    socket.onopen = () => {
      console.log('Connected to WS');
      socket.send('{ "type": "subscribe", "stream": "public:local"}');
    };
    socket.onmessage = async (event) => {
@ -95,17 +175,69 @@ export class TimelineReader {
        const urls: string[] = URL_FILTER.split(',');
        const found_urls = urls.filter((t) => post.content.includes(t));
-
+        const urlsToCheck: string[] = [];
        // If we don't have any tags or non-youtube urls, check youtube
        // YT is handled separately, because it requires an API call and therefore is slower
-        if (
+        if (found_urls.length === 0 && found_tags.length === 0) {
-          found_urls.length === 0 &&
+          const youtubeUrl = await TimelineReader.checkYoutubeMatches(post.content);
-          found_tags.length === 0 &&
+          if (youtubeUrl === null) {
-          !(await TimelineReader.checkYoutubeMatches(post.content))
+            console.log('Ignoring post', post.url);
-        ) {
+            return;
-          return;
+          }
          urlsToCheck.push(youtubeUrl);
          console.log('Found YT URL', youtubeUrl, found_urls, found_urls.length);
        }
        // TODO: Change URL detection above to use this regex.
        // Looks like we're stuck with regex for now instead of using preview cards.
        // Might as well use it to find URLs. Could also use this for YouTube: If Odesli finds something, it's a song,
        // if not, ignore it. No need to consult the YT API and give those links a special handling
        const musicUrls: string[] = [];
        const musicUrl = await TimelineReader.getUrlFromPreviewCard(post);
        if (musicUrl) {
          musicUrls.push(musicUrl);
        } else {
          const urlMatches = post.content.matchAll(URL_REGEX);
          for (const match of urlMatches) {
            if (match === undefined || match.groups === undefined) {
              continue;
            }
            const urlMatch = match.groups.postUrl.toString();
            const musicUrl = urls.find((u) => urlMatch.includes(u));
            if (musicUrl) {
              musicUrls.push(urlMatch);
            }
          }
        }
        for (const url of musicUrls) {
          let hostname: string | null = null;
          try {
            hostname = new URL(url).hostname;
          } catch (e) {
            console.error(`Could not check hostname for URL ${url}`, e);
          }
          if (hostname === 'songwhip.com') {
            // TODO: Implement checking the songwhip API
            continue;
          }
          const info = await TimelineReader.getSongInfo(url);
          if (info) {
            console.info(
              'Got song info for',
              post.url,
              url,
              info.artistName,
              info.title,
              info.thumbnailUrl,
              info.pageUrl,
              info.youtubeUrl
            );
          }
        }
        await savePost(post);
        const posts = await getPosts(null, null, 100);
        await saveAtomFeed(createFeed(posts));
      } catch (e) {
--- a/src/lib/sleep.ts
+++ b/src/lib/sleep.ts
@ -0,0 +1,5 @@
 /**
  * Creates a promise that settles once a timer of `timeInMs` milliseconds fires.
  *
  * @param timeInMs Delay passed to `setTimeout`, in milliseconds.
  * @returns A promise that resolves with `undefined` when the timer fires; it never rejects.
  */
 export function sleep(timeInMs: number): Promise<undefined> {
  return new Promise((done) => {
    setTimeout(() => done(undefined), timeInMs);
  });
 }