Extract song info from odesli (song.link)

This commit is contained in:
Max Nuding 2023-04-22 08:50:17 +02:00
parent 45eeb550b3
commit b62936ed54
Signed by: phlaym
GPG Key ID: A06651BAB6777237
6 changed files with 309 additions and 13 deletions

View File

@ -1,7 +1,8 @@
HASHTAG_FILTER = ichlausche,music,musik,nowplaying,tunetuesday,nowlistening
URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com
URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com,songwhip.com
YOUTUBE_API_KEY = CHANGE_ME
YOUTUBE_DISABLE = false
ODESLI_API_KEY = CHANGE_ME
MASTODON_INSTANCE = 'metalhead.club'
BASE_URL = 'https://moshingmammut.phlaym.net'
VERBOSE = false

View File

@ -10,6 +10,16 @@ export interface Post {
url: string;
content: string;
account: Account;
card?: PreviewCard;
}
/**
 * Link preview that may be attached to a post through the optional `card`
 * field of `Post`.
 *
 * NOTE(review): presumably mirrors Mastodon's PreviewCard entity — confirm the
 * field semantics against the Mastodon API documentation.
 */
export interface PreviewCard {
/** URL carried by the card. */
url: string;
title: string;
/** Optional; NOTE(review): presumably the URL of the preview image — confirm. */
image?: string;
/** Optional; NOTE(review): presumably a BlurHash placeholder for `image` — confirm. */
blurhash?: string;
// NOTE(review): width/height presumably describe the preview image in pixels — confirm.
width: number;
height: number;
}
export interface Tag {

143
src/lib/odesliResponse.ts Normal file
View File

@ -0,0 +1,143 @@
/**
 * Condensed song or album metadata assembled from an Odesli (song.link)
 * response.
 */
export type SongInfo = {
/** Songlink page URL for the entity (taken from `OdesliResponse.pageUrl`). */
pageUrl: string;
/** URL of the YouTube match; absent when the response has no `youtube` link. */
youtubeUrl?: string;
/** Whether the entity is a single song or an album. */
type: 'song' | 'album';
title?: string;
artistName?: string;
thumbnailUrl?: string;
};
/**
 * NOTE(review): not referenced by any code visible here; presumably models the
 * Songwhip API response for the songwhip.com handling that is still a TODO —
 * confirm the field meanings against the Songwhip API.
 *
 * The identifier is spelled `Reponse` (sic). It is exported, so renaming it
 * would break any existing importers.
 */
export type SongwhipReponse = {
type: 'track' | 'album';
name: string;
image?: string;
url: string;
};
/**
 * Response body of the Odesli (song.link) `links` endpoint.
 */
export type OdesliResponse = {
	/**
	 * The unique ID for the input entity that was supplied in the request. The
	 * data for this entity, such as title, artistName, etc. will be found in
	 * an object at `entitiesByUniqueId[entityUniqueId]`
	 */
	entityUniqueId: string;
	/**
	 * The userCountry query param that was supplied in the request. It signals
	 * the country/availability we use to query the streaming platforms. Defaults
	 * to 'US' if no userCountry supplied in the request.
	 *
	 * NOTE: As a fallback, our service may respond with matches that were found
	 * in a locale other than the userCountry supplied
	 */
	userCountry: string;
	/**
	 * A URL that will render the Songlink page for this entity
	 */
	pageUrl: string;
	/**
	 * A collection of objects. Each key is a platform, and each value is an
	 * object that contains data for linking to the match
	 */
	linksByPlatform: {
		/**
		 * Each key in `linksByPlatform` is a Platform. A Platform will exist here
		 * only if there is a match found. E.g. if there is no YouTube match found,
		 * then neither `youtube` or `youtubeMusic` properties will exist here.
		 *
		 * The keys are therefore optional: callers must handle a missing platform.
		 */
		[k in Platform]?: {
			/**
			 * The unique ID for this entity. Use it to look up data about this entity
			 * at `entitiesByUniqueId[entityUniqueId]`
			 */
			entityUniqueId: string;
			/**
			 * The URL for this match
			 */
			url: string;
			/**
			 * The native app URI that can be used on mobile devices to open this
			 * entity directly in the native app
			 */
			nativeAppUriMobile?: string;
			/**
			 * The native app URI that can be used on desktop devices to open this
			 * entity directly in the native app
			 */
			nativeAppUriDesktop?: string;
		};
	};
	/**
	 * A collection of objects. Each key is a unique identifier for a streaming
	 * entity, and each value is an object that contains data for that entity,
	 * such as `title`, `artistName`, `thumbnailUrl`, etc.
	 */
	entitiesByUniqueId: {
		[entityUniqueId: string]: {
			/** This is the unique identifier on the streaming platform/API provider */
			id: string;
			type: 'song' | 'album';
			title?: string;
			artistName?: string;
			thumbnailUrl?: string;
			thumbnailWidth?: number;
			thumbnailHeight?: number;
			/**
			 * The API provider that powered this match. Useful if you'd like to use
			 * this entity's data to query the API directly
			 */
			apiProvider: APIProvider;
			/**
			 * An array of platforms that are "powered" by this entity. E.g. an entity
			 * from Apple Music will generally have a `platforms` array of
			 * `["appleMusic", "itunes"]` since both those platforms/links are derived
			 * from this single entity
			 */
			platforms: Platform[];
		};
	};
};
/**
 * Platform identifiers used as the keys of `OdesliResponse.linksByPlatform`
 * and as the elements of each entity's `platforms` array.
 */
export type Platform =
| 'spotify'
| 'itunes'
| 'appleMusic'
| 'youtube'
| 'youtubeMusic'
| 'google'
| 'googleStore'
| 'pandora'
| 'deezer'
| 'tidal'
| 'amazonStore'
| 'amazonMusic'
| 'soundcloud'
| 'napster'
| 'yandex'
| 'spinrilla'
| 'audius'
| 'audiomack'
| 'anghami'
| 'boomplay';
/**
 * API provider identifiers; the type of the `apiProvider` field on each entry
 * of `OdesliResponse.entitiesByUniqueId`.
 */
export type APIProvider =
| 'spotify'
| 'itunes'
| 'youtube'
| 'google'
| 'pandora'
| 'deezer'
| 'tidal'
| 'amazon'
| 'soundcloud'
| 'napster'
| 'yandex'
| 'spinrilla'
| 'audius'
| 'audiomack'
| 'anghami'
| 'boomplay';

View File

@ -240,6 +240,11 @@ export async function savePost(post: Post): Promise<undefined> {
return;
}
if (!post.tags.length) {
resolve(undefined);
return;
}
db.parallelize(() => {
let remaining = post.tags.length;
for (const tag of post.tags) {

View File

@ -1,13 +1,16 @@
import {
HASHTAG_FILTER,
MASTODON_INSTANCE,
ODESLI_API_KEY,
URL_FILTER,
YOUTUBE_API_KEY,
YOUTUBE_DISABLE
} from '$env/static/private';
import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
import { getPosts, savePost } from '$lib/server/db';
import { createFeed, saveAtomFeed } from '$lib/server/rss';
import { sleep } from '$lib/sleep';
import { isTruthy } from '$lib/truthyString';
import { WebSocket } from 'ws';
@ -15,11 +18,13 @@ const YOUTUBE_REGEX = new RegExp(
/https?:\/\/(www\.)?youtu((be.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/gm
);
const URL_REGEX = new RegExp(/href="(?<postUrl>[^>]+?)" target="_blank"/gm);
export class TimelineReader {
private static _instance: TimelineReader;
private static async isMusicVideo(videoId: string) {
if (YOUTUBE_API_KEY === undefined) {
if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
// Assume that it *is* a music link when no YT API key is provided
// If it should instead be assumed not to be music, YOUTUBE_DISABLE needs to be set to something truthy
return true;
@ -56,9 +61,9 @@ export class TimelineReader {
return categoryTitle === 'Music';
}
private static async checkYoutubeMatches(postContent: string): Promise<boolean> {
private static async checkYoutubeMatches(postContent: string): Promise<string | null> {
if (isTruthy(YOUTUBE_DISABLE)) {
return false;
return null;
}
const matches = postContent.matchAll(YOUTUBE_REGEX);
for (const match of matches) {
@ -69,18 +74,93 @@ export class TimelineReader {
try {
const isMusic = await TimelineReader.isMusicVideo(videoId);
if (isMusic) {
return true;
return match[0];
}
} catch (e) {
console.error('Could not check if', videoId, 'is a music video', e);
}
}
return false;
return null;
}
/**
 * Looks up metadata for a music link through the Odesli (song.link) API.
 *
 * When the API answers with HTTP 429 the lookup waits 10 seconds and tries
 * again, using up one of `remainingTries` per attempt.
 *
 * @param url - Link to a song or album on a streaming platform.
 * @param remainingTries - Attempts left before giving up.
 * @returns The song info, or `null` when `url` cannot be parsed, points to
 * songwhip.com, all tries are used up, or the lookup fails.
 */
private static async getSongInfo(
	url: string,
	remainingTries: number = 6
): Promise<SongInfo | null> {
	// `<=` rather than `===` so a negative count can never recurse endlessly
	if (remainingTries <= 0) {
		console.error('No tries remaining. Lookup failed!');
		return null;
	}

	let hostname: string;
	try {
		hostname = new URL(url).hostname;
	} catch (e) {
		console.error(`Could not construct URL ${url}`, e);
		return null;
	}

	if (hostname === 'songwhip.com') {
		// song.link doesn't support songwhip links and songwhip themselves will provide metadata if you pass in an
		// Apple Music/Spotify/etc link, but won't when provided with their own link, so there is no way to extract
		// song info except maybe scraping their HTML
		return null;
	}

	const odesliParams = new URLSearchParams();
	odesliParams.append('url', url);
	odesliParams.append('userCountry', 'DE');
	odesliParams.append('songIfSingle', 'true');
	// The API key is optional; the placeholder from the example env file counts as "not set"
	if (ODESLI_API_KEY && ODESLI_API_KEY !== 'CHANGE_ME') {
		odesliParams.append('key', ODESLI_API_KEY);
	}
	const odesliApiUrl = `https://api.song.link/v1-alpha.1/links?${odesliParams}`;

	try {
		// Every step is awaited inside the `try`: a promise that is merely returned
		// rejects outside of it, which would skip the rate-limit retry below.
		const response = await fetch(odesliApiUrl);
		if (response.status === 429) {
			throw new Error('Rate limit reached', { cause: 429 });
		}
		if (!response.ok) {
			throw new Error(`song.link responded with status ${response.status}`);
		}
		const odesliInfo: OdesliResponse = await response.json();
		const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
		const platform: Platform = 'youtube';
		// The spread copies all entity fields, including ones SongInfo does not declare
		return {
			...info,
			pageUrl: odesliInfo.pageUrl,
			youtubeUrl: odesliInfo.linksByPlatform[platform]?.url
		} as SongInfo;
	} catch (e) {
		if (e instanceof Error && e.cause === 429) {
			console.warn('song.link rate limit reached. Trying again in 10 seconds');
			await sleep(10_000);
			return await TimelineReader.getSongInfo(url, remainingTries - 1);
		}
		console.error(`Failed to load ${url} info from song.link`, e);
		return null;
	}
}
/**
 * Intended to return the URL of the post's preview card.
 *
 * Currently a stub that always resolves to `undefined`; the intended
 * implementation is kept below, commented out.
 *
 * @param post - The post whose preview card would be inspected (unused while disabled).
 * @returns Always `undefined` for now.
 */
private static async getUrlFromPreviewCard(post: Post): Promise<string | undefined> {
return undefined;
// Currently disabled, because the preview card seems to always be null, even after re-fetching the post from Mastodon
/*
if (post.card) {
return post.card?.url;
}
try {
const status: Post = await (
await fetch(`https://${MASTODON_INSTANCE}/api/v1/statuses/${post.id}`)
).json();
return status.card?.url;
} catch (e) {
console.error(`Could not fetch status ${post.url}`, e);
}
*/
}
private startWebsocket() {
const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
socket.onopen = () => {
console.log('Connected to WS');
socket.send('{ "type": "subscribe", "stream": "public:local"}');
};
socket.onmessage = async (event) => {
@ -95,17 +175,69 @@ export class TimelineReader {
const urls: string[] = URL_FILTER.split(',');
const found_urls = urls.filter((t) => post.content.includes(t));
const urlsToCheck: string[] = [];
// If we don't have any tags or non-youtube urls, check youtube
// YT is handled separately, because it requires an API call and therefore is slower
if (
found_urls.length === 0 &&
found_tags.length === 0 &&
!(await TimelineReader.checkYoutubeMatches(post.content))
) {
return;
if (found_urls.length === 0 && found_tags.length === 0) {
const youtubeUrl = await TimelineReader.checkYoutubeMatches(post.content);
if (youtubeUrl === null) {
console.log('Ignoring post', post.url);
return;
}
urlsToCheck.push(youtubeUrl);
console.log('Found YT URL', youtubeUrl, found_urls, found_urls.length);
}
// TODO: Change URL detection above to use this regex.
// Looks like we're stuck with regex for now instead of using preview cards.
// Might as well use it to find URLs. Could also use this for YouTube: If Odesli finds something, it's a song,
// if not, ignore it. No need to consult the YT API and give those links a special handling
const musicUrls: string[] = [];
const musicUrl = await TimelineReader.getUrlFromPreviewCard(post);
if (musicUrl) {
musicUrls.push(musicUrl);
} else {
const urlMatches = post.content.matchAll(URL_REGEX);
for (const match of urlMatches) {
if (match === undefined || match.groups === undefined) {
continue;
}
const urlMatch = match.groups.postUrl.toString();
const musicUrl = urls.find((u) => urlMatch.includes(u));
if (musicUrl) {
musicUrls.push(urlMatch);
}
}
}
for (const url of musicUrls) {
let hostname: string | null = null;
try {
hostname = new URL(url).hostname;
} catch (e) {
console.error(`Could not check hostname for URL ${url}`, e);
}
if (hostname === 'songwhip.com') {
// TODO: Implement checking the songwhip API
continue;
}
const info = await TimelineReader.getSongInfo(url);
if (info) {
console.info(
'Got song info for',
post.url,
url,
info.artistName,
info.title,
info.thumbnailUrl,
info.pageUrl,
info.youtubeUrl
);
}
}
await savePost(post);
const posts = await getPosts(null, null, 100);
await saveAtomFeed(createFeed(posts));
} catch (e) {

5
src/lib/sleep.ts Normal file
View File

@ -0,0 +1,5 @@
/**
 * Creates a promise that settles once the given delay has elapsed.
 *
 * @param timeInMs - Delay in milliseconds, handed to `setTimeout`.
 * @returns A promise that resolves with `undefined` after the delay.
 */
export function sleep(timeInMs: number): Promise<undefined> {
	return new Promise<undefined>((done) => {
		setTimeout(() => done(undefined), timeInMs);
	});
}