Extract song info from odesli (song.link)
This commit is contained in:
parent
45eeb550b3
commit
b62936ed54
@ -1,7 +1,8 @@
|
||||
HASHTAG_FILTER = ichlausche,music,musik,nowplaying,tunetuesday,nowlistening
|
||||
URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com
|
||||
URL_FILTER = song.link,album.link,spotify.com,music.apple.com,bandcamp.com,songwhip.com
|
||||
YOUTUBE_API_KEY = CHANGE_ME
|
||||
YOUTUBE_DISABLE = false
|
||||
ODESLI_API_KEY = CHANGE_ME
|
||||
MASTODON_INSTANCE = 'metalhead.club'
|
||||
BASE_URL = 'https://moshingmammut.phlaym.net'
|
||||
VERBOSE = false
|
||||
|
@ -10,6 +10,16 @@ export interface Post {
|
||||
url: string;
|
||||
content: string;
|
||||
account: Account;
|
||||
card?: PreviewCard;
|
||||
}
|
||||
|
||||
/**
 * Link preview attached to a post (see the optional `card` property of `Post`).
 *
 * NOTE(review): the field meanings below presumably mirror Mastodon's
 * PreviewCard entity — confirm against the Mastodon API documentation.
 */
export interface PreviewCard {
	/** Location of the linked resource. */
	url: string;
	/** Title of the linked resource. */
	title: string;
	/** Preview image, when one is available. */
	image?: string;
	/** Blurhash placeholder for the preview image, when one is available. */
	blurhash?: string;
	/** Width of the preview image. */
	width: number;
	/** Height of the preview image. */
	height: number;
}
|
||||
|
||||
export interface Tag {
|
||||
|
143
src/lib/odesliResponse.ts
Normal file
143
src/lib/odesliResponse.ts
Normal file
@ -0,0 +1,143 @@
|
||||
/**
 * Condensed metadata about a song or album, as resolved through the
 * Odesli (song.link) API.
 */
export type SongInfo = {
	/** URL of the song.link page for this entity. */
	pageUrl: string;
	/** URL of the YouTube match, if one was found. */
	youtubeUrl?: string;
	/** Whether the entity is a single song or a whole album. */
	type: 'song' | 'album';
	/** Title of the song or album, if known. */
	title?: string;
	/** Name of the artist, if known. */
	artistName?: string;
	/** URL of a thumbnail image, if known. */
	thumbnailUrl?: string;
};
|
||||
|
||||
/**
 * Shape of a Songwhip lookup result.
 *
 * NOTE(review): field meanings are inferred from their names — confirm
 * against the Songwhip API before relying on them.
 */
export type SongwhipResponse = {
	/** Whether the result describes a single track or an album. */
	type: 'track' | 'album';
	/** Display name of the track or album. */
	name: string;
	/** Artwork URL, when available. */
	image?: string;
	/** URL of the result. */
	url: string;
};

/**
 * @deprecated Misspelled name, kept so existing imports keep compiling.
 * Use `SongwhipResponse` instead.
 */
export type SongwhipReponse = SongwhipResponse;
|
||||
|
||||
/**
 * Response body of the Odesli (song.link) `links` endpoint.
 */
export type OdesliResponse = {
	/**
	 * The unique ID for the input entity that was supplied in the request. The
	 * data for this entity, such as title, artistName, etc. will be found in
	 * an object at `entitiesByUniqueId[entityUniqueId]`
	 */
	entityUniqueId: string;

	/**
	 * The userCountry query param that was supplied in the request. It signals
	 * the country/availability we use to query the streaming platforms. Defaults
	 * to 'US' if no userCountry supplied in the request.
	 *
	 * NOTE: As a fallback, our service may respond with matches that were found
	 * in a locale other than the userCountry supplied
	 */
	userCountry: string;

	/**
	 * A URL that will render the Songlink page for this entity
	 */
	pageUrl: string;

	/**
	 * A collection of objects. Each key is a platform, and each value is an
	 * object that contains data for linking to the match
	 */
	linksByPlatform: {
		/**
		 * Each key in `linksByPlatform` is a Platform. A Platform will exist here
		 * only if there is a match found. E.g. if there is no YouTube match found,
		 * then neither `youtube` or `youtubeMusic` properties will exist here.
		 *
		 * The keys are therefore optional: always check for `undefined` before
		 * reading a platform's link.
		 */
		[k in Platform]?: {
			/**
			 * The unique ID for this entity. Use it to look up data about this entity
			 * at `entitiesByUniqueId[entityUniqueId]`
			 */
			entityUniqueId: string;

			/**
			 * The URL for this match
			 */
			url: string;

			/**
			 * The native app URI that can be used on mobile devices to open this
			 * entity directly in the native app
			 */
			nativeAppUriMobile?: string;

			/**
			 * The native app URI that can be used on desktop devices to open this
			 * entity directly in the native app
			 */
			nativeAppUriDesktop?: string;
		};
	};

	/**
	 * A collection of objects. Each key is a unique identifier for a streaming
	 * entity, and each value is an object that contains data for that entity,
	 * such as `title`, `artistName`, `thumbnailUrl`, etc.
	 */
	entitiesByUniqueId: {
		[entityUniqueId: string]: {
			/** This is the unique identifier on the streaming platform/API provider */
			id: string;

			type: 'song' | 'album';

			title?: string;
			artistName?: string;
			thumbnailUrl?: string;
			thumbnailWidth?: number;
			thumbnailHeight?: number;

			/**
			 * The API provider that powered this match. Useful if you'd like to use
			 * this entity's data to query the API directly
			 */
			apiProvider: APIProvider;

			/**
			 * An array of platforms that are "powered" by this entity. E.g. an entity
			 * from Apple Music will generally have a `platforms` array of
			 * `["appleMusic", "itunes"]` since both those platforms/links are derived
			 * from this single entity
			 */
			platforms: Platform[];
		};
	};
};
|
||||
|
||||
/**
 * Identifiers of the streaming platforms Odesli can link to. Used as the keys
 * of `OdesliResponse.linksByPlatform` and in each entity's `platforms` list.
 */
export type Platform =
	| 'spotify'
	| 'itunes'
	| 'appleMusic'
	| 'youtube'
	| 'youtubeMusic'
	| 'google'
	| 'googleStore'
	| 'pandora'
	| 'deezer'
	| 'tidal'
	| 'amazonStore'
	| 'amazonMusic'
	| 'soundcloud'
	| 'napster'
	| 'yandex'
	| 'spinrilla'
	| 'audius'
	| 'audiomack'
	| 'anghami'
	| 'boomplay';
|
||||
|
||||
/**
 * Identifiers of the API providers that can power an Odesli match. Used as the
 * type of each entity's `apiProvider` field in `OdesliResponse`.
 */
export type APIProvider =
	| 'spotify'
	| 'itunes'
	| 'youtube'
	| 'google'
	| 'pandora'
	| 'deezer'
	| 'tidal'
	| 'amazon'
	| 'soundcloud'
	| 'napster'
	| 'yandex'
	| 'spinrilla'
	| 'audius'
	| 'audiomack'
	| 'anghami'
	| 'boomplay';
|
@ -240,6 +240,11 @@ export async function savePost(post: Post): Promise<undefined> {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!post.tags.length) {
|
||||
resolve(undefined);
|
||||
return;
|
||||
}
|
||||
|
||||
db.parallelize(() => {
|
||||
let remaining = post.tags.length;
|
||||
for (const tag of post.tags) {
|
||||
|
@ -1,13 +1,16 @@
|
||||
import {
|
||||
HASHTAG_FILTER,
|
||||
MASTODON_INSTANCE,
|
||||
ODESLI_API_KEY,
|
||||
URL_FILTER,
|
||||
YOUTUBE_API_KEY,
|
||||
YOUTUBE_DISABLE
|
||||
} from '$env/static/private';
|
||||
import type { Post, Tag, TimelineEvent } from '$lib/mastodon/response';
|
||||
import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
|
||||
import { getPosts, savePost } from '$lib/server/db';
|
||||
import { createFeed, saveAtomFeed } from '$lib/server/rss';
|
||||
import { sleep } from '$lib/sleep';
|
||||
import { isTruthy } from '$lib/truthyString';
|
||||
import { WebSocket } from 'ws';
|
||||
|
||||
@ -15,11 +18,13 @@ const YOUTUBE_REGEX = new RegExp(
|
||||
/https?:\/\/(www\.)?youtu((be.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/gm
|
||||
);
|
||||
|
||||
// Captures (as `postUrl`) the href value of each link whose `target="_blank"` attribute directly follows the href
const URL_REGEX = /href="(?<postUrl>[^>]+?)" target="_blank"/gm;
|
||||
|
||||
export class TimelineReader {
|
||||
private static _instance: TimelineReader;
|
||||
|
||||
private static async isMusicVideo(videoId: string) {
|
||||
if (YOUTUBE_API_KEY === undefined) {
|
||||
if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
|
||||
// Assume that it *is* a music link when no YT API key is provided
|
||||
// If YouTube links should instead be assumed *not* to be music, YOUTUBE_DISABLE needs to be set to something truthy
|
||||
return true;
|
||||
@ -56,9 +61,9 @@ export class TimelineReader {
|
||||
return categoryTitle === 'Music';
|
||||
}
|
||||
|
||||
private static async checkYoutubeMatches(postContent: string): Promise<boolean> {
|
||||
private static async checkYoutubeMatches(postContent: string): Promise<string | null> {
|
||||
if (isTruthy(YOUTUBE_DISABLE)) {
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
const matches = postContent.matchAll(YOUTUBE_REGEX);
|
||||
for (const match of matches) {
|
||||
@ -69,18 +74,93 @@ export class TimelineReader {
|
||||
try {
|
||||
const isMusic = await TimelineReader.isMusicVideo(videoId);
|
||||
if (isMusic) {
|
||||
return true;
|
||||
return match[0];
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Could not check if', videoId, 'is a music video', e);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Looks up metadata for a music URL through the Odesli (song.link) API.
 *
 * When the API answers with HTTP 429 the lookup waits 10 seconds and tries
 * again, until `remainingTries` is used up.
 *
 * @param url The music URL to resolve
 * @param remainingTries Number of attempts left before giving up
 * @returns The resolved song info, or `null` if the URL is invalid, points to
 * songwhip.com, the lookup fails, or no tries remain
 */
private static async getSongInfo(
	url: string,
	remainingTries: number = 6
): Promise<SongInfo | null> {
	if (remainingTries <= 0) {
		console.error('No tries remaining. Lookup failed!');
		return null;
	}

	let hostname: string;
	try {
		hostname = new URL(url).hostname;
	} catch (e) {
		console.error(`Could not construct URL ${url}`, e);
		return null;
	}
	if (hostname === 'songwhip.com') {
		// song.link doesn't support songwhip links and songwhip themselves will provide metadata if you pass in a
		// Apple Music/Spotify/etc link, but won't when provided with their own link, so no way to extract song info
		// except maybe scraping their HTML
		return null;
	}

	const odesliParams = new URLSearchParams();
	odesliParams.append('url', url);
	odesliParams.append('userCountry', 'DE');
	odesliParams.append('songIfSingle', 'true');
	if (ODESLI_API_KEY && ODESLI_API_KEY !== 'CHANGE_ME') {
		odesliParams.append('key', ODESLI_API_KEY);
	}
	const odesliApiUrl = `https://api.song.link/v1-alpha.1/links?${odesliParams}`;

	try {
		// The request must be awaited *inside* the try block. Returning the bare promise would let any
		// rejection (including the rate-limit error thrown below) bypass the catch, so the retry would never run.
		const response = await fetch(odesliApiUrl);
		if (response.status === 429) {
			throw new Error('Rate limit reached', { cause: 429 });
		}
		if (!response.ok) {
			// Error responses do not carry the expected payload, so don't try to interpret them
			throw new Error(`song.link responded with status ${response.status}`);
		}

		const odesliInfo: OdesliResponse = await response.json();
		const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
		if (!info) {
			console.error(`song.link response for ${url} does not contain the requested entity`);
			return null;
		}

		const songInfo: SongInfo = {
			...info,
			pageUrl: odesliInfo.pageUrl,
			// A platform key only exists if a match was found for it
			youtubeUrl: odesliInfo.linksByPlatform.youtube?.url
		};
		return songInfo;
	} catch (e) {
		if (e instanceof Error && e.cause === 429) {
			console.warn('song.link rate limit reached. Trying again in 10 seconds');
			await sleep(10_000);
			return await TimelineReader.getSongInfo(url, remainingTries - 1);
		}
		console.error(`Failed to load ${url} info from song.link`, e);
		return null;
	}
}
|
||||
|
||||
/**
 * Intended to read the music URL from a post's preview card.
 *
 * The lookup is switched off for now, so this always resolves to `undefined`.
 *
 * @param post The post whose preview card would be inspected (currently unused)
 * @returns Always `undefined` while the lookup is disabled
 */
private static async getUrlFromPreviewCard(post: Post): Promise<string | undefined> {
	// Currently disabled, because it seems to always be null, even after re-fetching the post from Mastodon
	/*
	if (post.card) {
		return post.card?.url;
	}
	try {
		const status: Post = await (
			await fetch(`https://${MASTODON_INSTANCE}/api/v1/statuses/${post.id}`)
		).json();
		return status.card?.url;
	} catch (e) {
		console.error(`Could not fetch status ${post.url}`, e);
	}
	*/
	return undefined;
}
|
||||
|
||||
private startWebsocket() {
|
||||
const socket = new WebSocket(`wss://${MASTODON_INSTANCE}/api/v1/streaming`);
|
||||
socket.onopen = () => {
|
||||
console.log('Connected to WS');
|
||||
socket.send('{ "type": "subscribe", "stream": "public:local"}');
|
||||
};
|
||||
socket.onmessage = async (event) => {
|
||||
@ -95,17 +175,69 @@ export class TimelineReader {
|
||||
|
||||
const urls: string[] = URL_FILTER.split(',');
|
||||
const found_urls = urls.filter((t) => post.content.includes(t));
|
||||
|
||||
const urlsToCheck: string[] = [];
|
||||
// If we don't have any tags or non-youtube urls, check youtube
|
||||
// YT is handled separately, because it requires an API call and therefore is slower
|
||||
if (
|
||||
found_urls.length === 0 &&
|
||||
found_tags.length === 0 &&
|
||||
!(await TimelineReader.checkYoutubeMatches(post.content))
|
||||
) {
|
||||
return;
|
||||
if (found_urls.length === 0 && found_tags.length === 0) {
|
||||
const youtubeUrl = await TimelineReader.checkYoutubeMatches(post.content);
|
||||
if (youtubeUrl === null) {
|
||||
console.log('Ignoring post', post.url);
|
||||
return;
|
||||
}
|
||||
urlsToCheck.push(youtubeUrl);
|
||||
console.log('Found YT URL', youtubeUrl, found_urls, found_urls.length);
|
||||
}
|
||||
|
||||
// TODO: Change URL detection above to use this regex.
|
||||
// Looks like we're stuck with regex for now instead of using preview cards.
|
||||
// Might as well use it to find URLs. Could also use this for YouTube: If Odesli finds something, it's a song,
|
||||
// if not, ignore it. No need to consult the YT API and give those links a special handling
|
||||
const musicUrls: string[] = [];
|
||||
const musicUrl = await TimelineReader.getUrlFromPreviewCard(post);
|
||||
if (musicUrl) {
|
||||
musicUrls.push(musicUrl);
|
||||
} else {
|
||||
const urlMatches = post.content.matchAll(URL_REGEX);
|
||||
for (const match of urlMatches) {
|
||||
if (match === undefined || match.groups === undefined) {
|
||||
continue;
|
||||
}
|
||||
const urlMatch = match.groups.postUrl.toString();
|
||||
const musicUrl = urls.find((u) => urlMatch.includes(u));
|
||||
if (musicUrl) {
|
||||
musicUrls.push(urlMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const url of musicUrls) {
|
||||
let hostname: string | null = null;
|
||||
try {
|
||||
hostname = new URL(url).hostname;
|
||||
} catch (e) {
|
||||
console.error(`Could not check hostname for URL ${url}`, e);
|
||||
}
|
||||
if (hostname === 'songwhip.com') {
|
||||
// TODO: Implement checking the songwhip API
|
||||
continue;
|
||||
}
|
||||
const info = await TimelineReader.getSongInfo(url);
|
||||
if (info) {
|
||||
console.info(
|
||||
'Got song info for',
|
||||
post.url,
|
||||
url,
|
||||
info.artistName,
|
||||
info.title,
|
||||
info.thumbnailUrl,
|
||||
info.pageUrl,
|
||||
info.youtubeUrl
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
await savePost(post);
|
||||
|
||||
const posts = await getPosts(null, null, 100);
|
||||
await saveAtomFeed(createFeed(posts));
|
||||
} catch (e) {
|
||||
|
5
src/lib/sleep.ts
Normal file
5
src/lib/sleep.ts
Normal file
@ -0,0 +1,5 @@
|
||||
/**
 * Creates a promise that settles once a timer of `timeInMs` milliseconds fires.
 *
 * @param timeInMs Delay handed to `setTimeout`, in milliseconds
 * @returns A promise that resolves with `undefined` after the delay
 */
export function sleep(timeInMs: number): Promise<undefined> {
	return new Promise((done) => {
		setTimeout(() => done(undefined), timeInMs);
	});
}
|
Loading…
Reference in New Issue
Block a user