Files
moshing-mammut/src/lib/server/timeline.ts
2025-07-04 11:44:18 +02:00

505 lines
16 KiB
TypeScript

import {
HASHTAG_FILTER,
MASTODON_ACCESS_TOKEN,
MASTODON_INSTANCE,
ODESLI_API_KEY,
YOUTUBE_API_KEY
} from '$env/static/private';
import { Logger } from '$lib/log';
import type {
Account,
AccountAvatar,
Post,
SongThumbnailImage,
Tag,
TimelineEvent
} from '$lib/mastodon/response';
import { SongThumbnailImageKind } from '$lib/mastodon/response';
import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
import {
getAvatars,
getPosts,
getSongThumbnails,
removeAvatars,
saveAvatar,
savePost,
saveSongThumbnail
} from '$lib/server/db';
import { SpotifyPlaylistAdder } from '$lib/server/playlist/spotifyPlaylistAdder';
import { YoutubePlaylistAdder } from '$lib/server/playlist/ytPlaylistAdder';
import { createFeed, saveAtomFeed } from '$lib/server/rss';
import { sleep } from '$lib/sleep';
import crypto from 'crypto';
import fs from 'fs/promises';
import { console } from 'inspector/promises';
import sharp from 'sharp';
import { URL, URLSearchParams } from 'url';
import { WebSocket } from 'ws';
import type { PlaylistAdder } from './playlist/playlistAdder';
// Captures the link target of every anchor whose `href` is directly followed by `target="_blank"`.
// This one has to stay global (`g`): it is consumed through `String.prototype.matchAll`,
// which rejects non-global regular expressions.
const URL_REGEX = /href="(?<postUrl>[^>]+?)" target="_blank"/gm;
// Captures the video id of an Invidious watch link.
// Deliberately NOT global: it is used with `.exec()`, and a global regex remembers `lastIndex`
// between calls, so a second lookup could start in the middle of the next URL and miss the match.
const INVIDIOUS_REGEX = /invidious.*?watch.*?v=(?<videoId>[a-zA-Z_0-9-]+)/m;
// Captures the video id of a youtube.com/...v=<id> or youtu.be/<id> link.
// Not global for the same reason as above; the dot in `youtube.com` is escaped so it only
// matches a literal dot.
const YOUTUBE_REGEX =
  /https?:\/\/(www\.)?youtu((be\.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/m;
export class TimelineReader {
private static _instance: TimelineReader;
private lastPosts: string[] = [];
private playlistAdders: PlaylistAdder[];
private logger: Logger;
/**
 * Decides whether a YouTube video should be treated as music.
 *
 * Without a configured YouTube API key every video is accepted. Otherwise the video counts as
 * music when its snippet carries the tag `music` or when the title of its category is `Music`.
 *
 * @param videoId id of the YouTube video to look up
 * @returns `true` if the video is (assumed to be) music; `false` if it is not, or if the
 * YouTube API did not return usable data for it
 */
private async isMusicVideo(videoId: string): Promise<boolean> {
  if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
    // Assume that it *is* a music link when no YT API key is provided
    this.logger.debug('YT API not configured');
    return true;
  }
  const searchParams = new URLSearchParams([
    ['part', 'snippet'],
    ['id', videoId],
    ['key', YOUTUBE_API_KEY]
  ]);
  const youtubeVideoUrl = new URL(`https://www.googleapis.com/youtube/v3/videos?${searchParams}`);
  const resp = await fetch(youtubeVideoUrl);
  if (!resp.ok) {
    // Error responses do not contain the expected payload, so do not try to read it
    this.logger.warn('YouTube API request failed', resp.status, videoId);
    return false;
  }
  const respObj = await resp.json();
  // `items` may be missing or empty; both mean that there is nothing to inspect
  const item = respObj?.items?.[0];
  if (!item) {
    this.logger.warn('Could not find video with id', videoId);
    return false;
  }
  if (!item.snippet) {
    this.logger.warn('Could not load snippet for video', videoId, item);
    return false;
  }
  if (item.snippet.tags?.includes('music')) {
    return true;
  }
  // No explicit tag: fall back to the title of the category the video is filed under
  const categorySearchParams = new URLSearchParams([
    ['part', 'snippet'],
    ['id', item.snippet.categoryId],
    ['key', YOUTUBE_API_KEY]
  ]);
  const youtubeCategoryUrl = new URL(
    `https://www.googleapis.com/youtube/v3/videoCategories?${categorySearchParams}`
  );
  const categoryResp = await fetch(youtubeCategoryUrl);
  const categoryObj = await categoryResp.json();
  const categoryTitle: string | undefined = categoryObj?.items?.[0]?.snippet?.title;
  this.logger.debug('YT category', categoryTitle);
  return categoryTitle === 'Music';
}
/**
 * Collects song information for every link found in the content of a post.
 *
 * Each link matched by `URL_REGEX` is parsed and handed to `getSongInfo`, one after the other.
 * Links that cannot be parsed as a URL are logged and skipped.
 *
 * @param post the post whose `content` is scanned for links
 * @returns the song infos of all links for which `getSongInfo` returned a result
 */
public async getSongInfoInPost(post: Post): Promise<SongInfo[]> {
  const foundSongs: SongInfo[] = [];
  for (const linkMatch of post.content.matchAll(URL_REGEX)) {
    if (linkMatch === undefined || linkMatch.groups === undefined) {
      this.logger.warn(
        'Match listed in allMatches, but either it or its groups are undefined',
        linkMatch
      );
      continue;
    }
    const rawUrl = linkMatch.groups.postUrl.toString();

    let parsedUrl: URL;
    try {
      parsedUrl = new URL(rawUrl);
    } catch (e) {
      this.logger.error('URL found via Regex does not seem to be a valud url', rawUrl, e);
      continue;
    }

    // Every link is looked up; the lookup decides whether it is music or not
    this.logger.debug(`Checking ${parsedUrl} if it contains song data`);
    const song = await this.getSongInfo(parsedUrl);
    if (song) {
      foundSongs.push(song);
    }
  }
  return foundSongs;
}
/**
 * Looks up a URL via the song.link (Odesli) API and builds a `SongInfo` from the response.
 *
 * Invidious watch links are rewritten to youtube.com links before the lookup. If the resolved
 * entity lists the `youtube` platform, the video additionally has to pass `isMusicVideo`.
 * When song.link answers with HTTP 429, the lookup waits 10 seconds and is retried.
 *
 * @param url the URL that was posted
 * @param remainingTries how many attempts are left before the lookup is given up
 * @returns the song info, or `null` if the URL is not a song, not supported, or the lookup failed
 */
private async getSongInfo(url: URL, remainingTries = 6): Promise<SongInfo | null> {
  if (remainingTries === 0) {
    this.logger.error('No tries remaining. Lookup failed!');
    return null;
  }
  if (url.hostname === 'songwhip.com') {
    // song.link doesn't support songwhip links and songwhip themselves will provide metadata if you pass in a
    // Apple Music/Spotify/etc link, but won't when provided with their own link, so no way to extract song info
    // except maybe scraping their HTML
    return null;
  }
  // The regex is shared module state. Should it be global/sticky, `.exec()` would continue at the
  // `lastIndex` left behind by the previous URL, so always start matching at the beginning.
  INVIDIOUS_REGEX.lastIndex = 0;
  const videoId = INVIDIOUS_REGEX.exec(url.href)?.groups?.videoId;
  const urlString =
    videoId !== undefined ? `https://youtube.com/watch?v=${videoId}` : url.toString();
  const odesliParams = new URLSearchParams();
  odesliParams.append('url', urlString);
  odesliParams.append('userCountry', 'DE');
  odesliParams.append('songIfSingle', 'true');
  if (ODESLI_API_KEY && ODESLI_API_KEY !== 'CHANGE_ME') {
    odesliParams.append('key', ODESLI_API_KEY);
  }
  const odesliApiUrl = `https://api.song.link/v1-alpha.1/links?${odesliParams}`;
  try {
    const response = await fetch(odesliApiUrl);
    if (response.status === 429) {
      // Marked via `cause` so that the catch block below can tell it apart from other failures
      throw new Error('Rate limit reached', { cause: 429 });
    }
    const odesliInfo: OdesliResponse = await response.json();
    if (!odesliInfo || !odesliInfo.entitiesByUniqueId || !odesliInfo.entityUniqueId) {
      return null;
    }
    const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
    const platform: Platform = 'youtube';
    if (info.platforms.includes(platform)) {
      // Same reasoning as for the Invidious regex above
      YOUTUBE_REGEX.lastIndex = 0;
      const youtubeId =
        videoId ??
        YOUTUBE_REGEX.exec(url.href)?.groups?.videoId ??
        new URL(odesliInfo.pageUrl).pathname.split('/y/').pop();
      if (youtubeId === undefined) {
        this.logger.warn(
          'Looks like a youtube video, but could not extract a video id',
          url,
          odesliInfo
        );
        return null;
      }
      const isMusic = await this.isMusicVideo(youtubeId);
      if (!isMusic) {
        this.logger.debug('Probably not a music video', youtubeId, url);
        return null;
      }
    }
    const spotify: Platform = 'spotify';
    const songInfo = {
      ...info,
      pageUrl: odesliInfo.pageUrl,
      youtubeUrl: odesliInfo.linksByPlatform[platform]?.url,
      spotifyUrl: odesliInfo.linksByPlatform[spotify]?.url,
      spotifyUri: odesliInfo.linksByPlatform[spotify]?.nativeAppUriDesktop,
      postedUrl: url.toString()
    } as SongInfo;
    if (songInfo.youtubeUrl && !songInfo.spotifyUrl) {
      this.logger.warn('SongInfo with YT, but no spotify URL', odesliInfo);
    }
    return songInfo;
  } catch (e) {
    if (e instanceof Error && e.cause === 429) {
      this.logger.warn('song.link rate limit reached. Trying again in 10 seconds');
      await sleep(10_000);
      return await this.getSongInfo(url, remainingTries - 1);
    }
    this.logger.error(`Failed to load ${url} info from song.link`, e);
    return null;
  }
}
/**
 * Hands a song to every configured playlist adder, one adder at a time.
 *
 * @param song the song to add
 */
private async addToPlaylist(song: SongInfo) {
  for (const playlistAdder of this.playlistAdders) {
    // Awaited inside the loop: the next adder only starts once this one is done
    await playlistAdder.addToPlaylist(song);
  }
}
/**
 * Writes a resized copy of an image to `<folder>/<baseName>_<suffix>`, unless that file
 * already exists.
 *
 * @param baseName first part of the file name
 * @param size size passed to sharp's `resize`
 * @param suffix last part of the file name, including the extension
 * @param folder directory the file is written to
 * @param sharpAvatar sharp instance holding the source image
 * @returns the path of the written file, or `null` if the file already existed
 */
private async resizeAvatar(
  baseName: string,
  size: number,
  suffix: string,
  folder: string,
  sharpAvatar: sharp.Sharp
): Promise<string | null> {
  const targetPath = `${folder}/${baseName}_${suffix}`;

  let alreadyPresent = true;
  try {
    await fs.access(targetPath, fs.constants.F_OK);
  } catch {
    // access() rejects when the file cannot be found
    alreadyPresent = false;
  }

  if (!alreadyPresent) {
    this.logger.debug('Saving avatar', targetPath);
    await sharpAvatar.resize(size).toFile(targetPath);
    return targetPath;
  }

  this.logger.debug('File already exists', targetPath);
  return null;
}
/**
 * Starts one resize-and-save job per pixel density (1x up to `maxPixelDensity`x) and format
 * for an account avatar. The files are written to the `avatars` folder.
 *
 * @param avatarFilenameBase first part of the generated file names
 * @param baseSize size of the 1x variant; the `n`x variant uses `baseSize * n`
 * @param maxPixelDensity highest pixel density to generate
 * @param accountUrl URL of the account the avatar belongs to
 * @param formats file extensions to generate
 * @param avatar the downloaded source image
 * @returns the already running jobs; each resolves once its avatar record has been saved
 */
private resizeAvatarPromiseMaker(
  avatarFilenameBase: string,
  baseSize: number,
  maxPixelDensity: number,
  accountUrl: string,
  formats: string[],
  avatar: ArrayBuffer
): Promise<void>[] {
  const source = sharp(avatar);
  const jobs: Promise<void>[] = [];
  for (let density = 1; density <= maxPixelDensity; density++) {
    const sizeDescriptor = `${density}x`;
    for (const format of formats) {
      const job = this.resizeAvatar(
        avatarFilenameBase,
        baseSize * density,
        `${sizeDescriptor}.${format}`,
        'avatars',
        source
      ).then((file) =>
        // NOTE(review): `file` is null when the resized file already existed - confirm that
        // the DB layer's saveAvatar copes with that
        saveAvatar({ accountUrl, file, sizeDescriptor } as AccountAvatar)
      );
      jobs.push(job);
    }
  }
  return jobs;
}
/**
 * Starts one resize-and-save job per pixel density (1x up to `maxPixelDensity`x) and format
 * for a song thumbnail. The files are written to the `thumbnails` folder.
 *
 * @param filenameBase first part of the generated file names
 * @param baseSize size of the 1x variant; the `n`x variant uses `baseSize * n`
 * @param maxPixelDensity highest pixel density to generate
 * @param songThumbnailUrl URL the thumbnail was downloaded from
 * @param formats file extensions to generate
 * @param image the downloaded source image
 * @param kind which thumbnail variant these files represent
 * @returns the already running jobs; each resolves once its thumbnail record has been saved
 */
private resizeThumbnailPromiseMaker(
  filenameBase: string,
  baseSize: number,
  maxPixelDensity: number,
  songThumbnailUrl: string,
  formats: string[],
  image: ArrayBuffer,
  kind: SongThumbnailImageKind
): Promise<void>[] {
  const source = sharp(image);
  const jobs: Promise<void>[] = [];
  for (let density = 1; density <= maxPixelDensity; density++) {
    const sizeDescriptor = `${density}x`;
    for (const format of formats) {
      const job = this.resizeAvatar(
        filenameBase,
        baseSize * density,
        `${sizeDescriptor}.${format}`,
        'thumbnails',
        source
      ).then((file) =>
        // NOTE(review): `file` is null when the resized file already existed - confirm that
        // the DB layer's saveSongThumbnail copes with that
        saveSongThumbnail({ songThumbnailUrl, file, sizeDescriptor, kind } as SongThumbnailImage)
      );
      jobs.push(job);
    }
  }
  return jobs;
}
/**
 * Downloads the avatar of an account and stores it in several sizes and formats.
 *
 * If the stored avatar files were generated from a differently named source image, the old
 * DB entries and files are removed first. Failures are logged and never propagate to the caller.
 *
 * @param account the account whose avatar should be saved
 */
private async saveAvatar(account: Account) {
  try {
    const existingAvatars = await getAvatars(account.url, 1);
    // Stored files are named `<base>_<suffix>`, so everything before the first `_` is the base
    const existingAvatarBase = existingAvatars.shift()?.file.split('/').pop()?.split('_').shift();
    const avatarFilenameBase =
      new URL(account.avatar).pathname.split('/').pop()?.split('.').shift() ?? account.acct;
    // User's avatar changed. Remove the old one!
    if (existingAvatarBase && existingAvatarBase !== avatarFilenameBase) {
      await removeAvatars(account.url);
      const staleFiles = (await fs.readdir('avatars')).filter((x) =>
        x.startsWith(existingAvatarBase + '_')
      );
      const avatarsToDelete = staleFiles.map((x) => {
        this.logger.debug('Removing existing avatar file', x);
        return fs.unlink('avatars/' + x);
      });
      // Best effort: a file that cannot be deleted must not prevent saving the new avatar
      await Promise.allSettled(avatarsToDelete);
    }
    const avatarResponse = await fetch(account.avatar);
    if (!avatarResponse.ok) {
      // Do not feed an error page into the image pipeline
      throw new Error(`Avatar download failed with HTTP status ${avatarResponse.status}`);
    }
    const avatar = await avatarResponse.arrayBuffer();
    await Promise.all(
      this.resizeAvatarPromiseMaker(
        avatarFilenameBase,
        50,
        3,
        account.url,
        ['webp', 'avif', 'jpeg'],
        avatar
      )
    );
  } catch (e) {
    // Use the class logger: the `console` in scope in this file is the one imported from
    // `inspector/promises`, not the global console
    this.logger.error('Could not resize and save avatar for', account.acct, account.avatar, e);
  }
}
/**
 * Downloads the thumbnail of every song and stores a large and a small variant of it in
 * several pixel densities and formats.
 *
 * Songs without a thumbnail URL, and songs for which thumbnails are already stored, are
 * skipped. A failure for one song is logged and does not affect the remaining songs.
 *
 * @param songs the songs whose thumbnails should be saved
 */
private async saveSongThumbnails(songs: SongInfo[]) {
  const formats = ['webp', 'avif', 'jpeg'];
  for (const song of songs) {
    if (!song.thumbnailUrl) {
      continue;
    }
    try {
      const existingThumbs = await getSongThumbnails(song);
      if (existingThumbs.length) {
        continue;
      }
      // The URL hash serves as the file name base
      const fileBaseName = crypto.createHash('sha256').update(song.thumbnailUrl).digest('hex');
      const imageResponse = await fetch(song.thumbnailUrl);
      if (!imageResponse.ok) {
        // Do not feed an error page into the image pipeline
        throw new Error(`Thumbnail download failed with HTTP status ${imageResponse.status}`);
      }
      const image = await imageResponse.arrayBuffer();
      await Promise.all(
        this.resizeThumbnailPromiseMaker(
          fileBaseName + '_large',
          200,
          3,
          song.thumbnailUrl,
          formats,
          image,
          SongThumbnailImageKind.Big
        )
      );
      await Promise.all(
        this.resizeThumbnailPromiseMaker(
          fileBaseName + '_small',
          60,
          3,
          song.thumbnailUrl,
          formats,
          image,
          SongThumbnailImageKind.Small
        )
      );
    } catch (e) {
      // Use the class logger: the `console` in scope in this file is the one imported from
      // `inspector/promises`, not the global console
      this.logger.error(
        'Could not resize and save song thumbnail for',
        song.pageUrl,
        song.thumbnailUrl,
        e
      );
    }
  }
}
/**
 * Stores a post if it is relevant, i.e. if it carries one of the hashtags listed in
 * `HASHTAG_FILTER` or contains at least one link that resolves to a song.
 *
 * For a relevant post this saves the post with its songs, the author's avatar and the song
 * thumbnails, regenerates the Atom feed from the latest 100 posts and finally adds every song
 * to the playlists.
 *
 * @param post the post to inspect
 */
private async checkAndSavePost(post: Post) {
  const wantedTagNames: string[] = HASHTAG_FILTER.split(',');
  const matchingTags: Tag[] = post.tags.filter((tag: Tag) => wantedTagNames.includes(tag.name));
  const songs = await this.getSongInfoInPost(post);

  // Neither a wanted hashtag nor a song link: nothing to do for this post
  const isRelevant = songs.length > 0 || matchingTags.length > 0;
  if (!isRelevant) {
    this.logger.log('Ignoring post', post.url);
    return;
  }

  await savePost(post, songs);
  await this.saveAvatar(post.account);
  await this.saveSongThumbnails(songs);
  this.logger.debug('Saved post', post.url, 'songs', songs);

  const feedPosts = await getPosts(null, null, 100);
  await saveAtomFeed(createFeed(feedPosts));

  for (const song of songs) {
    this.logger.debug('Adding to playlist', song);
    await this.addToPlaylist(song);
  }
}
/**
 * Opens the streaming websocket of the Mastodon instance (stream `public:local`) and passes
 * every `update` event on to `checkAndSavePost`.
 *
 * When the connection closes, a new connection is attempted after 10 seconds.
 */
private startWebsocket() {
  const socketLogger = new Logger('Websocket');
  const streamingUrl = `wss://${MASTODON_INSTANCE}/api/v1/streaming?type=subscribe&stream=public:local&access_token=${MASTODON_ACCESS_TOKEN}`;
  const socket = new WebSocket(streamingUrl);

  socket.onopen = () => {
    socketLogger.log('Connected to WS');
  };

  socket.onmessage = async (event) => {
    try {
      const timelineEvent: TimelineEvent = JSON.parse(event.data.toString());
      socketLogger.debug('ES event', timelineEvent.event);
      if (timelineEvent.event !== 'update') {
        socketLogger.log('Ignoring ES event', timelineEvent.event);
        return;
      }
      const post: Post = JSON.parse(timelineEvent.payload);

      // Sometimes onmessage is called twice for the same post.
      // This looks to be an issue with automatic reloading in the dev environment,
      // but hard to tell
      const alreadyHandled = this.lastPosts.includes(post.id);
      if (alreadyHandled) {
        socketLogger.log('Skipping post, already handled', post.id);
        return;
      }

      // Remember only the ids of the 10 most recent posts
      this.lastPosts.push(post.id);
      const overflow = this.lastPosts.length - 10;
      if (overflow > 0) {
        this.lastPosts.splice(0, overflow);
      }

      await this.checkAndSavePost(post);
    } catch (e) {
      socketLogger.error('error message', event, event.data, e);
    }
  };

  socket.onclose = (event) => {
    socketLogger.warn(
      `Websocket connection to ${MASTODON_INSTANCE} closed. Code: ${event.code}, reason: '${event.reason}'`,
      event
    );
    const reconnect = () => {
      socketLogger.info('Attempting to reconenct to WS');
      this.startWebsocket();
    };
    setTimeout(reconnect, 10000);
  };

  socket.onerror = (event) => {
    socketLogger.error(
      `Websocket connection to ${MASTODON_INSTANCE} failed. ${event.type}: ${event.error}, message: '${event.message}'`
    );
  };
}
/**
 * Fetches the local public timeline of the Mastodon instance and runs every returned post
 * through `checkAndSavePost`. If the DB already contains a post, only posts newer than that
 * one are requested (`since_id`).
 *
 * @throws Error if the timeline request is not successful or does not return a list of posts
 */
private async loadPostsSinceLastRun() {
  const now = new Date().toISOString();
  const latestPost = await getPosts(null, now, 1);
  if (latestPost.length > 0) {
    this.logger.log('Last post in DB since', now, latestPost[0].created_at);
  } else {
    this.logger.log('No posts in DB since');
  }
  const u = new URL(`https://${MASTODON_INSTANCE}/api/v1/timelines/public?local=true&limit=40`);
  if (latestPost.length > 0) {
    u.searchParams.append('since_id', latestPost[0].id);
  }
  // NOTE(review): presumably meant to narrow the result to the configured hashtags - confirm
  // that the timeline endpoint honours a `q` parameter at all
  for (const tag of HASHTAG_FILTER.split(',')) {
    u.searchParams.append('q', '#' + tag);
  }
  const headers = {
    Authorization: `Bearer ${MASTODON_ACCESS_TOKEN}`
  };
  const response = await fetch(u, { headers });
  if (!response.ok) {
    // Fail with a clear reason instead of trying to iterate over an error payload
    throw new Error(`Loading the public timeline failed with HTTP status ${response.status}`);
  }
  const body: unknown = await response.json();
  if (!Array.isArray(body)) {
    throw new Error('Loading the public timeline did not return a list of posts');
  }
  const latestPosts = body as Post[];
  this.logger.info('searched posts', latestPosts.length);
  for (const post of latestPosts) {
    await this.checkAndSavePost(post);
  }
}
/**
 * Sets up the logger and the playlist adders, opens the websocket connection and kicks off
 * loading the posts since the last run in the background.
 *
 * Private, so instances can only be created from inside the class (see `init`).
 */
private constructor() {
  const timelineLogger = new Logger('Timeline');
  this.logger = timelineLogger;
  timelineLogger.log('Constructing timeline object');

  this.playlistAdders = [new YoutubePlaylistAdder(), new SpotifyPlaylistAdder()];
  this.startWebsocket();

  // Not awaited: a constructor cannot be async, so the outcome is only logged
  void (async () => {
    try {
      await this.loadPostsSinceLastRun();
      this.logger.info('loaded posts since last run');
    } catch (e) {
      this.logger.error('cannot fetch latest posts', e);
    }
  })();
}
/**
 * Creates the single `TimelineReader` instance if it does not exist yet.
 * Calling it again afterwards has no effect.
 */
public static init() {
  if (this._instance !== undefined) {
    return;
  }
  this._instance = new TimelineReader();
}
/**
 * The single `TimelineReader` instance; it is created on first access.
 */
public static get instance(): TimelineReader {
  // Make sure the instance exists before handing it out
  TimelineReader.init();
  const reader = this._instance;
  return reader;
}
}