// 430 lines
// 13 KiB
// TypeScript

import {
HASHTAG_FILTER,
MASTODON_ACCESS_TOKEN,
MASTODON_INSTANCE,
ODESLI_API_KEY,
YOUTUBE_API_KEY
} from '$env/static/private';
import { log } from '$lib/log';
import type {
Account,
AccountAvatar,
Post,
SongThumbnailImage,
Tag,
TimelineEvent
} from '$lib/mastodon/response';
import { SongThumbnailImageKind } from '$lib/mastodon/response';
import type { OdesliResponse, Platform, SongInfo } from '$lib/odesliResponse';
import {
getAvatars,
getPosts,
getSongThumbnails,
removeAvatars,
saveAvatar,
savePost,
saveSongThumbnail
} from '$lib/server/db';
import { createFeed, saveAtomFeed } from '$lib/server/rss';
import { sleep } from '$lib/sleep';
import crypto from 'crypto';
import fs from 'fs/promises';
import sharp from 'sharp';
import { WebSocket } from 'ws';
/**
 * Finds the link targets inside a post's HTML content.
 * Must stay global (`g`): it is consumed through `String.prototype.matchAll`, which rejects non-global regexes.
 */
const URL_REGEX = /href="(?<postUrl>[^>]+?)" target="_blank"/gm;
/**
 * Extracts the video id from an Invidious watch URL.
 * Deliberately NOT global: it is used with `exec`, and a global regex would remember `lastIndex`
 * between calls and skip matches in the next URL it is applied to.
 */
const INVIDIOUS_REGEX = /invidious.*?watch.*?v=(?<videoId>[a-zA-Z_0-9-]+)/;
/**
 * Extracts the video id from `youtube.com/...v=<id>` and `youtu.be/<id>` URLs.
 * Not global for the same reason as INVIDIOUS_REGEX.
 */
const YOUTUBE_REGEX =
  /https?:\/\/(www\.)?youtu((be\.com\/.*?v=)|(\.be\/))(?<videoId>[a-zA-Z_0-9-]+)/;
export class TimelineReader {
/** The shared reader instance; remains `undefined` until `init()` assigns it. */
private static _instance: TimelineReader;
/**
 * Decides whether a YouTube video should be treated as music by querying the YouTube Data API.
 *
 * Without a configured API key every video is assumed to be music. Otherwise a video qualifies
 * when its snippet tags contain `music` or when the title of its category is `Music`.
 *
 * @param videoId The YouTube video id to inspect.
 * @returns `true` if the video is (assumed to be) music; `false` if it is not, cannot be found,
 * or one of the API requests did not succeed.
 */
private static async isMusicVideo(videoId: string): Promise<boolean> {
  if (!YOUTUBE_API_KEY || YOUTUBE_API_KEY === 'CHANGE_ME') {
    // Assume that it *is* a music link when no YT API key is provided
    return true;
  }
  const searchParams = new URLSearchParams([
    ['part', 'snippet'],
    ['id', videoId],
    ['key', YOUTUBE_API_KEY]
  ]);
  const youtubeVideoUrl = new URL(`https://www.googleapis.com/youtube/v3/videos?${searchParams}`);
  const resp = await fetch(youtubeVideoUrl);
  if (!resp.ok) {
    log.warn('YouTube video lookup failed', videoId, resp.status);
    return false;
  }
  const respObj = await resp.json();
  // Optional chaining: a payload without an `items` array must not crash the lookup
  if (!respObj?.items?.length) {
    log.warn('Could not find video with id', videoId);
    return false;
  }
  const item = respObj.items[0];
  if (!item.snippet) {
    log.warn('Could not load snippet for video', videoId, item);
    return false;
  }
  if (item.snippet.tags?.includes('music')) {
    return true;
  }
  // No explicit tag: fall back to the title of the video's category
  const categorySearchParams = new URLSearchParams([
    ['part', 'snippet'],
    ['id', item.snippet.categoryId],
    ['key', YOUTUBE_API_KEY]
  ]);
  const youtubeCategoryUrl = new URL(
    `https://www.googleapis.com/youtube/v3/videoCategories?${categorySearchParams}`
  );
  const categoryResp = await fetch(youtubeCategoryUrl);
  if (!categoryResp.ok) {
    log.warn('YouTube category lookup failed', videoId, categoryResp.status);
    return false;
  }
  const categoryObj = await categoryResp.json();
  const categoryTitle: string | undefined = categoryObj?.items?.[0]?.snippet?.title;
  return categoryTitle === 'Music';
}
/**
 * Collects song information for every link found in a post.
 *
 * Each link matched by URL_REGEX is handed to `getSongInfo`; links for which no song data
 * comes back are skipped.
 *
 * @param post The post whose `content` is scanned for links.
 * @returns The song infos found, in the order their links appear in the content.
 */
public static async getSongInfoInPost(post: Post): Promise<SongInfo[]> {
  // The href is taken out of HTML markup, where `&` inside attribute values is written as
  // `&amp;` (and quotes/angle brackets as entities). Decode before treating it as a URL,
  // otherwise query parameters after the first one get mangled (`&amp;t=10` -> key `amp;t`).
  // `&amp;` is decoded last so `&amp;lt;` does not get decoded twice.
  const decodeHtmlAttribute = (value: string): string =>
    value
      .replace(/&quot;/g, '"')
      .replace(/&#0?39;/g, "'")
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&amp;/g, '&');

  const songs: SongInfo[] = [];
  for (const match of post.content.matchAll(URL_REGEX)) {
    const rawUrl = match.groups?.postUrl;
    if (rawUrl === undefined) {
      log.warn('Match listed in allMatches, but either it or its groups are undefined', match);
      continue;
    }
    const urlMatch = decodeHtmlAttribute(rawUrl);
    let url: URL;
    try {
      url = new URL(urlMatch);
    } catch (e) {
      log.error('URL found via Regex does not seem to be a valud url', urlMatch, e);
      continue;
    }
    // Check *all* found url and let odesli determine if it is music or not.
    // Lookups run one after another rather than in parallel.
    log.debug(`Checking ${url} if it contains song data`);
    const info = await TimelineReader.getSongInfo(url);
    log.debug(`Found song info for ${url}?`, info);
    if (info) {
      songs.push(info);
    }
  }
  return songs;
}
/**
 * Looks up song metadata for a URL through the song.link (Odesli) API.
 *
 * Invidious watch links are rewritten to the equivalent youtube.com URL before the lookup.
 * If the resolved entity lists the `youtube` platform, the video is additionally checked with
 * `isMusicVideo` and rejected when it is not music. A 429 response triggers a 10 second pause
 * and a retry until `remainingTries` is used up.
 *
 * @param url The URL that was posted.
 * @param remainingTries How many attempts are left, counting this one.
 * @returns The song info, or `null` if nothing usable was found or the lookup failed.
 */
private static async getSongInfo(url: URL, remainingTries = 6): Promise<SongInfo | null> {
  // `<=` so a negative count can never recurse forever
  if (remainingTries <= 0) {
    log.error('No tries remaining. Lookup failed!');
    return null;
  }
  if (url.hostname === 'songwhip.com') {
    // song.link doesn't support songwhip links and songwhip themselves will provide metadata if you pass in a
    // Apple Music/Spotify/etc link, but won't when provided with their own link, so no way to extract song info
    // except maybe scraping their HTML
    return null;
  }
  // `exec` on a global (`g`) regex resumes from the regex's `lastIndex`, which survives between
  // calls. Resetting it guarantees every URL is scanned from its first character instead of
  // from wherever the previous successful match ended.
  const findVideoId = (pattern: RegExp, text: string): string | undefined => {
    pattern.lastIndex = 0;
    return pattern.exec(text)?.groups?.videoId;
  };
  const videoId = findVideoId(INVIDIOUS_REGEX, url.href);
  const urlString =
    videoId !== undefined ? `https://youtube.com/watch?v=${videoId}` : url.toString();
  const odesliParams = new URLSearchParams();
  odesliParams.append('url', urlString);
  odesliParams.append('userCountry', 'DE');
  odesliParams.append('songIfSingle', 'true');
  if (ODESLI_API_KEY && ODESLI_API_KEY !== 'CHANGE_ME') {
    odesliParams.append('key', ODESLI_API_KEY);
  }
  const odesliApiUrl = `https://api.song.link/v1-alpha.1/links?${odesliParams}`;
  try {
    const response = await fetch(odesliApiUrl);
    if (response.status === 429) {
      log.warn('song.link rate limit reached. Trying again in 10 seconds');
      await sleep(10_000);
      return await TimelineReader.getSongInfo(url, remainingTries - 1);
    }
    const odesliInfo: OdesliResponse = await response.json();
    if (!odesliInfo || !odesliInfo.entitiesByUniqueId || !odesliInfo.entityUniqueId) {
      return null;
    }
    const info = odesliInfo.entitiesByUniqueId[odesliInfo.entityUniqueId];
    if (!info) {
      // The response names an entity it does not actually contain
      return null;
    }
    const platform: Platform = 'youtube';
    if (info.platforms.includes(platform)) {
      // NOTE(review): the last fallback never yields `undefined` (`split(...).pop()` always
      // returns a string), so for page URLs without `/y/` the whole pathname is used as id.
      const youtubeId =
        videoId ??
        findVideoId(YOUTUBE_REGEX, url.href) ??
        new URL(odesliInfo.pageUrl).pathname.split('/y/').pop();
      if (youtubeId === undefined) {
        log.warn('Looks like a youtube video, but could not extract a video id', url, odesliInfo);
        return null;
      }
      const isMusic = await TimelineReader.isMusicVideo(youtubeId);
      if (!isMusic) {
        log.debug('Probably not a music video', url);
        return null;
      }
    }
    return {
      ...info,
      pageUrl: odesliInfo.pageUrl,
      youtubeUrl: odesliInfo.linksByPlatform[platform]?.url,
      postedUrl: url.toString()
    } as SongInfo;
  } catch (e) {
    log.error(`Failed to load ${url} info from song.link`, e);
    return null;
  }
}
/**
 * Writes a resized copy of an image to `<folder>/<baseName>_<suffix>` unless that file exists.
 *
 * @param baseName File name without size/format suffix.
 * @param size Target size in pixels, passed to sharp's `resize`.
 * @param suffix Size descriptor plus extension, e.g. `2x.webp`.
 * @param folder Directory the file is written to.
 * @param sharpAvatar Sharp instance holding the source image; it is not modified.
 * @returns The path of the written file, or `null` if the file already existed and nothing was written.
 */
private static async resizeAvatar(
  baseName: string,
  size: number,
  suffix: string,
  folder: string,
  sharpAvatar: sharp.Sharp
): Promise<string | null> {
  const fileName = `${folder}/${baseName}_${suffix}`;
  let exists = true;
  try {
    await fs.access(fileName, fs.constants.F_OK);
  } catch {
    exists = false;
  }
  if (exists) {
    log.debug('File already exists', fileName);
    return null;
  }
  log.debug('Saving avatar', fileName);
  // Callers hand the same Sharp instance to several concurrent resize jobs. `resize` mutates
  // the instance it is called on, so work on a clone to keep each output's settings isolated.
  await sharpAvatar.clone().resize(size).toFile(fileName);
  return fileName;
}
/**
 * Starts one resize-and-store job per pixel density (1x..maxPixelDensity) and image format
 * for an account avatar. Files go to the `avatars` folder; each result is passed to the
 * database helper `saveAvatar`.
 *
 * @param avatarFilenameBase File name without size/format suffix.
 * @param baseSize Size in pixels at 1x; multiplied by the density for larger variants.
 * @param maxPixelDensity Highest density to generate (inclusive).
 * @param accountUrl URL of the account the avatar belongs to.
 * @param formats File extensions to generate.
 * @param avatar Raw image data.
 * @returns The already running jobs, ordered by density and then by format.
 */
private static resizeAvatarPromiseMaker(
  avatarFilenameBase: string,
  baseSize: number,
  maxPixelDensity: number,
  accountUrl: string,
  formats: string[],
  avatar: ArrayBuffer
): Promise<void>[] {
  const source = sharp(avatar);
  const jobs: Promise<void>[] = [];
  for (let density = 1; density <= maxPixelDensity; density++) {
    const sizeDescriptor = `${density}x`;
    for (const format of formats) {
      const job = TimelineReader.resizeAvatar(
        avatarFilenameBase,
        baseSize * density,
        `${sizeDescriptor}.${format}`,
        'avatars',
        source
      ).then((file) =>
        // NOTE(review): `file` is null when the image already existed on disk; it is still
        // forwarded to saveAvatar — confirm the database helper expects that.
        saveAvatar({ accountUrl, file, sizeDescriptor } as AccountAvatar)
      );
      jobs.push(job);
    }
  }
  return jobs;
}
/**
 * Starts one resize-and-store job per pixel density (1x..maxPixelDensity) and image format
 * for a song thumbnail. Files go to the `thumbnails` folder; each result is passed to the
 * database helper `saveSongThumbnail`.
 *
 * @param filenameBase File name without size/format suffix.
 * @param baseSize Size in pixels at 1x; multiplied by the density for larger variants.
 * @param maxPixelDensity Highest density to generate (inclusive).
 * @param songThumbnailUrl Original thumbnail URL the images are derived from.
 * @param formats File extensions to generate.
 * @param image Raw image data.
 * @param kind Thumbnail kind stored alongside each generated file.
 * @returns The already running jobs, ordered by density and then by format.
 */
private static resizeThumbnailPromiseMaker(
  filenameBase: string,
  baseSize: number,
  maxPixelDensity: number,
  songThumbnailUrl: string,
  formats: string[],
  image: ArrayBuffer,
  kind: SongThumbnailImageKind
): Promise<void>[] {
  const source = sharp(image);
  const jobs: Promise<void>[] = [];
  for (let density = 1; density <= maxPixelDensity; density++) {
    const sizeDescriptor = `${density}x`;
    for (const format of formats) {
      const job = TimelineReader.resizeAvatar(
        filenameBase,
        baseSize * density,
        `${sizeDescriptor}.${format}`,
        'thumbnails',
        source
      ).then((file) =>
        // NOTE(review): `file` is null when the image already existed on disk; it is still
        // forwarded to saveSongThumbnail — confirm the database helper expects that.
        saveSongThumbnail({ songThumbnailUrl, file, sizeDescriptor, kind } as SongThumbnailImage)
      );
      jobs.push(job);
    }
  }
  return jobs;
}
/**
 * Downloads an account's avatar and stores resized variants (50px base, 1x-3x, webp/avif/jpeg).
 *
 * If the stored avatar's file name base differs from the one derived from the current avatar
 * URL, the old database entries and files are removed first. Failures are logged, never thrown.
 *
 * @param account The account whose avatar should be cached.
 */
private static async saveAvatar(account: Account): Promise<void> {
  try {
    const existingAvatars = await getAvatars(account.url, 1);
    const existingAvatarBase = existingAvatars.shift()?.file.split('/').pop()?.split('_').shift();
    // `||` rather than `??`: a path ending in `/` yields an empty string, which is no usable name
    const avatarFilenameBase =
      new URL(account.avatar).pathname.split('/').pop()?.split('.').shift() || account.acct;

    // Download before touching the old files, so a failed download does not leave the
    // account without any avatar at all.
    const avatarResponse = await fetch(account.avatar);
    if (!avatarResponse.ok) {
      throw new Error(`Avatar download failed with HTTP status ${avatarResponse.status}`);
    }
    const avatar = await avatarResponse.arrayBuffer();

    // User's avatar changed. Remove the old one!
    if (existingAvatarBase && existingAvatarBase !== avatarFilenameBase) {
      await removeAvatars(account.url);
      const staleFiles = (await fs.readdir('avatars')).filter((x) =>
        x.startsWith(existingAvatarBase + '_')
      );
      // allSettled: one file that cannot be deleted must not abort the refresh
      await Promise.allSettled(
        staleFiles.map((x) => {
          log.debug('Removing existing avatar file', x);
          return fs.unlink('avatars/' + x);
        })
      );
    }

    await Promise.all(
      TimelineReader.resizeAvatarPromiseMaker(
        avatarFilenameBase,
        50,
        3,
        account.url,
        ['webp', 'avif', 'jpeg'],
        avatar
      )
    );
  } catch (e) {
    log.error('Could not resize and save avatar for', account.acct, account.avatar, e);
  }
}
/**
 * Downloads and stores resized thumbnails for each song that has a thumbnail URL and no
 * stored thumbnails yet. Two sets are produced per song: large (200px base) and small
 * (60px base), each in 1x-3x and webp/avif/jpeg. File names are derived from the SHA-256
 * of the thumbnail URL. A failure for one song is logged and does not stop the others.
 *
 * @param songs The songs whose thumbnails should be cached.
 */
private static async saveSongThumbnails(songs: SongInfo[]): Promise<void> {
  const formats = ['webp', 'avif', 'jpeg'];
  for (const song of songs) {
    if (!song.thumbnailUrl) {
      continue;
    }
    try {
      const existingThumbs = await getSongThumbnails(song);
      if (existingThumbs.length) {
        continue;
      }
      const fileBaseName = crypto.createHash('sha256').update(song.thumbnailUrl).digest('hex');
      const imageResponse = await fetch(song.thumbnailUrl);
      if (!imageResponse.ok) {
        // Do not feed an error page into the image pipeline
        throw new Error(`Thumbnail download failed with HTTP status ${imageResponse.status}`);
      }
      const image = await imageResponse.arrayBuffer();
      await Promise.all(
        TimelineReader.resizeThumbnailPromiseMaker(
          fileBaseName + '_large',
          200,
          3,
          song.thumbnailUrl,
          formats,
          image,
          SongThumbnailImageKind.Big
        )
      );
      await Promise.all(
        TimelineReader.resizeThumbnailPromiseMaker(
          fileBaseName + '_small',
          60,
          3,
          song.thumbnailUrl,
          formats,
          image,
          SongThumbnailImageKind.Small
        )
      );
    } catch (e) {
      log.error(
        'Could not resize and save song thumbnail for',
        song.pageUrl,
        song.thumbnailUrl,
        e
      );
    }
  }
}
/**
 * Connects to the instance's streaming API (`public:local` stream) and processes incoming
 * events. Only `update` events are handled: a post is stored when it contains at least one
 * recognised song link or carries one of the hashtags listed in HASHTAG_FILTER. After storing,
 * the author's avatar and the song thumbnails are cached and the Atom feed is regenerated
 * from the 100 most recent posts. When the connection closes, a reconnect is scheduled
 * after 10 seconds.
 */
private startWebsocket() {
  const streamUrl = `wss://${MASTODON_INSTANCE}/api/v1/streaming?type=subscribe&stream=public:local&access_token=${MASTODON_ACCESS_TOKEN}`;
  const socket = new WebSocket(streamUrl);

  socket.onopen = () => {
    log.log('Connected to WS');
  };

  socket.onmessage = async (event) => {
    try {
      const timelineEvent: TimelineEvent = JSON.parse(event.data.toString());
      if (timelineEvent.event !== 'update') {
        log.log('Ignoring ES event', timelineEvent.event);
        return;
      }
      const post: Post = JSON.parse(timelineEvent.payload);
      const wantedTags: string[] = HASHTAG_FILTER.split(',');
      const matchingTags: Tag[] = post.tags.filter((tag: Tag) => wantedTags.includes(tag.name));
      const songs = await TimelineReader.getSongInfoInPost(post);
      // A post is relevant if it links to a song or uses one of the configured hashtags
      const isRelevant = songs.length > 0 || matchingTags.length > 0;
      if (!isRelevant) {
        log.log('Ignoring post', post.url);
        return;
      }
      await savePost(post, songs);
      await TimelineReader.saveAvatar(post.account);
      await TimelineReader.saveSongThumbnails(songs);
      log.debug('Saved post', post.url);
      const recentPosts = await getPosts(null, null, 100);
      await saveAtomFeed(createFeed(recentPosts));
    } catch (e) {
      log.error('error message', event, event.data, e);
    }
  };

  socket.onclose = (event) => {
    log.warn(
      `Websocket connection to ${MASTODON_INSTANCE} closed. Code: ${event.code}, reason: '${event.reason}'`,
      event
    );
    const reconnect = () => {
      log.info(`Attempting to reconenct to WS`);
      this.startWebsocket();
    };
    setTimeout(reconnect, 10000);
  };

  socket.onerror = (event) => {
    log.error(
      `Websocket connection to ${MASTODON_INSTANCE} failed. ${event.type}: ${event.error}, message: '${event.message}'`
    );
  };
}
/**
 * Private, so instances can only be created from inside the class (see `init()`).
 * Constructing a reader immediately opens the streaming websocket.
 */
private constructor() {
log.log('Constructing timeline object');
this.startWebsocket();
}
/**
 * Creates the shared reader on the first call; later calls only log and return.
 */
public static init() {
  log.log('Timeline object init');
  if (this._instance !== undefined) {
    return;
  }
  this._instance = new TimelineReader();
}
/** Returns the shared reader, calling `init()` first so it exists on first access. */
public static get instance(): TimelineReader {
TimelineReader.init();
return this._instance;
}
}