rework spotify import, add import webUI

This commit is contained in:
2026-02-09 03:53:05 -08:00
parent 4d77999edb
commit 126b77fa87
3 changed files with 369 additions and 248 deletions

View File

@@ -1,43 +1,23 @@
package migrate package migrate
import ( import (
"archive/zip"
"context" "context"
"encoding/json"
"fmt" "fmt"
"io"
"os" "os"
"path/filepath"
"strings" "strings"
"time" "time"
"muzi/db"
"github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5"
) )
type SpotifyTrack struct { type SpotifyTrack struct {
Timestamp string `json:"ts"` Timestamp string `json:"ts"`
Platform string `json:"-"`
Played int `json:"ms_played"` Played int `json:"ms_played"`
Country string `json:"-"`
IP string `json:"-"`
Name string `json:"master_metadata_track_name"` Name string `json:"master_metadata_track_name"`
Artist string `json:"master_metadata_album_artist_name"` Artist string `json:"master_metadata_album_artist_name"`
Album string `json:"master_metadata_album_album_name"` Album string `json:"master_metadata_album_album_name"`
TrackURI string `json:"-"`
Episode string `json:"-"`
Show string `json:"-"`
EpisodeURI string `json:"-"`
Audiobook string `json:"-"`
AudiobookURI string `json:"-"`
AudiobookChapterURI string `json:"-"`
AudiobookChapter string `json:"-"`
ReasonStart string `json:"-"`
ReasonEnd string `json:"-"`
Shuffle bool `json:"-"`
Skipped bool `json:"-"`
Offline bool `json:"-"`
OfflineTimestamp int `json:"-"`
Incognito bool `json:"-"`
} }
type existingTrack struct { type existingTrack struct {
@@ -46,32 +26,88 @@ type existingTrack struct {
Artist string Artist string
} }
func getExistingTracks(conn *pgx.Conn, userId int, tracks []SpotifyTrack) (map[string]bool, error) { // trackSource implements pgx.CopyFromSource for efficient bulk inserts
type trackSource struct {
tracks []SpotifyTrack
existing map[string]struct{}
idx int
userId int
}
func (s *trackSource) Next() bool {
for s.idx < len(s.tracks) {
t := s.tracks[s.idx]
key := fmt.Sprintf("%s|%s|%s", t.Artist, t.Name, t.Timestamp)
if _, exists := s.existing[key]; exists {
s.idx++
continue
}
s.idx++
return true
}
return false
}
func (s *trackSource) Values() ([]any, error) {
// idx is already incremented in Next(), so use idx-1
t := s.tracks[s.idx-1]
// parse spotify string timestamp to a real time object
ts, err := time.Parse(time.RFC3339Nano, t.Timestamp)
if err != nil {
return nil, err
}
return []any{
s.userId,
ts,
t.Name,
t.Artist,
t.Album,
t.Played,
"spotify",
}, nil
}
func (s *trackSource) Err() error {
return nil
}
// find tracks with the same artist and name in a 20 second +- timeframe
func getExistingTracks(
userId int,
tracks []SpotifyTrack,
) (map[string]struct{}, error) {
// check for empty track import
if len(tracks) == 0 { if len(tracks) == 0 {
return map[string]bool{}, nil return map[string]struct{}{}, nil
} }
// find min/max timestamps in this batch to create time window // find min/max timestamps in this batch to create time window to
// search for duplicates
var minTs, maxTs time.Time var minTs, maxTs time.Time
// go through each track (t) in the array
for _, t := range tracks { for _, t := range tracks {
// parse spotify timestamp into operational time datatype
ts, err := time.Parse(time.RFC3339Nano, t.Timestamp) ts, err := time.Parse(time.RFC3339Nano, t.Timestamp)
if err != nil { if err != nil {
continue continue
} }
// if minTs uninitialized or timestamp predates minTs
if minTs.IsZero() || ts.Before(minTs) { if minTs.IsZero() || ts.Before(minTs) {
minTs = ts minTs = ts
} }
// if timestamp comes after maxTs
if ts.After(maxTs) { if ts.After(maxTs) {
maxTs = ts maxTs = ts
} }
} }
// check if all parses failed, therefore no way to find duplicate by time
if minTs.IsZero() { if minTs.IsZero() {
return map[string]bool{}, nil return map[string]struct{}{}, nil
} }
// query only tracks within [min-20s, max+20s] window using timestamp index // find all tracks within [min-20s, max+20s] window (duplicates)
rows, err := conn.Query(context.Background(), rows, err := db.Pool.Query(context.Background(),
`SELECT song_name, artist, timestamp `SELECT song_name, artist, timestamp
FROM history FROM history
WHERE user_id = $1 WHERE user_id = $1
@@ -84,25 +120,39 @@ func getExistingTracks(conn *pgx.Conn, userId int, tracks []SpotifyTrack) (map[s
} }
defer rows.Close() defer rows.Close()
existing := make(map[string]bool) // prepare map to hold duplicate track keys
existing := make(map[string]struct{})
// create array of tracks
var existingTracks []existingTrack var existingTracks []existingTrack
// for each repeat play (-20s +20s)
for rows.Next() { for rows.Next() {
// write the data from json to the track in memory
var t existingTrack var t existingTrack
if err := rows.Scan(&t.SongName, &t.Artist, &t.Timestamp); err != nil { if err := rows.Scan(&t.SongName, &t.Artist, &t.Timestamp); err != nil {
continue continue
} }
// add track in memory to existingTracks array
existingTracks = append(existingTracks, t) existingTracks = append(existingTracks, t)
} }
// check each incoming track against existing ones within 20 second window // index existing tracks by artist|name for O(1) lookup
existingIndex := make(map[string][]time.Time)
for _, t := range existingTracks {
key := t.Artist + "|" + t.SongName
existingIndex[key] = append(existingIndex[key], t.Timestamp)
}
// check each new track against indexed existing tracks
for _, newTrack := range tracks { for _, newTrack := range tracks {
newTs, err := time.Parse(time.RFC3339Nano, newTrack.Timestamp) newTs, err := time.Parse(time.RFC3339Nano, newTrack.Timestamp)
if err != nil { if err != nil {
continue continue
} }
for _, existTrack := range existingTracks {
if newTrack.Name == existTrack.SongName && newTrack.Artist == existTrack.Artist { lookupKey := newTrack.Artist + "|" + newTrack.Name
diff := newTs.Sub(existTrack.Timestamp) if timestamps, found := existingIndex[lookupKey]; found {
for _, existTs := range timestamps {
diff := newTs.Sub(existTs)
if diff < 0 { if diff < 0 {
diff = -diff diff = -diff
} }
@@ -113,7 +163,7 @@ func getExistingTracks(conn *pgx.Conn, userId int, tracks []SpotifyTrack) (map[s
newTrack.Name, newTrack.Name,
newTrack.Timestamp, newTrack.Timestamp,
) )
existing[key] = true existing[key] = struct{}{}
break break
} }
} }
@@ -123,246 +173,69 @@ func getExistingTracks(conn *pgx.Conn, userId int, tracks []SpotifyTrack) (map[s
return existing, nil return existing, nil
} }
func JsonToDB(jsonFile string, userId int) error { func ImportSpotify(tracks []SpotifyTrack, userId int) error {
conn, err := pgx.Connect(
context.Background(),
"postgres://postgres:postgres@localhost:5432/muzi",
)
if err != nil {
fmt.Fprintf(os.Stderr, "Cannot connect to muzi database: %v\n", err)
panic(err)
}
defer conn.Close(context.Background())
jsonData, err := os.ReadFile(jsonFile)
if err != nil {
fmt.Fprintf(os.Stderr, "Cannot read %s: %v\n", jsonFile, err)
return err
}
var tracks []SpotifyTrack
err = json.Unmarshal(jsonData, &tracks)
if err != nil {
fmt.Fprintf(os.Stderr, "Cannot unmarshal %s: %v\n", jsonFile, err)
return err
}
totalImported := 0 totalImported := 0
batchSize := 1000 batchSize := 1000
for batchStart := 0; batchStart < len(tracks); batchStart += batchSize { for batchStart := 0; batchStart < len(tracks); batchStart += batchSize {
// get the limit of the current batch
batchEnd := batchStart + batchSize batchEnd := batchStart + batchSize
// set limit to track array length in current batch too big
if batchEnd > len(tracks) { if batchEnd > len(tracks) {
batchEnd = len(tracks) batchEnd = len(tracks)
} }
// create array to hold valid listens
var validTracks []SpotifyTrack var validTracks []SpotifyTrack
for i := batchStart; i < batchEnd; i++ { for i := batchStart; i < batchEnd; i++ {
// if current track is listened to for 20 sec and name and artist is not
// blank, add to validTracks array
if tracks[i].Played >= 20000 && tracks[i].Name != "" && tracks[i].Artist != "" { if tracks[i].Played >= 20000 && tracks[i].Name != "" && tracks[i].Artist != "" {
validTracks = append(validTracks, tracks[i]) validTracks = append(validTracks, tracks[i])
} }
} }
// if there are no valid tracks in this batch, go to the next
if len(validTracks) == 0 { if len(validTracks) == 0 {
continue continue
} }
existing, err := getExistingTracks(conn, userId, validTracks) // find replayed tracks in the batch that was just gathered
existing, err := getExistingTracks(userId, validTracks)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "Error checking existing tracks: %v\n", err) fmt.Fprintf(os.Stderr, "Error checking existing tracks: %v\n", err)
continue continue
} }
var batchValues []string // get data of struct pointer
var batchArgs []any src := &trackSource{
tracks: validTracks,
for _, t := range validTracks { existing: existing,
key := fmt.Sprintf("%s|%s|%s", t.Artist, t.Name, t.Timestamp) idx: 0,
if existing[key] { userId: userId,
continue
} }
batchValues = append(batchValues, fmt.Sprintf( // insert all valid tracks from current batch into db
"($%d, $%d, $%d, $%d, $%d, $%d, $%d)", copyCount, err := db.Pool.CopyFrom(
len(batchArgs)+1,
len(batchArgs)+2,
len(batchArgs)+3,
len(batchArgs)+4,
len(batchArgs)+5,
len(batchArgs)+6,
len(batchArgs)+7,
))
batchArgs = append(
batchArgs,
userId,
t.Timestamp,
t.Name,
t.Artist,
t.Album,
t.Played,
"spotify",
)
}
if len(batchValues) == 0 {
continue
}
// TODO: replace strings.Join with pgx copy
_, err = conn.Exec(
context.Background(), context.Background(),
`INSERT INTO history (user_id, timestamp, song_name, artist, album_name, ms_played, platform) VALUES `+ pgx.Identifier{"history"},
strings.Join( []string{
batchValues, "user_id",
", ", "timestamp",
)+ "song_name",
` ON CONFLICT ON CONSTRAINT history_user_id_song_name_artist_timestamp_key DO NOTHING;`, "artist",
batchArgs..., "album_name",
"ms_played",
"platform",
},
src,
) )
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "Batch insert failed: %v\n", err) if !strings.Contains(err.Error(), "duplicate") {
fmt.Fprintf(os.Stderr, "Spotify batch insert failed: %v\n", err)
}
} else { } else {
totalImported += len(batchValues) totalImported += int(copyCount)
}
}
fmt.Printf("%d tracks imported from %s\n", totalImported, jsonFile)
return nil
}
func AddDirToDB(path string, userId int) error {
dirs, err := os.ReadDir(path)
if err != nil {
fmt.Fprintf(os.Stderr, "Error while reading path: %s: %v\n", path, err)
return err
}
for _, dir := range dirs {
subPath := filepath.Join(
path,
dir.Name(),
"Spotify Extended Streaming History",
)
entries, err := os.ReadDir(subPath)
if err != nil {
fmt.Fprintf(os.Stderr, "Error while reading path: %s: %v\n", subPath, err)
return err
}
for _, f := range entries {
jsonFileName := f.Name()
if !strings.Contains(jsonFileName, ".json") {
continue
}
if strings.Contains(jsonFileName, "Video") {
continue
}
jsonFilePath := filepath.Join(subPath, jsonFileName)
err = JsonToDB(jsonFilePath, userId)
if err != nil {
fmt.Fprintf(os.Stderr,
"Error adding json data (%s) to muzi database: %v", jsonFilePath, err)
return err
}
}
}
return nil
}
func ImportSpotify(userId int) error {
path := filepath.Join(".", "imports", "spotify", "zip")
targetBase := filepath.Join(".", "imports", "spotify", "extracted")
entries, err := os.ReadDir(path)
if err != nil {
fmt.Fprintf(os.Stderr, "Error reading path: %s: %v\n", path, err)
return err
}
for _, f := range entries {
reader, err := zip.OpenReader(filepath.Join(path, f.Name()))
if err != nil {
fmt.Fprintf(os.Stderr, "Error opening zip: %s: %v\n",
filepath.Join(path, f.Name()), err)
continue
}
defer reader.Close()
fileName := f.Name()
fileFullPath := filepath.Join(path, fileName)
fileBaseName := fileName[:(strings.LastIndex(fileName, "."))]
targetDirFullPath := filepath.Join(targetBase, fileBaseName)
err = Extract(fileFullPath, targetDirFullPath)
if err != nil {
fmt.Fprintf(os.Stderr, "Error extracting %s to %s: %v\n",
fileFullPath, targetDirFullPath, err)
return err
}
}
err = AddDirToDB(targetBase, userId)
if err != nil {
fmt.Fprintf(os.Stderr,
"Error adding directory of data (%s) to muzi database: %v\n",
targetBase, err)
return err
}
return nil
}
func Extract(path string, target string) error {
archive, err := zip.OpenReader(path)
if err != nil {
fmt.Fprintf(os.Stderr, "Error opening zip: %s: %v\n", path, err)
return err
}
defer archive.Close()
zipDir := filepath.Base(path)
zipDir = zipDir[:(strings.LastIndex(zipDir, "."))]
for _, f := range archive.File {
filePath := filepath.Join(target, f.Name)
fmt.Println("extracting:", filePath)
if !strings.HasPrefix(
filePath,
filepath.Clean(target)+string(os.PathSeparator),
) {
err = fmt.Errorf("Invalid file path: %s", filePath)
fmt.Fprintf(os.Stderr, "%v\n", err)
return err
}
if f.FileInfo().IsDir() {
fmt.Println("Creating Directory", filePath)
os.MkdirAll(filePath, os.ModePerm)
continue
}
if err := os.MkdirAll(filepath.Dir(filePath), os.ModePerm); err != nil {
fmt.Fprintf(os.Stderr, "Error making directory: %s: %v\n",
filepath.Dir(filePath), err)
return err
}
fileToExtract, err := os.OpenFile(
filePath,
os.O_WRONLY|os.O_CREATE|os.O_TRUNC,
f.Mode(),
)
if err != nil {
fmt.Fprintf(os.Stderr, "Error opening file: %s: %v\n", filePath, err)
return err
}
defer fileToExtract.Close()
extractedFile, err := f.Open()
if err != nil {
fmt.Fprintf(os.Stderr, "Error opening file: %s: %v\n", f.Name, err)
return err
}
defer extractedFile.Close()
if _, err := io.Copy(fileToExtract, extractedFile); err != nil {
fmt.Fprintf(
os.Stderr,
"Error while copying file: %s to: %s: %v\n",
fileToExtract.Name(),
extractedFile,
err,
)
return err
} }
} }
return nil return nil

148
templates/import.gohtml Normal file
View File

@@ -0,0 +1,148 @@
<!doctype html>
<html>
<head>
<link rel="stylesheet" href="/files/style.css" type="text/css">
<title>muzi | Import Data</title>
<style>
/* Debug: Force bar visibility */
#progress-fill {
background: linear-gradient(90deg, #0f0, #0a0) !important;
min-width: 2px !important;
}
</style>
</head>
<body>
<div class="import-container">
<h1>Import Your Listening Data</h1>
<p>Welcome, {{.Username}}!</p>
<div class="import-section">
<h2>Spotify</h2>
<p>Import your Spotify listening history from your data export.</p>
<form method="POST" action="/import/spotify" enctype="multipart/form-data">
<input type="file" name="json_files" accept=".json,application/json" multiple required>
<button type="submit">Upload Spotify Data</button>
</form>
</div>
<div class="import-section">
<h2>Last.fm</h2>
<p>Import your Last.fm scrobbles.</p>
<form id="lastfm-form" method="POST" action="/import/lastfm">
<input type="text" name="lastfm_username" placeholder="Last.FM Username" required>
<input type="text" name="lastfm_api_key" placeholder="Last.FM API Key" required>
<button type="submit">Import from Last.fm</button>
</form>
<div id="lastfm-progress" class="progress-container" style="display: none;">
<div class="progress-status" id="progress-status">Initializing...</div>
<div class="progress-bar-wrapper">
<div class="progress-bar-fill" id="progress-fill"></div>
<div class="progress-text" id="progress-text">0%</div>
</div>
<div class="progress-tracks" id="progress-tracks"></div>
<div class="progress-error" id="progress-error"></div>
<div class="progress-success" id="progress-success"></div>
</div>
</div>
</div>
<script>
document.getElementById('lastfm-form').addEventListener('submit', async function(e) {
e.preventDefault();
const form = e.target;
const formData = new FormData(form);
const progressContainer = document.getElementById('lastfm-progress');
const progressFill = document.getElementById('progress-fill');
const progressText = document.getElementById('progress-text');
const progressStatus = document.getElementById('progress-status');
const progressTracks = document.getElementById('progress-tracks');
const progressError = document.getElementById('progress-error');
const progressSuccess = document.getElementById('progress-success');
// Reset and show progress container
progressFill.style.width = '0%';
progressFill.classList.add('animating');
progressText.textContent = '0%';
progressStatus.textContent = 'Starting import...';
progressTracks.textContent = '';
progressError.textContent = '';
progressSuccess.textContent = '';
progressContainer.style.display = 'block';
try {
// Convert FormData to URLSearchParams for proper form encoding
const params = new URLSearchParams();
for (const [key, value] of formData) {
params.append(key, value);
}
// Start the import
const response = await fetch('/import/lastfm', {
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
},
body: params
});
if (!response.ok) {
throw new Error('Failed to start import: ' + response.statusText);
}
const data = await response.json();
const jobId = data.job_id;
// Connect to SSE endpoint
const eventSource = new EventSource('/import/lastfm/progress?job=' + jobId);
eventSource.onmessage = function(event) {
const update = JSON.parse(event.data);
if (update.status === 'connected') {
return;
}
if (update.total_pages > 0) {
const completed = update.completed_pages || update.current_page || 0;
const percent = Math.round((completed / update.total_pages) * 100);
progressFill.style.width = percent + '%';
progressText.textContent = percent + '%';
progressStatus.textContent = 'Processing page ' + completed + ' of ' + update.total_pages;
}
if (update.tracks_imported !== undefined) {
progressTracks.textContent = update.tracks_imported.toLocaleString() + ' tracks imported';
}
if (update.status === 'completed') {
progressFill.classList.remove('animating');
progressStatus.textContent = 'Import completed!';
progressSuccess.textContent = 'Successfully imported ' + update.tracks_imported.toLocaleString() + ' tracks from Last.fm';
eventSource.close();
form.reset();
} else if (update.status === 'error') {
progressFill.classList.remove('animating');
progressStatus.textContent = 'Import failed';
progressError.textContent = 'Error: ' + (update.error || 'Unknown error');
eventSource.close();
}
};
eventSource.onerror = function(err) {
progressFill.classList.remove('animating');
progressStatus.textContent = 'Connection error';
progressError.textContent = 'Lost connection to server. The import may still be running in the background.';
eventSource.close();
};
} catch (err) {
progressFill.classList.remove('animating');
progressStatus.textContent = 'Import failed';
progressError.textContent = 'Error: ' + err.message;
}
});
</script>
</body>
</html>

View File

@@ -8,9 +8,12 @@ import (
"errors" "errors"
"fmt" "fmt"
"html/template" "html/template"
"io"
"mime/multipart"
"net/http" "net/http"
"os" "os"
"strconv" "strconv"
"strings"
"sync" "sync"
"muzi/db" "muzi/db"
@@ -23,6 +26,9 @@ import (
"github.com/jackc/pgtype" "github.com/jackc/pgtype"
) )
// 50 MB
const maxHeaderSize int64 = 50 * 1024 * 1024
// will add permissions later // will add permissions later
type Session struct { type Session struct {
Username string Username string
@@ -413,6 +419,99 @@ func importPageHandler() http.HandlerFunc {
} }
} }
func checkUploads(uploads []*multipart.FileHeader, w http.ResponseWriter) []migrate.SpotifyTrack {
if len(uploads) < 1 {
http.Error(w, "No files uploaded", http.StatusBadRequest)
return nil
}
if len(uploads) > 30 {
http.Error(w, "Too many files uploaded (30 max)", http.StatusBadRequest)
return nil
}
var allTracks []migrate.SpotifyTrack
for _, u := range uploads {
if u.Size > maxHeaderSize {
fmt.Fprintf(os.Stderr, "File too large: %s\n", u.Filename)
continue
}
if strings.Contains(u.Filename, "..") ||
strings.Contains(u.Filename, "/") ||
strings.Contains(u.Filename, "\x00") {
fmt.Fprintf(os.Stderr, "Invalid filename: %s\n", u.Filename)
continue
}
file, err := u.Open()
if err != nil {
fmt.Fprintf(os.Stderr, "Error opening %s: %v\n", u.Filename, err)
continue
}
reader := io.LimitReader(file, maxHeaderSize)
data, err := io.ReadAll(reader)
file.Close()
if err != nil {
fmt.Fprintf(os.Stderr, "Error reading %s: %v\n", u.Filename, err)
continue
}
if !json.Valid(data) {
http.Error(w, fmt.Sprintf("Invalid JSON in %s", u.Filename),
http.StatusBadRequest)
return nil
}
var tracks []migrate.SpotifyTrack
if err := json.Unmarshal(data, &tracks); err != nil {
fmt.Fprintf(os.Stderr,
"Error parsing %s: %v\n", u.Filename, err)
continue
}
allTracks = append(allTracks, tracks...)
}
return allTracks
}
func importSpotifyHandler(w http.ResponseWriter, r *http.Request) {
username := getLoggedInUsername(r)
if username == "" {
http.Error(w, "Unauthorized", http.StatusUnauthorized)
return
}
if r.Method != "POST" {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
}
userId, err := getUserIdByUsername(r.Context(), username)
if err != nil {
fmt.Fprintf(os.Stderr, "Cannot find user %s: %v\n", username, err)
http.Error(w, "User not found", http.StatusNotFound)
return
}
// 32MB memory max
err = r.ParseMultipartForm(32 << 20)
if err != nil {
http.Error(w, "Error parsing form", http.StatusBadRequest)
return
}
allTracks := checkUploads(r.MultipartForm.File["json_files"], w)
if allTracks == nil {
return
}
if err := migrate.ImportSpotify(allTracks, userId); err != nil {
http.Error(w, "Failed to process tracks", http.StatusInternalServerError)
return
}
}
func importLastFMHandler(w http.ResponseWriter, r *http.Request) { func importLastFMHandler(w http.ResponseWriter, r *http.Request) {
username := getLoggedInUsername(r) username := getLoggedInUsername(r)
if username == "" { if username == "" {
@@ -531,6 +630,7 @@ func Start() {
r.Post("/createaccountsubmit", createAccount) r.Post("/createaccountsubmit", createAccount)
r.Post("/settings/duplicate-edits", updateDuplicateEditsSetting) r.Post("/settings/duplicate-edits", updateDuplicateEditsSetting)
r.Post("/import/lastfm", importLastFMHandler) r.Post("/import/lastfm", importLastFMHandler)
r.Post("/import/spotify", importSpotifyHandler)
r.Get("/import/lastfm/progress", importLastFMProgressHandler) r.Get("/import/lastfm/progress", importLastFMProgressHandler)
fmt.Printf("WebUI starting on %s\n", addr) fmt.Printf("WebUI starting on %s\n", addr)
prot := http.NewCrossOriginProtection() prot := http.NewCrossOriginProtection()