changelog shortlog graph tags branches changeset files file revisions raw help

Mercurial > demo / annotate examples/db/mbdb.lisp

changeset 40: 6b652d7d6663
parent: 1ef551e24009
child: 81b7333f27f8
author: Richard Westhaver <ellis@rwest.io>
date: Sun, 14 Apr 2024 20:48:05 -0400
permissions: -rw-r--r--
description: examples
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
1
 ;;; examples/mbdb.lisp --- MusicBrainz Database import and analysis
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
2
 
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
3
 ;; This example shows how to migrate a set of complex JSON objects to
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
4
 ;; RocksDB using a dump from the MusicBrainz database
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
5
 ;; (https://musicbrainz.org/). The files are hosted at
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
6
 ;; https://packy.compiler.company/data/mbdump
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
7
 
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
8
 ;; we parse some of the database schema from the sql files here:
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
9
 ;; https://github.com/metabrainz/musicbrainz-server/tree/master/admin/sql
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
10
 
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
11
 ;;; Code:
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
12
 (defpackage :examples/mbdb
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
13
   (:use :cl :std :dat/json :net/fetch :obj/id :rdb :cli/clap :obj/uuid
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
14
         :sb-concurrency :log :dat/csv :dat/proto :sb-thread)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
15
   (:import-from :obj/uuid :make-uuid-from-string)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
16
   (:import-from :cli/progress :with-progress-bar :make-progress-bar
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
17
    :*progress-bar* :*progress-bar-enabled* :update-progress)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
18
   (:import-from :obj/time :parse-timestring :now :timestamp)
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
19
   (:import-from :log :info! :debug!)
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
20
   (:import-from :obj/uri :parse-uri)
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
21
   (:import-from :rocksdb :load-rocksdb)
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
22
   (:export :main))
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
23
 
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
24
 (in-package :examples/mbdb)
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
25
 
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
26
 (load-rocksdb t)
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
27
 
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
28
 ;;; Vars
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
29
 (declaim (timestamp *mbdb-epoch*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
30
 (defvar *mbdb-epoch* (now)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
31
   "mbdb time of birth.")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
32
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
33
 ;; (defvar *mbdb-logger* (make-logger))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
34
 
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
35
 (declaim (type pathname *mbdb-path*))
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
36
 (defvar *mbdb-path* #P"/tmp/mbdb/")
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
37
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
38
 (defvar *default-mbdb-opts*
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
39
   (let ((opts (default-rdb-opts)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
40
     (set-opt opts :enable-statistics 1)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
41
     opts))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
42
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
43
 (declaim (rdb *mbdb*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
44
 (defvar *mbdb* (create-db *mbdb-path* :opts *default-mbdb-opts* :open nil)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
45
   "The local MusicBrainz database. The default value is an uninitialized
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
46
 instance without any columns. Before use, make sure to open the
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
47
 database and on exit the database must be closed.")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
48
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
49
 (declaim (oracle *mbdb-oracle*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
50
 (defvar *mbdb-oracle* (make-oracle sb-thread:*current-thread*)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
51
   "The oracle assigned to the mbdb system, which should usually be the current thread.")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
52
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
53
 (declaim (task-pool *mbdb-tasks*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
54
 (defvar *mbdb-tasks* (make-task-pool :oracle *mbdb-oracle*)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
55
   "The mbdb task pool. This object holds a queue of jobs which are
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
56
 dispatched to workers. Results are collected and processed by the
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
57
 oracle.")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
58
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
59
 (defvar *mbsamp-pack-url* "https://packy.compiler.company/data/mbsamp.tar.zst"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
60
   "Remote location of MusicBrainz ZST-compressed archive filled with TSV
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
61
 files.")
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
62
 
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
63
 (defvar *mbdump-base-url* "https://packy.compiler.company/data/mbdump/"
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
64
   "Remote location of MusicBrainz JSON data files.")
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
65
 
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
66
 (defvar *mbdump-pack-url* "https://packy.compiler.company/data/mbdump.tar.zst"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
67
   "Remote locaton of MusicBrainz JSON dump pack.")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
68
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
69
 (defvar *mbdump-pack* (merge-pathnames "mbdump.tar.zst" *mbdb-worker-dir*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
70
 (defvar *mbsamp-pack* (merge-pathnames "mbsamp.tar.zst" *mbdb-worker-dir*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
71
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
72
 (defvar *mbdb-worker-dir* (merge-pathnames ".import/" *mbdb-path*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
73
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
74
 (defvar *mbdump-files* nil) ;; NOTE(review): no MBDB-UNPACK is visible in this file; presumably meant to be set by MBDUMP-UNPACK -- confirm
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
75
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
76
 (defvar *mbsamp-files* nil) ;; set by MBSAMP-UNPACK; searched by EXTRACT-MBSAMP
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
77
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
78
 ;;; Fetch Data
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
79
 (defun mbdump-fetch ()
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
80
   "Download mbdump data pack."
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
81
   (unless (probe-file *mbdump-pack*)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
82
     (download
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
83
      ;; (parse-uri
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
84
      *mbdump-pack-url*
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
85
      ;; )
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
86
      *mbdump-pack*)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
87
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
88
 (defun mbsamp-fetch ()
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
89
   (unless (probe-file *mbsamp-pack*)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
90
     (download *mbsamp-pack-url* *mbsamp-pack*)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
91
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
92
 (defun mbsamp-unpack ()
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
93
   ;; unpack into mbsamp
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
94
   (let ((out-dir (merge-pathnames "mbsamp/" *mbdb-worker-dir*)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
95
     (unless (probe-file out-dir)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
96
       (sb-ext:run-program "tar" `("-I" "zstd" "-xf" ,(namestring *mbsamp-pack*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
97
                           :directory *mbdb-worker-dir*
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
98
                           :search t
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
99
                           :wait t))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
100
     (setq *mbsamp-files* (directory "/tmp/mbdb/.import/mbsamp/*"))))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
101
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
102
 (defun mbdump-unpack ()
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
103
   ;; unpack into mbsamp
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
104
   (let ((out-dir (merge-pathnames "mbdump/" *mbdb-worker-dir*)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
105
     (unless (probe-file out-dir)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
106
       (sb-ext:run-program "tar" `("-I" "zstd" "-xf" ,(namestring *mbdump-pack*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
107
                           :directory *mbdb-worker-dir*
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
108
                           :search t
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
109
                           :wait t))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
110
     (setq *mbsamp-files* (directory "/tmp/mbdb/.import/mbdump/*"))))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
111
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
112
 #+nil (extract-mbsamp (car (mbsamp-fetch)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
113
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
114
 ;;; Parsing
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
115
 ;; Field text (a backslash followed by N) that NULLABLE maps to NIL.
 (define-constant +mbsamp-null+ "\\N" :test #'string=)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
116
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
117
 (defun nullable (str)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
118
   (unless (string= +mbsamp-null+ str)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
119
     (unless (= (length str) 0)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
120
       str)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
121
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
122
 (defun proc-key (type)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
123
   (case (sb-int:keywordicate type)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
124
     (:id 'make-uuid-from-string)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
125
     (:url 'parse-uri)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
126
     (:num 'parse-integer)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
127
     (:*  'nullable)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
128
     (t 'identity)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
129
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
130
 (defun nullable-int (str)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
131
   (parse-integer str :junk-allowed t))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
132
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
133
 (defun nullable-int* (str)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
134
   (or (ignore-errors
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
135
        (parse-integer str :junk-allowed t))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
136
       (nullable str)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
137
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
138
 (defun nullable-time (str)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
139
   (obj/time:parse-timestring str :date-time-separator #\Space :fail-on-error nil))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
140
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
141
 (defun nullable-uri (str)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
142
   (or
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
143
    (ignore-errors
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
144
     (parse-uri str :escape nil))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
145
    (nullable str)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
146
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
147
 (defun mbsamp-schema (name &rest list)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
148
   (cons name list))
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
149
 
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
150
 (defvar *mbsamp-schema-table*
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
151
   (let ((tbl (make-hash-table :test #'equal)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
152
     (mapc (lambda (x)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
153
             (setf (gethash (car x) tbl) (cdr x)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
154
           (list
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
155
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
156
             "alternative_release_type"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
157
             #'parse-integer nil #'nullable #'parse-integer nil #'make-uuid-from-string)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
158
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
159
             "artist"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
160
             #'parse-integer #'make-uuid-from-string nil nil
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
161
             #'nullable-int #'nullable #'nullable #'nullable #'nullable  #'nullable
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
162
             #'nullable-int #'nullable-int #'nullable nil #'parse-integer
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
163
             #'nullable-time #'nullable-int #'nullable-int #'nullable)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
164
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
165
             "track"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
166
             #'parse-integer #'make-uuid-from-string #'parse-integer #'parse-integer
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
167
             #'parse-integer #'nullable-int* nil #'parse-integer #'nullable-int
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
168
             #'parse-integer #'nullable-time #'parse-integer)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
169
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
170
             "recording"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
171
             #'parse-integer #'make-uuid-from-string nil #'parse-integer
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
172
             #'nullable-int #'nullable-int* #'parse-integer #'nullable-time #'parse-integer)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
173
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
174
             "release"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
175
             #'parse-integer #'make-uuid-from-string nil nil nil nil nil nil nil nil nil nil nil #'nullable-time)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
176
            ;; (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
177
            ;;  "url"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
178
            ;;  #'parse-integer #'make-uuid-from-string #'nullable-uri #'parse-integer #'nullable-time)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
179
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
180
             "url" ;; 2,3
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
181
             #'parse-integer #'make-uuid-from-string #'nullable-uri nil nil)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
182
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
183
             "url_gid_redirect"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
184
             #'make-uuid-from-string #'parse-integer #'nullable-time)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
185
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
186
             "tag"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
187
             #'parse-integer nil #'parse-integer)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
188
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
189
             "genre"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
190
             #'parse-integer #'make-uuid-from-string nil nil #'parse-integer #'nullable-time)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
191
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
192
             "work"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
193
             #'parse-integer #'make-uuid-from-string nil #'nullable-int nil #'parse-integer #'nullable-time)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
194
            (mbsamp-schema
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
195
             "instrument"
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
196
             #'parse-integer #'make-uuid-from-string nil #'nullable-int #'parse-integer #'nullable-time nil nil)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
197
            ))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
198
     tbl)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
199
   "A Hashtable containing the various MusicBrainz table schemas of interest.")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
200
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
201
 (defun get-schema (schema) (gethash schema *mbsamp-schema-table*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
202
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
203
 (defun extract-mbsamp (schema)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
204
   "Extract the contents of FILE which is assumed to contain Tab-separated
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
205
 values. Return a 2d array of row(values)."
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
206
   (let ((file (find schema *mbsamp-files* :test #'string= :key #'pathname-name))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
207
         (map-fns (gethash schema *mbsamp-schema-table*)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
208
     (when file
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
209
       (dat/csv:read-csv-file file :header nil :delimiter #\Tab :map-fns map-fns))))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
210
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
211
 (defun extract-mbdump-file (file)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
212
   "Extract the contents of a json-dump FILE. Return a json-object."
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
213
   (with-open-file (f file)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
214
     ;; (sb-impl::with-array-data
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
215
     (loop for x = (json-read f nil)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
216
           while x
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
217
           collect x)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
218
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
219
 (defmacro with-mbsamp-proc (table shape &body vals)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
220
   (with-gensyms (row i)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
221
     `(coerce
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
222
       (loop for ,row across ,table
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
223
             for ,i below (length ,table)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
224
             collect (make-array
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
225
                      ,shape
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
226
                      :initial-contents
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
227
                      (list
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
228
                       ,@(mapcar
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
229
                          (lambda (v) `(aref ,row ,v))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
230
                          vals))))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
231
       'vector)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
232
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
233
 (defmacro def-mbsamp-proc (name &rest vals)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
234
   (with-gensyms (table)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
235
     (let ((fn-name (symbolicate "PROC-MBSAMP-" name)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
236
       `(defun ,fn-name (,table)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
237
          ,(format nil "Process rows of ~A mbsamp data." name)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
238
          (with-mbsamp-proc ,table ,(length vals) ,@vals)))))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
239
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
240
 (defvar *mbsamp-cfs*
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
241
   (vector (make-rdb-cf "url")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
242
           (make-rdb-cf "genre")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
243
           (make-rdb-cf "tag")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
244
           (make-rdb-cf "track")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
245
           (make-rdb-cf "artist")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
246
           (make-rdb-cf "work")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
247
           (make-rdb-cf "recording")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
248
           (make-rdb-cf "release")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
249
           (make-rdb-cf "instrument")))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
250
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
251
 ;; Row processors: each DEF-MBSAMP-PROC form defines PROC-MBSAMP-<NAME>,
 ;; which keeps only the listed column indices of every row it is given.
 (def-mbsamp-proc url 0 1 2)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
252
 (def-mbsamp-proc genre 0 1 2)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
253
 (def-mbsamp-proc tag 0 1 2)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
254
 (def-mbsamp-proc track 0 1 6)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
255
 (def-mbsamp-proc artist 0 1 2)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
256
 (def-mbsamp-proc work 0 1 4 6)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
257
 (def-mbsamp-proc recording 0 1 2 7)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
258
 (def-mbsamp-proc release 0 1 2 13)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
259
 (def-mbsamp-proc instrument 0 1 2 5 7)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
260
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
261
 (defun extract-mbdump-columns (obj)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
262
   "Extract fields from a json-object, returning a vector of
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
263
   uninitialized column-families which can be created with #'create-cfs.
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
264
 
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
265
 Returns multiple values: the list of columns, the id, and type-id if present."
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
266
   (values
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
267
    (mapcar (lambda (x) (make-rdb-cf (car x))) (json-object-members obj))
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
268
    (make-uuid-from-string (json-getf obj "id"))
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
269
    (when-let ((tid (json-getf obj "type-id")))
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
270
      (make-uuid-from-string tid))))
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
271
 
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
272
 ;;; Tasks
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
273
 ;; NOTE(review): not referenced in the visible part of this file; the name
 ;; suggests a buffer size (units unknown, presumably bytes) used by the
 ;; task code -- confirm against its users.
 (defvar *mbdb-buffer-size* 4096)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
274
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
275
 ;; Subclass of TASK that adds no slots of its own.  The main entry point
 ;; instantiates it with :object #'mbsamp-fetch and pushes it onto a job.
 (defclass mbdb-task (task) ())
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
276
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
277
 ;;; Main
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
278
 (defmain ()
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
279
   (let ((*default-pathname-defaults* *mbdb-path*)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
280
         (*progress-bar-enabled* t)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
281
         (*csv-separator* #\Tab)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
282
         (*cpus* (num-cpus))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
283
         (*log-timestamp* nil)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
284
         (*log-level* :warn))
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
285
     (log:info! "Welcome to MBDB")
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
286
     (ensure-directories-exist *mbdb-worker-dir* :verbose t)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
287
     ;; prepare workers
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
288
     (setf *mbdb-oracle* (make-oracle sb-thread:*current-thread*)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
289
           *mbdb-tasks* (make-task-pool :oracle *mbdb-oracle*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
290
     (push-worker (sb-thread:make-thread #'mbsamp-fetch) *mbdb-tasks*)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
291
     ;; (with-tasks ())
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
292
     (let ((job (make-job)))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
293
       (push-task (make-instance 'mbdb-task :object #'mbsamp-fetch) job))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
294
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
295
     ;; (sb-thread:make-thread #'mbsamp-fetch)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
296
 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
297
     ;; prepare column family data
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
298
     
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
299
     ;; initialize database
39
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
300
     (with-db (db *mbdb*)
1ef551e24009 added musicbrainz db example
Richard Westhaver <ellis@rwest.io>
parents:
diff changeset
301
       (open-db db)
40
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
302
       (setf (rdb-cfs db) *mbsamp-cfs*)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
303
       ;; (create-cfs db)
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
304
       (log:info! "database initialized")
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
305
       ;; 
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
306
       (close-db db))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
307
     
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
308
     ;; launch tasks
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
309
     
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
310
     ;; wait
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
311
     (wait-for-threads (task-pool-workers *mbdb-tasks*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
312
     ;; summarize
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
313
     (info! "mbdb stats" (print-stats *mbdb*))
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
314
     ;; close
6b652d7d6663 examples
Richard Westhaver <ellis@rwest.io>
parents: 39
diff changeset
315
     ))