Setup IMDB Database
- Download the 👉 IMDB Dataset.
- Execute
node create-db-copy-files.js
to create the copy files. - Copy the files to the server.
scp *.sql ip-address:/home/imdb
- On the (dokku) server, execute the following commands:
dokku postgres:create imdb
dokku postgres:expose imdb
DB_URL=$(dokku postgres:info imdb --dsn | sed 's/@.*:/@localhost:/')
NEW_DB=$(echo $DB_URL | sed 's/5432\/.*/5432\/imdb/')
echo $DB_URL > imdb/DB_URL
echo $NEW_DB > imdb/NEW_DB
# copy the content of the imdb folder to the running docker container (not persistent)
docker cp imdb/ $(dokku postgres:info imdb --id):/home/imdb
dokku postgres:enter imdb
cd /home/imdb
psql $(cat DB_URL) -f ./_1-create-db.sql
psql $(cat NEW_DB) -f ./_2-setup-db.sql
psql $(cat NEW_DB) -f ./_3-restore-db.sql
Troubleshooting​
Problem: COPY
does not work when a single line is erroneous.
ERROR: insert or update on table "humans_movies" violates foreign key constraint "humans_movies_movie_id_fkey"
DETAIL: Key (movie_id)=(tt16607280) is not present in table "movies"
To remove the erroneous line, you can use the following command:
sed -i '/tt16607280/d' humans_movies.sql
and reimport this file:
psql $(cat NEW_DB) -c "COPY humans_movies FROM '/home/imdb/humans_movies.sql';"
Unknown Entries 16.08.2023
Index​
-- for text search gin + trigram indexes (reduces search time from ~3s to ~50ms)
CREATE INDEX ON movies USING gin (primary_title gin_trgm_ops);
CREATE INDEX ON movies USING gin (original_title gin_trgm_ops);
CREATE INDEX ON humans USING gin (name gin_trgm_ops);
-- for eq-comparison a normal btree index is faster
CREATE INDEX ON movies(primary_title);
CREATE INDEX ON movies(original_title);
CREATE INDEX ON humans(name);
CREATE INDEX ON movies(type);
CREATE INDEX ON movies(primary_title);
CREATE INDEX ON movies(original_title);
CREATE INDEX ON movies(is_adult);
CREATE INDEX ON movies(start_year);
CREATE INDEX ON movies(end_year);
CREATE INDEX ON movies(runtime_minutes);
CREATE INDEX ON humans(name);
CREATE INDEX ON humans(birth_year);
CREATE INDEX ON humans(death_year);
CREATE INDEX ON humans_movies(human_id);
CREATE INDEX ON humans_movies(movie_id);
CREATE INDEX ON ratings(averageRating);
CREATE INDEX ON ratings(numVotes);
CREATE INDEX ON ratings(movie_id);