# Code-viewer header residue (not part of the module): 153 lines, 5.6 KiB, Python
import sqlite3
|
|
import logging
|
|
import os.path
|
|
import retronews
|
|
import threading
|
|
|
|
from typing import Optional
|
|
|
|
|
|
class Database:
    """Wrapper around the ``mdf-retrobase.sqlite3`` SQLite database.

    A single connection is opened next to this module with
    ``check_same_thread=False``.  Methods that insert or update rows hold
    ``self.lock`` around the statement and its commit.  The schema version
    is kept in ``PRAGMA user_version`` and upgraded incrementally by
    :meth:`schema_init`.
    """

    # Schema version this code migrates to; stored in ``PRAGMA user_version``.
    SCHEMA = 6

    def __init__(self):
        """Open the database file and migrate its schema to ``SCHEMA``."""
        self.logger = logging.getLogger(self.__class__.__name__)

        # The database file lives in the same directory as this module.
        db_path = os.path.join(os.path.dirname(__file__), 'mdf-retrobase.sqlite3')
        self.sqlite = sqlite3.connect(db_path, check_same_thread=False)
        self.lock = threading.Lock()

        sqlite_version = self._get_sqlite_version()
        self.logger.debug(f'SQLite version: {sqlite_version}')

        schema_version = self.schema_get_version()
        self.logger.debug(f'Schema version: {schema_version}')

        self.schema_init(schema_version)
        self.schema_set_version(self.SCHEMA)

    def __del__(self):
        """Commit pending work and close the connection, if one was opened."""
        # __init__ can fail before ``self.sqlite`` is assigned (e.g. when
        # sqlite3.connect raises); the finalizer must not raise in that case.
        connection = getattr(self, 'sqlite', None)
        if connection is None:
            return
        connection.commit()
        connection.close()

    def _get_sqlite_version(self) -> str:
        """Return the version string reported by ``SELECT sqlite_version()``."""
        cursor = self.sqlite.cursor()
        cursor.execute("SELECT sqlite_version()")
        return cursor.fetchone()[0]

    def schema_get_version(self) -> int:
        """Return the schema version stored in ``PRAGMA user_version``."""
        cursor = self.sqlite.execute('PRAGMA user_version')
        return int(cursor.fetchone()[0])

    def schema_set_version(self, v) -> None:
        """Store ``v`` in ``PRAGMA user_version`` and log the change.

        The statement is built with the ``{:d}`` format spec, which only
        accepts integers, so a non-integer ``v`` raises before any SQL runs.
        """
        self.sqlite.execute('PRAGMA user_version={:d}'.format(v))
        self.logger.info(f'Schema set to {v}')

    def cursor(self) -> sqlite3.Cursor:
        """Return a new cursor on the shared connection."""
        return self.sqlite.cursor()

    def commit(self) -> None:
        """Commit the current transaction on the shared connection."""
        return self.sqlite.commit()

    def schema_init(self, version: int) -> None:
        """Apply every migration step newer than ``version``, then commit.

        Args:
            version: Schema version currently stored in the database; each
                step guarded by ``version < N`` runs when the database is
                older than ``N``.
        """
        cursor = self.cursor()

        if version < 1:
            # v1: links table, keyed by issue date.
            cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_links (
                issue_date TEXT PRIMARY KEY,
                url TEXT NOT NULL,
                pages INTEGER NOT NULL
            )""")

        if version < 2:
            # v2: per-page table, one row per (collection, document, page).
            cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_pages (
                collection_id INTEGER NOT NULL,
                doc_id INTEGER NOT NULL,
                page INTEGER NOT NULL,
                height INTEGER NOT NULL,
                width INTEGER NOT NULL,
                dpi INTEGER NOT NULL
            )""")
            cursor.execute("""CREATE UNIQUE INDEX mdf_pages_idx ON mdf_pages (collection_id, doc_id, page)""")

        if version < 3:
            # v3: failure flag.  SQLite requires a non-NULL default when a
            # NOT NULL column is added with ALTER TABLE, otherwise the
            # statement can be rejected; existing rows become fail=0.
            cursor.execute("ALTER TABLE mdf_pages ADD fail INTEGER NOT NULL DEFAULT 0")

        if version < 4:
            cursor.execute("""CREATE INDEX mdf_pages_fail_idx ON mdf_pages (fail)""")

        if version < 5:
            # v5: collection/document ids on links, defaulting to 0.
            for col in ('collection_id', 'doc_id'):
                cursor.execute(f"ALTER TABLE mdf_links ADD {col} INTEGER NOT NULL DEFAULT '0'")
            cursor.execute("CREATE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")

        if version < 6:
            # v6: the (collection_id, doc_id) index becomes UNIQUE.
            cursor.execute("DROP INDEX mdf_links_col_doc_idx")
            cursor.execute("CREATE UNIQUE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")

        self.commit()

    def add_link(self, issue_date: str, url: str, pages: int) -> None:
        """Insert or replace the link row for ``issue_date`` and commit.

        NOTE(review): ``collection_id`` / ``doc_id`` are not written here, so
        they take the column default 0 added in schema 5.  With the UNIQUE
        index from schema 6, ``REPLACE`` also removes any other row that
        still holds (0, 0) -- confirm callers run ``fix_documents()`` after
        each insert or populate those columns some other way.
        """
        with self.lock:
            self.cursor().execute(
                "REPLACE INTO mdf_links (issue_date, url, pages) VALUES (?, ?, ?)",
                (issue_date, url, pages),
            )
            self.commit()

    def add_page(self, collection_id: int, doc_id: int, page: int,
                 width: int, height: int, dpi: int) -> None:
        """Insert a page row with ``fail=0`` and commit.

        Raises:
            sqlite3.IntegrityError: if a row for the same
                (collection_id, doc_id, page) already exists (unique index).
        """
        with self.lock:
            self.cursor().execute(
                "INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, ?, ?, ?, 0)",
                (collection_id, doc_id, page, width, height, dpi),
            )
            self.commit()

    def update_page(self, collection_id: int, doc_id: int, page: int,
                    width: int, height: int, dpi: int) -> None:
        """Set the dimensions of an existing page row, reset ``fail`` to 0, commit."""
        with self.lock:
            self.cursor().execute(
                "UPDATE mdf_pages SET width=?, height=?, dpi=?, fail=0 WHERE collection_id=? AND doc_id=? AND page=?",
                (width, height, dpi, collection_id, doc_id, page),
            )
            self.commit()

    def add_page_failed(self, collection_id: int, doc_id: int, page: int) -> None:
        """Insert a page row marked ``fail=1`` with zeroed dimensions and commit."""
        with self.lock:
            self.cursor().execute(
                "INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, 0, 0, 0, 1)",
                (collection_id, doc_id, page),
            )
            self.commit()

    def get_existing_pages(self, fail: int = 0) -> list:
        """Return ``(collection_id, doc_id, page)`` tuples whose ``fail`` equals ``fail``."""
        cur = self.cursor()
        cur.execute("SELECT collection_id, doc_id, page FROM mdf_pages WHERE fail=?", (fail,))
        return cur.fetchall()

    def get_documents(self, range: Optional[tuple[str, str]] = None) -> list:
        """Return one dict per link row, optionally limited to a date range.

        Args:
            range: Optional ``(first, last)`` pair; when given, only rows
                whose ``issue_date`` is ``BETWEEN`` the two values are read.

        Returns:
            A list of dicts with keys ``collection_id``, ``doc_id`` (the
            second and third items returned by ``retronews.parse_url`` for
            the row's url) and ``pages``.
        """
        sql = "SELECT issue_date, url, pages FROM mdf_links"
        params: tuple = ()
        if range:
            # Bind the bounds instead of formatting them into the statement,
            # so quotes in the values cannot break or alter the query.
            sql += " WHERE issue_date BETWEEN ? AND ?"
            params = (str(range[0]), str(range[1]))

        cur = self.cursor()
        cur.execute(sql, params)

        docs = []
        for _issue_date, url, pages in cur.fetchall():
            _pub_date, collection_id, doc_id = retronews.parse_url(url)
            docs.append(dict(
                collection_id=collection_id,
                doc_id=doc_id,
                pages=pages,
            ))
        return docs

    def get_doc_pages(self, collection_id: int, doc_id: int) -> list:
        """Return ``(page, width, height, dpi)`` tuples for one document."""
        cur = self.cursor()
        cur.execute(
            "SELECT page, width, height, dpi FROM mdf_pages WHERE collection_id=? AND doc_id=?",
            (collection_id, doc_id),
        )
        return cur.fetchall()

    def fix_documents(self) -> None:
        """Fill ``collection_id`` / ``doc_id`` of every link row from its url.

        Every url is parsed with ``retronews.parse_url`` before anything is
        written, so a parse failure leaves the table untouched.  The writes
        and the commit run under ``self.lock`` like the other writers.
        """
        with self.lock:
            cur = self.cursor()
            cur.execute("SELECT issue_date, url FROM mdf_links")

            updates = []
            for issue_date, url in cur.fetchall():
                _pub_date, cid, did = retronews.parse_url(url)
                updates.append((cid, did, issue_date))

            cur.executemany(
                "UPDATE mdf_links SET collection_id=?, doc_id=? WHERE issue_date=?",
                updates,
            )
            self.commit()
|