Evgeny Zinoviev 3847423443 wip
2023-02-18 20:07:00 +03:00

153 lines
5.6 KiB
Python

import sqlite3
import logging
import os.path
import retronews
import threading
from typing import Optional
class Database:
    """SQLite-backed store for issue links (``mdf_links``) and page metadata (``mdf_pages``).

    A single connection is opened with ``check_same_thread=False`` and shared
    by all callers; write helpers serialize on ``self.lock`` and commit after
    every statement. The schema is migrated step by step on construction and
    its version is kept in SQLite's ``PRAGMA user_version``.
    """

    # Schema version this code migrates the database to.
    SCHEMA = 6

    def __init__(self):
        """Open the database file next to the package, then migrate the schema."""
        self.logger = logging.getLogger(self.__class__.__name__)

        file = os.path.join(os.path.dirname(__file__), '..', 'mdf-retrobase.sqlite3')
        self.sqlite = sqlite3.connect(file, check_same_thread=False)
        self.lock = threading.Lock()

        sqlite_version = self._get_sqlite_version()
        self.logger.debug(f'SQLite version: {sqlite_version}')

        schema_version = self.schema_get_version()
        self.logger.debug(f'Schema version: {schema_version}')

        self.schema_init(schema_version)
        self.schema_set_version(self.SCHEMA)

    def __del__(self):
        """Commit pending work and close the connection, if one was opened."""
        # __init__ may have failed before ``self.sqlite`` was assigned, and
        # __del__ may run more than once, so never assume the attribute exists.
        conn = getattr(self, 'sqlite', None)
        if not conn:
            return
        try:
            conn.commit()
        finally:
            # Always release the handle, even when the final commit fails.
            conn.close()
            self.sqlite = None

    def _get_sqlite_version(self) -> str:
        """Return the version string of the linked SQLite library."""
        cursor = self.sqlite.cursor()
        cursor.execute("SELECT sqlite_version()")
        return cursor.fetchone()[0]

    def _write(self, sql: str, params: tuple) -> None:
        """Run one write statement under the lock and commit it immediately."""
        with self.lock:
            self.cursor().execute(sql, params)
            self.commit()

    def schema_get_version(self) -> int:
        """Return the schema version stored in ``PRAGMA user_version``."""
        cursor = self.sqlite.execute('PRAGMA user_version')
        return int(cursor.fetchone()[0])

    def schema_set_version(self, v) -> None:
        """Store ``v`` (an integer) in ``PRAGMA user_version``."""
        # PRAGMA does not accept bound parameters; ``:d`` rejects non-integers.
        self.sqlite.execute('PRAGMA user_version={:d}'.format(v))
        self.logger.info(f'Schema set to {v}')

    def cursor(self) -> sqlite3.Cursor:
        """Return a new cursor on the shared connection."""
        return self.sqlite.cursor()

    def commit(self) -> None:
        """Commit the current transaction on the shared connection."""
        return self.sqlite.commit()

    def schema_init(self, version: int) -> None:
        """Apply every migration step newer than ``version``, then commit.

        Args:
            version: schema version currently stored in the database
                (0 for a brand-new file).
        """
        cursor = self.cursor()

        if version < 1:
            # v1: one row per issue date, with its URL and page count.
            cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_links (
issue_date TEXT PRIMARY KEY,
url TEXT NOT NULL,
pages INTEGER NOT NULL
)""")

        if version < 2:
            # v2: per-page metadata, unique per (collection, document, page).
            cursor.execute("""CREATE TABLE IF NOT EXISTS mdf_pages (
collection_id INTEGER NOT NULL,
doc_id INTEGER NOT NULL,
page INTEGER NOT NULL,
height INTEGER NOT NULL,
width INTEGER NOT NULL,
dpi INTEGER NOT NULL
)""")
            cursor.execute("""CREATE UNIQUE INDEX mdf_pages_idx ON mdf_pages (collection_id, doc_id, page)""")

        if version < 3:
            # v3: failure flag for pages.
            # NOTE(review): SQLite appears to accept a NOT NULL column without
            # a default only while the table is empty - confirm this step is
            # never expected to run against a populated mdf_pages table.
            cursor.execute("ALTER TABLE mdf_pages ADD fail INTEGER NOT NULL")

        if version < 4:
            # v4: index to look pages up by failure flag.
            cursor.execute("""CREATE INDEX mdf_pages_fail_idx ON mdf_pages (fail)""")

        if version < 5:
            # v5: store the collection/document ids alongside each link.
            for col in ('collection_id', 'doc_id'):
                cursor.execute(f"ALTER TABLE mdf_links ADD {col} INTEGER NOT NULL DEFAULT '0'")
            cursor.execute("CREATE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")

        if version < 6:
            # v6: the (collection_id, doc_id) pair becomes unique.
            cursor.execute("DROP INDEX mdf_links_col_doc_idx")
            cursor.execute("CREATE UNIQUE INDEX mdf_links_col_doc_idx ON mdf_links (collection_id, doc_id)")

        self.commit()

    def add_link(self, issue_date: str, url: str, pages: int):
        """Insert or replace the link row for ``issue_date``.

        The collection and document ids are taken from ``url`` via
        ``retronews.parse_url``. Leaving them at the column default (0) would
        make every new row collide on the unique (collection_id, doc_id)
        index, and ``REPLACE`` would then silently delete the other row.

        Args:
            issue_date: primary key of the link row.
            url: document URL; must be parseable by ``retronews.parse_url``.
            pages: number of pages in the document.
        """
        _pub_date, collection_id, doc_id = retronews.parse_url(url)
        self._write(
            "REPLACE INTO mdf_links (issue_date, url, pages, collection_id, doc_id) "
            "VALUES (?, ?, ?, ?, ?)",
            (issue_date, url, str(pages), collection_id, doc_id))

    def add_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int):
        """Insert a page row with ``fail=0``."""
        self._write(
            "INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, ?, ?, ?, 0)",
            (collection_id, doc_id, page, width, height, dpi))

    def update_page(self, collection_id: int, doc_id: int, page: int, width: int, height: int, dpi: int):
        """Overwrite the dimensions of an existing page row and reset ``fail`` to 0."""
        self._write(
            "UPDATE mdf_pages SET width=?, height=?, dpi=?, fail=0 WHERE collection_id=? AND doc_id=? AND page=?",
            (width, height, dpi, collection_id, doc_id, page))

    def add_page_failed(self, collection_id, doc_id, page):
        """Insert a page row with zeroed dimensions and ``fail=1``."""
        self._write(
            "INSERT INTO mdf_pages (collection_id, doc_id, page, width, height, dpi, fail) VALUES (?, ?, ?, 0, 0, 0, 1)",
            (collection_id, doc_id, page))

    def get_existing_pages(self, fail=0):
        """Return ``(collection_id, doc_id, page)`` tuples whose ``fail`` flag equals ``fail``."""
        cur = self.cursor()
        cur.execute("SELECT collection_id, doc_id, page FROM mdf_pages WHERE fail=?", (fail,))
        return cur.fetchall()

    def get_documents(self, range: Optional[tuple[str, str]] = None):
        """Return the stored documents as dicts, optionally limited by issue date.

        Args:
            range: optional ``(first, last)`` pair; when given, only rows whose
                ``issue_date`` lies between the two values (inclusive) are
                returned.

        Returns:
            A list of dicts with keys ``collection_id``, ``doc_id`` (both
            parsed from the stored URL) and ``pages``.
        """
        sql = "SELECT issue_date, url, pages FROM mdf_links"
        params = ()
        if range:
            # Bound parameters instead of string interpolation: a quote in the
            # bounds can neither break nor inject into the statement. str()
            # keeps the comparison textual, as the old quoted literals were.
            sql += " WHERE issue_date BETWEEN ? AND ?"
            params = (str(range[0]), str(range[1]))

        cur = self.cursor()
        cur.execute(sql, params)

        docs = []
        for _issue_date, url, pages in cur.fetchall():
            _pub_date, collection_id, doc_id = retronews.parse_url(url)
            docs.append(dict(
                collection_id=collection_id,
                doc_id=doc_id,
                pages=pages
            ))
        return docs

    def get_doc_pages(self, collection_id, doc_id):
        """Return ``(page, width, height, dpi)`` tuples for one document."""
        cur = self.cursor()
        cur.execute("SELECT page, width, height, dpi FROM mdf_pages WHERE collection_id=? AND doc_id=?",
                    (collection_id, doc_id))
        return cur.fetchall()

    def fix_documents(self):
        """Backfill ``collection_id``/``doc_id`` of every link from its URL."""
        cur = self.cursor()
        cur.execute("SELECT issue_date, url FROM mdf_links")

        # Parse everything first so a bad URL aborts before any row is touched.
        updates = []
        for issue_date, url in cur.fetchall():
            _pub_date, cid, did = retronews.parse_url(url)
            updates.append((cid, did, issue_date))

        # Same locking discipline as the other write helpers.
        with self.lock:
            cur.executemany("UPDATE mdf_links SET collection_id=?, doc_id=? WHERE issue_date=?",
                            updates)
            self.commit()