type == FilesItemType::FOLDER; } public function isFile(): bool { return $this->type == FilesItemType::FILE; } }

/**
 * Adds a size accessor to items. The using class must provide isFile().
 */
trait FilesItemSizeTrait {

    public int $size;

    /**
     * @return int|null $size when isFile() is true, null otherwise
     */
    public function getSize(): ?int {
        return $this->isFile() ? $this->size : null;
    }

}

/**
 * Builds an excerpt of the `text` column around the earliest keyword occurrence for each id.
 *
 * The position of every keyword is found with LOCATE(); the smallest position wins. When a row
 * contains none of the keywords the position falls back to CHAR_LENGTH(text) + 1.
 *
 * @param string $table Table that holds the `text` column
 * @param string $field_id Name of the id column in $table
 * @param int[] $ids
 * @param string[] $keywords Must already be lower-cased
 * @param int $before
 * @param int $after
 * @return array Map of id => ['excerpt' => string, 'index' => int]; ids without a row map to null
 */
function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after): array {
    // Ids are interpolated into the IN (...) list below, so force them to integers.
    $ids = array_map(intval(...), $ids);

    $results = array_fill_keys($ids, null);

    // An empty id list would produce "IN ()" and an empty keyword list would leave
    // nothing to LOCATE(); both are invalid SQL, so there is nothing to query.
    if (empty($ids) || empty($keywords))
        return $results;

    $db = DB();

    // One expression per keyword: its position in text, or CHAR_LENGTH(text) + 1 when absent.
    $positions = [];
    foreach ($keywords as $keyword) {
        $locate = "LOCATE('".$db->escape($keyword)."', text)";
        $positions[] = "IF({$locate} > 0, {$locate}, CHAR_LENGTH(text) + 1)";
    }

    // LEAST() needs at least two arguments, so a single keyword is used as is.
    $first_hit = count($positions) > 1
        ? 'LEAST('.implode(', ', $positions).')'
        : $positions[0];

    $total = $before + $after;
    $ids_sql = implode(',', $ids);

    $sql = "SELECT
            {$field_id} AS id,
            GREATEST(
                1,
                {$first_hit} - {$before}
            ) AS excerpt_start_index,
            SUBSTRING(
                text,
                GREATEST(
                    1,
                    {$first_hit} - {$before}
                ),
                LEAST(
                    CHAR_LENGTH(text),
                    {$total} + {$first_hit} - GREATEST(1, {$first_hit} - {$before})
                )
            ) AS excerpt
        FROM {$table}
        WHERE {$field_id} IN ({$ids_sql})";

    $q = $db->query($sql);
    while ($row = $db->fetch($q)) {
        $results[$row['id']] = [
            // collapse every whitespace run into a single space
            'excerpt' => preg_replace('/\s+/', ' ', $row['excerpt']),
            'index' => (int)$row['excerpt_start_index']
        ];
    }

    return $results;
}

/**
 * Runs a Sphinx query against $index and resolves the matches into items.
 *
 * @param string $index Sphinx index name
 * @param string $q Raw query; it is passed through sphinx_mkquery() before searching
 * @param int $offset
 * @param int $count
 * @param callable $items_getter Receives the 'matches' array of the Sphinx result, returns the items
 * @param callable|null $sphinx_client_setup Receives the client to configure ranking and sorting;
 *                                           when null, proximity BM25 ranking sorted by relevance is used
 * @return array ['count' => int total found, 'items' => array]; count 0 and no items when the query fails
 */
function _search(string $index, string $q, int $offset, int $count, callable $items_getter, ?callable $sphinx_client_setup = null): array {
    $query_filtered = sphinx_mkquery($q);

    $cl = sphinx_client();
    $cl->setLimits($offset, $count);
    $cl->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);

    if ($sphinx_client_setup !== null) {
        $sphinx_client_setup($cl);
    } else {
        $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
    }

    // run search
    $result = $cl->query((string)$query_filtered, $index);

    // errors and warnings are logged even when a result came back
    $error = $cl->getLastError();
    $warning = $cl->getLastWarning();
    if ($error)
        logError(__FUNCTION__, $error);
    if ($warning)
        logWarning(__FUNCTION__, $warning);

    if ($result === false)
        return ['count' => 0, 'items' => []];

    $total_found = (int)$result['total_found'];

    $items = [];
    if (!empty($result['matches']))
        $items = $items_getter($result['matches']);

    return ['count' => $total_found, 'items' => $items];
}


// ---------------------------------------------------
// ------------------- Collections -------------------
// ---------------------------------------------------

/**
 * Known file collections; the backing value is used in URLs and language keys.
 */
enum FilesCollection: string {
    case WilliamFriedman = 'wff';
    case MercureDeFrance = 'mdf';
    case Baconiana = 'baconiana';
}

/**
 * A collection presented as a folder item that links to /files/<collection>/.
 */
class CollectionItem implements FilesItemInterface {

    public function __construct(
        protected FilesCollection $collection
    ) {}

    public function getTitleHtml(): ?string { return null; }
    public function getId(): string { return $this->collection->value; }
    public function isFolder(): bool { return true; }
    public function isFile(): bool { return false; }
    public function isAvailable(): bool { return true; }
    public function getUrl(): string { return '/files/'.$this->collection->value.'/'; }
    public function getSize(): ?int { return null; }
    public function getTitle(): string { return lang("files_{$this->collection->value}_collection"); }
    public function getMeta(?string $hl_matched = null): array { return []; }
    public function isTargetBlank(): bool { return false; }
    public function getSubtitle(): ?string { return null; }

}


// ----------------------------------------------------------------
// ------------------- William Friedman Archive -------------------
// ----------------------------------------------------------------

/**
 * A row of the wff_collection table: either a folder or a PDF document.
 */
class WFFCollectionItem extends model implements FilesItemInterface {

    const DB_TABLE = 'wff_collection';

    use FilesItemTypeTrait;
    use FilesItemSizeTrait;

    public int $id;
    public int $parentId;
    public string $title;
    public string $documentId;
    public string $path;
    public int $filesCount;

    public function getTitleHtml(): ?string { return null; }
    public function getId(): string { return (string)$this->id; }
    public function isAvailable(): bool { return true; }
    public function getTitle(): string { return $this->title; }

    /**
     * @return string For folders the last path component with underscores turned into spaces,
     *                for files the stored document id
     */
    public function getDocumentId(): string {
        return $this->isFolder()
            ? str_replace('_', ' ', basename($this->path))
            : $this->documentId;
    }

    public function isTargetBlank(): bool { return $this->isFile(); }
    public function getSubtitle(): ?string { return null; }

    /**
     * @return string Site-relative folder URL, or an absolute URL on $config['files_domain'] for files
     */
    public function getUrl(): string {
        global $config;
        return $this->isFolder()
            ? "/files/wff/{$this->id}/"
            : "https://{$config['files_domain']}/NSA Friedman Documents/{$this->path}";
    }

    /**
     * @param string|null $hl_matched Passed to hl_matched() for the folder document id
     * @return array Empty for folders without a parent, otherwise a list under 'items'
     */
    public function getMeta(?string $hl_matched = null): array {
        if ($this->isFolder()) {
            if (!$this->parentId)
                return [];
            return [
                'items' => [
                    hl_matched($this->getDocumentId(), $hl_matched),
                    lang_num('files_count', $this->filesCount)
                ]
            ];
        }
        return [
            'inline' => false,
            'items' => [
                // NOTE(review): unlike the folder branch, $hl_matched is not forwarded here - confirm this is intended
                hl_matched('Document '.$this->documentId),
                sizeString($this->size),
                'PDF'
            ]
        ];
    }

}

/**
 * Loads a folder by id, optionally together with its chain of parent folders.
 *
 * @param int $folder_id
 * @param bool $with_parents When true, an array is returned: the folder first, then its ancestors
 * @return WFFCollectionItem|WFFCollectionItem[]|null null when the row is missing or is not a folder
 */
function wff_get_folder(int $folder_id, bool $with_parents = false): WFFCollectionItem|array|null {
    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection WHERE id=?", $folder_id);
    if (!$db->numRows($q))
        return null;

    $item = new WFFCollectionItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    if (!$with_parents)
        return $item;

    $items = [$item];
    if ($item->parentId) {
        // walk up recursively; a missing or non-folder parent ends the chain
        $parents = wff_get_folder($item->parentId, true);
        if ($parents !== null)
            $items = array_merge($items, $parents);
    }
    return $items;
}

/**
 * Lists collection items ordered by title, optionally restricted to one or several parents.
 *
 * @param int|int[]|null $parent_id null selects every row
 * @return array
 */
function wff_get(int|array|null $parent_id = null): array {
    $db = DB();

    $where = [];
    $args = [];

    if (is_int($parent_id)) {
        $where[] = "parent_id=?";
        $args[] = $parent_id;
    } else if (is_array($parent_id)) {
        // "IN ()" is not valid SQL and no row can match an empty list
        if (empty($parent_id))
            return [];
        // the list is interpolated, so force every id to an integer
        $where[] = "parent_id IN (".implode(", ", array_map(intval(...), $parent_id)).")";
    }

    $sql = "SELECT * FROM wff_collection";
    if (!empty($where))
        $sql .= " WHERE ".implode(" AND ", $where);
    $sql .= " ORDER BY title";

    $q = $db->query($sql, ...$args);
    return array_map(WFFCollectionItem::create_instance(...), $db->fetchAll($q));
}

/**
 * @param int[] $ids
 * @return WFFCollectionItem[] Empty when $ids is empty
 */
function wff_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL
    if (empty($ids))
        return [];

    $db = DB();
    $ids_sql = implode(',', array_map(intval(...), $ids));
    $q = $db->query("SELECT * FROM wff_collection WHERE id IN ({$ids_sql})");
    return array_map(WFFCollectionItem::create_instance(...), $db->fetchAll($q));
}

/**
 * Searches the WFF index; title and document_id get custom field weights and, at equal
 * relevance, rows with a higher is_folder value sort first.
 *
 * @return array See _search()
 */
function wff_search(string $q, int $offset = 0, int $count = 0): array {
    return _search(WFF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: function($matches) {
            // match keys are the ids stored in the index
            return wff_get_by_id(array_keys($matches));
        },
        sphinx_client_setup: function(SphinxClient $cl) {
            $cl->setFieldWeights([
                'title' => 50,
                'document_id' => 60,
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_EXTENDED, '@relevance DESC, is_folder DESC');
        }
    );
}

/**
 * Truncates the WFF index and re-inserts every wff_collection row.
 * Files are indexed with their text from wff_texts when a row exists; folders get an empty text.
 */
function wff_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".WFF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection");
    while ($row = $db->fetch($q)) {
        $item = new WFFCollectionItem($row);

        $text = '';
        if ($item->isFile()) {
            $text_q = $db->query("SELECT text FROM wff_texts WHERE wff_id=?", $item->id);
            if ($db->numRows($text_q))
                $text = $db->result($text_q);
        }

        sphinx_execute("INSERT INTO ".WFF_ARCHIVE_SPHINX_RTINDEX." (id, document_id, title, text, is_folder, parent_id) VALUES (?, ?, ?, ?, ?, ?)",
            $item->id, $item->getDocumentId(), $item->title, $text, (int)$item->isFolder(), $item->parentId);
    }
}

/**
 * @see _get_text_excerpts()
 */
function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts('wff_texts', 'wff_id', $ids, $keywords, $before, $after);
}


// ---------------------------------------------------------
// ------------------- Mercure de France -------------------
// ---------------------------------------------------------

/**
 * A row of the mdf_collection table.
 */
class MDFCollectionItem extends model implements FilesItemInterface {

    const DB_TABLE = 'mdf_collection';

    use FilesItemTypeTrait;
    use FilesItemSizeTrait;

    public int $id;
    public int $issue;
    public string $path;
    public string $date;
    public int $volume;
    public int $pageFrom;
    public int $pageTo;
    public int $pdfPages;
    public int $size;

    public function isAvailable(): bool { return true; }
    public function getTitleHtml(): ?string { return null; }

    public function getTitle(): string {
        return "№{$this->issue}, {$this->getHumanFriendlyDate()}";
    }

    /**
     * @return string $date formatted as 'j M Y'
     */
    public function getHumanFriendlyDate(): string {
        $dt = new DateTime($this->date);
        return $dt->format('j M Y');
    }

    public function isTargetBlank(): bool { return true; }

    // explicit cast: $id is an int while the interface method returns a string
    public function getId(): string { return (string)$this->id; }

    public function getUrl(): string {
        global $config;
        return 'https://'.$config['files_domain'].'/Mercure-de-France-OCR/'.$this->path;
    }

    /**
     * @param string|null $hl_matched Not used by this item type
     * @return array Volume, page range, size and format under 'items'
     */
    public function getMeta(?string $hl_matched = null): array {
        return [
            'inline' => true,
            'items' => [
                'Vol. '.$this->getRomanVolume(),
                'pp. '.$this->pageFrom.'-'.$this->pageTo,
                sizeString($this->size),
                'PDF'
            ]
        ];
    }

    public function getRomanVolume(): string {
        return arabic_to_roman($this->volume);
    }

    public function getSubtitle(): ?string {
        return null;
        //return 'Vol. '.$this->getRomanVolume().', pp. '.$this->pageFrom.'-'.$this->pageTo;
    }

}

/**
 * @return MDFCollectionItem[] All issues ordered by date
 */
function mdf_get(): array {
    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection ORDER BY `date`");
    return array_map(MDFCollectionItem::create_instance(...), $db->fetchAll($q));
}

/**
 * @param int[] $ids
 * @return MDFCollectionItem[] Empty when $ids is empty
 */
function mdf_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL
    if (empty($ids))
        return [];

    $db = DB();
    $ids_sql = implode(',', array_map(intval(...), $ids));
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN ({$ids_sql})");
    return array_map(MDFCollectionItem::create_instance(...), $db->fetchAll($q));
}

/**
 * Searches the Mercure de France index with custom weights for date, issue and text.
 *
 * @return array See _search()
 */
function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    return _search(MDF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: function($matches) {
            // match keys are the ids stored in the index
            return mdf_get_by_id(array_keys($matches));
        },
        sphinx_client_setup: function(SphinxClient $cl) {
            $cl->setFieldWeights([
                'date' => 10,
                'issue' => 9,
                'text' => 8
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
        }
    );
}

/**
 * Truncates the Mercure de France index and re-inserts every issue with its text from mdf_texts.
 */
function mdf_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".MDF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $mdf = mdf_get();
    foreach ($mdf as $item) {
        $text = $db->result($db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $item->id));
        sphinx_execute("INSERT INTO ".MDF_ARCHIVE_SPHINX_RTINDEX." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)",
            $item->id, $item->volume, (string)$item->issue, $item->getHumanFriendlyDate(), $text);
    }
}

/**
 * @see _get_text_excerpts()
 */
function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts('mdf_texts', 'mdf_id', $ids, $keywords, $before, $after);
}


// -------------------------------------------------
// ------------------- Baconiana -------------------
// -------------------------------------------------

/**
 * A row of the baconiana_collection table: either a folder or a PDF issue.
 */
class BaconianaCollectionItem extends model implements FilesItemInterface {

    const DB_TABLE = 'baconiana_collection';

    use FilesItemTypeTrait;
    use FilesItemSizeTrait;

    public int $id;
    public int $parentId;
    public int $year;
    public string $issues;
    public string $path;
    public bool $jobc; // Journal of the Bacon Society
    public string $title; // Only for folders

    public function isAvailable(): bool { return true; }
    public function getTitleHtml(): ?string { return null; }

    /**
     * @return string The stored title when not empty, otherwise the journal name followed by the issue numbers
     */
    public function getTitle(): string {
        if ($this->title !== '')
            return $this->title;

        return ($this->jobc ? lang('baconiana_old_name') : lang('baconiana')).' №'.$this->issues;
    }

    public function isTargetBlank(): bool { return $this->isFile(); }

    // explicit cast: $id is an int while the interface method returns a string
    public function getId(): string { return (string)$this->id; }

    public function getUrl(): string {
        if ($this->isFolder()) {
            return '/files/'.FilesCollection::Baconiana->value.'/'.$this->id.'/';
        }
        global $config;
        return 'https://'.$config['files_domain'].'/'.$this->path;
    }

    /**
     * @param string|null $hl_matched Not used by this item type
     * @return array Empty for folders; for files a list under 'items'
     */
    public function getMeta(?string $hl_matched = null): array {
        $items = [];
        if ($this->isFolder())
            return $items;

        if ($this->year >= 2007)
            $items = array_merge($items, ['Online Edition']);

        $items = array_merge($items, [
            sizeString($this->size),
            'PDF'
        ]);

        return [
            'inline' => false,
            'items' => $items
        ];
    }

    public function getSubtitle(): ?string {
        return $this->year > 0 ? '('.$this->year.')' : null;
    }

}

/**
 * Lists Baconiana items ordered by type, year and id.
 *
 * @param int|null $parent_id Parent folder id; null selects every row regardless of parent
 * @return BaconianaCollectionItem[]
 */
function baconiana_get(?int $parent_id = 0): array {
    $db = DB();

    $sql = "SELECT * FROM baconiana_collection";
    $args = [];
    if ($parent_id !== null) {
        // bound parameter, like the other queries in this file
        $sql .= " WHERE parent_id=?";
        $args[] = $parent_id;
    }
    $sql .= " ORDER BY type, year, id";

    $q = $db->query($sql, ...$args);
    return array_map(BaconianaCollectionItem::create_instance(...), $db->fetchAll($q));
}

/**
 * @param int[] $ids
 * @return BaconianaCollectionItem[] Empty when $ids is empty
 */
function baconiana_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL
    if (empty($ids))
        return [];

    $db = DB();
    $ids_sql = implode(',', array_map(intval(...), $ids));
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id IN ({$ids_sql})");
    return array_map(BaconianaCollectionItem::create_instance(...), $db->fetchAll($q));
}

/**
 * Loads a folder by id, optionally together with its chain of parent folders.
 *
 * @param int $folder_id
 * @param bool $with_parents When true, an array is returned: the folder first, then its ancestors
 * @return BaconianaCollectionItem|BaconianaCollectionItem[]|null null when the row is missing or is not a folder
 */
function baconiana_get_folder(int $folder_id, bool $with_parents = false): BaconianaCollectionItem|array|null {
    $db = DB();
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id=?", $folder_id);
    if (!$db->numRows($q))
        return null;

    $item = new BaconianaCollectionItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    if (!$with_parents)
        return $item;

    $items = [$item];
    if ($item->parentId) {
        // walk up recursively; a missing or non-folder parent ends the chain
        $parents = baconiana_get_folder($item->parentId, true);
        if ($parents !== null)
            $items = array_merge($items, $parents);
    }
    return $items;
}

/**
 * Searches the Baconiana index with custom weights for year, issues and text.
 *
 * @return array See _search()
 */
function baconiana_search(string $q, int $offset = 0, int $count = 0): array {
    return _search(BACONIANA_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: function($matches) {
            // match keys are the ids stored in the index
            return baconiana_get_by_id(array_keys($matches));
        },
        sphinx_client_setup: function(SphinxClient $cl) {
            $cl->setFieldWeights([
                'year' => 10,
                'issues' => 9,
                'text' => 8
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
        }
    );
}

/**
 * Truncates the Baconiana index and re-inserts every item that has a row in baconiana_texts.
 * Items without a text row are skipped.
 */
function baconiana_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".BACONIANA_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $baconiana = baconiana_get(null);
    foreach ($baconiana as $item) {
        $text_q = $db->query("SELECT text FROM baconiana_texts WHERE bcn_id=?", $item->id);
        if (!$db->numRows($text_q))
            continue;

        $text = $db->result($text_q);
        sphinx_execute("INSERT INTO ".BACONIANA_ARCHIVE_SPHINX_RTINDEX." (id, title, year, text) VALUES (?, ?, ?, ?)",
            $item->id, "$item->year ($item->issues)", $item->year, $text);
    }
}

/**
 * @see _get_text_excerpts()
 */
function baconiana_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts('baconiana_texts', 'bcn_id', $ids, $keywords, $before, $after);
}


// ----------------------------------------------------------
// ------------------- Books and Articles -------------------
// ----------------------------------------------------------

enum BookFileType: string {
    case NONE = 'none';
    case BOOK = 'book';
    case ARTICLE = 'article';
}

enum BookCategory: string {
    case BOOKS = 'books';
    case MISC = 'misc';
}

/**
 * A row of the books table: a folder, a book or an article.
 */
class BookItem extends model implements FilesItemInterface {

    const DB_TABLE = 'books';

    public int $id;
    public int $parentId;
    public string $author;
    public string $title;
    public string $subtitle;
    public int $year;
    public int $size;
    public FilesItemType $type;
    public BookFileType $fileType;
    public string $path;
    public bool $external;
    public BookCategory $category;

    use FilesItemSizeTrait;
    use FilesItemTypeTrait;

    // explicit cast: $id is an int while the interface method returns a string
    public function getId(): string { return (string)$this->id; }

    /**
     * @return string Site-relative URL for non-external folders, otherwise an absolute URL
     *                made of $config['files_domain'] and the path
     */
    public function getUrl(): string {
        if ($this->isFolder() && !$this->external)
            return '/files/'.$this->id.'/';

        global $config;
        $buf = 'https://'.$config['files_domain'];
        // make sure exactly one slash separates the domain from the path
        if (!str_starts_with($this->path, '/'))
            $buf .= '/';
        $buf .= $this->path;
        return $buf;
    }

    /**
     * @return string|null Escaped "author. title" for files that have an author, null otherwise
     */
    public function getTitleHtml(): ?string {
        if ($this->isFolder() || !$this->author)
            return null;

        // NOTE(review): the empty string literals below look like leftover markup placeholders - confirm
        $buf = ''.htmlescape($this->author).'';
        if (!str_ends_with($this->author, '.'))
            $buf .= '.';
        $buf .= ' '.htmlescape($this->title).'';
        return $buf;
    }

    public function getTitle(): string { return $this->title; }

    /**
     * @param string|null $hl_matched Not used by this item type
     * @return array Empty for folders; for files the size and upper-cased extension under 'items'
     */
    public function getMeta(?string $hl_matched = null): array {
        if ($this->isFolder())
            return [];

        $items = [
            sizeString($this->size),
            strtoupper($this->getExtension())
        ];

        return [
            'inline' => false,
            'items' => $items
        ];
    }

    protected function getExtension(): string {
        return extension(basename($this->path));
    }

    public function isAvailable(): bool { return true; }

    public function isTargetBlank(): bool {
        return $this->isFile() || $this->external;
    }

    /**
     * @return string|null The subtitle, or the year when there is no subtitle, in parentheses;
     *                     null when both are empty
     */
    public function getSubtitle(): ?string {
        if (!$this->year && !$this->subtitle)
            return null;

        $buf = '(';
        $buf .= $this->subtitle ?: $this->year;
        $buf .= ')';
        return $buf;
    }

}

/**
 * Lists the children of a folder within a category.
 *
 * Books are ordered by type, then by year (only below the root), author and title;
 * other categories are ordered by type and title.
 *
 * @return BookItem[]
 */
function books_get(int $parent_id = 0, BookCategory $category = BookCategory::BOOKS): array {
    $db = DB();

    if ($category === BookCategory::BOOKS) {
        $order_by = "type, ".($parent_id != 0 ? 'year, ': '')."author, title";
    } else {
        $order_by = "type, title";
    }

    $q = $db->query("SELECT * FROM books WHERE category=? AND parent_id=? ORDER BY $order_by",
        $category->value, $parent_id);
    return array_map(BookItem::create_instance(...), $db->fetchAll($q));
}

/**
 * Loads a folder by id, optionally together with its chain of parent folders.
 *
 * @param int $id
 * @param bool $with_parents When true, an array is returned: the folder first, then its ancestors
 * @return BookItem|BookItem[]|null null when the row is missing or is not a folder
 */
function books_get_folder(int $id, bool $with_parents = false): BookItem|array|null {
    $db = DB();
    $q = $db->query("SELECT * FROM books WHERE id=?", $id);
    if (!$db->numRows($q))
        return null;

    $item = new BookItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    if (!$with_parents)
        return $item;

    $items = [$item];
    if ($item->parentId) {
        // walk up recursively; a missing or non-folder parent ends the chain
        $parents = books_get_folder($item->parentId, true);
        if ($parents !== null)
            $items = array_merge($items, $parents);
    }
    return $items;
}