type == FilesItemType::FOLDER; } public function isFile(): bool { return $this->type == FilesItemType::FILE; } }

/**
 * Adds a size accessor to file-list items.
 *
 * The using class must also provide isFile(); getSize() only reports a size
 * for files and yields null for everything else.
 */
trait FilesItemSizeTrait
{
    public int $size;

    public function getSize(): ?int
    {
        return $this->isFile() ? $this->size : null;
    }
}

/**
 * A top-level entry representing a whole collection; it always behaves as a folder.
 */
class CollectionItem implements FilesItemInterface
{
    public function __construct(
        protected FilesCollection $collection
    ) {}

    public function getTitleHtml(): ?string
    {
        return null;
    }

    public function getId(): string
    {
        return $this->collection->value;
    }

    public function isFolder(): bool
    {
        return true;
    }

    public function isFile(): bool
    {
        return false;
    }

    public function isAvailable(): bool
    {
        return true;
    }

    /**
     * The Baconiana collection links to the files domain; every other
     * collection links to a local /files/<collection>/ page.
     */
    public function getUrl(): string
    {
        global $config;

        return match ($this->collection) {
            FilesCollection::Baconiana => 'https://'.$config['files_domain'].'/Baconiana/',
            default => '/files/'.$this->collection->value.'/',
        };
    }

    public function getSize(): ?int
    {
        return null;
    }

    public function getTitle(): string
    {
        return lang("files_{$this->collection->value}_collection");
    }

    public function getMeta(?string $hl_matched = null): array
    {
        return [];
    }

    public function isTargetBlank(): bool
    {
        return $this->collection === FilesCollection::Baconiana;
    }

    public function getSubtitle(): ?string
    {
        return null;
    }
}

/**
 * A row of the wff_collection table: either a folder or a PDF document.
 */
class WFFCollectionItem extends model implements FilesItemInterface
{
    const DB_TABLE = 'wff_collection';

    use FilesItemTypeTrait;
    use FilesItemSizeTrait;

    public int $id;
    public int $parentId;
    public string $title;
    public string $documentId;
    public string $path;
    public int $filesCount;

    public function getTitleHtml(): ?string
    {
        return null;
    }

    public function getId(): string
    {
        return (string)$this->id;
    }

    public function isAvailable(): bool
    {
        return true;
    }

    public function getTitle(): string
    {
        return $this->title;
    }

    /**
     * For folders the identifier is derived from the last path component
     * (underscores replaced by spaces); for files it is the stored document id.
     */
    public function getDocumentId(): string
    {
        return $this->isFolder()
            ? str_replace('_', ' ', basename($this->path))
            : $this->documentId;
    }

    public function isTargetBlank(): bool
    {
        return $this->isFile();
    }

    public function getSubtitle(): ?string
    {
        return null;
    }

    public function getUrl(): string
    {
        global $config;

        return $this->isFolder()
            ? "/files/wff/{$this->id}/"
            : "https://{$config['files_domain']}/NSA Friedman Documents/{$this->path}";
    }

    /**
     * @param string|null $hl_matched Passed through to hl_matched() for the document id entry
     * @return array Empty for root folders (no parent); otherwise an array with an 'items' list
     */
    public function getMeta(?string $hl_matched = null): array
    {
        if ($this->isFolder()) {
            if (!$this->parentId)
                return [];

            return [
                'items' => [
                    hl_matched($this->getDocumentId(), $hl_matched),
                    lang_num('files_count', $this->filesCount)
                ]
            ];
        }

        return [
            'inline' => false,
            'items' => [
                // Forward $hl_matched here as well, exactly like the folder branch does;
                // previously it was dropped, so document ids were never highlighted.
                hl_matched('Document '.$this->documentId, $hl_matched),
                sizeString($this->size),
                'PDF'
            ]
        ];
    }
}

/**
 * A row of the mdf_collection table: one issue, always a PDF opened in a new tab.
 */
class MDFCollectionItem extends model implements FilesItemInterface
{
    const DB_TABLE = 'mdf_collection';

    use FilesItemTypeTrait;
    use FilesItemSizeTrait;

    public int $id;
    public int $issue;
    public string $path;
    public string $date;
    public int $volume;
    public int $pageFrom;
    public int $pageTo;
    public int $pdfPages;
    public int $size;

    public function isAvailable(): bool
    {
        return true;
    }

    public function getTitleHtml(): ?string
    {
        return null;
    }

    public function getTitle(): string
    {
        return "№{$this->issue}, {$this->getHumanFriendlyDate()}";
    }

    /**
     * @return string The issue date formatted as 'j M Y'
     */
    public function getHumanFriendlyDate(): string
    {
        $dt = new DateTime($this->date);
        return $dt->format('j M Y');
    }

    public function isTargetBlank(): bool
    {
        return true;
    }

    public function getId(): string
    {
        // Explicit cast: the property is an int while the declared return type is string.
        return (string)$this->id;
    }

    public function getUrl(): string
    {
        global $config;
        return 'https://'.$config['files_domain'].'/Mercure-de-France-OCR/'.$this->path;
    }

    public function getMeta(?string $hl_matched = null): array
    {
        return [
            'inline' => true,
            'items' => [
                'Vol. '.$this->getRomanVolume(),
                'pp. '.$this->pageFrom.'-'.$this->pageTo,
                sizeString($this->size),
                'PDF'
            ]
        ];
    }

    public function getRomanVolume(): string
    {
        return _arabic_to_roman($this->volume);
    }

    public function getSubtitle(): ?string
    {
        // No subtitle: volume and page range are reported through getMeta().
        return null;
    }
}

/**
 * A row of the books table: a folder or a downloadable file, optionally external.
 */
class BookItem extends model implements FilesItemInterface
{
    const DB_TABLE = 'books';

    public int $id;
    public int $parentId;
    public string $author;
    public string $title;
    public int $year;
    public int $size;
    public FilesItemType $type;
    public BookFileType $fileType;
    public string $path;
    public bool $external;

    use FilesItemSizeTrait;
    use FilesItemTypeTrait;

    public function getId(): string
    {
        // Explicit cast: the property is an int while the declared return type is string.
        return (string)$this->id;
    }

    /**
     * Non-external folders link to a local /files/<id>/ page; everything else
     * links to the files domain, with a slash inserted when the path lacks one.
     */
    public function getUrl(): string
    {
        if ($this->isFolder() && !$this->external)
            return '/files/'.$this->id.'/';

        global $config;

        $buf = 'https://'.$config['files_domain'];
        if (!str_starts_with($this->path, '/'))
            $buf .= '/';
        $buf .= $this->path;

        return $buf;
    }

    /**
     * @return string|null "Author. Title" with both parts escaped, or null for folders
     *                     and for items without an author
     */
    public function getTitleHtml(): ?string
    {
        if ($this->isFolder() || !$this->author)
            return null;

        // NOTE(review): the empty string literals around the escaped values look like
        // placeholders for wrapping markup - confirm against the original file.
        $buf = ''.htmlescape($this->author).'';
        if (!str_ends_with($this->author, '.'))
            $buf .= '.';
        $buf .= ' '.htmlescape($this->title).'';

        return $buf;
    }

    public function getTitle(): string
    {
        return $this->title;
    }

    public function getMeta(?string $hl_matched = null): array
    {
        if ($this->isFolder())
            return [];

        $items = [
            sizeString($this->size),
            strtoupper($this->getExtension())
        ];

        return [
            'inline' => false,
            'items' => $items
        ];
    }

    protected function getExtension(): string
    {
        return extension(basename($this->path));
    }

    public function isAvailable(): bool
    {
        return true;
    }

    public function isTargetBlank(): bool
    {
        return $this->isFile() || $this->external;
    }

    public function getSubtitle(): ?string
    {
        if (!$this->year)
            return null;

        return '('.$this->year.')';
    }
}

/**
 * Re-orders items so they follow the order of the given id list.
 *
 * Used by the search functions: rows fetched with "WHERE id IN (...)" come back
 * in database order, which would discard the ranking produced by the search engine.
 * Ids without a matching item are skipped.
 *
 * @param object[] $items Items exposing a public $id property
 * @param int[] $ids Ids in the desired order
 * @return array Items in the order of $ids
 */
function _files_order_by_ids(array $items, array $ids): array
{
    $by_id = [];
    foreach ($items as $item)
        $by_id[$item->id] = $item;

    $ordered = [];
    foreach ($ids as $id) {
        if (isset($by_id[$id]))
            $ordered[] = $by_id[$id];
    }

    return $ordered;
}

/**
 * Loads a folder from wff_collection.
 *
 * @param int $folder_id
 * @param bool $with_parents When true, returns the folder followed by its chain of parents
 * @return WFFCollectionItem|WFFCollectionItem[]|null Null when the row is missing or is not a folder
 */
function wff_get_folder(int $folder_id, bool $with_parents = false): WFFCollectionItem|array|null
{
    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection WHERE id=?", $folder_id);
    if (!$db->numRows($q))
        return null;

    $item = new WFFCollectionItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    if (!$with_parents)
        return $item;

    $items = [$item];
    if ($item->parentId) {
        // Walk up recursively; each call returns the parent followed by its own ancestors.
        $parents = wff_get_folder($item->parentId, true);
        if ($parents !== null)
            $items = array_merge($items, $parents);
    }

    return $items;
}

/**
 * Lists wff_collection rows ordered by title, optionally restricted by parent.
 *
 * @param int|int[]|null $parent_id Single parent id, list of parent ids, or null for no restriction
 * @return WFFCollectionItem[] Empty when an empty list of parent ids is given
 */
function wff_get(int|array|null $parent_id = null): array
{
    $db = DB();

    $where = [];
    $args = [];

    if (is_int($parent_id)) {
        $where[] = "parent_id=?";
        $args[] = $parent_id;
    } elseif (is_array($parent_id)) {
        // Ids are interpolated into the statement, so force them to integers first;
        // an empty list would otherwise produce the invalid "IN ()".
        $parent_ids = array_map('intval', $parent_id);
        if (empty($parent_ids))
            return [];
        $where[] = "parent_id IN (".implode(", ", $parent_ids).")";
    }

    $sql = "SELECT * FROM wff_collection";
    if (!empty($where))
        $sql .= " WHERE ".implode(" AND ", $where);
    $sql .= " ORDER BY title";

    $q = $db->query($sql, ...$args);
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}

/**
 * @param int[] $ids
 * @return WFFCollectionItem[] Empty when $ids is empty
 */
function wff_get_by_id(array $ids): array
{
    // Ids are interpolated into the statement: cast them and avoid the invalid "IN ()".
    $ids = array_map('intval', $ids);
    if (empty($ids))
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}

/**
 * Full-text search over the WFF index.
 *
 * @param string $q Raw user query; it is passed through sphinx_mkquery() first
 * @param int $offset
 * @param int $count
 * @return array ['count' => total matches found, 'items' => WFFCollectionItem[] in result order]
 */
function wff_search(string $q, int $offset = 0, int $count = 0): array
{
    $query_filtered = sphinx_mkquery($q);

    $cl = sphinx_client();
    $cl->setLimits($offset, $count);

    $cl->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);
    $cl->setFieldWeights([
        'title' => 50,
        'document_id' => 60,
    ]);
    $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
    $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_EXTENDED, '@relevance DESC, is_folder DESC');

    // run search
    $final_query = (string)$query_filtered;
    $result = $cl->query($final_query, WFF_ARCHIVE_SPHINX_RTINDEX);
    $error = $cl->getLastError();
    $warning = $cl->getLastWarning();
    if ($error)
        logError(__FUNCTION__, $error);
    if ($warning)
        logWarning(__FUNCTION__, $warning);
    if ($result === false)
        return ['count' => 0, 'items' => []];

    $total_found = (int)$result['total_found'];

    $items = [];
    if (!empty($result['matches'])) {
        $ids = array_keys($result['matches']);
        // Restore the order of the matches, which the SQL lookup does not preserve.
        $items = _files_order_by_ids(wff_get_by_id($ids), $ids);
    }

    return ['count' => $total_found, 'items' => $items];
}

/**
 * Truncates the WFF index and re-inserts every wff_collection row.
 * For files, the text is read from a .txt file named after the PDF.
 */
function wff_reindex(): void
{
    sphinx_execute("TRUNCATE RTINDEX ".WFF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection");
    while ($row = $db->fetch($q)) {
        $item = new WFFCollectionItem($row);

        $txt = '';
        if ($item->isFile()) {
            $txt_path = '/home/user/nsa/txt/'.str_replace('.pdf', '.txt', basename($item->path));
            $txt = is_file($txt_path) ? file_get_contents($txt_path) : false;
            if ($txt === false) {
                // Index the document without text rather than passing false to the index.
                logWarning(__FUNCTION__, 'failed to read '.$txt_path);
                $txt = '';
            }
        }

        sphinx_execute(
            "INSERT INTO ".WFF_ARCHIVE_SPHINX_RTINDEX." (id, document_id, title, text, is_folder, parent_id) VALUES (?, ?, ?, ?, ?, ?)",
            $item->id,
            $item->getDocumentId(),
            $item->title,
            $txt,
            (int)$item->isFolder(),
            $item->parentId
        );
    }
}

/**
 * Text excerpts for MDF items; see _get_text_excerpts() for the parameters and result shape.
 */
function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array
{
    return _get_text_excerpts('mdf_texts', 'mdf_id', $ids, $keywords, $before, $after);
}

/**
 * Text excerpts for WFF items; see _get_text_excerpts() for the parameters and result shape.
 */
function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array
{
    return _get_text_excerpts('wff_texts', 'wff_id', $ids, $keywords, $before, $after);
}

/**
 * Cuts an excerpt around the earliest keyword occurrence in each stored text.
 *
 * @param string $table Table holding the texts (interpolated into SQL; pass trusted names only)
 * @param string $field_id Name of the id column (interpolated into SQL; pass trusted names only)
 * @param int[] $ids
 * @param string[] $keywords Must already be lower-cased
 * @param int $before Number of characters to include before the match
 * @param int $after Number of characters to include after the match position
 * @return array Keyed by id; each value is null (no row found, or nothing to search for) or
 *               ['excerpt' => string with whitespace runs collapsed, 'index' => int start position]
 */
function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after): array
{
    $results = [];
    foreach ($ids as $id)
        $results[$id] = null;

    // Nothing to look up: avoid building "IN ()" or an expression without keywords.
    if (empty($ids) || empty($keywords))
        return $results;

    $db = DB();

    // For every keyword: its position in the text, or one past the end when it is absent,
    // so that absent keywords never win the LEAST() below.
    $positions = [];
    foreach ($keywords as $keyword) {
        $locate = "LOCATE('".$db->escape($keyword)."', text)";
        $positions[] = "IF({$locate} > 0, {$locate}, CHAR_LENGTH(text) + 1)";
    }

    // LEAST() is only used with two or more keywords; a single one is used directly.
    $combined_parts = count($positions) > 1
        ? 'LEAST('.implode(', ', $positions).')'
        : $positions[0];

    $total = $before + $after;
    $sql_ids = implode(',', array_map('intval', $ids));

    $sql = "SELECT
            {$field_id} AS id,
            GREATEST(
                1,
                {$combined_parts} - {$before}
            ) AS excerpt_start_index,
            SUBSTRING(
                text,
                GREATEST(
                    1,
                    {$combined_parts} - {$before}
                ),
                LEAST(
                    CHAR_LENGTH(text),
                    {$total} + {$combined_parts} - GREATEST(1, {$combined_parts} - {$before})
                )
            ) AS excerpt
        FROM
            {$table}
        WHERE
            {$field_id} IN ({$sql_ids})";

    $q = $db->query($sql);
    while ($row = $db->fetch($q)) {
        $results[$row['id']] = [
            'excerpt' => preg_replace('/\s+/', ' ', $row['excerpt']),
            'index' => (int)$row['excerpt_start_index']
        ];
    }

    return $results;
}

/**
 * @return MDFCollectionItem[] All rows of mdf_collection ordered by date
 */
function mdf_get(): array
{
    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection ORDER BY `date`");
    return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
}

/**
 * @param int[] $ids
 * @return MDFCollectionItem[] Empty when $ids is empty
 */
function mdf_get_by_id(array $ids): array
{
    // Ids are interpolated into the statement: cast them and avoid the invalid "IN ()".
    $ids = array_map('intval', $ids);
    if (empty($ids))
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
}

/**
 * Full-text search over the MDF index.
 *
 * @param string $q Raw user query; it is passed through sphinx_mkquery() first
 * @param int $offset
 * @param int $count
 * @return array ['count' => total matches found, 'items' => MDFCollectionItem[] in result order]
 */
function mdf_search(string $q, int $offset = 0, int $count = 0): array
{
    $query_filtered = sphinx_mkquery($q);

    $cl = sphinx_client();
    $cl->setLimits($offset, $count);

    $cl->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);
    $cl->setFieldWeights([
        'date' => 10,
        'issue' => 9,
        'text' => 8
    ]);
    $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
    $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);

    // run search
    $final_query = (string)$query_filtered;
    $result = $cl->query($final_query, MDF_ARCHIVE_SPHINX_RTINDEX);
    $error = $cl->getLastError();
    $warning = $cl->getLastWarning();
    if ($error)
        logError(__FUNCTION__, $error);
    if ($warning)
        logWarning(__FUNCTION__, $warning);
    if ($result === false)
        return ['count' => 0, 'items' => []];

    $total_found = (int)$result['total_found'];

    $items = [];
    if (!empty($result['matches'])) {
        $ids = array_keys($result['matches']);
        // Restore the order of the matches, which the SQL lookup does not preserve.
        $items = _files_order_by_ids(mdf_get_by_id($ids), $ids);
    }

    return ['count' => $total_found, 'items' => $items];
}

/**
 * Truncates the MDF index and re-inserts every issue together with its text from mdf_texts.
 */
function mdf_reindex(): void
{
    sphinx_execute("TRUNCATE RTINDEX ".MDF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $mdf = mdf_get();
    foreach ($mdf as $item) {
        $text = $db->result($db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $item->id));
        sphinx_execute(
            "INSERT INTO ".MDF_ARCHIVE_SPHINX_RTINDEX." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)",
            $item->id,
            $item->volume,
            (string)$item->issue,
            $item->getHumanFriendlyDate(),
            $text
        );
    }
}

/**
 * Lists the direct children of a folder within a category.
 *
 * Books are ordered by type, then (inside a sub-folder only) by year, then by author
 * and title; every other category is ordered by type and title.
 *
 * @return BookItem[]
 */
function books_get(int $parent_id = 0, BookCategory $category = BookCategory::BOOKS): array
{
    $db = DB();

    if ($category === BookCategory::BOOKS) {
        $order_by = "type, ".($parent_id != 0 ? 'year, ': '')."author, title";
    } else {
        $order_by = "type, title";
    }

    $q = $db->query("SELECT * FROM books WHERE category=? AND parent_id=? ORDER BY $order_by",
        $category->value, $parent_id);
    return array_map('BookItem::create_instance', $db->fetchAll($q));
}

/**
 * @return BookItem|null Null when the row is missing or is not a folder
 */
function books_get_folder(int $id): ?BookItem
{
    $db = DB();
    $q = $db->query("SELECT * FROM books WHERE id=?", $id);
    if (!$db->numRows($q))
        return null;

    $item = new BookItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    return $item;
}

/**
 * Converts a number to Roman numerals by repeatedly subtracting the largest
 * value that still fits. Values below 1 produce an empty string.
 *
 * @param int $number
 * @return string
 */
function _arabic_to_roman($number)
{
    $map = [
        1000 => 'M',
        900 => 'CM',
        500 => 'D',
        400 => 'CD',
        100 => 'C',
        90 => 'XC',
        50 => 'L',
        40 => 'XL',
        10 => 'X',
        9 => 'IX',
        5 => 'V',
        4 => 'IV',
        1 => 'I',
    ];

    $result = '';
    foreach ($map as $arabic => $roman) {
        while ($number >= $arabic) {
            $result .= $roman;
            $number -= $arabic;
        }
    }

    return $result;
}