baconiana archive
This commit is contained in:
parent
caeafc3f54
commit
53d8861dbb
@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
const ROUTER_VERSION = 6;
|
||||
const ROUTER_VERSION = 7;
|
||||
const ROUTER_MC_KEY = '4in1/routes';
|
||||
|
||||
$RouterInput = [];
|
||||
|
@ -410,3 +410,31 @@ function format_time($ts, array $opts = array()) {
|
||||
|
||||
return $date;
|
||||
}
|
||||
|
||||
/**
 * Converts a number to Roman numeral notation.
 *
 * Works greedily: the largest numeral that still fits is appended and its
 * value subtracted until nothing is left. Numbers below 1 yield an empty
 * string; there is no upper bound (4000 becomes "MMMM").
 *
 * @param int $number Number to convert (not type-enforced)
 * @return string
 */
function arabic_to_roman($number) {
    // Numeral => value, largest first; subtractive pairs (CM, IV, ...) included
    $numerals = [
        'M'  => 1000,
        'CM' => 900,
        'D'  => 500,
        'CD' => 400,
        'C'  => 100,
        'XC' => 90,
        'L'  => 50,
        'XL' => 40,
        'X'  => 10,
        'IX' => 9,
        'V'  => 5,
        'IV' => 4,
        'I'  => 1,
    ];

    $roman = '';
    foreach ($numerals as $symbol => $value) {
        for (; $number >= $value; $number -= $value)
            $roman .= $symbol;
    }

    return $roman;
}
|
||||
|
@ -34,7 +34,6 @@ class FilesHandler extends request_handler {
|
||||
function GET_collection() {
|
||||
list($collection, $folder_id, $query, $offset) = input('collection, i:folder_id, q, i:offset');
|
||||
$collection = FilesCollection::from($collection);
|
||||
$files = [];
|
||||
$parents = null;
|
||||
|
||||
$query = trim($query);
|
||||
@ -48,125 +47,86 @@ class FilesHandler extends request_handler {
|
||||
|
||||
$vars = [];
|
||||
$text_excerpts = null;
|
||||
$func_prefix = $collection->value;
|
||||
|
||||
switch ($collection) {
|
||||
case FilesCollection::WilliamFriedman:
|
||||
if ($query !== null) {
|
||||
$files = wff_search($query, $offset, self::SEARCH_RESULTS_PER_PAGE);
|
||||
$vars += [
|
||||
'search_count' => $files['count'],
|
||||
'search_query' => $query
|
||||
];
|
||||
if ($query !== null) {
|
||||
$files = call_user_func("{$func_prefix}_search", $query, $offset, self::SEARCH_RESULTS_PER_PAGE);
|
||||
$vars += [
|
||||
'search_count' => $files['count'],
|
||||
'search_query' => $query
|
||||
];
|
||||
|
||||
/** @var WFFCollectionItem[] $files */
|
||||
$files = $files['items'];
|
||||
/** @var WFFCollectionItem[]|MDFCollectionItem[]|BaconianaCollectionItem[] $files */
|
||||
$files = $files['items'];
|
||||
|
||||
$query_words = array_map('mb_strtolower', preg_split('/\s+/', $query));
|
||||
$found = [];
|
||||
$result_ids = [];
|
||||
foreach ($files as $file) {
|
||||
if ($file->isFolder())
|
||||
continue;
|
||||
$result_ids[] = $file->id;
|
||||
$query_words = array_map('mb_strtolower', preg_split('/\s+/', $query));
|
||||
$found = [];
|
||||
$result_ids = [];
|
||||
foreach ($files as $file) {
|
||||
if ($file->isFolder())
|
||||
continue;
|
||||
$result_ids[] = $file->id;
|
||||
|
||||
foreach ([
|
||||
mb_strtolower($file->getTitle()),
|
||||
strtolower($file->documentId)
|
||||
] as $haystack) {
|
||||
foreach ($query_words as $qw) {
|
||||
if (mb_strpos($haystack, $qw) !== false) {
|
||||
$found[$file->id] = true;
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$found = array_map('intval', array_keys($found));
|
||||
$not_found = array_diff($result_ids, $found);
|
||||
if (!empty($not_found))
|
||||
$text_excerpts = wff_get_text_excerpts($not_found, $query_words);
|
||||
|
||||
if (is_xhr_request()) {
|
||||
ajax_ok([
|
||||
...$vars,
|
||||
'new_offset' => $offset + count($files),
|
||||
'html' => skin('files')->collection_files($files, $query, $text_excerpts)
|
||||
]);
|
||||
}
|
||||
} else {
|
||||
if ($folder_id) {
|
||||
$parents = wff_get_folder($folder_id, true);
|
||||
if (!$parents)
|
||||
not_found();
|
||||
if (count($parents) > 1)
|
||||
$parents = array_reverse($parents);
|
||||
}
|
||||
$files = wff_get($folder_id);
|
||||
}
|
||||
|
||||
$title = lang('files_wff_collection');
|
||||
if ($folder_id)
|
||||
$title .= ' - '.htmlescape($parents[count($parents)-1]->getTitle());
|
||||
if ($query)
|
||||
$title .= ' - '.htmlescape($query);
|
||||
set_title($title);
|
||||
|
||||
break;
|
||||
|
||||
case FilesCollection::MercureDeFrance:
|
||||
if ($query !== null) {
|
||||
$files = mdf_search($query, $offset, self::SEARCH_RESULTS_PER_PAGE);
|
||||
$vars += [
|
||||
'search_count' => $files['count'],
|
||||
'search_query' => $query
|
||||
];
|
||||
|
||||
/** @var MDFCollectionItem[] $files */
|
||||
$files = $files['items'];
|
||||
|
||||
$query_words = array_map('mb_strtolower', preg_split('/\s+/', $query));
|
||||
$found = [];
|
||||
$result_ids = [];
|
||||
foreach ($files as $file) {
|
||||
$result_ids[] = $file->id;
|
||||
|
||||
foreach ([
|
||||
switch ($collection) {
|
||||
case FilesCollection::MercureDeFrance:
|
||||
$candidates = [
|
||||
$file->date,
|
||||
(string)$file->issue
|
||||
] as $haystack) {
|
||||
foreach ($query_words as $qw) {
|
||||
if (mb_strpos($haystack, $qw) !== false) {
|
||||
$found[$file->id] = true;
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$found = array_map('intval', array_keys($found));
|
||||
$not_found = array_diff($result_ids, $found);
|
||||
if (!empty($not_found))
|
||||
$text_excerpts = mdf_get_text_excerpts($not_found, $query_words);
|
||||
|
||||
if (is_xhr_request()) {
|
||||
ajax_ok([
|
||||
...$vars,
|
||||
'new_offset' => $offset + count($files),
|
||||
'html' => skin('files')->collection_files($files, $query, $text_excerpts)
|
||||
]);
|
||||
}
|
||||
} else {
|
||||
$files = mdf_get();
|
||||
];
|
||||
break;
|
||||
case FilesCollection::WilliamFriedman:
|
||||
$candidates = [
|
||||
mb_strtolower($file->getTitle()),
|
||||
strtolower($file->documentId)
|
||||
];
|
||||
break;
|
||||
case FilesCollection::Baconiana:
|
||||
$candidates = [
|
||||
// TODO
|
||||
];
|
||||
break;
|
||||
}
|
||||
|
||||
$title = lang('files_mdf_collection');
|
||||
if ($query)
|
||||
$title .= ' - '.htmlescape($query);
|
||||
set_title($title);
|
||||
break;
|
||||
foreach ($candidates as $haystack) {
|
||||
foreach ($query_words as $qw) {
|
||||
if (mb_strpos($haystack, $qw) !== false) {
|
||||
$found[$file->id] = true;
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$found = array_map('intval', array_keys($found));
|
||||
$not_found = array_diff($result_ids, $found);
|
||||
if (!empty($not_found))
|
||||
$text_excerpts = call_user_func("{$func_prefix}_get_text_excerpts", $not_found, $query_words);
|
||||
|
||||
if (is_xhr_request()) {
|
||||
ajax_ok([
|
||||
...$vars,
|
||||
'new_offset' => $offset + count($files),
|
||||
'html' => skin('files')->collection_files($files, $query, $text_excerpts)
|
||||
]);
|
||||
}
|
||||
} else {
|
||||
if (in_array($collection, [FilesCollection::WilliamFriedman, FilesCollection::Baconiana]) && $folder_id) {
|
||||
$parents = call_user_func("{$func_prefix}_get_folder", $folder_id, true);
|
||||
if (!$parents)
|
||||
not_found();
|
||||
if (count($parents) > 1)
|
||||
$parents = array_reverse($parents);
|
||||
}
|
||||
$files = call_user_func("{$func_prefix}_get", $folder_id);
|
||||
}
|
||||
|
||||
$title = lang('files_'.$collection->value.'_collection');
|
||||
if ($folder_id && $parents)
|
||||
$title .= ' - '.htmlescape($parents[count($parents)-1]->getTitle());
|
||||
if ($query)
|
||||
$title .= ' - '.htmlescape($query);
|
||||
set_title($title);
|
||||
|
||||
render('files/collection',
|
||||
...$vars,
|
||||
collection: $collection,
|
||||
|
763
lib/files.php
763
lib/files.php
@ -1,37 +1,23 @@
|
||||
<?php
|
||||
|
||||
use Sphinx\SphinxClient;
|
||||
|
||||
require_once 'engine/sphinx.php';
|
||||
|
||||
|
||||
// ----------------------------------------------
|
||||
// ------------------- Common -------------------
|
||||
// ----------------------------------------------
|
||||
|
||||
const WFF_ARCHIVE_SPHINX_RTINDEX = 'wff_collection';
|
||||
const MDF_ARCHIVE_SPHINX_RTINDEX = 'mdf_archive';
|
||||
//const BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
|
||||
|
||||
enum FilesCollection: string {
|
||||
case WilliamFriedman = 'wff';
|
||||
case MercureDeFrance = 'mdf';
|
||||
case Baconiana = 'baconiana';
|
||||
|
||||
public function isSearchSupported(): bool {
|
||||
return $this == FilesCollection::WilliamFriedman || $this == FilesCollection::MercureDeFrance;
|
||||
}
|
||||
}
|
||||
const BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
|
||||
|
||||
enum FilesItemType: string {
|
||||
case FILE = 'file';
|
||||
case FOLDER = 'folder';
|
||||
}
|
||||
|
||||
enum BookFileType: string {
|
||||
case NONE = 'none';
|
||||
case BOOK = 'book';
|
||||
case ARTICLE = 'article';
|
||||
}
|
||||
|
||||
enum BookCategory: string {
|
||||
case BOOKS = 'books';
|
||||
case MISC = 'misc';
|
||||
}
|
||||
|
||||
interface FilesItemInterface {
|
||||
public function getId(): string;
|
||||
public function isFolder(): bool;
|
||||
@ -57,8 +43,123 @@ trait FilesItemSizeTrait {
|
||||
public function getSize(): ?int { return $this->isFile() ? $this->size : null; }
|
||||
}
|
||||
|
||||
class CollectionItem implements FilesItemInterface {
|
||||
/**
 * Fetches a short text excerpt around the first keyword occurrence for each id.
 *
 * For every row of $table whose $field_id is listed in $ids, the earliest
 * LOCATE() position of any keyword in the `text` column is determined; when no
 * keyword occurs, that position falls back to CHAR_LENGTH(text) + 1. The
 * excerpt starts up to $before characters ahead of the position and its length
 * is capped by the length of the text.
 *
 * $table and $field_id are interpolated into the SQL as is and must be trusted
 * identifiers. Keywords are escaped, ids are cast to integers.
 *
 * @param string $table
 * @param string $field_id
 * @param int[] $ids
 * @param string[] $keywords Must already be lower-cased
 * @param int $before
 * @param int $after
 * @return array Map of id => ['excerpt' => string, 'index' => int]; ids without
 *               a matching row (or when no query is run) map to null
 */
function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after) {
    $results = [];
    foreach ($ids as $id)
        $results[$id] = null;

    // Without ids the query would contain an invalid "IN ()" clause, and without
    // keywords there is no LOCATE() expression to build on: skip the query.
    if (empty($ids) || empty($keywords))
        return $results;

    $db = DB();

    // One position expression per keyword; "not found" becomes CHAR_LENGTH(text) + 1
    // so that it never wins over a real match.
    $positions = [];
    foreach ($keywords as $keyword) {
        $locate = "LOCATE('".$db->escape($keyword)."', text)";
        $positions[] = "IF({$locate} > 0, {$locate}, CHAR_LENGTH(text) + 1)";
    }

    // LEAST() requires at least two arguments, so a single keyword is used directly
    $combined_parts = count($positions) > 1
        ? 'LEAST('.implode(', ', $positions).')'
        : $positions[0];

    // Ids end up in the SQL text, so force them to integers first
    $id_list = implode(',', array_map('intval', $ids));

    $total = $before + $after;
    $sql = "SELECT
        {$field_id} AS id,
        GREATEST(
            1,
            {$combined_parts} - {$before}
        ) AS excerpt_start_index,
        SUBSTRING(
            text,
            GREATEST(
                1,
                {$combined_parts} - {$before}
            ),
            LEAST(
                CHAR_LENGTH(text),
                {$total} + {$combined_parts} - GREATEST(1, {$combined_parts} - {$before})
            )
        ) AS excerpt
    FROM
        {$table}
    WHERE
        {$field_id} IN ({$id_list})";

    $q = $db->query($sql);
    while ($row = $db->fetch($q)) {
        $results[$row['id']] = [
            // collapse line breaks and runs of whitespace into single spaces
            'excerpt' => preg_replace('/\s+/', ' ', $row['excerpt']),
            'index' => (int)$row['excerpt_start_index']
        ];
    }

    return $results;
}
|
||||
|
||||
/**
 * Runs a full-text query against a Sphinx index and loads the matching items.
 *
 * @param string $index Name of the Sphinx index to query
 * @param string $q Raw search query; passed through sphinx_mkquery() first
 * @param int $offset Passed to setLimits()
 * @param int $count Passed to setLimits()
 * @param callable $items_getter Receives the 'matches' array of the Sphinx result
 *                               and returns the items to put into the response
 * @param callable|null $sphinx_client_setup Optional hook that receives the client to
 *                               configure weights/ranking/sorting; when omitted,
 *                               BM25 proximity ranking sorted by relevance is used
 * @return array ['count' => total found, 'items' => loaded items];
 *               count 0 and no items when the query fails
 */
function _search(string $index,
                 string $q,
                 int $offset,
                 int $count,
                 callable $items_getter,
                 ?callable $sphinx_client_setup = null): array {
    $prepared_query = sphinx_mkquery($q);

    $client = sphinx_client();
    $client->setLimits($offset, $count);
    $client->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);

    if (!is_callable($sphinx_client_setup)) {
        // default ranking and ordering
        $client->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $client->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
    } else {
        $sphinx_client_setup($client);
    }

    // run search
    $response = $client->query((string)$prepared_query, $index);

    // errors and warnings are logged regardless of whether a result came back
    $last_error = $client->getLastError();
    $last_warning = $client->getLastWarning();
    if ($last_error)
        logError(__FUNCTION__, $last_error);
    if ($last_warning)
        logWarning(__FUNCTION__, $last_warning);

    if ($response === false)
        return ['count' => 0, 'items' => []];

    $found_total = (int)$response['total_found'];
    $loaded = empty($response['matches'])
        ? []
        : $items_getter($response['matches']);

    return ['count' => $found_total, 'items' => $loaded];
}
|
||||
|
||||
// ---------------------------------------------------
|
||||
// ------------------- Collections -------------------
|
||||
// ---------------------------------------------------
|
||||
|
||||
enum FilesCollection: string {
|
||||
case WilliamFriedman = 'wff';
|
||||
case MercureDeFrance = 'mdf';
|
||||
case Baconiana = 'baconiana';
|
||||
}
|
||||
|
||||
class CollectionItem implements FilesItemInterface {
|
||||
public function __construct(
|
||||
protected FilesCollection $collection
|
||||
) {}
|
||||
@ -69,21 +170,20 @@ class CollectionItem implements FilesItemInterface {
|
||||
public function isFile(): bool { return false; }
|
||||
public function isAvailable(): bool { return true; }
|
||||
public function getUrl(): string {
|
||||
global $config;
|
||||
switch ($this->collection) {
|
||||
case FilesCollection::Baconiana:
|
||||
return 'https://'.$config['files_domain'].'/Baconiana/';
|
||||
default:
|
||||
return '/files/'.$this->collection->value.'/';
|
||||
}
|
||||
return '/files/'.$this->collection->value.'/';
|
||||
}
|
||||
public function getSize(): ?int { return null; }
|
||||
public function getTitle(): string { return lang("files_{$this->collection->value}_collection"); }
|
||||
public function getMeta(?string $hl_matched = null): array { return []; }
|
||||
public function isTargetBlank(): bool { return $this->collection === FilesCollection::Baconiana; }
|
||||
public function isTargetBlank(): bool { return false; }
|
||||
public function getSubtitle(): ?string { return null; }
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// ------------------- William Friedman Archive -------------------
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
class WFFCollectionItem extends model implements FilesItemInterface {
|
||||
|
||||
const DB_TABLE = 'wff_collection';
|
||||
@ -136,6 +236,109 @@ class WFFCollectionItem extends model implements FilesItemInterface {
|
||||
|
||||
}
|
||||
|
||||
/**
 * Loads a folder of the wff_collection table, optionally with its ancestors.
 *
 * @param int $folder_id
 * @param bool $with_parents When true, an array is returned: the folder itself
 *                           first, followed by its parent, grandparent and so on
 * @return WFFCollectionItem|WFFCollectionItem[]|null Null when the row does not
 *                           exist or is not a folder
 */
function wff_get_folder(int $folder_id, bool $with_parents = false): WFFCollectionItem|array|null {
    $db = DB();
    $res = $db->query("SELECT * FROM wff_collection WHERE id=?", $folder_id);
    if (!$db->numRows($res))
        return null;

    $folder = new WFFCollectionItem($db->fetch($res));
    if (!$folder->isFolder())
        return null;

    if (!$with_parents)
        return $folder;

    // Walk up the tree recursively; each level appends its own ancestors
    $chain = [$folder];
    if ($folder->parentId) {
        $ancestors = wff_get_folder($folder->parentId, true);
        if ($ancestors !== null)
            $chain = array_merge($chain, $ancestors);
    }

    return $chain;
}
|
||||
|
||||
/**
 * Lists wff_collection items ordered by title, optionally restricted by parent.
 *
 * @param int|int[]|null $parent_id Null: no restriction. Int: children of that
 *                                  folder. Array: children of any listed folder;
 *                                  an empty array matches nothing.
 * @return array Items created via WFFCollectionItem::create_instance
 */
function wff_get(int|array|null $parent_id = null) {
    $where = [];
    $args = [];

    if (is_int($parent_id)) {
        $where[] = "parent_id=?";
        $args[] = $parent_id;
    } else if (is_array($parent_id)) {
        // The list is embedded into the SQL text, so force integers
        $parent_ids = array_map('intval', $parent_id);

        // "IN ()" is not valid SQL; an empty list cannot match any row anyway
        if (empty($parent_ids))
            return [];

        $where[] = "parent_id IN (".implode(", ", $parent_ids).")";
    }

    $sql = "SELECT * FROM wff_collection";
    if (!empty($where))
        $sql .= " WHERE ".implode(" AND ", $where);
    $sql .= " ORDER BY title";

    $db = DB();
    $q = $db->query($sql, ...$args);

    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}
|
||||
|
||||
/**
 * Loads wff_collection items by id.
 *
 * NOTE(review): rows come back in whatever order the database returns them,
 * not in the order of $ids - confirm that search callers do not rely on the
 * relevance order of the ids they pass in.
 *
 * @param int[] $ids
 * @return WFFCollectionItem[] Empty when $ids is empty
 */
function wff_get_by_id(array $ids): array {
    // Ids are embedded into the SQL text: force integers, and avoid the
    // invalid "IN ()" clause an empty list would produce.
    $ids = array_map('intval', $ids);
    if (empty($ids))
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}
|
||||
|
||||
/**
 * Searches the William Friedman archive index.
 *
 * Title and document id carry explicit field weights; results are ordered by
 * relevance first, then by the is_folder attribute (descending).
 *
 * @param string $q
 * @param int $offset
 * @param int $count
 * @return array ['count' => int, 'items' => WFFCollectionItem[]] as built by _search()
 */
function wff_search(string $q, int $offset = 0, int $count = 0): array {
    $load_items = fn($matches) => wff_get_by_id(array_keys($matches));

    $configure = function(SphinxClient $cl) {
        $weights = [
            'title' => 50,
            'document_id' => 60,
        ];
        $cl->setFieldWeights($weights);
        $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_EXTENDED, '@relevance DESC, is_folder DESC');
    };

    return _search(WFF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: $load_items,
        sphinx_client_setup: $configure
    );
}
|
||||
|
||||
/**
 * Rebuilds the William Friedman Sphinx RT index from the wff_collection table.
 *
 * The index is truncated first, then every row is inserted again. Files get
 * their text from a .txt file named after the item's PDF; folders are indexed
 * with empty text.
 */
function wff_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".WFF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $rows = $db->query("SELECT * FROM wff_collection");
    while ($row = $db->fetch($rows)) {
        $item = new WFFCollectionItem($row);

        $text = $item->isFile()
            ? file_get_contents('/home/user/nsa/txt/'.str_replace('.pdf', '.txt', basename($item->path)))
            : '';

        sphinx_execute(
            "INSERT INTO ".WFF_ARCHIVE_SPHINX_RTINDEX." (id, document_id, title, text, is_folder, parent_id) VALUES (?, ?, ?, ?, ?, ?)",
            $item->id,
            $item->getDocumentId(),
            $item->title,
            $text,
            (int)$item->isFolder(),
            $item->parentId
        );
    }
}
|
||||
|
||||
/**
 * Text excerpts for William Friedman archive items (table wff_texts, keyed by wff_id).
 *
 * @param int[] $ids
 * @param string[] $keywords Passed on unchanged to _get_text_excerpts()
 * @param int $before
 * @param int $after
 * @return array
 */
function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        table: 'wff_texts',
        field_id: 'wff_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after
    );
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// ------------------- Mercure de France -------------------
|
||||
// ---------------------------------------------------------
|
||||
|
||||
class MDFCollectionItem extends model implements FilesItemInterface {
|
||||
|
||||
const DB_TABLE = 'mdf_collection';
|
||||
@ -186,7 +389,7 @@ class MDFCollectionItem extends model implements FilesItemInterface {
|
||||
}
|
||||
|
||||
public function getRomanVolume(): string {
|
||||
return _arabic_to_roman($this->volume);
|
||||
return arabic_to_roman($this->volume);
|
||||
}
|
||||
|
||||
public function getSubtitle(): ?string {
|
||||
@ -195,6 +398,219 @@ class MDFCollectionItem extends model implements FilesItemInterface {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Loads the complete mdf_collection table ordered by date.
 *
 * @return MDFCollectionItem[]
 */
function mdf_get(): array {
    $db = DB();
    $res = $db->query("SELECT * FROM mdf_collection ORDER BY `date`");
    $rows = $db->fetchAll($res);

    return array_map('MDFCollectionItem::create_instance', $rows);
}
|
||||
|
||||
/**
 * Loads mdf_collection items by id.
 *
 * NOTE(review): rows come back in whatever order the database returns them,
 * not in the order of $ids - confirm that search callers do not rely on the
 * relevance order of the ids they pass in.
 *
 * @param int[] $ids
 * @return MDFCollectionItem[] Empty when $ids is empty
 */
function mdf_get_by_id(array $ids): array {
    // Ids are embedded into the SQL text: force integers, and avoid the
    // invalid "IN ()" clause an empty list would produce.
    $ids = array_map('intval', $ids);
    if (empty($ids))
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
}
|
||||
|
||||
/**
 * Searches the Mercure de France index.
 *
 * Date, issue and text carry explicit field weights; results are sorted by relevance.
 *
 * @param string $q
 * @param int $offset
 * @param int $count
 * @return array ['count' => int, 'items' => MDFCollectionItem[]] as built by _search()
 */
function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    $load_items = fn($matches) => mdf_get_by_id(array_keys($matches));

    $configure = function(SphinxClient $cl) {
        $weights = [
            'date' => 10,
            'issue' => 9,
            'text' => 8
        ];
        $cl->setFieldWeights($weights);
        $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
    };

    return _search(MDF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: $load_items,
        sphinx_client_setup: $configure
    );
}
|
||||
|
||||
/**
 * Rebuilds the Mercure de France Sphinx RT index.
 *
 * The index is truncated, then every mdf_collection item is inserted together
 * with its text from the mdf_texts table.
 */
function mdf_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".MDF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    foreach (mdf_get() as $issue) {
        $text_res = $db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $issue->id);
        $text = $db->result($text_res);

        sphinx_execute(
            "INSERT INTO ".MDF_ARCHIVE_SPHINX_RTINDEX." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)",
            $issue->id,
            $issue->volume,
            (string)$issue->issue,
            $issue->getHumanFriendlyDate(),
            $text
        );
    }
}
|
||||
|
||||
/**
 * Text excerpts for Mercure de France items (table mdf_texts, keyed by mdf_id).
 *
 * @param int[] $ids
 * @param string[] $keywords Passed on unchanged to _get_text_excerpts()
 * @param int $before
 * @param int $after
 * @return array
 */
function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        table: 'mdf_texts',
        field_id: 'mdf_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after
    );
}
|
||||
|
||||
|
||||
// -------------------------------------------------
|
||||
// ------------------- Baconiana -------------------
|
||||
// -------------------------------------------------
|
||||
|
||||
/**
 * One row of the baconiana_collection table: either a folder or a PDF file.
 */
class BaconianaCollectionItem extends model implements FilesItemInterface {

    const DB_TABLE = 'baconiana_collection';

    use FilesItemTypeTrait;
    use FilesItemSizeTrait;

    public int $id;
    public int $parentId;
    public int $year;
    public string $issues;
    public string $path;
    public bool $jobc; // Journal of the Bacon Society
    public string $title; // Only for folders

    public function isAvailable(): bool { return true; }

    public function getTitleHtml(): ?string { return null; }

    /**
     * Explicit title when one is set; otherwise the journal name (the old name
     * when $jobc is set) followed by the issue numbers.
     */
    public function getTitle(): string {
        if ($this->title === '') {
            $journal = $this->jobc ? lang('baconiana_old_name') : lang('baconiana');
            return $journal.' №'.$this->issues;
        }

        return $this->title;
    }

    // Files open in a new tab, folders navigate in place
    public function isTargetBlank(): bool { return $this->isFile(); }

    public function getId(): string { return (string)$this->id; }

    /**
     * Folders link to their listing page under /files/, files link to the PDF
     * on the configured files domain.
     */
    public function getUrl(): string {
        if (!$this->isFolder()) {
            global $config;
            return 'https://'.$config['files_domain'].'/'.$this->path;
        }

        return '/files/'.FilesCollection::Baconiana->value.'/'.$this->id.'/';
    }

    /**
     * Meta labels shown for a file: an "Online Edition" marker for years from
     * 2007 on, then the file size and the format. Folders have no meta and
     * return an empty array.
     */
    public function getMeta(?string $hl_matched = null): array {
        if ($this->isFolder())
            return [];

        $labels = [];
        if ($this->year >= 2007)
            $labels[] = 'Online Edition';
        $labels[] = sizeString($this->size);
        $labels[] = 'PDF';

        return [
            'inline' => false,
            'items' => $labels
        ];
    }

    // The year in parentheses, or nothing when no year is set
    public function getSubtitle(): ?string {
        if ($this->year > 0)
            return '('.$this->year.')';

        return null;
    }
}
|
||||
|
||||
/**
 * Lists baconiana_collection items ordered by type, year and id.
 *
 * @param int|null $parent_id Folder whose direct children are returned
 *                            (0 by default); null returns every item
 * @return BaconianaCollectionItem[]
 */
function baconiana_get(?int $parent_id = 0): array {
    $db = DB();

    $sql = "SELECT * FROM baconiana_collection";
    $args = [];
    if ($parent_id !== null) {
        // Bound as a parameter, like the other queries in this file,
        // instead of being escaped and quoted by hand
        $sql .= " WHERE parent_id=?";
        $args[] = $parent_id;
    }
    $sql .= " ORDER BY type, year, id";

    $q = $db->query($sql, ...$args);
    return array_map('BaconianaCollectionItem::create_instance', $db->fetchAll($q));
}
|
||||
|
||||
/**
 * Loads baconiana_collection items by id.
 *
 * NOTE(review): rows come back in whatever order the database returns them,
 * not in the order of $ids - confirm that search callers do not rely on the
 * relevance order of the ids they pass in.
 *
 * @param int[] $ids
 * @return BaconianaCollectionItem[] Empty when $ids is empty
 */
function baconiana_get_by_id(array $ids): array {
    // Ids are embedded into the SQL text: force integers, and avoid the
    // invalid "IN ()" clause an empty list would produce.
    $ids = array_map('intval', $ids);
    if (empty($ids))
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('BaconianaCollectionItem::create_instance', $db->fetchAll($q));
}
|
||||
|
||||
/**
 * Loads a folder of the baconiana_collection table, optionally with its ancestors.
 *
 * @param int $folder_id
 * @param bool $with_parents When true, an array is returned: the folder itself
 *                           first, followed by its parent, grandparent and so on
 * @return BaconianaCollectionItem|BaconianaCollectionItem[]|null Null when the
 *                           row does not exist or is not a folder
 */
function baconiana_get_folder(int $folder_id, bool $with_parents = false): BaconianaCollectionItem|array|null {
    // The return type names BaconianaCollectionItem: this function never creates
    // a WFFCollectionItem, and returning the single folder under a
    // WFFCollectionItem|array|null declaration would raise a TypeError.
    $db = DB();
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id=?", $folder_id);
    if (!$db->numRows($q))
        return null;

    $item = new BaconianaCollectionItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    if (!$with_parents)
        return $item;

    // Walk up the tree recursively; each level appends its own ancestors
    $items = [$item];
    if ($item->parentId) {
        $parents = baconiana_get_folder($item->parentId, true);
        if ($parents !== null)
            $items = array_merge($items, $parents);
    }

    return $items;
}
|
||||
|
||||
/**
 * Searches the Baconiana index.
 *
 * Year, issues and text carry explicit field weights; results are sorted by relevance.
 * NOTE(review): baconiana_reindex() inserts the columns id, title, year and
 * text - confirm that an 'issues' field exists in the index.
 *
 * @param string $q
 * @param int $offset
 * @param int $count
 * @return array ['count' => int, 'items' => BaconianaCollectionItem[]] as built by _search()
 */
function baconiana_search(string $q, int $offset = 0, int $count = 0): array {
    $load_items = fn($matches) => baconiana_get_by_id(array_keys($matches));

    $configure = function(SphinxClient $cl) {
        $weights = [
            'year' => 10,
            'issues' => 9,
            'text' => 8
        ];
        $cl->setFieldWeights($weights);
        $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
    };

    return _search(BACONIANA_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: $load_items,
        sphinx_client_setup: $configure
    );
}
|
||||
|
||||
/**
 * Rebuilds the Baconiana Sphinx RT index.
 *
 * The index is truncated, then every baconiana_collection row (no parent
 * filter) is inserted together with its text from the baconiana_texts table.
 */
function baconiana_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".BACONIANA_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    foreach (baconiana_get(null) as $item) {
        $text_res = $db->query("SELECT text FROM baconiana_texts WHERE bcn_id=?", $item->id);
        $text = $db->result($text_res);

        sphinx_execute(
            "INSERT INTO ".BACONIANA_ARCHIVE_SPHINX_RTINDEX." (id, title, year, text) VALUES (?, ?, ?, ?)",
            $item->id,
            "$item->year ($item->issues)",
            $item->year,
            $text
        );
    }
}
|
||||
|
||||
/**
 * Text excerpts for Baconiana items (table baconiana_texts, keyed by bcn_id).
 *
 * @param int[] $ids
 * @param string[] $keywords Passed on unchanged to _get_text_excerpts()
 * @param int $before
 * @param int $after
 * @return array
 */
function baconiana_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        table: 'baconiana_texts',
        field_id: 'bcn_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after
    );
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------
|
||||
// ------------------- Books and Articles -------------------
|
||||
// ----------------------------------------------------------
|
||||
|
||||
enum BookFileType: string {
|
||||
case NONE = 'none';
|
||||
case BOOK = 'book';
|
||||
case ARTICLE = 'article';
|
||||
}
|
||||
|
||||
enum BookCategory: string {
|
||||
case BOOKS = 'books';
|
||||
case MISC = 'misc';
|
||||
}
|
||||
|
||||
class BookItem extends model implements FilesItemInterface {
|
||||
|
||||
const DB_TABLE = 'books';
|
||||
@ -276,263 +692,10 @@ class BookItem extends model implements FilesItemInterface {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param int $folder_id
|
||||
* @param bool $with_parents
|
||||
* @return WFFCollectionItem|WFFCollectionItem[]|null
|
||||
*/
|
||||
function wff_get_folder(int $folder_id, bool $with_parents = false): WFFCollectionItem|array|null {
|
||||
$db = DB();
|
||||
$q = $db->query("SELECT * FROM wff_collection WHERE id=?", $folder_id);
|
||||
if (!$db->numRows($q))
|
||||
return null;
|
||||
$item = new WFFCollectionItem($db->fetch($q));
|
||||
if (!$item->isFolder())
|
||||
return null;
|
||||
if ($with_parents) {
|
||||
$items = [$item];
|
||||
if ($item->parentId) {
|
||||
$parents = wff_get_folder($item->parentId, true);
|
||||
if ($parents !== null)
|
||||
$items = array_merge($items, $parents);
|
||||
}
|
||||
return $items;
|
||||
}
|
||||
return $item;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param int|int[]|null $parent_id
|
||||
* @return array
|
||||
*/
|
||||
function wff_get(int|array|null $parent_id = null) {
|
||||
$db = DB();
|
||||
|
||||
$where = [];
|
||||
$args = [];
|
||||
|
||||
if (!is_null($parent_id)) {
|
||||
if (is_int($parent_id)) {
|
||||
$where[] = "parent_id=?";
|
||||
$args[] = $parent_id;
|
||||
} else {
|
||||
$where[] = "parent_id IN (".implode(", ", $parent_id).")";
|
||||
}
|
||||
}
|
||||
$sql = "SELECT * FROM wff_collection";
|
||||
if (!empty($where))
|
||||
$sql .= " WHERE ".implode(" AND ", $where);
|
||||
$sql .= " ORDER BY title";
|
||||
$q = $db->query($sql, ...$args);
|
||||
|
||||
return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param int[] $ids
|
||||
* @return WFFCollectionItem[]
|
||||
*/
|
||||
function wff_get_by_id(array $ids): array {
|
||||
$db = DB();
|
||||
$q = $db->query("SELECT * FROM wff_collection WHERE id IN (".implode(',', $ids).")");
|
||||
return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
|
||||
}
|
||||
|
||||
function wff_search(string $q, int $offset = 0, int $count = 0): array {
|
||||
$query_filtered = sphinx_mkquery($q);
|
||||
|
||||
$cl = sphinx_client();
|
||||
$cl->setLimits($offset, $count);
|
||||
$cl->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);
|
||||
$cl->setFieldWeights([
|
||||
'title' => 50,
|
||||
'document_id' => 60,
|
||||
]);
|
||||
|
||||
$cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
|
||||
$cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_EXTENDED, '@relevance DESC, is_folder DESC');
|
||||
|
||||
// run search
|
||||
$final_query = "$query_filtered";
|
||||
$result = $cl->query($final_query, WFF_ARCHIVE_SPHINX_RTINDEX);
|
||||
$error = $cl->getLastError();
|
||||
$warning = $cl->getLastWarning();
|
||||
if ($error)
|
||||
logError(__FUNCTION__, $error);
|
||||
if ($warning)
|
||||
logWarning(__FUNCTION__, $warning);
|
||||
if ($result === false)
|
||||
return ['count' => 0, 'items' => []];
|
||||
|
||||
$total_found = (int)$result['total_found'];
|
||||
|
||||
$items = [];
|
||||
if (!empty($result['matches']))
|
||||
$items = wff_get_by_id(array_keys($result['matches']));
|
||||
|
||||
return ['count' => $total_found, 'items' => $items];
|
||||
}
|
||||
|
||||
function wff_reindex(): void {
|
||||
sphinx_execute("TRUNCATE RTINDEX ".WFF_ARCHIVE_SPHINX_RTINDEX);
|
||||
$db = DB();
|
||||
$q = $db->query("SELECT * FROM wff_collection");
|
||||
while ($row = $db->fetch($q)) {
|
||||
$item = new WFFCollectionItem($row);
|
||||
if ($item->isFile()) {
|
||||
$txt = file_get_contents('/home/user/nsa/txt/'.str_replace('.pdf', '.txt', basename($item->path)));
|
||||
} else {
|
||||
$txt = '';
|
||||
}
|
||||
sphinx_execute("INSERT INTO ".WFF_ARCHIVE_SPHINX_RTINDEX." (id, document_id, title, text, is_folder, parent_id) VALUES (?, ?, ?, ?, ?, ?)",
|
||||
$item->id, $item->getDocumentId(), $item->title, $txt, (int)$item->isFolder(), $item->parentId);
|
||||
}
|
||||
}
|
||||
|
||||
function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
|
||||
return _get_text_excerpts('mdf_texts', 'mdf_id', $ids, $keywords, $before, $after);
|
||||
}
|
||||
|
||||
function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
|
||||
return _get_text_excerpts('wff_texts', 'wff_id', $ids, $keywords, $before, $after);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $table
|
||||
* @param string $field_id
|
||||
* @param int[] $ids
|
||||
* @param string[] $keywords Must already be lower-cased
|
||||
* @param int $before
|
||||
* @param int $after
|
||||
* @return array
|
||||
*/
|
||||
function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after) {
|
||||
$results = [];
|
||||
foreach ($ids as $id)
|
||||
$results[$id] = null;
|
||||
|
||||
$db = DB();
|
||||
|
||||
$dynamic_sql_parts = [];
|
||||
$combined_parts = [];
|
||||
foreach ($keywords as $keyword) {
|
||||
$part = "LOCATE('".$db->escape($keyword)."', text)";
|
||||
$dynamic_sql_parts[] = $part;
|
||||
}
|
||||
if (count($dynamic_sql_parts) > 1) {
|
||||
foreach ($dynamic_sql_parts as $part)
|
||||
$combined_parts[] = "IF({$part} > 0, {$part}, CHAR_LENGTH(text) + 1)";
|
||||
$combined_parts = implode(', ', $combined_parts);
|
||||
$combined_parts = 'LEAST('.$combined_parts.')';
|
||||
} else {
|
||||
$combined_parts = "IF({$dynamic_sql_parts[0]} > 0, {$dynamic_sql_parts[0]}, CHAR_LENGTH(text) + 1)";
|
||||
}
|
||||
|
||||
$total = $before + $after;
|
||||
$sql = "SELECT
|
||||
{$field_id} AS id,
|
||||
GREATEST(
|
||||
1,
|
||||
{$combined_parts} - {$before}
|
||||
) AS excerpt_start_index,
|
||||
SUBSTRING(
|
||||
text,
|
||||
GREATEST(
|
||||
1,
|
||||
{$combined_parts} - {$before}
|
||||
),
|
||||
LEAST(
|
||||
CHAR_LENGTH(text),
|
||||
{$total} + {$combined_parts} - GREATEST(1, {$combined_parts} - {$before})
|
||||
)
|
||||
) AS excerpt
|
||||
FROM
|
||||
{$table}
|
||||
WHERE
|
||||
{$field_id} IN (".implode(',', $ids).")";
|
||||
|
||||
$q = $db->query($sql);
|
||||
while ($row = $db->fetch($q)) {
|
||||
$results[$row['id']] = [
|
||||
'excerpt' => preg_replace('/\s+/', ' ', $row['excerpt']),
|
||||
'index' => (int)$row['excerpt_start_index']
|
||||
];
|
||||
}
|
||||
|
||||
return $results;
|
||||
}
|
||||
|
||||
/**
 * Loads every row of the mdf_collection table, ordered by its `date` column,
 * and wraps each row into an MDFCollectionItem.
 *
 * @return MDFCollectionItem[]
 */
function mdf_get(): array {
    $db = DB();
    $rows = $db->fetchAll($db->query("SELECT * FROM mdf_collection ORDER BY `date`"));

    return array_map([MDFCollectionItem::class, 'create_instance'], $rows);
}
|
||||
|
||||
/**
 * Loads the mdf_collection rows with the given ids and wraps each one into an MDFCollectionItem.
 *
 * The query has no ORDER BY, so the order of the returned items is whatever the database
 * yields and does not necessarily follow the order of $ids.
 *
 * @param int[] $ids Row ids; values are cast to int before being placed into the query
 * @return MDFCollectionItem[] Empty array when $ids is empty
 */
function mdf_get_by_id(array $ids): array {
    // The ids are interpolated into an IN (...) list, so force them to integers.
    $ids = array_map('intval', $ids);

    // "IN ()" is a syntax error, so there is nothing to query for an empty list.
    if (!$ids)
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".implode(',', $ids).")");

    return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
}
|
||||
|
||||
/**
 * Runs a full-text search over the MDF archive Sphinx index.
 *
 * The query is passed through sphinx_mkquery() and executed in extended match mode with
 * per-field weights, ranked by proximity + BM25 and sorted by relevance. Matching documents
 * are then loaded from the database and returned in the same order Sphinx ranked them.
 *
 * @param string $q      Raw search query
 * @param int $offset    Offset of the first result, passed to setLimits()
 * @param int $count     Maximum number of results, passed to setLimits()
 * @return array ['count' => int total number of documents found, 'items' => MDFCollectionItem[]];
 *               count is 0 and items is empty when the Sphinx query fails
 */
function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    $query_filtered = sphinx_mkquery($q);

    $cl = sphinx_client();
    $cl->setLimits($offset, $count);
    $cl->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);
    $cl->setFieldWeights([
        'date' => 10,
        'issue' => 9,
        'text' => 8
    ]);

    $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
    $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);

    // run search
    $result = $cl->query((string)$query_filtered, MDF_ARCHIVE_SPHINX_RTINDEX);

    // errors and warnings are logged even when a (partial) result is returned
    $error = $cl->getLastError();
    $warning = $cl->getLastWarning();
    if ($error)
        logError(__FUNCTION__, $error);
    if ($warning)
        logWarning(__FUNCTION__, $warning);
    if ($result === false)
        return ['count' => 0, 'items' => []];

    $total_found = (int)$result['total_found'];

    $items = [];
    if (!empty($result['matches'])) {
        // 'matches' is keyed by document id, in relevance order
        $match_ids = array_keys($result['matches']);

        // mdf_get_by_id() does not preserve the order of the ids it is given,
        // so index the loaded items by id and re-emit them in Sphinx's ranking order.
        $by_id = [];
        foreach (mdf_get_by_id($match_ids) as $item)
            $by_id[$item->id] = $item;

        foreach ($match_ids as $id) {
            // a document may still be in the index after its row was removed from the database
            if (isset($by_id[$id]))
                $items[] = $by_id[$id];
        }
    }

    return ['count' => $total_found, 'items' => $items];
}
|
||||
|
||||
/**
 * Rebuilds the MDF archive Sphinx real-time index from scratch.
 *
 * The index is truncated first; then, for every item returned by mdf_get(), its text is read
 * from the mdf_texts table and inserted into the index together with the item's id, volume,
 * issue and human-friendly date.
 */
function mdf_reindex(): void {
    $index = MDF_ARCHIVE_SPHINX_RTINDEX;

    sphinx_execute("TRUNCATE RTINDEX ".$index);

    $db = DB();
    $insert_sql = "INSERT INTO ".$index." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)";

    foreach (mdf_get() as $issue) {
        $text_query = $db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $issue->id);
        $text = $db->result($text_query);

        sphinx_execute(
            $insert_sql,
            $issue->id,
            $issue->volume,
            (string)$issue->issue,
            $issue->getHumanFriendlyDate(),
            $text
        );
    }
}
|
||||
|
||||
/**
|
||||
* @return BookItem[]
|
||||
*/
|
||||
function books_get(int $parent_id = 0,
|
||||
BookCategory $category = BookCategory::BOOKS): array {
|
||||
function books_get(int $parent_id = 0, BookCategory $category = BookCategory::BOOKS): array {
|
||||
$db = DB();
|
||||
|
||||
if ($category == BookCategory::BOOKS) {
|
||||
@ -556,31 +719,3 @@ function books_get_folder(int $id): ?BookItem {
|
||||
return null;
|
||||
return $item;
|
||||
}
|
||||
|
||||
/**
 * Converts a number to its Roman numeral representation.
 *
 * Works greedily: the largest numeral value that still fits is appended and subtracted
 * until nothing is left. Values below 1 produce an empty string.
 *
 * @param int $number
 * @return string
 */
function _arabic_to_roman($number) {
    // Numeral/value pairs in descending order, including the subtractive forms (CM, XC, IV, ...).
    $numerals = [
        ['M', 1000],
        ['CM', 900],
        ['D', 500],
        ['CD', 400],
        ['C', 100],
        ['XC', 90],
        ['L', 50],
        ['XL', 40],
        ['X', 10],
        ['IX', 9],
        ['V', 5],
        ['IV', 4],
        ['I', 1],
    ];

    $roman = '';
    foreach ($numerals as [$symbol, $value]) {
        for (; $number >= $value; $number -= $value)
            $roman .= $symbol;
    }

    return $roman;
}
|
||||
|
12
routes.php
12
routes.php
@ -2,9 +2,9 @@
|
||||
|
||||
return (function() {
|
||||
require_once 'lib/files.php';
|
||||
$files_collections = array_map(fn(FilesCollection $fn) => $fn->value, FilesCollection::cases());
|
||||
|
||||
$wff = FilesCollection::WilliamFriedman->value;
|
||||
$files_collections = array_map(fn(FilesCollection $fn) => $fn->value, FilesCollection::cases());
|
||||
$coll_with_folder_support = [FilesCollection::WilliamFriedman->value, FilesCollection::Baconiana->value];
|
||||
|
||||
$routes = [
|
||||
'Main' => [
|
||||
@ -16,10 +16,10 @@ return (function() {
|
||||
'articles/([a-z0-9-]+)/' => 'post name=$(1)',
|
||||
],
|
||||
'Files' => [
|
||||
'files/' => 'files',
|
||||
'files/(\d+)/' => 'folder folder_id=$(1)',
|
||||
'files/{'.implode(',', $files_collections).'}/' => 'collection collection=${1}',
|
||||
'files/'.$wff.'/(\d+)/' => 'collection collection='.$wff.' folder_id=$(1)',
|
||||
'files/' => 'files',
|
||||
'files/(\d+)/' => 'folder folder_id=$(1)',
|
||||
'files/{'.implode(',', $files_collections).'}/' => 'collection collection=${1}',
|
||||
'files/{'.implode(',', $coll_with_folder_support).'}/(\d+)/' => 'collection collection=${1} folder_id=$(1)',
|
||||
],
|
||||
'Services' => [
|
||||
'robots.txt' => 'robots_txt',
|
||||
|
@ -94,7 +94,7 @@ function collection(SkinContext $ctx,
|
||||
$bc[] = ['text' => $ctx->lang('files_'.$collection->value.'_collection')];
|
||||
}
|
||||
|
||||
$do_show_search = empty($parents) && $collection->isSearchSupported();
|
||||
$do_show_search = empty($parents);
|
||||
$do_show_more = $search_count > 0 && count($files) < $search_count;
|
||||
|
||||
$html = <<<HTML
|
||||
@ -155,7 +155,7 @@ function collection_search(SkinContext $ctx, $count, $query, ?string $placeholde
|
||||
<div class="files-search-wrap">
|
||||
<div class="files-search" id="files_search">
|
||||
<div class="files-search-icon">{$icons->search_20()}</div>
|
||||
<input type="text" value="{$query}" placeholder="{$ctx->if_then_else($placeholder !== null, $placeholder, $ctx->lang('files_search_ph'))}" id="files_search_input">
|
||||
<input type="text" value="{$query}" placeholder="{$ctx->if_then_else($placeholder !== null, $placeholder, 'Enter your request..')}" id="files_search_input">
|
||||
<div class="files-search-clear-icon" id="files_search_clear_icon" style="display: {$clear_dsp}">{$icons->clear_16()}</div>
|
||||
</div>
|
||||
|
||||
|
@ -122,6 +122,10 @@ files_baconiana_collection_short: Baconiana
|
||||
|
||||
files_wff_search_ph: Document number, title or text
|
||||
files_mdf_search_ph: Issue number, date or text
|
||||
files_baconiana_search_ph: Issue number, year or text
|
||||
baconiana: Baconiana
|
||||
baconiana_old_name: Journal of The Bacon Society
|
||||
baconiana_online_ed: Online Edition
|
||||
|
||||
files_search_results_count:
|
||||
- "%s result"
|
||||
|
Loading…
x
Reference in New Issue
Block a user