baconiana archive

This commit is contained in:
E. S. 2024-03-14 18:45:10 +00:00
parent caeafc3f54
commit 53d8861dbb
7 changed files with 561 additions and 434 deletions

View File

@ -1,6 +1,6 @@
<?php
const ROUTER_VERSION = 6;
const ROUTER_VERSION = 7;
const ROUTER_MC_KEY = '4in1/routes';
$RouterInput = [];

View File

@ -410,3 +410,31 @@ function format_time($ts, array $opts = array()) {
return $date;
}
/**
 * Renders a number as a Roman numeral.
 *
 * Walks the denominations from largest to smallest and, for each one, keeps
 * appending its numeral while the remaining value is still at least that large.
 * A value below 1 produces an empty string.
 */
function arabic_to_roman($number) {
    $values = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1];
    $numerals = ['M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I'];

    $roman = '';
    for ($i = 0, $n = count($values); $i < $n; $i++) {
        // Subtractive pairs (CM, XC, IV, ...) sit in the table, so a plain greedy pass is enough.
        for (; $number >= $values[$i]; $number -= $values[$i]) {
            $roman .= $numerals[$i];
        }
    }
    return $roman;
}

View File

@ -34,7 +34,6 @@ class FilesHandler extends request_handler {
function GET_collection() {
list($collection, $folder_id, $query, $offset) = input('collection, i:folder_id, q, i:offset');
$collection = FilesCollection::from($collection);
$files = [];
$parents = null;
$query = trim($query);
@ -48,125 +47,86 @@ class FilesHandler extends request_handler {
$vars = [];
$text_excerpts = null;
$func_prefix = $collection->value;
switch ($collection) {
case FilesCollection::WilliamFriedman:
if ($query !== null) {
$files = wff_search($query, $offset, self::SEARCH_RESULTS_PER_PAGE);
$vars += [
'search_count' => $files['count'],
'search_query' => $query
];
if ($query !== null) {
$files = call_user_func("{$func_prefix}_search", $query, $offset, self::SEARCH_RESULTS_PER_PAGE);
$vars += [
'search_count' => $files['count'],
'search_query' => $query
];
/** @var WFFCollectionItem[] $files */
$files = $files['items'];
/** @var WFFCollectionItem[]|MDFCollectionItem[]|BaconianaCollectionItem[] $files */
$files = $files['items'];
$query_words = array_map('mb_strtolower', preg_split('/\s+/', $query));
$found = [];
$result_ids = [];
foreach ($files as $file) {
if ($file->isFolder())
continue;
$result_ids[] = $file->id;
$query_words = array_map('mb_strtolower', preg_split('/\s+/', $query));
$found = [];
$result_ids = [];
foreach ($files as $file) {
if ($file->isFolder())
continue;
$result_ids[] = $file->id;
foreach ([
mb_strtolower($file->getTitle()),
strtolower($file->documentId)
] as $haystack) {
foreach ($query_words as $qw) {
if (mb_strpos($haystack, $qw) !== false) {
$found[$file->id] = true;
continue 2;
}
}
}
}
$found = array_map('intval', array_keys($found));
$not_found = array_diff($result_ids, $found);
if (!empty($not_found))
$text_excerpts = wff_get_text_excerpts($not_found, $query_words);
if (is_xhr_request()) {
ajax_ok([
...$vars,
'new_offset' => $offset + count($files),
'html' => skin('files')->collection_files($files, $query, $text_excerpts)
]);
}
} else {
if ($folder_id) {
$parents = wff_get_folder($folder_id, true);
if (!$parents)
not_found();
if (count($parents) > 1)
$parents = array_reverse($parents);
}
$files = wff_get($folder_id);
}
$title = lang('files_wff_collection');
if ($folder_id)
$title .= ' - '.htmlescape($parents[count($parents)-1]->getTitle());
if ($query)
$title .= ' - '.htmlescape($query);
set_title($title);
break;
case FilesCollection::MercureDeFrance:
if ($query !== null) {
$files = mdf_search($query, $offset, self::SEARCH_RESULTS_PER_PAGE);
$vars += [
'search_count' => $files['count'],
'search_query' => $query
];
/** @var MDFCollectionItem[] $files */
$files = $files['items'];
$query_words = array_map('mb_strtolower', preg_split('/\s+/', $query));
$found = [];
$result_ids = [];
foreach ($files as $file) {
$result_ids[] = $file->id;
foreach ([
switch ($collection) {
case FilesCollection::MercureDeFrance:
$candidates = [
$file->date,
(string)$file->issue
] as $haystack) {
foreach ($query_words as $qw) {
if (mb_strpos($haystack, $qw) !== false) {
$found[$file->id] = true;
continue 2;
}
}
}
}
$found = array_map('intval', array_keys($found));
$not_found = array_diff($result_ids, $found);
if (!empty($not_found))
$text_excerpts = mdf_get_text_excerpts($not_found, $query_words);
if (is_xhr_request()) {
ajax_ok([
...$vars,
'new_offset' => $offset + count($files),
'html' => skin('files')->collection_files($files, $query, $text_excerpts)
]);
}
} else {
$files = mdf_get();
];
break;
case FilesCollection::WilliamFriedman:
$candidates = [
mb_strtolower($file->getTitle()),
strtolower($file->documentId)
];
break;
case FilesCollection::Baconiana:
$candidates = [
// TODO
];
break;
}
$title = lang('files_mdf_collection');
if ($query)
$title .= ' - '.htmlescape($query);
set_title($title);
break;
foreach ($candidates as $haystack) {
foreach ($query_words as $qw) {
if (mb_strpos($haystack, $qw) !== false) {
$found[$file->id] = true;
continue 2;
}
}
}
}
$found = array_map('intval', array_keys($found));
$not_found = array_diff($result_ids, $found);
if (!empty($not_found))
$text_excerpts = call_user_func("{$func_prefix}_get_text_excerpts", $not_found, $query_words);
if (is_xhr_request()) {
ajax_ok([
...$vars,
'new_offset' => $offset + count($files),
'html' => skin('files')->collection_files($files, $query, $text_excerpts)
]);
}
} else {
if (in_array($collection, [FilesCollection::WilliamFriedman, FilesCollection::Baconiana]) && $folder_id) {
$parents = call_user_func("{$func_prefix}_get_folder", $folder_id, true);
if (!$parents)
not_found();
if (count($parents) > 1)
$parents = array_reverse($parents);
}
$files = call_user_func("{$func_prefix}_get", $folder_id);
}
$title = lang('files_'.$collection->value.'_collection');
if ($folder_id && $parents)
$title .= ' - '.htmlescape($parents[count($parents)-1]->getTitle());
if ($query)
$title .= ' - '.htmlescape($query);
set_title($title);
render('files/collection',
...$vars,
collection: $collection,

View File

@ -1,37 +1,23 @@
<?php
use Sphinx\SphinxClient;
require_once 'engine/sphinx.php';
// ----------------------------------------------
// ------------------- Common -------------------
// ----------------------------------------------
const WFF_ARCHIVE_SPHINX_RTINDEX = 'wff_collection';
const MDF_ARCHIVE_SPHINX_RTINDEX = 'mdf_archive';
//const BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
/**
 * The file collections known to the site, backed by their short string codes.
 */
enum FilesCollection: string {
    case WilliamFriedman = 'wff';
    case MercureDeFrance = 'mdf';
    case Baconiana = 'baconiana';

    /**
     * Tells whether search is offered for this collection.
     */
    public function isSearchSupported(): bool {
        return match ($this) {
            self::WilliamFriedman, self::MercureDeFrance => true,
            default => false,
        };
    }
}
const BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
/**
 * Kind of an entry in a files listing: a regular file or a folder.
 */
enum FilesItemType: string {
case FILE = 'file';
case FOLDER = 'folder';
}
/**
 * File type of a book entry.
 * NOTE(review): NONE presumably marks entries that are neither a book nor an article (e.g. folders) — confirm.
 */
enum BookFileType: string {
case NONE = 'none';
case BOOK = 'book';
case ARTICLE = 'article';
}
/**
 * Category a book entry belongs to; books_get() defaults to BOOKS.
 */
enum BookCategory: string {
case BOOKS = 'books';
case MISC = 'misc';
}
interface FilesItemInterface {
public function getId(): string;
public function isFolder(): bool;
@ -57,8 +43,123 @@ trait FilesItemSizeTrait {
public function getSize(): ?int { return $this->isFile() ? $this->size : null; }
}
class CollectionItem implements FilesItemInterface {
/**
 * Builds a short text excerpt around the earliest keyword hit for each requested id.
 *
 * Each keyword is located with LOCATE(); a keyword that does not occur counts as
 * position CHAR_LENGTH(text) + 1, and the smallest position over all keywords is
 * the anchor. The excerpt starts up to $before characters ahead of the anchor
 * (never before position 1).
 *
 * $table and $field_id are interpolated into the SQL verbatim and must be trusted
 * identifiers; keywords are escaped and ids are cast to int.
 *
 * @param string $table
 * @param string $field_id
 * @param int[] $ids
 * @param string[] $keywords Must already be lower-cased
 * @param int $before
 * @param int $after
 * @return array Map of id => ['excerpt' => ..., 'index' => int]; ids without a row stay null
 */
function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after) {
    $results = [];
    foreach ($ids as $id)
        $results[$id] = null;

    // With no ids the IN () list would be invalid SQL, and with no keywords
    // there is no LOCATE() expression to anchor the excerpt on.
    if (empty($ids) || empty($keywords))
        return $results;

    $db = DB();

    $positions = [];
    foreach ($keywords as $keyword) {
        $locate = "LOCATE('".$db->escape($keyword)."', text)";
        $positions[] = "IF({$locate} > 0, {$locate}, CHAR_LENGTH(text) + 1)";
    }
    $combined_parts = count($positions) > 1
        ? 'LEAST('.implode(', ', $positions).')'
        : $positions[0];

    $total = $before + $after;
    // Ids go into the statement as literals, so force them to integers first.
    $id_list = implode(',', array_map('intval', $ids));

    $sql = "SELECT
        {$field_id} AS id,
        GREATEST(
            1,
            {$combined_parts} - {$before}
        ) AS excerpt_start_index,
        SUBSTRING(
            text,
            GREATEST(
                1,
                {$combined_parts} - {$before}
            ),
            LEAST(
                CHAR_LENGTH(text),
                {$total} + {$combined_parts} - GREATEST(1, {$combined_parts} - {$before})
            )
        ) AS excerpt
    FROM
        {$table}
    WHERE
        {$field_id} IN ({$id_list})";

    $q = $db->query($sql);
    while ($row = $db->fetch($q)) {
        $results[$row['id']] = [
            // Collapse runs of whitespace so the excerpt renders on one line.
            'excerpt' => preg_replace('/\s+/', ' ', $row['excerpt']),
            'index' => (int)$row['excerpt_start_index']
        ];
    }
    return $results;
}
/**
 * Runs a Sphinx query against one index and loads the matching items.
 *
 * @param string $index Sphinx index to query
 * @param string $q Raw user query; passed through sphinx_mkquery() before searching
 * @param int $offset
 * @param int $count
 * @param callable $items_getter Receives the 'matches' array of the Sphinx result and returns the items
 * @param callable|null $sphinx_client_setup Optional hook that configures the client; when absent,
 *                                           BM25 proximity ranking with relevance sorting is used
 * @return array ['count' => total matches reported by Sphinx, 'items' => loaded items]
 */
function _search(string $index,
                 string $q,
                 int $offset,
                 int $count,
                 callable $items_getter,
                 ?callable $sphinx_client_setup = null): array {
    $sphinx_query = (string)sphinx_mkquery($q);

    $client = sphinx_client();
    $client->setLimits($offset, $count);
    $client->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);

    if (!is_callable($sphinx_client_setup)) {
        $client->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $client->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
    } else {
        $sphinx_client_setup($client);
    }

    // run search
    $result = $client->query($sphinx_query, $index);

    $error = $client->getLastError();
    $warning = $client->getLastWarning();
    if ($error)
        logError(__FUNCTION__, $error);
    if ($warning)
        logWarning(__FUNCTION__, $warning);

    if ($result === false)
        return ['count' => 0, 'items' => []];

    return [
        'count' => (int)$result['total_found'],
        'items' => empty($result['matches']) ? [] : $items_getter($result['matches']),
    ];
}
// ---------------------------------------------------
// ------------------- Collections -------------------
// ---------------------------------------------------
/**
 * The file collections known to the site.
 *
 * The backing string is the collection's short code; CollectionItem uses it
 * to build the '/files/<code>/' URL and the "files_<code>_collection" lang key.
 */
enum FilesCollection: string {
case WilliamFriedman = 'wff';
case MercureDeFrance = 'mdf';
case Baconiana = 'baconiana';
}
class CollectionItem implements FilesItemInterface {
public function __construct(
protected FilesCollection $collection
) {}
@ -69,21 +170,20 @@ class CollectionItem implements FilesItemInterface {
public function isFile(): bool { return false; }
public function isAvailable(): bool { return true; }
public function getUrl(): string {
global $config;
switch ($this->collection) {
case FilesCollection::Baconiana:
return 'https://'.$config['files_domain'].'/Baconiana/';
default:
return '/files/'.$this->collection->value.'/';
}
return '/files/'.$this->collection->value.'/';
}
public function getSize(): ?int { return null; }
public function getTitle(): string { return lang("files_{$this->collection->value}_collection"); }
public function getMeta(?string $hl_matched = null): array { return []; }
public function isTargetBlank(): bool { return $this->collection === FilesCollection::Baconiana; }
public function isTargetBlank(): bool { return false; }
public function getSubtitle(): ?string { return null; }
}
// ----------------------------------------------------------------
// ------------------- William Friedman Archive -------------------
// ----------------------------------------------------------------
class WFFCollectionItem extends model implements FilesItemInterface {
const DB_TABLE = 'wff_collection';
@ -136,6 +236,109 @@ class WFFCollectionItem extends model implements FilesItemInterface {
}
/**
 * Loads a folder of the William Friedman collection, optionally with its ancestors.
 *
 * @param int $folder_id
 * @param bool $with_parents When true, returns [folder, parent, grandparent, ...] instead of a single item
 * @return WFFCollectionItem|WFFCollectionItem[]|null Null when the id is unknown or does not refer to a folder
 */
function wff_get_folder(int $folder_id, bool $with_parents = false): WFFCollectionItem|array|null {
    $db = DB();
    $res = $db->query("SELECT * FROM wff_collection WHERE id=?", $folder_id);
    if (!$db->numRows($res))
        return null;

    $folder = new WFFCollectionItem($db->fetch($res));
    if (!$folder->isFolder())
        return null;

    if (!$with_parents)
        return $folder;

    // Walk up the tree recursively; the chain ends at the first missing or non-folder ancestor.
    $chain = [$folder];
    $ancestors = $folder->parentId ? wff_get_folder($folder->parentId, true) : null;
    if ($ancestors !== null)
        $chain = array_merge($chain, $ancestors);

    return $chain;
}
/**
 * Lists items of the William Friedman collection, ordered by title.
 *
 * @param int|int[]|null $parent_id Null lists everything, an int lists the children of that
 *                                  folder, an array lists the children of any of the given folders
 * @return array WFFCollectionItem instances
 */
function wff_get(int|array|null $parent_id = null) {
    $db = DB();

    $where = [];
    $args = [];

    if (is_int($parent_id)) {
        $where[] = "parent_id=?";
        $args[] = $parent_id;
    } elseif (is_array($parent_id)) {
        // An empty list would render as "IN ()", which is not valid SQL; no parents means no children.
        if (empty($parent_id))
            return [];
        // The ids are spliced into the statement, so force them to integers first.
        $where[] = "parent_id IN (".implode(", ", array_map('intval', $parent_id)).")";
    }

    $sql = "SELECT * FROM wff_collection";
    if (!empty($where))
        $sql .= " WHERE ".implode(" AND ", $where);
    $sql .= " ORDER BY title";

    $q = $db->query($sql, ...$args);
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Loads William Friedman collection items by id.
 *
 * NOTE(review): rows come back in whatever order the database returns them,
 * not in the order of $ids — confirm callers do not rely on the input order.
 *
 * @param int[] $ids
 * @return WFFCollectionItem[]
 */
function wff_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL, so an empty id list short-circuits to an empty result.
    if (empty($ids))
        return [];

    $db = DB();
    // The ids are spliced into the statement, so force them to integers first.
    $id_list = implode(',', array_map('intval', $ids));
    $q = $db->query("SELECT * FROM wff_collection WHERE id IN (".$id_list.")");
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Full-text search over the William Friedman collection index.
 *
 * Weights the document id above the title and sorts by relevance, then by the is_folder attribute.
 *
 * @return array ['count' => int, 'items' => WFFCollectionItem[]]
 */
function wff_search(string $q, int $offset = 0, int $count = 0): array {
    $load_items = fn($matches) => wff_get_by_id(array_keys($matches));

    $configure = function(SphinxClient $cl) {
        $cl->setFieldWeights([
            'title' => 50,
            'document_id' => 60,
        ]);
        $cl->setRankingMode(SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $cl->setSortMode(SphinxClient::SPH_SORT_EXTENDED, '@relevance DESC, is_folder DESC');
    };

    return _search(WFF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count, $load_items, $configure);
}
/**
 * Rebuilds the William Friedman Sphinx real-time index from scratch.
 *
 * Truncates the index, then inserts one document per row of wff_collection.
 * For files, the text is read from the .txt file named after the PDF's basename;
 * folders are indexed with empty text.
 */
function wff_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".WFF_ARCHIVE_SPHINX_RTINDEX);
    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection");
    while ($row = $db->fetch($q)) {
        $item = new WFFCollectionItem($row);
        $txt = '';
        if ($item->isFile()) {
            $txt = file_get_contents('/home/user/nsa/txt/'.str_replace('.pdf', '.txt', basename($item->path)));
            // file_get_contents() returns false on failure (after emitting a warning);
            // index the document with empty text rather than passing false on.
            if ($txt === false)
                $txt = '';
        }
        sphinx_execute("INSERT INTO ".WFF_ARCHIVE_SPHINX_RTINDEX." (id, document_id, title, text, is_folder, parent_id) VALUES (?, ?, ?, ?, ?, ?)",
            $item->id, $item->getDocumentId(), $item->title, $txt, (int)$item->isFolder(), $item->parentId);
    }
}
/**
 * Text excerpts for William Friedman documents, read from wff_texts keyed by wff_id.
 *
 * @param int[] $ids
 * @param string[] $keywords
 */
function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        table: 'wff_texts',
        field_id: 'wff_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after,
    );
}
// ---------------------------------------------------------
// ------------------- Mercure de France -------------------
// ---------------------------------------------------------
class MDFCollectionItem extends model implements FilesItemInterface {
const DB_TABLE = 'mdf_collection';
@ -186,7 +389,7 @@ class MDFCollectionItem extends model implements FilesItemInterface {
}
public function getRomanVolume(): string {
return _arabic_to_roman($this->volume);
return arabic_to_roman($this->volume);
}
public function getSubtitle(): ?string {
@ -195,6 +398,219 @@ class MDFCollectionItem extends model implements FilesItemInterface {
}
}
/**
 * Lists every Mercure de France item, ordered by date.
 *
 * @return MDFCollectionItem[]
 */
function mdf_get(): array {
    $db = DB();
    $rows = $db->fetchAll($db->query("SELECT * FROM mdf_collection ORDER BY `date`"));
    return array_map('MDFCollectionItem::create_instance', $rows);
}
/**
 * Loads Mercure de France items by id.
 *
 * NOTE(review): rows come back in whatever order the database returns them,
 * not in the order of $ids — confirm callers do not rely on the input order.
 *
 * @param int[] $ids
 * @return MDFCollectionItem[]
 */
function mdf_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL, so an empty id list short-circuits to an empty result.
    if (empty($ids))
        return [];

    $db = DB();
    // The ids are spliced into the statement, so force them to integers first.
    $id_list = implode(',', array_map('intval', $ids));
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".$id_list.")");
    return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Full-text search over the Mercure de France index.
 *
 * Weights date above issue above text and sorts by relevance.
 *
 * @return array ['count' => int, 'items' => MDFCollectionItem[]]
 */
function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    $load_items = fn($matches) => mdf_get_by_id(array_keys($matches));

    $configure = function(SphinxClient $cl) {
        $cl->setFieldWeights([
            'date' => 10,
            'issue' => 9,
            'text' => 8
        ]);
        $cl->setRankingMode(SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $cl->setSortMode(SphinxClient::SPH_SORT_RELEVANCE);
    };

    return _search(MDF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count, $load_items, $configure);
}
/**
 * Rebuilds the Mercure de France Sphinx real-time index from scratch.
 *
 * Truncates the index, then inserts one document per item with its text from mdf_texts.
 */
function mdf_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".MDF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $insert_sql = "INSERT INTO ".MDF_ARCHIVE_SPHINX_RTINDEX." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)";

    foreach (mdf_get() as $entry) {
        $text_query = $db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $entry->id);
        $text = $db->result($text_query);
        sphinx_execute($insert_sql,
            $entry->id, $entry->volume, (string)$entry->issue, $entry->getHumanFriendlyDate(), $text);
    }
}
/**
 * Text excerpts for Mercure de France items, read from mdf_texts keyed by mdf_id.
 *
 * @param int[] $ids
 * @param string[] $keywords
 */
function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        table: 'mdf_texts',
        field_id: 'mdf_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after,
    );
}
// -------------------------------------------------
// ------------------- Baconiana -------------------
// -------------------------------------------------
/**
 * One row of baconiana_collection: either a folder or a PDF issue of Baconiana.
 */
class BaconianaCollectionItem extends model implements FilesItemInterface {
    const DB_TABLE = 'baconiana_collection';

    use FilesItemTypeTrait;
    use FilesItemSizeTrait;

    public int $id;
    public int $parentId;
    public int $year;
    public string $issues;
    public string $path;
    public bool $jobc; // Journal of the Bacon Society
    public string $title; // Only for folders

    public function isAvailable(): bool { return true; }

    public function getTitleHtml(): ?string { return null; }

    /**
     * The explicit title when one is set; otherwise the journal name
     * (old or current, depending on $jobc) followed by the issue numbers.
     */
    public function getTitle(): string {
        if ($this->title !== '')
            return $this->title;

        return ($this->jobc ? lang('baconiana_old_name') : lang('baconiana')).' №'.$this->issues;
    }

    /** Files open in a new tab; folders navigate in place. */
    public function isTargetBlank(): bool { return $this->isFile(); }

    // Explicit cast: the interface promises a string and $id is an int.
    public function getId(): string { return (string)$this->id; }

    /**
     * Folders link to their listing page inside the site; files link to the PDF on the files domain.
     */
    public function getUrl(): string {
        if ($this->isFolder()) {
            return '/files/'.FilesCollection::Baconiana->value.'/'.$this->id.'/';
        }
        global $config;
        return 'https://'.$config['files_domain'].'/'.$this->path;
    }

    /**
     * Meta labels shown next to a file: an "Online Edition" marker for issues
     * from 2007 onwards, the file size and the format. Folders have no meta.
     */
    public function getMeta(?string $hl_matched = null): array {
        $items = [];
        if ($this->isFolder())
            return $items;

        // Use the translatable string instead of a hard-coded English label.
        if ($this->year >= 2007)
            $items[] = lang('baconiana_online_ed');

        $items[] = sizeString($this->size);
        $items[] = 'PDF';

        return [
            'inline' => false,
            'items' => $items
        ];
    }

    /** The year in parentheses, or null when no year is set. */
    public function getSubtitle(): ?string {
        return $this->year > 0 ? '('.$this->year.')' : null;
    }
}
/**
 * Lists Baconiana items, ordered by type, year and id.
 *
 * @param int|null $parent_id Folder whose children to list (0 by default); null lists the whole collection
 * @return BaconianaCollectionItem[]
 */
function baconiana_get(?int $parent_id = 0): array {
    $db = DB();

    $sql = "SELECT * FROM baconiana_collection";
    $args = [];
    if ($parent_id !== null) {
        // Bind the id through a placeholder, like the other collection queries do.
        $sql .= " WHERE parent_id=?";
        $args[] = $parent_id;
    }
    $sql .= " ORDER BY type, year, id";

    $q = $db->query($sql, ...$args);
    return array_map('BaconianaCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Loads Baconiana items by id.
 *
 * NOTE(review): rows come back in whatever order the database returns them,
 * not in the order of $ids — confirm callers do not rely on the input order.
 *
 * @param int[] $ids
 * @return BaconianaCollectionItem[]
 */
function baconiana_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL, so an empty id list short-circuits to an empty result.
    if (empty($ids))
        return [];

    $db = DB();
    // The ids are spliced into the statement, so force them to integers first.
    $id_list = implode(',', array_map('intval', $ids));
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id IN (".$id_list.")");
    return array_map('BaconianaCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Loads a Baconiana folder, optionally with its ancestors.
 *
 * @param int $folder_id
 * @param bool $with_parents When true, returns [folder, parent, grandparent, ...] instead of a single item
 * @return BaconianaCollectionItem|BaconianaCollectionItem[]|null Null when the id is unknown or is not a folder
 */
function baconiana_get_folder(int $folder_id, bool $with_parents = false): BaconianaCollectionItem|array|null {
    // The declared return type names BaconianaCollectionItem: with the previous
    // WFFCollectionItem declaration, returning a single folder raised a TypeError.
    $db = DB();
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id=?", $folder_id);
    if (!$db->numRows($q))
        return null;

    $item = new BaconianaCollectionItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    if ($with_parents) {
        $items = [$item];
        if ($item->parentId) {
            // Walk up the tree; the chain ends at the first missing or non-folder ancestor.
            $parents = baconiana_get_folder($item->parentId, true);
            if ($parents !== null)
                $items = array_merge($items, $parents);
        }
        return $items;
    }

    return $item;
}
/**
 * Full-text search over the Baconiana index.
 *
 * Weights year above issues above text and sorts by relevance.
 * NOTE(review): baconiana_reindex() inserts only id, title, year and text —
 * confirm the index actually has an 'issues' field for this weight to apply to.
 *
 * @return array ['count' => int, 'items' => BaconianaCollectionItem[]]
 */
function baconiana_search(string $q, int $offset = 0, int $count = 0): array {
    $load_items = fn($matches) => baconiana_get_by_id(array_keys($matches));

    $configure = function(SphinxClient $cl) {
        $cl->setFieldWeights([
            'year' => 10,
            'issues' => 9,
            'text' => 8
        ]);
        $cl->setRankingMode(SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $cl->setSortMode(SphinxClient::SPH_SORT_RELEVANCE);
    };

    return _search(BACONIANA_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count, $load_items, $configure);
}
/**
 * Rebuilds the Baconiana Sphinx real-time index from scratch.
 *
 * Truncates the index, then inserts one document per collection row (all
 * parents), titled "<year> (<issues>)", with its text from baconiana_texts.
 */
function baconiana_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".BACONIANA_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $insert_sql = "INSERT INTO ".BACONIANA_ARCHIVE_SPHINX_RTINDEX." (id, title, year, text) VALUES (?, ?, ?, ?)";

    foreach (baconiana_get(null) as $item) {
        $text_query = $db->query("SELECT text FROM baconiana_texts WHERE bcn_id=?", $item->id);
        $text = $db->result($text_query);
        sphinx_execute($insert_sql,
            $item->id, "$item->year ($item->issues)", $item->year, $text);
    }
}
/**
 * Text excerpts for Baconiana items, read from baconiana_texts keyed by bcn_id.
 *
 * @param int[] $ids
 * @param string[] $keywords
 */
function baconiana_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        table: 'baconiana_texts',
        field_id: 'bcn_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after,
    );
}
// ----------------------------------------------------------
// ------------------- Books and Articles -------------------
// ----------------------------------------------------------
/**
 * File type of a book entry.
 * NOTE(review): NONE presumably marks entries that are neither a book nor an article (e.g. folders) — confirm.
 */
enum BookFileType: string {
case NONE = 'none';
case BOOK = 'book';
case ARTICLE = 'article';
}
/**
 * Category a book entry belongs to; books_get() defaults to BOOKS.
 */
enum BookCategory: string {
case BOOKS = 'books';
case MISC = 'misc';
}
class BookItem extends model implements FilesItemInterface {
const DB_TABLE = 'books';
@ -276,263 +692,10 @@ class BookItem extends model implements FilesItemInterface {
}
}
/**
 * Loads a folder of the William Friedman collection, optionally with its ancestors.
 *
 * @param int $folder_id
 * @param bool $with_parents When true, returns [folder, parent, grandparent, ...] instead of a single item
 * @return WFFCollectionItem|WFFCollectionItem[]|null Null when the id is unknown or does not refer to a folder
 */
function wff_get_folder(int $folder_id, bool $with_parents = false): WFFCollectionItem|array|null {
    $db = DB();
    $res = $db->query("SELECT * FROM wff_collection WHERE id=?", $folder_id);
    if (!$db->numRows($res))
        return null;

    $folder = new WFFCollectionItem($db->fetch($res));
    if (!$folder->isFolder())
        return null;

    if (!$with_parents)
        return $folder;

    // Walk up the tree recursively; the chain ends at the first missing or non-folder ancestor.
    $chain = [$folder];
    $ancestors = $folder->parentId ? wff_get_folder($folder->parentId, true) : null;
    if ($ancestors !== null)
        $chain = array_merge($chain, $ancestors);

    return $chain;
}
/**
 * Lists items of the William Friedman collection, ordered by title.
 *
 * @param int|int[]|null $parent_id Null lists everything, an int lists the children of that
 *                                  folder, an array lists the children of any of the given folders
 * @return array WFFCollectionItem instances
 */
function wff_get(int|array|null $parent_id = null) {
    $db = DB();

    $where = [];
    $args = [];

    if (is_int($parent_id)) {
        $where[] = "parent_id=?";
        $args[] = $parent_id;
    } elseif (is_array($parent_id)) {
        // An empty list would render as "IN ()", which is not valid SQL; no parents means no children.
        if (empty($parent_id))
            return [];
        // The ids are spliced into the statement, so force them to integers first.
        $where[] = "parent_id IN (".implode(", ", array_map('intval', $parent_id)).")";
    }

    $sql = "SELECT * FROM wff_collection";
    if (!empty($where))
        $sql .= " WHERE ".implode(" AND ", $where);
    $sql .= " ORDER BY title";

    $q = $db->query($sql, ...$args);
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Loads William Friedman collection items by id.
 *
 * NOTE(review): rows come back in whatever order the database returns them,
 * not in the order of $ids — confirm callers do not rely on the input order.
 *
 * @param int[] $ids
 * @return WFFCollectionItem[]
 */
function wff_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL, so an empty id list short-circuits to an empty result.
    if (empty($ids))
        return [];

    $db = DB();
    // The ids are spliced into the statement, so force them to integers first.
    $id_list = implode(',', array_map('intval', $ids));
    $q = $db->query("SELECT * FROM wff_collection WHERE id IN (".$id_list.")");
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Full-text search over the William Friedman collection index.
 *
 * Weights the document id above the title and sorts by relevance, then by the is_folder attribute.
 *
 * @return array ['count' => total matches reported by Sphinx, 'items' => WFFCollectionItem[]]
 */
function wff_search(string $q, int $offset = 0, int $count = 0): array {
    $sphinx_query = (string)sphinx_mkquery($q);

    $client = sphinx_client();
    $client->setLimits($offset, $count);
    $client->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);
    $client->setFieldWeights([
        'title' => 50,
        'document_id' => 60,
    ]);
    $client->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
    $client->setSortMode(Sphinx\SphinxClient::SPH_SORT_EXTENDED, '@relevance DESC, is_folder DESC');

    // run search
    $result = $client->query($sphinx_query, WFF_ARCHIVE_SPHINX_RTINDEX);

    $error = $client->getLastError();
    $warning = $client->getLastWarning();
    if ($error)
        logError(__FUNCTION__, $error);
    if ($warning)
        logWarning(__FUNCTION__, $warning);

    if ($result === false)
        return ['count' => 0, 'items' => []];

    return [
        'count' => (int)$result['total_found'],
        'items' => empty($result['matches']) ? [] : wff_get_by_id(array_keys($result['matches'])),
    ];
}
/**
 * Rebuilds the William Friedman Sphinx real-time index from scratch.
 *
 * Truncates the index, then inserts one document per row of wff_collection.
 * For files, the text is read from the .txt file named after the PDF's basename;
 * folders are indexed with empty text.
 */
function wff_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".WFF_ARCHIVE_SPHINX_RTINDEX);
    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection");
    while ($row = $db->fetch($q)) {
        $item = new WFFCollectionItem($row);
        $txt = '';
        if ($item->isFile()) {
            $txt = file_get_contents('/home/user/nsa/txt/'.str_replace('.pdf', '.txt', basename($item->path)));
            // file_get_contents() returns false on failure (after emitting a warning);
            // index the document with empty text rather than passing false on.
            if ($txt === false)
                $txt = '';
        }
        sphinx_execute("INSERT INTO ".WFF_ARCHIVE_SPHINX_RTINDEX." (id, document_id, title, text, is_folder, parent_id) VALUES (?, ?, ?, ?, ?, ?)",
            $item->id, $item->getDocumentId(), $item->title, $txt, (int)$item->isFolder(), $item->parentId);
    }
}
/**
 * Text excerpts for Mercure de France items, read from mdf_texts keyed by mdf_id.
 *
 * @param int[] $ids
 * @param string[] $keywords
 */
function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        table: 'mdf_texts',
        field_id: 'mdf_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after,
    );
}
/**
 * Text excerpts for William Friedman documents, read from wff_texts keyed by wff_id.
 *
 * @param int[] $ids
 * @param string[] $keywords
 */
function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        table: 'wff_texts',
        field_id: 'wff_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after,
    );
}
/**
 * Builds a short text excerpt around the earliest keyword hit for each requested id.
 *
 * Each keyword is located with LOCATE(); a keyword that does not occur counts as
 * position CHAR_LENGTH(text) + 1, and the smallest position over all keywords is
 * the anchor. The excerpt starts up to $before characters ahead of the anchor
 * (never before position 1).
 *
 * $table and $field_id are interpolated into the SQL verbatim and must be trusted
 * identifiers; keywords are escaped and ids are cast to int.
 *
 * @param string $table
 * @param string $field_id
 * @param int[] $ids
 * @param string[] $keywords Must already be lower-cased
 * @param int $before
 * @param int $after
 * @return array Map of id => ['excerpt' => ..., 'index' => int]; ids without a row stay null
 */
function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after) {
    $results = [];
    foreach ($ids as $id)
        $results[$id] = null;

    // With no ids the IN () list would be invalid SQL, and with no keywords
    // there is no LOCATE() expression to anchor the excerpt on.
    if (empty($ids) || empty($keywords))
        return $results;

    $db = DB();

    $positions = [];
    foreach ($keywords as $keyword) {
        $locate = "LOCATE('".$db->escape($keyword)."', text)";
        $positions[] = "IF({$locate} > 0, {$locate}, CHAR_LENGTH(text) + 1)";
    }
    $combined_parts = count($positions) > 1
        ? 'LEAST('.implode(', ', $positions).')'
        : $positions[0];

    $total = $before + $after;
    // Ids go into the statement as literals, so force them to integers first.
    $id_list = implode(',', array_map('intval', $ids));

    $sql = "SELECT
        {$field_id} AS id,
        GREATEST(
            1,
            {$combined_parts} - {$before}
        ) AS excerpt_start_index,
        SUBSTRING(
            text,
            GREATEST(
                1,
                {$combined_parts} - {$before}
            ),
            LEAST(
                CHAR_LENGTH(text),
                {$total} + {$combined_parts} - GREATEST(1, {$combined_parts} - {$before})
            )
        ) AS excerpt
    FROM
        {$table}
    WHERE
        {$field_id} IN ({$id_list})";

    $q = $db->query($sql);
    while ($row = $db->fetch($q)) {
        $results[$row['id']] = [
            // Collapse runs of whitespace so the excerpt renders on one line.
            'excerpt' => preg_replace('/\s+/', ' ', $row['excerpt']),
            'index' => (int)$row['excerpt_start_index']
        ];
    }
    return $results;
}
/**
 * Lists every Mercure de France item, ordered by date.
 *
 * @return MDFCollectionItem[]
 */
function mdf_get(): array {
    $db = DB();
    $rows = $db->fetchAll($db->query("SELECT * FROM mdf_collection ORDER BY `date`"));
    return array_map('MDFCollectionItem::create_instance', $rows);
}
/**
 * Loads Mercure de France items by id.
 *
 * NOTE(review): rows come back in whatever order the database returns them,
 * not in the order of $ids — confirm callers do not rely on the input order.
 *
 * @param int[] $ids
 * @return MDFCollectionItem[]
 */
function mdf_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL, so an empty id list short-circuits to an empty result.
    if (empty($ids))
        return [];

    $db = DB();
    // The ids are spliced into the statement, so force them to integers first.
    $id_list = implode(',', array_map('intval', $ids));
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".$id_list.")");
    return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Full-text search over the Mercure de France index.
 *
 * Weights date above issue above text and sorts by relevance.
 *
 * @return array ['count' => total matches reported by Sphinx, 'items' => MDFCollectionItem[]]
 */
function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    $sphinx_query = (string)sphinx_mkquery($q);

    $client = sphinx_client();
    $client->setLimits($offset, $count);
    $client->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);
    $client->setFieldWeights([
        'date' => 10,
        'issue' => 9,
        'text' => 8
    ]);
    $client->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
    $client->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);

    // run search
    $result = $client->query($sphinx_query, MDF_ARCHIVE_SPHINX_RTINDEX);

    $error = $client->getLastError();
    $warning = $client->getLastWarning();
    if ($error)
        logError(__FUNCTION__, $error);
    if ($warning)
        logWarning(__FUNCTION__, $warning);

    if ($result === false)
        return ['count' => 0, 'items' => []];

    return [
        'count' => (int)$result['total_found'],
        'items' => empty($result['matches']) ? [] : mdf_get_by_id(array_keys($result['matches'])),
    ];
}
/**
 * Rebuilds the Mercure de France Sphinx real-time index from scratch.
 *
 * Truncates the index, then inserts one document per item with its text from mdf_texts.
 */
function mdf_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".MDF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $insert_sql = "INSERT INTO ".MDF_ARCHIVE_SPHINX_RTINDEX." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)";

    foreach (mdf_get() as $entry) {
        $text_query = $db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $entry->id);
        $text = $db->result($text_query);
        sphinx_execute($insert_sql,
            $entry->id, $entry->volume, (string)$entry->issue, $entry->getHumanFriendlyDate(), $text);
    }
}
/**
* @return BookItem[]
*/
function books_get(int $parent_id = 0,
BookCategory $category = BookCategory::BOOKS): array {
function books_get(int $parent_id = 0, BookCategory $category = BookCategory::BOOKS): array {
$db = DB();
if ($category == BookCategory::BOOKS) {
@ -556,31 +719,3 @@ function books_get_folder(int $id): ?BookItem {
return null;
return $item;
}
/**
 * Renders a number as a Roman numeral.
 *
 * Walks the denominations from largest to smallest and, for each one, keeps
 * appending its numeral while the remaining value is still at least that large.
 * A value below 1 produces an empty string.
 */
function _arabic_to_roman($number) {
    $values = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1];
    $numerals = ['M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I'];

    $roman = '';
    for ($i = 0, $n = count($values); $i < $n; $i++) {
        // Subtractive pairs (CM, XC, IV, ...) sit in the table, so a plain greedy pass is enough.
        for (; $number >= $values[$i]; $number -= $values[$i]) {
            $roman .= $numerals[$i];
        }
    }
    return $roman;
}

View File

@ -2,9 +2,9 @@
return (function() {
require_once 'lib/files.php';
$files_collections = array_map(fn(FilesCollection $fn) => $fn->value, FilesCollection::cases());
$wff = FilesCollection::WilliamFriedman->value;
$files_collections = array_map(fn(FilesCollection $fn) => $fn->value, FilesCollection::cases());
$coll_with_folder_support = [FilesCollection::WilliamFriedman->value, FilesCollection::Baconiana->value];
$routes = [
'Main' => [
@ -16,10 +16,10 @@ return (function() {
'articles/([a-z0-9-]+)/' => 'post name=$(1)',
],
'Files' => [
'files/' => 'files',
'files/(\d+)/' => 'folder folder_id=$(1)',
'files/{'.implode(',', $files_collections).'}/' => 'collection collection=${1}',
'files/'.$wff.'/(\d+)/' => 'collection collection='.$wff.' folder_id=$(1)',
'files/' => 'files',
'files/(\d+)/' => 'folder folder_id=$(1)',
'files/{'.implode(',', $files_collections).'}/' => 'collection collection=${1}',
'files/{'.implode(',', $coll_with_folder_support).'}/(\d+)/' => 'collection collection=${1} folder_id=$(1)',
],
'Services' => [
'robots.txt' => 'robots_txt',

View File

@ -94,7 +94,7 @@ function collection(SkinContext $ctx,
$bc[] = ['text' => $ctx->lang('files_'.$collection->value.'_collection')];
}
$do_show_search = empty($parents) && $collection->isSearchSupported();
$do_show_search = empty($parents);
$do_show_more = $search_count > 0 && count($files) < $search_count;
$html = <<<HTML
@ -155,7 +155,7 @@ function collection_search(SkinContext $ctx, $count, $query, ?string $placeholde
<div class="files-search-wrap">
<div class="files-search" id="files_search">
<div class="files-search-icon">{$icons->search_20()}</div>
<input type="text" value="{$query}" placeholder="{$ctx->if_then_else($placeholder !== null, $placeholder, $ctx->lang('files_search_ph'))}" id="files_search_input">
<input type="text" value="{$query}" placeholder="{$ctx->if_then_else($placeholder !== null, $placeholder, 'Enter your request..')}" id="files_search_input">
<div class="files-search-clear-icon" id="files_search_clear_icon" style="display: {$clear_dsp}">{$icons->clear_16()}</div>
</div>

View File

@ -122,6 +122,10 @@ files_baconiana_collection_short: Baconiana
files_wff_search_ph: Document number, title or text
files_mdf_search_ph: Issue number, date or text
files_baconiana_search_ph: Issue number, year or text
baconiana: Baconiana
baconiana_old_name: Journal of The Bacon Society
baconiana_online_ed: Online Edition
files_search_results_count:
- "%s result"