mdf: add search
This commit is contained in:
parent
d5c352db13
commit
c542307f00
@ -308,15 +308,20 @@ function pcre_check_error(mixed &$result, bool $no_error = false): bool {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $s
|
||||
* @param string|string[]|null $keywords
|
||||
* @return string
|
||||
*/
|
||||
function hl_matched(string $s, string|array|null $keywords = []): string {
|
||||
function hl_matched(string $s, string|Stringable|SkinString|array|null $keywords = []): string {
|
||||
if (is_null($keywords))
|
||||
return htmlescape($s);
|
||||
|
||||
if ($keywords instanceof Stringable)
|
||||
$keywords = $keywords->__toString();
|
||||
else if (is_array($keywords)) {
|
||||
$keywords = array_map(function($s) {
|
||||
if ($s instanceof Stringable)
|
||||
return $s->__toString();
|
||||
return $s;
|
||||
}, $keywords);
|
||||
}
|
||||
|
||||
if (is_string($keywords))
|
||||
$keywords = preg_split('/\s+/', $keywords);
|
||||
|
||||
|
@ -115,8 +115,55 @@ class FilesHandler extends request_handler {
|
||||
break;
|
||||
|
||||
case FilesCollection::MercureDeFrance:
|
||||
if ($query !== null) {
|
||||
$files = mdf_search($query, $offset, self::SEARCH_RESULTS_PER_PAGE);
|
||||
$vars += [
|
||||
'search_count' => $files['count'],
|
||||
'search_query' => $query
|
||||
];
|
||||
|
||||
/** @var MDFCollectionItem[] $files */
|
||||
$files = $files['items'];
|
||||
|
||||
$query_words = array_map('mb_strtolower', preg_split('/\s+/', $query));
|
||||
$found = [];
|
||||
$result_ids = [];
|
||||
foreach ($files as $file) {
|
||||
$result_ids[] = $file->id;
|
||||
|
||||
foreach ([
|
||||
$file->date,
|
||||
(string)$file->issue
|
||||
] as $haystack) {
|
||||
foreach ($query_words as $qw) {
|
||||
if (mb_strpos($haystack, $qw) !== false) {
|
||||
$found[$file->id] = true;
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$found = array_map('intval', array_keys($found));
|
||||
$not_found = array_diff($result_ids, $found);
|
||||
if (!empty($not_found))
|
||||
$text_excerpts = mdf_get_text_excerpts($not_found, $query_words);
|
||||
|
||||
if (is_xhr_request()) {
|
||||
ajax_ok([
|
||||
...$vars,
|
||||
'new_offset' => $offset + count($files),
|
||||
'html' => skin('files')->collection_files($files, $query, $text_excerpts)
|
||||
]);
|
||||
}
|
||||
} else {
|
||||
$files = mdf_get();
|
||||
set_title('$files_mdf_collection');
|
||||
}
|
||||
|
||||
$title = lang('files_mdf_collection');
|
||||
if ($query)
|
||||
$title .= ' - '.htmlescape($query);
|
||||
set_title($title);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
require_once 'engine/sphinx.php';
|
||||
|
||||
const WFF_ARCHIVE_SPHINX_RTINDEX = 'wff_collection';
|
||||
//const MDF_ARCHIVE_SPHINX/**/_RTINDEX = 'mdf_archive';
|
||||
const MDF_ARCHIVE_SPHINX_RTINDEX = 'mdf_archive';
|
||||
//const BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
|
||||
|
||||
enum FilesCollection: string {
|
||||
@ -12,7 +12,7 @@ enum FilesCollection: string {
|
||||
case Baconiana = 'baconiana';
|
||||
|
||||
public function isSearchSupported(): bool {
|
||||
return $this == FilesCollection::WilliamFriedman;
|
||||
return $this == FilesCollection::WilliamFriedman || $this == FilesCollection::MercureDeFrance;
|
||||
}
|
||||
}
|
||||
|
||||
@ -161,13 +161,13 @@ class MDFCollectionItem extends model implements FilesItemInterface {
|
||||
return "№{$this->issue}, {$this->getHumanFriendlyDate()}";
|
||||
}
|
||||
|
||||
protected function getHumanFriendlyDate(): string {
|
||||
public function getHumanFriendlyDate(): string {
|
||||
$dt = new DateTime($this->date);
|
||||
return $dt->format('j M Y');
|
||||
}
|
||||
|
||||
public function isTargetBlank(): bool { return true; }
|
||||
public function getId(): string { return (string)$this->issue; }
|
||||
public function getId(): string { return $this->id; }
|
||||
public function getUrl(): string {
|
||||
global $config;
|
||||
return 'https://'.$config['files_domain'].'/Mercure-de-France-OCR/'.$this->path;
|
||||
@ -339,9 +339,7 @@ function wff_get_by_id(array $ids): array {
|
||||
}
|
||||
|
||||
function wff_search(string $q, int $offset = 0, int $count = 0): array {
|
||||
$query_filtered = sphinx_mkquery($q, [
|
||||
'star' => false,
|
||||
]);
|
||||
$query_filtered = sphinx_mkquery($q);
|
||||
|
||||
$cl = sphinx_client();
|
||||
$cl->setLimits($offset, $count);
|
||||
@ -391,17 +389,24 @@ function wff_reindex(): void {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Text excerpts for Mercure de France items.
 *
 * Forwards to _get_text_excerpts() with the mdf_texts table and its
 * mdf_id column; all other arguments are passed through unchanged.
 *
 * @param int[] $ids
 * @param string[] $keywords
 * @param int $before
 * @param int $after
 * @return array
 */
function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    $excerpts = _get_text_excerpts('mdf_texts', 'mdf_id', $ids, $keywords, $before, $after);
    return $excerpts;
}
|
||||
|
||||
/**
 * Text excerpts for William Friedman collection items.
 *
 * Forwards to _get_text_excerpts() with the wff_texts table and its
 * wff_id column; all other arguments are passed through unchanged.
 *
 * @param int[] $ids
 * @param string[] $keywords
 * @param int $before
 * @param int $after
 * @return array
 */
function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    $excerpts = _get_text_excerpts('wff_texts', 'wff_id', $ids, $keywords, $before, $after);
    return $excerpts;
}
|
||||
|
||||
/**
|
||||
* @param string $table
|
||||
* @param string $field_id
|
||||
* @param int[] $ids
|
||||
* @param string[] $keywords Must already be lower-cased
|
||||
* @param int $before
|
||||
* @param int $after
|
||||
* @return array
|
||||
*/
|
||||
function wff_get_text_excerpts(array $ids,
|
||||
array $keywords,
|
||||
int $before = 50,
|
||||
int $after = 40): array {
|
||||
function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after) {
|
||||
$results = [];
|
||||
foreach ($ids as $id)
|
||||
$results[$id] = null;
|
||||
@ -411,7 +416,7 @@ function wff_get_text_excerpts(array $ids,
|
||||
$dynamic_sql_parts = [];
|
||||
$combined_parts = [];
|
||||
foreach ($keywords as $keyword) {
|
||||
$part = "LOCATE('".$db->escape($keyword)."', LOWER(text))";
|
||||
$part = "LOCATE('".$db->escape($keyword)."', text)";
|
||||
$dynamic_sql_parts[] = $part;
|
||||
}
|
||||
if (count($dynamic_sql_parts) > 1) {
|
||||
@ -425,7 +430,7 @@ function wff_get_text_excerpts(array $ids,
|
||||
|
||||
$total = $before + $after;
|
||||
$sql = "SELECT
|
||||
wff_id AS id,
|
||||
{$field_id} AS id,
|
||||
GREATEST(
|
||||
1,
|
||||
{$combined_parts} - {$before}
|
||||
@ -442,9 +447,9 @@ function wff_get_text_excerpts(array $ids,
|
||||
)
|
||||
) AS excerpt
|
||||
FROM
|
||||
wff_texts
|
||||
{$table}
|
||||
WHERE
|
||||
wff_id IN (".implode(',', $ids).")";
|
||||
{$field_id} IN (".implode(',', $ids).")";
|
||||
|
||||
$q = $db->query($sql);
|
||||
while ($row = $db->fetch($q)) {
|
||||
@ -466,6 +471,63 @@ function mdf_get(): array {
|
||||
return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
|
||||
}
|
||||
|
||||
/**
 * Loads rows of mdf_collection by id and wraps them in MDFCollectionItem.
 *
 * The query has no ORDER BY, so the order of the returned items is not tied
 * to the order of $ids; callers that need a specific order must sort the
 * result themselves.
 *
 * @param int[] $ids
 * @return MDFCollectionItem[] Empty array when $ids is empty.
 */
function mdf_get_by_id(array $ids): array {
    // The ids are interpolated into the SQL text, so force them to integers.
    $ids = array_map('intval', $ids);

    // "IN ()" is a syntax error; answer an empty list without touching the DB.
    if (empty($ids))
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
}
|
||||
|
||||
/**
 * Searches the Mercure de France Sphinx index and loads the matching items.
 *
 * @param string $q      Raw query; it is passed through sphinx_mkquery() first.
 * @param int    $offset Forwarded to the client's setLimits().
 * @param int    $count  Forwarded to the client's setLimits().
 * @return array ['count' => int total_found, 'items' => MDFCollectionItem[]];
 *               count is 0 and items is empty when the query fails.
 */
function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    $query_filtered = sphinx_mkquery($q);

    $cl = sphinx_client();
    $cl->setLimits($offset, $count);
    $cl->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);
    $cl->setFieldWeights([
        'date' => 10,
        'issue' => 9,
        'text' => 8
    ]);

    $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
    $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);

    // run search
    $final_query = "$query_filtered";
    $result = $cl->query($final_query, MDF_ARCHIVE_SPHINX_RTINDEX);
    $error = $cl->getLastError();
    $warning = $cl->getLastWarning();
    if ($error)
        logError(__FUNCTION__, $error);
    if ($warning)
        logWarning(__FUNCTION__, $warning);
    if ($result === false)
        return ['count' => 0, 'items' => []];

    $total_found = (int)$result['total_found'];

    $items = [];
    if (!empty($result['matches'])) {
        $ids = array_keys($result['matches']);

        // The SQL lookup has no ORDER BY, so index the rows by id and then
        // walk the match keys to keep the order the search client returned.
        $by_id = [];
        foreach (mdf_get_by_id($ids) as $item)
            $by_id[(int)$item->id] = $item;

        foreach ($ids as $id) {
            $id = (int)$id;
            // A match may have no row if the index is stale; skip it.
            if (isset($by_id[$id]))
                $items[] = $by_id[$id];
        }
    }

    return ['count' => $total_found, 'items' => $items];
}
|
||||
|
||||
/**
 * Rebuilds the MDF Sphinx RT index: truncates it, then inserts one document
 * per item returned by mdf_get(), with its text read from mdf_texts.
 */
function mdf_reindex(): void {
    $index = MDF_ARCHIVE_SPHINX_RTINDEX;
    sphinx_execute("TRUNCATE RTINDEX ".$index);

    $db = DB();
    $entries = mdf_get();
    foreach ($entries as $entry) {
        // One text lookup per item, keyed by the item's id.
        $text_query = $db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $entry->id);
        $body = $db->result($text_query);

        sphinx_execute(
            "INSERT INTO ".$index." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)",
            $entry->id,
            $entry->volume,
            (string)$entry->issue,
            $entry->getHumanFriendlyDate(),
            $body
        );
    }
}
|
||||
|
||||
/**
|
||||
* @return BookItem[]
|
||||
*/
|
||||
|
@ -58,7 +58,7 @@ return <<<HTML
|
||||
<!-- /Yandex.Metrika counter -->
|
||||
</body>
|
||||
</html>
|
||||
<!-- {$exec_time} s -->
|
||||
{$ctx->if_admin(fn() => "<!-- {$exec_time} s -->")}
|
||||
HTML;
|
||||
}
|
||||
|
||||
|
@ -187,7 +187,7 @@ if ($disabled)
|
||||
|
||||
$mapper = function($s) use ($unsafe_query) {
|
||||
if ($unsafe_query !== null) {
|
||||
return hl_matched($s, [$unsafe_query]);
|
||||
return hl_matched($s, $unsafe_query);
|
||||
} else {
|
||||
return htmlescape($s);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user