mdf: add search
This commit is contained in:
parent
d5c352db13
commit
c542307f00
@ -308,15 +308,20 @@ function pcre_check_error(mixed &$result, bool $no_error = false): bool {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
function hl_matched(string $s, string|Stringable|SkinString|array|null $keywords = []): string {
|
||||||
* @param string $s
|
|
||||||
* @param string|string[]|null $keywords
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
function hl_matched(string $s, string|array|null $keywords = []): string {
|
|
||||||
if (is_null($keywords))
|
if (is_null($keywords))
|
||||||
return htmlescape($s);
|
return htmlescape($s);
|
||||||
|
|
||||||
|
if ($keywords instanceof Stringable)
|
||||||
|
$keywords = $keywords->__toString();
|
||||||
|
else if (is_array($keywords)) {
|
||||||
|
$keywords = array_map(function($s) {
|
||||||
|
if ($s instanceof Stringable)
|
||||||
|
return $s->__toString();
|
||||||
|
return $s;
|
||||||
|
}, $keywords);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_string($keywords))
|
if (is_string($keywords))
|
||||||
$keywords = preg_split('/\s+/', $keywords);
|
$keywords = preg_split('/\s+/', $keywords);
|
||||||
|
|
||||||
|
@ -115,8 +115,55 @@ class FilesHandler extends request_handler {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case FilesCollection::MercureDeFrance:
|
case FilesCollection::MercureDeFrance:
|
||||||
$files = mdf_get();
|
if ($query !== null) {
|
||||||
set_title('$files_mdf_collection');
|
$files = mdf_search($query, $offset, self::SEARCH_RESULTS_PER_PAGE);
|
||||||
|
$vars += [
|
||||||
|
'search_count' => $files['count'],
|
||||||
|
'search_query' => $query
|
||||||
|
];
|
||||||
|
|
||||||
|
/** @var MDFCollectionItem[] $files */
|
||||||
|
$files = $files['items'];
|
||||||
|
|
||||||
|
$query_words = array_map('mb_strtolower', preg_split('/\s+/', $query));
|
||||||
|
$found = [];
|
||||||
|
$result_ids = [];
|
||||||
|
foreach ($files as $file) {
|
||||||
|
$result_ids[] = $file->id;
|
||||||
|
|
||||||
|
foreach ([
|
||||||
|
$file->date,
|
||||||
|
(string)$file->issue
|
||||||
|
] as $haystack) {
|
||||||
|
foreach ($query_words as $qw) {
|
||||||
|
if (mb_strpos($haystack, $qw) !== false) {
|
||||||
|
$found[$file->id] = true;
|
||||||
|
continue 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$found = array_map('intval', array_keys($found));
|
||||||
|
$not_found = array_diff($result_ids, $found);
|
||||||
|
if (!empty($not_found))
|
||||||
|
$text_excerpts = mdf_get_text_excerpts($not_found, $query_words);
|
||||||
|
|
||||||
|
if (is_xhr_request()) {
|
||||||
|
ajax_ok([
|
||||||
|
...$vars,
|
||||||
|
'new_offset' => $offset + count($files),
|
||||||
|
'html' => skin('files')->collection_files($files, $query, $text_excerpts)
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$files = mdf_get();
|
||||||
|
}
|
||||||
|
|
||||||
|
$title = lang('files_mdf_collection');
|
||||||
|
if ($query)
|
||||||
|
$title .= ' - '.htmlescape($query);
|
||||||
|
set_title($title);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
require_once 'engine/sphinx.php';
|
require_once 'engine/sphinx.php';
|
||||||
|
|
||||||
const WFF_ARCHIVE_SPHINX_RTINDEX = 'wff_collection';
|
const WFF_ARCHIVE_SPHINX_RTINDEX = 'wff_collection';
|
||||||
//const MDF_ARCHIVE_SPHINX/**/_RTINDEX = 'mdf_archive';
|
const MDF_ARCHIVE_SPHINX_RTINDEX = 'mdf_archive';
|
||||||
//const BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
|
//const BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
|
||||||
|
|
||||||
enum FilesCollection: string {
|
enum FilesCollection: string {
|
||||||
@ -12,7 +12,7 @@ enum FilesCollection: string {
|
|||||||
case Baconiana = 'baconiana';
|
case Baconiana = 'baconiana';
|
||||||
|
|
||||||
public function isSearchSupported(): bool {
|
public function isSearchSupported(): bool {
|
||||||
return $this == FilesCollection::WilliamFriedman;
|
return $this == FilesCollection::WilliamFriedman || $this == FilesCollection::MercureDeFrance;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -161,13 +161,13 @@ class MDFCollectionItem extends model implements FilesItemInterface {
|
|||||||
return "№{$this->issue}, {$this->getHumanFriendlyDate()}";
|
return "№{$this->issue}, {$this->getHumanFriendlyDate()}";
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function getHumanFriendlyDate(): string {
|
public function getHumanFriendlyDate(): string {
|
||||||
$dt = new DateTime($this->date);
|
$dt = new DateTime($this->date);
|
||||||
return $dt->format('j M Y');
|
return $dt->format('j M Y');
|
||||||
}
|
}
|
||||||
|
|
||||||
public function isTargetBlank(): bool { return true; }
|
public function isTargetBlank(): bool { return true; }
|
||||||
public function getId(): string { return (string)$this->issue; }
|
public function getId(): string { return $this->id; }
|
||||||
public function getUrl(): string {
|
public function getUrl(): string {
|
||||||
global $config;
|
global $config;
|
||||||
return 'https://'.$config['files_domain'].'/Mercure-de-France-OCR/'.$this->path;
|
return 'https://'.$config['files_domain'].'/Mercure-de-France-OCR/'.$this->path;
|
||||||
@ -339,9 +339,7 @@ function wff_get_by_id(array $ids): array {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function wff_search(string $q, int $offset = 0, int $count = 0): array {
|
function wff_search(string $q, int $offset = 0, int $count = 0): array {
|
||||||
$query_filtered = sphinx_mkquery($q, [
|
$query_filtered = sphinx_mkquery($q);
|
||||||
'star' => false,
|
|
||||||
]);
|
|
||||||
|
|
||||||
$cl = sphinx_client();
|
$cl = sphinx_client();
|
||||||
$cl->setLimits($offset, $count);
|
$cl->setLimits($offset, $count);
|
||||||
@ -391,17 +389,24 @@ function wff_reindex(): void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch text excerpts around the given keywords for Mercure de France items.
 *
 * Delegates to _get_text_excerpts() using the `mdf_texts` table and its
 * `mdf_id` column.
 *
 * @param int[] $ids         Item ids to look up (interpolated into SQL by the helper)
 * @param string[] $keywords Must already be lower-cased (per the helper's contract)
 * @param int $before        Passed through to the helper; presumably characters kept before the match
 * @param int $after         Passed through to the helper; presumably characters kept after the match
 * @return array             Whatever _get_text_excerpts() returns for these ids
 */
function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        'mdf_texts',
        'mdf_id',
        $ids,
        $keywords,
        $before,
        $after
    );
}
|
||||||
|
|
||||||
|
/**
 * Fetch text excerpts around the given keywords for William Friedman items.
 *
 * Delegates to _get_text_excerpts() using the `wff_texts` table and its
 * `wff_id` column.
 *
 * @param int[] $ids         Item ids to look up (interpolated into SQL by the helper)
 * @param string[] $keywords Must already be lower-cased (per the helper's contract)
 * @param int $before        Passed through to the helper; presumably characters kept before the match
 * @param int $after         Passed through to the helper; presumably characters kept after the match
 * @return array             Whatever _get_text_excerpts() returns for these ids
 */
function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return _get_text_excerpts(
        'wff_texts',
        'wff_id',
        $ids,
        $keywords,
        $before,
        $after
    );
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* @param string $table
|
||||||
|
* @param string $field_id
|
||||||
* @param int[] $ids
|
* @param int[] $ids
|
||||||
* @param string[] $keywords Must already be lower-cased
|
* @param string[] $keywords Must already be lower-cased
|
||||||
* @param int $before
|
* @param int $before
|
||||||
* @param int $after
|
* @param int $after
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
function wff_get_text_excerpts(array $ids,
|
function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after) {
|
||||||
array $keywords,
|
|
||||||
int $before = 50,
|
|
||||||
int $after = 40): array {
|
|
||||||
$results = [];
|
$results = [];
|
||||||
foreach ($ids as $id)
|
foreach ($ids as $id)
|
||||||
$results[$id] = null;
|
$results[$id] = null;
|
||||||
@ -411,7 +416,7 @@ function wff_get_text_excerpts(array $ids,
|
|||||||
$dynamic_sql_parts = [];
|
$dynamic_sql_parts = [];
|
||||||
$combined_parts = [];
|
$combined_parts = [];
|
||||||
foreach ($keywords as $keyword) {
|
foreach ($keywords as $keyword) {
|
||||||
$part = "LOCATE('".$db->escape($keyword)."', LOWER(text))";
|
$part = "LOCATE('".$db->escape($keyword)."', text)";
|
||||||
$dynamic_sql_parts[] = $part;
|
$dynamic_sql_parts[] = $part;
|
||||||
}
|
}
|
||||||
if (count($dynamic_sql_parts) > 1) {
|
if (count($dynamic_sql_parts) > 1) {
|
||||||
@ -425,7 +430,7 @@ function wff_get_text_excerpts(array $ids,
|
|||||||
|
|
||||||
$total = $before + $after;
|
$total = $before + $after;
|
||||||
$sql = "SELECT
|
$sql = "SELECT
|
||||||
wff_id AS id,
|
{$field_id} AS id,
|
||||||
GREATEST(
|
GREATEST(
|
||||||
1,
|
1,
|
||||||
{$combined_parts} - {$before}
|
{$combined_parts} - {$before}
|
||||||
@ -442,9 +447,9 @@ function wff_get_text_excerpts(array $ids,
|
|||||||
)
|
)
|
||||||
) AS excerpt
|
) AS excerpt
|
||||||
FROM
|
FROM
|
||||||
wff_texts
|
{$table}
|
||||||
WHERE
|
WHERE
|
||||||
wff_id IN (".implode(',', $ids).")";
|
{$field_id} IN (".implode(',', $ids).")";
|
||||||
|
|
||||||
$q = $db->query($sql);
|
$q = $db->query($sql);
|
||||||
while ($row = $db->fetch($q)) {
|
while ($row = $db->fetch($q)) {
|
||||||
@ -466,6 +471,63 @@ function mdf_get(): array {
|
|||||||
return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
|
return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Load Mercure de France collection items by id.
 *
 * The returned list follows the order of $ids (after de-duplication), so a
 * caller that passes ids sorted by search relevance gets items back in that
 * same order. Ids with no matching row are silently skipped.
 *
 * @param int[] $ids
 * @return MDFCollectionItem[]
 */
function mdf_get_by_id(array $ids): array {
    // Ids are interpolated into the SQL text below, so force them to integers;
    // duplicates are dropped because IN (...) would return one row per id anyway.
    $ids = array_values(array_unique(array_map('intval', $ids)));

    // "IN ()" is a syntax error, and there is nothing to fetch.
    if (empty($ids))
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".implode(',', $ids).")");
    $items = array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));

    // A SELECT without ORDER BY has no guaranteed row order; restore the
    // order the caller asked for.
    $by_id = [];
    foreach ($items as $item)
        $by_id[(int)$item->id] = $item;

    $ordered = [];
    foreach ($ids as $id) {
        if (isset($by_id[$id]))
            $ordered[] = $by_id[$id];
    }

    return $ordered;
}
|
||||||
|
|
||||||
|
/**
 * Search the Mercure de France Sphinx index.
 *
 * Runs an extended-mode query ranked by proximity/BM25 and sorted by
 * relevance, with the `date`, `issue` and `text` fields weighted 10/9/8.
 * Any error or warning reported by the Sphinx client is logged.
 *
 * @param string $q   Raw user query; filtered through sphinx_mkquery()
 * @param int $offset Passed to SphinxClient::setLimits()
 * @param int $count  Passed to SphinxClient::setLimits()
 * @return array      ['count' => int total found, 'items' => MDFCollectionItem[]];
 *                    count 0 and no items when the Sphinx query fails
 */
function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    $sphinx_query = sphinx_mkquery($q);

    $client = sphinx_client();
    $client->setLimits($offset, $count);
    $client->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);
    $client->setFieldWeights(['date' => 10, 'issue' => 9, 'text' => 8]);
    $client->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
    $client->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);

    // run search
    $response = $client->query((string)$sphinx_query, MDF_ARCHIVE_SPHINX_RTINDEX);

    // Read both diagnostics first, then log whichever is set.
    $last_error = $client->getLastError();
    $last_warning = $client->getLastWarning();
    if ($last_error)
        logError(__FUNCTION__, $last_error);
    if ($last_warning)
        logWarning(__FUNCTION__, $last_warning);

    if ($response === false)
        return ['count' => 0, 'items' => []];

    $total = (int)$response['total_found'];

    // Match keys are the document ids; hydrate them into collection items.
    $found_items = empty($response['matches'])
        ? []
        : mdf_get_by_id(array_keys($response['matches']));

    return ['count' => $total, 'items' => $found_items];
}
|
||||||
|
|
||||||
|
/**
 * Rebuild the Mercure de France Sphinx real-time index from scratch.
 *
 * Truncates the index, then for every item returned by mdf_get() reads its
 * text from `mdf_texts` and inserts one document with the item's id, volume,
 * issue (as string), human-friendly date and text.
 */
function mdf_reindex(): void {
    sphinx_execute("TRUNCATE RTINDEX ".MDF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();

    // The statement is the same for every item, so build it once.
    $insert_sql = "INSERT INTO ".MDF_ARCHIVE_SPHINX_RTINDEX." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)";

    foreach (mdf_get() as $entry) {
        $text_q = $db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $entry->id);
        $body = $db->result($text_q);

        sphinx_execute(
            $insert_sql,
            $entry->id,
            $entry->volume,
            (string)$entry->issue,
            $entry->getHumanFriendlyDate(),
            $body
        );
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return BookItem[]
|
* @return BookItem[]
|
||||||
*/
|
*/
|
||||||
|
@ -58,7 +58,7 @@ return <<<HTML
|
|||||||
<!-- /Yandex.Metrika counter -->
|
<!-- /Yandex.Metrika counter -->
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
<!-- {$exec_time} s -->
|
{$ctx->if_admin(fn() => "<!-- {$exec_time} s -->")}
|
||||||
HTML;
|
HTML;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,7 +187,7 @@ if ($disabled)
|
|||||||
|
|
||||||
$mapper = function($s) use ($unsafe_query) {
|
$mapper = function($s) use ($unsafe_query) {
|
||||||
if ($unsafe_query !== null) {
|
if ($unsafe_query !== null) {
|
||||||
return hl_matched($s, [$unsafe_query]);
|
return hl_matched($s, $unsafe_query);
|
||||||
} else {
|
} else {
|
||||||
return htmlescape($s);
|
return htmlescape($s);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user