4in1_ws_web/lib/files.php

391 lines
14 KiB
PHP

<?php
use Sphinx\SphinxClient;
class files {
/** Sphinx real-time index queried by wff_search() and refilled by wff_reindex(). */
const string WFF_ARCHIVE_SPHINX_RTINDEX = 'wff_collection';
/** Sphinx real-time index queried by mdf_search() and refilled by mdf_reindex(). */
const string MDF_ARCHIVE_SPHINX_RTINDEX = 'mdf_archive';
/** Sphinx real-time index queried by baconiana_search() and refilled by baconiana_reindex(). */
const string BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
/**
 * Builds a short text excerpt around the earliest keyword hit for each id.
 *
 * The hit position is the smallest LOCATE() result over all keywords; when no
 * keyword occurs in a text, the position falls back to CHAR_LENGTH(text) + 1.
 * $table and $field_id are interpolated into the SQL verbatim, so they must be
 * trusted identifiers (the callers in this class pass hard-coded names).
 *
 * NOTE(review): when the hit lies at least $before characters into the text,
 * the SUBSTRING() length works out to 2 * $before + $after - confirm intended.
 *
 * @param string $table Table that holds the `text` column
 * @param string $field_id Column of $table holding the document id
 * @param int[] $ids
 * @param string[] $keywords Must already be lower-cased
 * @param int $before Characters to include ahead of the hit position
 * @param int $after Added to $before when sizing the excerpt
 * @return array id => ['excerpt' => string, 'index' => int], or id => null
 *               for ids that produced no row (and for every id when
 *               $keywords is empty)
 */
public static function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after) {
    // The ids end up inside the SQL text, so force them to integers first.
    $ids = array_map(static fn($id): int => (int)$id, $ids);

    $results = [];
    foreach ($ids as $id)
        $results[$id] = null;

    // An empty id list would render "IN ()" and an empty keyword list would
    // leave nothing to LOCATE(); both are malformed SQL, so return early.
    if (empty($ids) || empty($keywords))
        return $results;

    $db = DB();

    $positions = [];
    foreach ($keywords as $keyword) {
        $locate = "LOCATE('".$db->escape($keyword)."', text)";
        // A miss (LOCATE() = 0) is moved past the end of the text so that it
        // can never be the smallest position.
        $positions[] = "IF({$locate} > 0, {$locate}, CHAR_LENGTH(text) + 1)";
    }

    // LEAST() needs at least two arguments, so a single keyword is used as is.
    $combined_parts = count($positions) > 1
        ? 'LEAST('.implode(', ', $positions).')'
        : $positions[0];

    $total = $before + $after;
    $sql = "SELECT
        {$field_id} AS id,
        GREATEST(
            1,
            {$combined_parts} - {$before}
        ) AS excerpt_start_index,
        SUBSTRING(
            text,
            GREATEST(
                1,
                {$combined_parts} - {$before}
            ),
            LEAST(
                CHAR_LENGTH(text),
                {$total} + {$combined_parts} - GREATEST(1, {$combined_parts} - {$before})
            )
        ) AS excerpt
    FROM
        {$table}
    WHERE
        {$field_id} IN (".implode(',', $ids).")";

    $q = $db->query($sql);
    while ($row = $db->fetch($q)) {
        $results[$row['id']] = [
            // Collapse every whitespace run into a single space.
            'excerpt' => preg_replace('/\s+/', ' ', $row['excerpt']),
            'index' => (int)$row['excerpt_start_index']
        ];
    }
    return $results;
}
/**
 * Runs a query against a Sphinx index and turns the matches into items.
 *
 * The client always uses the extended match mode. Ranking and sorting are
 * left to $sphinx_client_setup when one is given; otherwise proximity/BM25
 * ranking with relevance sorting is applied. Any error or warning reported
 * by the client is logged.
 *
 * @param string $index Name of the Sphinx index to query
 * @param string $q Raw query; passed through sphinx::mkquery() before use
 * @param int $offset Passed to the client's setLimits()
 * @param int $count Passed to the client's setLimits()
 * @param callable $items_getter Receives the 'matches' array of the result
 *                               and returns the items for it
 * @param callable|null $sphinx_client_setup Receives the client before the
 *                                           query is sent
 * @return array ['count' => int, 'items' => array]; count 0 and no items
 *               when the query itself failed
 */
public static function _search(string $index,
                               string $q,
                               int $offset,
                               int $count,
                               callable $items_getter,
                               ?callable $sphinx_client_setup = null): array {
    $query_filtered = sphinx::mkquery($q);

    $client = sphinx::getClient();
    $client->setLimits($offset, $count);
    $client->setMatchMode(SphinxClient::SPH_MATCH_EXTENDED);

    if (!is_callable($sphinx_client_setup)) {
        // No custom setup supplied: fall back to the default ranking and sorting.
        $client->setRankingMode(SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $client->setSortMode(SphinxClient::SPH_SORT_RELEVANCE);
    } else {
        $sphinx_client_setup($client);
    }

    $result = $client->query((string)$query_filtered, $index);

    // Diagnostics are logged whether or not the query produced a result.
    $error = $client->getLastError();
    $warning = $client->getLastWarning();
    if ($error) {
        logError(__FUNCTION__, $error);
    }
    if ($warning) {
        logWarning(__FUNCTION__, $warning);
    }

    if ($result === false) {
        return ['count' => 0, 'items' => []];
    }

    return [
        'count' => (int)$result['total_found'],
        'items' => empty($result['matches']) ? [] : $items_getter($result['matches']),
    ];
}
/**
 * Loads a folder from wff_collection, optionally followed by its ancestors.
 *
 * @param int $folder_id Id of the row to load
 * @param bool $with_parents When true, return a list that starts with the
 *                           folder itself and continues up the parent chain
 * @return WFFCollectionItem|WFFCollectionItem[]|null null when the row does
 *         not exist or is not a folder
 */
public static function wff_get_folder(int $folder_id, bool $with_parents = false): WFFCollectionItem|array|null {
    $db = DB();
    $res = $db->query("SELECT * FROM wff_collection WHERE id=?", $folder_id);

    $folder = $db->numRows($res) ? new WFFCollectionItem($db->fetch($res)) : null;
    if ($folder === null || !$folder->isFolder()) {
        return null;
    }

    if (!$with_parents) {
        return $folder;
    }

    // Walk up the tree; the chain ends at the first parent that cannot be
    // resolved to a folder.
    $ancestors = $folder->parentId
        ? self::wff_get_folder($folder->parentId, true)
        : null;

    return $ancestors === null
        ? [$folder]
        : array_merge([$folder], $ancestors);
}
/**
 * Lists wff_collection items ordered by title, optionally limited to one or
 * several parents.
 *
 * @param int|int[]|null $parent_id A single parent id, a list of parent ids,
 *                                  or null for no parent filter
 * @return array Items built with WFFCollectionItem::create_instance(); empty
 *               when an empty list of parent ids is given
 */
public static function wff_get(int|array|null $parent_id = null): array {
    $where = [];
    $args = [];

    if (is_int($parent_id)) {
        $where[] = "parent_id=?";
        $args[] = $parent_id;
    } elseif (is_array($parent_id)) {
        // No parents requested means nothing can match; "IN ()" is not valid SQL.
        if (empty($parent_id))
            return [];
        // The ids are spliced into the SQL text, so force them to integers.
        $parent_ids = array_map(static fn($id): int => (int)$id, $parent_id);
        $where[] = "parent_id IN (".implode(", ", $parent_ids).")";
    }

    $sql = "SELECT * FROM wff_collection";
    if (!empty($where))
        $sql .= " WHERE ".implode(" AND ", $where);
    $sql .= " ORDER BY title";

    $db = DB();
    $q = $db->query($sql, ...$args);
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Loads wff_collection items by id.
 *
 * The rows come back in whatever order the database returns them; the order
 * of $ids is not preserved by the query.
 *
 * @param int[] $ids
 * @return WFFCollectionItem[] Empty when $ids is empty
 */
public static function wff_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL, and no ids means no rows anyway.
    if (empty($ids))
        return [];

    // The ids are spliced into the SQL text, so force them to integers.
    $ids = array_map(static fn($id): int => (int)$id, $ids);

    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('WFFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Full-text search over the WFF collection index.
 *
 * Matches are sorted by relevance, folders first among equals, and the
 * returned items follow that order.
 *
 * NOTE(review): the default $count of 0 is handed to the client's setLimits()
 * unchanged - confirm the client accepts a zero limit.
 *
 * @param string $q Raw search query
 * @param int $offset
 * @param int $count
 * @return array ['count' => int, 'items' => WFFCollectionItem[]]
 */
public static function wff_search(string $q, int $offset = 0, int $count = 0): array {
    return self::_search(self::WFF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: static function(array $matches): array {
            // The match keys are the document ids, in the order Sphinx sorted them.
            $ids = array_keys($matches);

            // The id lookup has no ORDER BY, so index the rows by id and
            // re-emit them in match order to keep the ranking.
            $by_id = [];
            foreach (self::wff_get_by_id($ids) as $item)
                $by_id[$item->id] = $item;

            $ordered = [];
            foreach ($ids as $id) {
                if (isset($by_id[$id]))
                    $ordered[] = $by_id[$id];
            }
            return $ordered;
        },
        sphinx_client_setup: static function(SphinxClient $cl): void {
            $cl->setFieldWeights([
                'title' => 50,
                'document_id' => 60,
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_EXTENDED, '@relevance DESC, is_folder DESC');
        }
    );
}
/**
 * Empties the WFF Sphinx index and refills it from wff_collection.
 *
 * Every row is indexed. Files get their text from wff_texts when a row
 * exists there; folders and files without text are indexed with empty text.
 */
public static function wff_reindex(): void {
    $index = self::WFF_ARCHIVE_SPHINX_RTINDEX;
    sphinx::execute("TRUNCATE RTINDEX ".$index);

    $db = DB();
    $rows = $db->query("SELECT * FROM wff_collection");
    while ($row = $db->fetch($rows)) {
        $item = new WFFCollectionItem($row);

        $text = '';
        if ($item->isFile()) {
            $text_q = $db->query("SELECT text FROM wff_texts WHERE wff_id=?", $item->id);
            if ($db->numRows($text_q)) {
                $text = $db->result($text_q);
            }
        }

        sphinx::execute(
            "INSERT INTO ".$index." (id, document_id, title, text, is_folder, parent_id) VALUES (?, ?, ?, ?, ?, ?)",
            $item->id,
            $item->getDocumentId(),
            $item->title,
            $text,
            (int)$item->isFolder(),
            $item->parentId
        );
    }
}
/**
 * Excerpts for WFF documents: delegates to _get_text_excerpts() using the
 * wff_texts table keyed by wff_id.
 *
 * @param int[] $ids
 * @param string[] $keywords
 * @param int $before
 * @param int $after
 * @return array
 */
public static function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return self::_get_text_excerpts(
        table: 'wff_texts',
        field_id: 'wff_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after,
    );
}
/**
 * Loads the whole mdf_collection table, ordered by its `date` column.
 *
 * @return MDFCollectionItem[]
 */
public static function mdf_get(): array {
    $db = DB();
    $rows = $db->fetchAll($db->query("SELECT * FROM mdf_collection ORDER BY `date`"));

    return array_map(MDFCollectionItem::create_instance(...), $rows);
}
/**
 * Loads mdf_collection items by id.
 *
 * The rows come back in whatever order the database returns them; the order
 * of $ids is not preserved by the query.
 *
 * @param int[] $ids
 * @return MDFCollectionItem[] Empty when $ids is empty
 */
public static function mdf_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL, and no ids means no rows anyway.
    if (empty($ids))
        return [];

    // The ids are spliced into the SQL text, so force them to integers.
    $ids = array_map(static fn($id): int => (int)$id, $ids);

    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('MDFCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Full-text search over the MDF archive index.
 *
 * Matches are sorted by relevance and the returned items follow that order.
 *
 * NOTE(review): the default $count of 0 is handed to the client's setLimits()
 * unchanged - confirm the client accepts a zero limit.
 *
 * @param string $q Raw search query
 * @param int $offset
 * @param int $count
 * @return array ['count' => int, 'items' => MDFCollectionItem[]]
 */
public static function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    return self::_search(self::MDF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: static function(array $matches): array {
            // The match keys are the document ids, in the order Sphinx sorted them.
            $ids = array_keys($matches);

            // The id lookup has no ORDER BY, so index the rows by id and
            // re-emit them in match order to keep the ranking.
            $by_id = [];
            foreach (self::mdf_get_by_id($ids) as $item)
                $by_id[$item->id] = $item;

            $ordered = [];
            foreach ($ids as $id) {
                if (isset($by_id[$id]))
                    $ordered[] = $by_id[$id];
            }
            return $ordered;
        },
        sphinx_client_setup: static function(SphinxClient $cl): void {
            $cl->setFieldWeights([
                'date' => 10,
                'issue' => 9,
                'text' => 8
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
        }
    );
}
/**
 * Empties the MDF Sphinx index and refills it from mdf_collection.
 *
 * Every item is indexed; an item without a row in mdf_texts is indexed with
 * empty text, the same way wff_reindex() treats documents that have no text.
 */
public static function mdf_reindex(): void {
    sphinx::execute("TRUNCATE RTINDEX ".self::MDF_ARCHIVE_SPHINX_RTINDEX);
    $db = DB();
    $mdf = self::mdf_get();
    foreach ($mdf as $item) {
        $text_q = $db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $item->id);
        // Only read the value when a row exists, as the other reindexers do.
        $text = $db->numRows($text_q) ? $db->result($text_q) : '';
        sphinx::execute("INSERT INTO ".self::MDF_ARCHIVE_SPHINX_RTINDEX." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)",
            $item->id, $item->volume, (string)$item->issue, $item->getHumanFriendlyDate(), $text);
    }
}
/**
 * Excerpts for MDF documents: delegates to _get_text_excerpts() using the
 * mdf_texts table keyed by mdf_id.
 *
 * @param int[] $ids
 * @param string[] $keywords
 * @param int $before
 * @param int $after
 * @return array
 */
public static function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return self::_get_text_excerpts(
        table: 'mdf_texts',
        field_id: 'mdf_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after,
    );
}
/**
 * Lists baconiana_collection items ordered by type, year and id.
 *
 * @param int|null $parent_id Only items with this parent_id are returned;
 *                            null disables the filter and returns every item
 * @return BaconianaCollectionItem[]
 */
public static function baconiana_get(?int $parent_id = 0): array {
    $db = DB();
    $sql = "SELECT * FROM baconiana_collection";
    $args = [];
    if ($parent_id !== null) {
        // Bound as a placeholder, like the other single-value lookups in this class.
        $sql .= " WHERE parent_id=?";
        $args[] = $parent_id;
    }
    $sql .= " ORDER BY type, year, id";
    $q = $db->query($sql, ...$args);
    return array_map('BaconianaCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Loads baconiana_collection items by id.
 *
 * The rows come back in whatever order the database returns them; the order
 * of $ids is not preserved by the query.
 *
 * @param int[] $ids
 * @return BaconianaCollectionItem[] Empty when $ids is empty
 */
public static function baconiana_get_by_id(array $ids): array {
    // "IN ()" is not valid SQL, and no ids means no rows anyway.
    if (empty($ids))
        return [];

    // The ids are spliced into the SQL text, so force them to integers.
    $ids = array_map(static fn($id): int => (int)$id, $ids);

    $db = DB();
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id IN (".implode(',', $ids).")");
    return array_map('BaconianaCollectionItem::create_instance', $db->fetchAll($q));
}
/**
 * Loads a folder from baconiana_collection, optionally followed by its ancestors.
 *
 * @param int $folder_id Id of the row to load
 * @param bool $with_parents When true, return a list that starts with the
 *                           folder itself and continues up the parent chain
 * @return BaconianaCollectionItem|BaconianaCollectionItem[]|null null when
 *         the row does not exist or is not a folder
 */
public static function baconiana_get_folder(int $folder_id, bool $with_parents = false): BaconianaCollectionItem|array|null {
    $db = DB();
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id=?", $folder_id);
    if (!$db->numRows($q))
        return null;

    // The declared return type names BaconianaCollectionItem because that is
    // the class instantiated here.
    $item = new BaconianaCollectionItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    if ($with_parents) {
        $items = [$item];
        if ($item->parentId) {
            // The chain ends at the first parent that cannot be resolved to a folder.
            $parents = self::baconiana_get_folder($item->parentId, true);
            if ($parents !== null)
                $items = array_merge($items, $parents);
        }
        return $items;
    }

    return $item;
}
/**
 * Full-text search over the Baconiana archive index.
 *
 * Matches are sorted by relevance and the returned items follow that order.
 *
 * NOTE(review): the default $count of 0 is handed to the client's setLimits()
 * unchanged - confirm the client accepts a zero limit.
 * NOTE(review): baconiana_reindex() inserts id, title, year and text only;
 * confirm that the 'issues' weight below refers to a field the index has.
 *
 * @param string $q Raw search query
 * @param int $offset
 * @param int $count
 * @return array ['count' => int, 'items' => BaconianaCollectionItem[]]
 */
public static function baconiana_search(string $q, int $offset = 0, int $count = 0): array {
    return self::_search(self::BACONIANA_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: static function(array $matches): array {
            // The match keys are the document ids, in the order Sphinx sorted them.
            $ids = array_keys($matches);

            // The id lookup has no ORDER BY, so index the rows by id and
            // re-emit them in match order to keep the ranking.
            $by_id = [];
            foreach (self::baconiana_get_by_id($ids) as $item)
                $by_id[$item->id] = $item;

            $ordered = [];
            foreach ($ids as $id) {
                if (isset($by_id[$id]))
                    $ordered[] = $by_id[$id];
            }
            return $ordered;
        },
        sphinx_client_setup: static function(SphinxClient $cl): void {
            $cl->setFieldWeights([
                'year' => 10,
                'issues' => 9,
                'text' => 8
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
        }
    );
}
/**
 * Empties the Baconiana Sphinx index and refills it from baconiana_collection.
 *
 * Items of every parent are visited; an item without a row in
 * baconiana_texts is left out of the index.
 */
public static function baconiana_reindex(): void {
    $index = self::BACONIANA_ARCHIVE_SPHINX_RTINDEX;
    sphinx::execute("TRUNCATE RTINDEX ".$index);

    $db = DB();
    foreach (self::baconiana_get(null) as $item) {
        $text_q = $db->query("SELECT text FROM baconiana_texts WHERE bcn_id=?", $item->id);
        if ($db->numRows($text_q)) {
            $text = $db->result($text_q);
            sphinx::execute(
                "INSERT INTO ".$index." (id, title, year, text) VALUES (?, ?, ?, ?)",
                $item->id,
                "$item->year ($item->issues)",
                $item->year,
                $text
            );
        }
    }
}
/**
 * Excerpts for Baconiana documents: delegates to _get_text_excerpts() using
 * the baconiana_texts table keyed by bcn_id.
 *
 * @param int[] $ids
 * @param string[] $keywords
 * @param int $before
 * @param int $after
 * @return array
 */
public static function baconiana_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return self::_get_text_excerpts(
        table: 'baconiana_texts',
        field_id: 'bcn_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after,
    );
}
/**
 * Lists the rows of `books` that belong to a category and a parent.
 *
 * Ordering depends on the category: BOOKS are ordered by type, author and
 * title, with year inserted after type when a non-zero parent is given;
 * every other category is ordered by type and title.
 *
 * @param int $parent_id
 * @param BookCategory $category
 * @return BookItem[]
 */
public static function books_get(int $parent_id = 0, BookCategory $category = BookCategory::BOOKS): array {
    $order_by = match (true) {
        $category !== BookCategory::BOOKS => "type, title",
        $parent_id !== 0 => "type, year, author, title",
        default => "type, author, title",
    };

    $db = DB();
    $rows = $db->fetchAll($db->query(
        "SELECT * FROM books WHERE category=? AND parent_id=? ORDER BY $order_by",
        $category->value,
        $parent_id
    ));

    return array_map('BookItem::create_instance', $rows);
}
/**
 * Loads a folder from `books`, optionally followed by its ancestors.
 *
 * @param int $id Id of the row to load
 * @param bool $with_parents When true, return a list that starts with the
 *                           folder itself and continues up the parent chain
 * @return BookItem|BookItem[]|null null when the row does not exist or is
 *         not a folder
 */
public static function books_get_folder(int $id, bool $with_parents = false): BookItem|array|null {
    $db = DB();
    $res = $db->query("SELECT * FROM books WHERE id=?", $id);

    $folder = $db->numRows($res) ? new BookItem($db->fetch($res)) : null;
    if ($folder === null || !$folder->isFolder()) {
        return null;
    }

    if (!$with_parents) {
        return $folder;
    }

    // Walk up the tree; the chain ends at the first parent that cannot be
    // resolved to a folder.
    $ancestors = $folder->parentId
        ? self::books_get_folder($folder->parentId, true)
        : null;

    return $ancestors === null
        ? [$folder]
        : array_merge([$folder], $ancestors);
}
}