391 lines
14 KiB
PHP
391 lines
14 KiB
PHP
<?php
|
|
|
|
use Sphinx\SphinxClient;
|
|
|
|
class files {
|
|
|
|
/** Name of the Sphinx RT index that wff_search() queries and wff_reindex() truncates and refills. */
const string WFF_ARCHIVE_SPHINX_RTINDEX = 'wff_collection';
|
|
/** Name of the Sphinx RT index that mdf_search() queries and mdf_reindex() truncates and refills. */
const string MDF_ARCHIVE_SPHINX_RTINDEX = 'mdf_archive';
|
|
/** Name of the Sphinx RT index that baconiana_search() queries and baconiana_reindex() truncates and refills. */
const string BACONIANA_ARCHIVE_SPHINX_RTINDEX = 'baconiana_archive';
|
|
|
|
/**
 * Fetch, for each requested id, a short excerpt of the stored text around the
 * earliest occurrence of any of the keywords.
 *
 * The position of the first hit is computed in SQL with LOCATE(); when a
 * keyword does not occur in a row its position falls back to
 * CHAR_LENGTH(text) + 1, so a row without any hit yields an excerpt taken
 * from the end of its text. The excerpt starts at most $before characters
 * ahead of the hit (never before character 1).
 *
 * $table and $field_id are interpolated into the statement verbatim and must
 * therefore be trusted identifiers, never user input. Ids are cast to int and
 * keywords are escaped before they reach the statement.
 *
 * @param string   $table    Table that holds the `text` column
 * @param string   $field_id Name of the id column in $table
 * @param int[]    $ids
 * @param string[] $keywords Must already be lower-cased
 * @param int      $before   Number of characters to include ahead of the hit
 * @param int      $after    Added to $before to size the excerpt window
 * @return array Map of id => ['excerpt' => string, 'index' => int], or null
 *               for ids that produced no row. With no ids the map is empty;
 *               with no keywords every id maps to null.
 */
public static function _get_text_excerpts(string $table, string $field_id, array $ids, array $keywords, int $before, int $after): array {
    // Ids end up inside an IN (...) list, so force them to integers.
    $ids = array_values(array_unique(array_map(intval(...), $ids)));
    $results = array_fill_keys($ids, null);

    // "IN ()" is a syntax error and without keywords there is nothing to locate.
    if (!$ids || !$keywords)
        return $results;

    $db = DB();

    // One "position or past-the-end" expression per keyword.
    $candidates = [];
    foreach ($keywords as $keyword) {
        $locate = "LOCATE('".$db->escape($keyword)."', text)";
        $candidates[] = "IF({$locate} > 0, {$locate}, CHAR_LENGTH(text) + 1)";
    }

    // LEAST() needs at least two arguments, so a single keyword is used as is.
    $position = count($candidates) > 1
        ? 'LEAST('.implode(', ', $candidates).')'
        : $candidates[0];

    $start = "GREATEST(1, {$position} - {$before})";
    $total = $before + $after;

    $sql = "SELECT
        {$field_id} AS id,
        {$start} AS excerpt_start_index,
        SUBSTRING(
            text,
            {$start},
            LEAST(
                CHAR_LENGTH(text),
                {$total} + {$position} - {$start}
            )
        ) AS excerpt
    FROM
        {$table}
    WHERE
        {$field_id} IN (".implode(',', $ids).")";

    $q = $db->query($sql);
    while ($row = $db->fetch($q)) {
        $results[$row['id']] = [
            // Collapse runs of whitespace so the excerpt fits on one line.
            'excerpt' => preg_replace('/\s+/', ' ', $row['excerpt']),
            'index' => (int)$row['excerpt_start_index']
        ];
    }

    return $results;
}
|
|
|
|
/**
 * Run a full-text query against a Sphinx index and resolve the matches to items.
 *
 * The client is always put into extended match mode. When $sphinx_client_setup
 * is given it receives the client and is responsible for ranking/sorting;
 * otherwise proximity-BM25 ranking with relevance sorting is applied.
 * Errors and warnings reported by the client are logged.
 *
 * @param string        $index               Name of the index to query
 * @param string        $q                   Raw query, passed through sphinx::mkquery()
 * @param int           $offset              Forwarded to setLimits()
 * @param int           $count               Forwarded to setLimits()
 * @param callable      $items_getter        Receives the 'matches' array of the result and returns the items
 * @param callable|null $sphinx_client_setup Optional hook that configures the client
 * @return array ['count' => int total found, 'items' => array]; count 0 and no items when the query fails
 */
public static function _search(string $index,
                               string $q,
                               int $offset,
                               int $count,
                               callable $items_getter,
                               ?callable $sphinx_client_setup = null): array {
    $prepared_query = sphinx::mkquery($q);

    $client = sphinx::getClient();
    $client->setLimits($offset, $count);
    $client->setMatchMode(Sphinx\SphinxClient::SPH_MATCH_EXTENDED);

    if (!is_callable($sphinx_client_setup)) {
        // No custom setup supplied: fall back to the default ranking and sorting.
        $client->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
        $client->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
    } else {
        $sphinx_client_setup($client);
    }

    // run search
    $response = $client->query((string)$prepared_query, $index);

    $last_error = $client->getLastError();
    $last_warning = $client->getLastWarning();
    if ($last_error) {
        logError(__FUNCTION__, $last_error);
    }
    if ($last_warning) {
        logWarning(__FUNCTION__, $last_warning);
    }

    if ($response === false) {
        return ['count' => 0, 'items' => []];
    }

    $found = (int)$response['total_found'];
    $items = empty($response['matches'])
        ? []
        : $items_getter($response['matches']);

    return ['count' => $found, 'items' => $items];
}
|
|
|
|
|
|
/**
 * Load a folder of the WFF collection, optionally followed by its ancestors.
 *
 * Returns null when no row has the given id or when the row is not a folder.
 * With $with_parents the result is a list that starts with the requested
 * folder and continues with each parent in turn; the walk stops at the first
 * parent that is missing or is not a folder.
 *
 * @param int $folder_id
 * @param bool $with_parents
 * @return WFFCollectionItem|WFFCollectionItem[]|null
 */
public static function wff_get_folder(int $folder_id, bool $with_parents = false): WFFCollectionItem|array|null {
    $db = DB();

    $res = $db->query("SELECT * FROM wff_collection WHERE id=?", $folder_id);
    if (!$db->numRows($res)) {
        return null;
    }

    $folder = new WFFCollectionItem($db->fetch($res));
    if (!$folder->isFolder()) {
        return null;
    }

    if (!$with_parents) {
        return $folder;
    }

    // Recurse upwards only when this folder actually has a parent.
    $ancestors = $folder->parentId
        ? self::wff_get_folder($folder->parentId, true)
        : null;

    return $ancestors === null
        ? [$folder]
        : array_merge([$folder], $ancestors);
}
|
|
|
|
/**
 * List items of the WFF collection ordered by title.
 *
 * - null: every item is returned;
 * - int: only direct children of that parent;
 * - int[]: direct children of any of the given parents. The values are cast
 *   to int before they are placed into the IN (...) list, and an empty list
 *   yields an empty result instead of an invalid "IN ()" statement.
 *
 * @param int|int[]|null $parent_id
 * @return array Result of WFFCollectionItem::create_instance() for each row
 */
public static function wff_get(int|array|null $parent_id = null): array {
    $sql = "SELECT * FROM wff_collection";
    $args = [];

    if (is_int($parent_id)) {
        $sql .= " WHERE parent_id=?";
        $args[] = $parent_id;
    } elseif (is_array($parent_id)) {
        $parent_ids = array_unique(array_map(intval(...), $parent_id));
        if (!$parent_ids)
            return [];
        $sql .= " WHERE parent_id IN (".implode(", ", $parent_ids).")";
    }

    $sql .= " ORDER BY title";

    $db = DB();
    $q = $db->query($sql, ...$args);

    return array_map(WFFCollectionItem::create_instance(...), $db->fetchAll($q));
}
|
|
|
|
/**
 * Load WFF collection items by id.
 *
 * Ids are cast to int and de-duplicated before they are placed into the
 * IN (...) list. An empty id list returns an empty array without querying,
 * because "IN ()" is not valid SQL. The order of the returned items is
 * whatever the database yields; it does not follow the order of $ids.
 *
 * @param int[] $ids
 * @return WFFCollectionItem[]
 */
public static function wff_get_by_id(array $ids): array {
    $ids = array_unique(array_map(intval(...), $ids));
    if (!$ids)
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM wff_collection WHERE id IN (".implode(',', $ids).")");
    return array_map(WFFCollectionItem::create_instance(...), $db->fetchAll($q));
}
|
|
|
|
/**
 * Full-text search over the WFF collection index.
 *
 * Matches are sorted by Sphinx ('@relevance DESC, is_folder DESC'). The items
 * are loaded from the database by id, which does not preserve that order, so
 * they are re-ordered here to follow the order of the matches. Ids that no
 * longer exist in the database are dropped.
 *
 * @param string $q      Raw search query
 * @param int    $offset Forwarded to the Sphinx client limits
 * @param int    $count  Forwarded to the Sphinx client limits
 * @return array ['count' => int, 'items' => WFFCollectionItem[]]
 */
public static function wff_search(string $q, int $offset = 0, int $count = 0): array {
    return self::_search(self::WFF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: static function(array $matches): array {
            $ids = array_keys($matches);

            $by_id = [];
            foreach (self::wff_get_by_id($ids) as $item)
                $by_id[$item->id] = $item;

            // Walk the ids in match order to restore the Sphinx sorting.
            $ordered = [];
            foreach ($ids as $id) {
                if (isset($by_id[$id]))
                    $ordered[] = $by_id[$id];
            }
            return $ordered;
        },
        sphinx_client_setup: static function(SphinxClient $cl): void {
            $cl->setFieldWeights([
                'title' => 50,
                'document_id' => 60,
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_EXTENDED, '@relevance DESC, is_folder DESC');
        }
    );
}
|
|
|
|
/**
 * Rebuild the WFF Sphinx RT index from the database.
 *
 * The index is truncated first, then every row of wff_collection is inserted.
 * For file items the text is read from wff_texts; folders, and files without
 * a text row, are indexed with an empty text.
 */
public static function wff_reindex(): void {
    sphinx::execute("TRUNCATE RTINDEX ".self::WFF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $rows = $db->query("SELECT * FROM wff_collection");

    while ($row = $db->fetch($rows)) {
        $entry = new WFFCollectionItem($row);

        $body = '';
        if ($entry->isFile()) {
            $body_q = $db->query("SELECT text FROM wff_texts WHERE wff_id=?", $entry->id);
            if ($db->numRows($body_q)) {
                $body = $db->result($body_q);
            }
        }

        sphinx::execute(
            "INSERT INTO ".self::WFF_ARCHIVE_SPHINX_RTINDEX." (id, document_id, title, text, is_folder, parent_id) VALUES (?, ?, ?, ?, ?, ?)",
            $entry->id,
            $entry->getDocumentId(),
            $entry->title,
            $body,
            (int)$entry->isFolder(),
            $entry->parentId
        );
    }
}
|
|
|
|
/**
 * Keyword excerpts for WFF texts: delegates to _get_text_excerpts() using the
 * wff_texts table and its wff_id column.
 *
 * @param int[]    $ids
 * @param string[] $keywords
 * @param int      $before
 * @param int      $after
 * @return array
 */
public static function wff_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return self::_get_text_excerpts(
        table: 'wff_texts',
        field_id: 'wff_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after
    );
}
|
|
|
|
/**
 * Load every row of mdf_collection, ordered by the `date` column.
 *
 * @return MDFCollectionItem[]
 */
public static function mdf_get(): array {
    $db = DB();
    $rows = $db->fetchAll(
        $db->query("SELECT * FROM mdf_collection ORDER BY `date`")
    );
    return array_map(MDFCollectionItem::create_instance(...), $rows);
}
|
|
|
|
/**
 * Load MDF collection items by id.
 *
 * Ids are cast to int and de-duplicated before they are placed into the
 * IN (...) list. An empty id list returns an empty array without querying,
 * because "IN ()" is not valid SQL. The order of the returned items is
 * whatever the database yields; it does not follow the order of $ids.
 *
 * @param int[] $ids
 * @return MDFCollectionItem[]
 */
public static function mdf_get_by_id(array $ids): array {
    $ids = array_unique(array_map(intval(...), $ids));
    if (!$ids)
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM mdf_collection WHERE id IN (".implode(',', $ids).")");
    return array_map(MDFCollectionItem::create_instance(...), $db->fetchAll($q));
}
|
|
|
|
/**
 * Full-text search over the MDF archive index.
 *
 * Matches are sorted by relevance in Sphinx. The items are loaded from the
 * database by id, which does not preserve that order, so they are re-ordered
 * here to follow the order of the matches. Ids that no longer exist in the
 * database are dropped.
 *
 * @param string $q      Raw search query
 * @param int    $offset Forwarded to the Sphinx client limits
 * @param int    $count  Forwarded to the Sphinx client limits
 * @return array ['count' => int, 'items' => MDFCollectionItem[]]
 */
public static function mdf_search(string $q, int $offset = 0, int $count = 0): array {
    return self::_search(self::MDF_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: static function(array $matches): array {
            $ids = array_keys($matches);

            $by_id = [];
            foreach (self::mdf_get_by_id($ids) as $item)
                $by_id[$item->id] = $item;

            // Walk the ids in match order to restore the relevance sorting.
            $ordered = [];
            foreach ($ids as $id) {
                if (isset($by_id[$id]))
                    $ordered[] = $by_id[$id];
            }
            return $ordered;
        },
        sphinx_client_setup: static function(SphinxClient $cl): void {
            $cl->setFieldWeights([
                'date' => 10,
                'issue' => 9,
                'text' => 8
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
        }
    );
}
|
|
|
|
/**
 * Rebuild the MDF Sphinx RT index from the database.
 *
 * The index is truncated first, then one document per item returned by
 * mdf_get() is inserted, with its text read from mdf_texts.
 *
 * NOTE(review): unlike the other reindexers, the text lookup is not guarded
 * by a numRows() check; what $db->result() yields for an item without a
 * text row is not visible here.
 */
public static function mdf_reindex(): void {
    sphinx::execute("TRUNCATE RTINDEX ".self::MDF_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $insert_sql = "INSERT INTO ".self::MDF_ARCHIVE_SPHINX_RTINDEX." (id, volume, issue, date, text) VALUES (?, ?, ?, ?, ?)";

    foreach (self::mdf_get() as $entry) {
        $body_q = $db->query("SELECT text FROM mdf_texts WHERE mdf_id=?", $entry->id);
        $body = $db->result($body_q);

        sphinx::execute(
            $insert_sql,
            $entry->id,
            $entry->volume,
            (string)$entry->issue,
            $entry->getHumanFriendlyDate(),
            $body
        );
    }
}
|
|
|
|
/**
 * Keyword excerpts for MDF texts: delegates to _get_text_excerpts() using the
 * mdf_texts table and its mdf_id column.
 *
 * @param int[]    $ids
 * @param string[] $keywords
 * @param int      $before
 * @param int      $after
 * @return array
 */
public static function mdf_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return self::_get_text_excerpts(
        table: 'mdf_texts',
        field_id: 'mdf_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after
    );
}
|
|
|
|
|
|
/**
 * List items of the Baconiana collection ordered by type, year and id.
 *
 * With an int $parent_id (default 0) only rows with that parent_id are
 * returned; passing null disables the filter and returns every row.
 * The parent id is bound through a `?` placeholder, the same way the other
 * queries of this class pass their values.
 *
 * @param int|null $parent_id
 * @return BaconianaCollectionItem[]
 */
public static function baconiana_get(?int $parent_id = 0): array {
    $db = DB();

    $sql = "SELECT * FROM baconiana_collection";
    $args = [];
    if ($parent_id !== null) {
        $sql .= " WHERE parent_id=?";
        $args[] = $parent_id;
    }
    $sql .= " ORDER BY type, year, id";

    $q = $db->query($sql, ...$args);
    return array_map(BaconianaCollectionItem::create_instance(...), $db->fetchAll($q));
}
|
|
|
|
/**
 * Load Baconiana collection items by id.
 *
 * Ids are cast to int and de-duplicated before they are placed into the
 * IN (...) list. An empty id list returns an empty array without querying,
 * because "IN ()" is not valid SQL. The order of the returned items is
 * whatever the database yields; it does not follow the order of $ids.
 *
 * @param int[] $ids
 * @return BaconianaCollectionItem[]
 */
public static function baconiana_get_by_id(array $ids): array {
    $ids = array_unique(array_map(intval(...), $ids));
    if (!$ids)
        return [];

    $db = DB();
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id IN (".implode(',', $ids).")");
    return array_map(BaconianaCollectionItem::create_instance(...), $db->fetchAll($q));
}
|
|
|
|
/**
 * Load a folder of the Baconiana collection, optionally followed by its ancestors.
 *
 * Returns null when no row has the given id or when the row is not a folder.
 * With $with_parents the result is a list that starts with the requested
 * folder and continues with each parent in turn; the walk stops at the first
 * parent that is missing or is not a folder.
 *
 * The declared return type names BaconianaCollectionItem, which is the class
 * this method actually instantiates and returns.
 *
 * @param int $folder_id
 * @param bool $with_parents
 * @return BaconianaCollectionItem|BaconianaCollectionItem[]|null
 */
public static function baconiana_get_folder(int $folder_id, bool $with_parents = false): BaconianaCollectionItem|array|null {
    $db = DB();
    $q = $db->query("SELECT * FROM baconiana_collection WHERE id=?", $folder_id);
    if (!$db->numRows($q))
        return null;

    $item = new BaconianaCollectionItem($db->fetch($q));
    if (!$item->isFolder())
        return null;

    if (!$with_parents)
        return $item;

    $items = [$item];
    if ($item->parentId) {
        // Recurse upwards; a null result means the chain ends here.
        $parents = self::baconiana_get_folder($item->parentId, true);
        if ($parents !== null)
            $items = array_merge($items, $parents);
    }
    return $items;
}
|
|
|
|
/**
 * Full-text search over the Baconiana archive index.
 *
 * Matches are sorted by relevance in Sphinx. The items are loaded from the
 * database by id, which does not preserve that order, so they are re-ordered
 * here to follow the order of the matches. Ids that no longer exist in the
 * database are dropped.
 *
 * @param string $q      Raw search query
 * @param int    $offset Forwarded to the Sphinx client limits
 * @param int    $count  Forwarded to the Sphinx client limits
 * @return array ['count' => int, 'items' => BaconianaCollectionItem[]]
 */
public static function baconiana_search(string $q, int $offset = 0, int $count = 0): array {
    return self::_search(self::BACONIANA_ARCHIVE_SPHINX_RTINDEX, $q, $offset, $count,
        items_getter: static function(array $matches): array {
            $ids = array_keys($matches);

            $by_id = [];
            foreach (self::baconiana_get_by_id($ids) as $item)
                $by_id[$item->id] = $item;

            // Walk the ids in match order to restore the relevance sorting.
            $ordered = [];
            foreach ($ids as $id) {
                if (isset($by_id[$id]))
                    $ordered[] = $by_id[$id];
            }
            return $ordered;
        },
        sphinx_client_setup: static function(SphinxClient $cl): void {
            // NOTE(review): baconiana_reindex() inserts title/year/text only;
            // confirm that an 'issues' field exists in the index.
            $cl->setFieldWeights([
                'year' => 10,
                'issues' => 9,
                'text' => 8
            ]);
            $cl->setRankingMode(Sphinx\SphinxClient::SPH_RANK_PROXIMITY_BM25);
            $cl->setSortMode(Sphinx\SphinxClient::SPH_SORT_RELEVANCE);
        }
    );
}
|
|
|
|
/**
 * Rebuild the Baconiana Sphinx RT index from the database.
 *
 * The index is truncated first. Every collection item (no parent filter) that
 * has a row in baconiana_texts is then inserted with a title of the form
 * "<year> (<issues>)"; items without a text row are skipped.
 */
public static function baconiana_reindex(): void {
    sphinx::execute("TRUNCATE RTINDEX ".self::BACONIANA_ARCHIVE_SPHINX_RTINDEX);

    $db = DB();
    $insert_sql = "INSERT INTO ".self::BACONIANA_ARCHIVE_SPHINX_RTINDEX." (id, title, year, text) VALUES (?, ?, ?, ?)";

    foreach (self::baconiana_get(null) as $entry) {
        $body_q = $db->query("SELECT text FROM baconiana_texts WHERE bcn_id=?", $entry->id);
        if ($db->numRows($body_q)) {
            $body = $db->result($body_q);
            sphinx::execute(
                $insert_sql,
                $entry->id,
                "{$entry->year} ({$entry->issues})",
                $entry->year,
                $body
            );
        }
    }
}
|
|
|
|
/**
 * Keyword excerpts for Baconiana texts: delegates to _get_text_excerpts()
 * using the baconiana_texts table and its bcn_id column.
 *
 * @param int[]    $ids
 * @param string[] $keywords
 * @param int      $before
 * @param int      $after
 * @return array
 */
public static function baconiana_get_text_excerpts(array $ids, array $keywords, int $before = 50, int $after = 40): array {
    return self::_get_text_excerpts(
        table: 'baconiana_texts',
        field_id: 'bcn_id',
        ids: $ids,
        keywords: $keywords,
        before: $before,
        after: $after
    );
}
|
|
|
|
|
|
/**
 * List the books of one category that sit directly under a parent.
 *
 * Ordering depends on the category: BookCategory::BOOKS is sorted by type,
 * then (only below a non-root parent) by year, then by author and title;
 * every other category is sorted by type and title.
 *
 * @param int          $parent_id
 * @param BookCategory $category
 * @return BookItem[]
 */
public static function books_get(int $parent_id = 0, BookCategory $category = BookCategory::BOOKS): array {
    $db = DB();

    if ($category === BookCategory::BOOKS) {
        $columns = ['type'];
        if ($parent_id !== 0) {
            $columns[] = 'year';
        }
        $columns[] = 'author';
        $columns[] = 'title';
    } else {
        $columns = ['type', 'title'];
    }
    $order_by = implode(', ', $columns);

    $rows = $db->fetchAll(
        $db->query("SELECT * FROM books WHERE category=? AND parent_id=? ORDER BY $order_by",
            $category->value, $parent_id)
    );
    return array_map(BookItem::create_instance(...), $rows);
}
|
|
|
|
/**
 * Load a folder from the books table, optionally followed by its ancestors.
 *
 * Returns null when no row has the given id or when the row is not a folder.
 * With $with_parents the result is a list that starts with the requested
 * folder and continues with each parent in turn; the walk stops at the first
 * parent that is missing or is not a folder.
 *
 * @param int  $id
 * @param bool $with_parents
 * @return BookItem|BookItem[]|null
 */
public static function books_get_folder(int $id, bool $with_parents = false): BookItem|array|null {
    $db = DB();

    $res = $db->query("SELECT * FROM books WHERE id=?", $id);
    if (!$db->numRows($res)) {
        return null;
    }

    $folder = new BookItem($db->fetch($res));
    if (!$folder->isFolder()) {
        return null;
    }

    if (!$with_parents) {
        return $folder;
    }

    // Recurse upwards only when this folder actually has a parent.
    $ancestors = $folder->parentId
        ? self::books_get_folder($folder->parentId, true)
        : null;

    return $ancestors === null
        ? [$folder]
        : array_merge([$folder], $ancestors);
}
|
|
|
|
}
|