first commit
This commit is contained in:
commit
3e3b50f26c
18
README.md
Normal file
18
README.md
Normal file
@ -0,0 +1,18 @@
|
||||
# vkflex-php
|
||||
|
||||
Расширение PHP для склонения имён и фамилий. Использует vkext/flex из [KittenPHP](https://github.com/vk-com/kphp-kdb).
|
||||
Включена поддержка русского и украинского.
|
||||
|
||||
### Использование
|
||||
|
||||
`vkflex($name, $case, $sex, $lang, $type)`
|
||||
|
||||
- `$name` - имя или фамилия в кодировке cp1251;
|
||||
- `$case` - падеж: `Gen`, `Dat`, `Acc`, `Ins` или `Abl`;
|
||||
- `$sex` - `0` - мужской, `1` - женский;
|
||||
- `$lang` - `0` - русский, `1` - украинский;
|
||||
- `$type` - `0` - имя, `1` - фамилия.
|
||||
|
||||
Возвращает строку в кодировке cp1251.
|
||||
|
||||
См. пример в `test.php`.
|
7
config.m4
Normal file
7
config.m4
Normal file
@ -0,0 +1,7 @@
|
||||
dnl Build configuration for the vkflex extension.
dnl PHP_ARG_ENABLE takes (name, check message, help text); the comma after
dnl the check message is required, otherwise the help text is swallowed into
dnl the message and never shown by ./configure --help.
PHP_ARG_ENABLE(vkflex, [whether to enable vkflex extension],
[  --enable-vkflex         Enable vkflex extension])

if test "$PHP_VKFLEX" = "yes"; then
  AC_DEFINE(HAVE_VKFLEX, 1, [Whether you have vkflex extension])
  dnl vkext_flex.c #includes the generated vkext_flex_auto.c itself.
  PHP_NEW_EXTENSION(vkflex, vkflex.c vkext_flex.c, $ext_shared)
fi
|
25
test.php
Normal file
25
test.php
Normal file
@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
/**
 * Declines a UTF-8 name with vkflex(), converting to cp1251 and back.
 *
 * The language is read from the global $lang.
 *
 * @param string $name name or surname in UTF-8
 * @param string $case grammatical case passed to vkflex()
 * @param int $sex 0 for male, 1 for female
 * @param int $type 0 for names, 1 for surnames
 * @return string the declined name in UTF-8, or $name unchanged when it
 *                cannot be converted between the two encodings
 */
function flex($name, $case, $sex, $type) {
    global $lang;

    $encoded = iconv('utf-8', 'cp1251', $name);
    if ($encoded === false) {
        // iconv() failed (e.g. a character with no cp1251 equivalent):
        // vkflex() cannot process the input, so hand it back untouched.
        return $name;
    }

    $declined = vkflex($encoded, $case, $sex, $lang, $type);
    $result = iconv('cp1251', 'utf-8', $declined);

    return $result === false ? $name : $result;
}
|
||||
|
||||
// Load the extension at runtime only when it is not loaded yet; calling dl()
// for an already loaded module raises a warning.
if (!extension_loaded('vkflex')) {
    dl('vkflex.so');
}

// Every entry is "<sex> <name> <surname>"; sex is 0 for male, 1 for female.
$names = ['0 Евгений Зиновьев', '0 Владимир Путин', '1 Катя Лебедева', '1 Анастасия Бабич'];
$lang = 0; // ru
$case = 'Gen';

foreach ($names as $entry) {
    list($sex, $name, $surname) = explode(' ', $entry);
    $sex = (int)$sex;
    // type 0 declines a first name, type 1 a surname
    printf("%s %s\n", flex($name, $case, $sex, 0), flex($surname, $case, $sex, 1));
}
|
180
vkext_flex.c
Normal file
180
vkext_flex.c
Normal file
@ -0,0 +1,180 @@
|
||||
/*
|
||||
This file is part of VK/KittenPHP-DB-Engine.
|
||||
|
||||
VK/KittenPHP-DB-Engine is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
VK/KittenPHP-DB-Engine is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with VK/KittenPHP-DB-Engine. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
This program is released under the GPL with the additional exemption
|
||||
that compiling, linking, and/or using OpenSSL is allowed.
|
||||
You are free to remove this exemption from derived works.
|
||||
|
||||
Copyright 2011-2013 Vkontakte Ltd
|
||||
2011-2013 Vitaliy Valtman
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "vkext_flex.h"
|
||||
#include "vkext_flex_auto.c"
|
||||
#include "php.h"
|
||||
|
||||
/* Size in bytes of the scratch buffer below (64 KiB). */
#define BUFF_LEN (1 << 16)

/* File-local scratch buffer in which do_flex assembles its result. */
static char buff[BUFF_LEN];
|
||||
|
||||
/*
 * Duplicates the NUL-terminated string s into memory obtained from emalloc.
 * Returns the copy, or NULL when emalloc yields NULL.
 */
char *vk_estrdup (const char *s) {
  size_t size = strlen (s) + 1;   /* include the terminating NUL */
  char *copy = emalloc (size);
  if (!copy) {
    return NULL;
  }
  memcpy (copy, s, size);
  return copy;
}
|
||||
|
||||
char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id) {
|
||||
if (name_len > (1 << 10)) {
|
||||
return vk_estrdup (name);
|
||||
}
|
||||
struct lang *cur_lang;
|
||||
if (lang_id < 0 || lang_id >= LANG_NUM || !langs[lang_id]) {
|
||||
return vk_estrdup (name);
|
||||
}
|
||||
cur_lang = langs[lang_id];
|
||||
assert (cur_lang);
|
||||
int t = -1;
|
||||
if (!strcmp (type, "names")) {
|
||||
if (cur_lang->names_start < 0) {
|
||||
return vk_estrdup (name);
|
||||
}
|
||||
t = cur_lang->names_start;
|
||||
} else if (!strcmp (type, "surnames")) {
|
||||
if (cur_lang->surnames_start < 0) {
|
||||
return vk_estrdup (name);
|
||||
}
|
||||
t = cur_lang->surnames_start;
|
||||
} else {
|
||||
return vk_estrdup (name);
|
||||
}
|
||||
assert (t >= 0);
|
||||
if (sex != 1) {
|
||||
sex = 0;
|
||||
}
|
||||
int ca = -1;
|
||||
int i;
|
||||
for (i = 0; i < CASES_NUM; i++) if (!strcmp (cases_names[i], case_name)) {
|
||||
ca = i;
|
||||
break;
|
||||
}
|
||||
if (ca == -1 || ca >= cur_lang->cases_num) {
|
||||
return vk_estrdup (name);
|
||||
}
|
||||
assert (ca >= 0 && ca < cur_lang->cases_num);
|
||||
|
||||
int p = 0;
|
||||
int wp = 0;
|
||||
while (p < name_len) {
|
||||
int pp = p;
|
||||
while (pp < name_len && name[pp] != '-') {
|
||||
pp++;
|
||||
}
|
||||
int hyphen = (name[pp] == '-');
|
||||
int tt = t;
|
||||
int best = -1;
|
||||
int save_pp = pp;
|
||||
int new_tt;
|
||||
int isf = 0;
|
||||
if (pp - p > 0) {
|
||||
const char *fle = cur_lang->flexible_symbols;
|
||||
while (*fle) {
|
||||
if (*fle == name[pp - 1]) {
|
||||
isf = 1;
|
||||
break;
|
||||
}
|
||||
fle ++;
|
||||
}
|
||||
}
|
||||
while (1 && isf) {
|
||||
assert (tt >= 0);
|
||||
if (cur_lang->nodes[tt].tail_len >= 0 && (!cur_lang->nodes[tt].hyphen || hyphen)) {
|
||||
best = tt;
|
||||
}
|
||||
unsigned char c;
|
||||
if (pp == p - 1) {
|
||||
break;
|
||||
}
|
||||
pp --;
|
||||
if (pp < p) {
|
||||
c = 0;
|
||||
} else {
|
||||
c = name[pp];
|
||||
}
|
||||
new_tt = -1;
|
||||
int l = cur_lang->nodes[tt].children_start;
|
||||
int r = cur_lang->nodes[tt].children_end;
|
||||
if (r - l <= 4) {
|
||||
for (i = l; i < r; i++) if (cur_lang->children[2 * i] == c) {
|
||||
new_tt = cur_lang->children[2 * i + 1] ;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
int x;
|
||||
while (r - l > 1) {
|
||||
x = (r + l) >> 1;
|
||||
if (cur_lang->children[2 * x] <= c) {
|
||||
l = x;
|
||||
} else {
|
||||
r = x;
|
||||
}
|
||||
}
|
||||
if (cur_lang->children[2 * l] == c) {
|
||||
new_tt = cur_lang->children[2 * l + 1];
|
||||
}
|
||||
}
|
||||
if (new_tt == -1) {
|
||||
break;
|
||||
} else {
|
||||
tt = new_tt;
|
||||
}
|
||||
}
|
||||
if (best == -1) {
|
||||
memcpy (buff + wp, name + p, save_pp - p);
|
||||
wp += (save_pp - p);
|
||||
} else {
|
||||
int r = -1;
|
||||
if (!sex) {
|
||||
r = cur_lang->nodes[best].male_endings;
|
||||
} else {
|
||||
r = cur_lang->nodes[best].female_endings;
|
||||
}
|
||||
if (r < 0 || !cur_lang->endings[r * cur_lang->cases_num + ca]) {
|
||||
memcpy (buff + wp, name + p, save_pp - p);
|
||||
wp += (save_pp - p);
|
||||
} else {
|
||||
int ml = save_pp - p - cur_lang->nodes[best].tail_len;
|
||||
if (ml < 0) {
|
||||
ml = 0;
|
||||
}
|
||||
memcpy (buff + wp, name + p, ml);
|
||||
wp += ml;
|
||||
strcpy (buff + wp, cur_lang->endings[r * cur_lang->cases_num + ca]);
|
||||
wp += strlen (cur_lang->endings[r * cur_lang->cases_num + ca]);
|
||||
}
|
||||
}
|
||||
if (hyphen) {
|
||||
buff[wp++] = '-';
|
||||
} else {
|
||||
buff[wp++] = 0;
|
||||
}
|
||||
p = save_pp + 1;
|
||||
}
|
||||
|
||||
return vk_estrdup (buff);
|
||||
}
|
66
vkext_flex.h
Normal file
66
vkext_flex.h
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
This file is part of VK/KittenPHP-DB-Engine.
|
||||
|
||||
VK/KittenPHP-DB-Engine is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
VK/KittenPHP-DB-Engine is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with VK/KittenPHP-DB-Engine. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
This program is released under the GPL with the additional exemption
|
||||
that compiling, linking, and/or using OpenSSL is allowed.
|
||||
You are free to remove this exemption from derived works.
|
||||
|
||||
Copyright 2011-2013 Vkontakte Ltd
|
||||
2011-2013 Vitaliy Valtman
|
||||
*/
|
||||
|
||||
#ifndef __VKEXT_FLEX_H__
|
||||
#define __VKEXT_FLEX_H__
|
||||
|
||||
#define CASE_NUMBER 8
|
||||
|
||||
#if defined __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
/* One node of the per-language tree that do_flex walks from the end of a word. */
struct vk_node {
  short tail_len;       /* trailing characters replaced by the ending; negative: node carries no rule */
  short hyphen;         /* non-zero: the rule applies only to a part followed by '-' */
  int male_endings;     /* row in lang.endings used for male names; negative: none */
  int female_endings;   /* row in lang.endings used for female names; negative: none */
  int children_start;   /* first entry (inclusive) of this node in lang.children */
  int children_end;     /* end of this node's entries (exclusive) in lang.children */
};
|
||||
|
||||
struct lang {
|
||||
const char *flexible_symbols;
|
||||
int names_start;
|
||||
int surnames_start;
|
||||
int cases_num;
|
||||
const int *children;
|
||||
const char **endings;
|
||||
struct vk_node nodes[];
|
||||
};
|
||||
|
||||
/*
 * Declines name (name_len bytes) into the case called case_name.
 * sex: 1 for female, anything else male. type: "names" or "surnames".
 * lang_id: language index. case_name_len is not used by the implementation.
 * Returns an emalloc'ed string; unsupported arguments yield a copy of name.
 */
char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id);

#if defined __cplusplus
}
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
5848
vkext_flex_auto.c
Normal file
5848
vkext_flex_auto.c
Normal file
File diff suppressed because one or more lines are too long
62
vkflex.c
Normal file
62
vkflex.c
Normal file
@ -0,0 +1,62 @@
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "php.h"
|
||||
#include "vkext_flex.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define PHP_MY_EXTENSION_VERSION "1.0"
|
||||
#define PHP_MY_EXTENSION_EXTNAME "vkflex"
|
||||
|
||||
extern zend_module_entry vkflex_module_entry;
|
||||
#define phpext_my_extension_ptr &vkflex_entry
|
||||
|
||||
// declaration of a custom my_function()
|
||||
PHP_FUNCTION(vkflex);
|
||||
|
||||
// list of custom PHP functions provided by this extension
|
||||
// set {NULL, NULL, NULL} as the last record to mark the end of list
|
||||
static zend_function_entry my_functions[] = {
|
||||
PHP_FE(vkflex, NULL)
|
||||
{NULL, NULL, NULL}
|
||||
};
|
||||
|
||||
// the following code creates an entry for the module and registers it with Zend.
|
||||
zend_module_entry vkflex_module_entry = {
|
||||
#if ZEND_MODULE_API_NO >= 20010901
|
||||
STANDARD_MODULE_HEADER,
|
||||
#endif
|
||||
PHP_MY_EXTENSION_EXTNAME,
|
||||
my_functions,
|
||||
NULL, // name of the MINIT function or NULL if not applicable
|
||||
NULL, // name of the MSHUTDOWN function or NULL if not applicable
|
||||
NULL, // name of the RINIT function or NULL if not applicable
|
||||
NULL, // name of the RSHUTDOWN function or NULL if not applicable
|
||||
NULL, // name of the MINFO function or NULL if not applicable
|
||||
#if ZEND_MODULE_API_NO >= 20010901
|
||||
PHP_MY_EXTENSION_VERSION,
|
||||
#endif
|
||||
STANDARD_MODULE_PROPERTIES
|
||||
};
|
||||
|
||||
ZEND_GET_MODULE(vkflex)
|
||||
|
||||
// implementation of a custom my_function()
|
||||
PHP_FUNCTION(vkflex)
|
||||
{
|
||||
char *name, *case_;
|
||||
long name_len, case_len;
|
||||
//long name_len, case_len;
|
||||
long sex, lang, type;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sslll",
|
||||
&name, &name_len, &case_, &case_len, &sex, &lang, &type) == FAILURE) {
|
||||
RETURN_NULL();
|
||||
}
|
||||
|
||||
char *result = do_flex(name, name_len, case_, case_len,
|
||||
(int)sex, (type == 0 ? "names" : "surnames"), (int)lang);
|
||||
|
||||
RETURN_STRING(result);
|
||||
efree(result);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user