first commit
This commit is contained in:
commit
3e3b50f26c
18
README.md
Normal file
18
README.md
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# vkflex-php
|
||||||
|
|
||||||
|
Расширение для PHP для склонения имен. Использует vkext/flex из [KittenPHP](https://github.com/vk-com/kphp-kdb).
|
||||||
|
Включена поддержка русского и украинского языков.
|
||||||
|
|
||||||
|
### Использование
|
||||||
|
|
||||||
|
`vkflex($name, $case, $sex, $lang, $type)`
|
||||||
|
|
||||||
|
- `$name` - имя или фамилия в кодировке cp1251;
|
||||||
|
- `$case` - падеж: `Gen`, `Dat`, `Acc`, `Ins` или `Abl`;
|
||||||
|
- `$sex` - `0` - мужской, `1` - женский;
|
||||||
|
- `$lang` - `0` - русский, `1` - украинский;
|
||||||
|
- `$type` - `0` - имя, `1` - фамилия.
|
||||||
|
|
||||||
|
Возвращает строку в кодировке cp1251.
|
||||||
|
|
||||||
|
См. пример в `test.php`.
|
7
config.m4
Normal file
7
config.m4
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
dnl Registers the --enable-vkflex configure switch. The arguments are:
dnl option name, message printed while checking, text shown by --help.
dnl The comma after the checking message is required; without it the help
dnl text is folded into the message and the option has no --help entry.
PHP_ARG_ENABLE(vkflex, whether to enable vkflex extension,
[  --enable-vkflex         Enable vkflex extension])

dnl Only build the extension when it was explicitly enabled.
if test "$PHP_VKFLEX" = "yes"; then
  AC_DEFINE(HAVE_VKFLEX, 1, [Whether you have vkflex extension])
  PHP_NEW_EXTENSION(vkflex, vkflex.c vkext_flex.c, $ext_shared)
fi
|
25
test.php
Normal file
25
test.php
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Inflect a UTF-8 encoded name with the vkflex extension.
 *
 * vkflex() is fed cp1251 and its result is converted back from cp1251,
 * so the input is transcoded on the way in and on the way out.
 * The language is taken from the global $lang.
 *
 * @param string $name name or surname, UTF-8
 * @param string $case grammatical case passed through to vkflex()
 * @param int    $sex  0 for male, 1 for female
 * @param int    $type 0 for names, 1 for surnames
 * @return string inflected name, UTF-8
 */
function flex($name, $case, $sex, $type) {
    global $lang;

    $inflected = vkflex(iconv('utf-8', 'cp1251', $name), $case, $sex, $lang, $type);

    return iconv('cp1251', 'utf-8', $inflected);
}
|
||||||
|
|
||||||
|
// dl() raises a warning when the module is already loaded (for example
// through php.ini), so only load it on demand.
if (!extension_loaded('vkflex')) {
    dl('vkflex.so');
}

// Each entry is "<sex> <first name> <surname>"; sex is 0 (male) or 1 (female).
$names = ['0 Евгений Зиновьев', '0 Владимир Путин', '1 Катя Лебедева', '1 Анастасия Бабич'];
$lang = 0; // ru
$case = 'Gen';

foreach ($names as $entry) {
    list($sex, $name, $surname) = explode(' ', $entry);
    $sex = (int)$sex;
    // Type 0 inflects the first name, type 1 the surname.
    printf("%s %s\n", flex($name, $case, $sex, 0), flex($surname, $case, $sex, 1));
}
|
180
vkext_flex.c
Normal file
180
vkext_flex.c
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
/*
|
||||||
|
This file is part of VK/KittenPHP-DB-Engine.
|
||||||
|
|
||||||
|
VK/KittenPHP-DB-Engine is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
VK/KittenPHP-DB-Engine is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with VK/KittenPHP-DB-Engine. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
This program is released under the GPL with the additional exemption
|
||||||
|
that compiling, linking, and/or using OpenSSL is allowed.
|
||||||
|
You are free to remove this exemption from derived works.
|
||||||
|
|
||||||
|
Copyright 2011-2013 Vkontakte Ltd
|
||||||
|
2011-2013 Vitaliy Valtman
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include "vkext_flex.h"
|
||||||
|
#include "vkext_flex_auto.c"
|
||||||
|
#include "php.h"
|
||||||
|
|
||||||
|
/* Capacity, in bytes, of the scratch buffer below. */
enum { BUFF_LEN = 1 << 16 };

/* File-scope scratch area in which do_flex() assembles its result. */
static char buff[BUFF_LEN];
|
||||||
|
|
||||||
|
/*
 * Duplicate the NUL-terminated string `s` into memory obtained from
 * emalloc(). Returns the copy, or NULL if emalloc() returned NULL.
 */
char *vk_estrdup (const char *s) {
  size_t size = strlen (s) + 1; /* include the terminator */
  char *copy = emalloc (size);
  if (copy != NULL) {
    memcpy (copy, s, size);
  }
  return copy;
}
|
||||||
|
|
||||||
|
char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id) {
|
||||||
|
if (name_len > (1 << 10)) {
|
||||||
|
return vk_estrdup (name);
|
||||||
|
}
|
||||||
|
struct lang *cur_lang;
|
||||||
|
if (lang_id < 0 || lang_id >= LANG_NUM || !langs[lang_id]) {
|
||||||
|
return vk_estrdup (name);
|
||||||
|
}
|
||||||
|
cur_lang = langs[lang_id];
|
||||||
|
assert (cur_lang);
|
||||||
|
int t = -1;
|
||||||
|
if (!strcmp (type, "names")) {
|
||||||
|
if (cur_lang->names_start < 0) {
|
||||||
|
return vk_estrdup (name);
|
||||||
|
}
|
||||||
|
t = cur_lang->names_start;
|
||||||
|
} else if (!strcmp (type, "surnames")) {
|
||||||
|
if (cur_lang->surnames_start < 0) {
|
||||||
|
return vk_estrdup (name);
|
||||||
|
}
|
||||||
|
t = cur_lang->surnames_start;
|
||||||
|
} else {
|
||||||
|
return vk_estrdup (name);
|
||||||
|
}
|
||||||
|
assert (t >= 0);
|
||||||
|
if (sex != 1) {
|
||||||
|
sex = 0;
|
||||||
|
}
|
||||||
|
int ca = -1;
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < CASES_NUM; i++) if (!strcmp (cases_names[i], case_name)) {
|
||||||
|
ca = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (ca == -1 || ca >= cur_lang->cases_num) {
|
||||||
|
return vk_estrdup (name);
|
||||||
|
}
|
||||||
|
assert (ca >= 0 && ca < cur_lang->cases_num);
|
||||||
|
|
||||||
|
int p = 0;
|
||||||
|
int wp = 0;
|
||||||
|
while (p < name_len) {
|
||||||
|
int pp = p;
|
||||||
|
while (pp < name_len && name[pp] != '-') {
|
||||||
|
pp++;
|
||||||
|
}
|
||||||
|
int hyphen = (name[pp] == '-');
|
||||||
|
int tt = t;
|
||||||
|
int best = -1;
|
||||||
|
int save_pp = pp;
|
||||||
|
int new_tt;
|
||||||
|
int isf = 0;
|
||||||
|
if (pp - p > 0) {
|
||||||
|
const char *fle = cur_lang->flexible_symbols;
|
||||||
|
while (*fle) {
|
||||||
|
if (*fle == name[pp - 1]) {
|
||||||
|
isf = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
fle ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (1 && isf) {
|
||||||
|
assert (tt >= 0);
|
||||||
|
if (cur_lang->nodes[tt].tail_len >= 0 && (!cur_lang->nodes[tt].hyphen || hyphen)) {
|
||||||
|
best = tt;
|
||||||
|
}
|
||||||
|
unsigned char c;
|
||||||
|
if (pp == p - 1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
pp --;
|
||||||
|
if (pp < p) {
|
||||||
|
c = 0;
|
||||||
|
} else {
|
||||||
|
c = name[pp];
|
||||||
|
}
|
||||||
|
new_tt = -1;
|
||||||
|
int l = cur_lang->nodes[tt].children_start;
|
||||||
|
int r = cur_lang->nodes[tt].children_end;
|
||||||
|
if (r - l <= 4) {
|
||||||
|
for (i = l; i < r; i++) if (cur_lang->children[2 * i] == c) {
|
||||||
|
new_tt = cur_lang->children[2 * i + 1] ;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int x;
|
||||||
|
while (r - l > 1) {
|
||||||
|
x = (r + l) >> 1;
|
||||||
|
if (cur_lang->children[2 * x] <= c) {
|
||||||
|
l = x;
|
||||||
|
} else {
|
||||||
|
r = x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cur_lang->children[2 * l] == c) {
|
||||||
|
new_tt = cur_lang->children[2 * l + 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (new_tt == -1) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
tt = new_tt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (best == -1) {
|
||||||
|
memcpy (buff + wp, name + p, save_pp - p);
|
||||||
|
wp += (save_pp - p);
|
||||||
|
} else {
|
||||||
|
int r = -1;
|
||||||
|
if (!sex) {
|
||||||
|
r = cur_lang->nodes[best].male_endings;
|
||||||
|
} else {
|
||||||
|
r = cur_lang->nodes[best].female_endings;
|
||||||
|
}
|
||||||
|
if (r < 0 || !cur_lang->endings[r * cur_lang->cases_num + ca]) {
|
||||||
|
memcpy (buff + wp, name + p, save_pp - p);
|
||||||
|
wp += (save_pp - p);
|
||||||
|
} else {
|
||||||
|
int ml = save_pp - p - cur_lang->nodes[best].tail_len;
|
||||||
|
if (ml < 0) {
|
||||||
|
ml = 0;
|
||||||
|
}
|
||||||
|
memcpy (buff + wp, name + p, ml);
|
||||||
|
wp += ml;
|
||||||
|
strcpy (buff + wp, cur_lang->endings[r * cur_lang->cases_num + ca]);
|
||||||
|
wp += strlen (cur_lang->endings[r * cur_lang->cases_num + ca]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (hyphen) {
|
||||||
|
buff[wp++] = '-';
|
||||||
|
} else {
|
||||||
|
buff[wp++] = 0;
|
||||||
|
}
|
||||||
|
p = save_pp + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return vk_estrdup (buff);
|
||||||
|
}
|
66
vkext_flex.h
Normal file
66
vkext_flex.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
This file is part of VK/KittenPHP-DB-Engine.
|
||||||
|
|
||||||
|
VK/KittenPHP-DB-Engine is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
VK/KittenPHP-DB-Engine is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with VK/KittenPHP-DB-Engine. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
This program is released under the GPL with the additional exemption
|
||||||
|
that compiling, linking, and/or using OpenSSL is allowed.
|
||||||
|
You are free to remove this exemption from derived works.
|
||||||
|
|
||||||
|
Copyright 2011-2013 Vkontakte Ltd
|
||||||
|
2011-2013 Vitaliy Valtman
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __VKEXT_FLEX_H__
|
||||||
|
#define __VKEXT_FLEX_H__
|
||||||
|
|
||||||
|
#define CASE_NUMBER 8
|
||||||
|
|
||||||
|
#if defined __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* One node of the suffix trie walked by do_flex(). */
struct vk_node {
  /*
   * tail_len: number of trailing bytes replaced by the ending; a negative
   *           value means the node cannot be used as a match.
   * hyphen:   when non-zero the node only matches a part that is followed
   *           by '-'.
   */
  short tail_len, hyphen;
  /* Row in the language's endings table per sex; negative means none. */
  int male_endings, female_endings;
  /* Half-open range [start, end) of this node's entries in `children`. */
  int children_start, children_end;
};
|
||||||
|
|
||||||
|
struct lang {
|
||||||
|
const char *flexible_symbols;
|
||||||
|
int names_start;
|
||||||
|
int surnames_start;
|
||||||
|
int cases_num;
|
||||||
|
const int *children;
|
||||||
|
const char **endings;
|
||||||
|
struct vk_node nodes[];
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Inflect `name` (name_len bytes) into the case named by `case_name`.
 * `sex` is 1 for female and anything else for male, `type` is "names" or
 * "surnames", `lang_id` selects the language table. Returns a newly
 * allocated NUL-terminated string; see vkext_flex.c for the fallback rules.
 */
char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id);
|
||||||
|
|
||||||
|
#if defined __cplusplus
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
5848
vkext_flex_auto.c
Normal file
5848
vkext_flex_auto.c
Normal file
File diff suppressed because one or more lines are too long
62
vkflex.c
Normal file
62
vkflex.c
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
#include "php.h"
|
||||||
|
#include "vkext_flex.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#define PHP_MY_EXTENSION_VERSION "1.0"
|
||||||
|
#define PHP_MY_EXTENSION_EXTNAME "vkflex"
|
||||||
|
|
||||||
|
/* Module descriptor, defined further down in this file. */
extern zend_module_entry vkflex_module_entry;

/*
 * Pointer to the module descriptor. The previous definition expanded to
 * &vkflex_entry, an identifier that is not declared anywhere, so any use of
 * the macro failed to compile. phpext_vkflex_ptr follows the usual
 * phpext_<name>_ptr naming; the old macro name is kept as an alias.
 */
#define phpext_vkflex_ptr &vkflex_module_entry
#define phpext_my_extension_ptr phpext_vkflex_ptr
|
||||||
|
|
||||||
|
// declaration of a custom my_function()
|
||||||
|
PHP_FUNCTION(vkflex);
|
||||||
|
|
||||||
|
// list of custom PHP functions provided by this extension
|
||||||
|
// set {NULL, NULL, NULL} as the last record to mark the end of list
|
||||||
|
static zend_function_entry my_functions[] = {
|
||||||
|
PHP_FE(vkflex, NULL)
|
||||||
|
{NULL, NULL, NULL}
|
||||||
|
};
|
||||||
|
|
||||||
|
// the following code creates an entry for the module and registers it with Zend.
|
||||||
|
zend_module_entry vkflex_module_entry = {
|
||||||
|
#if ZEND_MODULE_API_NO >= 20010901
|
||||||
|
STANDARD_MODULE_HEADER,
|
||||||
|
#endif
|
||||||
|
PHP_MY_EXTENSION_EXTNAME,
|
||||||
|
my_functions,
|
||||||
|
NULL, // name of the MINIT function or NULL if not applicable
|
||||||
|
NULL, // name of the MSHUTDOWN function or NULL if not applicable
|
||||||
|
NULL, // name of the RINIT function or NULL if not applicable
|
||||||
|
NULL, // name of the RSHUTDOWN function or NULL if not applicable
|
||||||
|
NULL, // name of the MINFO function or NULL if not applicable
|
||||||
|
#if ZEND_MODULE_API_NO >= 20010901
|
||||||
|
PHP_MY_EXTENSION_VERSION,
|
||||||
|
#endif
|
||||||
|
STANDARD_MODULE_PROPERTIES
|
||||||
|
};
|
||||||
|
|
||||||
|
ZEND_GET_MODULE(vkflex)
|
||||||
|
|
||||||
|
// implementation of a custom my_function()
|
||||||
|
PHP_FUNCTION(vkflex)
|
||||||
|
{
|
||||||
|
char *name, *case_;
|
||||||
|
long name_len, case_len;
|
||||||
|
//long name_len, case_len;
|
||||||
|
long sex, lang, type;
|
||||||
|
|
||||||
|
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sslll",
|
||||||
|
&name, &name_len, &case_, &case_len, &sex, &lang, &type) == FAILURE) {
|
||||||
|
RETURN_NULL();
|
||||||
|
}
|
||||||
|
|
||||||
|
char *result = do_flex(name, name_len, case_, case_len,
|
||||||
|
(int)sex, (type == 0 ? "names" : "surnames"), (int)lang);
|
||||||
|
|
||||||
|
RETURN_STRING(result);
|
||||||
|
efree(result);
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user