first commit

This commit is contained in:
ch1p 2017-01-11 22:57:38 +03:00
commit 3e3b50f26c
7 changed files with 6206 additions and 0 deletions

18
README.md Normal file
View File

@ -0,0 +1,18 @@
# vkflex-php
Расширение для PHP для склонения имен. Использует vkext/flex из [KittenPHP](https://github.com/vk-com/kphp-kdb).
Включена поддержка русского и украинского.
### Использование
`vkflex($name, $case, $sex, $lang, $type)`
- `$name` - имя или фамилия в кодировке cp1251;
- `$case` - падеж: `Gen`, `Dat`, `Acc`, `Ins` или `Abl`;
- `$sex` - `0` - мужской, `1` - женский;
- `$lang` - `0` - русский, `1` - украинский;
- `$type` - `0` - имя, `1` - фамилия.
Возвращает строку в кодировке cp1251.
См. пример в `test.php`.

7
config.m4 Normal file
View File

@ -0,0 +1,7 @@
dnl Build configuration for the vkflex extension.
dnl
dnl PHP_ARG_ENABLE takes the check message and the help text as two separate
dnl arguments. They have to be separated by a comma, otherwise the help text
dnl is glued onto the check message and the switch gets no help entry.
PHP_ARG_ENABLE(vkflex, whether to enable vkflex extension,
[  --enable-vkflex         Enable vkflex extension])

if test "$PHP_VKFLEX" = "yes"; then
  dnl Make HAVE_VKFLEX visible to the C sources.
  AC_DEFINE(HAVE_VKFLEX, 1, [Whether you have vkflex extension])
  dnl vkext_flex_auto.c is pulled in textually by vkext_flex.c, so it is not
  dnl listed as a source of its own.
  PHP_NEW_EXTENSION(vkflex, vkflex.c vkext_flex.c, $ext_shared)
fi

25
test.php Normal file
View File

@ -0,0 +1,25 @@
<?php
/**
 * Inflect a UTF-8 name through vkflex().
 *
 * The input is converted to cp1251 before the call and the result is
 * converted back to UTF-8. The language is taken from the global $lang.
 *
 * @param string $name name or surname, UTF-8 encoded
 * @param string $case case name handed to vkflex() unchanged
 * @param int $sex 0 for male, 1 for female
 * @param int $type 0 for names, 1 for surnames
 * @return string|false whatever the final iconv() call returns
 */
function flex($name, $case, $sex, $type) {
    global $lang;

    $cp1251Name = iconv('utf-8', 'cp1251', $name);
    $inflected = vkflex($cp1251Name, $case, $sex, $lang, $type);

    return iconv('cp1251', 'utf-8', $inflected);
}
// Load the extension at runtime so that vkflex() becomes callable.
dl('vkflex.so');

$lang = 0; // ru
$case = 'Gen';

// Every entry has the form "<sex> <name> <surname>"; sex is 0 (male) or 1 (female).
$names = ['0 Евгений Зиновьев', '0 Владимир Путин', '1 Катя Лебедева', '1 Анастасия Бабич'];

foreach ($names as $entry) {
    list($sexFlag, $firstName, $lastName) = explode(' ', $entry);
    $sexFlag = (int)$sexFlag;

    // Print the inflected name and surname on one line.
    printf("%s %s\n", flex($firstName, $case, $sexFlag, 0), flex($lastName, $case, $sexFlag, 1));
}

180
vkext_flex.c Normal file
View File

@ -0,0 +1,180 @@
/*
This file is part of VK/KittenPHP-DB-Engine.
VK/KittenPHP-DB-Engine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
VK/KittenPHP-DB-Engine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with VK/KittenPHP-DB-Engine. If not, see <http://www.gnu.org/licenses/>.
This program is released under the GPL with the additional exemption
that compiling, linking, and/or using OpenSSL is allowed.
You are free to remove this exemption from derived works.
Copyright 2011-2013 Vkontakte Ltd
2011-2013 Vitaliy Valtman
*/
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "vkext_flex.h"
#include "vkext_flex_auto.c"
#include "php.h"
/* Capacity in bytes of the scratch buffer below (1 << 16 = 65536). */
#define BUFF_LEN (1 << 16)
/* File-static scratch buffer in which do_flex() assembles its output before
   duplicating it. It is shared by all calls, so its contents persist from one
   call to the next. */
static char buff[BUFF_LEN];
/*
 * Duplicate the NUL-terminated string `s` into memory obtained from emalloc().
 * Returns the new copy, or NULL when emalloc() yields NULL.
 */
char *vk_estrdup (const char *s) {
  size_t size = strlen (s) + 1;   /* payload plus terminating NUL */
  char *copy = emalloc (size);
  if (copy != NULL) {
    memcpy (copy, s, size);
  }
  return copy;
}
char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id) {
if (name_len > (1 << 10)) {
return vk_estrdup (name);
}
struct lang *cur_lang;
if (lang_id < 0 || lang_id >= LANG_NUM || !langs[lang_id]) {
return vk_estrdup (name);
}
cur_lang = langs[lang_id];
assert (cur_lang);
int t = -1;
if (!strcmp (type, "names")) {
if (cur_lang->names_start < 0) {
return vk_estrdup (name);
}
t = cur_lang->names_start;
} else if (!strcmp (type, "surnames")) {
if (cur_lang->surnames_start < 0) {
return vk_estrdup (name);
}
t = cur_lang->surnames_start;
} else {
return vk_estrdup (name);
}
assert (t >= 0);
if (sex != 1) {
sex = 0;
}
int ca = -1;
int i;
for (i = 0; i < CASES_NUM; i++) if (!strcmp (cases_names[i], case_name)) {
ca = i;
break;
}
if (ca == -1 || ca >= cur_lang->cases_num) {
return vk_estrdup (name);
}
assert (ca >= 0 && ca < cur_lang->cases_num);
int p = 0;
int wp = 0;
while (p < name_len) {
int pp = p;
while (pp < name_len && name[pp] != '-') {
pp++;
}
int hyphen = (name[pp] == '-');
int tt = t;
int best = -1;
int save_pp = pp;
int new_tt;
int isf = 0;
if (pp - p > 0) {
const char *fle = cur_lang->flexible_symbols;
while (*fle) {
if (*fle == name[pp - 1]) {
isf = 1;
break;
}
fle ++;
}
}
while (1 && isf) {
assert (tt >= 0);
if (cur_lang->nodes[tt].tail_len >= 0 && (!cur_lang->nodes[tt].hyphen || hyphen)) {
best = tt;
}
unsigned char c;
if (pp == p - 1) {
break;
}
pp --;
if (pp < p) {
c = 0;
} else {
c = name[pp];
}
new_tt = -1;
int l = cur_lang->nodes[tt].children_start;
int r = cur_lang->nodes[tt].children_end;
if (r - l <= 4) {
for (i = l; i < r; i++) if (cur_lang->children[2 * i] == c) {
new_tt = cur_lang->children[2 * i + 1] ;
break;
}
} else {
int x;
while (r - l > 1) {
x = (r + l) >> 1;
if (cur_lang->children[2 * x] <= c) {
l = x;
} else {
r = x;
}
}
if (cur_lang->children[2 * l] == c) {
new_tt = cur_lang->children[2 * l + 1];
}
}
if (new_tt == -1) {
break;
} else {
tt = new_tt;
}
}
if (best == -1) {
memcpy (buff + wp, name + p, save_pp - p);
wp += (save_pp - p);
} else {
int r = -1;
if (!sex) {
r = cur_lang->nodes[best].male_endings;
} else {
r = cur_lang->nodes[best].female_endings;
}
if (r < 0 || !cur_lang->endings[r * cur_lang->cases_num + ca]) {
memcpy (buff + wp, name + p, save_pp - p);
wp += (save_pp - p);
} else {
int ml = save_pp - p - cur_lang->nodes[best].tail_len;
if (ml < 0) {
ml = 0;
}
memcpy (buff + wp, name + p, ml);
wp += ml;
strcpy (buff + wp, cur_lang->endings[r * cur_lang->cases_num + ca]);
wp += strlen (cur_lang->endings[r * cur_lang->cases_num + ca]);
}
}
if (hyphen) {
buff[wp++] = '-';
} else {
buff[wp++] = 0;
}
p = save_pp + 1;
}
return vk_estrdup (buff);
}

66
vkext_flex.h Normal file
View File

@ -0,0 +1,66 @@
/*
This file is part of VK/KittenPHP-DB-Engine.
VK/KittenPHP-DB-Engine is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
VK/KittenPHP-DB-Engine is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with VK/KittenPHP-DB-Engine. If not, see <http://www.gnu.org/licenses/>.
This program is released under the GPL with the additional exemption
that compiling, linking, and/or using OpenSSL is allowed.
You are free to remove this exemption from derived works.
Copyright 2011-2013 Vkontakte Ltd
2011-2013 Vitaliy Valtman
*/
#ifndef __VKEXT_FLEX_H__
#define __VKEXT_FLEX_H__
#define CASE_NUMBER 8
#if defined __cplusplus
extern "C" {
#endif
#include <stdio.h>
/*
 * One node of the trie that do_flex() walks over the trailing bytes of a
 * name part.
 */
struct vk_node {
/* Negative: the node is not a match. Otherwise the number of trailing bytes
   of the name part that are dropped before the ending is appended. */
short tail_len;
/* Non-zero: the node only counts as a match when the name part is followed
   by a '-'. */
short hyphen;
/* Row in lang.endings used when sex != 1; negative means no replacement. */
int male_endings;
/* Row in lang.endings used when sex == 1; negative means no replacement. */
int female_endings;
/* Half-open range [children_start, children_end) of this node's entries in
   lang.children, counted in pairs. */
int children_start;
int children_end;
};
/*
 * Inflection data for one language, as consumed by do_flex().
 */
struct lang {
/* NUL-terminated set of bytes; a name part is only looked up in the trie
   when its last byte is one of them. */
const char *flexible_symbols;
/* Root node index used for type "names"; negative makes do_flex() return the
   input unchanged. */
int names_start;
/* Root node index used for type "surnames"; negative makes do_flex() return
   the input unchanged. */
int surnames_start;
/* Number of entries per row of `endings`; case indices at or above this
   value are rejected. */
int cases_num;
/* Flattened pairs: children[2 * i] is a byte label, children[2 * i + 1] the
   index of the child node. Ranges with more than four entries are
   binary-searched, so labels are presumably sorted ascending within a node. */
const int *children;
/* Endings table indexed as endings[row * cases_num + case]; a NULL entry
   leaves the name part unchanged. */
const char **endings;
/* Trie nodes (flexible array member). */
struct vk_node nodes[];
};
/*
 * Inflect `name` (name_len bytes) into the case named by `case_name`.
 *
 * sex     - 1 selects the female endings, any other value the male ones
 * type    - "names" or "surnames"
 * lang_id - index of the language table to use
 *
 * case_name_len is accepted but not read by the implementation.
 *
 * Returns a string allocated via vk_estrdup(), i.e. with emalloc(). When
 * name_len exceeds 1024 or the language, type or case is not recognised, the
 * result is a copy of `name`.
 */
char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id);
#if defined __cplusplus
};
#endif
#endif

5848
vkext_flex_auto.c Normal file

File diff suppressed because one or more lines are too long

62
vkflex.c Normal file
View File

@ -0,0 +1,62 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include <limits.h>
#include <stdio.h>
#include "vkext_flex.h"
/* Version string and extension name placed into the module entry. */
#define PHP_MY_EXTENSION_VERSION "1.0"
#define PHP_MY_EXTENSION_EXTNAME "vkflex"
/* The module entry itself is defined further down in this file. */
extern zend_module_entry vkflex_module_entry;
/* Address of the module entry. This used to expand to &vkflex_entry, an
   identifier this file never declares, so any use of the macro would have
   failed to compile. */
#define phpext_my_extension_ptr &vkflex_module_entry
/* Forward declaration of the vkflex() PHP function. */
PHP_FUNCTION(vkflex);
// list of custom PHP functions provided by this extension
// set {NULL, NULL, NULL} as the last record to mark the end of list
static zend_function_entry my_functions[] = {
PHP_FE(vkflex, NULL)
{NULL, NULL, NULL}
};
// Module descriptor for the vkflex extension. The initializer is positional,
// so the order of the entries below must not be changed.
zend_module_entry vkflex_module_entry = {
#if ZEND_MODULE_API_NO >= 20010901
STANDARD_MODULE_HEADER,
#endif
PHP_MY_EXTENSION_EXTNAME,
my_functions,
NULL, // MINIT callback; NULL because the extension has none
NULL, // MSHUTDOWN callback; NULL because the extension has none
NULL, // RINIT callback; NULL because the extension has none
NULL, // RSHUTDOWN callback; NULL because the extension has none
NULL, // MINFO callback; NULL because the extension has none
#if ZEND_MODULE_API_NO >= 20010901
PHP_MY_EXTENSION_VERSION,
#endif
STANDARD_MODULE_PROPERTIES
};
// Emits the get_module() entry point that hands vkflex_module_entry to PHP
// when the extension is loaded as a shared object.
ZEND_GET_MODULE(vkflex)
// implementation of a custom my_function()
PHP_FUNCTION(vkflex)
{
char *name, *case_;
long name_len, case_len;
//long name_len, case_len;
long sex, lang, type;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sslll",
&name, &name_len, &case_, &case_len, &sex, &lang, &type) == FAILURE) {
RETURN_NULL();
}
char *result = do_flex(name, name_len, case_, case_len,
(int)sex, (type == 0 ? "names" : "surnames"), (int)lang);
RETURN_STRING(result);
efree(result);
}