Fix memory leaks and corruption

2016-09-22 14:09:41 +02:00 · 2016-09-22 14:09:41 +02:00 · ef4d6b7f79
commit ef4d6b7f79
parent d00e1d5ecc
6 changed files with 169 additions and 180 deletions
--- a/iconvlite.cpp
+++ b/iconvlite.cpp
@ -3,7 +3,76 @@

 using namespace std;

-static void cp2utf1(char *out, const char *in) {
+typedef struct ConvLetter { // one row of the lookup table searched by utf8_to_cp1251()
+    unsigned char    win1251;   // single-byte code stored into the output when the row matches
+    int             unicode;    // value compared against the decoded UTF-8 character
+} Letter;
+
+static Letter g_letters[] = { // scanned linearly by utf8_to_cp1251() for values its range checks do not cover
+    {0x82, 0x201A}, // SINGLE LOW-9 QUOTATION MARK
+    {0x83, 0x0453}, // CYRILLIC SMALL LETTER GJE
+    {0x84, 0x201E}, // DOUBLE LOW-9 QUOTATION MARK
+    {0x85, 0x2026}, // HORIZONTAL ELLIPSIS
+    {0x86, 0x2020}, // DAGGER
+    {0x87, 0x2021}, // DOUBLE DAGGER
+    {0x88, 0x20AC}, // EURO SIGN
+    {0x89, 0x2030}, // PER MILLE SIGN
+    {0x8A, 0x0409}, // CYRILLIC CAPITAL LETTER LJE
+    {0x8B, 0x2039}, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    {0x8C, 0x040A}, // CYRILLIC CAPITAL LETTER NJE
+    {0x8D, 0x040C}, // CYRILLIC CAPITAL LETTER KJE
+    {0x8E, 0x040B}, // CYRILLIC CAPITAL LETTER TSHE
+    {0x8F, 0x040F}, // CYRILLIC CAPITAL LETTER DZHE
+    {0x90, 0x0452}, // CYRILLIC SMALL LETTER DJE
+    {0x91, 0x2018}, // LEFT SINGLE QUOTATION MARK
+    {0x92, 0x2019}, // RIGHT SINGLE QUOTATION MARK
+    {0x93, 0x201C}, // LEFT DOUBLE QUOTATION MARK
+    {0x94, 0x201D}, // RIGHT DOUBLE QUOTATION MARK
+    {0x95, 0x2022}, // BULLET
+    {0x96, 0x2013}, // EN DASH
+    {0x97, 0x2014}, // EM DASH
+    {0x99, 0x2122}, // TRADE MARK SIGN
+    {0x9A, 0x0459}, // CYRILLIC SMALL LETTER LJE
+    {0x9B, 0x203A}, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    {0x9C, 0x045A}, // CYRILLIC SMALL LETTER NJE
+    {0x9D, 0x045C}, // CYRILLIC SMALL LETTER KJE
+    {0x9E, 0x045B}, // CYRILLIC SMALL LETTER TSHE
+    {0x9F, 0x045F}, // CYRILLIC SMALL LETTER DZHE
+    {0xA0, 0x00A0}, // NO-BREAK SPACE
+    {0xA1, 0x040E}, // CYRILLIC CAPITAL LETTER SHORT U
+    {0xA2, 0x045E}, // CYRILLIC SMALL LETTER SHORT U
+    {0xA3, 0x0408}, // CYRILLIC CAPITAL LETTER JE
+    {0xA4, 0x00A4}, // CURRENCY SIGN
+    {0xA5, 0x0490}, // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+    {0xA6, 0x00A6}, // BROKEN BAR
+    {0xA7, 0x00A7}, // SECTION SIGN
+    {0xA8, 0x0401}, // CYRILLIC CAPITAL LETTER IO
+    {0xA9, 0x00A9}, // COPYRIGHT SIGN
+    {0xAA, 0x0404}, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
+    {0xAB, 0x00AB}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    {0xAC, 0x00AC}, // NOT SIGN
+    {0xAD, 0x00AD}, // SOFT HYPHEN
+    {0xAE, 0x00AE}, // REGISTERED SIGN
+    {0xAF, 0x0407}, // CYRILLIC CAPITAL LETTER YI
+    {0xB0, 0x00B0}, // DEGREE SIGN
+    {0xB1, 0x00B1}, // PLUS-MINUS SIGN
+    {0xB2, 0x0406}, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+    {0xB3, 0x0456}, // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+    {0xB4, 0x0491}, // CYRILLIC SMALL LETTER GHE WITH UPTURN
+    {0xB5, 0x00B5}, // MICRO SIGN
+    {0xB6, 0x00B6}, // PILCROW SIGN
+    {0xB7, 0x00B7}, // MIDDLE DOT
+    {0xB8, 0x0451}, // CYRILLIC SMALL LETTER IO
+    {0xB9, 0x2116}, // NUMERO SIGN
+    {0xBA, 0x0454}, // CYRILLIC SMALL LETTER UKRAINIAN IE
+    {0xBB, 0x00BB}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    {0xBC, 0x0458}, // CYRILLIC SMALL LETTER JE
+    {0xBD, 0x0405}, // CYRILLIC CAPITAL LETTER DZE
+    {0xBE, 0x0455}, // CYRILLIC SMALL LETTER DZE
+    {0xBF, 0x0457} // CYRILLIC SMALL LETTER YI
+};
+
+static void cp1251_to_utf8(char *out, const char *in, size_t len) {
    static const int table[128] = {
        0x82D0,0x83D0,0x9A80E2,0x93D1,0x9E80E2,0xA680E2,0xA080E2,0xA180E2,
        0xAC82E2,0xB080E2,0x89D0,0xB980E2,0x8AD0,0x8CD0,0x8BD0,0x8FD0,
@ -22,7 +91,7 @@ static void cp2utf1(char *out, const char *in) {
        0x80D1,0x81D1,0x82D1,0x83D1,0x84D1,0x85D1,0x86D1,0x87D1,
        0x88D1,0x89D1,0x8AD1,0x8BD1,0x8CD1,0x8DD1,0x8ED1,0x8FD1
    };
-    while (*in)
+    while (*in) {
        if (*in & 0x80) {
            int v = table[(int)(0x7f & *in++)];
            if (!v)
@ -31,75 +100,71 @@ static void cp2utf1(char *out, const char *in) {
            *out++ = (char)(v >> 8);
            if (v >>= 16)
                *out++ = (char)v;
-        }
-        else
+        } else {
            *out++ = *in++;
+        }
+    }
    *out = 0;
 }
-string cp2utf(string s) {
-    int c,i;
-    int len = s.size();
-    string ns;
-    for(i=0; i<len; i++) {
-        c=s[i];
-        char buf[4], in[2] = {0, 0};
-        *in = c;
-        cp2utf1(buf, in);
-        ns+=string(buf);
+
+static int utf8_to_cp1251(const char* utf8, char* windows1251, size_t n) { // returns 1 after writing a NUL-terminated result, 0 at the first sequence it cannot map
+    int i = 0;   // read position in utf8
+    int j = 0;   // write position in windows1251
+    for(; i < (int)n && utf8[i] != 0; ++i) {
+        char prefix = utf8[i];
+        char suffix = utf8[i+1];   // byte following prefix; only used by the two-byte branch
+        if ((prefix & 0x80) == 0) {   // high bit clear: the byte is copied unchanged
+            windows1251[j] = (char)prefix;
+            ++j;
+        } else if ((~prefix) & 0x20) {   // bit 0x20 clear: prefix+suffix are decoded as one two-byte sequence
+            int first5bit = prefix & 0x1F;
+            first5bit <<= 6;
+            int sec6bit = suffix & 0x3F;
+            int unicode_char = first5bit + sec6bit;   // 11-bit value: 5 bits from prefix, 6 bits from suffix
+
+            if ( unicode_char >= 0x410 && unicode_char <= 0x44F ) {
+                windows1251[j] = (char)(unicode_char - 0x350);   // 0x410..0x44F -> 0xC0..0xFF
+            } else if (unicode_char >= 0x80 && unicode_char <= 0xFF) {
+                windows1251[j] = (char)(unicode_char);   // NOTE(review): value is copied as-is; confirm that is intended for all of 0x80..0xFF
+            } else if (unicode_char >= 0x402 && unicode_char <= 0x403) {
+                windows1251[j] = (char)(unicode_char - 0x382);   // 0x402, 0x403 -> 0x80, 0x81
+            } else {
+                int count = sizeof(g_letters) / sizeof(Letter);   // NOTE(review): unicode_char is at most 0x7FF here, so rows with larger values cannot match
+                for (int k = 0; k < count; ++k) {
+                    if (unicode_char == g_letters[k].unicode) {
+                        windows1251[j] = g_letters[k].win1251;
+                        goto NEXT_LETTER;
+                    }
+                }
+                // can't convert this char
+                return 0;   // windows1251 is left without a terminating NUL on this path
+            }
+NEXT_LETTER:
+            ++i;   // skip suffix; the loop header then steps past the pair
+            ++j;
+        } else {
+            // can't convert these chars
+            return 0;   // high-bit byte with bit 0x20 set is not handled; no NUL is written
+        }
     }
-   return ns;
+    windows1251[j] = 0;
+    return 1;
 }

 string utf2cp(string s) {
    size_t len = s.size();
-    const char *buff = s.c_str();
-    char *output = new char[len];
-    convert_utf8_to_windows1251(buff, output, len);
+    string buffer(len + 1, '\0');            // output never exceeds the input length; zero fill keeps it terminated even when conversion stops early
+    char* output = &buffer[0];               // scratch storage owned by buffer: no delete[] needed, nothing leaks if a later allocation throws
+    utf8_to_cp1251(s.c_str(), output, len);  // returns 0 on an unconvertible sequence; the prefix converted so far is kept
     string ns(output);
     return ns;
 }

-int convert_utf8_to_windows1251(const char* utf8, char* windows1251, size_t n)
-{
-        int i = 0;
-        int j = 0;
-        for(; i < (int)n && utf8[i] != 0; ++i) {
-                char prefix = utf8[i];
-                char suffix = utf8[i+1];
-                if ((prefix & 0x80) == 0) {
-                        windows1251[j] = (char)prefix;
-                        ++j;
-                } else if ((~prefix) & 0x20) {
-                        int first5bit = prefix & 0x1F;
-                        first5bit <<= 6;
-                        int sec6bit = suffix & 0x3F;
-                        int unicode_char = first5bit + sec6bit;
-
-                        if ( unicode_char >= 0x410 && unicode_char <= 0x44F ) {
-                                windows1251[j] = (char)(unicode_char - 0x350);
-                        } else if (unicode_char >= 0x80 && unicode_char <= 0xFF) {
-                                windows1251[j] = (char)(unicode_char);
-                        } else if (unicode_char >= 0x402 && unicode_char <= 0x403) {
-                                windows1251[j] = (char)(unicode_char - 0x382);
-                        } else {
-                                int count = sizeof(g_letters) / sizeof(Letter);
-                                for (int k = 0; k < count; ++k) {
-                                        if (unicode_char == g_letters[k].unicode) {
-                                                windows1251[j] = g_letters[k].win1251;
-                                                goto NEXT_LETTER;
-                                        }
-                                }
-                                // can't convert this char
-                                return 0;
-                        }
-NEXT_LETTER:
-                        ++i;
-                        ++j;
-                } else {
-                        // can't convert this chars
-                        return 0;
-                }
-        }
-        windows1251[j] = 0;
-        return 1;
-}
+string cp2utf(string s) {
+    // Converts CP1251 text to UTF-8. The scratch buffer keeps the previous
+    // capacity (3 output bytes per input byte plus the terminator) but is owned
+    // by a std::string, so it is released even if building the result throws.
+    string output(s.size() * 3 + 1, '\0');
+    cp1251_to_utf8(&output[0], s.c_str(), s.size());
+    return string(output.c_str());  // cut at the NUL written by cp1251_to_utf8
+}
--- a/iconvlite.h
+++ b/iconvlite.h
@ -1,85 +1,9 @@
-/*
-iconvlite.h
-Iconv Lite
-Simple cpp functions to convert strings from cp1251 to utf8 and ftom utf8 to cp1251
-*/
-
 #ifndef ICONVLITE_H
 #define ICONVLITE_H

 using namespace std;

 string cp2utf(string s);
-int convert_utf8_to_windows1251(const char* utf8, char* windows1251, size_t n);
 string utf2cp(string s);

-typedef struct ConvLetter {
-        unsigned char    win1251;
-        int             unicode;
-} Letter;
-
-static Letter g_letters[] = {
-        {0x82, 0x201A}, // SINGLE LOW-9 QUOTATION MARK
-        {0x83, 0x0453}, // CYRILLIC SMALL LETTER GJE
-        {0x84, 0x201E}, // DOUBLE LOW-9 QUOTATION MARK
-        {0x85, 0x2026}, // HORIZONTAL ELLIPSIS
-        {0x86, 0x2020}, // DAGGER
-        {0x87, 0x2021}, // DOUBLE DAGGER
-        {0x88, 0x20AC}, // EURO SIGN
-        {0x89, 0x2030}, // PER MILLE SIGN
-        {0x8A, 0x0409}, // CYRILLIC CAPITAL LETTER LJE
-        {0x8B, 0x2039}, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-        {0x8C, 0x040A}, // CYRILLIC CAPITAL LETTER NJE
-        {0x8D, 0x040C}, // CYRILLIC CAPITAL LETTER KJE
-        {0x8E, 0x040B}, // CYRILLIC CAPITAL LETTER TSHE
-        {0x8F, 0x040F}, // CYRILLIC CAPITAL LETTER DZHE
-        {0x90, 0x0452}, // CYRILLIC SMALL LETTER DJE
-        {0x91, 0x2018}, // LEFT SINGLE QUOTATION MARK
-        {0x92, 0x2019}, // RIGHT SINGLE QUOTATION MARK
-        {0x93, 0x201C}, // LEFT DOUBLE QUOTATION MARK
-        {0x94, 0x201D}, // RIGHT DOUBLE QUOTATION MARK
-        {0x95, 0x2022}, // BULLET
-        {0x96, 0x2013}, // EN DASH
-        {0x97, 0x2014}, // EM DASH
-        {0x99, 0x2122}, // TRADE MARK SIGN
-        {0x9A, 0x0459}, // CYRILLIC SMALL LETTER LJE
-        {0x9B, 0x203A}, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-        {0x9C, 0x045A}, // CYRILLIC SMALL LETTER NJE
-        {0x9D, 0x045C}, // CYRILLIC SMALL LETTER KJE
-        {0x9E, 0x045B}, // CYRILLIC SMALL LETTER TSHE
-        {0x9F, 0x045F}, // CYRILLIC SMALL LETTER DZHE
-        {0xA0, 0x00A0}, // NO-BREAK SPACE
-        {0xA1, 0x040E}, // CYRILLIC CAPITAL LETTER SHORT U
-        {0xA2, 0x045E}, // CYRILLIC SMALL LETTER SHORT U
-        {0xA3, 0x0408}, // CYRILLIC CAPITAL LETTER JE
-        {0xA4, 0x00A4}, // CURRENCY SIGN
-        {0xA5, 0x0490}, // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
-        {0xA6, 0x00A6}, // BROKEN BAR
-        {0xA7, 0x00A7}, // SECTION SIGN
-        {0xA8, 0x0401}, // CYRILLIC CAPITAL LETTER IO
-        {0xA9, 0x00A9}, // COPYRIGHT SIGN
-        {0xAA, 0x0404}, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
-        {0xAB, 0x00AB}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-        {0xAC, 0x00AC}, // NOT SIGN
-        {0xAD, 0x00AD}, // SOFT HYPHEN
-        {0xAE, 0x00AE}, // REGISTERED SIGN
-        {0xAF, 0x0407}, // CYRILLIC CAPITAL LETTER YI
-        {0xB0, 0x00B0}, // DEGREE SIGN
-        {0xB1, 0x00B1}, // PLUS-MINUS SIGN
-        {0xB2, 0x0406}, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
-        {0xB3, 0x0456}, // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
-        {0xB4, 0x0491}, // CYRILLIC SMALL LETTER GHE WITH UPTURN
-        {0xB5, 0x00B5}, // MICRO SIGN
-        {0xB6, 0x00B6}, // PILCROW SIGN
-        {0xB7, 0x00B7}, // MIDDLE DOT
-        {0xB8, 0x0451}, // CYRILLIC SMALL LETTER IO
-        {0xB9, 0x2116}, // NUMERO SIGN
-        {0xBA, 0x0454}, // CYRILLIC SMALL LETTER UKRAINIAN IE
-        {0xBB, 0x00BB}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-        {0xBC, 0x0458}, // CYRILLIC SMALL LETTER JE
-        {0xBD, 0x0405}, // CYRILLIC CAPITAL LETTER DZE
-        {0xBE, 0x0455}, // CYRILLIC SMALL LETTER DZE
-        {0xBF, 0x0457} // CYRILLIC SMALL LETTER YI
-};
-
-#endif
+#endif
--- a/index.cpp
+++ b/index.cpp
@ -2,52 +2,49 @@
 #include <v8.h>

 #include <iostream>
+#include <stdlib.h>
 #include "iconvlite.h"
 #include "vkext_flex.h"

 using namespace v8;
-using namespace std;
-
-const char* ToCString(const String::Utf8Value& value) {
-  return *value ? *value : "<string conversion failed>";
-}

 void flex(const FunctionCallbackInfo<Value>& args) {
-  Isolate* isolate = args.GetIsolate();
+    Isolate* isolate = args.GetIsolate();  // JS binding: args are [0]=name, [1]=sex, [2]=case, [3]=type, [4]=lang; returns the inflected name as a UTF-8 string

-  String::Utf8Value nameArg(args[0]);
-  string nameString(*nameArg);
-  string nameStringWindows1251 = utf2cp(nameString);
-  
-  String::Utf8Value caseArg(args[2]);
-  string caseString(*caseArg);
-  string caseStringWindows1251 = utf2cp(caseString);
+    String::Utf8Value nameArg(args[0]);
+    std::string nameString(*nameArg ? *nameArg : "");  // operator* yields NULL when the value cannot be converted; std::string(NULL) is undefined
+    std::string nameStringWindows1251 = utf2cp(nameString);

-  String::Utf8Value typeArg(args[3]);
-  string typeString(*typeArg);
-  string ctypeStringWindows1251 = utf2cp(typeString);
+    String::Utf8Value caseArg(args[2]);
+    std::string caseString(*caseArg ? *caseArg : "");  // same NULL guard as for the name
+    std::string caseStringWindows1251 = utf2cp(caseString);

-  int sex = (int)args[1]->NumberValue();
-  int lang = (int)args[4]->NumberValue();
+    String::Utf8Value typeArg(args[3]);
+    std::string typeString(*typeArg ? *typeArg : "");  // same NULL guard as for the name
+    std::string ctypeStringWindows1251 = utf2cp(typeString);

-  char *result = do_flex(
-    nameStringWindows1251.c_str(),
-    nameStringWindows1251.length(),
-    caseStringWindows1251.c_str(),
-    caseStringWindows1251.length(),
-    sex,
-    ctypeStringWindows1251.c_str(),
-    ctypeStringWindows1251.length(),
-    lang);
+    int sex = (int)args[1]->NumberValue();
+    int lang = (int)args[4]->NumberValue();

-  string resultStringWindows1251(result);
-  string resultString = cp2utf(resultStringWindows1251);
+    char *result = do_flex(
+        nameStringWindows1251.c_str(),
+        nameStringWindows1251.length(),
+        caseStringWindows1251.c_str(),
+        caseStringWindows1251.length(),
+        sex,
+        ctypeStringWindows1251.c_str(),
+        lang);

-  args.GetReturnValue().Set(String::NewFromUtf8(isolate, resultString.c_str()));
+    std::string resultStringWindows1251(result ? result : "");  // guard in case do_flex returns NULL; the text is copied before the buffer is released
+    free(result);  // presumably malloc-allocated by do_flex (estrdup) -- TODO confirm; free(NULL) is a no-op
+
+    std::string resultString = cp2utf(resultStringWindows1251);
+
+    args.GetReturnValue().Set(String::NewFromUtf8(isolate, resultString.c_str()));
 }

 void Init(Handle<Object> exports) {
    NODE_SET_METHOD(exports, "flex", flex);
 }

-NODE_MODULE(hello, Init);
+NODE_MODULE(vkext_flex, Init)
--- a/test.js
+++ b/test.js
@ -3,12 +3,14 @@ const vkflex = require('./index.js')
 let nameCases = ['Gen', 'Dat', 'Acc', 'Ins', 'Abl']
 let names = [
  ['Евгений', 'Зиновьев', 0],
-  ['Павел', 'Дуров', 0],
-  ['Анастасия', 'Семенюк', 1],
-  ['Катя', 'Лебедева', 1]
+  //['Павел', 'Дуров', 0],
+  //['Анастасия', 'Семенюк', 1],
+  //['Катя', 'Лебедева', 1]
+  //['Denis', 'Komissarov', 0]
 ]

 console.time('flex')
+for (let i = 0; i < 100; i++) {
 for (let [name, surname, sex] of names) {
  console.log('Testing "'+name+' '+surname+'"...')

@ -18,4 +20,5 @@ for (let [name, surname, sex] of names) {

  console.log('')
 }
+}
 console.timeEnd('flex')
--- a/vkext_flex.c
+++ b/vkext_flex.c
@ -38,7 +38,7 @@ char *estrdup (const char *s) {
    return d;
 }

-char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int type_len, int lang_id) {
+char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id) {
  if (name_len  > (1 << 10)) {
    return estrdup (name);
  }
--- a/vkext_flex.h
+++ b/vkext_flex.h
@ -14,7 +14,7 @@
    You should have received a copy of the GNU General Public License
    along with VK/KittenPHP-DB-Engine.  If not, see <http://www.gnu.org/licenses/>.

-    This program is released under the GPL with the additional exemption 
+    This program is released under the GPL with the additional exemption
    that compiling, linking, and/or using OpenSSL is allowed.
    You are free to remove this exemption from derived works.

@ -30,10 +30,10 @@
 #if defined __cplusplus
 extern "C" {
 #endif
-    
+
 #include <stdio.h>

-    
+
 struct vk_node {
  short tail_len;
  short hyphen;
@ -52,9 +52,9 @@ struct lang {
  const char **endings;
  struct vk_node nodes[];
 };
-    
-char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int type_len, int lang_id);
-    
+
+char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id);
+
 #if defined __cplusplus
 };
 #endif