[sr-dev] [PATCH 1/1] websocket: remove libunistring dependency

Timo Teräs timo.teras at iki.fi
Fri Dec 20 11:47:45 CET 2013


libunistring is old, slow, messy code and does not compile on new systems.
Remove its sole user and replace it with the inline UTF-8 decoder
implementation from http://bjoern.hoehrmann.de/utf-8/decoder/dfa/.

This improves performance and portability, as libunistring is no longer needed.
---
@Peter, Hugh: Would it be ok for me to push this?

 modules/websocket/Makefile                |  2 +-
 modules/websocket/README                  |  1 -
 modules/websocket/doc/websocket_admin.xml |  3 --
 modules/websocket/utf8_decode.h           | 52 +++++++++++++++++++++++++++++++
 modules/websocket/ws_frame.c              |  4 +--
 5 files changed, 55 insertions(+), 7 deletions(-)
 create mode 100644 modules/websocket/utf8_decode.h

diff --git a/modules/websocket/Makefile b/modules/websocket/Makefile
index bb7c809..c686a82 100644
--- a/modules/websocket/Makefile
+++ b/modules/websocket/Makefile
@@ -27,7 +27,7 @@ else
 	#       E.g.: make TLS_HOOKS=1 TLS_EXTRA_LIBS="-lz -lkrb5"
 endif
 
-LIBS+= $(TLS_EXTRA_LIBS) -lunistring
+LIBS+= $(TLS_EXTRA_LIBS)
 
 # Static linking, if you'd like to use TLS and WEBSOCKET at the same time
 #
diff --git a/modules/websocket/README b/modules/websocket/README
index 49d8693..bdba3e4 100644
--- a/modules/websocket/README
+++ b/modules/websocket/README
@@ -316,7 +316,6 @@ onreply_route[WS_REPLY] {
    The following libraries must be installed before running Kamailio with
    this module loaded:
      * OpenSSL.
-     * GNU libunistring.
 
 4. Parameters
 
diff --git a/modules/websocket/doc/websocket_admin.xml b/modules/websocket/doc/websocket_admin.xml
index fa7d300..e40dc09 100644
--- a/modules/websocket/doc/websocket_admin.xml
+++ b/modules/websocket/doc/websocket_admin.xml
@@ -262,9 +262,6 @@ onreply_route[WS_REPLY] {
 		<listitem>
 		<para><emphasis>OpenSSL</emphasis>.</para>
 		</listitem>
-		<listitem>
-		<para><emphasis>GNU libunistring</emphasis>.</para>
-		</listitem>
 		</itemizedlist>
 		</para>
 	</section>
diff --git a/modules/websocket/utf8_decode.h b/modules/websocket/utf8_decode.h
new file mode 100644
index 0000000..b274fe7
--- /dev/null
+++ b/modules/websocket/utf8_decode.h
@@ -0,0 +1,52 @@
+#include <stdint.h>
+#include <stddef.h>
+
+// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern at hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
+#define UTF8_ACCEPT 0  /* DFA state 0: the start state, and the state IsUTF8() requires at end of input */
+#define UTF8_REJECT 12 /* DFA state 12: its transition row below is all 12, so it is never left once entered */
+/* utf8d holds 256 byte-class entries followed by a 9x12 state-transition table; states are stored as row offsets (multiples of 12). */
+static const uint8_t utf8d[] = {
+  // The first part of the table maps bytes to character classes, in order
+  // to reduce the size of the transition table and create bitmasks.
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+  // The second part is a transition table that maps a combination
+  // of a state of the automaton and a character class to a state.
+   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+  12,36,12,12,12,12,12,12,12,12,12,12,
+};
+/* Advance the UTF-8 DFA by one input byte: updates *state and *codep in place and returns the new *state. */
+static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte)
+{
+  uint32_t type = utf8d[byte]; /* character class of the byte; byte indexes the table unchecked, so it must be 0..255 */
+  /* Mid-sequence: append the byte's low 6 bits to *codep. In the accept state: restart *codep from byte masked by (0xff >> class). */
+  *codep = (*state != UTF8_ACCEPT) ?
+    (byte & 0x3fu) | (*codep << 6) :
+    (0xff >> type) & (byte);
+  /* Look up the next state: skip the 256 class entries, then index by current state (a row offset) plus class. */
+  *state = utf8d[256 + *state + type];
+  return *state;
+}
+/* Returns 1 if feeding the len bytes at s through decode() ends in UTF8_ACCEPT (true for len == 0; false for a truncated sequence), else 0. */
+static inline int IsUTF8(uint8_t* s, size_t len)
+{
+  uint32_t codepoint, state = 0; /* codepoint is scratch for decode(); state starts at UTF8_ACCEPT, so it is written before it is read */
+  /* NOTE(review): no early exit on UTF8_REJECT; the result is unaffected since that state is absorbing, but the whole buffer is always scanned. s is never written through and could be const. */
+  while (len--)
+    decode(&state, &codepoint, *s++);
+
+  return state == UTF8_ACCEPT;
+}
+
diff --git a/modules/websocket/ws_frame.c b/modules/websocket/ws_frame.c
index a3a4cef..3562437 100644
--- a/modules/websocket/ws_frame.c
+++ b/modules/websocket/ws_frame.c
@@ -22,7 +22,7 @@
  */
 
 #include <limits.h>
-#include <unistr.h>
+#include "utf8_decode.h"
 #include "../../events.h"
 #include "../../receive.h"
 #include "../../stats.h"
@@ -695,7 +695,7 @@ int ws_frame_transmit(void *data)
 	frame.fin = 1;
 	/* Can't be sure whether this message is UTF-8 or not so check to see
 	   if it "might" be UTF-8 and send as binary if it definitely isn't */
-	frame.opcode = (u8_check((uint8_t *) wsev->buf, wsev->len) == NULL) ?
+	frame.opcode = IsUTF8((uint8_t *) wsev->buf, wsev->len) ?
 				OPCODE_TEXT_FRAME : OPCODE_BINARY_FRAME;
 	frame.payload_len = wsev->len;
 	frame.payload_data = wsev->buf;
-- 
1.8.5.1




More information about the sr-dev mailing list