summaryrefslogtreecommitdiff
path: root/libsocialweb/sw-utils.c
diff options
context:
space:
mode:
author Rob Bradford <rob@linux.intel.com> 2011-02-15 14:45:27 (GMT)
committer Rob Bradford <rob@linux.intel.com> 2011-02-15 15:29:59 (GMT)
commit 9d67459bf724056e8c17fe07a852414f7ffdde7a (patch)
tree 5c543c37ba75ca61b3fecdb055cd1cd937bf302e /libsocialweb/sw-utils.c
parent 03247c2b75644e70b19a775bf22a129f8a2d4c2a (diff)
download libsocialweb-9d67459bf724056e8c17fe07a852414f7ffdde7a.tar.gz
libsocialweb-9d67459bf724056e8c17fe07a852414f7ffdde7a.tar.xz
utils: Add function for unescaping xml entities
Part of the fix for: https://bugs.meego.com/show_bug.cgi?id=10553
Diffstat (limited to 'libsocialweb/sw-utils.c')
-rw-r--r-- libsocialweb/sw-utils.c 92
1 files changed, 92 insertions, 0 deletions
diff --git a/libsocialweb/sw-utils.c b/libsocialweb/sw-utils.c
index 5ed7dc4..d7daf4f 100644
--- a/libsocialweb/sw-utils.c
+++ b/libsocialweb/sw-utils.c
@@ -19,6 +19,7 @@
#include <config.h>
#include "sw-utils.h"
#include <string.h>
+#include <stdio.h>
#include <libsoup/soup.h>
time_t
@@ -92,3 +93,94 @@ sw_next_opid (void)
return g_atomic_int_exchange_and_add (&opid, 1);
}
+
+/*
+ * Try to decode the XML entity reference that starts at @amp, which must
+ * point at a '&' character.
+ *
+ * Recognised forms are the five predefined XML entities (quot, amp, apos,
+ * lt, gt) and numeric character references ("&#NNN;" decimal, "&#xHHH;"
+ * hexadecimal).  A reference is only accepted when it is terminated by ';'
+ * and, for numeric references, when it consists solely of digits and names
+ * a non-zero code point accepted by g_unichar_validate().
+ *
+ * On success stores the code point in @replacement and the number of bytes
+ * the reference occupies (from '&' to ';' inclusive) in @entity_len, and
+ * returns TRUE.  On failure returns FALSE and leaves both outputs untouched.
+ */
+static gboolean
+sw_parse_entity (const gchar *amp, gunichar *replacement, gsize *entity_len)
+{
+  static const struct {
+    const gchar *name;
+    gunichar     code_point;
+  } named[] = {
+    { "quot", 0x0022 },
+    { "amp",  0x0026 },
+    { "apos", 0x0027 },
+    { "lt",   0x003c },
+    { "gt",   0x003e }
+  };
+  const gchar *cursor;
+  gunichar value = 0;
+  guint base = 10;
+  guint i;
+
+  for (i = 0; i < G_N_ELEMENTS (named); i++)
+    {
+      gsize name_len = strlen (named[i].name);
+
+      /* strncmp stops at the NUL, so this never reads past the string */
+      if (strncmp (amp + 1, named[i].name, name_len) == 0 &&
+          amp[1 + name_len] == ';')
+        {
+          *replacement = named[i].code_point;
+          *entity_len = name_len + 2;
+          return TRUE;
+        }
+    }
+
+  if (amp[1] != '#')
+    return FALSE;
+
+  cursor = amp + 2;
+  if (*cursor == 'x')
+    {
+      base = 16;
+      cursor++;
+    }
+
+  /* "&#;" and "&#x;" carry no digits at all */
+  if (*cursor == ';')
+    return FALSE;
+
+  for (; *cursor != ';'; cursor++)
+    {
+      gint digit;
+
+      if (base == 16)
+        digit = g_ascii_xdigit_value (*cursor);
+      else
+        digit = g_ascii_digit_value (*cursor);
+
+      /* Also reached for the terminating NUL of an unterminated reference */
+      if (digit < 0)
+        return FALSE;
+
+      value = value * base + (gunichar) digit;
+
+      /* Bail out early so that the accumulator can never overflow */
+      if (value > 0x10FFFF)
+        return FALSE;
+    }
+
+  /*
+   * U+0000 would truncate the C string; surrogates would produce invalid
+   * UTF-8.  Leave such references untouched.
+   */
+  if (value == 0 || !g_unichar_validate (value))
+    return FALSE;
+
+  *replacement = value;
+  *entity_len = (gsize) (cursor - amp) + 1;
+  return TRUE;
+}
+
+/**
+ * sw_unescape_entities
+ * @string: a NUL-terminated, writable string; modified in place
+ *
+ * Replace the xml entities in the given string in place.  The predefined
+ * entities (&quot; &amp; &apos; &lt; &gt;) and decimal / hexadecimal
+ * character references are replaced by their UTF-8 encoding.  Anything that
+ * is not a well-formed, ';'-terminated reference to a valid non-zero code
+ * point is copied through unchanged.
+ *
+ * The result is never longer than the input, so no reallocation happens.
+ * A critical warning is logged if the resulting string is not valid UTF-8.
+ *
+ * Returns: the string with the entities replaced (the same pointer that was
+ * passed in), or %NULL if @string is %NULL
+ */
+gchar *
+sw_unescape_entities (gchar *string)
+{
+  const gchar *src;
+  gchar *dst;
+
+  g_return_val_if_fail (string != NULL, NULL);
+
+  /*
+   * Single pass with a read cursor and a write cursor.  The UTF-8 encoding
+   * of an accepted code point (at most 4 bytes) is never longer than the
+   * reference it replaces, so dst never overtakes src.
+   */
+  src = string;
+  dst = string;
+
+  while (*src != '\0')
+    {
+      gunichar replacement;
+      gsize entity_len;
+
+      if (*src == '&' && sw_parse_entity (src, &replacement, &entity_len))
+        {
+          dst += g_unichar_to_utf8 (replacement, dst);
+          src += entity_len;
+        }
+      else
+        {
+          *dst++ = *src++;
+        }
+    }
+
+  *dst = '\0';
+
+  if (!g_utf8_validate (string, -1, NULL))
+    g_critical ("Invalid utf-8");
+
+  return string;
+}