summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tim-Philipp Müller <tim@centricular.com> 2015-04-26 13:27:30 (GMT)
committer: Tim-Philipp Müller <tim@centricular.com> 2015-04-26 13:41:30 (GMT)
commit: 9ef16721edbca2b08a1554356f9bcb07c5ece2c0 (patch)
tree: 7d9f3c9fa428a09fb72756696e1bef43b0de5f01
parent: 7e8e020cfddd1b7f7ef5b939addf89b66da36aad (diff)
downloadgst-plugins-base-9ef16721edbca2b08a1554356f9bcb07c5ece2c0.tar.gz
gst-plugins-base-9ef16721edbca2b08a1554356f9bcb07c5ece2c0.tar.xz
typefinding: detect MSS manifests without using g_convert()
Embedded systems often have limited charset conversion functionality, so don't rely on g_convert() (i.e. iconv) for UTF-16 to UTF-8 conversions. We can easily enough do that ourselves by converting to native endianness and then using GLib's helper functions.
-rw-r--r--gst/typefind/gsttypefindfunctions.c108
1 files changed, 64 insertions, 44 deletions
diff --git a/gst/typefind/gsttypefindfunctions.c b/gst/typefind/gsttypefindfunctions.c
index c6073c3..eb99ef7 100644
--- a/gst/typefind/gsttypefindfunctions.c
+++ b/gst/typefind/gsttypefindfunctions.c
@@ -3555,63 +3555,83 @@ swf_type_find (GstTypeFind * tf, gpointer unused)
/*** application/vnd.ms-sstr+xml ***/
+/* Copies data_size bytes of UTF-16 data from utf16_data into utf16_ne and,
+ * if data_endianness differs from G_BYTE_ORDER, byte-swaps every complete
+ * 16-bit unit in place so that utf16_ne ends up in native byte order.
+ *
+ * No bounds checking is done here: the caller must make sure that utf16_ne
+ * has room for at least data_size bytes. */
+static void
+mss_manifest_load_utf16 (gunichar2 * utf16_ne, const guint8 * utf16_data,
+ gsize data_size, guint data_endianness)
+{
+ memcpy (utf16_ne, utf16_data, data_size);
+ if (data_endianness != G_BYTE_ORDER) {
+ /* index has the same width as the byte count it is derived from */
+ const gsize n_units = data_size / 2;
+ gsize i;
+
+ for (i = 0; i < n_units; ++i)
+ utf16_ne[i] = GUINT16_SWAP_LE_BE (utf16_ne[i]);
+ }
+}
+
/* Caps suggested by the typefinder below for application/vnd.ms-sstr+xml;
 * MSS_MANIFEST_CAPS fetches them from the static caps definition. */
static GstStaticCaps mss_manifest_caps =
GST_STATIC_CAPS ("application/vnd.ms-sstr+xml");
#define MSS_MANIFEST_CAPS (gst_static_caps_get(&mss_manifest_caps))
+/* Typefinder for Smooth Streaming manifests. Suggests MSS_MANIFEST_CAPS with
+ * GST_TYPE_FIND_MAXIMUM when the first XML element is SmoothStreamingMedia,
+ * either directly or after converting BOM-prefixed UTF-16 input to UTF-8.
+ *
+ * tf: the typefind context to peek data from and report suggestions to
+ * unused: not used */
static void
mss_manifest_type_find (GstTypeFind * tf, gpointer unused)
{
+ gunichar2 utf16_ne[512];
+ const guint8 *data;
+ guint data_endianness = 0;
+ glong n_read = 0, size = 0;
+ guint length;
+ gchar *utf8;
+
if (xml_check_first_element (tf, "SmoothStreamingMedia", 20, TRUE)) {
gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MSS_MANIFEST_CAPS);
- } else {
- const guint8 *data;
- gboolean utf16_le, utf16_be;
- const gchar *convert_from = NULL;
- guint8 *converted_data;
+ return;
+ }
- /* try detecting the charset */
- data = gst_type_find_peek (tf, 0, 2);
+ length = gst_type_find_get_length (tf);
- if (data == NULL)
- return;
+ /* try detecting the charset */
+ data = gst_type_find_peek (tf, 0, 2);
- /* look for a possible BOM */
- utf16_le = data[0] == 0xFF && data[1] == 0xFE;
- utf16_be = data[0] == 0xFE && data[1] == 0xFF;
- if (utf16_le) {
- convert_from = "UTF-16LE";
- } else if (utf16_be) {
- convert_from = "UTF-16BE";
- }
+ if (data == NULL)
+ return;
- if (convert_from) {
- gsize new_size = 0;
- guint length = gst_type_find_get_length (tf);
-
- /* try a default that should be enough */
- if (length == 0)
- length = 512;
- data = gst_type_find_peek (tf, 0, length);
-
- if (data) {
- /* skip the BOM */
- data += 2;
- length -= 2;
-
- converted_data =
- (guint8 *) g_convert ((gchar *) data, length, "UTF-8", convert_from,
- NULL, &new_size, NULL);
- if (converted_data) {
- if (xml_check_first_element_from_data (converted_data, new_size,
- "SmoothStreamingMedia", 20, TRUE))
- gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM,
- MSS_MANIFEST_CAPS);
-
- g_free (converted_data);
- }
- }
- }
+ /* look for a possible BOM; without one this is not treated as UTF-16 */
+ if (data[0] == 0xFF && data[1] == 0xFE)
+ data_endianness = G_LITTLE_ENDIAN;
+ else if (data[0] == 0xFE && data[1] == 0xFF)
+ data_endianness = G_BIG_ENDIAN;
+ else
+ return;
+
+ /* try a default that should be enough */
+ if (length == 0)
+ length = 512;
+ else if (length < 64)
+ return;
+ else if (length > sizeof (utf16_ne))
+ length = sizeof (utf16_ne);
+
+ /* length is now at most sizeof (utf16_ne), so the data copied below
+ * (length minus the 2-byte BOM) always fits into the stack buffer */
+ data = gst_type_find_peek (tf, 0, length);
+
+ if (data == NULL)
+ return;
+
+ /* skip the BOM */
+ data += 2;
+ length -= 2;
+
+ /* only whole 16-bit units can be converted */
+ length = GST_ROUND_DOWN_2 (length);
+
+ /* convert to native endian UTF-16 */
+ mss_manifest_load_utf16 (utf16_ne, data, length, data_endianness);
+
+ /* and now convert to UTF-8 */
+ utf8 = g_utf16_to_utf8 (utf16_ne, length / 2, &n_read, &size, NULL);
+ if (utf8 != NULL && n_read > 0) {
+ if (xml_check_first_element_from_data ((const guint8 *) utf8, size,
+ "SmoothStreamingMedia", 20, TRUE))
+ gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MSS_MANIFEST_CAPS);
}
+
+ g_free (utf8);
}
/*** image/jpeg ***/