diff --git a/rtexif/rtexif.cc b/rtexif/rtexif.cc
index 6179688ae..3547f6ee3 100644
--- a/rtexif/rtexif.cc
+++ b/rtexif/rtexif.cc
@@ -1928,24 +1928,68 @@ void Tag::initInt (int data, TagType t, int cnt)
     setInt (data, 0, t);
 }
 
+// Swaps the two bytes of every complete 16 bit unit of buffer, in place.
+// count is the size of buffer in bytes; a trailing odd byte is left untouched,
+// so the loop never touches memory past the end of the buffer.
+void Tag::swapByteOrder2(char *buffer, int count)
+{
+    char* ptr = buffer;
+    for (int i = 0; i + 1 < count; i += 2) {
+        const char c = ptr[0];
+        ptr[0] = ptr[1];
+        ptr[1] = c;
+        ptr += 2;
+    }
+}
+
+// Fills this tag with an Exif UserComment: an 8 byte character code ("ASCII" or
+// "UNICODE", NUL padded) followed by the text, stored as UTF-16 in the tag's byte
+// order when it is not pure ASCII. No terminating NUL is stored.
 void Tag::initUserComment (const Glib::ustring &text)
 {
+    const bool useBOM = false; // set it to true if you want to output BOM in UCS-2/UTF-8 UserComments ; this could be turned to an options entry
     type = UNDEFINED;
     if (text.is_ascii()) {
-        count = 8 + strlen (text.c_str());
-        valuesize = count;
+        valuesize = count = 8 + strlen (text.c_str());
         value = new unsigned char[valuesize];
-        strcpy ((char*)value, "ASCII");
-        value[5] = value[6] = value[7] = 0;
-        strcpy ((char*)value + 8, text.c_str());
+        memcpy(value, "ASCII\0\0\0", 8);
+        memcpy(value + 8, text.c_str(), valuesize - 8);
     } else {
-        wchar_t *commentStr = (wchar_t*)g_utf8_to_utf16 (text.c_str(), -1, NULL, NULL, NULL);
-        count = 8 + wcslen(commentStr)*2;
-        valuesize = count;
-        value = (unsigned char*)new char[valuesize];
-        strcpy ((char*)value, "UNICODE");
-        value[7] = 0;
-        wcscpy(((wchar_t*)value) + 4, commentStr);
+        // g_utf8_to_utf16 yields 16 bit units (gunichar2), while wchar_t is 32 bits wide on
+        // most non-Windows platforms: the length has to come from GLib, not from wcslen().
+        glong utf16Units = 0;
+        gunichar2 *commentStr = g_utf8_to_utf16 (text.c_str(), -1, nullptr, &utf16Units, nullptr);
+
+        if (!commentStr) {
+            // the conversion failed (invalid UTF-8): store an empty UNICODE comment
+            utf16Units = 0;
+        }
+
+        const size_t payloadSize = static_cast<size_t>(utf16Units) * 2;
+        const size_t bomSize = useBOM ? 2 : 0;
+        valuesize = count = 8 + bomSize + payloadSize;
+        value = new unsigned char[valuesize];
+        memcpy(value, "UNICODE\0", 8);
+
+        if (useBOM) {
+            if (getOrder() == INTEL) { //Little Endian
+                value[8] = 0xFF;
+                value[9] = 0xFE;
+            } else {
+                value[8] = 0xFE;
+                value[9] = 0xFF;
+            }
+        }
+
+        if (commentStr) {
+            // Swapping byte order to match the Exif's byte order
+            if (getOrder() != HOSTORDER) {
+                swapByteOrder2((char*)commentStr, payloadSize);
+            }
+
+            memcpy(value + 8 + bomSize, commentStr, payloadSize);
+        }
+
         g_free(commentStr);
     }
 }
diff --git a/rtexif/rtexif.h b/rtexif/rtexif.h
index 125d38c94..452cedba1 100644
--- a/rtexif/rtexif.h
+++ b/rtexif/rtexif.h
@@ -236,6 +236,9 @@ public:
     void initLongArray (const char* data, int len);
     void initRational (int num, int den);
 
+    // swaps the two bytes of each 16 bit unit of buffer in place; count is the buffer size in bytes
+    static void swapByteOrder2 (char *buffer, int count);
+
     // get basic tag properties
     int getID () const
     {
diff --git a/rtexif/stdattribs.cc b/rtexif/stdattribs.cc
index 4982b0b81..ca19b7f1c 100644
--- a/rtexif/stdattribs.cc
+++ b/rtexif/stdattribs.cc
@@ -452,12 +452,129 @@ public:
         }
 
         count = std::min (count, 65535); // limit to 65535 chars to avoid crashes in case of corrupted metadata
-        char *buffer = new char[count - 7];
+        char *buffer = new char[count - 6]; // include 2 ending null chars for UCS-2 string (possibly)
+        char *value = (char*)t->getValue();
 
-        if (!memcmp ((char*)t->getValue(), "ASCII\0\0\0", 8)) {
-            strncpy (buffer, (char*)t->getValue() + 8, count - 8);
+        if (!memcmp(value, "ASCII\0\0\0", 8)) {
+            memcpy(buffer, value + 8, count - 8);
             buffer[count - 8] = '\0';
+        } else if (!memcmp(value, "UNICODE\0", 8)) {
+            memcpy(buffer, value + 8, count - 8);
+            buffer[count - 7] = buffer[count - 8] = '\0';
+
+            // Inspect the payload as unsigned bytes: plain char may be signed, in which
+            // case the comparisons against 0xEF/0xFE/0xFF below would never be true.
+            const unsigned char *bytes = reinterpret_cast<const unsigned char*>(buffer);
+            const int payloadSize = count - 8;
+            const bool hasUtf8BOM = payloadSize >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF;
+
+            if (payloadSize % 2 || hasUtf8BOM) {
+                // odd string length can only be UTF-8; skip the BOM only when there is one
+                std::string retVal (buffer + (hasUtf8BOM ? 3 : 0));
+                delete [] buffer;
+                return retVal;
+            }
+
+            bool hasBOM = false;
+            enum ByteOrder bo = UNKNOWN;
+
+            if (payloadSize >= 2) {
+                if (bytes[0] == 0xFF && bytes[1] == 0xFE) {
+                    bo = INTEL; // little endian
+                    hasBOM = true;
+                } else if (bytes[0] == 0xFE && bytes[1] == 0xFF) {
+                    bo = MOTOROLA; // big endian
+                    hasBOM = true;
+                }
+            }
+
+            if (bo == UNKNOWN) {
+                // auto-detecting byte order; we still don't know if it's UCS-2 or UTF-8
+                int a = 0, b = 0; // zero bytes found at even / odd offsets
+                bool hasContinuationByte = false; // any 10xxxxxx byte, as used by UTF-8 multi-byte sequences
+
+                for (int j = 0; j < payloadSize; j++) {
+                    const unsigned char cc = bytes[j];
+
+                    if (!cc) {
+                        if (!(j % 2)) {
+                            ++a;
+                        } else {
+                            ++b;
+                        }
+                    }
+
+                    if ((cc & 0xC0) == 0x80) {
+                        hasContinuationByte = true;
+                    }
+                }
+
+                // g_utf8_validate rejects NUL bytes inside the given length, so leave the padding out
+                int utf8Size = payloadSize;
+
+                while (utf8Size > 0 && !bytes[utf8Size - 1]) {
+                    --utf8Size;
+                }
+
+                if (hasContinuationByte && g_utf8_validate(buffer, utf8Size, nullptr)) {
+                    // this is an UTF-8 string
+                    std::string retVal (buffer);
+                    delete [] buffer;
+                    return retVal;
+                }
+
+                if (a != b) {
+                    // a zero high byte comes first in big endian text, second in little endian text
+                    bo = a > b ? MOTOROLA : INTEL;
+                }
+            }
+
+            if (bo == UNKNOWN) {
+                // nothing in the data gives the order away: assume the byte order of the tag itself
+                bo = t->getOrder();
+            }
+
+            // now swapping if necessary: g_utf16_to_utf8 expects host byte order
+            if (bo != HOSTORDER) {
+                Tag::swapByteOrder2(buffer, payloadSize);
+            }
+
+            // skip the BOM so that it doesn't end up in the returned text
+            const gunichar2 *utf16Str = reinterpret_cast<const gunichar2*>(buffer + (hasBOM ? 2 : 0));
+            glong written = 0;
+            char* utf8Str = g_utf16_to_utf8(utf16Str, -1, nullptr, &written, nullptr);
+            delete [] buffer;
+
+            if (utf8Str) {
+                buffer = new char[written + 1];
+                memcpy(buffer, utf8Str, written);
+                buffer[written] = 0;
+                g_free(utf8Str);
+            } else {
+                // invalid UTF-16 data: return an empty comment
+                buffer = new char[1];
+                buffer[0] = 0;
+            }
+        } else if (!memcmp(value, "\0\0\0\0\0\0\0\0", 8)) {
+            // local charset string, whatever it is
+            memcpy(buffer, value + 8, count - 8);
+            buffer[count - 7] = buffer[count - 8] = '\0';
+
+            gsize written = 0;
+            char *utf8Str = g_locale_to_utf8(buffer, count - 8, nullptr, &written, nullptr);
+            if (utf8Str && written) {
+                delete [] buffer;
+                size_t length = strlen(utf8Str);
+                buffer = new char[length + 1];
+                strcpy(buffer, utf8Str);
+            } else {
+                buffer[0] = 0;
+            }
+            if (utf8Str) {
+                g_free(utf8Str);
+            }
         } else {
+            // JIS: unsupported
             buffer[0] = 0;
         }
 
@@ -467,11 +584,8 @@ public:
     }
     virtual void fromString (Tag* t, const std::string& value)
     {
-        char *buffer = new char[t->getCount()];
-        memcpy (buffer, "ASCII\0\0\0", 8);
-        strcpy (buffer + 8, value.c_str());
-        t->fromString (buffer, value.size() + 9);
-        delete [] buffer;
+        Glib::ustring tmpStr(value);
+        t->userCommentFromString (tmpStr);
     }
 };
 UserCommentInterpreter userCommentInterpreter;