1 files changed, 531 insertions, 0 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
new file mode 100644
index 000000000..415dcbbc7
--- /dev/null
+++ b/src/common/string_util.cpp
@@ -0,0 +1,531 @@
+// Copyright 2013 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <algorithm>
+
+#include "common.h"
+#include "common_paths.h"
+#include "string_util.h"
+
+#ifdef _WIN32
+    #include <Windows.h>
+#else
+    #include <iconv.h>
+    #include <errno.h>
+#endif
+
+// Parses a NUL-terminated hexadecimal string into a u32 (faster than sscanf).
+//
+// _szValue: text to parse. It is handed to strtoul with base 16, so whatever
+//           strtoul accepts there (leading whitespace, a sign, an optional
+//           "0x" prefix) is accepted here too.
+// result:   receives the parsed value; left untouched on failure.
+//
+// Returns true only if at least one character was converted, the whole
+// string was consumed, and the value is representable in 32 bits.
+bool AsciiToHex(const char* _szValue, u32& result)
+{
+    if (!_szValue)
+        return false;
+
+    char* endptr = nullptr;
+
+    // strtoul only reports overflow through errno, so clear it first.
+    errno = 0;
+    const unsigned long value = strtoul(_szValue, &endptr, 16);
+
+    // endptr == _szValue means nothing was converted (e.g. an empty string);
+    // a non-NUL *endptr means trailing garbage.
+    if (!endptr || endptr == _szValue || *endptr)
+        return false;
+
+    if (errno == ERANGE)
+        return false;
+
+    // Where unsigned long is wider than 32 bits, reject values that would be
+    // truncated. The window mirrors TryParse(u32): small negative inputs,
+    // which strtoul wraps to the very top of the range, are still accepted.
+    const unsigned long long wide = value;
+    if (wide >= 0x100000000ull && wide <= 0xFFFFFFFF00000000ull)
+        return false;
+
+    result = static_cast<u32>(value);
+    return true;
+}
+
+// printf-style formatting into a caller-supplied buffer.
+//
+// out:     destination buffer.
+// outsize: capacity of out in bytes, including room for the terminator.
+// format:  printf format string; args: the matching argument list.
+//
+// Returns true when the complete result (which may be empty) fits in the
+// buffer. Returns false on a formatting error or truncation; in that case the
+// buffer is still NUL-terminated. With a null buffer or a non-positive
+// outsize nothing is written and false is returned.
+bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args)
+{
+    // Without room for at least the terminator there is nothing safe to write.
+    if (!out || outsize <= 0)
+        return false;
+
+    int writtenCount;
+
+#ifdef _WIN32
+    // You would think *printf are simple, right? Iterate on each character,
+    // if it's a format specifier handle it properly, etc.
+    //
+    // Nooooo. Not according to the C standard.
+    //
+    // According to the C99 standard (7.19.6.1 "The fprintf function")
+    //     The format shall be a multibyte character sequence
+    //
+    // Because some character encodings might have '%' signs in the middle of
+    // a multibyte sequence (SJIS for example only specifies that the first
+    // byte of a 2 byte sequence is "high", the second byte can be anything),
+    // printf functions have to decode the multibyte sequences and try their
+    // best to not screw up.
+    //
+    // Unfortunately, on Windows, the locale for most languages is not UTF-8
+    // as we would need. Notably, for zh_TW, Windows chooses EUC-CN as the
+    // locale, and completely fails when trying to decode UTF-8 as EUC-CN.
+    //
+    // On the other hand, the fix is simple: because we use UTF-8, no such
+    // multibyte handling is required as we can simply assume that no '%' char
+    // will be present in the middle of a multibyte sequence.
+    //
+    // This is why we lookup an ANSI (cp1252) locale here and use _vsnprintf_l.
+    static locale_t c_locale = NULL;
+    if (!c_locale)
+        c_locale = _create_locale(LC_ALL, ".1252");
+    writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args);
+#else
+    writtenCount = vsnprintf(out, outsize, format, args);
+#endif
+
+    // A count of 0 is a valid, empty result. A negative count is an error and
+    // a count >= outsize means the output did not fit.
+    if (writtenCount >= 0 && writtenCount < outsize)
+    {
+        out[writtenCount] = '\0';
+        return true;
+    }
+
+    // Failure or truncation: make sure the caller still gets a terminated string.
+    out[outsize - 1] = '\0';
+    return false;
+}
+
+// Builds a std::string from a printf-style format string and arguments.
+//
+// Returns the formatted text, or an empty string (after logging an error)
+// when the text cannot be measured or memory cannot be allocated.
+std::string StringFromFormat(const char* format, ...)
+{
+    std::string temp;
+    va_list args;
+
+#ifdef _WIN32
+    // First pass: measure the output.
+    va_start(args, format);
+    const int required = _vscprintf(format, args);
+    va_end(args);
+
+    // A negative length cannot be turned into a buffer size.
+    if (required < 0)
+    {
+        ERROR_LOG(COMMON, "Unable to format string");
+        return temp;
+    }
+
+    // Second pass: a va_list must not be reused once it has been consumed,
+    // so the argument list is restarted before formatting for real.
+    std::vector<char> buf(required + 1);
+    va_start(args, format);
+    CharArrayFromFormatV(&buf[0], required + 1, format, args);
+    va_end(args);
+
+    temp = &buf[0];
+#else
+    char* buf = NULL;
+
+    va_start(args, format);
+    const int length = vasprintf(&buf, format, args);
+    va_end(args);
+
+    // When vasprintf fails the contents of buf are not defined, so it must be
+    // neither read nor freed.
+    if (length < 0)
+    {
+        ERROR_LOG(COMMON, "Unable to allocate memory for string");
+        return temp;
+    }
+
+    temp = buf;
+    free(buf);
+#endif
+    return temp;
+}
+
+// Debugging aid: renders a byte array as two-digit lowercase hex values.
+//
+// data/size: the bytes to dump.
+// line_len:  a newline is emitted after every line_len bytes.
+// spaces:    when true, a space follows every byte that does not end a line.
+std::string ArrayToString(const u8 *data, u32 size, int line_len, bool spaces)
+{
+    std::ostringstream dump;
+    dump << std::hex << std::setfill('0');
+
+    // Number of bytes already written on the current output line.
+    int column = 0;
+
+    while (size)
+    {
+        dump << std::setw(2) << (int)*data;
+        ++column;
+
+        if (column == line_len)
+        {
+            dump << '\n';
+            column = 0;
+        }
+        else if (spaces)
+        {
+            dump << ' ';
+        }
+
+        ++data;
+        --size;
+    }
+
+    return dump.str();
+}
+
+// Removes leading and trailing whitespace (space, tab, CR, LF), so "  hej "
+// becomes "hej". A string made only of whitespace yields an empty string.
+std::string StripSpaces(const std::string &str)
+{
+    const char* const blanks = " \t\r\n";
+
+    const size_t first = str.find_first_not_of(blanks);
+    if (first == std::string::npos)
+        return "";
+
+    const size_t last = str.find_last_not_of(blanks);
+    return str.substr(first, last - first + 1);
+}
+
+// "\"hello\"" is turned to "hello"
+// This one assumes that the string has already been space stripped in both
+// ends, as done by StripSpaces above, for example.
+//
+// Only a string that both starts and ends with a double quote, using two
+// distinct characters, is unwrapped; anything else is returned unchanged.
+std::string StripQuotes(const std::string& s)
+{
+    // Requiring two characters keeps a lone '"' from counting as both the
+    // opening and the closing quote (and keeps size() - 2 from wrapping).
+    if (s.size() >= 2 && '\"' == s[0] && '\"' == *s.rbegin())
+        return s.substr(1, s.size() - 2);
+
+    return s;
+}
+
+// Parses an unsigned 32-bit integer from str.
+//
+// The text is handed to strtoul with base 0, so the base is picked from the
+// prefix of the text. The whole string must be consumed and at least one
+// character must be converted; an empty string is rejected.
+//
+// output: receives the value on success; left untouched on failure.
+// Returns true on success, false otherwise.
+bool TryParse(const std::string &str, u32 *const output)
+{
+    const char* const begin = str.c_str();
+    char *endptr = nullptr;
+
+    // Reset errno to a value other than ERANGE
+    errno = 0;
+
+    const unsigned long value = strtoul(begin, &endptr, 0);
+
+    // endptr == begin: nothing was converted. Non-NUL *endptr: trailing garbage.
+    if (!endptr || endptr == begin || *endptr)
+        return false;
+
+    if (errno == ERANGE)
+        return false;
+
+    // Where unsigned long is wider than 32 bits, reject values that do not
+    // fit. Values at the very top of the range are let through; they are what
+    // strtoul produces for small negative inputs. Widening to unsigned long
+    // long makes this check independent of the ULONG_MAX / UINT_MAX macros.
+    const unsigned long long wide = value;
+    if (wide >= 0x100000000ull && wide <= 0xFFFFFFFF00000000ull)
+        return false;
+
+    *output = static_cast<u32>(value);
+    return true;
+}
+
+// Parses a boolean from str.
+//
+// "1" and any capitalisation of "true" store true; "0" and any capitalisation
+// of "false" store false. Anything else returns false and leaves *output
+// untouched.
+bool TryParse(const std::string &str, bool *const output)
+{
+    const char* const text = str.c_str();
+
+    if (str == "1" || 0 == strcasecmp("true", text))
+    {
+        *output = true;
+        return true;
+    }
+
+    if (str == "0" || 0 == strcasecmp("false", text))
+    {
+        *output = false;
+        return true;
+    }
+
+    return false;
+}
+
+// Returns the decimal text of value, as formatted by "%i".
+std::string StringFromInt(int value)
+{
+    char digits[16];
+    sprintf(digits, "%i", value);
+
+    return std::string(digits);
+}
+
+// Returns "True" for true and "False" for false.
+std::string StringFromBool(bool value)
+{
+    if (value)
+        return "True";
+
+    return "False";
+}
+
+// Splits full_path into directory, file name and extension.
+//
+// _pPath:      receives everything up to and including the last separator.
+// _pFilename:  receives the name between the last separator and the last '.'.
+// _pExtension: receives the last '.' and what follows it; empty when there is
+//              no '.' after the last separator.
+// Any of the three output pointers may be null to skip that part.
+//
+// Returns false (writing nothing) for an empty path, true otherwise.
+bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _pFilename, std::string* _pExtension)
+{
+    if (full_path.empty())
+        return false;
+
+#ifdef _WIN32
+    // windows needs the : included for something like just "C:" to be considered a directory
+    const char* const separators = "/:";
+#else
+    const char* const separators = "/";
+#endif
+
+    // Index of the first character after the directory part.
+    const size_t last_sep = full_path.find_last_of(separators);
+    const size_t name_begin = (std::string::npos == last_sep) ? 0 : last_sep + 1;
+
+    // A '.' that belongs to the directory part does not start an extension.
+    size_t ext_begin = full_path.rfind('.');
+    if (std::string::npos == ext_begin || ext_begin < name_begin)
+        ext_begin = full_path.size();
+
+    if (_pPath)
+        *_pPath = full_path.substr(0, name_begin);
+
+    if (_pFilename)
+        *_pFilename = full_path.substr(name_begin, ext_begin - name_begin);
+
+    if (_pExtension)
+        *_pExtension = full_path.substr(ext_begin);
+
+    return true;
+}
+
+// Joins a directory and a file name into _CompleteFilename, inserting
+// DIR_SEP_CHR between them unless _Path already ends with it.
+//
+// An empty _Path is treated as lacking a separator, so the result then starts
+// with DIR_SEP_CHR.
+void BuildCompleteFilename(std::string& _CompleteFilename, const std::string& _Path, const std::string& _Filename)
+{
+    // Assemble in a local so the result stays correct even when
+    // _CompleteFilename is the same object as _Path or _Filename.
+    std::string complete(_Path);
+
+    // check for separator; empty() is tested first because rbegin() of an
+    // empty string must not be dereferenced
+    if (complete.empty() || DIR_SEP_CHR != *complete.rbegin())
+        complete += DIR_SEP_CHR;
+
+    // add the filename
+    complete += _Filename;
+
+    _CompleteFilename.swap(complete);
+}
+
+// Splits str at every occurrence of delim and stores the pieces in output,
+// replacing whatever output held before.
+//
+// Empty pieces between two delimiters are kept; a delimiter at the very end
+// of str does not produce a trailing empty piece. An empty str yields an
+// empty output.
+void SplitString(const std::string& str, const char delim, std::vector<std::string>& output)
+{
+    output.clear();
+
+    std::istringstream stream(str);
+    std::string piece;
+
+    while (std::getline(stream, piece, delim))
+        output.push_back(piece);
+}
+
+// Returns a copy of `in` with every tab character replaced by tab_size
+// spaces. A tab_size of zero or less removes the tabs without inserting
+// anything.
+std::string TabsToSpaces(int tab_size, const std::string &in)
+{
+    // Clamp first: a negative count would turn into a huge unsigned length.
+    const size_t width = tab_size > 0 ? static_cast<size_t>(tab_size) : 0;
+    const std::string spaces(width, ' ');
+    std::string out(in);
+
+    // Resume each search just past the spaces that were inserted, so the
+    // already-processed prefix is not scanned again.
+    size_t i = 0;
+    while (out.npos != (i = out.find('\t', i)))
+    {
+        out.replace(i, 1, spaces);
+        i += spaces.size();
+    }
+
+    return out;
+}
+
+// Returns `result` with every occurrence of src replaced by dest.
+//
+// The string is processed in a single left-to-right pass and replaced text is
+// not scanned again, so the call terminates even when dest contains src.
+// An empty src, or src equal to dest, returns the input unchanged.
+std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest)
+{
+    // An empty pattern matches at every position and would never terminate.
+    if (src.empty() || src == dest)
+        return result;
+
+    size_t pos = 0;
+    while ((pos = result.find(src, pos)) != std::string::npos)
+    {
+        result.replace(pos, src.size(), dest);
+        // Continue after the inserted text rather than from the start.
+        pos += dest.size();
+    }
+
+    return result;
+}
+
+// UriDecode and UriEncode are from http://www.codeguru.com/cpp/cpp/string/conversions/print.php/c12759
+// by jinq0123 (November 2, 2006)
+
+// Uri encode and decode.
+// RFC1630, RFC1738, RFC2396
+
+//#include <string>
+//#include <assert.h>
+
+// Lookup table indexed by byte value: the numeric value (0-15) of a
+// hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f'), or 16 for every
+// byte that is not a hexadecimal digit.
+const char HEX2DEC[256] = 
+{
+    /*       0  1  2  3   4  5  6  7   8  9  A  B   C  D  E  F */
+    /* 0 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* 1 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* 2 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* 3 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,16,16, 16,16,16,16,
+
+    /* 4 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16,
+    /* 5 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* 6 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16,
+    /* 7 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+
+    /* 8 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* 9 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* A */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* B */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+
+    /* C */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* D */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* E */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
+    /* F */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16
+};
+
+// Decodes %XX escapes in sSrc, where XX are two hexadecimal digits of either
+// case. A '%' that is not followed by two hexadecimal digits is copied
+// through unchanged, as are all other bytes.
+std::string UriDecode(const std::string & sSrc)
+{
+    // Note from RFC1630:  "Sequences which start with a percent sign
+    // but are not followed by two hexadecimal characters (0-9, A-F) are reserved
+    // for future extension"
+
+    const size_t SRC_LEN = sSrc.length();
+
+    // The decoded text is never longer than the source.
+    std::string sResult;
+    sResult.reserve(SRC_LEN);
+
+    size_t i = 0;
+    while (i < SRC_LEN)
+    {
+        // An escape needs two more characters after the '%'.
+        if ('%' == sSrc[i] && SRC_LEN - i > 2)
+        {
+            const char dec1 = HEX2DEC[static_cast<unsigned char>(sSrc[i + 1])];
+            const char dec2 = HEX2DEC[static_cast<unsigned char>(sSrc[i + 2])];
+            if (16 != dec1 && 16 != dec2)
+            {
+                sResult += static_cast<char>((dec1 << 4) + dec2);
+                i += 3;
+                continue;
+            }
+        }
+
+        sResult += sSrc[i];
+        ++i;
+    }
+
+    return sResult;
+}
+
+// Only alphanum is safe.
+// Lookup table indexed by byte value: 1 for '0'-'9', 'A'-'Z' and 'a'-'z',
+// which UriEncode copies through unchanged, and 0 for every other byte.
+const char SAFE[256] =
+{
+    /*      0 1 2 3  4 5 6 7  8 9 A B  C D E F */
+    /* 0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* 1 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* 2 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* 3 */ 1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0,
+
+    /* 4 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 5 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
+    /* 6 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
+    /* 7 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
+
+    /* 8 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* 9 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* A */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* B */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+
+    /* C */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* D */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* E */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
+    /* F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
+};
+
+// Percent-encodes sSrc: bytes marked safe in the SAFE table are copied as
+// they are, every other byte becomes '%' followed by its value as two
+// uppercase hexadecimal digits.
+std::string UriEncode(const std::string & sSrc)
+{
+    const char DEC2HEX[16 + 1] = "0123456789ABCDEF";
+    const size_t SRC_LEN = sSrc.length();
+
+    // Worst case: every byte is escaped and grows to three characters.
+    std::string sResult;
+    sResult.reserve(SRC_LEN * 3);
+
+    for (size_t i = 0; i < SRC_LEN; ++i)
+    {
+        // Index the tables with the unsigned byte value.
+        const unsigned char c = static_cast<unsigned char>(sSrc[i]);
+
+        if (SAFE[c])
+        {
+            sResult += static_cast<char>(c);
+        }
+        else
+        {
+            // escape this char
+            sResult += '%';
+            sResult += DEC2HEX[c >> 4];
+            sResult += DEC2HEX[c & 0x0F];
+        }
+    }
+
+    return sResult;
+}
+
+#ifdef _WIN32
+
+// Converts UTF-16 text to UTF-8 with WideCharToMultiByte.
+//
+// Returns an empty string when the sizing call reports 0, or when the
+// converting call reports a different length than the sizing call did.
+std::string UTF16ToUTF8(const std::wstring& input)
+{
+    // Sizing call: no destination buffer is passed; the return value is used
+    // as the length of the output string.
+    auto const needed = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr);
+
+    std::string utf8;
+    utf8.resize(needed);
+
+    bool converted = false;
+    if (needed != 0)
+    {
+        // Converting call: writes straight into the string's storage.
+        auto const written = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), &utf8[0], utf8.size(), nullptr, nullptr);
+        converted = (needed == written);
+    }
+
+    if (!converted)
+        utf8.clear();
+
+    return utf8;
+}
+
+// Converts text in the given Windows code page to UTF-16 with
+// MultiByteToWideChar.
+//
+// Returns an empty string when the sizing call reports 0, or when the
+// converting call reports a different length than the sizing call did.
+std::wstring CPToUTF16(u32 code_page, const std::string& input)
+{
+    // Sizing call: no destination buffer is passed; the return value is used
+    // as the length of the output string.
+    auto const needed = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0);
+
+    std::wstring wide;
+    wide.resize(needed);
+
+    bool converted = false;
+    if (needed != 0)
+    {
+        // Converting call: writes straight into the string's storage.
+        auto const written = MultiByteToWideChar(code_page, 0, input.data(), input.size(), &wide[0], wide.size());
+        converted = (needed == written);
+    }
+
+    if (!converted)
+        wide.clear();
+
+    return wide;
+}
+
+// Converts UTF-8 text to UTF-16 by calling CPToUTF16 with CP_UTF8.
+std::wstring UTF8ToUTF16(const std::string& input)
+{
+    const u32 source_code_page = CP_UTF8;
+    return CPToUTF16(source_code_page, input);
+}
+
+// Converts code page 932 text to UTF-8, going through UTF-16.
+std::string SHIFTJISToUTF8(const std::string& input)
+{
+    const u32 shift_jis_code_page = 932;
+
+    const std::wstring wide = CPToUTF16(shift_jis_code_page, input);
+    return UTF16ToUTF8(wide);
+}
+
+// Converts code page 1252 text to UTF-8, going through UTF-16.
+std::string CP1252ToUTF8(const std::string& input)
+{
+    const u32 cp1252_code_page = 1252;
+
+    const std::wstring wide = CPToUTF16(cp1252_code_page, input);
+    return UTF16ToUTF8(wide);
+}
+
+#else
+
+// Converts `input`, whose code units are in the iconv encoding `fromcode`,
+// to UTF-8.
+//
+// fromcode: iconv name of the source encoding.
+// input:    source text; it is handed to iconv as sizeof(T) * size() bytes.
+//
+// Code units that iconv rejects (EILSEQ / EINVAL) are skipped. Returns an
+// empty string if the converter cannot be opened; any other iconv error is
+// logged and whatever was converted up to that point is returned.
+template <typename T>
+std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
+{
+    std::string result;
+
+    iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
+    if ((iconv_t)-1 == conv_desc)
+    {
+        ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
+        return result;
+    }
+
+    size_t const in_bytes = sizeof(T) * input.size();
+    size_t const out_buffer_size = 4 * in_bytes;
+
+    std::string out_buffer;
+    out_buffer.resize(out_buffer_size);
+
+    // iconv counts and advances in bytes, so the source is walked through a
+    // char pointer regardless of T. The cast drops const only because of the
+    // iconv signature; the input itself is never written to here.
+    char* src_buffer = const_cast<char*>(reinterpret_cast<const char*>(input.data()));
+    size_t src_bytes = in_bytes;
+    char* dst_buffer = &out_buffer[0];
+    size_t dst_bytes = out_buffer.size();
+
+    while (src_bytes != 0)
+    {
+        size_t const iconv_result = iconv(conv_desc, &src_buffer, &src_bytes,
+            &dst_buffer, &dst_bytes);
+
+        if ((size_t)-1 != iconv_result)
+            continue;
+
+        if (EILSEQ == errno || EINVAL == errno)
+        {
+            // Try to skip the bad character: one whole code unit, with the
+            // pointer and the remaining-byte count moved by the same amount
+            // so they cannot drift apart.
+            size_t const skip = std::min(sizeof(T), src_bytes);
+            src_bytes -= skip;
+            src_buffer += skip;
+        }
+        else
+        {
+            ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno));
+            break;
+        }
+    }
+
+    // dst_bytes is what is left of the output buffer; keep only the used part.
+    out_buffer.resize(out_buffer_size - dst_bytes);
+    out_buffer.swap(result);
+
+    iconv_close(conv_desc);
+
+    return result;
+}
+
+// Converts CP1252 text to UTF-8 through CodeToUTF8.
+std::string CP1252ToUTF8(const std::string& input)
+{
+    // Other iconv names kept for reference:
+    //   "CP1252//TRANSLIT"
+    //   "CP1252//IGNORE"
+    const char* const source_encoding = "CP1252";
+
+    return CodeToUTF8(source_encoding, input);
+}
+
+// Converts Shift-JIS text to UTF-8 through CodeToUTF8.
+std::string SHIFTJISToUTF8(const std::string& input)
+{
+    // Other iconv name kept for reference: "CP932"
+    const char* const source_encoding = "SJIS";
+
+    return CodeToUTF8(source_encoding, input);
+}
+
+// Converts a wide string to UTF-8 through CodeToUTF8.
+//
+// CodeToUTF8 hands iconv sizeof(wchar_t) bytes per character, so the source
+// encoding named here has to match the width of wchar_t on this platform.
+// NUL bytes are removed from the result.
+std::string UTF16ToUTF8(const std::wstring& input)
+{
+    // Where wchar_t is 4 bytes the buffer holds 32-bit code units. Labelling
+    // that data "UTF-16LE" makes iconv read every character as two 16-bit
+    // units, which is presumably what produced the stray NULs that are
+    // stripped below, and it cannot represent characters above U+FFFF.
+    // Little-endian byte order is assumed, as it was before.
+    //
+    // Other iconv names kept for reference: "UCS-2", "UCS-2LE", "UTF-16"
+    const char* const fromcode = (sizeof(wchar_t) == 4) ? "UTF-32LE" : "UTF-16LE";
+
+    std::string result = CodeToUTF8(fromcode, input);
+
+    // Kept so that output for existing callers is unchanged: NUL bytes are
+    // dropped from the converted text.
+    result.erase(std::remove(result.begin(), result.end(), 0x00), result.end());
+    return result;
+}
+
+#endif