diff options
Diffstat (limited to 'src/common/string_util.cpp')
-rw-r--r-- | src/common/string_util.cpp | 531 |
1 files changed, 531 insertions, 0 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp new file mode 100644 index 000000000..415dcbbc7 --- /dev/null +++ b/src/common/string_util.cpp @@ -0,0 +1,531 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include <stdlib.h> +#include <stdio.h> +#include <algorithm> + +#include "common.h" +#include "common_paths.h" +#include "string_util.h" + +#ifdef _WIN32 + #include <Windows.h> +#else + #include <iconv.h> + #include <errno.h> +#endif + +// faster than sscanf +bool AsciiToHex(const char* _szValue, u32& result) +{ + char *endptr = NULL; + const u32 value = strtoul(_szValue, &endptr, 16); + + if (!endptr || *endptr) + return false; + + result = value; + return true; +} + +bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args) +{ + int writtenCount; + +#ifdef _WIN32 + // You would think *printf are simple, right? Iterate on each character, + // if it's a format specifier handle it properly, etc. + // + // Nooooo. Not according to the C standard. + // + // According to the C99 standard (7.19.6.1 "The fprintf function") + // The format shall be a multibyte character sequence + // + // Because some character encodings might have '%' signs in the middle of + // a multibyte sequence (SJIS for example only specifies that the first + // byte of a 2 byte sequence is "high", the second byte can be anything), + // printf functions have to decode the multibyte sequences and try their + // best to not screw up. + // + // Unfortunately, on Windows, the locale for most languages is not UTF-8 + // as we would need. Notably, for zh_TW, Windows chooses EUC-CN as the + // locale, and completely fails when trying to decode UTF-8 as EUC-CN. + // + // On the other hand, the fix is simple: because we use UTF-8, no such + // multibyte handling is required as we can simply assume that no '%' char + // will be present in the middle of a multibyte sequence. + // + // This is why we lookup an ANSI (cp1252) locale here and use _vsnprintf_l. + static locale_t c_locale = NULL; + if (!c_locale) + c_locale = _create_locale(LC_ALL, ".1252"); + writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args); +#else + writtenCount = vsnprintf(out, outsize, format, args); +#endif + + if (writtenCount > 0 && writtenCount < outsize) + { + out[writtenCount] = '\0'; + return true; + } + else + { + out[outsize - 1] = '\0'; + return false; + } +} + +std::string StringFromFormat(const char* format, ...) +{ + va_list args; + char *buf = NULL; +#ifdef _WIN32 + int required = 0; + + va_start(args, format); + required = _vscprintf(format, args); + buf = new char[required + 1]; + CharArrayFromFormatV(buf, required + 1, format, args); + va_end(args); + + std::string temp = buf; + delete[] buf; +#else + va_start(args, format); + if (vasprintf(&buf, format, args) < 0) + ERROR_LOG(COMMON, "Unable to allocate memory for string"); + va_end(args); + + std::string temp = buf; + free(buf); +#endif + return temp; +} + +// For Debugging. Read out an u8 array. +std::string ArrayToString(const u8 *data, u32 size, int line_len, bool spaces) +{ + std::ostringstream oss; + oss << std::setfill('0') << std::hex; + + for (int line = 0; size; ++data, --size) + { + oss << std::setw(2) << (int)*data; + + if (line_len == ++line) + { + oss << '\n'; + line = 0; + } + else if (spaces) + oss << ' '; + } + + return oss.str(); +} + +// Turns " hej " into "hej". Also handles tabs. +std::string StripSpaces(const std::string &str) +{ + const size_t s = str.find_first_not_of(" \t\r\n"); + + if (str.npos != s) + return str.substr(s, str.find_last_not_of(" \t\r\n") - s + 1); + else + return ""; +} + +// "\"hello\"" is turned to "hello" +// This one assumes that the string has already been space stripped in both +// ends, as done by StripSpaces above, for example. +std::string StripQuotes(const std::string& s) +{ + if (s.size() && '\"' == s[0] && '\"' == *s.rbegin()) + return s.substr(1, s.size() - 2); + else + return s; +} + +bool TryParse(const std::string &str, u32 *const output) +{ + char *endptr = NULL; + + // Reset errno to a value other than ERANGE + errno = 0; + + unsigned long value = strtoul(str.c_str(), &endptr, 0); + + if (!endptr || *endptr) + return false; + + if (errno == ERANGE) + return false; + +#if ULONG_MAX > UINT_MAX + if (value >= 0x100000000ull + && value <= 0xFFFFFFFF00000000ull) + return false; +#endif + + *output = static_cast<u32>(value); + return true; +} + +bool TryParse(const std::string &str, bool *const output) +{ + if ("1" == str || !strcasecmp("true", str.c_str())) + *output = true; + else if ("0" == str || !strcasecmp("false", str.c_str())) + *output = false; + else + return false; + + return true; +} + +std::string StringFromInt(int value) +{ + char temp[16]; + sprintf(temp, "%i", value); + return temp; +} + +std::string StringFromBool(bool value) +{ + return value ? "True" : "False"; +} + +bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _pFilename, std::string* _pExtension) +{ + if (full_path.empty()) + return false; + + size_t dir_end = full_path.find_last_of("/" + // windows needs the : included for something like just "C:" to be considered a directory +#ifdef _WIN32 + ":" +#endif + ); + if (std::string::npos == dir_end) + dir_end = 0; + else + dir_end += 1; + + size_t fname_end = full_path.rfind('.'); + if (fname_end < dir_end || std::string::npos == fname_end) + fname_end = full_path.size(); + + if (_pPath) + *_pPath = full_path.substr(0, dir_end); + + if (_pFilename) + *_pFilename = full_path.substr(dir_end, fname_end - dir_end); + + if (_pExtension) + *_pExtension = full_path.substr(fname_end); + + return true; +} + +void BuildCompleteFilename(std::string& _CompleteFilename, const std::string& _Path, const std::string& _Filename) +{ + _CompleteFilename = _Path; + + // check for seperator + if (DIR_SEP_CHR != *_CompleteFilename.rbegin()) + _CompleteFilename += DIR_SEP_CHR; + + // add the filename + _CompleteFilename += _Filename; +} + +void SplitString(const std::string& str, const char delim, std::vector<std::string>& output) +{ + std::istringstream iss(str); + output.resize(1); + + while (std::getline(iss, *output.rbegin(), delim)) + output.push_back(""); + + output.pop_back(); +} + +std::string TabsToSpaces(int tab_size, const std::string &in) +{ + const std::string spaces(tab_size, ' '); + std::string out(in); + + size_t i = 0; + while (out.npos != (i = out.find('\t'))) + out.replace(i, 1, spaces); + + return out; +} + +std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest) +{ + while(1) + { + size_t pos = result.find(src); + if (pos == std::string::npos) break; + result.replace(pos, src.size(), dest); + } + return result; +} + +// UriDecode and UriEncode are from http://www.codeguru.com/cpp/cpp/string/conversions/print.php/c12759 +// by jinq0123 (November 2, 2006) + +// Uri encode and decode. +// RFC1630, RFC1738, RFC2396 + +//#include <string> +//#include <assert.h> + +const char HEX2DEC[256] = +{ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /* 0 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 1 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 2 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,16,16, 16,16,16,16, + + /* 4 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16, + /* 5 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 6 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16, + /* 7 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + + /* 8 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* 9 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* A */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* B */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + + /* C */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* D */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* E */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16, + /* F */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16 +}; + +std::string UriDecode(const std::string & sSrc) +{ + // Note from RFC1630: "Sequences which start with a percent sign + // but are not followed by two hexadecimal characters (0-9, A-F) are reserved + // for future extension" + + const unsigned char * pSrc = (const unsigned char *)sSrc.c_str(); + const size_t SRC_LEN = sSrc.length(); + const unsigned char * const SRC_END = pSrc + SRC_LEN; + const unsigned char * const SRC_LAST_DEC = SRC_END - 2; // last decodable '%' + + char * const pStart = new char[SRC_LEN]; + char * pEnd = pStart; + + while (pSrc < SRC_LAST_DEC) + { + if (*pSrc == '%') + { + char dec1, dec2; + if (16 != (dec1 = HEX2DEC[*(pSrc + 1)]) + && 16 != (dec2 = HEX2DEC[*(pSrc + 2)])) + { + *pEnd++ = (dec1 << 4) + dec2; + pSrc += 3; + continue; + } + } + + *pEnd++ = *pSrc++; + } + + // the last 2- chars + while (pSrc < SRC_END) + *pEnd++ = *pSrc++; + + std::string sResult(pStart, pEnd); + delete [] pStart; + return sResult; +} + +// Only alphanum is safe. +const char SAFE[256] = +{ + /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ + /* 0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* 1 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* 2 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* 3 */ 1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0, + + /* 4 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, + /* 5 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0, + /* 6 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, + /* 7 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0, + + /* 8 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* 9 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* A */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* B */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + + /* C */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* D */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* E */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, + /* F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 +}; + +std::string UriEncode(const std::string & sSrc) +{ + const char DEC2HEX[16 + 1] = "0123456789ABCDEF"; + const unsigned char * pSrc = (const unsigned char *)sSrc.c_str(); + const size_t SRC_LEN = sSrc.length(); + unsigned char * const pStart = new unsigned char[SRC_LEN * 3]; + unsigned char * pEnd = pStart; + const unsigned char * const SRC_END = pSrc + SRC_LEN; + + for (; pSrc < SRC_END; ++pSrc) + { + if (SAFE[*pSrc]) + *pEnd++ = *pSrc; + else + { + // escape this char + *pEnd++ = '%'; + *pEnd++ = DEC2HEX[*pSrc >> 4]; + *pEnd++ = DEC2HEX[*pSrc & 0x0F]; + } + } + + std::string sResult((char *)pStart, (char *)pEnd); + delete [] pStart; + return sResult; +} + +#ifdef _WIN32 + +std::string UTF16ToUTF8(const std::wstring& input) +{ + auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr); + + std::string output; + output.resize(size); + + if (size == 0 || size != WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), &output[0], output.size(), nullptr, nullptr)) + output.clear(); + + return output; +} + +std::wstring CPToUTF16(u32 code_page, const std::string& input) +{ + auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0); + + std::wstring output; + output.resize(size); + + if (size == 0 || size != MultiByteToWideChar(code_page, 0, input.data(), input.size(), &output[0], output.size())) + output.clear(); + + return output; +} + +std::wstring UTF8ToUTF16(const std::string& input) +{ + return CPToUTF16(CP_UTF8, input); +} + +std::string SHIFTJISToUTF8(const std::string& input) +{ + return UTF16ToUTF8(CPToUTF16(932, input)); +} + +std::string CP1252ToUTF8(const std::string& input) +{ + return UTF16ToUTF8(CPToUTF16(1252, input)); +} + +#else + +template <typename T> +std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input) +{ + std::string result; + + iconv_t const conv_desc = iconv_open("UTF-8", fromcode); + if ((iconv_t)-1 == conv_desc) + { + ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno)); + } + else + { + size_t const in_bytes = sizeof(T) * input.size(); + size_t const out_buffer_size = 4 * in_bytes; + + std::string out_buffer; + out_buffer.resize(out_buffer_size); + + auto src_buffer = &input[0]; + size_t src_bytes = in_bytes; + auto dst_buffer = &out_buffer[0]; + size_t dst_bytes = out_buffer.size(); + + while (src_bytes != 0) + { + size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes, + &dst_buffer, &dst_bytes); + + if ((size_t)-1 == iconv_result) + { + if (EILSEQ == errno || EINVAL == errno) + { + // Try to skip the bad character + if (src_bytes != 0) + { + --src_bytes; + ++src_buffer; + } + } + else + { + ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno)); + break; + } + } + } + + out_buffer.resize(out_buffer_size - dst_bytes); + out_buffer.swap(result); + + iconv_close(conv_desc); + } + + return result; +} + +std::string CP1252ToUTF8(const std::string& input) +{ + //return CodeToUTF8("CP1252//TRANSLIT", input); + //return CodeToUTF8("CP1252//IGNORE", input); + return CodeToUTF8("CP1252", input); +} + +std::string SHIFTJISToUTF8(const std::string& input) +{ + //return CodeToUTF8("CP932", input); + return CodeToUTF8("SJIS", input); +} + +std::string UTF16ToUTF8(const std::wstring& input) +{ + std::string result = + // CodeToUTF8("UCS-2", input); + // CodeToUTF8("UCS-2LE", input); + // CodeToUTF8("UTF-16", input); + CodeToUTF8("UTF-16LE", input); + + // TODO: why is this needed? + result.erase(std::remove(result.begin(), result.end(), 0x00), result.end()); + return result; +} + +#endif |