在 C++ 中从 UTF-8 转换为 Unicode

问题描述:

如何在 C++ 应用程序中转换 ú?应用程序收到的是该字符的 UTF-8 编码 %C3%BA,需要将其存储为等效的 Unicode 值 %FA。我只想知道该如何编写代码来完成这个转换过程。

How do I convert ú within a C++ application, where the application receives the character as the UTF-8 encoding %C3%BA and must store it as the Unicode equivalent, %FA? I just want to know how I would go about writing code to perform this conversion.

我昨天刚写了一些代码来做这件事……

I just wrote some code to do this yesterday...

我并不是说这是实现该功能的“完美”方式,但它似乎能通过我测试过的所有用例(为此我把两个方向的转换都写了)。

I'm not saying this is the "perfect" way to do this, but it appears to work for all testcases I've run through it (I wrote both directions for that purpose).

至于如何把 "%NN" 转换成整数值,就留给你自己来完成了。

I'll leave it to you to translate "%NN" to an integer value.

#include <iostream>
#include <deque>

/// @brief Encodes a single code point as a sequence of UTF-8 byte values.
///
/// Values below 128 (negative values included) are returned unchanged as a
/// one-element sequence. Larger values are split into 6-bit groups, each
/// emitted as a continuation byte (10xxxxxx), preceded by a lead byte whose
/// high bits grow by one for every continuation byte produced.
/// No range validation is performed on `charcode`.
///
/// @param charcode Code point to encode.
/// @return The encoded byte values, lead byte first, one byte per element.
std::deque<int> unicode_to_utf8(int charcode)
{
    constexpr int kPayloadBits = 6;
    constexpr int kPayloadMask = (1 << kPayloadBits) - 1;
    constexpr int kContinuationTag = 0x80;

    std::deque<int> bytes;
    if (charcode < 0x80)
    {
        bytes.push_back(charcode);
        return bytes;
    }

    // The lead byte starts as 110xxxxx; each extra continuation byte sets one
    // more high bit in the prefix and leaves one bit less for payload.
    int lead_prefix = 0xC0;
    for (int lead_capacity = 6; charcode >= (1 << lead_capacity); --lead_capacity)
    {
        // Peel off the lowest six bits; pushing to the front keeps the most
        // significant group first in the final sequence.
        bytes.push_front(kContinuationTag | (charcode & kPayloadMask));
        charcode >>= kPayloadBits;
        lead_prefix |= 1 << lead_capacity;
    }

    // Whatever remains now fits into the payload bits of the lead byte.
    bytes.push_front(lead_prefix | charcode);
    return bytes;
}


/// @brief Decodes one UTF-8 sequence from the front of `coded`.
///
/// The bytes that make up the sequence are removed from `coded`, so the
/// function can be called repeatedly to walk through a longer buffer.
/// A first element below 128 is returned as-is. Otherwise the number of
/// leading 1 bits in the lead byte determines how many continuation bytes
/// are consumed; only the low six bits of each continuation byte are used
/// (their top two bits are not validated).
///
/// @param coded Byte values to decode, lead byte first; consumed bytes are
///              popped from the front.
/// @return The decoded code point, or -1 when `coded` is empty, when the
///         sequence ends before all continuation bytes are present, or when
///         the lead byte asks for more than five continuation bytes
///         (0xFE / 0xFF), which would not fit in the result.
int utf8_to_unicode(std::deque<int> &coded)
{
    constexpr int kMalformed = -1;
    constexpr int kPayloadBits = 6;
    constexpr int kPayloadMask = (1 << kPayloadBits) - 1;
    constexpr int kMaxContinuations = 5;

    // Guard: front()/pop_front() on an empty deque is undefined behaviour.
    if (coded.empty())
    {
        return kMalformed;
    }

    int lead = coded.front();
    coded.pop_front();
    if (lead < 128)
    {
        return lead;
    }

    int lead_mask = kPayloadMask;
    int continuations = 0;
    int charcode = 0;

    // Each leading 1 bit after the first announces one continuation byte.
    // Shifting the lead left exposes the next marker bit in position 0xC0.
    while ((lead & 0xC0) == 0xC0)
    {
        // More than five continuation bytes would shift charcode past the
        // width of int; a missing byte means the input was truncated.
        if (continuations == kMaxContinuations || coded.empty())
        {
            return kMalformed;
        }
        lead = (lead << 1) & 0xff;
        lead_mask >>= 1;
        ++continuations;
        charcode = (charcode << kPayloadBits) | (coded.front() & kPayloadMask);
        coded.pop_front();
    }

    // Undo the marker shifts to recover the lead byte's payload bits and
    // place them above all continuation payloads.
    charcode |= ((lead >> continuations) & lead_mask) << (continuations * kPayloadBits);
    return charcode;
}

/// Interactive round-trip demo: reads decimal code points from standard
/// input, prints their UTF-8 encoding as hex byte values, then decodes the
/// bytes again and prints the result in decimal and hex.
/// The loop ends when reading a value fails (end of input or non-numeric
/// text).
int main()
{
    int charcode;

    for (;;)
    {
        std::cout << "Enter unicode value:" << std::endl;
        // Without this check a failed extraction leaves the stream in a
        // failed state and the loop would spin forever on the stale value.
        if (!(std::cin >> charcode))
        {
            break;
        }
        auto x = unicode_to_utf8(charcode);
        for (auto c : x)
        {
            std::cout << "\\x" << std::hex << c << " ";
        }
        std::cout << std::endl;
        // utf8_to_unicode consumes the bytes it decodes from x.
        int c = utf8_to_unicode(x);
        std::cout << "reversed:" << std::dec << c << std::hex << " in hex:" << c << std::endl;
    }
    return 0;
}