هذه الصفحة ستساعدك
[تحدٍّ برمجي] اكتب دالتين: الأولى تحوّل من ترميز UTF-32 إلى UTF-8، والثانية تقوم بالعكس
محاولتي بلغة C++
الدالة الأولى (التحويل من UTF-8 إلى UTF-32)
// Yields bit `n` (0 = least significant) of `c` as 0 or 1.
// Both arguments are parenthesized so that expressions can be passed safely.
// The decoder below no longer needs it; it is kept for any other code that uses it.
#define getbit(c,n) (((c) >> (n)) & 0x1)

/**
 * Decodes a NUL-terminated UTF-8 string into a NUL-terminated UTF-32 string.
 *
 * @param utf8   NUL-terminated input bytes (only read, never written).
 * @param utf32  Output buffer. At most one code point is written per input
 *               byte, so strlen(utf8) + 1 elements are always sufficient.
 *
 * Malformed input never causes a read past the terminating NUL. Each of the
 * following produces a single U+FFFD (REPLACEMENT CHARACTER) instead:
 *   - a byte that cannot start a sequence (stray continuation byte, 0xF8..0xFF);
 *   - a sequence cut short by a non-continuation byte or by the end of the
 *     string (decoding resumes at the offending byte);
 *   - an overlong encoding, a UTF-16 surrogate, or a value above U+10FFFF.
 */
void UTF8_to_UTF32(char *utf8,uint32_t *utf32)
{
    const uint32_t replacement = 0xFFFD;
    // Smallest code point that legitimately needs 1, 2, 3 or 4 bytes;
    // a decoded value below this bound is an overlong encoding.
    static const uint32_t minimumForLength[4] = { 0x0, 0x80, 0x800, 0x10000 };

    // Work on unsigned bytes: plain `char` may be signed, and sign extension
    // would leak into the assembled code point.
    const unsigned char *bytes = reinterpret_cast<const unsigned char *>(utf8);
    int i = 0, i32 = 0;

    while (bytes[i] != 0)
    {
        const unsigned char lead = bytes[i];
        uint32_t c32;
        int trailing;   // number of continuation bytes announced by the lead byte

        if (lead < 0x80)                { c32 = lead;        trailing = 0; }
        else if ((lead & 0xE0) == 0xC0) { c32 = lead & 0x1F; trailing = 1; }
        else if ((lead & 0xF0) == 0xE0) { c32 = lead & 0x0F; trailing = 2; }
        else if ((lead & 0xF8) == 0xF0) { c32 = lead & 0x07; trailing = 3; }
        else
        {
            // Not a valid lead byte: replace it and move on by one byte.
            utf32[i32++] = replacement;
            ++i;
            continue;
        }

        // `consumed` counts the lead byte plus every continuation byte accepted so far.
        int consumed = 1;
        bool wellFormed = true;
        for (; consumed <= trailing; ++consumed)
        {
            const unsigned char next = bytes[i + consumed];
            // The NUL terminator fails this test too, so we never step past it.
            if ((next & 0xC0) != 0x80)
            {
                wellFormed = false;
                break;
            }
            c32 = (c32 << 6) | (next & 0x3F);
        }

        if (wellFormed)
        {
            const bool overlong   = c32 < minimumForLength[trailing];
            const bool surrogate  = c32 >= 0xD800 && c32 <= 0xDFFF;
            const bool outOfRange = c32 > 0x10FFFF;
            if (overlong || surrogate || outOfRange)
                c32 = replacement;
        }
        else
        {
            c32 = replacement;
        }

        utf32[i32++] = c32;
        i += consumed;
    }
    utf32[i32] = 0;
}
الدالة الثانية (التحويل من UTF-32 إلى UTF-8)
/**
 * Encodes a NUL-terminated UTF-32 string as a NUL-terminated UTF-8 string.
 *
 * @param utf32  Input code points, terminated by a 0 element (only read).
 * @param utf8   Output buffer. At most 4 bytes are written per input code
 *               point, plus the terminating NUL.
 *
 * Values that are not Unicode scalar values (UTF-16 surrogates U+D800..U+DFFF
 * and anything above U+10FFFF) cannot be represented in well-formed UTF-8.
 * They are encoded as U+FFFD (REPLACEMENT CHARACTER, bytes EF BF BD) so the
 * output is always valid UTF-8 and no input element is silently dropped.
 */
void UTF32_to_UTF8(uint32_t *utf32, char *utf8)
{
    int i = 0;

    for (int i32 = 0; utf32[i32] != 0; ++i32)
    {
        uint32_t c32 = utf32[i32];

        const bool surrogate  = c32 >= 0xD800 && c32 <= 0xDFFF;
        const bool outOfRange = c32 > 0x10FFFF;
        if (surrogate || outOfRange)
            c32 = 0xFFFD;

        if (c32 <= 0x7F)
        {
            // 0xxxxxxx
            utf8[i++] = static_cast<char>(c32);
        }
        else if (c32 <= 0x7FF)
        {
            // 110xxxxx 10xxxxxx
            utf8[i++] = static_cast<char>(0xC0 | (c32 >> 6));
            utf8[i++] = static_cast<char>(0x80 | (c32 & 0x3F));
        }
        else if (c32 <= 0xFFFF)
        {
            // 1110xxxx 10xxxxxx 10xxxxxx
            utf8[i++] = static_cast<char>(0xE0 | (c32 >> 12));
            utf8[i++] = static_cast<char>(0x80 | ((c32 >> 6) & 0x3F));
            utf8[i++] = static_cast<char>(0x80 | (c32 & 0x3F));
        }
        else
        {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (c32 <= 0x10FFFF is guaranteed above)
            utf8[i++] = static_cast<char>(0xF0 | (c32 >> 18));
            utf8[i++] = static_cast<char>(0x80 | ((c32 >> 12) & 0x3F));
            utf8[i++] = static_cast<char>(0x80 | ((c32 >> 6) & 0x3F));
            utf8[i++] = static_cast<char>(0x80 | (c32 & 0x3F));
        }
    }
    utf8[i] = 0;
}
هاتان هما الدالّتان اللتان أستخدمهما في مرمّز لغة الأسُس، وهما تعتمدان على مكتبات سي++11 المعيارية. قد لا تدخلان ضمن التحدي لأنهما تعتمدان على المكتبة المعيارية للقيام بالتحويل بدل القيام به بنفسيهما، لكنهما حل أفضل لمن يحتاج إلى التحويل بين الترميزين:
std::ostream & outStream = std::cout;
typedef std::codecvt FacetType;
static std::locale utf8Locale("en_US.UTF8");
static const FacetType& utf8Facet = std::use_facet(utf8Locale);
/**
 * Converts a run of Char elements to WChar elements by delegating to
 * utf8Facet.in() with a freshly initialized conversion state.
 *
 * @param input                  First element of the source buffer.
 * @param inputLength            Number of source elements available.
 * @param output                 First element of the destination buffer.
 * @param outputSize             Capacity of the destination, in elements.
 * @param processedInputLength   Receives how many source elements the facet
 *                               advanced over.
 * @param resultedOutputLength   Receives how many destination elements the
 *                               facet advanced over.
 *
 * The status returned by utf8Facet.in() is not inspected; callers only get
 * the two element counts.
 */
void convertStr(Char const *input, int inputLength, WChar *output, int outputSize, int &processedInputLength, int &resultedOutputLength)
{
    Char const *const inputEnd = input + inputLength;
    WChar *const outputEnd = output + outputSize;

    // Set by the facet to where it stopped in each buffer.
    Char const *inputStop;
    WChar *outputStop;

    std::mbstate_t conversionState = std::mbstate_t();
    utf8Facet.in(conversionState, input, inputEnd, inputStop, output, outputEnd, outputStop);

    processedInputLength = inputStop - input;
    resultedOutputLength = outputStop - output;
}
/**
 * Converts a run of WChar elements to Char elements by delegating to
 * utf8Facet.out() with a freshly initialized conversion state.
 *
 * @param input                  First element of the source buffer.
 * @param inputLength            Number of source elements available.
 * @param output                 First element of the destination buffer.
 * @param outputSize             Capacity of the destination, in elements.
 * @param processedInputLength   Receives how many source elements the facet
 *                               advanced over.
 * @param resultedOutputLength   Receives how many destination elements the
 *                               facet advanced over.
 *
 * The status returned by utf8Facet.out() is not inspected; callers only get
 * the two element counts.
 */
void convertStr(WChar const *input, int inputLength, Char *output, int outputSize, int &processedInputLength, int &resultedOutputLength)
{
    WChar const *const inputEnd = input + inputLength;
    Char *const outputEnd = output + outputSize;

    // Set by the facet to where it stopped in each buffer.
    WChar const *inputStop;
    Char *outputStop;

    std::mbstate_t conversionState = std::mbstate_t();
    utf8Facet.out(conversionState, input, inputEnd, inputStop, output, outputEnd, outputStop);

    processedInputLength = inputStop - input;
    resultedOutputLength = outputStop - output;
}
التعليقات