c语言实现解析转义字符串 发表于 2018-07-24 | 分类于 c 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364#include<stdio.h>#include<string.h>#include<stdlib.h>static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };static const char *parse_string(const char *str){ const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2; if (*str!='\"') {return 0;} /* not a string! */ while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */ out=(char*)malloc(len+1); /* This is how long we need for the string, roughly. */ if (!out) return 0; ptr=str+1;ptr2=out; while (*ptr!='\"' && *ptr) { if (*ptr!='\\') *ptr2++=*ptr++; else { ptr++; switch (*ptr) { case 'b': *ptr2++='\b'; break; case 'f': *ptr2++='\f'; break; case 'n': *ptr2++='\n'; break; case 'r': *ptr2++='\r'; break; case 't': *ptr2++='\t'; break; case 'u': /* transcode utf16 to utf8. */ sscanf(ptr+1,"%4x",&uc);ptr+=4; /* get the unicode char. */ if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */ if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */ { if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */ sscanf(ptr+3,"%4x",&uc2);ptr+=6; if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */ uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF)); } len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len; switch (len) { case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; case 1: *--ptr2 =(uc | firstByteMark[len]); } ptr2+=len; break; default: *ptr2++=*ptr; break; } ptr++; } } *ptr2=0; if (*ptr=='\"') ptr++; return out;}int main(void) { printf("%s\r\n","hh\r\nh"); return 0;}