StringHelper.class.php
25.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
<?php
/**
 * String handling helpers: multibyte truncation, charset conversion between
 * UTF-8 / GBK / UCS-2, HTML stripping and escaping, pinyin lookup and
 * assorted small utilities.
 *
 * The class relies on the mbstring extension throughout. Unless a method says
 * otherwise, input text is expected to be UTF-8.
 *
 * @name Util_StringHelper
 * @version
 * @package Util.StringHelper
 */
class Util_StringHelper
{
    /**
     * Cut a string by characters (not bytes) and append a suffix when text
     * remains after the cut.
     *
     * The input is trimmed first. The suffix is appended only when the
     * returned slice stops before the end of the trimmed string.
     *
     * @param string $str      text to cut
     * @param int    $length   maximum number of characters to keep
     * @param int    $start    character offset to start from
     * @param string $suffix   marker appended when the text was truncated
     * @param string $encoding character encoding of $str
     * @return string
     */
    public function mbSubstr($str, $length, $start = 0, $suffix = '...', $encoding = "utf-8")
    {
        $str = trim((string) $str);
        // Measure in the caller's encoding; the internal mbstring encoding may differ.
        $total = mb_strlen($str, $encoding);
        $piece = mb_substr($str, $start, $length, $encoding);

        // Resolve the effective start offset the same way mb_substr does.
        $offset = ($start < 0) ? max(0, $total + $start) : min($start, $total);
        $end = $offset + mb_strlen($piece, $encoding);

        if ($end < $total) {
            $piece .= $suffix;
        }
        return $piece;
    }

    /**
     * Cut a string by bytes without splitting a multibyte character.
     *
     * @param string $str      text to cut
     * @param int    $length   maximum number of bytes to keep
     * @param int    $start    byte offset to start from
     * @param string $encoding character encoding of $str
     * @return string
     */
    public function mbStrcut($str, $length, $start = 0, $encoding = "utf-8")
    {
        return mb_strcut($str, $start, $length, $encoding);
    }

    /**
     * Check whether a string is valid in the given encoding.
     *
     * @param string $str    text to check
     * @param string $coding encoding name
     * @return bool
     */
    public function isValidCoding($str, $coding = "UTF-8")
    {
        return mb_check_encoding($str, $coding);
    }

    /**
     * Convert GBK text to ASCII with decimal numeric character references.
     *
     * Single-byte characters are copied through; every valid two-byte GBK
     * character becomes "&#NNNNN;". Byte pairs that are not valid GBK are
     * copied through unchanged.
     *
     * @param string $str GBK-encoded text
     * @return string
     */
    public function baseGbkToUnicode($str)
    {
        $rtext = "";
        // The "s" modifier lets "." match newlines so they are not dropped.
        preg_match_all("/[\x81-\xfe]?./s", $str, $regs);
        foreach ($regs[0] as $v) {
            if (ord($v) <= 127) {
                $rtext .= $v;
                continue;
            }
            if (strlen($v) === 2 && mb_check_encoding($v, 'CP936')) {
                // Big-endian is requested explicitly so the hex digits read as the code point.
                $rtext .= "&#" . hexdec(bin2hex(mb_convert_encoding($v, 'UCS-2BE', 'CP936'))) . ";";
            } else {
                $rtext .= $v;
            }
        }
        return $rtext;
    }

    /**
     * Convert JavaScript escape() sequences ("%uXXXX") to decimal numeric
     * character references ("&#NNNNN;").
     *
     * Only "%u" followed by exactly four hexadecimal digits is converted;
     * everything else is left untouched.
     *
     * @param string $text text containing %uXXXX sequences
     * @return string
     */
    public function unicode16ToUtf($text)
    {
        return preg_replace_callback(
            '/%u([0-9a-fA-F]{4})/',
            function ($match) {
                return '&#' . hexdec($match[1]) . ';';
            },
            $text
        );
    }

    /**
     * Convert UTF-8 text to GBK.
     *
     * @param string $str UTF-8 text
     * @return string GBK text
     */
    public function utfToGbk($str)
    {
        return mb_convert_encoding($str, "gbk", "utf-8");
    }

    /**
     * Convert GBK text to UTF-8 and then encode every character in the range
     * U+0080..U+FFFF as a numeric character reference, so the result is
     * plain ASCII.
     *
     * @param string $str GBK text
     * @return string
     */
    public function gbkToUtf8($str)
    {
        $str = mb_convert_encoding($str, "utf-8", "gbk");
        $convmap = array(0x0080, 0xffff, 0x0000, 0xffff);
        return mb_encode_numericentity($str, $convmap, "utf-8");
    }

    /**
     * Decode a hexadecimal string.
     *
     * A trailing odd nibble is ignored. With $isString false the raw bytes
     * are returned; otherwise the bytes are treated as "unicode" (mbstring's
     * name), converted to GBK and passed through gbkToUtf8().
     *
     * @param string $msg      hexadecimal digits
     * @param bool   $isString false to get the raw decoded bytes
     * @return string
     */
    public function Hex2Str($msg, $isString = true)
    {
        $msg = (string) $msg;
        $evenLength = strlen($msg) - (strlen($msg) % 2);
        if ($evenLength === 0) {
            return "";
        }
        $outstr = pack("H" . $evenLength, $msg);
        if ($isString == false) {
            return $outstr;
        }
        return $this->gbkToUtf8($this->unicodeToGBK($outstr));
    }

    /**
     * Encode UTF-8 text as lowercase hexadecimal digits of its "unicode"
     * (mbstring's name) representation.
     *
     * @param string $msg UTF-8 text
     * @return string
     */
    public function Str2Hex($msg)
    {
        return bin2hex($this->utf8TOunicode($msg));
    }

    /**
     * Convert GBK text to mbstring's "unicode" encoding.
     *
     * @param string $str GBK text
     * @return string
     */
    public function gbkToUnicode($str)
    {
        return mb_convert_encoding($str, "unicode", "gbk");
    }

    /**
     * Convert mbstring's "unicode" encoding to GBK.
     *
     * @param string $str text in "unicode" encoding
     * @return string GBK text
     */
    public function unicodeToGBK($str)
    {
        return mb_convert_encoding($str, "gbk", "unicode");
    }

    /**
     * Convert mbstring's "unicode" encoding to UTF-8.
     *
     * @param string $str text in "unicode" encoding
     * @return string UTF-8 text
     */
    public function unicodeTOUtf8($str)
    {
        return mb_convert_encoding($str, "utf-8", "unicode");
    }

    /**
     * Convert UTF-8 text to mbstring's "unicode" encoding.
     *
     * @param string $str UTF-8 text
     * @return string
     */
    public function utf8TOunicode($str)
    {
        return mb_convert_encoding($str, "unicode", "utf-8");
    }

    /**
     * Escape backslashes and quotes with a backslash.
     *
     * With $escape false this is addslashes(). With $escape true only the
     * backslash, single quote and double quote are escaped (NUL bytes are
     * left alone).
     *
     * @param string $string text to escape
     * @param bool   $escape use the reduced escaping set instead of addslashes()
     * @return string
     */
    public function addslash($string, $escape = false)
    {
        if (! $escape) {
            return addslashes($string);
        }
        // The former ereg_replace() step replaced each match with itself and the
        // function no longer exists in PHP 7+, so it is omitted; output is unchanged.
        return strtr($string, array(
            "\\" => "\\\\",
            "'"  => "\\'",
            "\"" => "\\\"",
        ));
    }

    /**
     * CRC32 checksum of a string as hexadecimal digits (not zero-padded).
     *
     * @param string $val text to hash
     * @return string
     */
    public function crc2Dechex($val)
    {
        return dechex(crc32($val));
    }

    /**
     * CRC32 checksum of a string exactly as returned by crc32().
     *
     * @param string $val text to hash
     * @return int
     */
    public function obj2Crc2($val)
    {
        return crc32($val);
    }

    /**
     * Convert all applicable characters to HTML entities.
     *
     * Flags and charset are fixed (ENT_QUOTES, UTF-8) so the output does not
     * depend on the PHP version's defaults.
     *
     * @param string $str UTF-8 text
     * @return string
     */
    public function htmlEntities($str)
    {
        return htmlentities($str, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Escape the HTML special characters & < > " and '.
     *
     * Flags and charset are fixed (ENT_QUOTES, UTF-8) so the output does not
     * depend on the PHP version's defaults.
     *
     * @param string $str UTF-8 text
     * @return string
     */
    public function htmlSpecialChars($str)
    {
        return htmlspecialchars($str, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Cut a string by characters with mbSubstr() and HTML-escape the result.
     *
     * @param string $str      text to cut
     * @param int    $length   maximum number of characters to keep
     * @param int    $start    character offset to start from
     * @param string $suffix   marker appended when the text was truncated
     * @param string $encoding character encoding of $str
     * @return string
     */
    public function mbSubstrHtml($str, $length, $start = 0, $suffix = '...', $encoding = "utf-8")
    {
        return $this->htmlSpecialChars($this->mbSubstr($str, $length, $start, $suffix, $encoding));
    }

    /**
     * Regular-expression replace; by default removes "[...]" groups.
     *
     * @param string $str         subject text
     * @param string $pattern     PCRE pattern
     * @param string $replacement replacement text
     * @return string
     */
    public function pregReplace($str, $pattern = '/\[(.*?)\]/i', $replacement = '')
    {
        return preg_replace($pattern, $replacement, $str);
    }

    /**
     * Return the upper-case initial of a string's first character.
     *
     * ASCII letters are upper-cased, ASCII digits are returned as-is, and
     * GB2312 level-1 Chinese characters are mapped to the first letter of
     * their pinyin. Anything else yields false.
     *
     * @param string $string UTF-8 text
     * @return string|false
     */
    public function getFirstLetter($string)
    {
        $first = mb_substr((string) $string, 0, 1, 'UTF-8');
        if ($first === false || $first === '') {
            return false;
        }

        if (strlen($first) === 1) {
            $code = ord($first);
            if (($code > 64 && $code < 91) || ($code > 96 && $code < 123)) {
                return strtoupper($first);
            }
            if ($code >= 48 && $code <= 57) {
                return $first;
            }
            return false;
        }

        // Last GBK code of each pinyin initial inside GB2312 level 1 (0xB0A1-0xD7F9).
        $dict = array('a' => 0xB0C4, 'b' => 0xB2C0, 'c' => 0xB4ED, 'd' => 0xB6E9, 'e' => 0xB7A1, 'f' => 0xB8C0, 'g' => 0xB9FD, 'h' => 0xBBF6, 'j' => 0xBFA5, 'k' => 0xC0AB, 'l' => 0xC2E7, 'm' => 0xC4C2, 'n' => 0xC5B5, 'o' => 0xC5BD, 'p' => 0xC6D9, 'q' => 0xC8BA, 'r' => 0xC8F5, 's' => 0xCBF9, 't' => 0xCDD9, 'w' => 0xCEF3, 'x' => 0xD1B8, 'y' => 0xD4D0, 'z' => 0xD7F9);

        $gbk = mb_convert_encoding($first, 'GBK', 'UTF-8');
        if (strlen($gbk) !== 2) {
            return false;
        }
        $num = (ord($gbk[0]) << 8) | ord($gbk[1]);
        // Characters outside level 1 are not sorted by pinyin; no initial can be derived.
        if ($num < 0xB0A1 || $num > 0xD7F9) {
            return false;
        }
        foreach ($dict as $initial => $lastCode) {
            if ($num <= $lastCode) {
                return strtoupper($initial);
            }
        }
        return false;
    }

    /**
     * Remove HTML tags, dropping <script> and <iframe> elements together
     * with their content.
     *
     * NOTE(review): regex-based stripping is not a security sanitizer.
     *
     * @param string $string HTML text
     * @return string
     */
    public function pregReplaceHtml($string)
    {
        if (empty($string)) {
            return $string;
        }
        // Elements whose content must vanish are handled before the generic tag
        // pattern; otherwise their tags are already gone and the content survives.
        $pattern = array(
            "'<script[^>]*?>[\s\S]*?</script>'si",
            "'<iframe[^>]*?>[\s\S]*?</iframe>'si",
            "'<[\/\!]*?[^<>]*?>'si",
        );
        return preg_replace($pattern, '', $string);
    }

    /**
     * Remove <style>, <iframe> and <script> elements (with content) and
     * <link> tags.
     *
     * NOTE(review): this does not remove event-handler attributes or
     * javascript: URLs; do not treat it as a complete XSS filter.
     *
     * @param string $string HTML text
     * @return string
     */
    public function replaceDangerCode($string)
    {
        if (empty($string)) {
            return $string;
        }
        $pattern = array(
            "'<style[^>]*?>[\s\S]*?</style>'si",
            "'<iframe[^>]*?>[\s\S]*?</iframe>'si",
            "'<script[^>]*?>[\s\S]*?</script>'si",
            // Matches both <link ...> and <link ... />.
            "'<link\b[^>]*>'si",
        );
        return preg_replace($pattern, '', $string);
    }

    /**
     * Replace characters that are awkward in URL path segments with "_".
     *
     * Replaced: space, "%20", "&", "?", "@", the two-character sequence "\/"
     * and "/".
     *
     * @param string $string text to convert
     * @return string
     */
    public function toSEO($string)
    {
        // '\/' stays first so an escaped slash still collapses to a single "_".
        $search = array(' ', '%20', '&', '?', '@', '\/', '/');
        return str_replace($search, '_', $string);
    }

    /**
     * Turn an HTML document into plain text.
     *
     * Removes <script> elements and all tags, collapses whitespace that
     * follows a line break, turns non-breaking-space entities into a regular
     * space and decodes the remaining HTML entities to UTF-8.
     *
     * @param string $document HTML text
     * @return string
     */
    public static function striptext($document)
    {
        $search = array(
            "'<script[^>]*?>.*?</script>'si", // strip out javascript
            "'<[\/\!]*?[^<>]*?>'si",          // strip out html tags
            "'([\r\n])[\s]+'",                // strip out white space
            "'&(nbsp|#160|#xa0);'i",          // non-breaking space becomes a plain space
        );
        $replace = array("", "", "\\1", " ");
        $document = preg_replace($search, $replace, $document);

        return html_entity_decode($document, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Strip tags and remove all spaces, tabs, line breaks and NUL bytes.
     *
     * NOTE(review): the original search list contained two space-like
     * entries; they are assumed to be "&nbsp;" / U+00A0 and a plain space —
     * confirm against the authoritative file.
     *
     * @param string $html HTML text
     * @return string
     */
    public function stripTags($html)
    {
        $search = array('&nbsp;', "\xC2\xA0", ' ', "\n", "\r", "\t", "\0");
        return trim(str_replace($search, '', strip_tags($html)));
    }

    /**
     * Generate a unique id for a blog.
     *
     * @param int $uid user id (currently unused)
     * @return string
     */
    public function getBlogUuid($uid = '')
    {
        return uniqid();
    }

    /**
     * Generate a unique id for a short URL (uniqid with extra entropy, dot
     * removed).
     *
     * @param int $uid user id (currently unused)
     * @return string
     */
    public function getSUrlUuid($uid = '')
    {
        $sUrl = uniqid('', true);
        return str_replace('.', '', $sUrl);
    }

    /**
     * Derive a short code from a URL: the 'crc32' hash of the URL written in
     * base 62, least-significant digit first.
     *
     * @param string $url
     * @return string
     */
    public static function getShortLink($url)
    {
        $alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890';
        $base = strlen($alphabet);

        // hexdec() works on every PHP version; "0x..." strings stopped being numeric in PHP 7.
        $value = hexdec(hash('crc32', $url));

        $ret = '';
        while ($value > 0) {
            // fmod()/floor() keep this correct when the value is a float (32-bit builds).
            $ret .= $alphabet[(int) fmod($value, $base)];
            $value = floor($value / $base);
        }
        // A hash of zero would otherwise give an empty code.
        return ($ret === '') ? $alphabet[0] : $ret;
    }

    /**
     * Decode HTML special characters, strip tags and trim to a display width.
     *
     * @param string $string  HTML text
     * @param int    $length  maximum display width (see mb_strwidth)
     * @param string $charset character encoding of $string
     * @param string $etc     marker appended when the text was truncated
     * @return string
     */
    public static function substr_filter($string, $length = 280, $charset = 'UTF-8', $etc = '...')
    {
        $filter_string = strip_tags(htmlspecialchars_decode($string));
        if (mb_strwidth($filter_string, $charset) <= $length) {
            return $filter_string;
        }
        return mb_strimwidth($filter_string, 0, $length, '', $charset) . $etc;
    }

    /**
     * Trim a string to a display width.
     *
     * @param string $string  text to trim
     * @param int    $length  maximum display width (see mb_strwidth)
     * @param string $charset character encoding of $string
     * @param string $etc     marker appended when the text was truncated
     * @return string
     */
    public static function substr_cn($string, $length = 280, $charset = 'UTF-8', $etc = '...')
    {
        if (mb_strwidth($string, $charset) <= $length) {
            return $string;
        }
        return mb_strimwidth($string, 0, $length, '', $charset) . $etc;
    }

    /**
     * Convert Chinese text to pinyin.
     *
     * Only lowercase ASCII letters and digits survive in the result: each
     * GB2312 level-1 character is replaced by its pinyin (or just the first
     * letter), and every other character, including upper-case ASCII, is
     * removed.
     *
     * @param string $_String   text to convert
     * @param bool   $isInitial true to return only the first letter of each syllable
     * @param string $_Code     "gb2312" when $_String is already GB-encoded; anything else means UTF-8
     * @return string
     */
    public static function Pinyin($_String, $isInitial = true, $_Code = "utf8")
    {
        $_Data = self::_PinyinTable();
        if ($_Code != "gb2312") {
            $_String = self::_U2_Utf8_Gb($_String);
        }
        $_String = (string) $_String;

        $_Res = "";
        $byteCount = strlen($_String);
        for ($i = 0; $i < $byteCount; $i++) {
            $_P = ord($_String[$i]);
            // 0x81-0xFE is a GBK lead byte: consume the trail byte with it so the
            // trail byte is never mistaken for an ASCII character.
            if ($_P > 0x80) {
                $_Q = ($i + 1 < $byteCount) ? ord($_String[++$i]) : 0;
                $_P = $_P * 256 + $_Q - 65536;
            }
            $_Res .= self::_Pinyin($_P, $_Data, $isInitial);
        }
        return preg_replace("/[^a-z0-9]*/", "", $_Res);
    }

    /**
     * Build (once per request) the syllable => GB2312 start code table,
     * sorted by code in descending order.
     *
     * Codes are stored as (GB2312 code - 65536), hence the negative values.
     *
     * @return array
     */
    private static function _PinyinTable()
    {
        static $table = null;
        if ($table !== null) {
            return $table;
        }

        $_DataKey = "a|ai|an|ang|ao|ba|bai|ban|bang|bao|bei|ben|beng|bi|bian|biao|bie|bin|bing|bo|bu|ca|cai|can|cang|cao|ce|ceng|cha"
            . "|chai|chan|chang|chao|che|chen|cheng|chi|chong|chou|chu|chuai|chuan|chuang|chui|chun|chuo|ci|cong|cou|cu|"
            . "cuan|cui|cun|cuo|da|dai|dan|dang|dao|de|deng|di|dian|diao|die|ding|diu|dong|dou|du|duan|dui|dun|duo|e|en|er"
            . "|fa|fan|fang|fei|fen|feng|fo|fou|fu|ga|gai|gan|gang|gao|ge|gei|gen|geng|gong|gou|gu|gua|guai|guan|guang|gui"
            . "|gun|guo|ha|hai|han|hang|hao|he|hei|hen|heng|hong|hou|hu|hua|huai|huan|huang|hui|hun|huo|ji|jia|jian|jiang"
            . "|jiao|jie|jin|jing|jiong|jiu|ju|juan|jue|jun|ka|kai|kan|kang|kao|ke|ken|keng|kong|kou|ku|kua|kuai|kuan|kuang"
            . "|kui|kun|kuo|la|lai|lan|lang|lao|le|lei|leng|li|lia|lian|liang|liao|lie|lin|ling|liu|long|lou|lu|lv|luan|lue"
            . "|lun|luo|ma|mai|man|mang|mao|me|mei|men|meng|mi|mian|miao|mie|min|ming|miu|mo|mou|mu|na|nai|nan|nang|nao|ne"
            . "|nei|nen|neng|ni|nian|niang|niao|nie|nin|ning|niu|nong|nu|nv|nuan|nue|nuo|o|ou|pa|pai|pan|pang|pao|pei|pen"
            . "|peng|pi|pian|piao|pie|pin|ping|po|pu|qi|qia|qian|qiang|qiao|qie|qin|qing|qiong|qiu|qu|quan|que|qun|ran|rang"
            . "|rao|re|ren|reng|ri|rong|rou|ru|ruan|rui|run|ruo|sa|sai|san|sang|sao|se|sen|seng|sha|shai|shan|shang|shao|"
            . "she|shen|sheng|shi|shou|shu|shua|shuai|shuan|shuang|shui|shun|shuo|si|song|sou|su|suan|sui|sun|suo|ta|tai|"
            . "tan|tang|tao|te|teng|ti|tian|tiao|tie|ting|tong|tou|tu|tuan|tui|tun|tuo|wa|wai|wan|wang|wei|wen|weng|wo|wu"
            . "|xi|xia|xian|xiang|xiao|xie|xin|xing|xiong|xiu|xu|xuan|xue|xun|ya|yan|yang|yao|ye|yi|yin|ying|yo|yong|you"
            . "|yu|yuan|yue|yun|za|zai|zan|zang|zao|ze|zei|zen|zeng|zha|zhai|zhan|zhang|zhao|zhe|zhen|zheng|zhi|zhong|"
            . "zhou|zhu|zhua|zhuai|zhuan|zhuang|zhui|zhun|zhuo|zi|zong|zou|zu|zuan|zui|zun|zuo";

        $_DataValue = "-20319|-20317|-20304|-20295|-20292|-20283|-20265|-20257|-20242|-20230|-20051|-20036|-20032|-20026|-20002|-19990"
            . "|-19986|-19982|-19976|-19805|-19784|-19775|-19774|-19763|-19756|-19751|-19746|-19741|-19739|-19728|-19725"
            . "|-19715|-19540|-19531|-19525|-19515|-19500|-19484|-19479|-19467|-19289|-19288|-19281|-19275|-19270|-19263"
            . "|-19261|-19249|-19243|-19242|-19238|-19235|-19227|-19224|-19218|-19212|-19038|-19023|-19018|-19006|-19003"
            . "|-18996|-18977|-18961|-18952|-18783|-18774|-18773|-18763|-18756|-18741|-18735|-18731|-18722|-18710|-18697"
            . "|-18696|-18526|-18518|-18501|-18490|-18478|-18463|-18448|-18447|-18446|-18239|-18237|-18231|-18220|-18211"
            . "|-18201|-18184|-18183|-18181|-18012|-17997|-17988|-17970|-17964|-17961|-17950|-17947|-17931|-17928|-17922"
            . "|-17759|-17752|-17733|-17730|-17721|-17703|-17701|-17697|-17692|-17683|-17676|-17496|-17487|-17482|-17468"
            . "|-17454|-17433|-17427|-17417|-17202|-17185|-16983|-16970|-16942|-16915|-16733|-16708|-16706|-16689|-16664"
            . "|-16657|-16647|-16474|-16470|-16465|-16459|-16452|-16448|-16433|-16429|-16427|-16423|-16419|-16412|-16407"
            . "|-16403|-16401|-16393|-16220|-16216|-16212|-16205|-16202|-16187|-16180|-16171|-16169|-16158|-16155|-15959"
            . "|-15958|-15944|-15933|-15920|-15915|-15903|-15889|-15878|-15707|-15701|-15681|-15667|-15661|-15659|-15652"
            . "|-15640|-15631|-15625|-15454|-15448|-15436|-15435|-15419|-15416|-15408|-15394|-15385|-15377|-15375|-15369"
            . "|-15363|-15362|-15183|-15180|-15165|-15158|-15153|-15150|-15149|-15144|-15143|-15141|-15140|-15139|-15128"
            . "|-15121|-15119|-15117|-15110|-15109|-14941|-14937|-14933|-14930|-14929|-14928|-14926|-14922|-14921|-14914"
            . "|-14908|-14902|-14894|-14889|-14882|-14873|-14871|-14857|-14678|-14674|-14670|-14668|-14663|-14654|-14645"
            . "|-14630|-14594|-14429|-14407|-14399|-14384|-14379|-14368|-14355|-14353|-14345|-14170|-14159|-14151|-14149"
            . "|-14145|-14140|-14137|-14135|-14125|-14123|-14122|-14112|-14109|-14099|-14097|-14094|-14092|-14090|-14087"
            . "|-14083|-13917|-13914|-13910|-13907|-13906|-13905|-13896|-13894|-13878|-13870|-13859|-13847|-13831|-13658"
            . "|-13611|-13601|-13406|-13404|-13400|-13398|-13395|-13391|-13387|-13383|-13367|-13359|-13356|-13343|-13340"
            . "|-13329|-13326|-13318|-13147|-13138|-13120|-13107|-13096|-13095|-13091|-13076|-13068|-13063|-13060|-12888"
            . "|-12875|-12871|-12860|-12858|-12852|-12849|-12838|-12831|-12829|-12812|-12802|-12607|-12597|-12594|-12585"
            . "|-12556|-12359|-12346|-12320|-12300|-12120|-12099|-12089|-12074|-12067|-12058|-12039|-11867|-11861|-11847"
            . "|-11831|-11798|-11781|-11604|-11589|-11536|-11358|-11340|-11339|-11324|-11303|-11097|-11077|-11067|-11055"
            . "|-11052|-11045|-11041|-11038|-11024|-11020|-11019|-11018|-11014|-10838|-10832|-10815|-10800|-10790|-10780"
            . "|-10764|-10587|-10544|-10533|-10519|-10331|-10329|-10328|-10322|-10315|-10309|-10307|-10296|-10281|-10274"
            . "|-10270|-10262|-10260|-10256|-10254";

        $table = array_combine(
            explode("|", $_DataKey),
            array_map('intval', explode("|", $_DataValue))
        );
        arsort($table);
        return $table;
    }

    /**
     * Produce GBK bytes from UTF-8 text, or from a single integer code point.
     *
     * Characters that cannot be represented in GBK are substituted by
     * mbstring instead of failing the whole conversion.
     *
     * @param string|int $_C UTF-8 text, or a Unicode code point when an int is passed
     * @return string GBK bytes
     */
    private static function _U2_Utf8_Gb($_C)
    {
        // Text is converted directly; it must never be compared against integers,
        // which misfires for numeric strings and on PHP 8.
        if (! is_int($_C)) {
            return mb_convert_encoding((string) $_C, 'GBK', 'UTF-8');
        }

        $_String = "";
        if ($_C < 0x80) {
            $_String .= chr($_C);
        } elseif ($_C < 0x800) {
            $_String .= chr(0xC0 | $_C >> 6);
            $_String .= chr(0x80 | $_C & 0x3F);
        } elseif ($_C < 0x10000) {
            $_String .= chr(0xE0 | $_C >> 12);
            $_String .= chr(0x80 | $_C >> 6 & 0x3F);
            $_String .= chr(0x80 | $_C & 0x3F);
        } elseif ($_C < 0x200000) {
            $_String .= chr(0xF0 | $_C >> 18);
            $_String .= chr(0x80 | $_C >> 12 & 0x3F);
            $_String .= chr(0x80 | $_C >> 6 & 0x3F);
            $_String .= chr(0x80 | $_C & 0x3F);
        }
        return mb_convert_encoding($_String, 'GBK', 'UTF-8');
    }

    /**
     * Map one character code to its output text.
     *
     * @param int   $_Num      ASCII code, or (GBK code - 65536) for a double-byte character
     * @param array $_Data     table from _PinyinTable(), sorted by code descending
     * @param bool  $isInitial true to return only the first letter of the syllable
     * @return string the ASCII character, a pinyin syllable/initial, or "" when unknown
     */
    private static function _Pinyin($_Num, $_Data, $isInitial)
    {
        if ($_Num > 0 && $_Num < 0x80) {
            return chr($_Num);
        }
        if ($_Num < - 20319 || $_Num > - 10247) {
            return "";
        }
        // The table is sorted descending, so the first start code not above the
        // character's code identifies its syllable.
        foreach ($_Data as $syllable => $startCode) {
            if ($startCode <= $_Num) {
                return $isInitial ? substr($syllable, 0, 1) : $syllable;
            }
        }
        return "";
    }

    /**
     * Decode a URL-encoded value and convert it to UTF-8.
     *
     * The encoding is detected among UTF-8, CP936, EUC-CN, BIG-5 and EUC-TW;
     * when detection fails the text is left as it is. JavaScript "%uXXXX"
     * escapes are decoded afterwards.
     *
     * @param string $word URL-encoded text
     * @return string
     */
    public static function toUtf8($word)
    {
        $word = rawurldecode($word);
        $detected = mb_detect_encoding($word, 'UTF-8,CP936,EUC-CN,BIG-5,EUC-TW', true);
        if ($detected !== false && $detected !== 'UTF-8') {
            $word = mb_convert_encoding($word, 'UTF-8', $detected);
        }
        if (stripos($word, '%u') !== false) {
            $word = self::unescape($word);
        }
        return $word;
    }

    /**
     * Decode JavaScript escape() sequences ("%uXXXX") to UTF-8.
     *
     * Consecutive sequences are decoded together as UTF-16BE so surrogate
     * pairs work. All other text, including line breaks, is preserved.
     *
     * @param string $str text containing %uXXXX sequences
     * @return string
     */
    public static function unescape($str)
    {
        return preg_replace_callback(
            '/(?:%u[0-9a-f]{4})+/i',
            function ($match) {
                $hex = str_ireplace('%u', '', $match[0]);
                return mb_convert_encoding(pack('H*', $hex), 'UTF-8', 'UTF-16BE');
            },
            $str
        );
    }

    /**
     * Length of a UTF-8 string where every ASCII character counts 1 and
     * every multibyte character counts 2.
     *
     * @param string $str UTF-8 text
     * @return int
     * @throws InvalidArgumentException when a byte cannot start a UTF-8 sequence
     */
    public static function utf8_strlen($str)
    {
        $count = 0;
        $byteCount = strlen($str);
        for ($i = 0; $i < $byteCount; $i++) {
            $value = ord($str[$i]);
            if ($value <= 127) {
                $count++;
                continue;
            }
            // Skip the continuation bytes of the sequence.
            if ($value >= 192 && $value <= 223) {
                $i += 1;
            } elseif ($value >= 224 && $value <= 239) {
                $i += 2;
            } elseif ($value >= 240 && $value <= 247) {
                $i += 3;
            } else {
                throw new InvalidArgumentException('Not a UTF-8 compatible string');
            }
            $count += 2;
        }
        return $count;
    }

    /**
     * Length of a UTF-8 string where every ASCII character counts 1 and
     * every non-ASCII character counts $chinese.
     *
     * Bytes above 127 that are not a valid lead byte are counted as one
     * non-ASCII character each.
     *
     * @param string $str     UTF-8 text
     * @param int    $chinese weight of one non-ASCII character
     * @return int
     */
    public static function countStrLen($str, $chinese = 2)
    {
        $count = 0;
        $byteCount = strlen($str);
        for ($i = 0; $i < $byteCount; $i++) {
            $value = ord($str[$i]);
            if ($value <= 127) {
                $count++;
                continue;
            }
            $count += $chinese;
            if ($value >= 192 && $value <= 223) {
                $i += 1;
            } elseif ($value >= 224 && $value <= 239) {
                $i += 2;
            } elseif ($value >= 240 && $value <= 247) {
                $i += 3;
            }
        }
        return $count;
    }

    /**
     * Clean up HTML pasted from MS Word.
     *
     * Steps: normalise curly quotes and dashes, decode HTML entities, remove
     * C-style comments, strip tags not listed in $allowtags, collapse runs of
     * whitespace, reduce bold/italic/underline tags to bare <b>/<i>/<u>, and
     * remove leftover HTML comments.
     *
     * @param string $content   HTML text (UTF-8)
     * @param string $allowtags tags to keep, in strip_tags() format
     * @return string
     */
    public static function clearHtml($content, $allowtags = '')
    {
        // Replace MS special characters first.
        $search = array('/‘/u', '/’/u', '/“/u', '/”/u', '/—/u');
        $replace = array('\'', '\'', '"', '"', '-');
        $content = self::_replaceOrKeep($search, $replace, $content);

        // Some MS headers mix encoded and raw entities; decode them all.
        $content = html_entity_decode($content, ENT_QUOTES, 'UTF-8');

        // C-style comments embedded in HTML comments can stop strip_tags() from
        // removing the surrounding HTML comment, so drop them first.
        if (strpos($content, '/*') !== false) {
            $content = self::_replaceOrKeep('#/\*.*?\*/#s', '', $content);
        }

        // Put a space into expressions such as "<1" so strip_tags() does not
        // treat them as the start of a tag.
        $content = self::_replaceOrKeep('/<([0-9]+)/', '< $1', $content);
        $content = strip_tags($content, $allowtags);

        // Trim leading/trailing whitespace runs and collapse inner ones to one space.
        $content = self::_replaceOrKeep(array('/^\s\s+/', '/\s\s+$/', '/\s\s+/u'), array('', '', ' '), $content);

        // Drop attributes from bold/italic/underline tags. "\b" keeps tags such as
        // <br>, <img> and <ul> from being mistaken for <b>, <i> and <u>.
        $search = array(
            '#<(strong|b)\b[^>]*>(.*?)</(strong|b)>#isu',
            '#<(em|i)\b[^>]*>(.*?)</(em|i)>#isu',
            '#<u\b[^>]*>(.*?)</u>#isu',
        );
        $replace = array('<b>$2</b>', '<i>$2</i>', '<u>$1</u>');
        $content = self::_replaceOrKeep($search, $replace, $content);

        // Newer Word exports can leave HTML comments (e.g. "if gte mso 9" blocks)
        // behind; remove each comment on its own.
        if (strpos($content, '<!--') !== false) {
            $content = self::_replaceOrKeep('/<!--.*?-->/su', '', $content);
        }
        return $content;
    }

    /**
     * preg_replace() that returns the subject unchanged when PCRE reports an
     * error (for example invalid UTF-8 with the "u" modifier) instead of null.
     *
     * @param string|array $pattern
     * @param string|array $replacement
     * @param string       $subject
     * @return string
     */
    private static function _replaceOrKeep($pattern, $replacement, $subject)
    {
        $result = preg_replace($pattern, $replacement, $subject);
        return ($result === null) ? $subject : $result;
    }
}