Encoding conversion functions: UTF-8 to Windows-1251 and Windows-1251 to UTF-8
Sometimes PHP code needs to re-encode data from one encoding to another.
This article describes several functions that I have used more than once.
The functions and scripts were not written by me, but they work reliably and often help with PHP–JavaScript interactions.
So, here is the conversion from UTF-8 to windows-1251 and vice versa:
The first function from UTF-8 to CP1251
/**
 * Convert Russian text from UTF-8 to Windows-1251 using a lookup table.
 *
 * Only the 66 Russian letters (U+0410..U+044F plus U+0401 and U+0451)
 * are translated; every other byte sequence is returned unchanged.
 *
 * @param string $s UTF-8 encoded input.
 * @return string The input with Russian letters replaced by their
 *                single-byte Windows-1251 equivalents.
 */
function utf8_win1($s)
{
    // The table is built once and reused on later calls.
    static $map = null;

    if ($map === null) {
        // The two letters that do not sit in the contiguous CP1251 block.
        $map = array(
            "\xD0\x81" => "\xA8", // U+0401 CYRILLIC CAPITAL LETTER IO
            "\xD1\x91" => "\xB8", // U+0451 CYRILLIC SMALL LETTER IO
        );

        // U+0410..U+044F map linearly onto CP1251 bytes 0xC0..0xFF
        // (code point minus 0x350). The key is the 2-byte UTF-8 form.
        for ($code = 0x410; $code <= 0x44F; $code++) {
            $utf8 = chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
            $map[$utf8] = chr($code - 0x350);
        }
    }

    // strtr() with an array is a single pass, so bytes produced by one
    // replacement are never re-examined by another.
    return strtr($s, $map);
}
The second function from UTF-8 to CP1251
/**
 * Convert Russian text from UTF-8 to Windows-1251 by decoding each
 * 2-byte UTF-8 sequence arithmetically.
 *
 * ASCII bytes are copied through. A 2-byte sequence is decoded to its
 * code point and mapped as follows:
 *   - U+0401 -> 0xA8, U+0451 -> 0xB8 (the IO letters),
 *   - U+0410..U+044F -> code point minus 848 (0xC0..0xFF),
 *   - anything else -> "?" (it has no slot in this mapping).
 * Bytes that are neither ASCII nor part of a well-formed 2-byte
 * sequence (stray continuation bytes, 3- and 4-byte sequences, a lead
 * byte without a continuation byte) are dropped.
 *
 * @param string $s UTF-8 encoded input.
 * @return string Windows-1251 encoded output.
 */
function utf8_win($s)
{
    $out = "";
    $len = strlen($s); // hoisted: the length does not change in the loop

    for ($c = 0; $c < $len; $c++) {
        $i = ord($s[$c]);

        // Plain ASCII is identical in both encodings.
        if ($i <= 127) {
            $out .= $s[$c];
            continue;
        }

        // 110xxxxx lead byte followed by a 10xxxxxx continuation byte.
        $is_pair = ($i >> 5) == 6
            && $c + 1 < $len
            && (ord($s[$c + 1]) >> 6) == 2;

        if (!$is_pair) {
            // Not something this converter can represent: skip the byte.
            continue;
        }

        $code = (($i & 0x1F) << 6) | (ord($s[$c + 1]) & 0x3F);
        $c++; // the continuation byte has been consumed

        if ($code == 1025) {
            $out .= chr(168);
        } elseif ($code == 1105) {
            $out .= chr(184);
        } elseif ($code >= 0x410 && $code <= 0x44F) {
            $out .= chr($code - 848);
        } else {
            // Outside the Russian alphabet: emit a visible placeholder
            // instead of a wrapped-around garbage byte.
            $out .= "?";
        }
    }

    return $out;
}
The first function from windows-1251 to UTF-8 (Cyrillic letters are emitted as HTML numeric character references such as &#1040;)
/**
 * Convert Windows-1251 text to HTML numeric character references.
 *
 * Despite its name the function does not produce raw UTF-8 bytes: each
 * Cyrillic byte becomes "&#NNNN;" where NNNN is the decimal Unicode code
 * point. Bytes 0xC0..0xFF map to byte + 848; the Russian IO letters and
 * the Ukrainian letters outside that block are looked up in a table.
 * All other bytes are copied through unchanged.
 *
 * @param string $in_text Windows-1251 encoded input.
 * @return string Text with Cyrillic letters replaced by numeric references.
 */
function win_utf8($in_text)
{
    // CP1251 byte => Unicode code point, for letters below 0xC0.
    // NOTE(review): the 1030/1110 entries are taken to be the Cyrillic
    // letters at 0xB2/0xB3, not Latin "I"/"i" - confirm against the
    // original CP1251-encoded script.
    static $other = array(
        "\xA8" => 1025, // CYRILLIC CAPITAL LETTER IO
        "\xB8" => 1105, // CYRILLIC SMALL LETTER IO
        "\xAA" => 1028, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
        "\xBA" => 1108, // CYRILLIC SMALL LETTER UKRAINIAN IE
        "\xB2" => 1030, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
        "\xB3" => 1110, // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
        "\xAF" => 1031, // CYRILLIC CAPITAL LETTER YI
        "\xBF" => 1111, // CYRILLIC SMALL LETTER YI
    );

    $output = "";
    $len = strlen($in_text);

    for ($i = 0; $i < $len; $i++) {
        // Square-bracket offsets: the {} form was removed in PHP 8.0.
        $char = $in_text[$i];
        $byte = ord($char);

        if ($byte > 191) {
            // 0xC0..0xFF is the contiguous Russian alphabet block.
            $output .= "&#" . ($byte + 848) . ";";
        } elseif (isset($other[$char])) {
            $output .= "&#" . $other[$char] . ";";
        } else {
            $output .= $char;
        }
    }

    return $output;
}
Transcoding function from windows-1251 to utf-8
Previously I used iconv("CP1251", "UTF-8", $text_for_convert), but as you know, iconv puts a noticeable load on the server and works more slowly than the function below.
Another thing also forced me to abandon iconv: at some point, when transcoding from windows-1251 to UTF-8, it returned an empty string for no apparent reason.
!!! If you swap $in_arr and $out_arr, the function will work "in the opposite direction", i.e. it will convert UTF-8 to CP-1251.
/**
 * Convert Russian text from Windows-1251 to UTF-8.
 *
 * Translates the 66 Russian letters (bytes 0xC0..0xFF plus 0xA8 and
 * 0xB8) into their 2-byte UTF-8 sequences. Every other byte is returned
 * unchanged.
 *
 * $in_arr and $out_arr are parallel lists: $in_arr[k] is a CP1251 byte
 * and $out_arr[k] is its UTF-8 encoding. Swapping the two arguments of
 * array_combine() yields the reverse (UTF-8 to CP1251) table.
 *
 * @param string $txt Windows-1251 encoded input.
 * @return string UTF-8 encoded output.
 */
function cp1251_to_utf8($txt)
{
    // Built on the first call only.
    static $map = null;

    if ($map === null) {
        // The IO letters live outside the contiguous alphabet block.
        $in_arr  = array(chr(168), chr(184));
        $out_arr = array(chr(208) . chr(129), chr(209) . chr(145));

        // Bytes 192..255 correspond to code points byte + 848
        // (U+0410..U+044F), encoded here as 2-byte UTF-8.
        for ($byte = 192; $byte <= 255; $byte++) {
            $code = $byte + 848;
            $in_arr[]  = chr($byte);
            $out_arr[] = chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
        }

        $map = array_combine($in_arr, $out_arr);
    }

    // Single-pass replacement: unlike str_replace() with arrays, bytes
    // emitted for one letter (e.g. the 0xD0 lead byte) can never be
    // picked up again as input for another pair, so the result does not
    // depend on the order of the table.
    return strtr($txt, $map);
}
Comments
When commenting, remember that the content and tone of your message can hurt the feelings of real people. Show respect and tolerance toward your interlocutors even if you do not share their opinion: your behavior under the freedom of expression and anonymity provided by the Internet changes not only the virtual world but also the real one. All comments are hidden from search indexing, and spam is moderated.