Encoding conversion functions: UTF-8 to Windows-1251 and Windows-1251 to UTF-8

Sometimes a PHP script needs to convert data from one character encoding to another.

This article describes several functions that I have used more than once.

The functions and scripts were not written by me, but they work well and often help with PHP–JavaScript interaction.

So, the conversion from utf-8 to windows-1251 and vice versa:

The first function: UTF-8 to CP1251

 /**
  * Converts Russian text from UTF-8 to Windows-1251 (CP1251) using a
  * byte-sequence translation table.
  *
  * Only the 66 Russian letters (U+0410..U+044F plus U+0401 and U+0451) are
  * translated; every other byte of the input is passed through unchanged.
  * The table is written with byte escapes so the result does not depend on
  * the encoding in which this source file is saved.
  *
  * @param string $s UTF-8 encoded input.
  * @return string The input with Russian letters replaced by CP1251 bytes.
  */
 function utf8_win1($s)
 {
     static $map = null;

     if ($map === null) {
         // The two letters that sit outside the contiguous alphabet block.
         $map = array(
             "\xD0\x81" => "\xA8", // U+0401, capital IO
             "\xD1\x91" => "\xB8", // U+0451, small io
         );

         // U+0410..U+044F map linearly onto CP1251 bytes 0xC0..0xFF
         // (code point minus 0x350).
         for ($code = 0x0410; $code <= 0x044F; $code++) {
             $utf8 = chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
             $map[$utf8] = chr($code - 0x0350);
         }
     }

     // strtr() with an array makes a single pass and never rescans text it
     // has already replaced.
     return strtr($s, $map);
 }

The second function: UTF-8 to CP1251

 /**
  * Converts Russian text from UTF-8 to Windows-1251 (CP1251) by decoding
  * two-byte UTF-8 sequences by hand.
  *
  * Behaviour:
  *  - ASCII bytes (0..127) are copied as-is.
  *  - Two-byte sequences are decoded to a code point. U+0401 and U+0451 map
  *    to bytes 168 and 184; U+0410..U+044F map to (code point - 848).
  *  - Any other two-byte character has no slot in this table and is replaced
  *    with "?" instead of an arbitrary wrapped byte.
  *  - Bytes belonging to longer sequences and stray continuation bytes are
  *    dropped.
  *
  * @param string $s UTF-8 encoded input.
  * @return string CP1251 encoded output.
  */
 function utf8_win($s)
 {
     $out = "";
     $lead = 0;       // lead byte of the two-byte sequence being decoded
     $byte2 = false;  // true while the next byte is expected to be a continuation
     $len = strlen($s);

     for ($c = 0; $c < $len; $c++) {
         $i = ord($s[$c]);

         if ($byte2) {
             $byte2 = false;
             if (($i & 0xC0) == 0x80) {
                 // 110xxxxx 10yyyyyy -> xxxxxyyyyyy
                 $code = (($lead & 0x1F) << 6) | ($i & 0x3F);
                 if ($code == 1025) {
                     $out .= chr(168);
                 } elseif ($code == 1105) {
                     $out .= chr(184);
                 } elseif ($code >= 1040 && $code <= 1103) {
                     $out .= chr($code - 848);
                 } else {
                     $out .= "?";
                 }
                 continue;
             }
             // Malformed input: the lead byte was not followed by a
             // continuation byte. Discard the lead byte and process the
             // current byte normally below.
         }

         if ($i <= 127) {
             $out .= $s[$c];
         } elseif (($i >> 5) == 6) {
             // 110xxxxx: start of a two-byte sequence.
             $lead = $i;
             $byte2 = true;
         }
     }

     return $out;
 }

The first function: Windows-1251 to UTF-8 (note that it outputs the Cyrillic letters as HTML numeric character references, e.g. &#1040;)

  /**
   * Converts Windows-1251 (CP1251) text to an encoding-neutral form by
   * replacing Cyrillic letters with HTML numeric character references
   * ("&#NNNN;").
   *
   * Despite its name, the function does not emit raw UTF-8 bytes: the result
   * is ASCII plus any untouched input bytes, intended for HTML output.
   *
   * Bytes 192..255 map to code point (byte + 848). Eight further letters that
   * sit below 192 in CP1251 are looked up in a small table. All other bytes
   * are copied unchanged.
   *
   * @param string $in_text CP1251 encoded input.
   * @return string Text with Cyrillic letters as numeric character references.
   */
  function win_utf8($in_text)
  {
      // CP1251 byte => Unicode code point. Keys are byte escapes so the table
      // does not depend on the encoding of this source file.
      $other = array(
          "\xA8" => 1025, // U+0401
          "\xB8" => 1105, // U+0451
          "\xAA" => 1028, // U+0404
          "\xBA" => 1108, // U+0454
          "\xB2" => 1030, // U+0406
          "\xB3" => 1110, // U+0456
          "\xAF" => 1031, // U+0407
          "\xBF" => 1111, // U+0457
      );

      $output = "";
      $len = strlen($in_text);

      for ($i = 0; $i < $len; $i++) {
          $char = $in_text[$i];
          $byte = ord($char);

          if ($byte > 191) {
              $output .= "&#" . ($byte + 848) . ";";
          } elseif (isset($other[$char])) {
              $output .= "&#" . $other[$char] . ";";
          } else {
              $output .= $char;
          }
      }

      return $output;
  }

Transcoding function from windows-1251 to utf-8

Previously I used iconv("CP1251", "UTF-8", $text_for_convert), but as you know, iconv puts a noticeable load on the server and works more slowly than the function below.

Something else also forced me to abandon iconv: at some point, when converting from Windows-1251 to UTF-8, it returned an empty string for no apparent reason.

!!! If you swap $in_arr and $out_arr, the function will work "in the opposite direction", i.e. it will convert UTF-8 to CP1251.

 /**
  * Converts Russian text from Windows-1251 (CP1251) to UTF-8.
  *
  * Only the 66 Russian letters are translated: bytes 192..255 and the two
  * letters stored at bytes 168 and 184. Every other byte is passed through
  * unchanged, so input containing other non-ASCII CP1251 characters will not
  * yield valid UTF-8.
  *
  * The translation is done with strtr(), which makes a single pass and never
  * re-examines bytes it has already produced. The result therefore does not
  * depend on the order of the table entries.
  *
  * @param string $txt CP1251 encoded input.
  * @return string UTF-8 encoded output.
  */
 function cp1251_to_utf8($txt)
 {
     static $map = null;

     if ($map === null) {
         // Parallel lists: $in_arr[n] (CP1251 byte) becomes $out_arr[n]
         // (UTF-8 sequence). Start with the two letters outside 192..255.
         $in_arr = array(chr(168), chr(184));
         $out_arr = array(chr(208) . chr(129), chr(209) . chr(145));

         // Bytes 192..255 correspond to code points U+0410..U+044F
         // (byte + 848), each encoded as a two-byte UTF-8 sequence.
         for ($byte = 192; $byte <= 255; $byte++) {
             $code = $byte + 848;
             $in_arr[] = chr($byte);
             $out_arr[] = chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
         }

         $map = array_combine($in_arr, $out_arr);
     }

     return strtr($txt, $map);
 }