This page has been robot translated, sorry for typos if any. Original content here.

Encoding conversion functions: UTF-8 to Windows-1251 and Windows-1251 to UTF-8

Sometimes PHP code needs to convert data from one encoding to another.

This article describes several functions that I have used many times myself.

I did not write these functions and scripts myself, but they work and often help when PHP and JavaScript exchange data.

So, here is the conversion from UTF-8 to Windows-1251 and back:

First function: UTF-8 to CP1251

 /**
  * Convert Russian Cyrillic text from UTF-8 to Windows-1251 (CP1251).
  *
  * Only the letters U+0410..U+044F plus U+0401 / U+0451 (capital and small
  * IO) are converted. Every other byte sequence, including ASCII, is
  * returned unchanged.
  *
  * @param string $s UTF-8 encoded input.
  * @return string Input with the supported letters replaced by their
  *                single-byte CP1251 equivalents.
  */
 function utf8_win1($s) {
     static $table = null;

     if ($table === null) {
         // Values are written as explicit bytes so the table does not depend
         // on the encoding this source file happens to be saved in.
         $table = array(
             "\xD0\x81" => "\xA8", // U+0401, capital IO
             "\xD1\x91" => "\xB8", // U+0451, small IO
         );
         // In CP1251 the block U+0410..U+044F is laid out contiguously at
         // 0xC0..0xFF, i.e. code point minus 0x350 (848).
         for ($code = 0x0410; $code <= 0x044F; $code++) {
             $utf8 = chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
             $table[$utf8] = chr($code - 0x0350);
         }
     }

     // strtr() with an array scans the string once, so bytes that have
     // already been replaced are never matched again.
     return strtr($s, $table);
 }

Second function: UTF-8 to CP1251

 /**
  * Convert UTF-8 text to Windows-1251 (CP1251) by decoding two-byte
  * sequences arithmetically.
  *
  * ASCII bytes are copied as is. A two-byte UTF-8 sequence is decoded to its
  * code point; U+0401 and U+0451 (capital / small IO) map to bytes 168 and
  * 184, and U+0410..U+044F map to code point minus 848. Any other two-byte
  * code point is written as "?". Bytes that do not start a complete two-byte
  * sequence (stray continuation bytes, three- and four-byte sequences) are
  * dropped.
  *
  * @param string $s UTF-8 encoded input.
  * @return string CP1251 encoded output.
  */
 function utf8_win($s) {
     $out = "";
     $length = strlen($s);

     for ($c = 0; $c < $length; $c++) {
         $i = ord($s[$c]);

         if ($i <= 127) {
             $out .= $s[$c];
             continue;
         }

         // Lead byte 110xxxxx must be followed by a continuation byte 10xxxxxx.
         $is_pair = ($i >> 5) == 6
             && $c + 1 < $length
             && (ord($s[$c + 1]) >> 6) == 2;
         if (!$is_pair) {
             continue;
         }

         // Five payload bits from the lead byte, six from the continuation.
         $code = (($i & 31) << 6) | (ord($s[$c + 1]) & 63);
         $c++; // the continuation byte has been consumed

         if ($code == 1025) {
             $out .= chr(168);
         } elseif ($code == 1105) {
             $out .= chr(184);
         } elseif ($code >= 1040 && $code <= 1103) {
             $out .= chr($code - 848);
         } else {
             // Not covered by this table; avoid emitting an arbitrary byte.
             $out .= "?";
         }
     }

     return $out;
 }

First function: Windows-1251 to UTF-8 (the result is written as HTML numeric character references such as &#1040;)

 /**
  * Convert Windows-1251 (CP1251) text to HTML numeric character references.
  *
  * Bytes above 191 (the contiguous Cyrillic block 0xC0..0xFF) become
  * "&#N;" with N = byte value + 848. Eight further letters listed in the
  * lookup table below are converted the same way using their own code
  * points. All other bytes are copied unchanged.
  *
  * @param string $in_text CP1251 encoded input.
  * @return string Input with the supported letters replaced by "&#N;".
  */
 function win_utf8($in_text) {
     // CP1251 byte => Unicode code point for letters outside 0xC0..0xFF.
     // Keys are explicit bytes so ASCII letters that merely look similar
     // (e, I, i) can never match.
     $other = array(
         "\xA8" => 1025, // U+0401
         "\xB8" => 1105, // U+0451
         "\xAA" => 1028, // U+0404
         "\xBA" => 1108, // U+0454
         "\xB2" => 1030, // U+0406
         "\xB3" => 1110, // U+0456
         "\xAF" => 1031, // U+0407
         "\xBF" => 1111, // U+0457
     );

     $output = "";
     $length = strlen($in_text);

     for ($i = 0; $i < $length; $i++) {
         // Square-bracket offsets: the {} form was removed in PHP 8.
         $char = $in_text[$i];
         $byte = ord($char);

         if ($byte > 191) {
             $output .= "&#" . ($byte + 848) . ";";
         } elseif (isset($other[$char])) {
             $output .= "&#" . $other[$char] . ";";
         } else {
             $output .= $char;
         }
     }

     return $output;
 }

Function for transcoding from Windows-1251 to UTF-8

Earlier I used iconv("CP1251", "UTF-8", $text_for_convert), but, as you know, iconv puts a heavy load on the server and runs slower than the function below.

Something else also forced me to give up iconv: at some point, while transcoding from Windows-1251 to UTF-8, it returned an empty string for no apparent reason.

Note: if you swap $in_arr and $out_arr, the function will work in the opposite direction, i.e. it will convert UTF-8 to CP1251.

 /**
  * Convert Russian Cyrillic text from Windows-1251 (CP1251) to UTF-8.
  *
  * Converts bytes 192..255 (U+0410..U+044F) and bytes 168 / 184
  * (U+0401 / U+0451, capital and small IO). Every other byte, including
  * ASCII, is returned unchanged.
  *
  * @param string $txt CP1251 encoded input.
  * @return string Input with the supported letters replaced by their
  *                two-byte UTF-8 sequences.
  */
 function cp1251_to_utf8($txt) {
     // Parallel lists: $in_arr[k] is a CP1251 byte, $out_arr[k] its UTF-8
     // form. Swapping the two arrays in the array_combine() call below
     // yields the reverse (UTF-8 to CP1251) mapping.
     $in_arr  = array(chr(168), chr(184));
     $out_arr = array(chr(208) . chr(129), chr(209) . chr(145));

     for ($byte = 192; $byte <= 255; $byte++) {
         $code = $byte + 848; // CP1251 0xC0..0xFF is U+0410..U+044F
         $in_arr[]  = chr($byte);
         $out_arr[] = chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
     }

     // strtr() replaces in a single pass, so the 208/209 lead bytes it
     // writes are never re-read as CP1251 letters. str_replace() with arrays
     // applies the pairs one after another and is only correct for one
     // specific ordering of the table.
     $txt = strtr($txt, array_combine($in_arr, $out_arr));
     return $txt;
 }