EncodingHelper.php 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. <?php
  2. /**
  3. * Encoding Helper - convert various encodings to / from UTF-8
  4. * @package IDNA Convert
  5. * @subpackage charset transcoding
  6. * @author Matthias Sommerfeld, <mso@phlylabs.de>
  7. * @copyright 2003-2016 phlyLabs Berlin, http://phlylabs.de
  8. * @version 1.0.0 2016-01-08
  9. */
  10. namespace Mso\IdnaConvert;
  11. class EncodingHelper
  12. {
  13. /**
  14. * Convert a string from any of various encodings to UTF-8
  15. *
  16. * @param string $string String to encode
  17. *[@param string $encoding Encoding; Default: ISO-8859-1]
  18. *[@param bool $safe_mode Safe Mode: if set to TRUE, the original string is retunred on errors]
  19. * @return string|false The encoded string or false on failure
  20. * @since 0.0.1
  21. */
  22. public static function toUtf8($string = '', $encoding = 'iso-8859-1', $safe_mode = false)
  23. {
  24. $safe = ($safe_mode) ? $string : false;
  25. if (strtoupper($encoding) == 'UTF-8' || strtoupper($encoding) == 'UTF8') {
  26. return $string;
  27. }
  28. if (strtoupper($encoding) == 'ISO-8859-1') {
  29. return \utf8_encode($string);
  30. } if (strtoupper($encoding) == 'WINDOWS-1252') {
  31. return \utf8_encode(self::map_w1252_iso8859_1($string));
  32. }
  33. if (strtoupper($encoding) == 'UNICODE-1-1-UTF-7') {
  34. $encoding = 'utf-7';
  35. }
  36. if (function_exists('mb_convert_encoding')) {
  37. $conv = @mb_convert_encoding($string, 'UTF-8', strtoupper($encoding));
  38. if ($conv) {
  39. return $conv;
  40. }
  41. }
  42. if (function_exists('iconv')) {
  43. $conv = @iconv(strtoupper($encoding), 'UTF-8', $string);
  44. if ($conv) {
  45. return $conv;
  46. }
  47. }
  48. if (function_exists('libiconv')) {
  49. $conv = @libiconv(strtoupper($encoding), 'UTF-8', $string);
  50. if ($conv) {
  51. return $conv;
  52. }
  53. }
  54. return $safe;
  55. }
  56. /**
  57. * Convert a string from UTF-8 to any of various encodings
  58. *
  59. * @param string $string String to decode
  60. *[@param string $encoding Encoding; Default: ISO-8859-1]
  61. *[@param bool $safe_mode Safe Mode: if set to TRUE, the original string is retunred on errors]
  62. * @return string|false The decoded string or false on failure
  63. * @since 0.0.1
  64. */
  65. public static function fromUtf8($string = '', $encoding = 'iso-8859-1', $safe_mode = false)
  66. {
  67. $safe = ($safe_mode) ? $string : false;
  68. if (!$encoding) $encoding = 'ISO-8859-1';
  69. if (strtoupper($encoding) == 'UTF-8' || strtoupper($encoding) == 'UTF8') {
  70. return $string;
  71. }
  72. if (strtoupper($encoding) == 'ISO-8859-1') {
  73. return utf8_decode($string);
  74. }
  75. if (strtoupper($encoding) == 'WINDOWS-1252') {
  76. return self::map_iso8859_1_w1252(utf8_decode($string));
  77. }
  78. if (strtoupper($encoding) == 'UNICODE-1-1-UTF-7') {
  79. $encoding = 'utf-7';
  80. }
  81. if (function_exists('mb_convert_encoding')) {
  82. $conv = @mb_convert_encoding($string, strtoupper($encoding), 'UTF-8');
  83. if ($conv) {
  84. return $conv;
  85. }
  86. }
  87. if (function_exists('iconv')) {
  88. $conv = @iconv('UTF-8', strtoupper($encoding), $string);
  89. if ($conv) {
  90. return $conv;
  91. }
  92. }
  93. if (function_exists('libiconv')) {
  94. $conv = @libiconv('UTF-8', strtoupper($encoding), $string);
  95. if ($conv) {
  96. return $conv;
  97. }
  98. }
  99. return $safe;
  100. }
  101. /**
  102. * Special treatment for our guys in Redmond
  103. * Windows-1252 is basically ISO-8859-1 -- with some exceptions, which get accounted for here
  104. *
  105. * @param string $string Your input in Win1252
  106. * @return string The resulting ISO-8859-1 string
  107. * @since 0.0.1
  108. */
  109. protected static function map_w1252_iso8859_1($string = '')
  110. {
  111. if ($string == '') {
  112. return '';
  113. }
  114. $return = '';
  115. for ($i = 0; $i < strlen($string); ++$i) {
  116. $c = ord($string{$i});
  117. switch ($c) {
  118. case 129: $return .= chr(252); break;
  119. case 132: $return .= chr(228); break;
  120. case 142: $return .= chr(196); break;
  121. case 148: $return .= chr(246); break;
  122. case 153: $return .= chr(214); break;
  123. case 154: $return .= chr(220); break;
  124. case 225: $return .= chr(223); break;
  125. default: $return .= chr($c);
  126. }
  127. }
  128. return $return;
  129. }
  130. /**
  131. * Special treatment for our guys in Redmond
  132. * Windows-1252 is basically ISO-8859-1 -- with some exceptions, which get accounted for here
  133. *
  134. * @param string $string Your input in ISO-8859-1
  135. * @return string The resulting Win1252 string
  136. * @since 0.0.1
  137. */
  138. protected static function map_iso8859_1_w1252($string = '')
  139. {
  140. if ($string == '') {
  141. return '';
  142. }
  143. $return = '';
  144. for ($i = 0; $i < strlen($string); ++$i) {
  145. $c = ord($string{$i});
  146. switch ($c) {
  147. case 196: $return .= chr(142); break;
  148. case 214: $return .= chr(153); break;
  149. case 220: $return .= chr(154); break;
  150. case 223: $return .= chr(225); break;
  151. case 228: $return .= chr(132); break;
  152. case 246: $return .= chr(148); break;
  153. case 252: $return .= chr(129); break;
  154. default: $return .= chr($c);
  155. }
  156. }
  157. return $return;
  158. }
  159. }