/*
Every function seen so far is either incomplete or resource-consuming. Here are two: integer to UTF-8 sequence (i3u) and UTF-8 sequence to integer (u3i). Below them is a code snippet that checks their behavior at the range boundaries.
Someday they might be hardcoded into PHP...
*/
/**
 * Encode an integer as a UTF-8 byte sequence, using the original 31-bit
 * scheme (sequences of 1 to 6 bytes).
 *
 * @param mixed $i Value to encode; it is cast to int first.
 * @return string|false The encoded bytes, '?' when the value does not fit in
 *                      31 bits, or false when the value is negative.
 */
function i3u($i) {
    $code = (int)$i;
    if ($code < 0) {
        return false;
    }
    if ($code <= 0x7f) {
        // Single-byte range: the value is its own encoding.
        return chr($code);
    }
    if (($code & 0x7fffffff) !== $code) {
        // Bits above the 31st are set: not representable.
        return '?';
    }

    // Each row: highest value of the range, lead-byte marker, number of
    // continuation bytes. Anything above the last row is the 6-byte form.
    $ranges = array(
        array(0x7ff,     0xc0, 1),
        array(0xffff,    0xe0, 2),
        array(0x1fffff,  0xf0, 3),
        array(0x3ffffff, 0xf8, 4),
    );
    $marker = 0xfc;
    $extra = 5;
    foreach ($ranges as $range) {
        if ($code <= $range[0]) {
            $marker = $range[1];
            $extra = $range[2];
            break;
        }
    }

    // Peel off six data bits at a time, lowest first, building the tail
    // right-to-left; whatever remains belongs in the lead byte.
    $tail = '';
    for ($k = 0; $k < $extra; $k++) {
        $tail = chr(0x80 | ($code & 0x3f)) . $tail;
        $code >>= 6;
    }
    return chr($marker | $code) . $tail;
}
/**
 * Decode one UTF-8 sequence (original 31-bit scheme, 1 to 6 bytes) into its
 * integer value.
 *
 * Strict mode (the default) checks both length and bit patterns: the string
 * must be exactly as long as its lead byte announces, and every following
 * byte must be a continuation byte (0x80..0xbf).
 *
 * Non-strict mode only collects data bits: the lead byte decides how many
 * bytes are read, the top two bits of the following bytes are ignored, bytes
 * that are missing count as zero, and surplus bytes are ignored.
 *
 * In both modes a lead byte below 0xc2 is rejected unless the string is a
 * single ASCII byte, and 0xfe/0xff are never valid lead bytes. Overlong
 * forms of three or more bytes and surrogate values are not rejected.
 *
 * @param string $s      Bytes of a single UTF-8 sequence.
 * @param mixed  $strict Truthy for strict validation, falsy for lenient decoding.
 * @return int|false|null The decoded integer, null for an empty string,
 *                        false for an invalid sequence.
 */
function u3i($s,$strict=1) {
    $s = (string)$s;
    if ($s === '') {
        return null;
    }

    $l = strlen($s);
    // Square-bracket offsets: the "{}" offset form no longer parses in PHP 8.
    $o = ord($s[0]);
    if ($o <= 0x7f && $l == 1) {
        return $o;
    }

    if ($strict) {
        if ($l > 6) {
            return false;
        }
        for ($i = 1; $i < $l; $i++) {
            $b = ord($s[$i]);
            if ($b < 0x80 || $b > 0xbf) {
                return false;
            }
        }
    }

    // ASCII followed by more bytes, a stray continuation byte, or the
    // overlong 2-byte leads 0xc0/0xc1: rejected even when not strict.
    if ($o < 0xc2) {
        return false;
    }

    // The lead byte alone fixes the sequence length and its data-bit mask.
    if ($o <= 0xdf) {
        $need = 2; $mask = 0x1f;
    } elseif ($o <= 0xef) {
        $need = 3; $mask = 0x0f;
    } elseif ($o <= 0xf7) {
        $need = 4; $mask = 0x07;
    } elseif ($o <= 0xfb) {
        $need = 5; $mask = 0x03;
    } elseif ($o <= 0xfd) {
        $need = 6; $mask = 0x01;
    } else {
        return false;
    }

    // This must be a comparison: the former "$l=2 && $strict" assigned to $l,
    // so strict mode never checked the length and non-strict mode always failed.
    if ($strict && $l != $need) {
        return false;
    }

    $value = $o & $mask;
    for ($i = 1; $i < $need; $i++) {
        // Only reachable with $i >= $l when not strict: absent bytes supply no bits.
        $data = ($i < $l) ? (ord($s[$i]) & 0x3f) : 0;
        $value = ($value << 6) | $data;
    }
    return $value;
}
// Boundary behavior check: for the highest value of each encoded length and the
// value just above it, encode with i3u(), dump every output byte in binary,
// then decode the bytes back with u3i().
$do = array(0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff);
foreach ($do as $ii) {
    for ($i = $ii; $i <= $ii + 1; $i++) {
        $o = i3u($i);
        for ($j = 0; $j < strlen($o); $j++) {
            // Square-bracket offsets: the "{}" offset form no longer parses in PHP 8.
            print "O[$j]=" . sprintf('%08b', ord($o[$j])) . ", ";
        }
        print "c=$i, o=[$o].\n";
        // A false result from u3i() prints as an empty string between the brackets.
        print "Back: [$o] => [" . u3i($o) . "]\n";
    }
}