I created some functions for entity-safe splitting and length counting:
<?php
/**
 * Count the characters in a string, treating each HTML entity as one character.
 *
 * The text is split into tokens, each of which is either:
 *  - an entity: "&" followed by "#" and two or more decimal digits, or by two
 *    or more lowercase ASCII letters, terminated by ";" (e.g. "&#169;", "&amp;");
 *  - any other single byte, including an "&" that does not start such an entity.
 *
 * NOTE: the pattern is matched without the "u" modifier, so text outside of
 * entities is counted per byte, not per multibyte character. Hexadecimal
 * references ("&#x41;") and names containing uppercase letters or digits are
 * not recognised as entities and are counted byte by byte.
 *
 * @param string $text Text that may contain HTML entities.
 * @return int Number of tokens in $text; 0 for an empty string or when the
 *             pattern match fails.
 */
function strlen_entities($text)
{
    // Alternation is ordered: an entity is tried first, and "." (with the /s
    // flag, so newlines are included) picks up every remaining byte. This
    // guarantees no byte of the input is skipped.
    $matched = preg_match_all(
        '/&(?:#[0-9]{2,}|[a-z]{2,});|./s',
        $text,
        $tokens
    );

    // preg_match_all() reports failure as false (older PHP) or null; in either
    // case there is nothing to count.
    if ($matched === false || $matched === null) {
        return 0;
    }

    return count($tokens[0]);
}
/**
 * Entity-aware counterpart of substr(): extract part of a string while
 * treating each HTML entity as a single, indivisible character.
 *
 * The text is split into tokens, each of which is either:
 *  - an entity: "&" followed by "#" and two or more decimal digits, or by two
 *    or more lowercase ASCII letters, terminated by ";" (e.g. "&#169;", "&amp;");
 *  - any other single byte, including an "&" that does not start such an entity.
 *
 * NOTE: the pattern is matched without the "u" modifier, so text outside of
 * entities is handled per byte, not per multibyte character.
 *
 * @param string $text  Text that may contain HTML entities.
 * @param int    $start Zero-based token offset. A negative value counts back
 *                      from the end of the text; if it reaches past the
 *                      beginning it is clamped to 0.
 * @param int    $limit 0 (the default) returns everything from $start to the
 *                      end. A positive value is the maximum number of tokens
 *                      to return. A negative value stops that many tokens
 *                      before the end of the text.
 * @return string|false The extracted text (possibly empty), or false when
 *                      $start lies at or beyond the end of the text or the
 *                      pattern match fails.
 */
function substr_entities($text,$start,$limit=0)
{
    // Alternation is ordered: an entity is tried first, and "." (with the /s
    // flag, so newlines are included) picks up every remaining byte, so the
    // tokens always concatenate back to the original text.
    $matched = preg_match_all(
        '/&(?:#[0-9]{2,}|[a-z]{2,});|./s',
        $text,
        $matches
    );
    if ($matched === false || $matched === null) {
        return false;
    }

    $tokens = $matches[0];
    $total = count($tokens);

    if ($start < 0) {
        // Translate an offset from the end into an absolute one, clamping to
        // the start of the text.
        $start = max(0, $total + $start);
    }

    // Compare against the token count (not the last index) so that a start
    // position on the final token is still valid.
    if ($start >= $total) {
        return false;
    }

    $available = $total - $start;
    if ($limit == 0) {
        $length = $available;
    } elseif ($limit > 0) {
        // Never read past the last token.
        $length = min($limit, $available);
    } else {
        // Negative limit: drop that many tokens from the end; an over-large
        // value simply yields an empty string.
        $length = max(0, $available + $limit);
    }

    return implode('', array_slice($tokens, $start, $length));
}
?>