可以尝试使用 Tidy 库,它可用于清理格式不良的 HTML 和 XML:http://php.net/manual/zh/book.tidy.php
如果想用纯 PHP 修复类似下面这样的 XML(文本内容中含有未转义的 <、& 和 >):
<?xml version="1.0"?>
<Feed>
<RECORD>
<ID>117387</ID>
<ADVERTISERNAME>Test < texter</ADVERTISERNAME>
<AID>10544740</AID>
<NAME>This & This</NAME>
<DESCRIPTION>For one day only this is > than this.</DESCRIPTION>
</RECORD>
</Feed>
纯 PHP 的解决方案大致如下:
/**
 * Escapes stray markup characters in the text portions of a loosely formed
 * XML string while copying the tags themselves through unchanged.
 *
 * The input is scanned one byte at a time. A '<' is treated as the start of
 * a tag only when the next '<' or '>' after it is a '>'; otherwise it is
 * considered literal text and escaped. Outside of tags the characters
 * & < > " ' are replaced by entity references; every other byte (including
 * the bytes of multi-byte UTF-8 characters) is copied as-is.
 *
 * Known limitations of this heuristic:
 *  - every '&' in text is escaped, so an already valid reference such as
 *    "&amp;" comes out as "&amp;amp;";
 *  - a '>' inside an attribute value ends the tag early;
 *  - comments and CDATA sections get no special treatment.
 *
 * @param string $xml Raw XML whose text nodes may contain unescaped characters.
 *
 * @return string The XML with special characters in text content escaped.
 */
function cleanupXML($xml) {
    // A fixed ASCII table is used instead of htmlentities() because the loop
    // below works on single bytes: htmlentities() would see each byte of a
    // multi-byte UTF-8 character as an invalid sequence and corrupt it.
    $escapes = [
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#039;',
    ];

    $xmlOut = '';
    $inTag = false;
    $xmlLen = strlen($xml);

    for ($i = 0; $i < $xmlLen; ++$i) {
        $char = $xml[$i];

        if ($inTag) {
            if ($char === '>') {
                // End of the current tag; the '>' itself is kept verbatim.
                $inTag = false;
            } elseif ($char === '<') {
                // A '<' cannot appear inside a tag, so treat it as data.
                $char = $escapes['<'];
            }
            // Anything else inside a tag (names, attributes, quotes) is untouched.
        } elseif ($char === '<') {
            // Look ahead to the next tag boundary character.
            $next = $i + 1;
            if ($next < $xmlLen) {
                $next += strcspn($xml, '<>', $next);
            }

            if ($next < $xmlLen && $xml[$next] === '>') {
                // A '>' comes first: this '<' really opens a tag.
                $inTag = true;
            } else {
                // Another '<' comes first, or the input ends: literal text.
                $char = $escapes['<'];
            }
        } elseif (isset($escapes[$char])) {
            // Special character in text content (includes a stray '>').
            $char = $escapes[$char];
        }

        $xmlOut .= $char;
    }

    return $xmlOut;
}