From: Andrew Ruthven
Date: Thu, 29 Nov 2007 17:46:57 +0000 (+1300)
Subject: Clean out the bad UTF-8 characters.
X-Git-Tag: 0.6.0~27
X-Git-Url: http://git.etc.gen.nz/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5690b0b624e6cc2dccf2ab5c0218b3bff40ee724;p=mythtv-status.git

Clean out the bad UTF-8 characters.
---

diff --git a/ChangeLog b/ChangeLog
index 87f6367..e03f14c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,6 @@
 2007-11-28 Andrew Ruthven
   Be more wary about processing what the backend has sent us.
+    This includes cleaning up some invalid UTF-8 characters.
   Add support for reading XML from a file.
   Be a bit more forgiving on the XML we're receiving.
   Show how much disk space is used - currently only total.
diff --git a/bin/mythtv-status b/bin/mythtv-status
index 68192ae..aa5665e 100755
--- a/bin/mythtv-status
+++ b/bin/mythtv-status
@@ -355,6 +355,8 @@ sub load_xml {
 
   $parser->recover_silently(1) unless $verbose;
 
+  clean_xml(\$status);
+
   my $xml = eval { $parser->parse_string( $status ) };
 
   if ($@) {
@@ -381,6 +383,20 @@ sub load_perl_api {
   return $myth;
 }
 
+# We are sometimes passed dodgy XML from MythTV, make some attempts to clean
+# it.
+sub clean_xml {
+  my ($xml) = shift;
+
+  # Deal to invalid Unicode.
+  for my $bad ("�", "�") {
+    if ($$xml =~ s/$bad/?/g) {
+      warn "Found and replaced: $bad\n"
+        if $verbose;
+    }
+  }
+}
+
 sub process_xml {
   my ($block, $xml) = @_;
 