From 5acdb022908bd1504f1f9fb4f766f458dc5ceaa0 Mon Sep 17 00:00:00 2001
From: Andrew Ruthven
Date: Fri, 30 Nov 2007 06:46:57 +1300
Subject: [PATCH] Clean out the bad UTF-8 characters.

---
Notes (reviewer commentary; text between the three-dash line and the
diffstat is not part of the commit):

    What the patch does
      * Adds clean_xml(), which receives a reference to the XML string
        (it is dereferenced as $$xml) and edits that string in place.
      * For every entry in its hard-coded list, all occurrences are
        replaced with a literal "?" via s///g.  When at least one
        replacement happened and $verbose is true, a warning of the form
        "Found and replaced: <sequence>" is emitted.
      * load_xml() now calls clean_xml(\$status) immediately before the
        string is handed to $parser->parse_string().
      * Adds one line to the 2007-11-28 ChangeLog entry.

    Things to confirm before applying
      * NOTE(review): both entries in the list iterated by
        "for my $bad (...)" show up as U+FFFD in this copy of the patch.
        Presumably the original literals were two different raw byte
        sequences that were lost in transit; verify against the commit.
      * NOTE(review): $bad is interpolated into the pattern without
        \Q...\E, so it is treated as a regular expression.  That is
        harmless only as long as the entries contain no regex
        metacharacters.
      * NOTE(review): the line breaks below were restored from the diff
        syntax and agree with the hunk line counts, but the leading
        whitespace inside the hunks (and the position of the blank
        context line in the ChangeLog hunk) is a best guess.  Check the
        context lines against the target files.
      * The author e-mail address is absent from the From: header and
        the ChangeLog date line in this copy.

 ChangeLog         |  1 +
 bin/mythtv-status | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index d521ac6..b7713e7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,6 @@
 2007-11-28 Andrew Ruthven
 	Be more wary about processing what the backend has sent us.
+	This includes cleaning up some invalid UTF-8 characters.
 	Add support for reading XML from a file.
 	Be a bit more forgiving on the XML we're receiving.
 
diff --git a/bin/mythtv-status b/bin/mythtv-status
index 6d533b9..13090a1 100755
--- a/bin/mythtv-status
+++ b/bin/mythtv-status
@@ -305,6 +305,8 @@ sub load_xml {
   $parser->recover_silently(1)
     unless $verbose;
 
+  clean_xml(\$status);
+
   my $xml = eval { $parser->parse_string( $status ) };
 
   if ($@) {
@@ -327,6 +329,20 @@ sub load_perl_api {
   return $myth;
 }
 
+# We are sometimes passed dodgy XML from MythTV, make some attempts to clean
+# it.
+sub clean_xml {
+  my ($xml) = shift;
+
+  # Deal to invalid Unicode.
+  for my $bad ("�", "�") {
+    if ($$xml =~ s/$bad/?/g) {
+      warn "Found and replaced: $bad\n"
+        if $verbose;
+    }
+  }
+}
+
 sub process_xml {
   my ($block, $xml) = @_;
 
-- 
2.30.2