Import version 0.6.0 from http://www.jsw.gen.nz/mythtv/xmltv-proc-nz

author Andrew Ruthven <andrew@etc.gen.nz>

Sat, 11 Jun 2022 05:25:34 +0000 (17:25 +1200)

committer Andrew Ruthven <andrew@etc.gen.nz>

Sat, 11 Jun 2022 05:25:34 +0000 (17:25 +1200)
author Andrew Ruthven <andrew@etc.gen.nz>
Sat, 11 Jun 2022 05:25:34 +0000 (17:25 +1200)
committer Andrew Ruthven <andrew@etc.gen.nz>
Sat, 11 Jun 2022 05:25:34 +0000 (17:25 +1200)
diff --git a/bin/xmltv-proc-nz b/bin/xmltv-proc-nz

new file mode 100644 (file)

index 0000000..206a670
--- /dev/null
+++ b/bin/xmltv-proc-nz
@@ -0,0 +1,922 @@
+#!/usr/bin/python3
+
+"""
+xmltv-proc-nz by Hadley Rich <hads@nice.net.nz>
+
+Licensed under the BSD License.
+
+Processes an XMLTV file in various ways. To use it, pipe an XML file in like so:
+
+cat freeview.xml | xmltv-proc-nz > better-file.xml
+
+or:
+
+xmltv-proc-nz freeview.xml > better-file.xml
+
+
+Changes:
+
+JSW = Stephen Worthington <stephen@jsw.gen.nz>
+
+0.5.9b JSW
+       - Change BASE_URL from nzepg.org to epg.org.nz
+       - Fetch JSON data from mypvr.jsw.gen.nz instead of BASE_URL
+0.5.9c JSW
+       - Comment out TV1 BBCWorld processing as it is now unused and also
+         broken.
+0.5.9d JSW
+       - Add SearchReplaceTitleLocal() to use local web server JSON data.
+0.5.9e Wade MaxField <wade@hotblack.co.nz>
+       - Change EpDesc to work with NZ series/episode data in subtitles and
+         descriptions.
+0.5.9e JSW
+       - Process Sky Movies channels to put the subtitle data into the description, the title into the subtitle and change the title to "Movie".
+         This is a JSW customisation and will not be wanted by everyone, so it is controlled by the JSW flag.
+0.5.9f JSW
+       - Fix the PlusOnes processing for the new Freeview lineup from March 2022.
+0.5.9g JSW
+       - Fix exceptions in Sky Movies processing when there is no subtitle.
+0.6.0  JSW
+       - Convert to Python 3.
+       - Fix post processing.
+       - Delete BBCWorld processing.
+       - Generalise JSON base URL processing to use a JSON base URL list.
+       - Reverse the default for BaseProcessor.valid.  Set valid=True when valid data is obtained from one URL, even if other URLs fail.
+       - Remove JSW flag - now works by whether it finds the matching json data.
+       - Make PlusOnes use json configuration.
+"""
+#TODO: Find repeats
+#TODO: Regex replacements for categories
+
+import csv
+import json
+import logging
+import time
+import re
+import sys
+import urllib.request, urllib.parse, urllib.error
+try:
+    # xml.etree.cElementTree was deprecated in Python 3.3 and removed in
+    # Python 3.9; use it where it still exists, otherwise fall back to the
+    # plain module (which has used the C accelerator itself since 3.3).
+    from xml.etree import cElementTree as ElementTree
+except ImportError:
+    from xml.etree import ElementTree
+from datetime import datetime, timedelta, tzinfo
+from optparse import OptionParser
+try:
+    import tmdb
+except ImportError:
+    tmdb = False
+try:
+    import tvdb_api
+except ImportError:
+    tvdb = False
+else:
+    tvdb = tvdb_api.Tvdb(language='en')
+
+# NAME and VERSION identify this script; urlopen() sends them as the
+# User-Agent header and NAME is also the logger name.
+NAME = 'xmltv-proc-nz'
+URL = 'http://nice.net.nz/xmltv-proc-nz'
+VERSION = '0.6.0 JSW'
+# NOTE(review): BASE_URL is not referenced by the processors visible here,
+# which iterate JSON_BASE_URLS instead - confirm it is still needed.
+BASE_URL = 'http://epg.org.nz'
+# Each processor requests '<base>/<resource>/+json' from every entry in turn.
+JSON_BASE_URLS = ['http://epg.org.nz', 'http://localhost/json']
+# strptime() format for programme start/stop once the timezone suffix has
+# been stripped.
+TIME_FORMAT = '%Y%m%d%H%M%S'
+# Active log level; swap in one of the commented alternatives to change it.
+LOG_LEVEL = logging.INFO
+#LOG_LEVEL = logging.WARNING
+#LOG_LEVEL = logging.DEBUG
+
+log = logging.getLogger(NAME)
+logging.basicConfig(level=LOG_LEVEL, format='%(message)s')
+
+class UTC(tzinfo):
+    """
+    Fixed tzinfo for UTC: zero offset and no daylight saving.
+    """
+
+    _ZERO = timedelta(0)
+
+    def utcoffset(self, dt):
+        return self._ZERO
+
+    def dst(self, dt):
+        return self._ZERO
+
+    def tzname(self, dt):
+        return "UTC"
+
+class LocalTimezone(tzinfo):
+    """
+    tzinfo for the computer's local timezone, built from the time module's
+    timezone / altzone / daylight values.
+    """
+
+    def __init__(self):
+        tzinfo.__init__(self)
+        self.STDOFFSET = timedelta(seconds=-time.timezone)
+        # Without a DST zone the "DST" offset is just the standard offset.
+        self.DSTOFFSET = (
+            timedelta(seconds=-time.altzone) if time.daylight else self.STDOFFSET
+        )
+        self.DSTDIFF = self.DSTOFFSET - self.STDOFFSET
+
+    def _isdst(self, dt):
+        # Round-trip the wall-clock fields through mktime()/localtime() with
+        # tm_isdst=-1 so the C library decides whether DST is in effect.
+        fields = (
+            dt.year, dt.month, dt.day,
+            dt.hour, dt.minute, dt.second,
+            dt.weekday(), 0, -1,
+        )
+        return time.localtime(time.mktime(fields)).tm_isdst > 0
+
+    def utcoffset(self, dt):
+        return self.DSTOFFSET if self._isdst(dt) else self.STDOFFSET
+
+    def dst(self, dt):
+        return self.DSTDIFF if self._isdst(dt) else timedelta(0)
+
+    def tzname(self, dt):
+        return time.tzname[self._isdst(dt)]
+
+# Shared tzinfo instances; Overrides uses them to turn UTC start times into
+# local time.
+localtz = LocalTimezone()
+utc = UTC()
+
+def urlopen(url):
+    """
+    Open url with a 'NAME/VERSION' User-Agent header and return the
+    response object from urllib.request.urlopen().
+    """
+    user_agent = '%s/%s' % (NAME, VERSION)
+    request = urllib.request.Request(url, headers={'User-Agent': user_agent})
+    return urllib.request.urlopen(request)
+
+def cmp_to_key(mycmp):
+    """
+    Convert a Python 2 sort() cmp= function into a key= function.
+
+    mycmp(a, b) must return a negative, zero or positive number when a
+    sorts before, equal to or after b.  The returned callable wraps each
+    item in an object whose rich comparisons call mycmp.
+    """
+    # The standard library already provides exactly this adapter, so use it
+    # rather than maintaining a hand-written copy.
+    import functools
+    return functools.cmp_to_key(mycmp)
+
+
+
+class BaseProcessor:
+    """
+    Common base class for the programme processors.
+
+    Both entry points raise NotImplementedError here; subclasses override
+    the ones they support.
+    """
+    # Processors that load external data set this to True once they have
+    # some.
+    valid = False
+
+    def post_process(self, programmes):
+        raise NotImplementedError
+
+    def __call__(self, programme):
+        raise NotImplementedError
+
+class Overrides(BaseProcessor):
+    """
+    Use a web service to override shows in specific timeslots.
+
+    Override records are fetched from '<base>/overrides/+json' for every
+    entry in JSON_BASE_URLS and merged into self.overrides.  Each record's
+    'start' string ('YYYY-MM-DD HH:MM:SS', taken as UTC) is converted to a
+    naive local-time datetime so it can be compared with programme starts.
+    """
+    def __init__(self):
+        if not tvdb:
+            log.warning('Overrides: tvdb_api module not found.')
+        self.overrides = None
+        for json_base_url in JSON_BASE_URLS:
+            try:
+                data = urlopen('%s/overrides/+json' % json_base_url).read()
+            except IOError:
+                log.warning('Overrides: Fetching data from %s failed.' % json_base_url)
+                continue
+            try:
+                overrides = json.loads(data)
+            except ValueError:
+                log.warning('Overrides: JSON parse from %s failed.' % json_base_url)
+                continue
+            # Convert only the records just loaded.  Converting the merged
+            # list again would hand already-converted datetimes to
+            # strptime() and raise TypeError on the second source.
+            for o in overrides:
+                start = datetime.strptime(o['start'], '%Y-%m-%d %H:%M:%S')
+                start = start.replace(tzinfo=utc).astimezone(localtz)
+                o['start'] = start.replace(tzinfo=None)
+            if self.overrides is None:
+                self.overrides = overrides
+            else:
+                self.overrides += overrides
+            self.valid = True
+
+    @staticmethod
+    def _remove_child(programme, tag):
+        """Remove the first direct child of programme named tag, if any."""
+        existing = programme.find(tag)
+        if existing is not None:
+            programme.remove(existing)
+
+    def __call__(self, programme):
+        """
+        Apply every override whose start time and xmltvid match programme.
+
+        Programmes whose start/stop cannot be parsed are ignored.
+        """
+        if not self.valid:
+            return
+
+        try:
+            # strptime() cannot handle the numeric timezone suffix, so only
+            # the part before the first space is parsed.  The stop time is
+            # parsed purely to reject programmes without a usable one.
+            start = datetime.strptime(programme.get('start').split(' ')[0], TIME_FORMAT)
+            datetime.strptime(programme.get('stop').split(' ')[0], TIME_FORMAT)
+            channel = programme.get('channel')
+        except (AttributeError, TypeError, ValueError):
+            log.debug('Overrides: Ignoring invalid programme')
+            return
+
+        for o in self.overrides:
+            if start != o['start'] or channel != o['xmltvid']:
+                continue
+            log.info('Overrides: Found program on %s at %s', channel, start)
+            self._remove_child(programme, 'previously-shown')
+            if o.get('previously_shown'):
+                ElementTree.SubElement(programme, 'previously-shown')
+            if not (o.get('season') and o.get('episode')):
+                continue
+            self._remove_child(programme, 'episode-num')
+            episode_num = ElementTree.SubElement(programme, 'episode-num')
+            episode_num.set('system', 'xmltv_ns')
+            # xmltv_ns numbering is zero-based.
+            episode_num.text = '%s.%s.0' % (o['season'] - 1, o['episode'] - 1)
+            if not (tvdb and o.get('tvdb_id')):
+                continue
+            try:
+                # Both lookups can fail, so the show lookup is guarded too.
+                show = tvdb[o['tvdb_id']]
+                episode = show[o['season']][o['episode']]
+            except Exception:
+                log.error('Error getting episode %02dx%02d of %s', o['season'], o['episode'], o['tvdb_id'])
+                continue
+            log.info(
+                'Overrides: Using %s - %02dx%02d - %s',
+                show['seriesname'],
+                int(episode['seasonnumber']),
+                int(episode['episodenumber']),
+                episode['episodename']
+            )
+            if 'firstaired' in episode and episode['firstaired']:
+                self._remove_child(programme, 'date')
+                date = ElementTree.SubElement(programme, 'date')
+                date.text = episode['firstaired'].replace('-', '')
+            self._remove_child(programme, 'sub-title')
+            sub_title = ElementTree.SubElement(programme, 'sub-title')
+            sub_title.text = episode['episodename']
+            # Only touch the description when there is an overview to put
+            # in it; an existing description is otherwise left alone.
+            overview = episode['overview']
+            if overview:
+                desc = programme.find('desc')
+                if desc is None:
+                    desc = ElementTree.SubElement(programme, 'desc')
+                desc.text = overview
+            if 'rating' in episode and episode['rating']:
+                self._remove_child(programme, 'star-rating')
+                rating = ElementTree.SubElement(programme, 'star-rating')
+                value = ElementTree.SubElement(rating, 'value')
+                value.text = '%s/10' % episode['rating']
+
+class PlusOnes(BaseProcessor):
+    """
+    Flag every programme on a listed channel as previously shown.
+
+    The channel ids are fetched from '<base>/plus-ones/+json' for every
+    entry in JSON_BASE_URLS and merged into self.xmltvids.
+    """
+    def __init__(self):
+        self.xmltvids = None
+        for json_base_url in JSON_BASE_URLS:
+            try:
+                log.debug('PlusOnes: urlopen(%s/plus-ones/+json)' % json_base_url)
+                data = urlopen('%s/plus-ones/+json' % json_base_url).read()
+            except IOError:
+                log.warning('PlusOnes: Fetching data from %s failed.' % json_base_url)
+                continue
+            try:
+                xmltvids = json.loads(data)
+            except ValueError:
+                # Like the other processors, a bad source is skipped rather
+                # than aborting the whole run.
+                log.warning('PlusOnes: JSON parse from %s failed.' % json_base_url)
+                continue
+            if self.xmltvids is None:
+                self.xmltvids = xmltvids
+            else:
+                self.xmltvids += xmltvids
+            self.valid = True
+            # isEnabledFor() is the real "is debug on?" test; comparing
+            # getEffectiveLevel() >= DEBUG is true at every configured level.
+            if log.isEnabledFor(logging.DEBUG):
+                log.debug('PlusOnes from %s: ' % json_base_url)
+                for xmltvid in xmltvids:
+                    log.debug('  %s', xmltvid)
+
+    def __call__(self, programme):
+        """Add a previously-shown element when programme's channel is listed."""
+        if not self.valid:
+            return
+        if programme.get('channel') not in self.xmltvids:
+            return
+        # Do not add a second element if the programme is already flagged.
+        if programme.find('previously-shown') is None:
+            ElementTree.SubElement(programme, 'previously-shown')
+
+class Movies(BaseProcessor):
+    """
+    Augment movies with data from themoviedb.com
+
+    A programme is treated as a possible movie when it runs for more than
+    90 minutes and at most 4 hours, is on one of the channels from
+    'movie-channels', and its title matches none of the 'movie-excludes'
+    regexes.  Both lists are merged across every entry of JSON_BASE_URLS.
+    TMDB lookups are cached per title in self.cache; a cached None records
+    a title that had no single exact match.
+    """
+
+    def __init__(self):
+        self.cache = {}
+        if not tmdb:
+            log.warning('Movies: TMDB module not found.')
+        # Initialised up front so __call__ never sees a missing attribute
+        # when the channel list could not be fetched.
+        self.channels = []
+        self.excludes = []
+        for json_base_url in JSON_BASE_URLS:
+            try:
+                data = urlopen('%s/movie-channels/+json' % json_base_url).read()
+            except IOError:
+                log.warning('Movies: Fetching channel data from %s failed.' % json_base_url)
+            else:
+                try:
+                    # Merge rather than overwrite, so one source does not
+                    # discard the channels supplied by another.
+                    self.channels += json.loads(data)
+                except ValueError:
+                    log.warning('Movies: Parsing channel data failed.')
+            try:
+                data = urlopen('%s/movie-excludes/+json' % json_base_url).read()
+            except IOError:
+                log.warning('Movies: Fetching exclude data from %s failed.' % json_base_url)
+            else:
+                try:
+                    exclude_strings = json.loads(data)
+                except ValueError:
+                    log.warning('Movies: Parsing exclude data from %s failed.' % json_base_url)
+                else:
+                    self.excludes += [re.compile(e) for e in exclude_strings]
+                    self.valid = True
+
+    def __call__(self, programme):
+        """Add TMDB details to programme if it looks like a known movie."""
+        # Without the tmdb module every lookup would fail, so skip quietly
+        # instead of logging an exception per programme.
+        if not self.valid or not tmdb:
+            return
+
+        try:
+            start = programme.get('start')
+            stop = programme.get('stop')
+            title = programme.find('title').text
+            channel = programme.get('channel')
+        except AttributeError:
+            log.debug('Movies: Ignoring invalid programme')
+            return
+        if start is None or stop is None or not title:
+            return
+        # Unfortunately strptime can't handle numeric timezones so we strip it.
+        # It's only for getting possible movies so won't matter too much.
+        try:
+            start_time = time.mktime(time.strptime(start.split(' ')[0], TIME_FORMAT))
+            stop_time = time.mktime(time.strptime(stop.split(' ')[0], TIME_FORMAT))
+        except (ValueError, OverflowError):
+            log.debug('Movies: Ignoring invalid programme')
+            return
+        duration = stop_time - start_time
+        if duration <= 5400 or duration > 14400: # Between 90 mins and 4 hours
+            return
+        if channel not in self.channels:
+            return
+        for regex in self.excludes:
+            if regex.match(title):
+                return
+        log.debug('Movies: Possible movie "%s" (duration %dm)', title, duration/60)
+        movie = self._lookup(title)
+        if movie is None:
+            return
+        self._annotate(programme, title, movie)
+
+    def _lookup(self, title):
+        """Return TMDB info for title (cached), or None without one exact match."""
+        if title in self.cache:
+            movie = self.cache[title]
+            if movie is None:
+                log.debug('Movies: Cached ignore for "%s"', title)
+            else:
+                log.debug('Movies: Cache hit for "%s"', title)
+            return movie
+        try:
+            results = tmdb.search(title.replace('?', ''))
+        except Exception:
+            log.exception('Movies: TMDB problem searching')
+            return None
+        wanted = normalise_movie_title(title)
+        matches = [
+            result for result in results
+            if wanted == normalise_movie_title(result['name']) and result['language'] == 'en'
+        ]
+        log.debug('Movies: Exact title matches: %d', len(matches))
+        for match in matches:
+            log.debug('Movies: Found match "%s" (%s)', match['name'], match['released'])
+        if len(matches) != 1:
+            # No match or an ambiguous one: remember to skip this title.
+            self.cache[title] = None
+            return None
+        try:
+            log.debug('Movies: Cache miss for "%s"', title)
+            movie = tmdb.getMovieInfo(matches[0]['id'])
+        except Exception:
+            log.exception('Movies: TMDB problem fetching info')
+            return None
+        self.cache[title] = movie
+        return movie
+
+    @staticmethod
+    def _set_text(programme, tag, text):
+        """Set the text of programme's first tag child, creating it if absent."""
+        node = programme.find(tag)
+        if node is None:
+            node = ElementTree.SubElement(programme, tag)
+        node.text = text
+
+    @staticmethod
+    def _replace(programme, tag):
+        """Drop programme's first tag child (if any) and return a fresh one."""
+        old = programme.find(tag)
+        if old is not None:
+            programme.remove(old)
+        return ElementTree.SubElement(programme, tag)
+
+    def _annotate(self, programme, title, movie):
+        """Copy the TMDB fields present in movie onto the programme element."""
+        log.info('Movies: Adding info from TMDB for %s', title)
+        show_type = ElementTree.SubElement(programme, 'category')
+        show_type.text = 'movie'
+        if 'categories' in movie and 'genre' in movie['categories']:
+            for c in movie['categories']['genre']:
+                if not any(old.text == c for old in programme.findall('category')):
+                    ElementTree.SubElement(programme, 'category').text = c
+        if 'overview' in movie and movie['overview']:
+            self._set_text(programme, 'desc', movie['overview'])
+        if 'url' in movie and movie['url']:
+            self._set_text(programme, 'url', movie['url'])
+        if 'runtime' in movie and movie['runtime']:
+            length = self._replace(programme, 'length')
+            length.set('units', 'minutes')
+            # Element text must be a string for the tree to serialise.
+            length.text = str(movie['runtime'])
+        if 'released' in movie and movie['released']:
+            self._set_text(programme, 'date', movie['released'].replace('-', ''))
+        if 'rating' in movie and movie['rating']:
+            rating = self._replace(programme, 'star-rating')
+            value = ElementTree.SubElement(rating, 'value')
+            value.text = '%s/10' % movie['rating']
+        if 'cast' in movie:
+            credits = self._replace(programme, 'credits')
+            cast = movie['cast']
+            if 'director' in cast:
+                for d in cast['director']:
+                    ElementTree.SubElement(credits, 'director').text = d['name']
+            if 'actor' in cast:
+                for a in cast['actor']:
+                    actor = ElementTree.SubElement(credits, 'actor')
+                    actor.text = a['name']
+                    # A None attribute value cannot be serialised.
+                    if a['character'] is not None:
+                        actor.set('role', a['character'])
+
+class HD(BaseProcessor):
+    """
+    Look for a HD note in a description.
+
+    When the description ends with 'HD', 'HD.' or '(HD)', the note is
+    removed from the text and the programme's video/quality is set to
+    'HDTV', creating the video element if needed.
+    """
+    regexes = (
+        re.compile(r'HD\.?$'),
+        re.compile(r'\(HD\)$'),
+    )
+
+    def __call__(self, programme):
+        desc = programme.find('desc')
+        if desc is None or not desc.text:
+            return
+        for regex in self.regexes:
+            if not regex.search(desc.text):
+                continue
+            log.debug('HD: Found "%s"', programme.findtext('title'))
+            video = programme.find('video')
+            if video is None:
+                video = ElementTree.SubElement(programme, 'video')
+                present = ElementTree.SubElement(video, 'present')
+                present.text = 'yes'
+                aspect = ElementTree.SubElement(video, 'aspect')
+                aspect.text = '16:9'
+            # quality is a child of video, so it must be looked up there;
+            # searching programme never finds it and appends a duplicate.
+            quality = video.find('quality')
+            if quality is None:
+                quality = ElementTree.SubElement(video, 'quality')
+            quality.text = 'HDTV'
+            desc.text = regex.sub('', desc.text)
+
+class Subtitle(BaseProcessor):
+    """
+    Look for a subtitle in a description.
+
+    The patterns are tried in order against the start of the description.
+    The first one that matches supplies the sub-title, and the matched
+    prefix is removed from the description.  Programmes that already have
+    a sub-title element are left alone.
+    """
+    regexes = (
+        re.compile(r"(Today|Tonight)?:? ?'(?P<subtitle>.*?)'\.\s?"),
+        re.compile(r"'(?P<subtitle>.{2,60}?)\.'\s"),
+        re.compile(r"(?P<subtitle>.{2,60}?):\s"),
+    )
+
+    def __call__(self, programme):
+        desc = programme.find('desc')
+        if desc is None or not desc.text:
+            return
+        # "'subtitle' not in programme" compared a string against child
+        # elements and was always true; test for the element itself.
+        if programme.find('sub-title') is not None:
+            return
+        for regex in self.regexes:
+            matched = regex.match(desc.text)
+            if not matched:
+                continue
+            subtitle = ElementTree.SubElement(programme, 'sub-title')
+            subtitle.text = matched.group('subtitle')
+            log.debug('Subtitle: "%s" for "%s"', subtitle.text, programme.findtext('title'))
+            # Strip only the matched prefix; regex.sub() would also delete
+            # every later occurrence of the pattern inside the description.
+            desc.text = desc.text[matched.end():]
+            break
+
+class SeasonEpisodeFromDesc(BaseProcessor):
+    """
+    Search the description for combined season and episode numbers and add
+    a zero-based xmltv_ns episode-num element for every pattern that hits.
+    """
+    regexes = (
+        re.compile(r'(?i)\s?S\s?(\d+),?\s?Ep?\s?(\d+)'),
+        re.compile(r'(?i)\s?S\s?(\d+),?\s?Episode\s?(\d+)'),
+    )
+
+    def __call__(self, programme):
+        desc = programme.find('desc')
+        if desc is None or not desc.text:
+            return
+        for pattern in self.regexes:
+            hit = pattern.search(desc.text)
+            if not hit:
+                continue
+            season, episode = (int(number) for number in hit.groups())
+            log.info('SeasonEpisodeDesc: Found season %s episode %s for "%s"', season, episode, programme.find('title').text)
+            node = ElementTree.SubElement(programme, 'episode-num')
+            node.set('system', 'xmltv_ns')
+            node.text = '%s.%s.0' % (season - 1, episode - 1)
+
+class SeasonEpisodeFromSubtitle(BaseProcessor):
+    """
+    Search the sub-title for combined season and episode numbers and add a
+    zero-based xmltv_ns episode-num element for every pattern that hits.
+    """
+    regexes = (
+        re.compile(r'(?i)\s?S\s?(\d+),?\s?Ep?\s?(\d+)'),
+        re.compile(r'(?i)\s?S\s?(\d+),?\s?Episode\s?(\d+)'),
+    )
+
+    def __call__(self, programme):
+        subtitle = programme.find('sub-title')
+        if subtitle is None or not subtitle.text:
+            return
+        for pattern in self.regexes:
+            hit = pattern.search(subtitle.text)
+            if not hit:
+                continue
+            season, episode = (int(number) for number in hit.groups())
+            log.info('SeasonEpisodeSubtitle: Found season %s episode %s for "%s"', season, episode, programme.find('title').text)
+            node = ElementTree.SubElement(programme, 'episode-num')
+            node.set('system', 'xmltv_ns')
+            node.text = '%s.%s.0' % (season - 1, episode - 1)
+
+class EpisodeFromDesc(BaseProcessor):
+    """
+    Search the description for an episode number.  Only programmes that
+    arrive without an episode-num element are considered; each pattern that
+    hits adds a zero-based xmltv_ns element with the season left blank.
+    """
+    regexes = (
+        re.compile(r'(?i)\s?Ep\.?\s?(\d+)'),
+        re.compile(r'(?i)\s?Episode\.?\s?(\d+)'),
+    )
+
+    def __call__(self, programme):
+        if programme.find('episode-num') is not None:
+            return
+        desc = programme.find('desc')
+        if desc is None or not desc.text:
+            return
+        for pattern in self.regexes:
+            hit = pattern.search(desc.text)
+            if not hit:
+                continue
+            episode = int(hit.group(1))
+            log.info('EpisodeDesc: Found episode %s for "%s"', episode, programme.find('title').text)
+            node = ElementTree.SubElement(programme, 'episode-num')
+            node.set('system', 'xmltv_ns')
+            node.text = '.%s.0' % (episode - 1)
+
+class EpisodeFromSubtitle(BaseProcessor):
+    """
+    Search the sub-title for an episode number.  Only programmes that
+    arrive without an episode-num element are considered; each pattern that
+    hits adds a zero-based xmltv_ns element with the season left blank.
+    """
+    regexes = (
+        re.compile(r'(?i)\s?Ep\.?\s?(\d+)'),
+        re.compile(r'(?i)\s?Episode\.?\s?(\d+)'),
+    )
+
+    def __call__(self, programme):
+        if programme.find('episode-num') is not None:
+            return
+        subtitle = programme.find('sub-title')
+        if subtitle is None or not subtitle.text:
+            return
+        for pattern in self.regexes:
+            hit = pattern.search(subtitle.text)
+            if not hit:
+                continue
+            episode = int(hit.group(1))
+            log.info('EpisodeSubtitle: Found episode %s for "%s"', episode, programme.find('title').text)
+            node = ElementTree.SubElement(programme, 'episode-num')
+            node.set('system', 'xmltv_ns')
+            node.text = '.%s.0' % (episode - 1)
+
+class SeasonFromDesc(BaseProcessor):
+    """
+    Search the description for a season number.  Only programmes that
+    arrive without an episode-num element are considered; each pattern that
+    hits adds a zero-based xmltv_ns element with the episode left blank.
+    """
+    regexes = (
+        re.compile(r'(?i)^S\s?(\d+)'),
+        re.compile(r'(?i)\sS\s?(\d+)'),
+        re.compile(r'(?i)\s?Season\s?(\d+)'),
+    )
+
+    def __call__(self, programme):
+        if programme.find('episode-num') is not None:
+            return
+        desc = programme.find('desc')
+        if desc is None or not desc.text:
+            return
+        for pattern in self.regexes:
+            hit = pattern.search(desc.text)
+            if not hit:
+                continue
+            season = int(hit.group(1))
+            log.info('SeasonDesc: Found season %s for "%s"', season, programme.find('title').text)
+            node = ElementTree.SubElement(programme, 'episode-num')
+            node.set('system', 'xmltv_ns')
+            node.text = '%s..0' % (season - 1)
+
+class SeasonFromSubtitle(BaseProcessor):
+    """
+    Search the sub-title for a season number.  Only programmes that arrive
+    without an episode-num element are considered; each pattern that hits
+    adds a zero-based xmltv_ns element with the episode left blank.
+    """
+    regexes = (
+        re.compile(r'(?i)^S\s?(\d+)'),
+        re.compile(r'(?i)\sS\s?(\d+)'),
+        re.compile(r'(?i)\s?Season\s?(\d+)'),
+    )
+
+    def __call__(self, programme):
+        if programme.find('episode-num') is not None:
+            return
+        subtitle = programme.find('sub-title')
+        if subtitle is None or not subtitle.text:
+            return
+        for pattern in self.regexes:
+            hit = pattern.search(subtitle.text)
+            if not hit:
+                continue
+            season = int(hit.group(1))
+            log.info('SeasonSubtitle: Found season %s for "%s"', season, programme.find('title').text)
+            node = ElementTree.SubElement(programme, 'episode-num')
+            node.set('system', 'xmltv_ns')
+            node.text = '%s..0' % (season - 1)
+
+class SearchReplaceTitle(BaseProcessor):
+    """
+    Use a web service to normalise titles.
+
+    Replacement rules are fetched from '<base>/title-replacements/+json'
+    for every entry in JSON_BASE_URLS and merged into self.replacements.
+    Each rule has a 'search' regex and a 'replace' string, and may have a
+    'description_match' regex that must match the start of the description
+    (and is then removed from it) before the title is rewritten.
+    """
+    def __init__(self):
+        self.replacements = None
+        for json_base_url in JSON_BASE_URLS:
+            try:
+                data = urlopen('%s/title-replacements/+json' % json_base_url).read()
+            except IOError:
+                log.warning('SearchReplaceTitle: Fetching replacements from %s failed.' % json_base_url)
+                continue
+            try:
+                replacements = json.loads(data)
+            except ValueError:
+                log.warning('SearchReplaceTitle: JSON parse from %s failed.' % json_base_url)
+                continue
+            if self.replacements is None:
+                self.replacements = replacements
+            else:
+                self.replacements += replacements
+            self.valid = True
+            # isEnabledFor() is the real "is debug on?" test; comparing
+            # getEffectiveLevel() >= DEBUG is true at every configured level.
+            if log.isEnabledFor(logging.DEBUG):
+                log.debug('SearchReplaceTitle from %s: ' % json_base_url)
+                for replacement in replacements:
+                    log.debug('  %s', replacement)
+
+    def __call__(self, programme):
+        """Apply every matching replacement rule, in order, to the title."""
+        if not self.valid:
+            return
+
+        title = programme.find('title')
+        # Nothing to normalise without title text (re.match would raise).
+        if title is None or title.text is None:
+            return
+
+        for r in self.replacements:
+            old_title = title.text
+            if not re.match(r['search'], old_title):
+                continue
+            description_match = r.get('description_match')
+            if description_match:
+                # If there's a description_match then make sure the programme
+                # has a desc and it matches
+                desc = programme.find('desc')
+                if desc is None or desc.text is None:
+                    continue
+                if not re.match(description_match, desc.text):
+                    continue
+                desc.text = re.sub(description_match, '', desc.text)
+            title.text = re.sub(r['search'], r['replace'], old_title)
+            if old_title != title.text:
+                log.info(
+                    'SearchReplaceTitle: Changed from "%s" to "%s"',
+                    old_title,
+                    title.text
+                )
+
+
+class Categories(BaseProcessor):
+    """
+    Replace a programme's categories with those a web service lists for
+    its title.  Records come from '<base>/categories/+json' for every entry
+    in JSON_BASE_URLS and are merged into self.categories.
+    """
+    def __init__(self):
+        self.categories = None
+        for json_base_url in JSON_BASE_URLS:
+            try:
+                data = urlopen('%s/categories/+json' % json_base_url).read()
+            except IOError:
+                log.warning('Categories: Fetching data from %s failed.' % json_base_url)
+                continue
+            try:
+                categories = json.loads(data)
+                if self.categories is None:
+                    self.categories = categories
+                else:
+                    self.categories += categories
+                self.valid = True
+            except ValueError:
+                log.warning('Categories: JSON parse from %s failed.' % json_base_url)
+
+    def __call__(self, programme):
+        if not self.valid:
+            return
+        for c in self.categories:
+            # NOTE(review): the guard tests for a 'category' key, but the
+            # code below reads 'show_type' and 'categories' - confirm the
+            # record schema.
+            if 'category' not in c or programme.find('title').text != c['title']:
+                continue
+            # Drop whatever categories the programme already has.
+            for stale in programme.findall('category'):
+                programme.remove(stale)
+            ElementTree.SubElement(programme, 'category').text = c['show_type']
+            if 'categories' in c:
+                for newcat in c['categories']:
+                    ElementTree.SubElement(programme, 'category').text = newcat
+            log.info(
+                'Categories: Added categories for "%s"',
+                programme.find('title').text
+            )
+
+class SkyMoviesChannels(BaseProcessor):
+    """
+    Process Sky Movies channels to put the subtitle data into the description.
+
+    The channel ids are fetched from '<base>/sky_movies_xmltvids/+json'
+    for every entry in JSON_BASE_URLS and merged into
+    self.sky_movies_xmltvid_list.
+    """
+
+    def __init__(self):
+        self.sky_movies_xmltvid_list = None
+        for json_base_url in JSON_BASE_URLS:
+            try:
+                data = urlopen('%s/sky_movies_xmltvids/+json' % json_base_url).read()
+            except IOError:
+                log.warning('SkyMoviesChannels: Fetching data from %s failed.' % json_base_url)
+                continue
+            try:
+                sky_movies_xmltvid_list = json.loads(data)
+            except ValueError:
+                log.warning('SkyMoviesChannels: JSON parse from %s failed.' % json_base_url)
+                continue
+            # This used to test self.categories, an attribute this class
+            # never defines, so every successful load raised AttributeError.
+            if self.sky_movies_xmltvid_list is None:
+                self.sky_movies_xmltvid_list = sky_movies_xmltvid_list
+            else:
+                self.sky_movies_xmltvid_list += sky_movies_xmltvid_list
+            self.valid = True
+            # isEnabledFor() is the real "is debug on?" test; comparing
+            # getEffectiveLevel() >= DEBUG is true at every configured level.
+            if log.isEnabledFor(logging.DEBUG):
+                log.debug('SkyMoviesChannels from %s: ' % json_base_url)
+                for sky_movies_xmltvid in sky_movies_xmltvid_list:
+                    log.debug('  %s', sky_movies_xmltvid)
+
+    def __call__(self, programme):
+        """
+        On a listed channel, prepend the sub-title text to the description
+        and blank the sub-title.  Programmes without a sub-title element
+        are only logged.
+        """
+        if not self.valid:
+            return
+        channel = programme.get('channel')
+        if channel not in self.sky_movies_xmltvid_list:
+            return
+        title = programme.findtext('title')
+        subtitle = programme.find('sub-title')
+        if subtitle is None:
+            log.info(
+                'SkyMoviesChannels: channel=%s title=%s no subtitle',
+                channel,
+                title
+            )
+            return
+        # Tolerate a missing description and empty text on either element
+        # instead of raising part-way through the file.
+        desc = programme.find('desc')
+        if desc is None:
+            desc = ElementTree.SubElement(programme, 'desc')
+        desc.text = ' '.join(part for part in (subtitle.text, desc.text) if part)
+        subtitle.text = ''
+        log.info(
+            'SkyMoviesChannels: channel=%s title=%s fixed',
+            channel,
+            title
+        )
+
+def compare_programme(x, y):
+    """
+       Comparison helper to sort the children elements of an
+       XMLTV programme tag.
+    """
+    programme_order = (
+        'title', 'sub-title', 'desc', 'credits', 'date',
+        'category', 'language', 'orig-language', 'length',
+        'icon', 'url', 'country', 'episode-num', 'video', 'audio',
+        'previously-shown', 'premiere', 'last-chance', 'new',
+        'subtitles', 'rating', 'star-rating',
+    )
+    if programme_order.index(x.tag) < programme_order.index(y.tag):
+        return -1
+    elif programme_order.index(x.tag) > programme_order.index(y.tag):
+        return 1
+    else:
+        return 0
+
+def normalise_movie_title(title):
+    """
+    Normalise titles to help comparisons.
+    """
+    normalised = title.lower()
+    if normalised.startswith('the '):
+        normalised = normalised[4:]
+    normalised = re.sub('[^a-z ]', '', normalised)
+    normalised = re.sub(' +', ' ', normalised)
+    normalised = normalised.replace(' the ', ' ')
+    return normalised
+
+def indent(elem, level=0):
+    """
+    Make ElementTree output pretty.
+    """
+    i = "\n" + level * "\t"
+    if len(elem):
+        if not elem.text or not elem.text.strip():
+            elem.text = i + "\t"
+        if not elem.tail or not elem.tail.strip():
+            elem.tail = i
+        for elem in elem:
+            indent(elem, level+1)
+        if not elem.tail or not elem.tail.strip():
+            elem.tail = i
+    else:
+        if level and (not elem.tail or not elem.tail.strip()):
+            elem.tail = i
+
+def check_for_updates():
+    """
+    Check for script updates.
+    """
+    try:
+        data = urlopen('%s/xmltv-proc-nz/+json' % BASE_URL).read()
+    except IOError:
+        log.critical('Cannot access Internet')
+        sys.exit(3)
+    else:
+        try:
+            stats = json.loads(data)
+        except ValueError as e:
+            print(e)
+            log.critical('Version check failed')
+            sys.exit(4)
+        if stats['version'] > VERSION:
+            log.warning(
+                'A new version (%s) is available at %s (current version %s)',
+                stats['version'],
+                URL,
+                VERSION
+            )
+            if stats['critical']:
+                log.critical('Version update is critical, exiting')
+                sys.exit(5)
+
+if __name__ == '__main__':
+    parser = OptionParser(version='%prog ' + str(VERSION))
+    parser.set_defaults(debug=False)
+    parser.add_option('--debug', action='store_true',
+        help='output debugging information.')
+    parser.add_option('--verbose', action='store_true',
+        help='output verbose information.')
+    (options, args) = parser.parse_args()
+
+    if options.verbose:
+        log.setLevel(logging.INFO)
+
+    if options.debug:
+        log.setLevel(logging.DEBUG)
+
+    check_for_updates()
+
+    if sys.stdin.isatty():
+        if len(args) == 0:
+            log.critical('No input file')
+            sys.exit(2)
+        data = open(args[0], 'rb').read()
+    else:
+        data = sys.stdin.buffer.read()
+
+    processors = [
+        PlusOnes(),
+        SearchReplaceTitle(),
+        Subtitle(),
+        Categories(),
+        Movies(),
+        HD(),
+        SeasonEpisodeFromDesc(),
+        SeasonEpisodeFromSubtitle(),
+        EpisodeFromDesc(),
+        EpisodeFromSubtitle(),
+        SeasonFromDesc(),
+        SeasonFromSubtitle(),
+        Overrides(),
+        SkyMoviesChannels()
+    ]
+
+    tree = ElementTree.XML(data)
+    for processor in processors:
+        for programme in tree.findall('.//programme'):
+            try:
+                processor(programme)
+            except:
+                log.exception("Failed processing with processor: %s", processor)
+        try:
+            processor.post_process(tree)
+        except NotImplementedError:
+            pass
+        except:
+            log.exception("Failed post processing with processor: %s", processor)
+
+    for programme in tree.findall('.//programme'):
+        programme[:] = sorted(programme, key=cmp_to_key(compare_programme))
+
+    indent(tree)
+    print('<?xml version="1.0" encoding="utf-8"?>')
+    print('<!DOCTYPE tv SYSTEM "xmltv.dtd">')
+    print(ElementTree.tostring(tree, encoding='unicode'))
author	Andrew Ruthven <andrew@etc.gen.nz>
	Sat, 11 Jun 2022 05:25:34 +0000 (17:25 +1200)
committer	Andrew Ruthven <andrew@etc.gen.nz>
	Sat, 11 Jun 2022 05:25:34 +0000 (17:25 +1200)