#!/usr/bin/perl -w eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}' if 0; # not running under some shell =pod =encoding utf8 =head1 NAME tv_grab_is - Grab TV listings for Iceland. =head1 SYNOPSIS tv_grab_is --help tv_grab_is [--config-file FILE] --configure [--gui OPTION] tv_grab_is [--config-file FILE] [--output FILE] [--days N] [--offset N] [--quiet] tv_grab_is --capabilities tv_grab_is --version =head1 DESCRIPTION Output TV listings for several channels available in Iceland. First run B to choose, which channels you want to download. Then running B with no arguments will output listings in XML format to standard output. B<--configure> Prompt for which channels, and write the configuration file. B<--config-file FILE> Set the name of the configuration file, the default is B<~/.xmltv/tv_grab_is.conf>. This is the file written by B<--configure> and read when grabbing. B<--gui OPTION> Use this option to enable a graphical interface to be used. OPTION may be 'Tk', or left blank for the best available choice. Additional allowed values of OPTION are 'Term' for normal terminal output (default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar. B<--output FILE> Write to FILE rather than standard output. B<--days N> Grab N days. The default is as many as the source carries. B<--offset N> Start N days in the future. The default is to start from today. B<--quiet> Suppress the progress messages normally written to standard error. B<--capabilities> Show which capabilities the grabber supports. For more information, see L B<--version> Show the version of the grabber. B<--help> Print a help message and exit. =head1 SEE ALSO L. =head1 AUTHOR Yngvi Þór Sigurjónsson (yngvi@teymi.is). Heavily based on tv_grab_dk by Jesper Skov (jskov@zoftcorp.dk). tv_grab_dk originally based on tv_grab_nl by Guido Diepen and Ed Avis (ed@membled.com) and tv_grab_fi by Matti Airas. Version 1.1, Eggert Thorlacius (eggert@thorlacius.com). 
Started out by replacing a couple of channels with XML feeds, but ended up by removing the sjonvarp.is code completely. Left in the "xxx.sjonvarp.is" XMLTV IDs for backwards compatibility. =head1 BUGS =cut use strict; use XMLTV; use XMLTV::Version "$XMLTV::VERSION"; use XMLTV::Capabilities qw/baseline manualconfig cache/; use XMLTV::Description 'Iceland'; use Getopt::Long; use HTML::TreeBuilder; use HTML::Entities; # parse entities use IO::File; use URI; use utf8; # source code is encoded in utf8 use Date::Manip; use XML::LibXSLT; use XML::DOM; use XMLTV::Memoize; use XMLTV::ProgressBar; use XMLTV::Ask; use XMLTV::Mode; use XMLTV::Config_file; use XMLTV::DST; use XMLTV::Get_nice; use XMLTV::Supplement qw/GetSupplement/; use XMLTV::Date; use XMLTV::Usage <= 6) { Date::Manip::Date_Init("SetDate=now,UTC"); } else { Date::Manip::Date_Init("TZ=UTC"); } } # default language my $LANG = 'is'; sub basechid($); sub ispluschannel($); sub process_xml_channel( $$$$$$ ); sub process_ruv_is( $$$$$$ ); sub process_skjarinn_is( $$$$$$ ); sub process_stod2_and_friends( $$$$$$ ); sub get_categories_map(); # Get options XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux'); my ($opt_days, $opt_offset, $opt_help, $opt_output, $opt_configure, $opt_config_file, $opt_gui, $opt_quiet, $opt_list_channels); $opt_days = 7; # default $opt_offset = 0; # default GetOptions('days=i' => \$opt_days, 'offset=i' => \$opt_offset, 'help' => \$opt_help, 'configure' => \$opt_configure, 'config-file=s' => \$opt_config_file, 'gui:s' => \$opt_gui, 'output=s' => \$opt_output, 'quiet' => \$opt_quiet, 'list-channels' => \$opt_list_channels, ) or usage(0); die 'number of days must not be negative' if (defined $opt_days && $opt_days < 0); usage(1) if $opt_help; XMLTV::Ask::init($opt_gui); my $mode = XMLTV::Mode::mode('grab', # default $opt_configure => 'configure', $opt_list_channels => 'list-channels', ); # File that stores which channels to download. 
# File that stores which channels to download.
my $config_file
    = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_is', $opt_quiet);

# Configuration mode: ask the user about every known channel, write one
# "channel ID NAME" line per channel (commented out when not wanted) and exit.
if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);

    # Three-argument open with a lexical handle, so that characters in the
    # file name can never be interpreted as part of the open mode.
    open(my $conf_fh, '>', $config_file)
        or die "cannot write to $config_file: $!";

    # Find the list of available channels.  The progress bar is declared
    # unconditionally; "my $x = ... if COND" has undefined behaviour in Perl.
    my $bar;
    $bar = XMLTV::ProgressBar->new('getting list of channels', 1)
        if not $opt_quiet;
    my %channels = get_channels();
    die 'no channels could be found' if (scalar(keys(%channels)) == 0);
    if (not $opt_quiet) {
        $bar->update();
        $bar->finish();
    }

    my @chs   = sort keys %channels;
    my @names = map { $channels{$_} } @chs;
    my @qs    = map { "add channel $_?" } @names;
    my @want  = ask_many_boolean(1, @qs);

    foreach my $ch (@chs) {
        my $w = shift @want;
        if (not defined $w) {
            # No need to print more to the user - XMLTV::Ask is verbose enough.
            warn("cannot read input, stopping channel questions");
            last;
        }

        # Print a config line, but comment it out if the channel is not wanted.
        print {$conf_fh} '#' if not $w;
        my $name = shift @names;
        print {$conf_fh} "channel $ch $name\n";
        # TODO don't store display-name in config file.
    }

    close $conf_fh or warn "cannot close $config_file: $!";
    say("Finished configuration.");

    exit();
}

# Not configuring, we will need to write some output.
die if $mode ne 'grab' and $mode ne 'list-channels';

# If we are grabbing, check we can read the config file before doing
# anything else.
my @config_lines;
if ($mode eq 'grab') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}

# Arguments for the XMLTV writer; output goes to --output FILE when given.
my %w_args;
if (defined $opt_output) {
    # Explicit mode argument instead of a ">name" string.
    my $fh = IO::File->new($opt_output, 'w');
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
$w_args{UNSAFE}   = 1;    # Needed by process_xml_channel

my $writer = XMLTV::Writer->new(%w_args);
# TODO: standardize these things between grabbers.
$writer->start({
    'generator-info-name' => 'XMLTV',
    'generator-info-url'  => 'http://xmltv.org/',
});

# list-channels mode: write one <channel> element per known channel and exit.
if ($mode eq 'list-channels') {
    # Declared unconditionally; "my $x = ... if COND" has undefined behaviour.
    my $bar;
    $bar = XMLTV::ProgressBar->new('getting list of channels', 1)
        if not $opt_quiet;
    my %channels = get_channels();
    die 'no channels could be found' if (scalar(keys(%channels)) == 0);
    $bar->update() if not $opt_quiet;

    foreach my $ch_did (sort(keys %channels)) {
        my $ch_name = $channels{$ch_did};
        my $ch_xid  = "$ch_did.sjonvarp.is";
        $writer->write_channel({
            id             => $ch_xid,
            'display-name' => [ [ $ch_name ] ],
            'icon'         => [ { 'src' => get_icon($ch_did) } ],
        });
    }
    $bar->finish() if not $opt_quiet;
    $writer->end();
    exit();
}

# Not configuring or writing channels, must be grabbing listings.
die if $mode ne 'grab';

my (%channels, @channels, $ch_did, $ch_name);

# Counter for the config line being examined.  It starts at 0 and is
# incremented before use, so the first entry is reported as line 1.
my $line_num = 0;
foreach (@config_lines) {
    ++$line_num;
    next if not defined;

    # FIXME channel data should be read from the site, and then the
    # config file only gives the XMLTV ids that are interesting.
    #
    # Using internal list of channels now to get correctly encoded
    # channel names.  Config file format remains unchanged for compatibility.
    if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
        $ch_did = $1;
        push @channels, $ch_did;
    }
    else {
        # Report the counter kept above; $. belongs to whichever file handle
        # was read last and says nothing about this list.
        warn "$config_file:$line_num: bad line\n";
    }
}
%channels = get_channels();

######################################################################
# begin main program

# Map of category number to name, filled in by get_categories_map().
my %categories;
get_categories_map();

my $now = parse_date('now');
die if not defined $now;

# Mapping from channel ID to URL
my %stod2AndFriends = (
    "ST2"       => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-dagskra-vikunnar",
    "ST2SPORT"  => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-Sport-dagskra-vikunnar",
    "ST2SPORT2" => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-Sport-2-dagskra-vikunnar",
    "ST2SPORT3" => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-Sport-3-dagskra-vikunnar",
    "ST2SPORT4" => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-Sport-4-dagskra-vikunnar",
    "ST2SPORT5" => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-Sport-5-dagskra-vikunnar",
    "ST2SPORT6" => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-Sport-6-dagskra-vikunnar",
    "ST2EXTRA"  => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-Extra-dagskra-vikunnar",
    "ST2BIO"    => "http://www.stod2.is/XML--dagskrar-feed/XML-Stod-2-Bio-dagskra-vikunnar",
);

# Declare array of [ date, num_days, channel_id, channel_xid, timeoffset,
# function ] and populate it with all pages we need to get.  For sites that
# only support getting a single day at a time, we create $opt_days entries
# per channel, but for the others, we create a single entry.
my @to_get;

my $startday = UnixDate(
    DateCalc(UnixDate($now, '%Y-%m-%d'), "+ $opt_offset days"),
    '%Y-%m-%d');
die if not defined $startday;

foreach $ch_did (@channels) {
    $ch_name = $channels{$ch_did};
    my $ch_xid = "$ch_did.sjonvarp.is";
    $writer->write_channel({
        id             => $ch_xid,
        'display-name' => [ [ $ch_name ] ],
        'icon'         => [ { 'src' => get_icon($ch_did) } ],
    });

    # "plus" channels carry the same schedule shifted by one hour.
    my $timeoffset = ispluschannel($ch_did) ? 1 : 0;
    my $base       = basechid($ch_did);

    if ($base eq 'RUV') {
        push @to_get, [ $startday, $opt_days, $ch_did, $ch_xid, $timeoffset, \&process_ruv_is ];
    }
    elsif ($base eq 'S1') {
        push @to_get, [ $startday, $opt_days, $ch_did, $ch_xid, $timeoffset, \&process_skjarinn_is ];
    }
    elsif (defined($stod2AndFriends{$base})) {
        push @to_get, [ $startday, $opt_days, $ch_did, $ch_xid, $timeoffset, \&process_stod2_and_friends ];
    }
}

my %warned_ch_name; # suppress duplicate warnings

# Declared unconditionally; "my $x = ... if COND" has undefined behaviour.
my $bar;
$bar = XMLTV::ProgressBar->new('fetching data', scalar @to_get)
    if not $opt_quiet;
my @to_get_detailed;
my $num_detailed = 0;

# A named loop variable is used so that the fetch routines cannot clobber $_.
foreach my $job (@to_get) {
    my ($date, $num_days, $ch_tvgids_id, $ch_xmltv_id, $timeoffset, $func) = @$job;
    $func->($writer, $date, $num_days, $ch_tvgids_id, $timeoffset, $ch_xmltv_id);
    $bar->update() if not $opt_quiet;
}
$bar->finish() if not $opt_quiet;
$writer->end();

######################################################################
# subroutine definitions

# Attempts to parse the director from a string.  The argument is edited in
# place through $_[0]: on a match the "Leikstjóri: NAME." clause is removed
# from it.  Returns the director's name, or "" when there is no such clause.
sub get_director($) {
    my $director = "";
    if ($_[0] =~ s/Leikstjóri:\s*([^.]*)\.//) {
        # The pattern has a single capture group, so the name is in $1.
        $director = $1;
    }
    return $director;
}

# Attempts to parse actors from a string.  The argument is edited in place
# through $_[0]: on a match the cast clause is removed from it.  Returns a
# reference to an array of names, or undef when no cast clause is found.
sub get_actors($) {
    my $actors;
    if ($_[0] =~ s/\s*(Aðalhlutverk:|[mM]eðal leikenda eru|Aðalhlutverk leika|[Íí] aðalhutverkum eru|[Ll]eikendur eru)\s*([^.]*)\.//) {
        my @names = split(/, | og /, $2);
        for my $name (@names) {
            $name =~ s/[.]$//;
        }
        push @$actors, @names;
    }
    return $actors;
}

# Cuts the "plus." prefix off an XMLTV channel ID if present.  Uses the same
# literal-prefix test as ispluschannel(), so an ID such as "plusX..." is no
# longer truncated by mistake.
sub basechid($) {
    my ($id) = @_;
    return substr($id, 5) if substr($id, 0, 5) eq "plus.";
    return $id;
}

# True when the channel ID starts with the literal prefix "plus.".
sub ispluschannel($) {
    return substr($_[0], 0, 5) eq "plus.";
}

# Takes XML and an XSL transform that converts it to a list of programme
# elements, and writes the elements to the XMLTV writer (after massaging them
# a bit).
#
# Arguments: writer, first day to keep, number of days to keep, hour offset
# added to each start time, XSL stylesheet text, XML document text.
# Programmes starting outside [first day, first day + number of days) are
# dropped.
sub process_xml_channel( $$$$$$ ) {
    my ($writer, $infromdate, $num_days, $timeoffset, $xsl_transform, $xml) = @_;

    my $fromdate = ParseDate($infromdate);
    my $enddate  = ParseDate(
        UnixDate(DateCalc($fromdate, "+ " . ($num_days) . " days"), '%Y-%m-%d'));

    # Step one: Use XSLT to munge XML to XMLTV format...
    my $parser = XML::LibXML->new();
    die unless defined $parser;
    my $xslt = XML::LibXSLT->new();
    die unless defined $xslt;
    my $stylesheet = $xslt->parse_stylesheet($parser->parse_string($xsl_transform));

    # Suppress the warning message caused by using a Perl variable rather
    # than a physical document().
    my $results;
    {
        local $SIG{__WARN__} = sub {
            warn @_
                unless (defined $_[0]
                && $_[0] =~ /^I\/O warning : failed to load external entity "unknown-/);
        };
        $results = $stylesheet->transform($parser->parse_string($xml));
    }

    # Step 2: Loop through all programmes and all their children.
    # Do some processing on the children and then dump the programme
    # into the writer.
    my @programmeElem = $results->getDocumentElement->getElementsByTagName('programme');
    foreach my $programme (@programmeElem) {
        my $myDate = ParseDate($programme->getAttribute('start'));

        # Skip programmes outside the requested window.
        next unless (Date_Cmp($myDate, $fromdate) >= 0)
            && (Date_Cmp($myDate, $enddate) < 0);

        # Add one hour if this is a plus channel.
        $myDate = DateCalc($myDate, "+ " . $timeoffset . " hours");
        # Convert date to "20081231235900 +0000".
        $myDate = UnixDate($myDate, '%q') . " +0000";
        $programme->setAttribute('start', $myDate);

        foreach my $node ($programme->childNodes) {
            my $node_name = $node->nodeName;

            if ($node_name eq 'title' && $node->hasChildNodes()) {
                my $titleStr = $node->firstChild->data;
                $titleStr =~ s/ - N.TT$//;    # Strip garbage that no one cares about
                $titleStr =~ s/ - Loka..ttur$//;

                # A "(N:M)" suffix in the title becomes an episode-num element.
                my $episode_num;
                if ($titleStr =~ s/\s*\((\d+):(\d+)\)//) {
                    $episode_num = "$1:$2";
                }
                if ($episode_num) {
                    my $episodeNode = XML::LibXML::Element->new('episode-num');
                    $episodeNode->appendTextNode($episode_num);
                    $programme->appendChild($episodeNode);
                }
                $node->firstChild->setData($titleStr);
            }
            elsif ($node_name eq 'sub-title'
                && (!$node->hasChildNodes() || ($node->firstChild->data =~ /^[\n ]*$/)))
            {
                # Drop empty sub-titles.
                $programme->removeChild($node);
            }
            elsif ($node_name eq 'desc') {
                if ($node->hasChildNodes() && ($node->firstChild->data !~ /^[\n ]*$/)) {
                    my $credits = $programme->find("credits")->get_node(0);
                    if (!defined($credits)) {
                        $credits = XML::LibXML::Element->new('credits');
                        $programme->insertAfter($credits, $node);
                    }

                    # If this is the description, extract director and actors.
                    # NOTE(review): the helpers edit the value they are handed;
                    # presumably that is a copy of the node text, so the
                    # description element itself keeps the credits text - confirm.
                    my $director = get_director($node->firstChild->data);
                    if (defined $director && $director ne "") {
                        my $dirNode = XML::LibXML::Element->new('director');
                        $dirNode->appendTextNode($director);
                        $credits->appendChild($dirNode);
                    }

                    my $actors = get_actors($node->firstChild->data);
                    foreach my $actor (@$actors) {
                        my $actNode = XML::LibXML::Element->new('actor');
                        $actNode->appendTextNode($actor);
                        $credits->appendChild($actNode);
                    }
                }
                else {
                    # Drop empty descriptions.
                    $programme->removeChild($node);
                }
            }
            elsif ($node_name eq 'category') {
                # FIXME where does the empty category element come from?
                # [bilbo] sometimes (for no determinable reason) the document
                # lookup is failing - maybe because it's a Perl var and not a
                # real doc, for some reason...?  It fails to load the
                # document() (clue = no "I/O warning" in this case).
                # Run it again and it's fine.  Or not!
                # That's just too flaky, so the category number is mapped to a
                # name here instead (otherwise the nightly validator fails).
                # Now done as a Supplement as of v1.30.
                my $value_node = $node->firstChild;
                if (!defined $value_node) {
                    # An empty category has nothing to map; remove it rather
                    # than die on a method call on undef.
                    $programme->removeChild($node);
                    next;
                }

                # Output the category name from the Supplement, else the
                # category numeric value.
                my $category = $categories{$value_node->data} || $value_node->data;
                $node->removeChildNodes();
                $node->appendText($category);
            }
        }
        $writer->raw($programme->toString . "\n");
    }
}

# Fetches the RUV schedule feed and passes it to process_xml_channel().
# $tv2chan and $ch_xmltv_id are accepted for a uniform interface.
sub process_ruv_is ( $$$$$$ ){
    my ($writer, $fromdate, $num_days, $tv2chan, $timeoffset, $ch_xmltv_id) = @_;

    # NOTE(review): the stylesheet text in the heredocs below appears to have
    # lost most of its XSL markup in this copy of the file; it is reproduced
    # as found - restore it from upstream before relying on it.
    my $xsl_transform = <<"EOF";
children series news educational sports misc movie culture music entertainment news magazine <xsl:apply-templates select="title"/>
EOF

    # [bilbo] A block of stylesheet code was removed here in v1.30 (the
    # category lookup was no good) and replaced with:
    $xsl_transform .= <<"EOF";
EOF

    $xsl_transform .= <<"EOF";
:
EOF

    # Get the XML.  Note that due to some edge cases, we must get data for the
    # day before and after the ones we're looking for and let
    # process_xml_channel remove most of the data.  If we don't,
    # tv_validate_grabber will fail.
    my $dayBefore = UnixDate(DateCalc($fromdate, "- 1 days"), '%Y-%m-%d');
    my $dayAfter  = UnixDate(DateCalc($fromdate, "+ " . ($num_days + 1) . " days"), '%Y-%m-%d');
    my $url = "http://muninn.ruv.is/files/xml/sjonvarpid/$dayBefore/$dayAfter/";
    my $xml = get_nice( $url );
    die( "Failed to fetch $url" ) unless defined( $xml );

    process_xml_channel($writer, $fromdate, $num_days, $timeoffset, $xsl_transform, $xml);
}

# Fetches the Skjar 1 schedule feed and passes it to process_xml_channel().
# $tv2chan and $ch_xmltv_id are accepted for a uniform interface.
sub process_skjarinn_is ( $$$$$$ ){
    my ($writer, $fromdate, $num_days, $tv2chan, $timeoffset, $ch_xmltv_id) = @_;

    # NOTE: For Skjar 1, we ignore $fromdate and $num_days when fetching and
    # always load the same feed URL.
    # NOTE(review): the stylesheet text below appears to have lost most of its
    # XSL markup in this copy of the file; it is reproduced as found.
    my $xsl_transform = <<"EOF";
<xsl:apply-templates select="title"/>
EOF

    my $url = "http://skjareinn.is/einn/dagskrarupplysingar/?channel_id=7&weeks=2&output_format=xml";
    my $xml = get_nice( $url );
    die( "Failed to fetch $url" ) unless defined( $xml );

    process_xml_channel($writer, $fromdate, $num_days, $timeoffset, $xsl_transform, $xml);
}

# Fetches the feed for a Stod 2 family channel (URL looked up in
# %stod2AndFriends by base channel ID) and passes it to process_xml_channel().
sub process_stod2_and_friends ( $$$$$$ ){
    my ($writer, $fromdate, $num_days, $tv2chan, $timeoffset, $ch_xmltv_id) = @_;

    # NOTE: For Stod 2 et al, we ignore $fromdate and $num_days when fetching
    # and always load the next week.

    # [honir] I don't know what this piece of code was intending to do.
    # Specifically
    #   event[not(reference_number/\@value=following::event/reference_number/\@value)]
    # ignores nodes where the same 'reference_number' occurs on any following
    # node.  But the 'reference_number' is not unique to a showing, i.e. a
    # repeat showing of the programme will have the same 'reference_number';
    # however this code will ignore all matching 'event' except the last!
    # I.e. we only grab the last repeated showing of a programme.  This can't
    # be what was intended!
    #
    # NOTE(review): the stylesheet text below appears to have lost most of its
    # XSL markup in this copy of the file; it is reproduced as found.
    my $xsl_transform = <<"EOF";
<xsl:apply-templates select="title"/> :
EOF

    my $url = $stod2AndFriends{basechid($tv2chan)};
    my $xml = get_nice( $url );
    die( "Failed to fetch $url" ) unless defined( $xml );

    process_xml_channel($writer, $fromdate, $num_days, $timeoffset, $xsl_transform, $xml);
}

# Get channel listing: returns a list of (channel ID => display name) pairs.
sub get_channels {
    # I'm too lazy to figure out how to append plus channels programmatically
    return (
        "RUV" => "Ríkissjónvarpið",
#        "S1" => "Skjár 1",
#        "ST2" => "Stöð 2",
#        "ST2SPORT" => "Stöð 2 Sport",
#        "ST2SPORT2" => "Stöð 2 Sport 2",
#        "ST2EXTRA" => "Stöð 2 Extra",
#        "ST2BIO" => "Stöð 2 bíó",
        "plus.RUV" => "Ríkissjónvarpið+",
#        "plus.S1" => "Skjár 1+",
#        "plus.ST2" => "Stöð 2+",
#        "plus.ST2SPORT" => "Stöð 2 Sport+",
#        "plus.ST2SPORT2" => "Stöð 2 Sport 2+",
#        "plus.ST2EXTRA" => "Stöð 2 Extra+",
#        "plus.ST2BIO" => "Stöð 2 bíó+",
    );
}

# Icon URL for a given channel ("plus." variants share the base channel's
# icon).  Returns "" when no logo is known for the channel.
sub get_icon {
    my ($ch_did) = @_;
    $ch_did = basechid($ch_did);
    my %logos = (
        "RUV"       => "ruv",
        "S1"        => "s1",
        "ST2"       => "stod2",
        "ST2SPORT"  => "2sport",
        "ST2SPORT2" => "2sport2",
        "ST2EXTRA"  => "2extra",
        "ST2BIO"    => "2bio",
    );

    if (defined($logos{$ch_did})) {
        return "http://www.sjonvarp.is/images/logos/".$logos{$ch_did}.".gif";
    }
    return "";
}

# Fetch the map of category numbers from the Supplement server and store it
# in %categories.  Lines have the form "NUMBER==NAME", optionally followed by
# a "#" comment; blank lines, comment lines and lines that do not match are
# skipped.
sub get_categories_map () {
    my $supplement = GetSupplement('tv_grab_is', 'category_map');

    # Splitting on runs of CR/LF already strips every line terminator.
    foreach my $line (split /[\r\n]+/, $supplement) {
        trim($line);
        next if $line =~ /^#/ || $line eq '';

        my ($cat_num, $cat_name, $trash) = $line =~ /^(.*)==(.*?)([\s\t]*#.*)?$/;
        # Ignore lines without "==" instead of storing an undef key.
        next if not defined $cat_num;
        $categories{$cat_num} = $cat_name;
    }
}

# Remove leading & trailing whitespace from the argument, in place.
sub trim {
    $_[0] =~ s/^\s+|\s+$//g;
}