#!/usr/bin/perl

=pod

=head1 NAME

tv_grab_ch_search - Grab TV listings for Switzerland (from tv.search.ch webpage).

=head1 SYNOPSIS

tv_grab_ch_search --help

tv_grab_ch_search [--config-file FILE] --configure [--gui OPTION]

tv_grab_ch_search [--config-file FILE] [--output FILE] [--quiet]
           [--days N] [--offset N]

tv_grab_ch_search --list-channels

tv_grab_ch_search --capabilities

tv_grab_ch_search --version

=head1 DESCRIPTION

Output TV listings for several channels available in Switzerland and
(partly) central Europe. The data comes from tv.search.ch. The grabber
relies on parsing HTML so it might stop working at any time.

First run B<tv_grab_ch_search --configure> to choose which channels you
want to download. Then running B<tv_grab_ch_search> with no arguments will
output listings in XML format to standard output.

B<--configure> Ask for each available channel whether to download
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_ch_search.conf>.  This is the file written
by B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days.  The default is fourteen.

B<--offset N> Start N days in the future.  The default is to start
from now on (= zero).

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--list-channels> Write output giving E<lt>channelE<gt> elements for every
channel available (ignoring the config file), but no programmes.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHORS

Daniel Bittel. Inspired by tv_grab_ch by Stefan Siegl.
Patric Mueller.
Markus Keller.

=head1 BUGS

If you happen to find a bug, you're requested to send a mail to one of the
XMLTV mailing lists, see webpages at http://sourceforge.net/projects/xmltv/.

=cut

use warnings;
use strict;
use Encode;
use DateTime;
use IO::File;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Cookies;
use XMLTV;
use XMLTV::Version "$XMLTV::VERSION";
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'Switzerland (tv.search.ch)';
use XMLTV::Supplement qw/GetSupplement/;
use Getopt::Long;
use HTML::TreeBuilder;
use HTML::Entities;
use URI::Escape;
use URI::URL;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::DST;
use XMLTV::Config_file;
use XMLTV::Mode;
use XMLTV::Get_nice;
use XMLTV::Memoize;
use XMLTV::Usage <<END
$0: get Swiss television listings in XMLTV format
To configure: $0 --configure [--config-file FILE] [--gui OPTION]
To grab listings: $0 [--config-file FILE] [--output FILE] [--quiet]
        [--days N] [--offset N]
To list channels: $0 --list-channels
To show capabilities: $0 --capabilities
To show version: $0 --version
END
  ;

use constant {
    ## the timezone search.ch lives in (CET/CEST)
    TIME_ZONE      => 'Europe/Zurich',
    ## the site is queried once per four-hour window, six windows a day
    SLOTS_PER_DAY  => 6,
    HOURS_PER_SLOT => 4,
};

## attributes of xmltv root element
my $head = {
    'source-data-url'     => 'https://search.ch/tv/channels',
    'source-info-url'     => 'https://search.ch/tv',
    'generator-info-name' => 'XMLTV',
    'generator-info-url'  => 'http://xmltv.org/',
};

## language tag attached to every text value we emit
my $lang = "de";

## Parse argv now.  First do undocumented --cache option.
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');

my $opt_configure;
my $opt_config_file;
my $opt_gui;
my $opt_output;
my $opt_days   = 14;
my $opt_offset = 0;
my $opt_quiet  = 0;
my $opt_slow   = 0;    # accepted for command line compatibility, not used
my $opt_list_channels;
my $opt_help;

GetOptions(
    'configure'     => \$opt_configure,
    'config-file=s' => \$opt_config_file,
    'gui:s'         => \$opt_gui,
    'output=s'      => \$opt_output,
    'days=i'        => \$opt_days,
    'offset=i'      => \$opt_offset,
    'quiet'         => \$opt_quiet,
    'slow'          => \$opt_slow,
    'list-channels' => \$opt_list_channels,
    'help'          => \$opt_help,
) or usage(0);

usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);

## make sure offset+days arguments are within range
die "neither offset nor days may be negative"
    if ($opt_offset < 0 || $opt_days < 0);

## calculate global start/stop times ...
my $grab_start = DateTime->now(time_zone => TIME_ZONE)
                         ->add(days => $opt_offset);
my $grab_stop  = DateTime->now(time_zone => TIME_ZONE)
                         ->add(days => $opt_offset + $opt_days);

my $mode = XMLTV::Mode::mode('grab',    # default value
    $opt_configure     => 'configure',
    $opt_list_channels => 'list-channels',
);

## initialize config file support
my $config_file = XMLTV::Config_file::filename($opt_config_file,
                                               'tv_grab_ch_search',
                                               $opt_quiet);
my @config_lines;

if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
}
elsif ($mode eq 'grab' || $mode eq 'list-channels') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}
else {
    die("never heard of XMLTV mode $mode, sorry :-(");
}

## initialize user agent, so that cookie jar can be filled and passed on
my $ua      = LWP::UserAgent->new(keep_alive => 300);
my $cookies = HTTP::Cookies->new();
$ua->cookie_jar($cookies);
$ua->agent("xmltv/$XMLTV::VERSION");
$ua->env_proxy;

## hey, we can't live without channel data, so let's get those now!
## ($bar is declared separately: "my ... if ..." has undefined behaviour.)
my $bar;
$bar = XMLTV::ProgressBar->new('getting list of channels', 1)
    if not $opt_quiet;

my ($secret, %channels) = get_channels();

if ($bar) {
    $bar->update();
    $bar->finish();
}

## read our configuration file now
my @requests;
my $line_num = 0;
foreach my $config_line (@config_lines) {
    $line_num++;
    next unless defined $config_line;

    if ($config_line =~ /^channel:?\s+(\S+)/) {
        my $wanted = $1;
        unless (defined($channels{$wanted})) {
            warn("\nConfigured channel $wanted not available anymore. \nPlease reconfigure tv_grab_ch_search.\n");
            next;
        }
        push @requests, $wanted;
    }
    else {
        warn "$config_file:$line_num: bad line\n";
    }
}

## if we're requested to do so, write out a new config file ...
if ($mode eq 'configure') {
    open(my $config_fh, '>', $config_file)
        or die("cannot write to $config_file, due to: $!");

    ## now let's annoy the user, sorry, I meant ask ..
    my @chs   = sort keys %channels;
    my @names = map { $channels{$_} } @chs;
    my @qs    = map { "add channel $_?" } @names;
    my @want  = ask_many_boolean(1, @qs);

    foreach my $ch (@chs) {
        my $w      = shift @want;
        my $chname = shift @names;

        unless (defined $w) {
            warn("cannot read input, stopping to ask questions ...");
            last;
        }

        ## comment the line out if the user answered 'no'
        print {$config_fh} '#' if not $w;

        ## the display name is stored as a trailing comment; it makes it
        ## a lot easier to choose which channel to comment/uncomment
        ## when editing the config file by hand.
        print {$config_fh} "channel $ch #$chname\n";
    }

    close $config_fh
        or warn "unable to nicely close the config file: $!";

    say("Finished configuration.");
    exit 0;
}

## well, we don't have to write a config file, so, probably it's some
## xml stuff :)  if not, let's go dying ...
die unless ($mode eq 'grab' or $mode eq 'list-channels');

my %writer_args;
if (defined $opt_output) {
    ## explicit mode argument, so the file name is never parsed for one
    my $handle = IO::File->new($opt_output, 'w');
    die "cannot write to output file, $opt_output: $!"
        unless (defined $handle);
    $writer_args{'OUTPUT'} = $handle;
}

$writer_args{'encoding'} = 'utf-8';
$writer_args{offset}     = $opt_offset;
$writer_args{days}       = $opt_days;
$writer_args{cutoff}     = "000000";

## create our writer object
my $writer = XMLTV::Writer->new(%writer_args);
$writer->start($head);

if ($mode eq 'list-channels') {
    ## sorted, so the output is reproducible between runs
    foreach my $ch (sort keys %channels) {
        my %channel = (
            'id'           => channel_id($ch),
            'display-name' => [[$channels{$ch}, $lang]],
        );
        $writer->write_channel(\%channel);
    }

    $writer->end();
    exit 0;
}

## there's only one thing, why we might exist: write out tvdata!
die unless ($mode eq 'grab');
die "No channels specified, run me with --configure flag\n"
    unless (scalar(@requests));

## write out <channel> tags
foreach my $ch (@requests) {
    my %channel = (
        'id'           => channel_id($ch),
        'display-name' => [[$channels{$ch}, $lang]],
    );
    $writer->write_channel(\%channel);
}

## store the selected channels on the site (kept in the session cookie).
## The channel keys were already URI-escaped by get_channels().
{
    my $form_body = join '&',
        'secret=' . uri_escape($secret),
        map { "channels[]=$_" } @requests;

    my $req = HTTP::Request->new(POST => $head->{'source-data-url'});
    $req->content_type('application/x-www-form-urlencoded');
    $req->content($form_body);

    my $response = $ua->request($req);
    warn "could not store channel selection: " . $response->status_line . "\n"
        unless $response->is_success;
}

## write out <programme> tags
grab_channels();

## hey, looks like we've finished ...
$writer->end();


## channel_id($s) :: turn site channel id into an xmltv id
sub channel_id {
    my $s = shift;

    $s =~ s|^tv_||;
    return "$s.search.ch";
}

## _parse_site_time($str) :: turn a "YYYY-MM-DD hh:mm:ss" style timestamp
## (single-character separators) into a DateTime in the site's time zone.
## Returns undef if the string is missing, malformed or not a valid time.
sub _parse_site_time {
    my $str = shift;

    return undef unless defined $str;
    my ($year, $month, $day, $hour, $minute, $second) =
        $str =~ m/\A(\d{4})\D(\d{2})\D(\d{2})\D(\d{2})\D(\d{2})\D(\d{2})/
        or return undef;

    my $dt = eval {
        DateTime->new(
            year      => $year,
            month     => $month,
            day       => $day,
            hour      => $hour,
            minute    => $minute,
            second    => $second,
            time_zone => TIME_ZONE,
        );
    };
    return $dt;
}

## _detail_values($tv_show, $class) :: the layout uses dl lists for the
## programme details and only the dt tag carries a meaningful class.  For
## every element of class $class below $tv_show return the text of its
## right-hand sibling (the unmarked dd tag).  Labels without an element
## sibling are skipped.
sub _detail_values {
    my ($tv_show, $class) = @_;

    my @values;
    foreach my $label ($tv_show->look_down('class' => $class)) {
        my $value = $label->right();
        next unless ref $value;
        push @values, $value->as_text();
    }
    return @values;
}

## parse_page($tb, $start_parse_date) :: walk a parsed overview page and
## write a programme for every show found that does not start before
## $start_parse_date (a DateTime).  Shows lacking a link, a parsable start
## time or a title are skipped.
sub parse_page {
    my ($tb, $start_parse_date) = @_;

    foreach my $tv_channel ($tb->look_down('class' => 'sl-card tv-index-channel')) {
        my $element_id = $tv_channel->attr('id');
        next unless defined($element_id) && length($element_id) > 3;
        my $channel_id = substr($element_id, 3);    # tv-sf1 -> sf1

        foreach my $tv_show ($tv_channel->look_down('class', qr/(^| )tv-tooltip( |$)/)) {
            my %show;
            $show{channel} = channel_id($channel_id);

            ## start and end time are carried in the query string of the
            ## show's link
            my $link = $tv_show->look_down('_tag', 'a');
            next unless defined($link);
            my $href = $link->attr('href');
            next unless defined($href);
            my %params = URI::URL->new($href)->query_form();

            my $show_start = _parse_site_time($params{'start'});
            next unless defined($show_start);

            # skip shows starting before the start date to prevent duplicates
            next if $show_start < $start_parse_date;
            $show{start} = $show_start->strftime("%Y%m%d%H%M%S %z");

            my $show_stop = _parse_site_time($params{'end'});
            $show{stop} = $show_stop->strftime("%Y%m%d%H%M%S %z")
                if defined($show_stop);

            ## title: first text node below the h2
            my $title_tag = $tv_show->look_down('_tag' => 'h2');
            next unless $title_tag;
            $title_tag->objectify_text();
            my $title_text = $title_tag->look_down('_tag', '~text');
            next unless $title_text;
            $show{'title'} = [[$title_text->attr('text'), $lang]];

            my $sub_title = $tv_show->look_down('_tag' => 'h3');
            $show{'sub-title'} = [[$sub_title->as_text(), $lang]]
                if ($sub_title);

            # Description
            foreach my $text (_detail_values($tv_show, 'tv-detail-description')) {
                $show{desc} = [[$text, $lang]];
            }

            # Production year
            foreach my $text (_detail_values($tv_show, 'tv-detail-year tv-detail-short')) {
                $show{date} = $text;
            }

            # Category
            my %seen_category;
            foreach my $text (_detail_values($tv_show, 'tv-detail-catname tv-detail-short')) {
                foreach my $category (split(m/\s*[\/]\s*/, $text)) {
                    next unless $category;
                    push @{$show{category}}, [$category, $lang];
                    $seen_category{$category} = 1;
                }
            }

            # Production info: added as further categories, without
            # repeating one that is already listed
            foreach my $text (_detail_values($tv_show, 'tv-detail-production tv-detail-short')) {
                $text =~ s/\(.*//;
                foreach my $category (split(m/\s*[\/,]\s*/, $text)) {
                    next unless $category;
                    next if $seen_category{$category}++;
                    push @{$show{category}}, [$category, $lang];
                }
            }

            # Production country
            foreach my $text (_detail_values($tv_show, 'tv-detail-country tv-detail-short')) {
                foreach my $country (split(m/\s*[\/,]\s*/, $text)) {
                    next unless length $country;
                    push @{$show{country}}, [$country, $lang];
                }
            }

            # Cast
            # NOTE(review): everything from the first '(' on is dropped, so
            # a list like "A (role), B (role)" keeps only "A" - confirm
            # against the site's current markup.
            foreach my $text (_detail_values($tv_show, 'tv-detail-cast')) {
                $text =~ s/\(.*//;
                my @actors = split(m/\s*,\s*/, $text);
                $show{credits}{actor} = \@actors;
            }

            # Director
            foreach my $text (_detail_values($tv_show, 'tv-detail-director tv-detail-short')) {
                my @directors = split(m/\s*,\s*/, $text);
                $show{credits}{director} = \@directors;
            }

            # Screenplay
            foreach my $text (_detail_values($tv_show, 'tv-detail-writer tv-detail-short')) {
                my @writers = split(m/\s*,\s*/, $text);
                $show{credits}{writer} = \@writers;
            }

            # Repeat
            $show{'previously-shown'} = {}
                if $tv_show->look_down('class' => 'tv-detail-repetition');

            # Episode
            foreach my $text (_detail_values($tv_show, 'tv-detail-episode tv-detail-short')) {
                $show{'episode-num'} = [[$text, 'onscreen']];
            }

            # Subtitles for the hard of hearing.  The title is spelled with
            # escapes so that the UTF-8 bytes compared against the page do
            # not depend on the encoding of this source file.
            my $deaf_title = encode("utf-8", "Untertitel f\x{fc}r Geh\x{f6}rlose");
            $show{subtitles} = [{ type => 'teletext' }]
                if $tv_show->look_down('_tag' => 'img', 'title' => $deaf_title);

            # Two-channel audio
            $show{audio}{stereo} = 'bilingual'
                if $tv_show->look_down('_tag' => 'img', 'title' => 'Zweikanalton');

            # 16:9
            $show{video}{aspect} = '16:9'
                if $tv_show->look_down('_tag' => 'img', 'title' => '16:9');

            $writer->write_programme(\%show);
        }
    }
    return;
}

## grab_channels() :: fetch the overview pages for every day between
## $grab_start and $grab_stop (one page per four-hour window) and hand them
## to parse_page().  At least one day is always grabbed.
sub grab_channels {
    ## work on a copy, DateTime->add() modifies the object in place
    my $grab_date = $grab_start->clone();
    my $url       = $head->{'source-info-url'};

    $bar = XMLTV::ProgressBar->new('grabbing channels       ',
                                   SLOTS_PER_DAY * $opt_days)
        if not $opt_quiet;

    do {
        my $loop_date = $grab_date->ymd('-');

        for my $slot (0 .. SLOTS_PER_DAY - 1) {
            my $hour     = HOURS_PER_SLOT * $slot;
            my $page_url = "$url?time=$loop_date+$hour.00";

            my $req = HTTP::Request->new(GET => $page_url);
            $req->header('Accept' => 'text/html');
            my $response = $ua->request($req);

            if ($response->is_success) {
                my $tb = HTML::TreeBuilder->new();
                # otherwise, html5 tags like section are stripped out
                $tb->ignore_unknown(0);
                $tb->parse($response->content)
                    or die "cannot parse content of $page_url";
                $tb->eof;

                parse_page($tb,
                    $grab_date->clone()->truncate("to" => "hour")->set_hour($hour));

                $tb->delete();
            }
            else {
                warn "cannot fetch $page_url: " . $response->status_line . "\n";
            }

            $bar->update() if $bar;
        }

        $grab_date->add(days => 1);
    } while (DateTime->compare($grab_stop, $grab_date) > 0);

    $bar->finish() if $bar;
    return;
}

## get_channels() :: download the channel selection page and return a list
## of the form token ("secret") followed by key/value pairs mapping the
## URI-escaped site channel id to its display name.  Dies if the page
## cannot be fetched or carries no secret.
sub get_channels {
    my %channels;
    my $url = $head->{'source-data-url'};
    my $tb  = HTML::TreeBuilder->new();

    # For some reason, we need to fetch this page twice. Probably a bug in
    # search.ch.  If you open https://search.ch/tv/channels as first page
    # in private mode in Firefox, the first click on the Save button
    # doesn't work either.
    $ua->get($url);
    my $response = $ua->get($url);
    die "cannot fetch $url: " . $response->status_line . "\n"
        unless $response->is_success;

    $tb->parse($response->content)
        or die "cannot parse content of $url";
    $tb->eof;

    my $secret_field = $tb->look_down('name', 'secret');
    die "cannot find the secret form field on $url\n"
        unless $secret_field;
    my $secret = $secret_field->attr('value');
    die "secret form field on $url has no value\n"
        unless defined $secret;

    ## getting the channels directly selectable
    foreach my $label ($tb->look_down('_tag' => 'label')) {
        my $input = $label->look_down('_tag' => 'input');
        next unless $input;

        my $id = $input->id;    # tv-channel-sf1
        next unless defined($id) && length($id) > 11;
        next unless (substr($id, 0, 10) eq "tv-channel");

        $channels{uri_escape(substr($id, 11))} = $label->as_text();
    }

    $tb->delete;

    return ($secret, %channels);
}

## get_page($url) :: download $url via get_nice, retrying once with a
## "&retry=N" suffix if the first attempt dies; die after two failures.
## (Currently not called from within this file.)
sub get_page {
    my $url = shift;

    local $SIG{__DIE__} = sub { die "\n$url: $_[0]" };

    for my $retry (0 .. 1) {
        my $got = eval { get_nice($url . ($retry ? "&retry=$retry" : "")); };
        next if ($@);    # unable to download, doesn't look too good for us.
        return $got;
    }

    die "cannot grab webpage $url (tried 2 times). giving up. sorry";
}