Browse Source

eztv - support for cookies and https

themage
theMage 5 years ago
parent
commit
5961308da3
2 changed files with 635 additions and 3 deletions
  1. 12 3
      lib/CinePantufas/Source/EZTV.pm
  2. 623 0
      lib/HTTP/CookieJar.pm

+ 12 - 3
lib/CinePantufas/Source/EZTV.pm

@ -7,6 +7,7 @@ use CinePantufas::Core;
7 7
use CinePantufas::Priority qw(priority);
8 8
9 9
use HTTP::Tiny;
10
use HTTP::CookieJar;
10 11
11 12
my $prio = qr{HDTV|LOL|720p|x264|mkv|mp4|avi};
12 13
my %prio = (
@ -19,6 +20,14 @@ my %prio = (
19 20
  avi     => 1,
20 21
);
21 22
23
my $ua;
24
25
sub _ua {
26
  return $ua ||= HTTP::Tiny->new(
27
      cookie_jar  => HTTP::CookieJar->new(),
28
    );
29
}
30
22 31
sub source_name { "eztv" }
23 32
24 33
sub import {
@ -30,7 +39,7 @@ sub import {
30 39
sub retrieve_show_list {
31 40
  my $class = shift;
32 41
33
  my $resp = HTTP::Tiny->new->get('http://eztv.it');
42
  my $resp = _ua->get('https://eztv.it');
34 43
35 44
  die "Failed: $resp->{status} $resp->{reason}\n"
36 45
    unless $resp->{success};
@ -55,7 +64,7 @@ sub retrieve_show_list {
55 64
sub get_episode_list {
56 65
  my ($class,$show) = @_;
57 66
58
  my $resp = HTTP::Tiny->new->post_form('http://eztv.it/search/',
67
  my $resp = _ua->post_form('https://eztv.it/search/',
59 68
        $show->{params}
60 69
    );
61 70
@ -78,7 +87,7 @@ sub get_episode_list {
78 87
      next;
79 88
    }
80 89
81
    $_ = "http:$_" for grep { substr($_,0,1) eq '/' } values %links;
90
    $_ = "https:$_" for grep { substr($_,0,1) eq '/' } values %links;
82 91
  
83 92
    my $episode=($ses+0).'x'.sprintf('%02d', $epi);
84 93
    my $rowprio = priority($name);

+ 623 - 0
lib/HTTP/CookieJar.pm

@ -0,0 +1,623 @@
1
use v5.10;
2
use strict;
3
use warnings;
4
5
package HTTP::CookieJar;
6
# ABSTRACT: A minimalist HTTP user agent cookie jar
7
our $VERSION = '0.006'; # VERSION
8
9
use Carp       ();
10
use HTTP::Date ();
11
12
my $HAS_MPS = eval { require Mozilla::PublicSuffix; 1 };
13
14
# =construct new
15
#
16
#     my $jar = HTTP::CookieJar->new;
17
#
18
# Return a new, empty cookie jar
19
#
20
# =cut
21
22
sub new {
23
    my ($class) = @_;
24
    bless { store => {} }, $class;
25
}
26
27
# =method add
28
#
29
#     $jar->add(
30
#         "http://www.example.com/", "lang=en-US; Path=/; Domain=example.com"
31
#     );
32
#
33
# Given a request URL and a C<Set-Cookie> header string, attempts to adds the
34
# cookie to the jar.  If the cookie is expired, instead it deletes any matching
35
# cookie from the jar.  A C<Max-Age> attribute will be converted to an absolute
36
# C<Expires> attribute.
37
#
38
# It will throw an exception if the request URL is missing or invalid.  Returns true if
39
# successful cookie processing or undef/empty-list on failure.
40
#
41
# =cut
42
43
sub add {
44
    my ( $self, $request, $cookie ) = @_;
45
    return unless length $cookie;
46
    my ( $scheme, $host, $port, $request_path ) = eval { _split_url($request) };
47
    Carp::croak($@) if $@;
48
49
    return unless my $parse = _parse_cookie($cookie);
50
    my $name = $parse->{name};
51
52
    # check and normalize domain
53
    if ( exists $parse->{domain} ) {
54
        _normalize_domain( $host, $parse ) or return;
55
    }
56
    else {
57
        $parse->{domain}   = $host;
58
        $parse->{hostonly} = 1;
59
    }
60
    my $domain = $parse->{domain};
61
62
    # normalize path
63
    if ( !exists $parse->{path} || substr( $parse->{path}, 0, 1 ) ne "/" ) {
64
        $parse->{path} = _default_path($request_path);
65
    }
66
    my $path = $parse->{path};
67
    # set timestamps and normalize expires
68
    my $now = $parse->{creation_time} = $parse->{last_access_time} = time;
69
    if ( exists $parse->{'max-age'} ) {
70
        $parse->{expires} = $now + delete $parse->{'max-age'};
71
    }
72
    # update creation time from old cookie, if exists
73
    if ( my $old = $self->{store}{$domain}{$path}{$name} ) {
74
        $parse->{creation_time} = $old->{creation_time};
75
    }
76
    # if cookie has expired, purge any old matching cookie, too
77
    if ( defined $parse->{expires} && $parse->{expires} < $now ) {
78
        delete $self->{store}{$domain}{$path}{$name};
79
    }
80
    else {
81
        $self->{store}{$domain}{$path}{$name} = $parse;
82
    }
83
    return 1;
84
}
85
86
# =method clear
87
#
88
#     $jar->clear
89
#
90
# Empties the cookie jar.
91
#
92
# =cut
93
94
sub clear {
95
    my ($self) = @_;
96
    $self->{store} = {};
97
    return 1;
98
}
99
100
# =method cookies_for
101
#
102
#     my @cookies = $jar->cookies_for("http://www.example.com/foo/bar");
103
#
104
# Given a request URL, returns a list of hash references representing cookies
105
# that should be sent.  The hash references are copies -- changing values
106
# will not change the cookies in the jar.
107
#
108
# Cookies set C<secure> will only be returned if the request scheme is C<https>.
109
# Expired cookies will not be returned.
110
#
111
# Keys of a cookie hash reference might include:
112
#
113
# =for :list
114
# * name -- the name of the cookie
115
# * value -- the value of the cookie
116
# * domain -- the domain name to which the cookie applies
117
# * path -- the path to which the cookie applies
118
# * expires -- if present, when the cookie expires in epoch seconds
119
# * secure -- if present, the cookie was set C<Secure>
120
# * httponly -- if present, the cookie was set C<HttpOnly>
121
# * hostonly -- if present, the cookie may only be used with the domain as a host
122
# * creation_time -- epoch seconds since the cookie was first stored
123
# * last_access_time -- epoch seconds since the cookie was last stored
124
#
125
# Keep in mind that C<httponly> means it should only be used in requests and not
126
# made available via Javascript, etc.  This is pretty meaningless for Perl user
127
# agents.
128
#
129
# Generally, user agents should use the C<cookie_header> method instead.
130
#
131
# It will throw an exception if the request URL is missing or invalid.
132
#
133
# =cut
134
135
sub cookies_for {
136
    my ( $self, $request ) = @_;
137
    my ( $scheme, $host, $port, $request_path ) = eval { _split_url($request) };
138
    Carp::croak($@) if $@;
139
140
    my @found;
141
    my $now = time;
142
    for my $cookie ( $self->_all_cookies ) {
143
        next if $cookie->{hostonly}           && $host ne $cookie->{domain};
144
        next if $cookie->{secure}             && $scheme ne 'https';
145
        next if defined( $cookie->{expires} ) && $cookie->{expires} < $now;
146
        next unless _domain_match( $host, $cookie->{domain} );
147
        next unless _path_match( $request_path, $cookie->{path} );
148
        push @found, $cookie;
149
    }
150
    @found = sort {
151
        length( $b->{path} ) <=> length( $a->{path} )
152
          || $a->{creation_time} <=> $b->{creation_time}
153
    } @found;
154
    return @found;
155
}
156
157
# =method cookie_header
158
#
159
#     my $header = $jar->cookie_header("http://www.example.com/foo/bar");
160
#
161
# Given a request URL, returns a correctly-formatted string with all relevant
162
# cookies for the request.  This string is ready to be used in a C<Cookie> header
163
# in an HTTP request.  E.g.:
164
#
165
#     SID=31d4d96e407aad42; lang=en-US
166
#
167
# It follows the same exclusion rules as C<cookies_for>.
168
#
169
# If the request is invalid or no cookies apply, it will return an empty string.
170
#
171
# =cut
172
173
sub cookie_header {
174
    my ( $self, $req ) = @_;
175
    return join( "; ", map { "$_->{name}=$_->{value}" } $self->cookies_for($req) );
176
}
177
178
# =method dump_cookies
179
#
180
#     my @list = $jar->dump_cookies;
181
#     my @list = $jar->dump_cookies( { persistent => 1 } );
182
#
183
# Returns a list of raw cookies in string form.  The strings resemble what
184
# would be received from C<Set-Cookie> headers, but with additional internal
185
# fields.  The list is only intended for use with C<load_cookies> to allow
186
# cookie jar persistence.
187
#
188
# If a hash reference with a true C<persistent> key is given as an argument,
189
# cookies without an C<Expires> time (i.e. "session cookies") will be omitted.
190
#
191
# Here is a trivial example of saving a cookie jar file with L<Path::Tiny>:
192
#
193
#     path("jar.txt")->spew( join "\n", $jar->dump_cookies );
194
#
195
# =cut
196
197
sub dump_cookies {
198
    my ( $self, $args ) = @_;
199
    my @list;
200
    for my $c ( $self->_all_cookies ) {
201
        my @parts = "$c->{name}=$c->{value}";
202
        if ( defined $c->{expires} ) {
203
            push @parts, 'Expires=' . HTTP::Date::time2str( $c->{expires} );
204
        }
205
        else {
206
            next if $args->{persistent};
207
        }
208
        for my $attr (qw/Domain Path Creation_Time Last_Access_Time/) {
209
            push @parts, "$attr=$c->{lc $attr}" if defined $c->{ lc $attr };
210
        }
211
        for my $attr (qw/Secure HttpOnly HostOnly/) {
212
            push @parts, $attr if $c->{ lc $attr };
213
        }
214
        push @list, join( "; ", @parts );
215
    }
216
    return @list;
217
}
218
219
# =method load_cookies
220
#
221
#     $jar->load_cookies( @cookies );
222
#
223
# Given a list of cookie strings from C<dump_cookies>, it adds them to
224
# the cookie jar.  Cookies added in this way will supersede any existing
225
# cookies with similar domain, path and name.
226
#
227
# It returns the jar object for convenience when loading a new object:
228
#
229
#     my $jar = HTTP::CookieJar->new->load_cookies( @cookies );
230
#
231
# Here is a trivial example of loading a cookie jar file with L<Path::Tiny>:
232
#
233
#     my $jar = HTTP::CookieJar->new->load_cookies(
234
#         path("jar.txt")->lines
235
#     );
236
#
237
# =cut
238
239
sub load_cookies {
240
    my ( $self, @cookies ) = @_;
241
    for my $cookie (@cookies) {
242
        my $p = _parse_cookie( $cookie, 1 );
243
        next unless exists $p->{domain} && exists $p->{path};
244
        $p->{$_} //= time for qw/creation_time last_access_time/;
245
        $self->{store}{ $p->{domain} }{ $p->{path} }{ $p->{name} } = $p;
246
    }
247
    return $self;
248
}
249
250
#--------------------------------------------------------------------------#
251
# private methods
252
#--------------------------------------------------------------------------#
253
254
# return a copy of all cookies
255
sub _all_cookies {
256
    return map {
257
        { %$_ }
258
    } map { values %$_ } map { values %$_ } values %{ $_[0]->{store} };
259
}
260
261
#--------------------------------------------------------------------------#
262
# Helper subroutines
263
#--------------------------------------------------------------------------#
264
265
my $pub_re = qr/(?:domain|path|expires|max-age|httponly|secure)/;
266
my $pvt_re = qr/(?:$pub_re|creation_time|last_access_time|hostonly)/;
267
268
sub _parse_cookie {
269
    my ( $cookie, $private ) = @_;
270
    my ( $kvp, @attrs ) = split /;/, $cookie // '';
271
    my ( $name, $value ) =
272
      map { s/^\s*//; s/\s*$//; $_ } split( /=/, $kvp // '', 2 ); ## no critic
273
274
    return unless length $name;
275
    my $parse = { name => $name, value => $value // "" };
276
    for my $s (@attrs) {
277
        next unless defined $s && $s =~ /\S/;
278
        my ( $k, $v ) = map { s/^\s*//; s/\s*$//; $_ } split( /=/, $s, 2 ); ## no critic
279
        $k = lc $k;
280
        next unless $private ? ( $k =~ m/^$pvt_re$/ ) : ( $k =~ m/^$pub_re$/ );
281
        $v = 1 if $k =~ m/^(?:httponly|secure|hostonly)$/; # boolean flag if present
282
        $v = HTTP::Date::str2time($v) // 0 if $k eq 'expires'; # convert to epoch
283
        next unless length $v;
284
        $v =~ s{^\.}{}                            if $k eq 'domain'; # strip leading dot
285
        $v =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if $k eq 'path';   # unescape
286
        $parse->{$k} = $v;
287
    }
288
    return $parse;
289
}
290
291
sub _domain_match {
292
    my ( $string, $dom_string ) = @_;
293
    return 1 if $dom_string eq $string;
294
    return unless $string =~ /[a-z]/i;                               # non-numeric
295
    if ( $string =~ s{\Q$dom_string\E$}{} ) {
296
        return substr( $string, -1, 1 ) eq '.';                      # "foo."
297
    }
298
    return;
299
}
300
301
sub _normalize_domain {
302
    my ( $host, $parse ) = @_;
303
304
    if ($HAS_MPS) {
305
        my $host_pub_suff = eval { Mozilla::PublicSuffix::public_suffix($host) } // '';
306
        if ( _domain_match( $host_pub_suff, $parse->{domain} ) ) {
307
            if ( $parse->{domain} eq $host ) {
308
                return $parse->{hostonly} = 1;
309
            }
310
            else {
311
                return;
312
            }
313
        }
314
    }
315
316
    if ( $parse->{domain} !~ m{\.} && $parse->{domain} eq $host ) {
317
        return $parse->{hostonly} = 1;
318
    }
319
320
    return _domain_match( $host, $parse->{domain} );
321
}
322
323
sub _default_path {
324
    my ($path) = @_;
325
    return "/" if !length $path || substr( $path, 0, 1 ) ne "/";
326
    my ($default) = $path =~ m{^(.*)/}; # greedy to last /
327
    return length($default) ? $default : "/";
328
}
329
330
sub _path_match {
331
    my ( $req_path, $cookie_path ) = @_;
332
    return 1 if $req_path eq $cookie_path;
333
    if ( $req_path =~ m{^\Q$cookie_path\E(.*)} ) {
334
        my $rest = $1;
335
        return 1 if substr( $cookie_path, -1, 1 ) eq '/';
336
        return 1 if substr( $rest,        0,  1 ) eq '/';
337
    }
338
    return;
339
}
340
341
sub _split_url {
342
    my $url = shift;
343
    die(qq/No URL provided\n/) unless length $url;
344
345
    # URI regex adapted from the URI module
346
    # XXX path_query here really chops at ? or # to get just the path and not the query
347
    my ( $scheme, $authority, $path_query ) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#?]*)>
348
      or die(qq/Cannot parse URL: '$url'\n/);
349
350
    $scheme = lc $scheme;
351
    $path_query = "/$path_query" unless $path_query =~ m<\A/>;
352
    $path_query =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
353
354
    my $host = ( length($authority) ) ? lc $authority : 'localhost';
355
    $host =~ s/\A[^@]*@//; # userinfo
356
    my $port = do {
357
        $host =~ s/:([0-9]*)\z// && length $1
358
          ? $1
359
          : ( $scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef );
360
    };
361
362
    return ( $scheme, $host, $port, $path_query );
363
}
364
365
1;
366
367
368
# vim: ts=4 sts=4 sw=4 et:
369
370
__END__
371
372
=pod
373
374
=encoding UTF-8
375
376
=head1 NAME
377
378
HTTP::CookieJar - A minimalist HTTP user agent cookie jar
379
380
=head1 VERSION
381
382
version 0.006
383
384
=head1 SYNOPSIS
385
386
  use HTTP::CookieJar;
387
388
  my $jar = HTTP::CookieJar->new;
389
390
  # add cookie received from a request
391
  $jar->add( "http://www.example.com/", "CUSTOMER=WILE_E_COYOTE; Path=/; Domain=example.com" );
392
393
  # extract cookie header for a given request
394
  my $cookie = $jar->cookie_header( "http://www.example.com/" );
395
396
=head1 DESCRIPTION
397
398
This module implements a minimalist HTTP user agent cookie jar in
399
conformance with L<RFC 6265|http://tools.ietf.org/html/rfc6265>.
400
401
Unlike the commonly used L<HTTP::Cookies> module, this module does
402
not require use of L<HTTP::Request> and L<HTTP::Response> objects.
403
An LWP-compatible adapter is available as L<HTTP::CookieJar::LWP>.
404
405
=head1 CONSTRUCTORS
406
407
=head2 new
408
409
    my $jar = HTTP::CookieJar->new;
410
411
Return a new, empty cookie jar
412
413
=head1 METHODS
414
415
=head2 add
416
417
    $jar->add(
418
        "http://www.example.com/", "lang=en-US; Path=/; Domain=example.com"
419
    );
420
421
Given a request URL and a C<Set-Cookie> header string, attempts to adds the
422
cookie to the jar.  If the cookie is expired, instead it deletes any matching
423
cookie from the jar.  A C<Max-Age> attribute will be converted to an absolute
424
C<Expires> attribute.
425
426
It will throw an exception if the request URL is missing or invalid.  Returns true if
427
successful cookie processing or undef/empty-list on failure.
428
429
=head2 clear
430
431
    $jar->clear
432
433
Empties the cookie jar.
434
435
=head2 cookies_for
436
437
    my @cookies = $jar->cookies_for("http://www.example.com/foo/bar");
438
439
Given a request URL, returns a list of hash references representing cookies
440
that should be sent.  The hash references are copies -- changing values
441
will not change the cookies in the jar.
442
443
Cookies set C<secure> will only be returned if the request scheme is C<https>.
444
Expired cookies will not be returned.
445
446
Keys of a cookie hash reference might include:
447
448
=over 4
449
450
=item *
451
452
name -- the name of the cookie
453
454
=item *
455
456
value -- the value of the cookie
457
458
=item *
459
460
domain -- the domain name to which the cookie applies
461
462
=item *
463
464
path -- the path to which the cookie applies
465
466
=item *
467
468
expires -- if present, when the cookie expires in epoch seconds
469
470
=item *
471
472
secure -- if present, the cookie was set C<Secure>
473
474
=item *
475
476
httponly -- if present, the cookie was set C<HttpOnly>
477
478
=item *
479
480
hostonly -- if present, the cookie may only be used with the domain as a host
481
482
=item *
483
484
creation_time -- epoch seconds since the cookie was first stored
485
486
=item *
487
488
last_access_time -- epoch seconds since the cookie was last stored
489
490
=back
491
492
Keep in mind that C<httponly> means it should only be used in requests and not
493
made available via Javascript, etc.  This is pretty meaningless for Perl user
494
agents.
495
496
Generally, user agents should use the C<cookie_header> method instead.
497
498
It will throw an exception if the request URL is missing or invalid.
499
500
=head2 cookie_header
501
502
    my $header = $jar->cookie_header("http://www.example.com/foo/bar");
503
504
Given a request URL, returns a correctly-formatted string with all relevant
505
cookies for the request.  This string is ready to be used in a C<Cookie> header
506
in an HTTP request.  E.g.:
507
508
    SID=31d4d96e407aad42; lang=en-US
509
510
It follows the same exclusion rules as C<cookies_for>.
511
512
If the request is invalid or no cookies apply, it will return an empty string.
513
514
=head2 dump_cookies
515
516
    my @list = $jar->dump_cookies;
517
    my @list = $jar->dump_cookies( { persistent => 1 } );
518
519
Returns a list of raw cookies in string form.  The strings resemble what
520
would be received from C<Set-Cookie> headers, but with additional internal
521
fields.  The list is only intended for use with C<load_cookies> to allow
522
cookie jar persistence.
523
524
If a hash reference with a true C<persistent> key is given as an argument,
525
cookies without an C<Expires> time (i.e. "session cookies") will be omitted.
526
527
Here is a trivial example of saving a cookie jar file with L<Path::Tiny>:
528
529
    path("jar.txt")->spew( join "\n", $jar->dump_cookies );
530
531
=head2 load_cookies
532
533
    $jar->load_cookies( @cookies );
534
535
Given a list of cookie strings from C<dump_cookies>, it adds them to
536
the cookie jar.  Cookies added in this way will supersede any existing
537
cookies with similar domain, path and name.
538
539
It returns the jar object for convenience when loading a new object:
540
541
    my $jar = HTTP::CookieJar->new->load_cookies( @cookies );
542
543
Here is a trivial example of loading a cookie jar file with L<Path::Tiny>:
544
545
    my $jar = HTTP::CookieJar->new->load_cookies(
546
        path("jar.txt")->lines
547
    );
548
549
=for Pod::Coverage method_names_here
550
551
=head1 LIMITATIONS AND CAVEATS
552
553
=head2 RFC 6265 vs prior standards
554
555
This modules adheres as closely as possible to the user-agent rules
556
of RFC 6265.  Therefore, it does not handle nor generate C<Set-Cookie2>
557
and C<Cookie2> headers, implement C<.local> suffixes, or do path/domain
558
matching in accord with prior RFC's.
559
560
=head2 Internationalized domain names
561
562
Internationalized domain names given in requests must be properly
563
encoded in ASCII form.
564
565
=head2 Public suffixes
566
567
If L<Mozilla::PublicSuffix> is installed, cookie domains will be checked
568
against the public suffix list.  Public suffix cookies are only allowed
569
as host-only cookies.
570
571
=head2 Third-party cookies
572
573
According to RFC 6265, a cookie may be accepted only if has no C<Domain>
574
attribute (in which case it is "host-only") or if the C<Domain> attribute is a
575
suffix of the request URL.  This effectively prohibits Site A from setting a
576
cookie for unrelated Site B, which is one potential third-party cookie vector.
577
578
=head1 SEE ALSO
579
580
=over 4
581
582
=item *
583
584
L<HTTP::Cookies>
585
586
=item *
587
588
L<Mojo::UserAgent::CookieJar>
589
590
=back
591
592
=for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan
593
594
=head1 SUPPORT
595
596
=head2 Bugs / Feature Requests
597
598
Please report any bugs or feature requests through the issue tracker
599
at L<https://github.com/dagolden/HTTP-CookieJar/issues>.
600
You will be notified automatically of any progress on your issue.
601
602
=head2 Source Code
603
604
This is open source software.  The code repository is available for
605
public review and contribution under the terms of the license.
606
607
L<https://github.com/dagolden/HTTP-CookieJar>
608
609
  git clone https://github.com/dagolden/HTTP-CookieJar.git
610
611
=head1 AUTHOR
612
613
David Golden <dagolden@cpan.org>
614
615
=head1 COPYRIGHT AND LICENSE
616
617
This software is Copyright (c) 2013 by David Golden.
618
619
This is free software, licensed under:
620
621
  The Apache License, Version 2.0, January 2004
622
623
=cut