#!/usr/bin/perl -w
#
# Usage: <script> PAGE_URI [PAGE_URI ...]
#
# For every page URI given on the command line: fetch the page with wget,
# collect the targets of its "a href=" links that end in .mpg, .wmv or .avi,
# and download each of them into a freshly numbered directory ("001",
# "002", ...) below the current directory, naming the files 001.<ext>,
# 002.<ext>, ... in sorted link order.

use strict;
use File::Temp qw(tempfile);

# User-Agent string sent with every wget request.
my $USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 6.0)";

# Return the next free directory number: one more than the highest
# three-digit entry name ("000".."999") in the current directory, or 1 when
# there is none.  Dies if the current directory cannot be read.
sub get_available_num {
    my $num = 1;

    opendir(my $dh, ".") or die "Cannot read current directory: $!\n";

    # defined() keeps an entry literally named "0" from ending the scan.
    while (defined(my $entry = readdir $dh)) {
        next unless $entry =~ /^[0-9]{3}\z/;

        # Numeric conversion drops the leading zeros ("007" -> 7, "000" -> 0).
        my $value = $entry + 0;
        $num = $value + 1 if $value >= $num;
    }
    closedir $dh;

    return $num;
}

# Split a URI into (protocol, host name, directory).
#
#   protocol  - scheme including "://" (e.g. "http://"), undef if absent
#   host name - everything up to the first "/" after the protocol
#   directory - path with the query string and the last path component
#               removed, without leading or trailing slash; "" when the
#               page sits directly below the host, undef when the URI has
#               no path at all
sub split_uri {
    my $uri = shift;
    my ($proto, $name, $dir);

    # The query string plays no part in resolving relative links.
    $uri =~ s/\?.*$//;

    if ($uri =~ m{^([^:]+://)(.*)$}) {
        $proto = $1;
        $uri   = $2;
    }

    if ($uri =~ m{^([^/]+)/(.*)}) {
        $name = $1;
        $dir  = $2;

        # Drop the final path component (the page itself) together with the
        # slash in front of it: "a/b/page.html" -> "a/b", "page.html" -> "".
        $dir =~ s{/?[^/]*\z}{};
    }
    else {
        $name = $uri;
    }

    return ($proto, $name, $dir);
}

# Download $uri with wget into a private temporary file and return its
# content as one string, the individual lines joined by a single space.
# The temporary file is removed again in every case.  Dies if wget fails or
# the downloaded file cannot be read.
sub get_file_from_uri {
    my $uri = shift;

    my $old_umask = umask 077;
    my ($fh, $filename) = tempfile();
    close $fh;

    # List-form system() bypasses the shell, so the URI needs no quoting;
    # "--" keeps a URI starting with "-" from being taken as an option.
    my $status = system('wget', '-U', $USER_AGENT, '-O', $filename,
                        '--', $uri);
    umask $old_umask;

    if ($status != 0) {
        unlink $filename;
        die "wget failed for $uri (\$? = $status)\n";
    }

    my $in;
    if (!open($in, '<', $filename)) {
        my $error = $!;
        unlink $filename;
        die "Cannot read $filename: $error\n";
    }
    my @lines = <$in>;
    close $in;
    unlink $filename;

    return join " ", @lines;
}

# Return the sorted, de-duplicated list of link targets found in the HTML
# text $lines that end in .mpg, .wmv or .avi.  A link is anything following
# "a href=" (case-insensitive); its target may be quoted with ' or " or be
# unquoted, in which case it ends at the first space or ">".
sub extract_uris {
    my $lines = shift;
    $lines =~ s/\s+/ /g;

    my @chunks = split /a href=/i, $lines;

    # The first chunk is the text in front of the first link (or the whole
    # text when there is no link at all), never a link target.
    shift @chunks;

    my %download_uris;
    foreach my $uri (@chunks) {
        next if length($uri) == 0;

        my $t = substr $uri, 0, 1;
        if ($t =~ /['"]/) {
            # Quoted target: keep what is between the matching quotes.
            $uri =~ s/^$t([^$t]+)$t.*$/$1/;
        }
        else {
            # Unquoted target: ends at the first space or ">".
            $uri =~ s/^([^ >]+)[ >].*$/$1/;
        }

        next unless $uri =~ /\.(?:mpg|wmv|avi)$/;
        $download_uris{$uri} = 1;
    }

    my @uris = sort keys %download_uris;
    return @uris;
}

# Return a copy of the argument with backslashes and double quotes
# backslash-escaped, for embedding in a double-quoted shell word.  The
# argument and $_ are left untouched.  This script itself no longer calls
# it, because wget is now run without a shell.
sub escape {
    my $text = shift;
    $text =~ s/\\/\\\\/g;
    $text =~ s/"/\\"/g;
    return $text;
}

while (@ARGV >= 1) {
    my $arg = shift @ARGV;

    my @uris = extract_uris(get_file_from_uri($arg));
    if (@uris == 0) {
        print "Nothing to download.\n";
        next;    # carry on with the remaining command line arguments
    }

    my $dirnum  = get_available_num();
    my $dirname = sprintf("%03i", $dirnum);
    mkdir($dirname) or die "Cannot create directory $dirname: $!\n";

    # Prefixes used to turn host-relative and page-relative links into
    # absolute URIs.
    my ($proto, $hostname, $path) = split_uri($arg);
    $proto = '' unless defined $proto;
    my $root = $proto . $hostname;
    my $base = $root . "/";
    $base .= $path . "/" if defined $path && length $path;

    my $fnum = 1;
    foreach my $link (@uris) {
        # extract_uris() only returns links that end in ".<extension>".
        my ($extension) = $link =~ /\.([^.]+)$/;
        my $fname = sprintf("%03i/%03i.%s", $dirnum, $fnum, $extension);

        my $target = $link;
        if ($link =~ m{^/}) {
            $target = $root . $link;      # relative to the host
        }
        elsif ($link !~ m{^[^:]+://}) {
            $target = $base . $link;      # relative to the page's directory
        }

        # The page the link was found on is sent as the referer.
        system('wget', "--referer=$arg", '-U', $USER_AGENT,
               '-O', $fname, '--', $target) == 0
            or warn "wget failed for $target (\$? = $?)\n";

        $fnum++;
    }
}