#!/usr/bin/perl use strict; use Socket; use Fcntl; use Symbol; # for File Handle Object use IO::File; #my $host = ''; my $hatena_host = 'd.hatena.ne.jp'; my $port = 80; my $basepath = '/theme/'; my $save_dir = 'hatena/'; my $css_ref = 'http://d.hatena.ne.jp/themesample?hatena'; #--------------------------------------------------- # Start #--------------------------------------------------- my $theme_list = &fread_lines($ARGV[0]); print (($#$theme_list+1) . "個のテーマをダウンロードします\n"); #--------------------------------------------------- # テーマ探索 #--------------------------------------------------- foreach my $theme (@$theme_list) { chomp($theme); print "\n[$theme]\n"; if ($theme !~ /^[A-Z-]*$/) { # すべて大文字 → そのまま $theme =~ tr/A-Z/a-z/; } my $csspath = "$basepath$theme/$theme.css"; my $css = &get_file($hatena_host, $csspath, $css_ref); if (!ref $css) { next; } # cr2lf $css = join('', @$css); $css =~ s/\r\n|\r/\n/g; # \n に統一 $css =~ s/\n/\n\x01/g; # \n のうしろに \1 $css = [ split("\x01", $css) ]; my $files = &analyze_css($css); my $theme_dir = "$save_dir$theme/"; mkdir($theme_dir); &fwrite_lines("$theme_dir$theme.css", $css); my $referer = "http://$hatena_host$csspath"; foreach(@$files) { my $ref = $referer; my $file = $_; if ($file eq 'README') { undef $ref; } my $host; my $path; if ($file =~ /^http:\/\/([\w\.\-]+?)(\/.*)/) { $host = $1; $path = $2; } else { $host = $hatena_host; # d.hatena.ne.jp $path = "$basepath$theme/$file"; } my $data = &get_file($host, $path, $ref); if (!ref $data) { next; } $file = substr($file, rindex($file, '/')+1); &fwrite_lines("$theme_dir/$file", $data); } #------------------------------------------- # sleep 設定 #------------------------------------------- my $sleep_time = 10 + int(rand(30)); print "sleep($sleep_time)\n"; sleep($sleep_time); } exit(0); #------------------------------------------------------------------------------ # ●CSSを解析し必要なファイル一覧を作る #------------------------------------------------------------------------------ sub analyze_css { my ($css_lines) = @_; my %files = ('README' => 1); foreach(@$css_lines) { if ($_ =~ /url\(\s*(.+?)\s*\)/i) { my $file = $1; $file =~ s/\s//g; # for Hatena bugs $file =~ s|^http:\s*//|http://|; if ($file =~ /(["'])(.*)\1/) { $file = $2; } if ($file =~ /^http:\/\/\w+.hatena\.ne\.jp/) { my $file2 = substr($file, rindex($file, '/')+1); $_ =~ s/url\(\s*(.+?)\s*\)/url("$file2")/g; } if ($file ne '') { $files{$file} = 1; } } } my @files = keys(%files); return \@files; } ############################################################################### # ■HTTP クライアント関連ルーチン ############################################################################### #------------------------------------------------------------------------------ # ●指定ホストに接続する #------------------------------------------------------------------------------ sub connect_host { my ($host) = @_; # IP 情報に変換 my $ip_bin = inet_aton($host); if ($ip_bin eq '') { die("$host が見つかりません"); return ; } my $sockaddr = pack_sockaddr_in($port, $ip_bin); my $sh = IO::File->new; if (! socket($sh, Socket::PF_INET(), Socket::SOCK_STREAM(), 0)) { die("socket が開けません"); return ; } if (! connect($sh, $sockaddr)) { die("$host に接続できません"); return ; } autoflush $sh (1); return $sh; } #------------------------------------------------------------------------------ # ●GET, POST, HEAD などを送り、データを受信する #------------------------------------------------------------------------------ sub get_file { my ($host, $filepath, $referer) = @_; my $socket = &connect_host($host); if (!$socket) { print ("fail in &connect_host()\n"); return; } print "Try download $filepath\n"; print $socket <; my $len; while(<$socket>) { if($_ =~ /^\r?\n$/) { last; } } @response = <$socket>; close($socket); $status = (split(' ', $status))[1] +0; if ($status == 404) { print ("ファイルが見つかりません\n"); return ; } if ($status != 200) { print ("$host がエラーを返しました (Status : $status)\n"); return ; } return \@response; } #------------------------------------------------------------------------------ # ●URIエンコード #------------------------------------------------------------------------------ sub uri_encode { foreach(@_) { $_ =~ s/([^\.\*\-_a-zA-Z0-9 ])/'%' . unpack("H2",$1)/eg; $_ =~ s/ /+/g; } } #------------------------------------------------------------------------------ # ●ファイル:すべての行を読み込む #------------------------------------------------------------------------------ # $array_ref = &fread_lines($file); sub fread_lines { my ($file) = @_; my $fh = Symbol::gensym(); if ( !sysopen($fh, $file, O_RDONLY) ) { die("File can't read : $file"); } my @lines = <$fh>; close($fh); return \@lines; } #------------------------------------------------------------------------------ # ●すべての行をファイルに書き込む #------------------------------------------------------------------------------ # &fwrite_lines($file, $array_ref); sub fwrite_lines { my ($file, $lines) = @_; my $fh = Symbol::gensym(); if ( !sysopen($fh, $file, O_CREAT | O_WRONLY) ) { die("File can't write : $file"); } foreach(@$lines) { print $fh $_; } close($fh); }