package nichanlib;
use strict;
use warnings;
use File::Basename;
use File::Path;
use File::Spec;
use XMLRPC::Lite;
use Encode;
use Encode qw/ decode encode_utf8 /;
use LWP::UserAgent;
use HTML::TreeBuilder;
use utf8;
# Apply the :utf8 I/O layer to standard input and standard output.
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
# Load the project-local helper library at run time; the subs in this file
# call its mylib::* functions (write_file, read_file, combine_path, ...).
require "mylib.pm";
# Download the BBS menu page and save every absolute (http://) link in it.
#
# Arguments:
#   $menu_url                    - URL of the menu page to fetch.
#   $tmp_2ch_bbs_menu_html_file  - work file; receives the page body, decoded
#                                  from Shift_JIS, via mylib::write_file.
#   $save_2ch_bbs_menu_link_file - result file; receives one
#                                  "href<TAB>link text" entry per link.
#
# The result file is always (re)written.  When the HTTP request fails an
# error line is printed and an empty link list is saved, instead of parsing
# the error response as if it were the menu.
#
# Returns whatever the final mylib::write_file call returns.
sub get_2ch_bbs_menu {
    my ($menu_url, $tmp_2ch_bbs_menu_html_file, $save_2ch_bbs_menu_link_file) = @_;

    print("取得メニューリンク:$menu_url\n");
    print("作業用メニューリンクページ:$tmp_2ch_bbs_menu_html_file\n");
    print("結果カテゴリリンクページ保存ファイル:$save_2ch_bbs_menu_link_file\n");

    print(" => get page\n");
    my $ua = LWP::UserAgent->new;
    $ua->agent('Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.187 Safari/535.1');
    # Has to be set before the request is sent; setting it afterwards (as the
    # code used to) has no effect on that request.  Only the raw body is used
    # below, so <head>-derived response headers are not needed.
    $ua->parse_head(0);
    my $res = $ua->get($menu_url);

    my @list = ();
    if ($res->is_success) {
        # The body is decoded explicitly as Shift_JIS and stored in the work file.
        my $content = decode('shiftjis', $res->content);
        mylib::write_file($tmp_2ch_bbs_menu_html_file, [$content], 'utf8');

        print(" => get link\n");
        my $tree = HTML::TreeBuilder->new;
        # The tree is built from the work file, not from $content, so the
        # text it yields is decoded from UTF-8 below.
        $tree->parse_file($tmp_2ch_bbs_menu_html_file);
        $tree->eof();

        for my $link ($tree->look_down("href", qr{http://})) {
            # scalar(): take exactly one href value for the element.
            my $href = scalar $link->attr_get_i('href');
            push(@list, join("\t", $href, decode('utf-8', $link->as_text)));
        }
        $tree->delete;
    }
    else {
        print("get error:$menu_url\n");
    }

    print(" => save file\n");
    mylib::write_file($save_2ch_bbs_menu_link_file, \@list, "utf8");
}
# Download one category (board) page and save the threads worth fetching.
#
# Arguments:
#   $menu_url               - URL of the category page.
#   $comment_count_hash_ref - hash ref: cleaned thread title => comment count
#                             recorded earlier.
#   $target_thread_count    - minimum comment count a thread needs.
#   $tmp_category_file      - work file; receives the page body, decoded from
#                             Shift_JIS, via mylib::write_file.
#   $thread_url_file        - result file; one "URL<TAB>title<TAB>count" entry
#                             per selected thread.
#   $filter_word_datas_ref  - array ref of filter words.
#
# A thread is selected when all of the following hold:
#   * the count in the trailing "(N)" of its link text is >= $target_thread_count;
#   * its link text contains at least one non-empty filter word (with no
#     usable filter word nothing is selected);
#   * its cleaned title is unknown to $comment_count_hash_ref, or the new
#     count is greater than the recorded one.
#
# The result file is always (re)written.  When the HTTP request fails an
# error line is printed and an empty list is saved.
#
# Returns whatever the final mylib::write_file call returns.
sub get_2ch_thread_link_for_category {
    my ($menu_url, $comment_count_hash_ref, $target_thread_count, $tmp_category_file, $thread_url_file, $filter_word_datas_ref) = @_;

    print("カテゴリURL:$menu_url\n");
    print("コメント数ハッシュのリファレンス:$comment_count_hash_ref\n");
    print("作業用ファイル:$tmp_category_file\n");
    print("スレッドURL保存ファイル:$thread_url_file\n");

    print(" => get page\n");
    my $ua = LWP::UserAgent->new;
    $ua->agent('Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.187 Safari/535.1');
    # Has to be set before the request is sent to have any effect on it.
    # Only the raw body is used below.
    $ua->parse_head(0);
    my $res = $ua->get($menu_url);

    my @list = ();
    if ($res->is_success) {
        my $content = decode('shiftjis', $res->content);
        mylib::write_file($tmp_category_file, [$content], 'utf8');

        print(" => get thread\n");
        my $tree = HTML::TreeBuilder->new;
        # The tree is built from the work file, so hrefs and text are decoded
        # from UTF-8 below.
        $tree->parse_file($tmp_category_file);
        $tree->eof();

        # Thread hrefs are appended to the href of the first <base> element.
        my $base_url = "";
        my $base_tag = $tree->find("base");
        if ($base_tag) {
            my $base_href = $base_tag->attr('href');
            $base_url = $base_href if defined $base_href;
            print($base_url."\n");
        }

        # Empty filter words never match anything, so drop them once up front.
        my @filter_words = ();
        if ($filter_word_datas_ref) {
            @filter_words = grep { defined $_ && $_ ne "" } @{$filter_word_datas_ref};
        }

        THREAD:
        for my $anchor ($tree->look_down("href", qr{^\d})) {
            # scalar(): pass exactly one value to decode(); a longer list
            # would spill into its third ($check) argument.
            my $tmp_url = $base_url . decode('utf-8', scalar $anchor->attr_get_i('href'));
            $tmp_url =~ s/\/l50//g;

            my $tmp_title = decode('utf-8', $anchor->as_text);
            my $count = 0;
            if ($tmp_title =~ /\((\d+)\)$/) {
                $count = $1;
            }
            next THREAD if $count < $target_thread_count;

            # Keep the thread only if its title contains one of the filter words.
            my $matched = 0;
            for my $word (@filter_words) {
                if (index($tmp_title, $word) != -1) {
                    $matched = 1;
                    last;    # was "last:" (a label, not a loop exit)
                }
            }
            next THREAD if !$matched;

            # Strip the leading "NNN: " index and the trailing " (count)".
            $tmp_title =~ s/^\d{1,4}: //;
            $tmp_title =~ s/ \((\d+)\)$//;

            # Skip threads that have not grown since the recorded count.
            if (exists($comment_count_hash_ref->{$tmp_title})) {
                my $before_count = $comment_count_hash_ref->{$tmp_title};
                $before_count += 0;
                next THREAD unless $count > $before_count;
            }
            push(@list, join("\t", $tmp_url, $tmp_title, $count));
        }
        $tree->delete;
    }
    else {
        print("get error:$menu_url\n");
    }

    print(" => save file\n");
    mylib::write_file($thread_url_file, \@list, "utf8");
}
# Fetch every thread listed in $thread_url_file and store it under $output_dir.
#
# Arguments:
#   $thread_url_file - file read with mylib::read_file; each non-empty entry
#                      is "URL<TAB>title<TAB>count".
#   $output_dir      - directory for the result files; created, including
#                      missing parent directories, when it does not exist.
#   $wait_time       - seconds to sleep after each successfully saved thread.
#
# One file is written per fetched thread, named from the last two "/"-separated
# parts of its URL ("<a>_<b>.txt").  Its entries are, in order:
#   1. the text of the first <h1> element ("" when the page has none),
#   2. the comment count taken from $thread_url_file,
#   3. the XML of every element whose class attribute is "thread".
# The converter subs in this file read entries 0, 1 and 2 as title, count and
# content, so exactly one title entry is always written.
#
# A failed download prints "get error:<url>" and moves on to the next URL.
# If the output directory cannot be created, an error is printed and nothing
# is downloaded.
sub get_2ch_thread_contents {
    my ($thread_url_file, $output_dir, $wait_time) = @_;

    print("スレッドURLファイル:$thread_url_file\n");
    print("出力ディレクトリ:$output_dir\n");
    print("処理待ち時間:$wait_time\n");

    if (!-d $output_dir) {
        # mkpath also creates missing parents and dies on failure; report the
        # failure here rather than downloading threads that cannot be saved.
        if (!eval { mkpath($output_dir); 1 }) {
            print("mkdir error:$output_dir\n");
            return;
        }
    }

    my @web_datas = ();
    my %thread_count_hash = ();

    print(" => read url file\n");
    for my $line (mylib::read_file($thread_url_file, "utf8")) {
        next if $line eq "";
        my ($url, $title, $count) = split(/\t/, $line);
        push(@web_datas, $url);
        print("$title => $count\n");
        $thread_count_hash{$url} = $count;
    }

    for my $wd (@web_datas) {
        print(" => get page:web data:$wd\n");

        # parse_head is left at its default: the old parse_head(0) call came
        # after the request on a throw-away agent and never had any effect.
        my $ua = LWP::UserAgent->new;
        $ua->agent('Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.187 Safari/535.1');
        my $res = $ua->get($wd);
        if (!$res->is_success) {
            print("get error:$wd\n");
            next;
        }

        my $content = $res->decoded_content;
        $content = "" if !defined $content;

        print(" => get thread content\n");
        my $tree = HTML::TreeBuilder->new;
        $tree->parse($content);
        $tree->eof();

        # Exactly one title entry, so the count and content entries keep
        # their fixed positions even when the page has no or several <h1>.
        my $title = "";
        my $heading = $tree->find("h1");
        if ($heading) {
            $title = $heading->as_text;
            print("タイトル:".$title."\n");
        }

        my $comment_count = $thread_count_hash{$wd};
        $comment_count += 0;    # force a number, also when the count is missing

        my @data_array = ($title, $comment_count);
        for my $thread_element ($tree->look_down("class", "thread")) {
            push(@data_array, $thread_element->as_XML);
        }

        print(" => save file\n");
        my @split_address = split(/\//, $wd);
        my $file_name = $split_address[-2]."_".$split_address[-1].".txt";
        my $save_file = File::Spec->catfile($output_dir, $file_name);
        mylib::write_file($save_file, \@data_array, "utf8");

        $tree->delete;
        sleep($wait_time);
    }
}
# Rewrite every "*.txt" thread file found under $input_dir into $output_dir.
#
# Arguments:
#   $input_dir  - directory searched through mylib::get_files_all_dir.
#   $output_dir - root directory for the converted files.
#
# Each input file is read as: entry 0 = title, entry 1 = comment count,
# entry 2 = thread content.  Missing entries fall back to "", 0 and "".
# Only the content entry is edited:
#   * links into ../test/read.cgi are replaced by their link text,
#   * mailto links and green <font> spans are replaced by one fixed
#     green <font> span.
# The output path is the input path with its first "/"-separated directory
# component dropped, placed under $output_dir.
sub convert_entry_text_from_thread_contents {
    my ($input_dir, $output_dir) = @_;

    print("スレッドページディレクトリ:$input_dir\n");
    print("スレッドページ編集結果ディレクトリ:$output_dir\n");

    print(" => get files\n");
    my @source_files = mylib::get_files_all_dir($input_dir, "*.txt");

    print(" => convert files\n");
    for my $source_file (@source_files) {
        print(" => $source_file\n");
        my @rows = mylib::read_file($source_file, 'utf8');

        my $heading = @rows > 0 ? $rows[0] : "";
        my $replies = @rows > 1 ? $rows[1] : 0;
        my $body    = "";
        if (@rows > 2) {
            $body = $rows[2];
            $body =~ s/(<a href=\"\.\.\/test\/read\.cgi.+?\">)(.+?)(<\/a>)/$2/g;
            $body =~ s/(<a href=\"mailto:.*?\">).+?(<\/a>)/<font color=\"green\">名無しさん<\/font>/g;
            $body =~ s/(<font color=\"green\">).+?(<\/font>)/<font color=\"green\">名無しさん<\/font>/g;
        }
        my @converted = ($heading, $replies, $body);

        # Directory part of the source path minus its first component.
        my ($base_name, $dir_part) = fileparse($source_file);
        my @dir_components = split(/\//, $dir_part);
        shift(@dir_components);
        my $target_dir = mylib::combine_path($output_dir, join("/", @dir_components));

        unless (mylib::check_exist_dir($target_dir)) {
            mylib::make_dir_force($target_dir);
        }

        my $target_file = mylib::combine_path($target_dir, $base_name);
        mylib::write_file($target_file, \@converted, 'utf8');
    }
}
# Rebuild the thread content of every "*.txt" file under $input_dir, marking
# reply comments, and write the result under $output_dir.
#
# Arguments:
#   $input_dir  - directory searched through mylib::get_files_all_dir.
#   $output_dir - root directory for the converted files.
#
# Each input file is read as: entry 0 = title, entry 1 = comment count,
# entry 2 = thread content.  Missing entries fall back to "", 0 and "".
# The content is cut into comments, each running from a "<dt>" to the next
# "</dd>"; text between comments is dropped.  A comment whose "<dd>" is
# directly followed by ">>" and digits gets class="ret1" on that <dd>.
# The comments are then joined, in their original order, inside
# <dl class="thread"> ... </dl>.
#
# A trailing "<dt>" with no closing "</dd>" ends the scan and is left out;
# previously such input made the scan loop forever.
#
# The output path is the input path with its first "/"-separated directory
# component dropped, placed under $output_dir.
sub convert_entry_text_from_thread_contents_color {
    my ($input_dir, $output_dir) = @_;

    print("スレッドページディレクトリ:$input_dir\n");
    print("スレッドページ編集結果(色付け)ディレクトリ:$output_dir\n");

    print(" => get files\n");
    my @files = mylib::get_files_all_dir($input_dir, "*.txt");

    print(" => convert files\n");
    foreach my $file (@files) {
        print(" => $file\n");
        my @lines = mylib::read_file($file, 'utf8');

        my $file_title         = defined $lines[0] ? $lines[0] : "";
        my $file_comment_count = defined $lines[1] ? $lines[1] : 0;
        my $contents           = defined $lines[2] ? $lines[2] : "";

        my $dl_header  = "<dl class=\"thread\">";
        my $dl_footer  = "</dl>";
        my $close_tag  = "</dd>";

        # Collect the "<dt> ... </dd>" spans in document order.
        my @comments = ();
        my $search_from = 0;
        while (1) {
            my $comment_start = index($contents, "<dt>", $search_from);
            last if $comment_start == -1;

            my $comment_end = index($contents, $close_tag, $comment_start);
            # Unterminated comment: stop, otherwise the search position would
            # fall back before $comment_start and never advance.
            last if $comment_end == -1;
            $comment_end += length($close_tag);

            push(@comments, substr($contents, $comment_start, $comment_end - $comment_start));
            $search_from = $comment_end;
        }

        print(" => check コメントリンク\n");
        for my $comment (@comments) {
            # $comment aliases the array element, so the edit is stored in place.
            if ($comment =~ /<dd>>>\d+/) {
                $comment =~ s/<dd>/<dd class="ret1">/;
            }
        }

        my @output_lines = (
            $file_title,
            $file_comment_count,
            join("", $dl_header, @comments, $dl_footer),
        );

        # Directory part of the source path minus its first component.
        my ($filename, $path, $suffix) = fileparse($file);
        my @tmp_paths = split(/\//, $path);
        shift(@tmp_paths);
        $path = join("/", @tmp_paths);

        my $tmp_dir = mylib::combine_path($output_dir, $path);
        if (!mylib::check_exist_dir($tmp_dir)) {
            mylib::make_dir_force($tmp_dir);
        }
        my $fn = mylib::combine_path($tmp_dir, $filename);
        mylib::write_file($fn, \@output_lines, 'utf8');
    }
}
# A file loaded with require has to finish by yielding a true value.
return 1;