# 共通ライブラリ (common utility library)
package mylib;
use utf8;
use Config::Simple;
use Data::UUID;
use Encode;
use File::Basename;
use File::Compare;
use File::Copy;
use File::Find::Rule;
use File::Path;
use File::Spec;
use File::Remove;
use List::Util;
use Sys::Hostname;
use Term::ANSIColor;
use Time::HiRes;
use Time::Piece;
# Print a "#include dir:" heading followed by every entry of the module
# search path, one per line.
sub print_include_dir{
    my @search_dirs = get_include_dir();
    print("#include dir:\n");
    print($_."\n") for @search_dirs;
}
# Return the module search path (@INC) as a list.
sub get_include_dir{
    my @search_dirs = @INC;
    return @search_dirs;
}
# Print the running interpreter's version on a single labelled line.
sub print_perl_version{
    my $version = get_perl_version();
    print("#perl verion:${version}\n");
}
# Return the interpreter version as stored in the special variable $].
sub get_perl_version{
    my $version = $];
    return $version;
}
# Print the current process id on a single labelled line.
sub print_process_id{
    my $pid = get_process_id();
    print("#pid:${pid}\n");
}
# Print the name of the running script on a single labelled line.
sub print_script_file{
    my $script = get_script_file();
    print("#script file:${script}\n");
}
# Return the program name as stored in the special variable $0.
sub get_script_file{
    my $script = $0;
    return $script;
}
# Run `which perl` through the shell and print its output after a label.
# The command output is printed as returned (no newline is added here).
sub print_perl_path{
    my $perl_location = `which perl`;
    print("#perl path:".$perl_location);
}
# Print a summary of the running Perl environment: interpreter path,
# version, module search path, script name and process id.
sub print_perl_info{
    print_perl_path();
    print_perl_version();
    print_include_dir();
    print_script_file();
    # The original called print_pid(), which is not defined in this package;
    # the process-id printer is named print_process_id().
    print_process_id();
}
# Return the operating system name as stored in the special variable $^O.
sub get_os_name{
    my $os_name = $^O;
    return $os_name;
}
# Return the current process id as stored in the special variable $$.
sub get_process_id{
    my $pid = $$;
    return $pid;
}
# Return the host name reported by Sys::Hostname.
sub get_hostname{
    my $host = Sys::Hostname::hostname();
    return $host;
}
# Return the login name reported by getlogin(); the value is passed through
# unchanged, including undef when getlogin() yields nothing.
sub get_loginname{
    my $login = getlogin();
    return $login;
}
# Read a text file and return its lines with the line terminator removed.
#
# Parameters:
#   $file - path of the file to read
#   $enc  - PerlIO layer name placed after "<:" in the open mode (e.g. "utf8")
# Returns: the list of lines.
# Dies with "error :<reason>" when the file cannot be opened.
sub read_file{
    my($file, $enc) = @_;
    # A lexical handle replaces the package-global bareword FH, so nested or
    # re-entrant calls cannot interfere with each other.
    open(my $in_fh, "<:$enc", $file) or die("error :$!");
    my @ret_list = ();
    # Reading into a lexical avoids overwriting the caller's $_ (a bare
    # while(<FH>) assigns to the global $_ without localizing it).
    while(my $line = <$in_fh>){
        chomp($line);
        push(@ret_list, $line);
    }
    close($in_fh);
    return @ret_list;
}
# Write a list of lines to a file, appending "\n" to each one.
#
# Parameters:
#   $file     - path of the file to create or truncate
#   $list_ref - array reference holding the lines to write
#   $enc      - PerlIO layer name placed after ">:" in the open mode
# Returns: 1 on success.
# Dies with "error :<reason>" when the file cannot be opened or closed.
sub write_file{
    my ($file, $list_ref, $enc) = @_;
    # A lexical handle replaces the package-global bareword WFH.
    open(my $out_fh, ">:$enc", $file) or die("error :$!");
    foreach my $line (@{$list_ref}){
        print {$out_fh} "$line\n";
    }
    # Buffered write errors only surface at close, so check it.
    close($out_fh) or die("error :$!");
    return 1;
}
# Search $dir with File::Find::Rule for plain files whose name matches $ext
# (passed straight to the rule's name() test) and return the matching paths.
sub get_files_all_dir{
    my ($dir, $ext) = @_;
    my $finder = File::Find::Rule->new;
    $finder->file;
    $finder->name($ext);
    my @found = $finder->in($dir);
    return @found;
}
# Join $dir and the glob pattern $file into one path pattern and return
# whatever glob() expands it to.
sub get_files_top_dir{
    my ($dir, $file) = @_;
    my $pattern = File::Spec->catfile($dir, $file);
    return glob($pattern);
}
# Concatenate every file matching the glob pattern $file directly inside
# $dir into a single output file.
#
# Parameters:
#   $dir      - directory to search (top level only)
#   $file     - glob pattern for the input file names
#   $out_file - path of the concatenated output file
#   $enc      - PerlIO layer name used for the OUTPUT file
# Input files are always read with the "utf8" layer, regardless of $enc.
# Dies with "error :<reason>" when the output cannot be opened or closed.
sub create_cat_file_for_dir{
    my ($dir, $file, $out_file, $enc) = @_;
    # A lexical handle replaces the package-global bareword WFH that was
    # shared with write_file.
    open(my $out_fh, ">:$enc", $out_file) or die("error :$!");
    # The loop variable no longer shadows the $file parameter.
    foreach my $in_file (get_files_top_dir($dir, $file)){
        foreach my $line (read_file($in_file, "utf8")){
            print {$out_fh} "$line\n";
        }
    }
    # Buffered write errors only surface at close, so check it.
    close($out_fh) or die("error :$!");
    return 1;
}
# Return the elements of the referenced array with duplicates removed,
# keeping the first occurrence of each value in its original position.
sub uniq_for_array{
    my ($list_ref) = @_;
    my %seen = ();
    my @uniqs = ();
    foreach my $item (@{$list_ref}){
        next if $seen{$item}++;
        push(@uniqs, $item);
    }
    return @uniqs;
}
# Write the distinct lines of one UTF-8 text file to another.
#
# Parameters:
#   $in_file   - path of the input file
#   $uniq_file - path of the output file
# The original read its arguments from @ARGV; arguments passed to the sub
# now take precedence, and @ARGV is used only when none are passed.
# A running line counter is printed to STDOUT (terminated by "\r").
# Output lines keep first-seen order (previously: random hash order).
# Dies with "error :<reason>" when the input cannot be opened.
sub uniq_for_file{
    my ($in_file, $uniq_file) = @_ ? @_ : @ARGV;
    # Lexical handle instead of the package-global bareword FH.
    open(my $in_fh, "<:utf8", $in_file) or die("error :$!");
    my $count = 0;
    my %seen = ();
    my @out_list = ();
    while(my $line = <$in_fh>){
        $count++;
        print("$count"."\r");
        chomp($line);
        next if $seen{$line}++;
        push(@out_list, $line);
    }
    close($in_fh);
    write_file($uniq_file, \@out_list, "utf8");
}
# Return the number of elements in the referenced array.
sub get_array_count{
    my ($array_ref) = @_;
    return scalar(@{$array_ref});
}
# Return the highest valid index of the referenced array (-1 when empty).
sub get_array_max_index{
    my ($array_ref) = @_;
    return scalar(@{$array_ref}) - 1;
}
# Return the number of keys in the referenced hash.
sub get_hash_count{
    my ($hash_ref) = @_;
    return scalar(keys(%{$hash_ref}));
}
# Return the keys of the referenced hash in ascending string order.
sub get_hash_keys{
    my ($hash_ref) = @_;
    return sort { $a cmp $b } keys %{$hash_ref};
}
# Return the values of the referenced hash in ascending string order.
sub get_hash_values{
    my ($hash_ref) = @_;
    return sort { $a cmp $b } values %{$hash_ref};
}
# Return the current local time formatted as "YYYY/MM/DD hh:mm:ss".
sub get_time_now{
    my $now = Time::Piece::localtime();
    my @fields = ($now->year, $now->mon, $now->mday, $now->hour, $now->minute, $now->sec);
    return sprintf("%04d/%02d/%02d %02d:%02d:%02d", @fields);
}
# Build a lookup hash (word => 1) from the non-empty entries of a word list.
#
# Parameters:
#   $array_ref - array reference holding the words
# Returns: the hash as a flat key/value list.
# Side effect: prints the function name and then every word to STDOUT.
sub create_hash_from_word_list{
    my($array_ref) = @_;
    # Declared lexically: the original used an undeclared package global,
    # so words from earlier calls leaked into later results.
    my %word_hash = ();
    print("create_hash_from_word_list\n");
    foreach my $word (@{$array_ref}){
        print($word);
        if($word ne ""){
            $word_hash{$word} = 1;
        }
    }
    return %word_hash;
}
# Print every entry of the referenced hash as "key:[K]<TAB>value:[V]".
#
# Parameters:
#   $hash_ref - hash reference to dump
# Entries are printed in ascending key order (the original order was the
# hash's arbitrary internal order).
sub print_hash{
    my($hash_ref) = @_;
    # Lexical loop variables replace the undeclared globals $name/$value,
    # and iterating over keys avoids each()'s shared iterator state.
    foreach my $name (sort keys %{$hash_ref}){
        my $value = $hash_ref->{$name};
        print ("key:[$name]\tvalue:[$value]\n");
    }
    return;
}
# Split the referenced array into consecutive chunks.
#
# Parameters:
#   $array_ref       - array reference holding the data
#   $div_array_count - requested number of divisions (must be non-zero)
# Returns: a list of array references, one per chunk, in input order.
# The chunk size is int(element_count / $div_array_count) + 1, as in the
# original implementation.
sub div_array_data{
    my($array_ref, $div_array_count) = @_;
    my $total = scalar(@{$array_ref});
    my $chunk_size = int($total / $div_array_count) + 1;
    my @chunks = ();
    my @current = ();
    foreach my $item (@{$array_ref}){
        push(@current, $item);
        if(@current >= $chunk_size){
            push(@chunks, [@current]);
            @current = ();
        }
    }
    # Keep any leftover elements. The original tested "$#tmp > 0", which
    # silently dropped a final chunk holding exactly one element.
    push(@chunks, [@current]) if @current;
    return @chunks;
}
# Run "cabocha -f1" on one input file, redirecting its output to a file.
#
# Parameters:
#   $input_file  - path of the text file to analyse
#   $output_file - path that receives cabocha's standard output
# Prints a failure message naming the input file when the command cannot be
# started or exits with a non-zero status.
# NOTE(review): both paths are interpolated into a shell command line
# unquoted, so names with spaces or shell metacharacters will not work.
sub txt_to_caboca_for_file{
    my($input_file, $output_file) = @_;
    my $command = "cabocha -f1 $input_file > $output_file";
    # system() does not die on failure, so the original eval/$@ check could
    # never report a failed run; inspect the returned status instead.
    my $status = system($command);
    if ($status != 0) {
        print("cabocha処理例外発生:$input_file\n");
    }
    return;
}
# Run "cabocha -f1" on every matching file below a directory.
#
# Parameters:
#   $input_dir  - directory searched recursively for input files
#   $ext        - file name pattern handed to get_files_all_dir
#   $output_dir - directory for the results; removed and recreated first
# Each result is written to $output_dir as "<input base name>.cba".
# A percentage progress indicator is printed to STDOUT (ending with "\r").
# Prints a failure message naming the input file when a run fails.
# NOTE(review): paths are interpolated into a shell command line unquoted.
sub txt_to_caboca_for_dir{
    my($input_dir, $ext, $output_dir) = @_;
    if(-d $output_dir){
        rmtree($output_dir);
    }
    mkdir($output_dir);
    my @files = get_files_all_dir($input_dir, $ext);
    my $total = scalar(@files);
    for my $i (0 .. $#files){
        my $progress = (($i + 1) * 100) / $total;
        printf(" %3.0f%% \r", $progress);
        my $file_name = basename($files[$i] . ".cba");
        my $output_file = File::Spec->catfile($output_dir, $file_name);
        my $command = "cabocha -f1 $files[$i] > $output_file";
        # system() does not die on failure, so the original eval/$@ check
        # could never fire; inspect the returned status instead.
        if (system($command) != 0) {
            print("cabocha処理例外発生:$files[$i]\n");
        }
    }
    return;
}
# Return a file name made of a freshly generated Data::UUID string followed
# by $ext.
sub create_guid_file_name{
    my($ext) = @_;
    my $uuid = Data::UUID->new->create_str;
    return $uuid.$ext;
}
# Return a file name made of $prefix, a freshly generated Data::UUID string
# and $ext, in that order.
sub create_guid_file_name_add_prefix{
    my($prefix, $ext) = @_;
    my $uuid = Data::UUID->new->create_str;
    return $prefix.$uuid.$ext;
}
# Return the current high-resolution time, to be passed to stop_watch_stop.
sub stop_watch_start{
    my $started_at = Time::HiRes::time();
    return $started_at;
}
# Return the seconds elapsed between $start_time and now.
sub stop_watch_stop{
    my ($start_time) = @_;
    my $finished_at = Time::HiRes::time();
    return $finished_at - $start_time;
}
# Print the seconds elapsed since $start_time as "time:<s.mmm>(s)".
sub stop_watch_stop_and_print{
    my ($start_time) = @_;
    my $elapsed = Time::HiRes::time() - $start_time;
    printf("time:%0.3f(s)\n", $elapsed);
}
# Recreate $tmp_dir from scratch: remove it when it already exists, then
# create it (including missing parents). Dies when nothing was created.
sub make_dir_force{
    my ($tmp_dir) = @_;
    rmtree($tmp_dir) if -d $tmp_dir;
    my @created = mkpath([$tmp_dir]);
    die $! unless @created;
}
# Remove a directory tree, refusing to act on "." and "..".
#
# Parameters:
#   $tmp_dir - directory to remove; nothing happens when it does not exist
sub remove_dir_force{
    my ($tmp_dir) = @_;
    # The original compared the bareword tmp_dir (missing "$"), i.e. the
    # constant string "tmp_dir", so this guard was always true.
    return if $tmp_dir eq '.' or $tmp_dir eq '..';
    if (-d $tmp_dir) {
        rmtree($tmp_dir);
    }
    return;
}
# Run $cmd through the shell (skipped when $cmd is the empty string) and
# return its captured standard output decoded from UTF-8.
sub system_command{
    my ($cmd) = @_;
    my $raw_output = ($cmd ne "") ? qx/$cmd/ : "";
    my $decoded = decode('UTF-8', $raw_output);
    return $decoded;
}
# Copy $src_file to $dest_file with File::Copy; dies with $! on failure.
sub copy_file{
    my ($src_file, $dest_file) = @_;
    my $copied = File::Copy::copy($src_file, $dest_file);
    die $! unless $copied;
    return $copied;
}
# Move $src to $dest with File::Copy; dies with $! on failure.
sub move_file{
    my ($src, $dest) = @_;
    my $moved = File::Copy::move($src, $dest);
    die $! unless $moved;
    return $moved;
}
# Return the elements of the referenced array that match $search_str, which
# is interpolated into the pattern as a regular expression.
sub search_for_array{
    my ($data_array_ref, $search_str) = @_;
    return grep { $_ =~ /$search_str/ } @{$data_array_ref};
}
# Split $str on $div_string, which is interpolated into the pattern as a
# regular expression, and return the resulting fields.
sub split_simple{
    my ($div_string, $str) = @_;
    my @fields = split(/$div_string/, $str);
    return @fields;
}
# Return the union of two referenced hashes; for keys present in both, the
# value from the second hash wins.
sub merge_hash{
    my ($hash_1_ref, $hash_2_ref) = @_;
    my %hash_merge = %{$hash_1_ref};
    foreach my $key (keys %{$hash_2_ref}){
        $hash_merge{$key} = $hash_2_ref->{$key};
    }
    return %hash_merge;
}
# Return 1 when $file is an existing plain file, otherwise 0.
sub check_exist_file{
    my ($file) = @_;
    return (-f $file) ? 1 : 0;
}
# Return 1 when $dir is an existing directory, otherwise 0.
sub check_exist_dir{
    my ($dir) = @_;
    return (-d $dir) ? 1 : 0;
}
# Delete $file with unlink; dies with $! when nothing was removed.
sub delete_file{
    my ($file) = @_;
    my $removed = unlink($file);
    die $! unless $removed;
    return $removed;
}
# Pass the pattern $wild_card to File::Remove::remove and return its result.
sub delete_file_by_wildcard{
    my ($wild_card) = @_;
    return File::Remove::remove($wild_card);
}
# Split $file_path with fileparse() and return a hash with two keys:
# "dir" (the directory part) and "name" (the file name part). No suffix
# patterns are given, so the name keeps its extension.
sub parse_file_path{
    my($file_path) = @_;
    my ($name, $dir) = fileparse($file_path);
    my %tmp_hash = (
        "dir"  => $dir,
        "name" => $name,
    );
    return %tmp_hash;
}
# Return $file_path converted to an absolute path by File::Spec->rel2abs.
sub get_full_path{
    my($file_path) = @_;
    my $absolute = File::Spec->rel2abs($file_path);
    return $absolute;
}
# Return 1 when File::Compare reports the two files as equal, otherwise 0
# (this includes the case where the comparison itself fails).
sub compare_file{
    my($file1, $file2) = @_;
    my $verdict = File::Compare::compare($file1, $file2);
    return ($verdict == 0) ? 1 : 0;
}
# Return 1 when $check_str matches the pattern $regex_str
# (case-insensitively), otherwise 0.
sub regex_match_simple{
    my ($check_str, $regex_str) = @_;
    return ($check_str =~ m/$regex_str/gi) ? 1 : 0;
}
# Return a copy of $check_str in which every case-insensitive match of the
# pattern $regex_str is replaced by $rep_str.
sub regex_replace_simple{
    my ($check_str, $regex_str, $rep_str) = @_;
    (my $replaced = $check_str) =~ s/$regex_str/$rep_str/gi;
    return $replaced;
}
# Return the numerically largest element of the referenced array.
sub get_max_for_array{
    my($array_ref) = @_;
    return List::Util::max(@{$array_ref});
}
# Return the numerically smallest element of the referenced array.
sub get_min_for_array{
    my($array_ref) = @_;
    return List::Util::min(@{$array_ref});
}
# Return the numeric sum of the elements of the referenced array.
sub get_sum_for_array{
    my($array_ref) = @_;
    return List::Util::sum(@{$array_ref});
}
# Join a directory and a file name into one path with File::Spec->catfile.
sub combine_path{
    my($dir, $file) = @_;
    return File::Spec->catfile($dir, $file);
}
# Encode a character string into UTF-8 octets.
sub enc_utf8{
    my($str) = @_;
    my $octets = Encode::encode('utf-8', $str);
    return $octets;
}
# Decode UTF-8 octets into a character string.
sub dec_utf8{
    my($str) = @_;
    my $characters = Encode::decode('utf-8', $str);
    return $characters;
}
# Print $str wrapped in the Term::ANSIColor sequence for $color, followed
# by the "reset" sequence.
sub print_color{
    my($color, $str) = @_;
    my $switch_on = color($color);
    my $switch_off = color("reset");
    print $switch_on, $str, $switch_off;
}
# End of package mylib: a file loaded with require must finish by yielding
# a true value.
return 1;
package wplib;
use strict;
use warnings;
use Encode;
use Encode qw(encode_utf8);
use File::Basename;
use File::Path;
use File::Spec;
use XMLRPC::Lite;
use utf8;
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
require "mylib.pm";
# Create a new post through the XML-RPC method 'metaWeblog.newPost' on
# $endpoint and return the "result" of the response.
# Comments and pings are always enabled; the trailing 1 is passed to the
# call as its last positional argument.
sub entry_thread{
    my($username, $password, $endpoint, $blogid, $entry_title, $entry_body, $entry_more, $entry_excerpt, $entry_keyword, $categories_ref) = @_;
    my %post = (
        'title'             => $entry_title,
        'description'       => $entry_body,
        'mt_allow_comments' => 1,
        'mt_allow_pings'    => 1,
        'mt_text_more'      => $entry_more,
        'mt_excerpt'        => $entry_excerpt,
        'mt_keywords'       => $entry_keyword,
        'categories'        => $categories_ref,
    );
    my $client = XMLRPC::Lite->proxy($endpoint);
    my $response = $client->call('metaWeblog.newPost', $blogid, $username, $password, \%post, 1);
    my $result = $response->result;
    return $result;
}
# Update the post $postid through the XML-RPC method 'metaWeblog.editPost'
# on $endpoint and return the "result" of the response.
# Comments and pings are always enabled; the trailing 1 is passed to the
# call as its last positional argument.
sub update_thread{
    my($username, $password, $endpoint, $postid, $entry_title, $entry_body, $entry_more, $entry_excerpt, $entry_keyword, $categories_ref) = @_;
    my %post = (
        'title'             => $entry_title,
        'description'       => $entry_body,
        'mt_allow_comments' => 1,
        'mt_allow_pings'    => 1,
        'mt_text_more'      => $entry_more,
        'mt_excerpt'        => $entry_excerpt,
        'mt_keywords'       => $entry_keyword,
        'categories'        => $categories_ref,
    );
    my $client = XMLRPC::Lite->proxy($endpoint);
    my $response = $client->call('metaWeblog.editPost', $postid, $username, $password, \%post, 1);
    my $result = $response->result;
    return $result;
}
# Call the XML-RPC method "blogger.getUsersBlogs" on $endpoint with an empty
# application key, the user name and the password (all typed as strings).
# Returns the XMLRPC::Lite client object.
# NOTE(review): the call's response ($res) is discarded and the client is
# returned instead; get_user_info returns ->result — confirm which one
# callers of this sub actually expect.
sub get_blog_info{
    my($username, $password, $endpoint) = @_;
    my $rpc = XMLRPC::Lite->new();
    $rpc->proxy($endpoint);
    my @typed_args = map { XMLRPC::Data->type('string', $_) } ('', $username, $password);
    my $res = $rpc->call("blogger.getUsersBlogs", @typed_args);
    return $rpc;
}
# Call the XML-RPC method "blogger.getUserInfo" on $endpoint with an empty
# application key, the user name and the password, and return the "result"
# of the response.
sub get_user_info{
    my($username, $password, $endpoint) = @_;
    my $rpc = XMLRPC::Lite->new();
    $rpc->proxy($endpoint);
    my $response = $rpc->call("blogger.getUserInfo", "", $username, $password);
    my $res = $response->result;
    return $res;
}
# End of package wplib: a file loaded with require must finish by yielding
# a true value.
return 1;
package nichanlib;
use strict;
use warnings;
use File::Basename;
use File::Path;
use File::Spec;
use XMLRPC::Lite;
use Encode;
use Encode qw/ decode encode_utf8 /;
use LWP::UserAgent;
use HTML::TreeBuilder;
use utf8;
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
require "mylib.pm";
# Download a board-menu page and save every absolute link found in it.
#
# Parameters:
#   $menu_url                     - URL of the menu page to download
#   $tmp_2ch_bbs_menu_html_file   - work file that receives the page,
#                                   re-encoded from Shift_JIS to UTF-8
#   $save_2ch_bbs_menu_link_file  - output file; one "href<TAB>link text"
#                                   line per element whose href matches
#                                   http://
# Progress messages are printed to STDOUT.
# NOTE(review): the HTTP response is used without checking is_success.
sub get_2ch_bbs_menu{
my($menu_url, $tmp_2ch_bbs_menu_html_file, $save_2ch_bbs_menu_link_file) = @_;
print("取得メニューリンク:$menu_url\n");
print("作業用メニューリンクページ:$tmp_2ch_bbs_menu_html_file\n");
print("結果カテゴリリンクページ保存ファイル:$save_2ch_bbs_menu_link_file\n");
print(" => get page\n");
# Fetch the page while presenting a desktop browser User-Agent string.
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.187 Safari/535.1');
my $res = $ua->get($menu_url);
# NOTE(review): parse_head is switched off only after the request has
# already been made — confirm whether it was meant to precede get().
$ua->parse_head(0);
# Decode the raw body as Shift_JIS and store it as a single UTF-8 "line".
my $content = decode ( 'shiftjis', $res->content );
my @content_array = ();
push(@content_array, $content);
&mylib::write_file($tmp_2ch_bbs_menu_html_file, \@content_array, 'utf8');
print(" => get link\n");
# Parse the saved work file rather than the in-memory content.
my $tree = new HTML::TreeBuilder;
$tree->parse_file($tmp_2ch_bbs_menu_html_file);
$tree->eof();
my @list = ();
# Collect "href<TAB>text" for every element whose href contains http://.
for my $a ( $tree->look_down("href", qr{http://} ) ) {
my $line = join("\t", $a->attr_get_i('href'), decode( 'utf-8', $a->as_text));
push(@list, $line);
}
# Release the parse tree explicitly.
$tree = $tree->delete;
print(" => save file\n");
&mylib::write_file($save_2ch_bbs_menu_link_file, \@list, "utf8");
}
# Download a category's thread-list page and save the threads worth fetching.
#
# Parameters:
#   $menu_url                - URL of the thread-list page
#   $comment_count_hash_ref  - hash ref: thread title => comment count that
#                              was recorded previously
#   $target_thread_count     - minimum comment count a thread must have
#   $tmp_category_file       - work file that receives the page as UTF-8
#   $thread_url_file         - output file; one "url<TAB>title<TAB>count"
#                              line per selected thread
#   $filter_word_datas_ref   - array ref of filter words; a thread is kept
#                              only when its title contains at least one
#                              non-empty word
# A thread is selected when it reaches the minimum count, passes the word
# filter, and is either unknown or has more comments than recorded.
# Progress messages are printed to STDOUT.
# NOTE(review): the HTTP response is used without checking is_success.
sub get_2ch_thread_link_for_category{
    my($menu_url, $comment_count_hash_ref, $target_thread_count, $tmp_category_file, $thread_url_file, $filter_word_datas_ref) = @_;
    print("カテゴリURL:$menu_url\n");
    print("コメント数ハッシュのリファレンス:$comment_count_hash_ref\n");
    print("作業用ファイル:$tmp_category_file\n");
    print("スレッドURL保存ファイル:$thread_url_file\n");
    print(" => get page\n");
    my $ua = LWP::UserAgent->new;
    $ua->agent('Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.187 Safari/535.1');
    my $res = $ua->get($menu_url);
    # NOTE(review): parse_head is switched off only after the request has
    # already been made; kept in place to preserve behavior.
    $ua->parse_head(0);
    my $content = decode('shiftjis', $res->content);
    my @content_array = ($content);
    &mylib::write_file($tmp_category_file, \@content_array, 'utf8');
    print(" => get thread\n");
    my $tree = HTML::TreeBuilder->new;
    $tree->parse_file($tmp_category_file);
    $tree->eof();
    # The first <base> element (if any) supplies the prefix for thread links.
    my $base_url = "";
    foreach my $tag ($tree->find("base")) {
        $base_url = $tag->attr('href');
        print ($base_url."\n");
        last;
    }
    my @list = ();
    # Thread links are the elements whose href starts with a digit.
    for my $anchor ( $tree->look_down("href", qr{^\d} ) ) {
        my $tmp_url = $base_url.decode('utf-8', $anchor->attr_get_i('href'));
        $tmp_url =~ s/\/l50//go;
        my $tmp_title = decode('utf-8', $anchor->as_text);
        # The comment count is the trailing "(N)" of the link text.
        my $count = 0;
        if($tmp_title =~ /\((\d+)\)$/){
            $count = $1;
        }
        next if $count < $target_thread_count;
        # Keep the thread only when its title contains a filter word.
        my $check_fds_flag = 0;
        foreach my $fsd (@{$filter_word_datas_ref}){
            next if $fsd eq "";
            if(index($tmp_title, $fsd) != -1){
                $check_fds_flag = 1;
                # The original read "last:" (colon instead of semicolon).
                last;
            }
        }
        next if $check_fds_flag == 0;
        # Strip the leading "N: " ordinal and the trailing " (N)" count.
        $tmp_title =~ s/^\d{1,4}: //;
        $tmp_title =~ s/ \((\d+)\)$//;
        # Known threads are listed only when they gained comments.
        my $is_known = exists($comment_count_hash_ref->{$tmp_title});
        if(!$is_known || $count > ($comment_count_hash_ref->{$tmp_title} + 0)){
            push(@list, join("\t", $tmp_url, $tmp_title, $count));
        }
    }
    $tree = $tree->delete;
    print(" => save file\n");
    &mylib::write_file($thread_url_file, \@list, "utf8");
}
# Download every thread listed in a thread-URL file and save its contents.
#
# Parameters:
#   $thread_url_file - file with "url<TAB>title<TAB>count" lines
#   $output_dir      - directory for the saved threads (created if missing)
#   $wait_time       - seconds to sleep after each successful download
# For each thread one file "<second-to-last URL part>_<last URL part>.txt"
# is written holding: the text of each <h1> element, the comment count from
# the URL file, and the XML of each element with class "thread".
# Progress messages are printed to STDOUT.
sub get_2ch_thread_contents{
my($thread_url_file, $output_dir, $wait_time) = @_;
print("スレッドURLファイル:$thread_url_file\n");
print("出力ディレクトリ:$output_dir\n");
print("処理待ち時間:$wait_time\n");
if(!(-d $output_dir)){
mkdir($output_dir);
}
my @web_datas = ();
my %thread_count_hash = ();
print(" => read url file\n");
# Collect the URLs to fetch and remember the comment count of each one.
my @list = &mylib::read_file($thread_url_file, "utf8");
foreach my $l (@list){
if($l eq ""){
next;
}
my @datas = split(/\t/, $l);
my $url = $datas[0];
push(@web_datas, $url);
my $title = $datas[1];
my $count = $datas[2];;
print("$title => $count\n");
$thread_count_hash{$url} = $count;
}
foreach my $wd (@web_datas){
print(" => get page:web data:$wd\n");
my @data_array = ();
# The URL's path components are used below to build the output file name.
my @split_address = split(/\//, $wd);
# NOTE(review): $tmp_html_file_name is assigned but never used below.
my $tmp_html_file_name = "_tmp.html";
my $url = $wd;
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.187 Safari/535.1');
my $res = $ua->get($url);
if($res->is_success){
# NOTE(review): parse_head is switched off only after the request has
# already been made — confirm whether it was meant to precede get().
$ua->parse_head(0);
my $content = $res->decoded_content;
print(" => get thread content\n");
my $tree = new HTML::TreeBuilder;
$tree->parse($content);
$tree->eof();
# Every <h1> found is appended to the output, not just the first one.
my $title = "";
foreach my $tag ($tree->find("h1")) {
$title = $tag->as_text;
print("タイトル:".$title."\n");
push(@data_array, $title);
}
# Force the recorded comment count into numeric form before storing it.
my $comment_count = $thread_count_hash{$wd};
$comment_count += 0;
push(@data_array, $comment_count);
for my $a ( $tree->look_down("class", "thread" ) ) {
my $line = $a->as_XML;
push(@data_array, $line);
}
print(" => save file\n");
my $file_name = $split_address[-2]."_".$split_address[-1].".txt";
my $save_file = File::Spec->catfile($output_dir, $file_name);
&mylib::write_file($save_file, \@data_array, "utf8");
$tree = $tree->delete;
# The pause happens only after a successful download.
sleep($wait_time);
}else{
print("get error:$url\n");
}
}
}
# Rewrite every saved thread file below $input_dir into $output_dir.
#
# Each input file is read as: line 1 = title, line 2 = comment count,
# line 3 = thread markup. In the markup, anchors pointing at
# ../test/read.cgi are replaced by their link text, and mailto anchors and
# green <font> name tags are replaced by a fixed anonymous name tag.
# The output file keeps the input file's name; its directory is
# $output_dir plus the input directory path minus its first component.
# Progress messages are printed to STDOUT.
sub convert_entry_text_from_thread_contents{
    my($input_dir, $output_dir) = @_;
    print("スレッドページディレクトリ:$input_dir\n");
    print("スレッドページ編集結果ディレクトリ:$output_dir\n");
    print(" => get files\n");
    my @files = mylib::get_files_all_dir($input_dir, "*.txt");
    print(" => convert files\n");
    foreach my $file (@files){
        print(" => $file\n");
        my @lines = mylib::read_file($file, 'utf8');
        # Missing lines fall back to the same defaults as before.
        my $title = (@lines > 0) ? $lines[0] : "";
        my $comment_count = (@lines > 1) ? $lines[1] : 0;
        my $result = "";
        if(@lines > 2){
            $result = $lines[2];
            $result =~ s/(<a href=\"\.\.\/test\/read\.cgi.+?\">)(.+?)(<\/a>)/$2/g;
            $result =~ s/(<a href=\"mailto:.*?\">).+?(<\/a>)/<font color=\"green\">名無しさん<\/font>/g;
            $result =~ s/(<font color=\"green\">).+?(<\/font>)/<font color=\"green\">名無しさん<\/font>/g;
        }
        my @output_lines = ($title, $comment_count, $result);
        # Drop the first component of the input directory path.
        my ($filename, $path, $suffix) = fileparse($file);
        my @dir_parts = split(/\//, $path);
        shift(@dir_parts);
        my $tmp_dir = mylib::combine_path($output_dir, join("/", @dir_parts));
        unless(mylib::check_exist_dir($tmp_dir)){
            mylib::make_dir_force($tmp_dir);
        }
        my $fn = mylib::combine_path($tmp_dir, $filename);
        mylib::write_file($fn, \@output_lines, 'utf8');
    }
}
# Re-emit every converted thread file below $input_dir into $output_dir,
# tagging reply comments with a CSS class.
#
# Each input file is read as: line 1 = title, line 2 = comment count,
# line 3 = thread markup. The markup is cut into "<dt>...</dd>" comments;
# a comment whose <dd> starts with ">>" followed by digits gets
# class="ret1" on that <dd>. The comments are then wrapped in
# <dl class="thread">...</dl> and written as line 3 of the output file.
# The output file keeps the input file's name; its directory is
# $output_dir plus the input directory path minus its first component.
# Progress messages are printed to STDOUT.
# NOTE(review): if the markup stores ">" as "&gt;", the ">>N" pattern below
# never matches — confirm against real data.
sub convert_entry_text_from_thread_contents_color{
    my($input_dir, $output_dir) = @_;
    print("スレッドページディレクトリ:$input_dir\n");
    print("スレッドページ編集結果(色付け)ディレクトリ:$output_dir\n");
    print(" => get files\n");
    my @files = mylib::get_files_all_dir($input_dir, "*.txt");
    print(" => convert files\n");
    foreach my $file (@files){
        print(" => $file\n");
        my @lines = mylib::read_file($file, 'utf8');
        my $file_title = $lines[0];
        my $file_comment_count = $lines[1];
        # A file with fewer than three lines has no markup to process.
        my $contents = defined($lines[2]) ? $lines[2] : "";
        my $dl_header = "<dl class=\"thread\">";
        my $dl_footer = "</dl>";
        # Cut the markup into comments, each running from "<dt>" through
        # the next "</dd>".
        my @comments = ();
        my $start_index = 0;
        while(1){
            my $check_start_index = index($contents, "<dt>", $start_index);
            last if $check_start_index == -1;
            my $check_end_index = index($contents, "</dd>", $check_start_index);
            # An unterminated comment ends the scan. The original carried on
            # with index -1, producing a bogus substr and, when the search
            # position moved backwards, an endless loop.
            last if $check_end_index == -1;
            $check_end_index += 5;
            push(@comments, substr($contents, $check_start_index, $check_end_index - $check_start_index));
            $start_index = $check_end_index;
        }
        print(" => check コメントリンク\n");
        # Mark replies (comments whose body starts with ">>N").
        foreach my $comment (@comments){
            if($comment =~ /<dd>>>(\d+)/){
                $comment =~ s/<dd>/<dd class="ret1">/;
            }
        }
        my $output_datas_join = join("", $dl_header, @comments, $dl_footer);
        my @output_lines = ($file_title, $file_comment_count, $output_datas_join);
        # Drop the first component of the input directory path.
        my ($filename, $path, $suffix) = fileparse($file);
        my @tmp_paths = split(/\//, $path);
        my @tmp_paths_2 = @tmp_paths[1..$#tmp_paths];
        $path = join("/", @tmp_paths_2);
        my $tmp_dir = mylib::combine_path($output_dir, $path);
        if(!mylib::check_exist_dir($tmp_dir)){
            mylib::make_dir_force($tmp_dir);
        }
        my $fn = mylib::combine_path($tmp_dir, $filename);
        mylib::write_file($fn, \@output_lines, 'utf8');
    }
}
# End of package nichanlib: a file loaded with require must finish by
# yielding a true value.
return 1;
#!/opt/local/bin/perl
use strict;
use warnings;
use threads;
use threads::shared;
use File::Basename;
use File::Copy;
use File::Path;
use File::Spec;
use Time::HiRes;
use Readonly;
use Encode;
use utf8;
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
require 'mylib.pm';
require 'nichanlib.pm';
require 'wplib.pm';
# Line positions inside a converted thread file (as read with read_file):
# title, comment count, thread markup.
Readonly my $DATA_INDEX_TITLE => 0;
Readonly my $DATA_INDEX_COMMENT_COUNT => 1;
Readonly my $DATA_INDEX_CONTENTS => 2;
# Tab-separated field positions inside one line of the management file.
Readonly my $MGR_INDEX_ID => 0;
Readonly my $MGR_INDEX_THREAD_NAME => 1;
Readonly my $MGR_INDEX_POST_ID => 2;
Readonly my $MGR_INDEX_FILE_PATH => 3;
Readonly my $MGR_INDEX_UPDATE_FLAG => 4;
Readonly my $MGR_INDEX_COMMENT_COUNT => 5;
# NOTE(review): not referenced in the visible code; "FROP" may be a typo of
# "DROP" — confirm before renaming.
Readonly my $MGR_INDEX_DATA_FROP_FLAG => 6;
# Management file name and the directory that receives its backups.
Readonly my $WP_MNG_FILE => "wp_mgr.txt";
Readonly my $WP_MNG_FILE_BACK_DIR => "wp_mgr_back";
# Command-line arguments, in order: category list file, thread-link
# directory, thread-contents directory, converted-contents directory,
# coloured-contents directory, minimum comment count, wait between site
# requests, wait between WordPress requests, filter word file.
my($input_category_file, $thread_link_dir, $thread_contents_dir, $thread_contents_convert_dir, $thread_contents_convert_color_dir, $target_comment_count, $wait_time, $wait_time_for_wp, $filter_file) = @ARGV;
my @filter_datas = mylib::read_file($filter_file, "utf8");
# Back up the management file under a timestamped name. The backup is only
# taken when the backup directory already exists; otherwise the directory
# is created and no copy is made on this run.
if(mylib::check_exist_dir($WP_MNG_FILE_BACK_DIR)){
    if(mylib::check_exist_file($WP_MNG_FILE)){
        my @now = localtime(time);
        my $date = sprintf("%04d%02d%02d_%02d%02d%02d", $now[5] + 1900, $now[4] + 1, $now[3], $now[2], $now[1], $now[0]);
        my $back_file_name = mylib::combine_path($WP_MNG_FILE_BACK_DIR, $date."_".$WP_MNG_FILE);
        File::Copy::copy($WP_MNG_FILE, $back_file_name);
    }
}else{
    mkdir($WP_MNG_FILE_BACK_DIR);
}
# Start every working directory from an empty state.
foreach my $work_dir ($thread_link_dir, $thread_contents_dir, $thread_contents_convert_dir, $thread_contents_convert_color_dir){
    if(mylib::check_exist_dir($work_dir)){
        mylib::remove_dir_force($work_dir);
    }
    mkdir($work_dir);
}
# WordPress XML-RPC connection settings used by the wplib calls below.
# NOTE(review): the account name and password are hard-coded in the source;
# consider loading them from the environment or a protected config file.
my $user = 'admin';
my $password = 'xxxxxxxxxxxxxxxxx';
my $endpoint = 'http://xxxxxxxx.sakura.ne.jp/wordpress/xmlrpc.php';
my $blogid = 1;
# Load the category link list, dropping lines that start with "#", and echo
# the remaining lines.
print("================================================================================\n");
print("| カテゴリリンクの一覧を読み込み:$input_category_file\n");
print("================================================================================\n");
my @tmp_category_lines = mylib::read_file($input_category_file, "utf8");
my @category_lines = grep { $_ !~ /^#/ } @tmp_category_lines;
print(join("\n",@category_lines));
# When a management file exists, index its recorded comment counts both by
# thread name and by record id.
print("================================================================================\n");
print("| 管理ファイルがある場合、スレッド名、スレッドカウントを取得\n");
print("================================================================================\n");
my %wp_thread_comment_count_hash = ();
my %wp_id_comment_count_hash = ();
if(mylib::check_exist_file($WP_MNG_FILE)){
    foreach my $record (mylib::read_file($WP_MNG_FILE, 'utf8')){
        next if $record eq "";
        my @datas = split(/\t/, $record);
        my ($id, $tn, $comment_count) = @datas[$MGR_INDEX_ID, $MGR_INDEX_THREAD_NAME, $MGR_INDEX_COMMENT_COUNT];
        $wp_thread_comment_count_hash{$tn} = $comment_count;
        $wp_id_comment_count_hash{$id} = $comment_count;
    }
}
# For every category line, fetch "<category url>subback.html" and save the
# filtered thread links to "<thread link dir>/<category>.txt", pausing
# $wait_time seconds after each category.
print("================================================================================\n");
print("| カテゴリ毎に記事一覧のリンクを取得、ファイルに保存する (フィルタリング後、この一覧をダウンロードに使用)\n");
print("================================================================================\n");
foreach my $cl (@category_lines){
    next if $cl eq "";
    # The category URL is the first tab-separated field; the category name
    # is the last "/"-separated part of that URL.
    my @div_tab = split(/\t/, $cl);
    my $category_url = $div_tab[0];
    my @div_slash = split('/', $category_url);
    my $category = $div_slash[-1];
    my $url = $category_url."subback.html";
    my $save_file_path = mylib::combine_path($thread_link_dir, $category.".txt");
    nichanlib::get_2ch_thread_link_for_category($url, \%wp_thread_comment_count_hash, $target_comment_count,'tmp_get_2ch_thread_link_for_category.txt', $save_file_path, \@filter_datas);
    print($url." => ".$save_file_path."\n");
    sleep($wait_time);
}
# For every saved link file, download the listed threads into a directory
# named after the link file (second "/"-separated path part, up to its
# first dot) below the thread-contents directory.
print("================================================================================\n");
print("| 記事一覧リンク毎に記事内容を取得、ファイルに保存\n");
print("================================================================================\n");
my @link_files = mylib::get_files_top_dir($thread_link_dir, '*.txt');
foreach my $lf (@link_files){
    my @div_slash = split(/\//, $lf);
    my @div_dot = split(/\./, $div_slash[1]);
    my $hozon_dir = mylib::combine_path($thread_contents_dir, $div_dot[0]);
    unless(mylib::check_exist_dir($hozon_dir)){
        eval{
            mkpath( $hozon_dir );
        };
        die "$hozon_dir を作成できません。$@" if $@;
    }
    nichanlib::get_2ch_thread_contents($lf, $hozon_dir, $wait_time);
}
# Run both conversion passes over the downloaded threads, then verify that
# the WordPress endpoint answers a user-info request; exit when it does not.
print("================================================================================\n");
print("| 記事内容の必要のないリンクをテキストへ変換\n");
print("================================================================================\n");
nichanlib::convert_entry_text_from_thread_contents($thread_contents_dir, $thread_contents_convert_dir);
print("================================================================================\n");
print("| 記事内容にコメントにカラーを設定\n");
print("================================================================================\n");
nichanlib::convert_entry_text_from_thread_contents_color($thread_contents_convert_dir, $thread_contents_convert_color_dir);
print("================================================================================\n");
print("| wordpress有効チェック\n");
print("================================================================================\n");
my $result_ui = wplib::get_user_info($user, $password, $endpoint);
unless(defined($result_ui)){
    print("==> wordpressが有効ではないので終了します\n");
    exit();
}
# Collect the coloured thread files that are upload candidates, then load
# the management file into a hash keyed by record id (ids are echoed).
print("================================================================================\n");
print("| ファイルリストを抽出\n");
print("================================================================================\n");
my @upload_kouho_files = mylib::get_files_all_dir($thread_contents_convert_color_dir ,"*.txt");
print("================================================================================\n");
print("| wp管理用ファイルを読み込み\n");
print("================================================================================\n");
my %mng_hash = ();
if(mylib::check_exist_file($WP_MNG_FILE)){
    foreach my $record (mylib::read_file($WP_MNG_FILE, 'utf8')){
        next if $record eq "";
        my @datas = split(/\t/, $record);
        my $id = $datas[$MGR_INDEX_ID];
        print("$id\n");
        $mng_hash{$id} = join("\t", @datas);
    }
}
# Compare the candidate files with the management records and rewrite the
# management file. A candidate whose id (file name without ".txt") is
# already recorded has its update flag set to -1 and its comment count
# refreshed; an unknown candidate gets a new record with post id -1 and
# update flag -1. Records are written sorted numerically by update flag.
print("================================================================================\n");
print("| 取得したファイルとをwp管理データを比較、更新ファイルを作成\n");
print("================================================================================\n");
my %tmp_entry_data_hash = ();
foreach my $upk_file (@upload_kouho_files){
    my ($category_id, $path, $suffix) = fileparse($upk_file, qw /.txt/);
    my @tmp_lines = mylib::read_file($upk_file,'utf8');
    if(exists($mng_hash{$category_id})){
        my @datas = split("\t", $mng_hash{$category_id});
        $datas[$MGR_INDEX_UPDATE_FLAG] = -1;
        $datas[$MGR_INDEX_COMMENT_COUNT] = $tmp_lines[$DATA_INDEX_COMMENT_COUNT];
        $tmp_entry_data_hash{$category_id} = join("\t", @datas);
    }else{
        my $title = $tmp_lines[$DATA_INDEX_TITLE];
        my $comment_count = $tmp_lines[$DATA_INDEX_COMMENT_COUNT];
        print("$upk_file\n");
        $tmp_entry_data_hash{$category_id} = join("\t", $category_id, $title, -1, $upk_file, -1, $comment_count, 0);
    }
}
# Fresh records replace or extend the previously stored ones.
my %entry_data_hash = (%mng_hash, %tmp_entry_data_hash);
my @entry_datas = values(%entry_data_hash);
my @sorted_entry_datas = sort {
    (split("\t", $a))[4] <=> (split("\t", $b))[4]
} @entry_datas;
mylib::write_file($WP_MNG_FILE, \@sorted_entry_datas, 'utf8');
# Re-read the management file just written and index its records by the
# first tab-separated field (the id).
print("================================================================================\n");
print("| wp管理データにより、wpの新規登録/更新処理\n");
print("================================================================================\n");
my %mngwp_hash = ();
my @mngwp_lines = mylib::read_file($WP_MNG_FILE, 'utf8');
foreach my $record (@mngwp_lines){
    my @datas = split(/\t/, $record);
    $mngwp_hash{$datas[0]} = join("\t", @datas);
}
# Walk every management record and push pending threads to WordPress.
#
# A record is pending when its update flag is -1. A pending record with
# post id -1 is created as a new post; any other pending record updates the
# existing post. Successful records get update flag 1 (and the new post
# id); failed ones keep flag -1 and get comment count -1.
# When the RPC call throws, the loop sleeps 600 seconds and retries without
# limit. After each processed record it sleeps $wait_time_for_wp seconds.
foreach my $key (keys(%mngwp_hash)){
    print($key."\n");
    my @datas = split(/\t/, $mngwp_hash{$key});
    my $post_id = $datas[$MGR_INDEX_POST_ID];
    $post_id += 0;
    my $entry_flag = $datas[$MGR_INDEX_UPDATE_FLAG];
    $entry_flag += 0;
    my $file = $datas[$MGR_INDEX_FILE_PATH];
    if($entry_flag != -1){
        print(" => no work\n");
        next;
    }
    my $is_new_entry = ($post_id == -1);
    my $action = $is_new_entry ? "new_entry" : "update_entry";
    print(" => $action => $file\n");
    # Build the post payload; the text parts are sent as UTF-8 octets.
    my %entry_hash = create_entry_data($file);
    my $categories_ref = $entry_hash{"entry_categories_ref"};
    my $keyword = $entry_hash{"keyword"};
    my $title = encode('utf-8', $entry_hash{"title"});
    my $text_body = encode('utf-8', $entry_hash{"text_body"});
    my $text_more = encode('utf-8', $entry_hash{"text_more"});
    my $excerpt = "";
    my $result = undef;
    while(1){
        eval{
            if($is_new_entry){
                $result = wplib::entry_thread($user, $password, $endpoint, $blogid, $title, $text_body, $text_more, $excerpt, $keyword, $categories_ref);
            }else{
                $result = wplib::update_thread($user, $password, $endpoint, $post_id, $title, $text_body, $text_more, $excerpt, $keyword, $categories_ref);
            }
        };
        last if !$@;
        print("========================================> false sleep 600(sec)\n");
        sleep(600);
    }
    if(!defined($result)){
        print(" => ${action}_false\n");
        $datas[$MGR_INDEX_UPDATE_FLAG] = -1;
        # The original wrote -1 to $DATA_INDEX_COMMENT_COUNT (index 1, the
        # thread name in a management record) instead of the comment count.
        $datas[$MGR_INDEX_COMMENT_COUNT] = -1;
    }elsif($is_new_entry){
        print(" => new_entry_success => postid:$result\n");
        $datas[$MGR_INDEX_POST_ID] = $result;
        $datas[$MGR_INDEX_UPDATE_FLAG] = 1;
    }else{
        print(" => update_entry_success\n");
        $datas[$MGR_INDEX_UPDATE_FLAG] = 1;
    }
    # Always re-join with a real tab; the failure paths used '\t' in single
    # quotes, which wrote a literal backslash-t and corrupted the record.
    $mngwp_hash{$key} = join("\t", @datas);
    sleep($wait_time_for_wp);
}
# Write the updated management records back to the management file and end
# the script.
print("================================================================================\n");
print("| wpへの新規登録/更新登録の結果をファイルに保存\n");
print("================================================================================\n");
my @output_mngwp_datas = map { $mngwp_hash{$_} } keys(%mngwp_hash);
mylib::write_file($WP_MNG_FILE, \@output_mngwp_datas, 'utf8');
exit();
# Build the data for one blog post from a converted thread file.
#
# Parameters:
#   $file - path of the thread file (title, comment count, markup lines)
# Returns a hash (as a flat list) with the keys:
#   entry_categories_ref - array ref holding the category
#   keyword              - the category
#   title                - the title line without its trailing " (N)" count
#   text_body, text_more - the two halves of the markup line
# The category is the part of the file name (without ".txt") before the
# first underscore.
sub create_entry_data{
    my($file) = @_;
    my @lines = mylib::read_file($file, "utf8");
    my ($filename, $path, $suffix) = fileparse($file, qw /.txt/);
    my @name_parts = split(/_/, $filename);
    my $category = $name_parts[0];
    my $entry_title = delete_thread_count($lines[$DATA_INDEX_TITLE]);
    my %div_contents_hash = div_enrty_data($lines[$DATA_INDEX_CONTENTS]);
    my %tmp_hash = (
        "entry_categories_ref" => [$category],
        "keyword"              => $category,
        "title"                => $entry_title,
        "text_body"            => $div_contents_hash{"text_body"},
        "text_more"            => $div_contents_hash{"text_more"},
    );
    return %tmp_hash;
}
# Split thread markup into the part shown up front and the part shown
# after the "read more" break.
#
# Parameters:
#   $text - thread markup
# Returns a hash (as a flat list) with the keys:
#   text_body - everything before the first occurrence of "<dt>8"
#   text_more - everything from that occurrence to the end
# When the marker is absent the whole text becomes text_body and text_more
# is empty. The original passed index()'s -1 straight to substr(), which
# cut the last character off the body and returned it as text_more.
sub div_enrty_data{
    my($text) = @_;
    $text = "" unless defined($text);
    my %tmp_hash = ();
    my $check_str = '<dt>8';
    my $pos_index = index($text, $check_str);
    if($pos_index == -1){
        $tmp_hash{"text_body"} = $text;
        $tmp_hash{"text_more"} = "";
    }else{
        $tmp_hash{"text_body"} = substr($text, 0, $pos_index);
        $tmp_hash{"text_more"} = substr($text, $pos_index);
    }
    return %tmp_hash;
}
# Return $title without a trailing "<whitespace>(digits)" comment count.
sub delete_thread_count{
    my ($title) = @_;
    (my $title_only = $title) =~ s/\s\(\d+\)$//;
    return $title_only;
}