#!/opt/local/bin/perl
use strict;
use warnings;
use threads;
use threads::shared;
use File::Basename;
use File::Copy;
use File::Path;
use File::Spec;
use Time::HiRes;
use Readonly;
use Encode;
use utf8;
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
require 'mylib.pm';
require 'nichanlib.pm';
require 'wplib.pm';
# Line indexes into a converted thread-content file as returned by
# mylib::read_file: line 0 is read as the title, line 1 as the comment count
# and line 2 as the contents that get split into body / "more" text.
Readonly my $DATA_INDEX_TITLE => 0;
Readonly my $DATA_INDEX_COMMENT_COUNT => 1;
Readonly my $DATA_INDEX_CONTENTS => 2;
# Column indexes of one tab-separated record in the management file.
# A new record is written as:
#   id, thread name (title), post id, file path, update flag, comment count, 0
# The script treats post id -1 as "not posted yet" and update flag -1 as
# "needs to be sent to WordPress"; it sets the flag to 1 after a successful
# send.
Readonly my $MGR_INDEX_ID => 0;
Readonly my $MGR_INDEX_THREAD_NAME => 1;
Readonly my $MGR_INDEX_POST_ID => 2;
Readonly my $MGR_INDEX_FILE_PATH => 3;
Readonly my $MGR_INDEX_UPDATE_FLAG => 4;
Readonly my $MGR_INDEX_COMMENT_COUNT => 5;
# NOTE(review): not referenced anywhere else in this file; the name looks like
# a typo of "DROP" -- confirm before renaming.
Readonly my $MGR_INDEX_DATA_FROP_FLAG => 6;
# Management file (one record per thread) and the directory that receives
# timestamped backup copies of it.
Readonly my $WP_MNG_FILE => "wp_mgr.txt";
Readonly my $WP_MNG_FILE_BACK_DIR => "wp_mgr_back";
# Positional command-line arguments, in order:
#   1. category list file
#   2. directory for the per-category thread link lists
#   3. directory for downloaded thread contents
#   4. directory for converted (link-stripped) contents
#   5. directory for colorized contents
#   6. target comment count (handed to nichanlib when listing threads)
#   7. seconds to sleep between 2ch requests
#   8. seconds to sleep between WordPress requests
#   9. filter file
my($input_category_file, $thread_link_dir, $thread_contents_dir, $thread_contents_convert_dir, $thread_contents_convert_color_dir, $target_comment_count, $wait_time, $wait_time_for_wp, $filter_file) = @ARGV;
my @filter_datas = mylib::read_file($filter_file, "utf8");

# Make sure the backup directory exists, then take a timestamped copy of the
# management file. The copy is attempted even on the run that creates the
# directory, so an existing management file is never left without a backup.
if (!mylib::check_exist_dir($WP_MNG_FILE_BACK_DIR)) {
    mkdir($WP_MNG_FILE_BACK_DIR)
        or warn "cannot create backup directory $WP_MNG_FILE_BACK_DIR: $!\n";
}
if (mylib::check_exist_file($WP_MNG_FILE)) {
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    my $date = sprintf("%04d%02d%02d_%02d%02d%02d", $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
    my $back_file_name = mylib::combine_path($WP_MNG_FILE_BACK_DIR, $date."_".$WP_MNG_FILE);
    File::Copy::copy($WP_MNG_FILE, $back_file_name)
        or warn "cannot back up $WP_MNG_FILE to $back_file_name: $!\n";
}

# Every working directory starts out empty: wipe it when it already exists,
# then (re)create it. A failed mkdir is fatal only when the directory is
# really missing afterwards, because nothing below can work without it.
foreach my $work_dir ($thread_link_dir, $thread_contents_dir, $thread_contents_convert_dir, $thread_contents_convert_color_dir) {
    if (mylib::check_exist_dir($work_dir)) {
        mylib::remove_dir_force($work_dir);
    }
    if (!mkdir($work_dir) && !-d $work_dir) {
        die "cannot create working directory $work_dir: $!\n";
    }
}
# WordPress XML-RPC connection settings used by every wplib call below.
# NOTE(review): the credentials are hard-coded in the script source.
my $user = 'admin';
my $password = 'xxxxxxxxxxxxxxxxx';
my $endpoint = 'http://xxxxxxxx.sakura.ne.jp/wordpress/xmlrpc.php';
my $blogid = 1;

print(
    "================================================================================\n",
    "| カテゴリリンクの一覧を読み込み:$input_category_file\n",
    "================================================================================\n",
);

# Read the category list and keep every line that does not start with '#'.
my @tmp_category_lines = mylib::read_file($input_category_file, "utf8");
my @category_lines = grep { $_ !~ /^#/ } @tmp_category_lines;
print(join("\n", @category_lines));
print(
    "================================================================================\n",
    "| 管理ファイルがある場合、スレッド名、スレッドカウントを取得\n",
    "================================================================================\n",
);

# Comment counts recorded by earlier runs, keyed by thread name and by id.
# Both stay empty when no management file exists yet.
my %wp_thread_comment_count_hash = ();
my %wp_id_comment_count_hash = ();
if (mylib::check_exist_file($WP_MNG_FILE)) {
    for my $mgr_line (mylib::read_file($WP_MNG_FILE, 'utf8')) {
        next if $mgr_line eq "";
        my @fields = split(/\t/, $mgr_line);
        my $known_count = $fields[$MGR_INDEX_COMMENT_COUNT];
        $wp_thread_comment_count_hash{ $fields[$MGR_INDEX_THREAD_NAME] } = $known_count;
        $wp_id_comment_count_hash{ $fields[$MGR_INDEX_ID] } = $known_count;
    }
}
print(
    "================================================================================\n",
    "| カテゴリ毎に記事一覧のリンクを取得、ファイルに保存する (フィルタリング後、この一覧をダウンロードに使用)\n",
    "================================================================================\n",
);

# For each non-empty category line: the first tab-separated column is the
# board URL, its last path component names the category, and the thread list
# is fetched from "<url>subback.html" into "<link dir>/<category>.txt".
for my $category_line (@category_lines) {
    next if $category_line eq "";

    my @columns = split(/\t/, $category_line);
    my @url_parts = split('/', $columns[0]);
    my $category_name = $url_parts[-1];
    my $list_url = $columns[0]."subback.html";
    my $link_list_path = mylib::combine_path($thread_link_dir, $category_name.".txt");

    nichanlib::get_2ch_thread_link_for_category($list_url, \%wp_thread_comment_count_hash, $target_comment_count,'tmp_get_2ch_thread_link_for_category.txt', $link_list_path, \@filter_datas);
    print($list_url." => ".$link_list_path."\n");

    # Pause between requests.
    sleep($wait_time);
}
print("================================================================================\n");
print("| 記事一覧リンク毎に記事内容を取得、ファイルに保存\n");
print("================================================================================\n");

# Download the threads listed in every per-category link file into a
# sub-directory of the contents directory named after that category.
my @link_files = mylib::get_files_top_dir($thread_link_dir, '*.txt');
foreach my $lf (@link_files) {
    # The category is the link file's base name up to its first dot.
    # basename() is used instead of picking the second '/'-separated path
    # component, which only worked when the link directory was a single
    # relative path component.
    my $category = basename($lf);
    $category =~ s/\..*\z//s;

    my $hozon_dir = mylib::combine_path($thread_contents_dir, $category);
    if (!mylib::check_exist_dir($hozon_dir)) {
        eval {
            mkpath($hozon_dir);
        };
        if ($@) {
            die "$hozon_dir を作成できません。$@";
        }
    }
    nichanlib::get_2ch_thread_contents($lf, $hozon_dir, $wait_time);
}
# Conversion pass 1: raw contents directory -> converted directory.
print(
    "================================================================================\n",
    "| 記事内容の必要のないリンクをテキストへ変換\n",
    "================================================================================\n",
);
nichanlib::convert_entry_text_from_thread_contents($thread_contents_dir, $thread_contents_convert_dir);

# Conversion pass 2: converted directory -> colorized directory.
print(
    "================================================================================\n",
    "| 記事内容にコメントにカラーを設定\n",
    "================================================================================\n",
);
nichanlib::convert_entry_text_from_thread_contents_color($thread_contents_convert_dir, $thread_contents_convert_color_dir);

# Stop here when the WordPress user lookup returns nothing.
print(
    "================================================================================\n",
    "| wordpress有効チェック\n",
    "================================================================================\n",
);
my $result_ui = wplib::get_user_info($user, $password, $endpoint);
unless (defined($result_ui)) {
    print("==> wordpressが有効ではないので終了します\n");
    exit();
}
# Collect every *.txt file under the colorized directory as upload candidates.
print(
    "================================================================================\n",
    "| ファイルリストを抽出\n",
    "================================================================================\n",
);
my @upload_kouho_files = mylib::get_files_all_dir($thread_contents_convert_color_dir ,"*.txt");

# Load the existing management records keyed by id, echoing each id.
print(
    "================================================================================\n",
    "| wp管理用ファイルを読み込み\n",
    "================================================================================\n",
);
my %mng_hash = ();
if (mylib::check_exist_file($WP_MNG_FILE)) {
    for my $mgr_line (mylib::read_file($WP_MNG_FILE, 'utf8')) {
        next if $mgr_line eq "";
        my @fields = split(/\t/, $mgr_line);
        my $id = $fields[$MGR_INDEX_ID];
        print("$id\n");
        # Re-joining the split fields drops any trailing empty columns.
        $mng_hash{$id} = join("\t", @fields);
    }
}
print("================================================================================\n");
print("| 取得したファイルとをwp管理データを比較、更新ファイルを作成\n");
print("================================================================================\n");

# Build a fresh record for every downloaded file. The record key is the file
# name without its ".txt" suffix.
my %tmp_entry_data_hash = ();
foreach my $upk_file (@upload_kouho_files) {
    my ($category_id) = fileparse($upk_file, qr/\.txt/);
    my @tmp_lines = mylib::read_file($upk_file, 'utf8');

    # A truncated file may lack these lines; fall back to an empty string
    # rather than interpolating undef.
    my $comment_count = $tmp_lines[$DATA_INDEX_COMMENT_COUNT];
    $comment_count = '' unless defined($comment_count);

    if (exists($mng_hash{$category_id})) {
        # Known thread: keep its record but flag it for re-sending and
        # refresh the comment count.
        my @datas = split(/\t/, $mng_hash{$category_id});
        $datas[$MGR_INDEX_UPDATE_FLAG] = -1;
        $datas[$MGR_INDEX_COMMENT_COUNT] = $comment_count;
        $tmp_entry_data_hash{$category_id} = join("\t", @datas);
    } else {
        # New thread: post id -1 (not posted yet), update flag -1 (to send).
        my $title = $tmp_lines[$DATA_INDEX_TITLE];
        $title = '' unless defined($title);
        print("$upk_file\n");
        $tmp_entry_data_hash{$category_id} = "$category_id\t${title}\t-1\t${upk_file}\t-1\t${comment_count}\t0";
    }
}

# Fresh records replace or extend the ones loaded from the management file.
my %entry_data_hash = (%mng_hash, %tmp_entry_data_hash);

# Sort numerically by update flag, extracting the sort key once per record
# instead of re-splitting both records on every comparison.
my @sorted_entry_datas =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] }
    map  {
        my @fields = split(/\t/, $_);
        my $flag = $fields[$MGR_INDEX_UPDATE_FLAG];
        [ (defined($flag) ? $flag : 0), $_ ];
    }
    values(%entry_data_hash);
mylib::write_file($WP_MNG_FILE, \@sorted_entry_datas, 'utf8');
print("================================================================================\n");
print("| wp管理データにより、wpの新規登録/更新処理\n");
print("================================================================================\n");

# Reload the management file that was just written, keyed by id. Empty lines
# are skipped, as the other management-file loaders in this script do.
my %mngwp_hash = ();
my @mngwp_lines = mylib::read_file($WP_MNG_FILE, 'utf8');
foreach my $mgr_line (@mngwp_lines) {
    next if !defined($mgr_line) || $mgr_line eq "";
    my @datas = split(/\t/, $mgr_line);
    $mngwp_hash{ $datas[$MGR_INDEX_ID] } = join("\t", @datas);
}

# Send every record whose update flag is -1 to WordPress: a new post when the
# post id is -1, otherwise an update of the existing post. The outcome is
# written back into %mngwp_hash so it can be saved afterwards.
foreach my $key (keys(%mngwp_hash)) {
    print($key."\n");
    my @datas = split(/\t/, $mngwp_hash{$key});
    my $post_id = $datas[$MGR_INDEX_POST_ID];
    $post_id += 0;
    my $entry_flag = $datas[$MGR_INDEX_UPDATE_FLAG];
    $entry_flag += 0;
    my $file = $datas[$MGR_INDEX_FILE_PATH];

    if ($entry_flag != -1) {
        # Nothing to send, so no pause is needed either.
        print(" => no work\n");
        next;
    }

    my $is_new = ($post_id == -1);
    my $label = $is_new ? "new_entry" : "update_entry";
    print(" => $label => $file\n");

    # Text fields are passed to wplib as UTF-8 encoded byte strings.
    my %entry_hash = create_entry_data($file);
    my $categories_ref = $entry_hash{"entry_categories_ref"};
    my $keyword = $entry_hash{"keyword"};
    my $title = encode('utf-8', $entry_hash{"title"});
    my $text_body = encode('utf-8', $entry_hash{"text_body"});
    my $text_more = encode('utf-8', $entry_hash{"text_more"});
    my $excerpt = "";

    # Retry until the call returns without throwing, sleeping 10 minutes
    # after every exception. There is deliberately no retry limit.
    my $result = undef;
    while (1) {
        my $succeeded = eval {
            $result = $is_new
                ? wplib::entry_thread($user, $password, $endpoint, $blogid, $title, $text_body, $text_more, $excerpt, $keyword, $categories_ref)
                : wplib::update_thread($user, $password, $endpoint, $post_id, $title, $text_body, $text_more, $excerpt, $keyword, $categories_ref);
            1;
        };
        last if $succeeded;
        # Report the swallowed exception on STDERR before waiting.
        warn("wordpress request failed: $@");
        print("========================================> false sleep 600(sec)\n");
        sleep(600);
    }

    if (!defined($result)) {
        # Failed: keep the record flagged for sending and reset its stored
        # comment count. This writes the management-record column, not the
        # content-file line index, which would overwrite the thread name.
        print(" => ${label}_false\n");
        $datas[$MGR_INDEX_UPDATE_FLAG] = -1;
        $datas[$MGR_INDEX_COMMENT_COUNT] = -1;
    } else {
        if ($is_new) {
            print(" => new_entry_success => postid:$result\n");
            $datas[$MGR_INDEX_POST_ID] = $result;
        } else {
            print(" => update_entry_success\n");
        }
        $datas[$MGR_INDEX_UPDATE_FLAG] = 1;
    }
    # Join with a real tab ("\t"); a single-quoted '\t' would glue the
    # columns together with a literal backslash-t and corrupt the record.
    $mngwp_hash{$key} = join("\t", @datas);

    # Pause between WordPress requests.
    sleep($wait_time_for_wp);
}
print(
    "================================================================================\n",
    "| wpへの新規登録/更新登録の結果をファイルに保存\n",
    "================================================================================\n",
);

# Write every record, including the outcomes of this run, back to the
# management file and finish.
my @output_mngwp_datas = map { $mngwp_hash{$_} } keys(%mngwp_hash);
mylib::write_file($WP_MNG_FILE, \@output_mngwp_datas, 'utf8');
exit();
# Build the data for one WordPress entry from a converted thread file.
#
# Parameters:
#   $file - path of the thread file; the part of its base name before the
#           first '_' is used as both category and keyword.
# Returns a hash (as a list) with the keys:
#   entry_categories_ref - array reference holding the category
#   keyword              - the category
#   title                - title line with a trailing " (N)" removed
#   text_body, text_more - the contents line as split by div_enrty_data
sub create_entry_data {
    my ($file) = @_;

    my @file_lines = mylib::read_file($file, "utf8");

    my ($base_name) = fileparse($file, qw /.txt/);
    my $category = (split(/_/, $base_name))[0];

    my $clean_title = delete_thread_count($file_lines[$DATA_INDEX_TITLE]);

    my $raw_contents = $file_lines[$DATA_INDEX_CONTENTS];
    my %split_contents = div_enrty_data($raw_contents);

    my %entry = (
        "entry_categories_ref" => [$category],
        "keyword"              => $category,
        "title"                => $clean_title,
        "text_body"            => $split_contents{"text_body"},
        "text_more"            => $split_contents{"text_more"},
    );
    return %entry;
}
# Split entry contents into the part shown up front and the "more" part.
#
# The split point is the first occurrence of the marker '<dt>8'; the marker
# itself starts the "more" part.
#
# Parameters:
#   $text - the contents string; undef is treated as an empty string.
# Returns a hash (as a list) with the keys "text_body" and "text_more".
# When the marker is absent the whole text is the body and "text_more" is
# empty.
sub div_enrty_data {
    my ($text) = @_;
    $text = '' unless defined($text);

    my $check_str = '<dt>8';
    my $pos_index = index($text, $check_str);

    my %tmp_hash = ();
    if ($pos_index < 0) {
        # index() returns -1 when the marker is missing; feeding that to
        # substr() would cut the last character off the body and return it
        # as the "more" text.
        $tmp_hash{"text_body"} = $text;
        $tmp_hash{"text_more"} = '';
    } else {
        $tmp_hash{"text_body"} = substr($text, 0, $pos_index);
        $tmp_hash{"text_more"} = substr($text, $pos_index);
    }
    return %tmp_hash;
}
# Return $title without a trailing " (N)" suffix, i.e. one whitespace
# character followed by a parenthesised run of digits at the end of the
# string. The argument itself is left untouched.
sub delete_thread_count {
    my ($title) = @_;
    (my $stripped = $title) =~ s/\s\(\d+\)$//;
    return $stripped;
}