require './jcode.pl';
use Time::Local;
use HTML::Entities;
use Encode;

### Hatena Bookmark log data ###

# Copyright (c) 2014-2018 TAKAGI-1 (TAKAGI Hitoshi, http://takagi1.net/).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# Variables / arrays ---
# Input files: the existing bookmark dump and the joined RSS feed.
$In_FileName = 'dump_euc.dat';
$InRSS_FileName = 'rss_joined_euc.dat';

# Output file and the glob used as its file handle.
$Out_FileName = 'dump-unified_euc.dat';
$OutFH = * OUT_FILE;

# Script-wide settings handed out by getocode() and getbr().
$ocode = 'euc-jp';
$br = "\n";

# --------------
# Local-time conversions (timelocal in HttpYmdhmsToTime) follow this zone.
$ENV{'TZ'} = "JST-9";

# --------------

# Convert the RSS items to bookmark lines, load the old dump, merge them.
@new = &RssToBk($InRSS_FileName);

@old = &get_Dump($In_FileName);
@out = &Unify(\@new, \@old);

# Write the merged dump.  A failure to open or flush the output must not
# be reported as a normal end, so both calls are checked.
open ($OutFH, '>', $Out_FileName )
  or die "ERROR: cannot open $Out_FileName for writing: $!\n";
&PrintArray_NoConvert($OutFH, &getbr(), @out);
close ($OutFH)
  or die "ERROR: cannot close $Out_FileName: $!\n";

print "...Normally end.";
exit;

# ================================================


sub Unify {
# Merge newly converted bookmark lines into an existing bookmark dump.
#
# Arguments:
#   $New_ref - reference to the new bookmark lines; the second-to-last
#              element is expected to be the <DT><A HREF="..."> line of the
#              last new entry (EachRssToBk emits a DT line then a DD line).
#   $Old_ref - reference to the lines of the existing dump.
# Returns: the merged list of lines: the old header up to and including
#   the first <DL><p> line, all new lines, then the remainder of the old dump
#   (or a closing </DL><p> when the new lines reach back past the old dump).
# Dies with "ERROR: sub Unify: No url." when the last new entry carries
# no HREF, because the overlap with the old dump cannot be located then.
  my ($New_ref, $Old_ref) = @_;
  my @New = @{$New_ref};
  my @Old = @{$Old_ref};
  my @out;
  my $Link;
  my $last_entry;
  my $i;
  my $j = 0;   # index in @Old from which old lines are appended
# /-------------
  # Copy the header of the old dump, through the opening <DL><p>.
  for ($i=0; $i<= $#Old; $i++){
    push(@out, $Old[$i]);
    if ($Old[$i] =~ /<DL><p>/i ){
      $j = $i+1;
      last;
    }
  }
  push(@out, @New);

  # Pick the URL of the last (oldest) new entry, wrapped in double quotes
  # so that only a complete attribute value matches below.
  $last_entry = @New ? $New[$#New -1] : undef;
  if (defined($last_entry) && $last_entry =~ /HREF="(.*?)"/i ){
    $Link = '"'.$1.'"';
  } else {
    die "ERROR: sub Unify: No url.\n";
  }

  # Find that entry in the old dump and resume after its DT and DD lines.
  # \Q...\E keeps URL characters such as ?, + and . from acting as regex
  # metacharacters.
  for ($i=0; $i<= $#Old; $i++){
    if ($Old[$i] =~ /\Q$Link\E/ ){
      $j = $i+2;
      last;
    }
  }

  if ( &getAddDate($New[$#New -1]) < &getOldestAddDate(@Old) ){
    # Every old entry is newer than the oldest new one: just close the list.
    push(@out, '</DL><p>');
  } else {
    push(@out, @Old[$j .. $#Old]);
  }

  return (@out);
}


sub getOldestAddDate {
# Return the ADD_DATE of the dated line closest to the end of a dump.
#
# Arguments: the lines of a bookmark dump (a list of strings).
# Returns:   the ADD_DATE value (as reported by getAddDate) of the last
#            line that has a non-zero one, or 0 when no line does.
#            NOTE(review): the name implies the dump is ordered newest
#            first, so the last dated line is the oldest entry - confirm.
# Variables / arrays ---
  my (@a) = @_;  # whole argument list; a scalar here would leave @a unset
  my $i;
  my $date;
# /-------------
  # Walk backwards and skip lines without an ADD_DATE (for example the
  # closing </DL><p> line) instead of stopping at the very last line.
  for ($i=$#a; $i>=0; $i--){
    $date = &getAddDate($a[$i]);
    return ($date) if ($date > 0);
  }
  return (0);
}


sub get_Dump {
# Load a bookmark dump file as a list of cleaned-up lines.
#
# Argument: $In_FileName - path of the dump file, handed to ReadFile.
# Returns:  the file's lines with every CR/LF or LF removed and each line
#           passed through anti_character_corruption.
# Reference kept from the original source:
#   http://nlab.itmedia.co.jp/nl/articles/1110/03/news060.html
  my ($In_FileName) = @_;
  my @a;
# /-------------
  for my $raw ( &ReadFile($In_FileName) ) {
    (my $line = $raw) =~ s|\r?\n||g;   # strip line terminators
    push(@a, &anti_character_corruption($line));
  }
  return (@a);
}

sub RssToBk {
# Convert an RSS file into bookmark-dump lines.
#
# Argument: $In_FileName - path of the RSS file, handed to ReadFile.
# Returns:  the concatenated output of EachRssToBk for every item group
#           that getItemOfRss finds in the file, in file order.
  my ($In_FileName) = @_;
  my @a = &ReadFile($In_FileName);
  my @out;
# /-------------
  # Strip line terminators and repair escaped characters in place.
  for my $line (@a) {
    $line =~ s|\r?\n||g;
    $line = &anti_character_corruption($line);
  }
  # One array reference per RSS item; each becomes its bookmark lines.
  foreach my $item_ref ( &getItemOfRss(@a) ) {
    push(@out, &EachRssToBk( @{$item_ref} ));
  }
  return (@out);
}

sub EachRssToBk {
# Turn the lines of one RSS item into two bookmark-dump lines.
#
# Fields read from the item (the last occurrence wins, except tags):
#   <link>         -> HREF
#   <dc:date>      -> ADD_DATE and LAST_VISIT (via HttpYmdhmsToTime)
#   <dc:subject>   -> TAGS, comma separated, in order of appearance
#   <title>        -> anchor text
#   <description>  -> text of the DD line
#
# Arguments: the lines of one item.
# Returns:   ('  <DT><A ...>title</A>', '  <DD>description').  The TAGS
#            attribute is omitted when the joined tag string is empty;
#            fields that never appear are rendered as empty strings.
  my (@item) = @_;
  my @out;
  my ($Link, $Add_date, $Title, $Description);
  my @subjects;
# /-------------
  foreach my $line (@item) {
    $Link = $1
      if ($line =~ /<link>(.*?)<\/link>/ );
    $Add_date = &HttpYmdhmsToTime( $1 )
      if ($line =~ /<dc:date>(.*?)<\/dc:date>/ );
    push(@subjects, $1)
      if ($line =~ /<dc:subject>(.*?)<\/dc:subject>/ );
    $Title = $1
      if ($line =~ /<title>(.*?)<\/title>/ );
    $Description = $1
      if ($line =~ /<description>(.*?)<\/description>/ );
  }

  my $Tags = join(",", @subjects);
  my $anchor = '  <DT><A HREF="'.$Link.'" ADD_DATE="'.$Add_date.'" LAST_VISIT="'.$Add_date.'"';
  $anchor .= ' TAGS="'.$Tags.'"' unless ($Tags eq "");
  $anchor .= '>'.$Title.'</A>';

  @out = ($anchor, '  <DD>'.$Description);
  return(@out);
}


sub HttpYmdhmsToTime {
# Convert a "YYYY-MM-DDThh:mm:ss" timestamp to epoch seconds.
#  e.g. "2014-01-16T07:38:21+09:00"
# Needs: use Time::Local;
#
# Argument: $str - text containing the timestamp, optionally followed
#           directly by a zone designator ("Z", "+09:00" or "+0900").
# Returns:  epoch seconds.  When a zone designator is present it is
#           honoured, so the result does not depend on the process time
#           zone; without one the fields are taken as local time.
#           Returns 0 when no timestamp is found.
  my ($str) = @_;
  my ($year, $mon, $mday, $hours, $min, $sec, $zone);
  my $offset;
# /-------------
  if ($str =~ /(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(Z|[+-]\d{2}:?\d{2})?/i) {
    ($year, $mon, $mday, $hours, $min, $sec, $zone) = ($1, $2, $3, $4, $5, $6, $7);
    if (!defined($zone)) {
      # No zone information: interpret the fields in the local time zone.
      return ( timelocal($sec, $min, $hours, $mday, $mon - 1, $year) );
    }
    # Seconds east of UTC; stays 0 for "Z".
    $offset = 0;
    if ($zone =~ /^([+-])(\d{2}):?(\d{2})$/) {
      $offset = ($2 * 60 + $3) * 60;
      $offset = -$offset if ($1 eq '-');
    }
    return ( timegm($sec, $min, $hours, $mday, $mon - 1, $year) - $offset );
  } else {
    return ( 0 );
  }
}


sub getItemOfRss {
# Group the lines of an RSS document by item.
#
# Arguments: the lines of the document.
# Returns:   a list of array references, one per item, each holding the
#            item's lines from the opening <item ...> line through its
#            </item> line.  Lines outside any item are dropped, and an
#            input without items yields an empty list.
# Each line is first stripped of line terminators and passed through
# anti_character_corruption (on a private copy of the arguments).
# Variables / arrays ---
  my (@a) = @_;
  my @out;        # starts empty so that no placeholder items are returned
  my $flag = 0;   # 1 while inside an item
  my $ele;
# /-------------
  foreach $ele ( @a ) {
    $ele =~ s|\r?\n||g;
    $ele = &anti_character_corruption($ele);

    if ($ele =~ /<item\W.*?>/){
      # Opening tag: start a new group.
      $flag = 1;
      push(@out, [ $ele ]);
    } elsif ($flag == 1) {
      push( @{$out[$#out]}, $ele);
      # Closing tag ends the group; a stray </item> outside an item is
      # ignored because $flag is 0 there.
      $flag = 0 if ($ele =~ /<\/item>/);
    }
  }
  return (@out);
}


sub PrintArray_NoConvert {
# Write every element of a list to a file handle, each followed by a
# separator, without any character-code conversion.
#
# Arguments:
#   $Fh    - output file handle
#   $bond  - text appended after every element (e.g. a line break)
#   @array - the elements to write
  my ($Fh, $bond, @array) = @_;
# /-------------
  print {$Fh} $_ . $bond for @array;
}

sub anti_character_corruption {
# Replace escaped code-point notations left in a line of text.
#
# Argument: one string.
# Returns:  a copy of the string after the substitutions below; the
#           caller's variable is not modified.
#
# Each pattern matches the literal text backslash + "x{....}" (the
# backslash is escaped in the pattern), not the Unicode character itself.
# Presumably such text comes from an encoder that wrote unmappable
# characters in \x{....} form - confirm against the linked article.
  my ($e) = @_;
# /-------------
# http://d.hatena.ne.jp/kopug/20060903

# NOTE(review): the replacement texts look damaged by a source-encoding
# conversion (several are empty, and the U+FF0D line contains extra "|"
# delimiters) - verify them against the file in its original encoding.
  $e =~ s|\\x{ff5e}|`|g;
  $e =~ s|\\x{2225}|a|g;
  $e =~ s|\\x{ff0d}|||g;
  $e =~ s|\\x{ffe0}||g;
  $e =~ s|\\x{ffe1}||g;
  $e =~ s|\\x{ffe2}||g;

  return ($e);
}


sub getAddDate {
# Extract the numeric ADD_DATE attribute from one bookmark line.
#
# Argument: $str - a line of text.
# Returns:  the digits of the first ADD_DATE="..." attribute (matched
#           case-insensitively) as a number, or 0 when there is none.
  my ($str) = @_;
# /-------------
  return (0) unless ( $str =~ /ADD_DATE="([0-9]+)"/i );
  return ( $1 + 0 );
}


# ================================================

sub ReadFile {
# Read a whole file into a list of lines.
#
# Argument: $In_FileName - path of the file to read.
# Returns:  the lines of the file, line terminators included.  When the
#           file cannot be opened a warning naming the file is issued and
#           an empty list is returned.
# Variables / arrays ---
  my ($In_FileName) = @_;
  my @content;
  my $fh;
# /-------------
  # Three-argument open: the file name is never parsed for mode characters.
  if (open ($fh, '<', $In_FileName)) {
    @content = <$fh>;
    close ($fh);
  } else {
    warn "ReadFile: cannot open '$In_FileName': $!\n";
  }
  return @content;
}


sub ConvertArray {
# Convert the character code of every element of a list.
#
# Arguments:
#   $code  - target code name handed to jcode::convert
#   @array - the strings to convert
# Returns: the list after each element has been passed by reference to
#          jcode::convert; the caller's own variables are not touched.
  my ($code, @array) = @_;
# /-------------
  for my $text (@array) {
    &jcode::convert( \$text , $code);
  }
  return @array;
}


sub PrintArray {
# Convert every element of a list to a character code and write it to a
# file handle, each element followed by a separator.
#
# Arguments:
#   $Fh    - output file handle
#   $code  - target code name handed to jcode::convert
#   $bond  - text appended after every element
#   @array - the strings to write
  my ($Fh, $code, $bond, @array) = @_;
# /-------------
  for my $text (@array) {
    &jcode::convert( \$text ,$code);
    print {$Fh} $text.$bond;
  }
}


sub getocode {
# Accessor for the script-wide output code name.
# Returns the current value of the global $ocode.
  my $code = $ocode;
  return ($code);
}

sub getbr {
# Accessor for the script-wide line-break string.
# Returns the current value of the global $br.
  my $line_break = $br;
  return $line_break;
}

