#===========================================================================
# 
# CacheSingleton; methods to implement the Singleton pattern in terms
# of cache use.  In other words, cache objects are actually pointers to
# reference-counted pages in a shared "page cache" directory.

package Sitescooper::CacheSingleton;

use Carp;
use Sitescooper::Main;

@ISA = qw();
use vars qw{ @ISA $SLASH $MAXFILENAMELEN };
use strict;

# ---------------------------------------------------------------------------

sub new {
  my $class = shift; $class = ref($class) || $class;

  my ($main, $factory) = @_;

  $SLASH                = $Sitescooper::Main::SLASH;
  my $self = {
    'main'		=> $main,
    'factory'		=> $factory,
    'lastmod'		=> 0,
    'refcount'		=> 1,
  };

  $MAXFILENAMELEN = 256;	# reasonable default I'd say
  if ($Sitescooper::Main::use_hashes_for_cache_filenames) { # set on MacOS 9.x
    $MAXFILENAMELEN = 32;
  }

  bless ($self, $class);
  $self;
}

# ---------------------------------------------------------------------------

sub get_cached_page {
  my ($self, $ptr) = @_;

  my $pagefile = $self->{factory}->{pagecachedir}.$SLASH.$ptr;

  open (IN, "<".$pagefile) or return undef;
  binmode IN; my $cachedpage = join ('', <IN>); close IN;

  if ($cachedpage =~ s/^<!-- ([^>]+) -->\n//s) {
    foreach my $nv (split (/\s+\/\/\s+/, $1)) {
      $nv =~ /^\s*(\S+): (\S+)\s*$/ or next;
      my $name = $1;
      my $val = $2;

      if ($name eq 'lastmod') { $self->{lastmod} = $val+0; }
      elsif ($name eq 'refcount') { $self->{refcount} = $val+0; }
    }
  }

  $cachedpage;
}

# ---------------------------------------------------------------------------

sub save_cached_page {
  my ($self, $url, $sum, $size, $pagetext, $lastmod) = @_;

  my $pagefile = $self->mk_pagefile_filename ($url, $sum, $size);
  my $pagecache = $self->{factory}->{pagecachedir};

  my $header1 = "<!-- refcount: ";
  my $header2 = " ";
  if (defined $lastmod) { $header2 = " lastmod: $lastmod "; }
  $header2 .= "-->\n";

  my $tries = 0;
  while (-f "$pagefile.lk") {
    warn "another process is updating $pagefile... sleeping.\n";
    $tries++; if ($tries > 20) {
      warn "overriding lock on $pagefile.\n"; last;
    }
    sleep 1;
  }

  open (PID, ">$pagefile.lk")
  		or warn "write to $pagefile.lk failed: $!\n";
  print PID $$;
  close PID;
  
  my $refcount = 0;
  if (open (IN, "<$pagefile")) {
    $_ = <IN>;
    / refcount: (\d+) / and $refcount = $1;
    close IN;
  }

  $refcount++;
  $header1 .= $refcount;

  open (OUT, ">$pagefile");
  binmode OUT;
  print OUT $header1, $header2, $pagetext;
  close OUT;

  unlink ("$pagefile.lk");

  $self->dbg ("cache singleton $pagefile: ref count now $refcount");

  # trim off the pagecache dir for return.
  $pagefile =~ s/\Q${pagecache}\E\///g;
  $pagefile;
}

# ---------------------------------------------------------------------------

sub dec_refcount_for_page {
  my ($self, $pagefile) = @_;
  local ($_);

  $pagefile = $self->{factory}->{pagecachedir}.$SLASH.$pagefile;
  my $decfile = $pagefile.".dec";
  my $bakfile = $pagefile.".bak";

  open (IN, "<$pagefile") or return undef;
  if (!open (OUT, ">$decfile")) {
    warn "Cannot write to $decfile\n";
    close IN;
    return undef;
  }

  # read the first line, and decrement the ref count therein.
  $_ = <IN>;
  my $refcount;
  if (/ refcount: (\d+) /) {
    $refcount = $1;
  } else {
    $refcount = 1;		# default value
    $_ =~ s/ -->/ refcount: 1 -->/;
  }

  $refcount--;
  s/ refcount: \d+ / refcount: ${refcount} /;
  print OUT $_;

  if ($refcount <= 0) {
    close IN; close OUT;
    unlink ($pagefile, $decfile);
    $self->dbg ("cache singleton $pagefile: deleted");
    return 1;
  }

  while (<IN>) { print OUT; }
  close IN;
  close OUT or warn "Failed to write to $decfile\n";

  rename ($pagefile, $bakfile) or warn "rename $pagefile failed";
  rename ($decfile, $pagefile) or warn "rename $pagefile failed";
  unlink ($bakfile);

  $self->dbg ("cache singleton $pagefile: ref count now $refcount");
}

# ---------------------------------------------------------------------------

sub mk_pagefile_filename {
  my ($self, $url, $sum, $size) = @_;

  # Generate a deep cache filename. It'll look like:
  #
  #    HOST         PATH              CKSUM SIZE
  #    sitename_com/some_page_in_site-95443-3243
  #
  # Very long names are shortened, by stripping off the start of the URL
  # and turning it into a 32-bit hash represented as a hex value. The
  # end of the URL is left untouched where possible to avoid filename
  # collisions.
  #
  if (length ($url) > $MAXFILENAMELEN-5) {
    my $splitpoint = length ($url) - ($MAXFILENAMELEN-10);

    my $start = substr ($url, 0, $splitpoint);	# hashed into a 32bit value
    my $end = substr ($url, $splitpoint);	# left as a string

    $url = sprintf ("%08lx_%s", unpack ("%32C*", $start), $end);
  }

  Sitescooper::PerSiteDirCache::mk_generic_cache_filename
  	($self->{factory}->{pagecachedir}, $url."-".$sum."-".$size, 1);
}

sub dbg {
  my $self = shift;
  $self->{main}->dbg(@_);
}

sub verbose {
  my $self = shift;
  $self->{main}->verbose(@_);
}

# ---------------------------------------------------------------------------

1;
