(*pp ocamlrun ./version_filter -pp "camlp4o -impl" *)
(************************************************************
 *
 * A part of Regexp/OCaml module.
 * 
 * (c) 2002-2005 Yutaka Oiwa. All Rights Reserved.
 * 
 * This file is distributed under the terms of the GNU Library
 * General Public License, with the special exception on
 * linking described in file ../LICENSE.
 *
 ************************************************************)

(* $Id: declare_once.ml 129 2005-06-06 16:08:39Z yutaka $ *)

(* An Important Note to Developers:

     Please refrain from changing interfaces and internal data
     structures of this module without contacting to the author:
     When different implementations of this module are loaded into one
     instance of camlp4 preprocessor, it may crash, because this
     module uses a dirty, type-unsafe trick to share the internal
     states between several instances of this module.  Instead, please
     give the author a chance to merge your improvements into the original
     distribution.

     Alternatively, if you really need to modify, change the value of
     version_signature to include your own identifier.  If this module
     detects that another instance with a different version signature is 
     already loaded, it raises error.
*)

#if 3.06 | 3.07pre | 3.07 | 3.07_5 | 3.07_19
#define _loc loc
#endif

(* --------------------------------------------------------------------------------- *)
(* an idea taken from Fran\c{c}ois Pottier's regexp syntax sugar (with modification) *)

open Printf

open Pcaml

open MLast

#load "pa_extend.cmo";;
#load "q_MLast.cmo";;

let debug = 
  try ignore (Sys.getenv "MR_DEBUG"); true with _ -> false

let default_no_sharing =
  (try ignore (Sys.getenv "MR_NO_SHARING"); true with _ -> !Sys.interactive)

#if 3.06 | 3.07pre | 3.07 | 3.07_5
let dummy_loc = (1, 1)
#else
open Lexing
let dummy_loc = { pos_fname = ""; pos_lnum = 1; pos_bol = 0; pos_cnum = 0 },
  { pos_fname = ""; pos_lnum = 1; pos_bol = 0; pos_cnum = 0 }
#endif


type decl_type = 
    Expr of MLast.expr

type recorded_declaration =
    {
     package : string;
     file : string;
     tag : string;
     symbol : string;
     declaration : decl_type;
     is_global : bool;
     is_shared : bool;
     mutable is_active : bool;
   }

(* the state which should be shared among multiple instances of this module *)

(* Code sharing with two or more instances of this module is
   mandatory feature, because some syntax rules does not 
   work if they are introduced two or more times.
   An example of such rules is the first rule in str_item ('NEXT' rule).
 *)

type global_state =
    {
     signature : string;
     version_signature : string;
     mutable pending_globals : recorded_declaration list;
     mutable cached_globals : recorded_declaration list;
     mutable gensym_name_cache : string;
     mutable gensym_hash_cache : string;
     mutable gensym_counter : int;
     mutable no_sharing : bool;
   }

let signature = "DECLONCE CAMLP4 PREPROCESSOR MODULE STATE"
let version_signature = "003"

let g = ref { 
  signature = signature;
  version_signature = version_signature;
  pending_globals = []; cached_globals = [];
  gensym_name_cache = ""; gensym_hash_cache = "d41d8cd9";
  gensym_counter = 0; no_sharing = default_no_sharing }

let debug_no_sharing () = 
  if (!g).no_sharing then "true" else "false"

(* This function allows registering a new global declaration. It can be called within a grammar rule. *)

let lookup_cached ~package tag =
  let rec loop = function
      [] -> None
    | { package = p; tag = t; file = f; is_global = true ; is_active = true; symbol = s }::_
	when p = package && t = tag && f = !Pcaml.input_file
      -> Some s
    | _::tl -> loop tl
  in
  if tag = "" then None else
  loop (!g).cached_globals

let put elem array =
  array := elem :: !array

let declare ~package ?(tag = "") sym item =
  let is_global = tag <> "" in
  let decl_record = 
    { package = package;
      file = !Pcaml.input_file;
      tag = tag;
      is_global = is_global;
      is_shared = is_global && not (!g).no_sharing;
      is_active = true;
      symbol = sym;
      declaration = item } in
  if is_global then
    (!g).cached_globals <- decl_record :: (!g).cached_globals;
  (!g).pending_globals <- decl_record :: (!g).pending_globals
					  
(* This function is used to generate a fresh identifier. *)

let gensym ~package =
  if (!g).gensym_name_cache <> !Pcaml.input_file then begin
    (!g).gensym_name_cache <- !Pcaml.input_file;
    (!g).gensym_hash_cache <- 
      String.sub (Digest.to_hex (Digest.string !Pcaml.input_file)) 0 8;
  end;
  let n = (!g).gensym_counter in
  (!g).gensym_counter <- n + 1;
  Printf.sprintf "_once_%s_%s_%d" (!g).gensym_hash_cache package (!g).gensym_counter

(* This hook, which is called once per declaration, adds the global
   declarations generated by calls to [declare] at the beginning of
   each declarations. *)

(* nest-related state variable is only used with
   syntax expander, and thus not needed to be shared among instances. *)
#if 3.06 | 3.07pre | 3.07 | 3.07_5
let print_loc f loc = 
  Printf.fprintf f "(%d,%d)" (fst loc) (snd loc)
#else
let print_loc f (loc, _) = 
  Printf.fprintf f "%S (%d,%d)" loc.pos_fname loc.pos_lnum loc.pos_cnum
#endif

let nest_level = ref 0
let decr_nest loc = 
  decr nest_level;
  if debug then 
    (Printf.eprintf "NEST decr (%d) @ %a.\n"
       !nest_level print_loc loc; flush stderr)
let incr_nest loc = 
  incr nest_level; 
  if debug then
    (Printf.eprintf "NEST incr (%d) @ %a.\n"
       !nest_level print_loc loc; flush stderr)

exception Not_possible

(* given a pattern, returns a list containing all names bound by this pattern. *)
let rec pickup_variables = function
    <:patt< $p1$ . $p2$ >> -> pickup_variables p2
  | <:patt< ( $p1$ as $p2$ ) >> -> pickup_variables p2 @ pickup_variables p1
  | PaApp(_,p1,p2) ->
      (* application of constructors:
	 notice that p1 may contain variables if
	 arity is more than one *)
	 pickup_variables p1 @ pickup_variables p2
  | <:patt< [|$list:l$|] >> | <:patt< ( $list:l$ ) >> -> (* array, tuple *)
    List.flatten (List.map pickup_variables l)
  | <:patt< _ >>
  | <:patt< $chr:_$ >>
  | <:patt< $int:_$ >>
#if 3.06 | 3.07pre
#else
  | PaNativeInt _ | PaInt64 _ | PaInt32 _
#endif
  | <:patt< $str:_$ >>
  | <:patt< $flo:_$ >> -> []
  | <:patt< ~ $lab$ : $pat$ >> -> pickup_variables pat
#if 3.06
#else
  | <:patt< ~ $lab$ >> -> [lab]
#endif
  | <:patt< $lid:i$ >> -> [i]
  | <:patt< $uid:s$ >> -> []
  | <:patt< ? $i$ : ($p$ = $e$) >> -> pickup_variables p
  | <:patt< ? $i$ : ($p$) >> -> pickup_variables p
#if 3.06
#else
  | <:patt< ? $i$ >> -> [i]
#endif
  | <:patt< $p1$ | $p2$ >> -> 
    (* (p1 | p2) : p1 and p2 should contain same variables *)
    pickup_variables p1
  | <:patt< $p1$ .. $p2$ >> ->
      [] (* range does not contain further variables *)
  | PaRec(_,fields) -> (* record: list of (name, pattern) *)
      List.flatten (List.map (fun (nam,pat) -> pickup_variables pat) fields)
  | <:patt< ( $p$ : $t$ ) >> -> (* type constraint *) pickup_variables p
  | <:patt< # $modident$ >> -> []
  | <:patt< `$ident$ >> -> []
  | <:patt< $anti:e$ >> -> (* antiquot *) assert false

(*
   Insert local declarations to a structure item.
   The argument locals is a list of bindings.
   Each binding is a pair of bool and (patt * expr) list,
   where the boolean stands for "rec" flag.
 *)
   
let insert_local s locals =
  if locals = [] then s else
  let _loc = MLast.loc_of_str_item s in
  let local_value () = 
    List.map 
      (fun (isrec, binds) -> 
#if 3.06
	<:str_item<value $rec:isrec$ $list:binds$>>
#else
	<:str_item<value $opt:isrec$ $list:binds$>>
#endif
      )
      locals
  in
  let local_let e = 
    List.fold_right
      (fun (isrec, binds) e ->
#if 3.06
	<:expr<let $rec:isrec$ $list:binds$ in $e$>>)
#else
	<:expr<let $opt:isrec$ $list:binds$ in $e$>>)
#endif
      locals e
  in
  match s with
#if 3.06
    <:str_item<value $rec:r$ $pat$ = $expr$>> ->
      <:str_item<value $rec:r$ $pat$ = $ (local_let expr) $>>
#else
  | <:str_item<value $opt:r$ $pat$ = $expr$>> ->
      <:str_item<value $opt:r$ $pat$ = $ (local_let expr) $>>
#endif
  | <:str_item<$exp:expr$>> ->
      <:str_item<$exp:local_let expr$>>
#if 3.06
  | <:str_item<value $rec:r$ $list:l$>> 
#else
  | <:str_item<value $opt:r$ $list:l$>> 
#endif
    ->
      let variables = 
	List.flatten (List.map (fun (pat,exp) -> pickup_variables pat) l)
      in
      let variables_p =
	(List.map (fun name -> <:patt< $lid:name$ >>) variables) in
      let variables_e = 
	(List.map (fun name -> <:expr< $lid:name$ >>) variables) in
      let e = local_let 
#if 3.06
	  <:expr< let $rec:r$ $list:l$ in ($list:variables_e$) >>
#else
	  <:expr< let $opt:r$ $list:l$ in ($list:variables_e$) >>
#endif
      in
      <:str_item<value ($list:variables_p$) = $e$ >>
  | <:str_item<module $m$ = struct $list:s$ end>> ->
      <:str_item<module $m$ = struct 
	$list:(local_value () @ s)$ end>>
  | <:str_item<module $m$ : $t$ = struct $list:s$ end>> ->
      <:str_item<module $m$ : $t$ = struct 
	$list:(local_value () @ s)$ end>>
  | _ -> raise Not_possible
	
(* insert appropriate declaration at the top of str_item phrase.
*)
let install_syntax_1 () =
  EXTEND
    GLOBAL: str_item;
  str_item: FIRST
    [[ s = NEXT ->
      let keep, add =
	if !nest_level = 0 then [], (!g).pending_globals
	else 
	  List.partition (fun { is_global = b } -> b)
	    (!g).pending_globals
      in
      (!g).pending_globals <- keep;
      if add = [] then s
      else begin
	let global, local = List.partition (fun { is_shared = b } -> b) add in
	let local_binds =
	  List.rev_map 
	    (fun ({ symbol = sym; declaration = Expr e } as r) ->
	      r.is_active <- false;
	      (false, [<:patt<$lid:sym$>>, e]))
	    local in
	let locals_unavail, s =
	  try
	    [], insert_local s local_binds
	  with Not_possible -> local, s
	in
	let extra = List.rev_map
	    (fun { symbol = sym; declaration = Expr e } ->
	      <:str_item< value $lid:sym$ = $e$ >>) (locals_unavail @ global) in
	if extra = [] then s else 
	<:str_item< declare $list:extra @ [s] $ end >>
      end
]];
END

(* the cached declaration should put into toplevel struct. *)

let install_syntax_2 () = EXTEND
    GLOBAL: module_expr;
  module_expr: FIRST
    [[ "functor"; "("; i = UIDENT; ":"; t = module_type; ")"; "->";
        me = SELF ->
	<:module_expr< functor ( $i$ : $t$ ) -> $me$ >>
  | "struct"; nest_incr; st = LIST0 [ s = str_item; OPT ";;" -> s ]; "end" ->
      decr_nest _loc;
      <:module_expr< struct $list:st$ end >> ]];
  
  nest_incr: [[ (* empty *) -> incr_nest _loc ]];
END

let process_option i =
  (* VERY DIRTY TRICK: black magic here! :-( *)
  (* Is there no way to prevent a module from being loaded twice? *)
  match i with
    0 -> (* actually () : unit *)
      if debug then (Printf.eprintf "NO SHARING (%s) -> " (debug_no_sharing ()));
      (!g).no_sharing <- true;
      if debug then (Printf.eprintf "(%s).\n" (debug_no_sharing ()); flush stderr);
      0
  | 1 -> 
      (Obj.magic !g)
  | _ -> assert false


let share_global_state f = 
  let check_compat b = 
    if not b then
      failwith "declare_once: FATAL: previous instance has incompatible state";
    ()
  in
  let f = (Obj.magic f : int -> global_state) in
  let state = f 1 in
  begin
    let o = Obj.repr state in
    check_compat (Obj.is_block o);
    check_compat (Obj.size o >= 2);
    let s = Obj.field o 0 in
    check_compat (Obj.is_block s);
    check_compat (Obj.tag s = Obj.string_tag);
    check_compat ((Obj.magic s) = signature);
    let s = Obj.field o 1 in
    check_compat (Obj.is_block s);
    check_compat (Obj.tag s = Obj.string_tag);
    if (Obj.magic s) <> version_signature then
      failwith
	("declarare_once: FATAL: previous instance is of different version (prev:" ^
	 (Obj.magic s : string) ^ "<--> this:" ^ version_signature ^ ")");
    check_compat (Obj.size o = Obj.size (Obj.repr !g));
  end;
  g := state

let option_name = "-no-shared-precompile"

let option_docstring = 
  "not to share duplicated precompiled declarations (regexp etc.)."

let install_option () =
  Pcaml.add_option option_name 
    (Arg.Unit (Obj.magic process_option : unit -> unit))
    option_docstring

let _ =
  let rec find = function
      [] -> None
    | (n,Arg.Unit f,d)::tl ->
	if n = option_name && d = option_docstring then Some f
	else find tl
    | _::tl -> find tl
  in
  match find (Pcaml.arg_spec_list ()) with
    None ->
      if debug then
	(Printf.eprintf "LOADED 'once' processor (%s).\n" 
	   (debug_no_sharing ()); flush stderr);
      install_syntax_1 ();
      install_syntax_2 ();
      install_option ();
  | Some f ->
      share_global_state f;
      if debug then
	(Printf.eprintf "LOADED 'once' processor twice, using shared configuration (%s).\n"
	   (debug_no_sharing ());
	 flush stderr)

