%
*************************************************************************
*                                                                       *
*    Copyrighted Cornell University 2000                                *
*                                                                       *
*                                                                       *
*                Nuprl Proof Development System                         *
*                ------------------------------                         *
*                                                                       *
*   Developed by the Nuprl group, Department of Computer Science,       *
*   Cornell University, Ithaca NY.  See the release notes for a list    *
*   of the members of the group.                                        *
*                                                                       *
*   Permission is granted to use and modify Nuprl provided this notice  *
*   is retained in derived works.                                       *
*                                                                       *
*                                                                       *
*************************************************************************
%

%[
*********************************************************************
*********************************************************************
Caching tactics for proof goals.
**********************************************************************
**********************************************************************
Tactics which cache proofs of subgoals in hope that same subgoals come
up repeatedly.

At end are experimental functions and tactics that try to thin out
unimportant hypotheses and put goals into more canonical forms. This
ought to result in smaller cache sizes, greater cache hit rates, and
faster detection of hits/misses.  However, the thinning is not
conservative, and the if much backtracking occurs, then performance
can be worse than with no caching at all!  

Reference variables controlling caching:

  pc_num_thms : int [5]

    Controls number of most recent theorems for which proofs are cached.
]%


%[
**********************************************************************
Building sequent terms.
**********************************************************************

Functions here very similar to those in sequent.ml. These should
be a little faster.
]%


%[
**********************************************************************
Proof Caching Code
**********************************************************************

Cache is 2-dim list. 

  1st index: (tok) name of theorem 
  2nd index: (tok list) main identifiers in cached goal

Each entry is a list of items of form (n,k,i,p)  

   where 
     n (int) is number of hits on an entry
     t (tok) is name of tactic entry is from. 
     i (int) is num of hyps of sequent
     seq (term) is a sequent term whose proof has been completed.

NB: important that only sequents, not whole proofs are cached. If proofs
are cached, then the benefit from having primitive rule trees discarded
by the refiner is lost.

Should experiment with different notions of equality,
to see if it makes any difference. 
(e.g. does alpha eq on sequents help? Does soft equality help?)
]%



letref proof_cache = []
 : (object_id # ((tok list # (int # string # int # (assumption list) # term ) list) list)) list
;;

letref proof_cache_aps = 0 ;;
letref proof_cache_updates = 0 ;;


let reset_proof_cache (():unit) = 
  proof_cache := []
  ; proof_cache_aps := 0
  ; proof_cache_updates := 0
  ; ()
;;

% For each pcache set
  1. count # entries
  2. count total # hits
%

let pc_outline thm_nam = 
  let cache = apply_alist proof_cache thm_nam in
  let hits l = reduce (\x y.x+y) 0 (map fst l) in
  map (\id,x.id,length x,hits x) cache
;;

% add up sizes of all cached terms %
let pc_goal_size (assums, concl) =
 (2 * (length assums))		% one for list of assums and one for assumption structure. shared with proof.  %
 + (conses_in_term concl)	
 + (reduce (\x y. x+y) 0 (map (conses_in_term o type_of_declaration) assums))
;;

let pc_size thm_nam = 
  let cache = apply_alist proof_cache thm_nam in
  let size l = reduce (\x y.x+y) 0 
               (map (pc_goal_size o snd o snd o snd) l) 
  in
  let total_size = reduce (\x y.x+y) 0 (map (size o snd) cache) in
  let set_sizes = map (\id,x.id,length x,size x) cache in
    total_size,set_sizes
;;

%let pc_profile () =
% 
 
let rpc = reset_proof_cache ;;
let pcs () = proof_cache_aps,proof_cache_updates ;;


let summarize_cache_set cs = 
  let hits l = reduce (\x y.x+y) 0 (map fst l) in
  let normed_entries, unnormed_entries = divide_list (\(),(),i,().i = -1) cs in
      length normed_entries
      ,hits normed_entries
      ,length unnormed_entries
      ,hits unnormed_entries
;;
       
let format_cs_summary (a,b,c,d) = 
  let i = int_to_string in
    concatenate_strings
     [i a;" ne, ";i b;" nh, ";i c;" ue, ";i d;" uh"]
;;

let pcs' n = 
  let cache = snd (nth n proof_cache) in
  let data = map (summarize_cache_set o snd) cache in
  let as,bs,cds = unzip3 data in
  let cs,ds = unzip cds in
    format_cs_summary (sum as,sum bs,sum cs,sum ds)
;;

let pc_outline' n = 
  let cache = snd (nth n proof_cache) in
    map (\id,x.id,format_cs_summary (summarize_cache_set x)) cache 
;;

% proof cache theorems %

let pcts (():unit) = map fst proof_cache ;;
  
% sequent id is a tok list %

%
Access / Update functions for proof cache 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
These two functions should be sufficient for all accesses to 
proof_cache.
%

let  pc_lookup thm_nam seq_id = 
  %mlbreak `lookup`;%
  apply_2d_alist proof_cache thm_nam seq_id
;;

% maximum number of theorems which info is cached about %

letref pc_num_thms = 5 ;;

% brings entries_for_thm to front of table. Discard end of table if table
too long. 
%

let pc_update thm_nam seq_id entry = 
   %mlbreak `update`;%
  let entries_for_other_thms = remove_alist_entry proof_cache thm_nam in
  let entries_for_thm = apply_alist proof_cache thm_nam ? [] in
  let entries_for_thm' = update_alist entries_for_thm seq_id entry in
  let proof_cache' = (thm_nam, entries_for_thm') . entries_for_other_thms in

    proof_cache := (firstn pc_num_thms proof_cache' ? proof_cache')
    ; ()
;;

let pc_clear thm_nam = 
  proof_cache := remove_alist_entry proof_cache thm_nam ;
  ()
;;

 
%
Ideally should track in tactics when on non-computational branch of
proof. Requires threading information through the various tactics. 
(Alternatively, could use tactic argument passing mechanism.)

Only cache goals whose computational content is trivial. (since no proofs
are cached, only terms).
%

%
Added wf_cache_p to allow wf_cacheing to be turned off for load fully.
%

letref wf_cache_p = true;;

let is_cacheable_goal p =
  %mlbreak `cache`;% 
  wf_cache_p &
  is_terms 
   ``equal member false true not ge le less_than gt subtype suptype
     squash
    `` 
   (concl p)
;;

let cacheable_goal_extract p = 
  if is_terms ``not ge le subtype suptype`` (concl p) then 
     mk_lambda_term null_var axiom_term
  else
     axiom_term
;;

let FiatWithWitness p = 
  if is_terms ``not ge le subtype suptype`` (concl p) then 
    (UseEqWitness (mk_lambda_term null_var axiom_term) THEN Fiat) p
  else
    Fiat p
;;

%
let cid be id of concl.

Then result of get_sequent_id is:

1. concl is 
    a in T
    a = b in T
  i) a is var with type S. Return [<Sid>;<Tid>]
  ii) a is not var. Return [<aid>]
2. o/w  Return [<cid>]
%

let get_sequent_id p = 
  let c = concl p in
  if is_terms ``member equal`` c then 
    let ta = (sub_term 2 c) in
    if is_term `variable` ta then
      [opid_of_term (h (get_decl_num (dest_var ta) p) p)
      ;opid_of_term (sub_term 1 c)]
    else
      [opid_of_term ta]
  else 
    [opid_of_term c]
;;

let unfold_if_member_term t = 
  if is_term `member` t then
  (let T,t = dest_member t in mk_equal_term T t t)  
  else
    t
;;



%[ 
UpdatingMLPfCache
  thinned (:bool) true if prior to execution, potentially irrelevant hyps 
           have been thinned.
  thm_id (:tok) identifies thm being refined
  Tac_id (:string) identifies tactic T.
  T (:tactic) Tactic whose behaviour is being monitored.

Assumption is made here that if tactic returns null proof list, then
goal has been proven. 

Strictly speaking, this isn't known until
1. validation has been run.
2. frontier of resulting proof has been checked to be null.

Having validations executed within tactics is currently (May 3rd 95) 
dangerous, since the refiner creates validations that do destructive 
update of structures; it doesn't expect validations to be run in tactics that
might later fail.

The refiner will be fixed in V5, and maybe V4. When this is done, this tactic
should probably be fixed too.

NB: of course, the result p of executing T's validation result should used 
in the validation (\ps.p) returned by UpdatingMLCache. 
There is no point executing the validation multiple times when we know what
its result must be.

When T has been run on a cacheable goal and produces no subgoals, then
UpdatingMLCache substitutes a trivial validation (that produced by
FiatWithWitness). This is done to keep the size of validation closures to 
a minimum. These closures have been observed in some proofs to grow to 10's of 
MB in size, causing much unwanted dynamic consing.
]%

let equal_object_ids x y = x = y;;

% don't fiat first occurrence.
  4/2004 to allow primitive proof references to be found
%
let UpdatingMLPfCache_aux lookp thinned thm_id Tac_id (T:tactic) p = 
  let ps,v = T p 
  in 
  if not null ps or not is_cacheable_goal p then
    ps,v
  else
  %let ps_v' = FiatWithWitness p in%
  if equal_object_ids thm_id (dummy_object_id () %"none"%) then
    %ps_v'% ps,v
  else
  let assums = hyps p 
  in let c = concl p 
  in let concl = unfold_if_member_term c
  in let i = if thinned then (-1) else length assums
  in let seq_id = get_sequent_id p 
  in let cache_set = pc_lookup thm_id seq_id ? [] 
  in let pos =
     if not lookp then 0 else
     search_list
       (\n,(),i',assums',concl'.
	  ( not (i < i')
	  & alpha_equal_sequents_aux (0 < i')  assums' concl' assums concl)
       )
       cache_set
       ? 0
  in
  if pos = 0 then
    ( pc_update thm_id seq_id ((0,Tac_id,i,assums,concl).cache_set)
      ; proof_cache_updates := proof_cache_updates + 1
      ; %ps_v'%ps,v
    )
  else
    %ps_v'%ps,v
;; 

let UpdatingMLPfCache = UpdatingMLPfCache_aux true;;

let ApplyMLPfCache thinned thm_id (Tac_id:string) p =
  if equal_object_ids thm_id (dummy_object_id () %"none"%) then 
    failwith `ApplyMLPfCache: no cache relevant`  
  if not is_cacheable_goal p then
    failwith `ApplyMLPfCache: not cacheable goal`
  else
  let assums = hyps p
  in let concl = unfold_if_member_term (concl p)
  in let i = if thinned then (-1) else length assums
  in let seq_id = get_sequent_id p
  in let cache_set = 
    pc_lookup thm_id seq_id
    ?
    failwith `ApplyMLPfCache: no applicable entries for thm and seq_id`
  in let pos = 
    search_list
     (\n,(),i',assums', concl'.
	( not (i < i')
	& alpha_equal_sequents_aux (0 < i') assums' concl' assums concl))
     cache_set
    ?
    failwith `ApplyMLPfCache: no matching cache entry`
  in 
  let cache_set' = update_nth ((\n.n+1) # id) cache_set pos
  in
  proof_cache_aps := proof_cache_aps + 1
  ;
  pc_update thm_id seq_id cache_set'
  ;
  FiatWithWitness p 
%
  ( OnHyps (rev (upto (i+1) n)) Thin
    THEN Graft (copy_proof p')
  ) p
%
;;


%[
**********************************************************************
Pruning Environments
**********************************************************************
]%

%
linearize_dag 
~~~~~~~~~~~~~

Assume dag presented as:

[w1,[v1-1;... ;v1-m1]
;
;wn,[vn-1;... ;vn-mn]]

where the wi and the vi-j are nodes and the vi-j
are the immediate predecessors of wi. 

Function returns a list:

[wk1;...;wkn] where k is a permutation on 1...n and all predecessors of 
wi are to the left of wi.


Algorithm: 
  repeatedly pick first wi with no in-edges.
  and  remove this wi from any vi-j's.

Preconditions:
  wi-j are subset of vi.
  Graph is dag.

  Function fails if preconditions not satisfied.
%

letrec linearize_dag dag= 
  if null dag then [] else
  let n = search_list (null o snd) dag 
          ? failwith `linearize_dag`
  in
  let v = fst (nth n dag) in
  let dag1 = remove_nth n dag in
  let dag2 = map (\v',ws.v',remove v ws) dag1 in
    v. linearize_dag dag2
;;

%
Idea here is to calculate from a term and an environment, 
a minimal environment for the term with the environment in an order
determined purely by ordering of the free variables in the term.

get_type_and_prior_vars
~~~~~~~~~~~~~~~~~~~~~~~
Input
1. xAs : (var # term) list ( = zip xs As)
2. vs : var list  

  where vs are subset of xAs

Output
   ws : var list 
  ,fvss : var list list 

As intermediate step vBs = zip vs Bs is constructed  
where each v,B is an element of xAs

ws are (free vars of Bs) \ vs
%

let get_type_and_prior_vars xAs vs = 
  % Bs are Types of vs %
  let Bs = map (apply_alist xAs) vs in

  % get list of free vars of Bs, not in  1 of those in vs, 1 of those not %
  let g B = let Bfvs = free_vars B in (diff Bfvs vs),Bfvs in
  let Bfvss_not_in_vs,Bfvss = unzip (map g Bs) in
  
    Bfvss, reduce union [] Bfvss_not_in_vs  
;;

%
get_relevant_vars_and_fvs_of_their_types
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
%
let get_relevant_vars_and_fvs_of_their_types xAs t = 
  letrec aux vs = 
    if null vs then [] else
    let fvss,ws = get_type_and_prior_vars xAs vs in
      zip vs fvss @ aux ws
  in
    aux (free_vars t)
;;

%
normalize_env & weakly_normalize_env
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Input
1. xAs : (var # term) list
2. t   : term

Output
1. zCs : (var # term) list
%

let normalize_env xAs t = 
  let dag = get_relevant_vars_and_fvs_of_their_types xAs t in
  let norm_vs = linearize_dag dag 
  in
    map (\v.v,apply_alist xAs v) norm_vs
;;

% This is faster, and could therefore be better in practice than above %

let weakly_normalize_env xAs t = 
  let dag = get_relevant_vars_and_fvs_of_their_types xAs t in
  let vs = map fst dag 
  in
    filter (\x,A.member x vs) xAs
;;

%[
**********************************************************************
Sequent Pruning
**********************************************************************
]%

% thin hyps so that only those declaring vs are left. %
let ThinToVars vs p = 
  let seq_vars = number (declared_vars p) in
  let hs_to_go = mapfilter (\n,v.if (member v vs) then fail else n) seq_vars in
     OnHyps (rev hs_to_go) Thin p
;;


% returns pair: vs,b
  vs are essential vars
  b is true iff all vars of p are essential
%

let essential_vars_of_seq p = 
  let xAs = dest_hyps p in
  let xAs' = weakly_normalize_env xAs (concl p) in
    (map fst xAs'), length xAs = length xAs' 
;;

% Always makes progress; fails if no irrelevant hyps to thin %

let ThinIrrevHyps p = 
  let vs,b = essential_vars_of_seq p 
  in if b then
    failwith `ThinIrrevHyps: no progress`
  else 
    ThinToVars vs p
;;

  
