
;;;************************************************************************
;;;                                                                       *
;;;    Copyrighted Cornell University 2003                                *
;;;                                                                       *
;;;                                                                       *
;;;                Formal Digital Library System                          *
;;;                ------------------------------                         *
;;;                                                                       *
;;;   Developed by the FDL group, Department of Computer Science,         *
;;;   Cornell University, Ithaca NY.  See the release notes for a list    *
;;;   of the members of the group.                                        *
;;;                                                                       *
;;;   Permission is granted to use and modify FDL provided this notice    *
;;;   is retained in derived works.                                       *
;;;                                                                       *
;;;                                                                       *
;;;************************************************************************

#+cltl2(in-system-package)
#-cltl2(in-package *system-package-name*)

(defun lisp-file-extension ()
  (car make::*filename-extensions*))

(defun bin-file-extension ()
  (cdr make::*filename-extensions*))

(defun system-bin-path-list ()
  (standard-binary-directories))

;;;
;;; Used in compiling ML files.
;;;

(defun compile-lisp-file (input-file &key output-file)
  (cond (output-file
	 #+lucid
	 (let ((b (getf (lcl:compiler-options) :fast-entry)))
	   (unwind-protect 
		(progn (lcl:compiler-options :fast-entry t)
		       (compile-file input-file :output-file output-file 
				     :messages nil :warnings nil))
	     (lcl:compiler-options :fast-entry b)))
	 #-lucid
	 (compile-file input-file :output-file output-file ))
	(t
	 #+lucid
	 (let ((b (getf (lcl:compiler-options) :fast-entry)))
	   (unwind-protect 
		(progn (lcl:compiler-options :fast-entry t)
		       (compile-file input-file :messages nil :warnings nil))
	     (lcl:compiler-options :fast-entry b)))
	 #-lucid
	 (compile-file input-file))))

;;; Used in loading compiled ML files.
(defun load-lisp-file (filename)
  #+lucid
  (let ((system::*redefinition-action* :quiet))
    (load filename :verbose nil))
  #-lucid
  (load filename :verbose nil))


(defun compile-lisp-form (function-spec &optional lambda-exp)
  #+lucid
  (compile function-spec lambda-exp  :fast-entry t :messages nil :warnings nil)
  #-lucid
  (compile function-spec lambda-exp))


;;;;	For purposes of discussion, considered a an abstract string be
;;;;	a list of numbers where each number is the unicode character.
;;;;	
;;;;	There are convienent representations of certain subsets of this
;;;;	abstract string class.
;;;;	
;;;;	  - standard string : a string consisting of only the following characters :
;;;;	      * !"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~
;;;;		ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
;;;;		and newline, ie char code 10, #x0A, #o12.
;;;;	
;;;;	  - ascii string	: a string consisting of only characters with codes less than 128.
;;;;	  - byte string		: a string consisting of only characters with codes less than 256.
;;;;	  - unicode string	: a string consisting of unicode characters, ie with two byte8 codes.
;;;;	
;;;;	We may want to represent the entire abstract string class inside
;;;;	the subsets. This can be done naturally by escaping:
;;;;	  \\ == \
;;;;	  \xxxx == (string-to-unicode "xxxx") where x is a hex character: 0123456789ABCDEFabcdef
;;;;	
;;;;	
;;;;	Where are strings and character used ?
;;;;	  - internal : characters. Symbols and strings.
;;;;	  - ascii import/export : ascii chararacters with embedded unicode.
;;;;	      * must still be byte io as some lisps don't have ascii characters.
;;;;	  - compressed stream io : unsigned byte data.
;;;;	      * expect few non-standard chars in stream, since non-standard chars
;;;;		will mostly be used only in dforms, and stream io is primarily io
;;;;		of structure. The 8th bit is used for 
;;;;	  - edit : encodes/decodes strings.
;;;;	      * need to identify unicode for display.
;;;;	      * need to allow for import of non-standard chars.
;;;;	
;;;;	dilemma : What to use internal strings? standard, ascii, byte or unicode strings.
;;;;	dilemma : What to use for io? Strings and characters or byte8 and byte8 arrays.
;;;;
;;;;	One confounding fact is that various Lisp implementations treat characters and character io differently
;;;;	  - Allegro allows byte8 char-codes for strings and string io.
;;;;	  - Lucid allows byte16 char-codes for strings but outputs only the last 8 bits for string io.
;;;;	 * The problem is that there is no specification for how strings are output onto a byte stream.
;;;;	   If we make our implementation rely on that fact then we are taking a risk.
;;;;	   One would hope that a byte8 character code would be sent but there is no guarauntee.
;;;;	 * One very plausible scenario: vendors add unicode support and start sending two bytes 
;;;;	   for every char or the utf8 unicode encoding which would most certainly munge up the upper byte8 range.
;;;;
;;;;	Strings are good since they are used internally and thus some conversion may be avoided.
;;;;	
;;;;	Unicode strings can not be used for internal strings.
;;;;	  - not all implementations support, although that may change for the better.
;;;;	  
;;;;	
;;;;	Thus, use byte8 and byte io. That rules out byte8 and unicode chars for internal strings,
;;;;	since need the eight bit for compression control codes.
;;;;	
;;;;	There is not much to be gained with byte7 (ascii) characters over standard characters.
;;;;	So use the lowest common denominator for internal strings, ie standard chars.
;;;;	Note then that #[0D] -> u000D.
;;;;	so [75; 30 30 30 0D ] == #[0D], this is of course true for any byte7 character.
;;;;	
;;;;	Now how do we make the string <-> byte8 array conversion efficient.
;;;;	Note that a any non-standard character in a string is represented by escaped unicode (uxxxx).
;;;;	  - find a common representation, ie keep the long version of escaped unicode chars in the byte8 representation,
;;;;	    ie do not use the other available byte7 codes that are not shared with the standard char codes.
;;;;	      * does lisp implementation allow recognize these as interchangeable types for fast copy.
;;;;	        Generally, they do not, still requiring byte by byte conversion to move from one form to the other.
;;;;	  - do not use strings but replace with byte8 arrays.
;;;;	      * require our own psuedo symbol table to replace symbols.
;;;;		need the cheap eq test.
;;;;	
;;;;
;;;;	BYTE8-array as primitive data :
;;;;
;;;;    The second solution of not using builtin lisp symbols and strings for data representation
;;;;	is attractive for other reasons. Particulary, such an implementation would be easier
;;;;	to port to other languages. Also it delineates a boundary between implementation code and data
;;;;	which is conceptually appealing. 
;;;;	
;;;;	At the momemt it is not pratical to make that leap, however if we recognize that as the
;;;;	goal, we can define a boundary within which we work with byte arrays. For example,
;;;;	the compressed ascii terms should be (and mostly are) within that boundary. Then
;;;;	we can gradually move functions inside the boundary. Fortunately most algorithms 
;;;;	already are abstract wrt this distinction (mainly via the parameter abstractions).
;;;;	
;;;;	The disturbing thing is that at the moment the most efficient method might be to
;;;;	use byte7 chars in strings and do byte8 string io (using the 8th bit for compression).
;;;;	The pivotal point that makes this undesirable is that we do not trust the lisp implementations
;;;;	not to change and corrupt our compression.
;;;;	
;;;;	If we implement byte8 abstractly, then we might get away with
;;;;	a conditional implementation where the underlying primitives 
;;;;	may deal with strings or byte arrays.
;;;;	
;;;;	Then, need to allow shortcuts for conversions when types match.
;;;;	
;;;;	
;;;;	Escaped BYTE8-arrays :
;;;;	
;;;;	We desire the byte-arrays to facilitate compressed ascii IO.
;;;;	We stil need to examine strings byte by byte to do ascii syntax escaping.
;;;;	How about pre-escaping strings for ascii io.
;;;;	Ie if o{a{}b:s} -> o{a\{\}b:s},
;;;;	but then if t = `{}`, destruct_token_parameter (make_token_parameter t) != t !!!. 
;;;;	Or we must implicitly escaped during compilation of `{}` so that t is `\{\}`
;;;;	
;;;;	The escaping during ML compilation is plausible, but might be a fair
;;;;	amount of work. Forceing escaping in lisp code is not possible but
;;;;	there should be few values needing escaping so fixing by hand is doable.
;;;;	Bigger issue is preventing new coding from doing the wrong thing. The consequences
;;;;	are dire but should be immediate, ie you can't corrupt too much because
;;;;	you'll get hung up real fast.
;;;;
;;;;	FTTB, do not mess with the parameter values, but rather add a cache to
;;;;	parameters for byte8 arrays.
;;;;	
;;;;	




;;;;	
;;;;	
;;;;	Where do symbols and strings come from? 
;;;;	 Lisp eval
;;;;	 ML eval
;;;;	 parameters
;;;;
;;;;
;;;;	BYTE16 -> SSTRING : Note that this subsumes BYTE8 -> SSTRING
;;;;	  - input to string.
;;;;	  - ml tokof : int -> tok
;;;;	  - ml int_to_char : int -> tok
;;;;	  - ml compile file input.
;;;;	  - string-to-standard-character-string : for converting lisp listener
;;;;	    input to internal string rep.
;;;;	
;;;;	
;;;;	
;;;;	
;;;;	
;;;;	
;;;;	


;;;;	Conditionalize implementation wrt character set supported.
;;;;	  - standard chars : some lisp vendors only supply the standard characters.
;;;;	  - 16 bit characters : some lisp vendors supply full set of 16 bit characters.
;;;;	
;;;;	There are probably some other variations, but we will limit ourselves to these.
;;;;	
;;;;	Where characters are used :
;;;;	  - internal : characters. Symbols and strings.
;;;;	      * 16bit-chars : straight ascii mapping + direct unicode.
;;;;	      * standard-chars : 
;;;;		  - embed unicode by \hhhh where h is ascii rep of hex chars.
;;;;		  - map ascii to standard chars and vice versa if neccessary.
;;;;		    Generally standard character codes correspond with ascii
;;;;		    However, there are exceptions, eg aclpc maps ascii newline 
;;;;		    to (control-m).
;;;;	  - ascii import/export : ascii chararacters with embedded unicode.
;;;;	      * must still be byte io as some lisps don't have ascii characters.
;;;;	  - compressed stream io : unsigned byte data.
;;;;	      * could compress unicode as #\escape byte byte.
;;;;	      * Note when accumulating io for tooltalk must accumulate byte-array
;;;;		and not string.
;;;;	  - edit : encodes/decodes strings.
;;;;	    strings <-> unicode arrays.
;;;;	
;;;;	Character codes need not be expected ascii codes. EG, Allegro for windows maps #\newline
;;;;	to (control-m) rather than 10. Thus we need to make sure that ascii export/import maps
;;;;	character correctly to ascii codes.
;;;;	

;;;;	+
;;;;	add bit to file scanner which indicates if scan-cur-char is standard-char
;;;;   
;;;;	file scanner reads byte and sets scan-cur-byte. nullifies scan-cur-char.
;;;;	If scan-cur-char called then
;;;;	byte coerced to char. Assume caller knows what they're doing ie don't check if
;;;;	it is a standard char.
;;;;	
;;;;	Or coerce to char by default but leave byte accessible. assumes not error to
;;;;	code-char non-standard char.  This is better as most bytes will be chars.
;;;;	maybe allow option. a compressed stream may actually have more indices than
;;;;	characters. have file scanner to coercion and add method to file scanner
;;;;	to report last byte read.
;;;;	
;;;;	decompression should call scan-cur-byte but then if index or state then 
;;;;	call scan-cur-char.


;;;;	+++
;;;;	
;;;;	Or add scan primitives for test if at byte, ie lower some of the compress
;;;;	logic into the scanner. stream scanner stills needs hook to return byte, but
;;;;	that is better hid in the scanner. Actually all scanners could have hook
;;;;	but should never be called except by compression for streams.
;;;;	Could have file scanner return t instead of char if 80 bit on. Then
;;;;	test for byte is check if cur-char t. Then instead of scan next char 
;;;;	scan-next-byte is called leaving cur-char as is. Once index is read scan-next-char
;;;;	can be called.
;;;;	
;;;;	



;;;;	lucid could be 16bit-chars compliant if some milling
;;;;	where done to insure extended-characters were handled
;;;;	properly for things like concatenate, etc.
;;;;	
;;;;	However, 16bit-chars would be more space consuming so
;;;;	use the standard-char setup.

;;;; #+lucid(push :16bit-chars *features*)



(defconstant itab 9)
(defconstant inewline 10)
(defconstant ilinefeed 10)
(defconstant ipage 12)
(defconstant ireturn 13)


(defconstant *standard-character-string*
  (format nil "~a~a~a !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~~" #\newline #\return #\tab))

(defconstant *standard-character-list* 
  (coerce *standard-character-string* 'list))

(defconstant *standard-character-codes* 
  #+aclpc(cons ireturn (cons inewline (mapcar #'char-code *standard-character-list*)))
  #-aclpc(mapcar #'char-code *standard-character-list*)
  )




;;; standard character sbits is somewhat of a misnomer as
;;; something like ascii-code sbits might be more proper.

(defun standard-character-sbits (codes)
  (let ((a (make-array 128 :element-type 'bit :initial-element 0)))
    (dolist (i codes)
      (setf (sbit a i) 1))
    a))

(defun test-standard-character-bit (code sbits)
  (and code
       (integerp code)
       (< code 128)
       (= 1 (sbit sbits code))))


(defun set-standard-character-bit (code sbits on)
  (setf (sbit sbits code) (if on 1 0)))

(defmacro with-sbit ((sbits code) &body body)
  (let ((save (gensym)))
    `(let ((,save (test-standard-character-bit ,code ,sbits)))
      (set-standard-character-bit ,code ,sbits t)
      (unwind-protect
	   (progn ,@body)
	(unless ,save (set-standard-character-bit ,code ,sbits nil))))))

;;;;	maps ascii codes to standard characters.

(defconstant *standard-ascii-character-array*
  (let ((a (make-array 128 :element-type 'standard-char :initial-element #\space)))
    (dolist (i *standard-character-codes*)
	    (setf (aref a i) (code-char i)))
    (setf (aref a inewline) #\newline)
    a)) 

;; Assumes standard-character-code-p t.
;;; RLE TODO change to code-to-standard-character and mill callers.
;;; RLE TODO doc all char stuff better and use consistent naming established in doc.
(defmacro byte-to-standard-character (byte)
  `(progn
    ;; but shouldn't ever happen and slowing things down!
    ;;(unless (< ,byte (length *standard-ascii-character-array*))
    ;;(raise-error '(fu)))
    
    (aref *standard-ascii-character-array* ,byte)))



;;;;	It should be safe to when converting from chars to codes and back to chars
;;;;	to ignore 16bit-chars condition, however standard-char-to-code mapping must
;;;;	still be done. Scanner should work with 16bit input, so when scanning from
;;;;	strings or text 16bit values are acceptable.


;;;;	map standard chars to ascii codes.
;; also converts non-standard-characters to codes.
(defun character-to-code (ch)
  #+aclpc(if (eql #\newline ch)
	     inewline
	     (if (eql #\tab ch)
		 itab
		 (char-code ch)))
  #-aclpc(char-code ch)
  )

;;;;	
;;;;	Convert common types to various vendor specific foreign function types.
;;;;	
;;;;	
;;;;	
;;;;	


(defun ff-simple-type (type-spec)
  (case type-spec
    
    (int	#+lucid		:signed-32bit
		#+allegro	:integer
		#+cmu		'alien:integer
		)


    (unsigned	#+lucid		:unsigned-32bit
		#+allegro	:unsigned-integer	;; rle just guessing.
		#+cmu		'alien:unsigned
		)

    (string	#+lucid		:simple-string
		#+allegro	:string
		#+cmu		'c-call:c-string
		)

    (void	#+lucid		:void
		#+allegro	:void
		#+cmu		'c-call:void
		)

    (otherwise	nil)))


(defun ff-type (type-spec)

  (or

   (ff-simple-type type-spec)

   (error "Unknown simple type [~a] for foreign function." type-spec)))


(defun dash-to-under (ss)
  (let* ((s (string ss))
	(ns (make-string (length s))))
    (dotimes (i (length s))
      (if (eql #\- (char s i))
	  (setf (char ns i) #\_)
	  (setf (char ns i) (char s i))))
    (string-downcase ns)))

(defun ff-name (nn)

  ;; this eventually will be trouble. With mixed case will need explict 
  (dash-to-under
   (string-downcase
    (let ((n (string nn)))

      #+lucid	(let ((s (make-string (1+ (length n)))))
		  (setf (char s 0) #\_)
		  (dotimes (i (length n))
		    (setf (char s (1+ i))
			  (let ((c (char n i)))
			    (if (eql c #\-) #\_ c))))
		  s)
      #+allegro	(ff:convert-to-lang n)

      #+cmu	(let ((s (make-string (length n))))
		  (dotimes (i (length n))
		    (setf (char s i)
			  (let ((c (char n i)))
			    (if (eql c #\-) #\_ c))))
		  s)))))


(defun ff-name-local (n ln)
  (if ln ln n))


(defmacro defunff ((name &optional lname) rtype (&rest args))
  ;;
  ;;
  #+lucid	`(def-foreign-function (,(if lname lname name)
					(:return-type ,(ff-simple-type rtype))
					(:language :c)
					(:name ,(ff-name name)))
		  ,(@(mapcar #'(lambda (arg) `(,(car arg) ,(ff-type (cadr arg)))) args)))
				 
  #+allegro	`(ff:defforeign
		  ',(if lname lname name)
		  :return-type ,(ff-type rtype)
		  :entry-point ,(ff-name name)
		  :arguments '(,@(mapcar #'(lambda (arg) (intern (string (ff-type (cadr arg))))) args))
		  )

  #| (not allegro-v5.0)
  #+allegro-v5.0	`(ff:def-foreign-call (,name nil ,(if lname lname name))
			  :returning ,(ff-type rtype)
			  :arguments '(,@(mapcar #'(lambda (arg) (intern (string (ff-type (cadr arg))))) args))
			  )
  |#

  #+cmu		`(alien:def-alien-routine (,(ff-name name) ,(ff-name-local name lname))
		  ,(ff-type rtype)
		  ,@(mapcar #'(lambda (arg) `(,(car arg) ,(ff-type (cadr arg)))) args))
			
  )

;; example from cmucl manual.
#|
(declaim (inline c-function))
(def-alien-routine c-function
    (* (struct c-struct))
  (i int)
  (s c-string)
  (r (* (struct  c-struct)))
  (a (array int 10)))

(defun call-cfun ()
  (with-alien ((ar (array int 10))
	       (c-struct (struct c-struct)))
    (dotimes (i 10)			; Fill array.
      (setf (deref ar i) i))

    (setf (slot c-struct 'x) 20)
    (setf (slot c-struct 's) "A Lisp String")

    (with-alien ((res (* (struct c-struct)) (c-function 5 "Another Lisp String" (addr c-struct) ar)))
      (format t "Returned from C function.")
      (multiple-value-prog1 (values (slot res 'x) (slot res 's))

	;; Deallocate result iafter we are done using it.
	(free-alien res)))))
|#


		   

;;;;	
;;;;	Some implementation dependent defs to supply uniform
;;;;	names across implementations.
;;;;	


(defun features () (dolist (f *features*) (format t "~a~%" f)))

#+lucid
(defun image-save (filename restart-f)

  (user::disksave filename
		  :full-gc t
		  :restart-function restart-f))


#+allegro
(defun image-save (filename restart-f)

  (setf excl:*read-init-files* nil)
  (setf excl:*restart-init-function* restart-f)

  (excl:gc t)

  (excl::dumplisp :name filename)
  )

#+cmu
(defun image-save (filename restart-f)

  (extensions:gc :full t)  
  (extensions:save-lisp filename :init-function restart-f
                                 ;;:load-init-file restart-f
  )
)



;; allegro-v5.0.1 is kludge to compile in pvs.
(defmacro with-profile ((type onoff &optional (graphp t) (justoncep t)) &body body)
  #+(or (not allegro) (and allegro allegro-v5.0.1))
  `(time (progn ,@body))
  #+(and allegro (not allegro-v5.0.1))
  (let ((aux (gensym))
	(switch-global (intern (concatenate 'string "*" (string onoff) "*"))))
    `(labels ((,aux ()
	       ,@body))
      (declare (special ,switch-global))
      (unless (boundp ',switch-global) (setf ,switch-global t))
      (if ,switch-global
	  (progn (when ,justoncep (setf ,switch-global nil))
		 (multiple-value-prog1 (prof:with-profiling (:type ,type :count t) (,aux))
		   (progn (prof:show-flat-profile)
			  ,(when graphp `(prof:show-call-graph))
			  (prof:show-call-counts))))
	  (time (,aux))))))

(defun reset-ml ()) ;; stub in-case ml not loaded.
