Parse-URI :: LISP

In the last exams session I had to do a simplified URI parser in LISP.
LISP has the well known ability to make your brain nut, but when you get a bit inside the things it become quite interesting.
The projects of this course are always quite similar, so I assume someone could be interested to see how I did it.
Please note that I did everything in no more than 3 or 4 hours, so surely it should had been done better (for example without using defparameter instructions).
Anyway it gave me a pretty high mark, so I’m OK with it.

Here is the simplified syntax I had to follow ::

Highslide JS

And here is the code – Parse-uri.lisp ::

;;;; Mafio
(defstruct uri scheme userinfo host port path query fragment)

(defparameter struttura (make-uri))
(defparameter urilist (coerce "" 'list))

(defun parse-uri (uristr)
  (setq struttura (make-uri))
  (setq urilist (coerce uristr 'list))

  (cond
    ((not (stringp uristr)) (error "stringa non trovata"))
    (T (scheme-dfa "s0"))
  )

  (if (equal (uri-scheme struttura) nil) nil
    (setf (uri-scheme struttura) 
          (coerce (uri-scheme struttura) 'string))
  )

  (setf urilist (cdr urilist))

  (cond
   ;;; mailto:userinfo@host
   ((equal (uri-scheme struttura) "mailto")
    (if (equal urilist nil) (error "userinfo non trovato")
        (userinfo-dfa "s0"))
    (if (equal (car urilist) #\@)
        (progn
          (setf urilist (cdr urilist)) ;; tolgo la @
          (if (equal (car urilist) #\.) ;; se host inizia con . errore
              (error "host errato"))
          (if (not (equal urilist nil))
              (host-dfa "s0")
          )
          (if (equal (car urilist) #\.) ;; se host finisce con . errore
              (error "host errore"))
          (setf (uri-host struttura)
                (coerce (uri-host struttura) 'string))
        )
    )
    (setf (uri-userinfo struttura)
          (coerce (uri-userinfo struttura) 'string))
   )
   ;;; news:host
   ((equal (uri-scheme struttura) "news")
    (if (equal (car urilist) #\.)
        (error "host errato"))
    (host-dfa "s0")
    (setf (uri-host struttura)
          (coerce (uri-host struttura) 'string))
   )
   ;;; tel or fax:userinfo
   ((or (equal (uri-scheme struttura) "tel")
        (equal (uri-scheme struttura) "fax"))
    (userinfo-dfa "s0")
    (setf (uri-userinfo struttura)
          (coerce (uri-userinfo struttura) 'string))
   )
   ;;; caso 1 .:. scheme:// etc etc
   ((and (equal (car urilist) #\/) (equal (car (cdr urilist)) #\/))
    (setf urilist (cdr (cdr urilist))) ;; tolgo i 2 //

    ;authority-usri ;; cerca eventuale userinfo
    (userinfo-dfa "s0")

    ;; se manca host, ricopio userinfo in urilist e controllo host
    (if (or (equal urilist nil) (not (equal (car urilist) #\@)))
        (progn
          (setf urilist (append (uri-userinfo struttura) urilist))
          (setf (uri-userinfo struttura) nil)         
          (if (equal (car urilist) #\.)
              (error "host errato"))
          (host-dfa "s0")
          (setf (uri-host struttura)
                (coerce (uri-host struttura) 'string))
        )
    )

    ;; sia userinfo che host sono presenti
    (if (equal (car urilist) #\@)
        (progn
          (setf urilist (cdr urilist)) ;; tolgo la @
          (if (equal (car urilist) #\.)
              (error "host errato"))
          (host-dfa "s0")
          (setf (uri-host struttura)
                (coerce (uri-host struttura) 'string))
          (setf (uri-userinfo struttura)
                (coerce (uri-userinfo struttura) 'string))
        )
    )

    ;; cerca port
    (if (equal (car urilist) #\:)
        (progn
          (setf urilist (cdr urilist)) ;; tolgo i :
          (port-dfa)
          (setf (uri-port struttura)
                (coerce (uri-port struttura) 'string))
        )
    )

    ;; cerca path-after-authority
    (if (and (equal (car urilist) #\/) 
             (not (equal (car (cdr urilist)) #\/)))
        (progn
          (setf urilist (cdr urilist)) 
          (path-dfa)
          (setf (uri-path struttura)
                (coerce (uri-path struttura) 'string))
        )
    )

    ;; cerca query
    (if (equal (car urilist) #\?)
        (progn
          (if (equal (cdr urilist) nil)
              (error "query errata"))
          (if (equal (car (cdr urilist)) #\#)
              (error "query errata"))
          (setf urilist (cdr urilist))
          (query-dfa)
          (setf (uri-query struttura)
                (coerce (uri-query struttura) 'string))
        )
    )

    ;; cerca fragment
    (if (equal (car urilist) #\#)
        (progn
          (if (equal (cdr urilist) nil)
              (error "fragment errato"))
          (setf urilist (cdr urilist))
          (fragment-dfa)
          (setf (uri-fragment struttura)
                (coerce (uri-fragment struttura) 'string))
        )
    )

   )
   ;;; caso 2 .:. scheme:/ etc etc
   (T
    (if (equal (car urilist) #\/)
        (setf urilist (cdr urilist)) ;; tolgo lo /
    )
    ;; se ? allora query
    (if (equal (car urilist) #\?)
        (progn
          (if (equal (cdr urilist) nil)
              (error "query errata"))
          (if (equal (car (cdr urilist)) #\#)
              (error "query errata"))
          (setf urilist (cdr urilist)) ;; tolgo ?
          (query-dfa)
          (setf (uri-query struttura)
                (coerce (uri-query struttura) 'string))

          ;; se # allora fragment
          (if (equal (car urilist) #\#)
              (progn
                (if (equal (cdr urilist) nil)
                    (error "fragment errato"))
                (setf urilist (cdr urilist)) ;; tolgo #
                (fragment-dfa)
                (setf (uri-fragment struttura)
                      (coerce (uri-fragment struttura) 'string))
              )
          )
        )
    )

    ;; se # allora fragment
    (if (equal (car urilist) #\#)
        (progn
          (if (equal (cdr urilist) nil)
              (error "fragment errato"))
          (setf urilist (cdr urilist)) ;; tolgo #
          (fragment-dfa)
          (setf (uri-fragment struttura)
                (coerce (uri-fragment struttura) 'string))
        )
    )

    ;; altrimenti cerca path
    (if (not (equal urilist nil))     
        (progn
          (path-dfa)
          (setf (uri-path struttura)
                (coerce (uri-path struttura) 'string))
          ;; se ? allora query
          (if (equal (car urilist) #\?)
              (progn
                (if (equal (cdr urilist) nil)
                    (error "query errata"))
                (if (equal (car (cdr urilist)) #\#)
                    (error "query errata"))
                (setf urilist (cdr urilist)) ;; tolgo ?
                (query-dfa)
                (setf (uri-query struttura)
                      (coerce (uri-query struttura) 'string))

                ;; se # allora fragment
                (if (equal (car urilist) #\#)
                    (progn
                      (if (equal (cdr urilist) nil)
                          (error "fragment errato"))
                      (setf urilist (cdr urilist)) ;; tolgo #
                      (fragment-dfa)
                      (setf (uri-fragment struttura)
                            (coerce (uri-fragment struttura) 'string))
                    )
                )
              )
            ;; se # allora fragment
            (if (equal (car urilist) #\#)
                (progn
                  (if (equal (cdr urilist) nil)
                      (error "fragment errato"))
                  (setf urilist (cdr urilist)) ;; tolgo #
                  (fragment-dfa)
                  (setf (uri-fragment struttura)
                        (coerce (uri-fragment struttura) 'string))
                )
            )
           )
        )
    )

   )
  )

  ;;; se c'e' ancora qualcosa nella lista dai errore
  (if (not (equal urilist nil))
      (error "errore")
  )

  struttura
)

;;; costruisce struttura uri-scheme
(defun scheme-dfa (state)
  (cond
    ((equal urilist nil) (error "scheme errato"))
    ((equal (car urilist) #\:) (if (equal state "s2") T nil))
    ((equal state "s0")
     (if (check-character (car urilist))
	 (progn (setf (uri-scheme struttura) (mk-lst (car urilist))) 
			(setf urilist (cdr urilist)) 
		(scheme-dfa "s1"))
	 (error "scheme errato"))
     )
    ((equal state "s1")
     (if (check-character (car urilist))
	 (if (equal (car urilist) #\:) (scheme-dfa "s2")
	     (progn (setf (uri-scheme struttura) (mk-lst (car urilist))) 
				(setf urilist (cdr urilist)) 
		    (scheme-dfa "s1"))
	     )
	 (error "scheme errato"))
    )
  )
)

;;; costruisce struttura uri-userinfo
(defun userinfo-dfa (state)
  (cond
    ((equal urilist nil) (if (equal state "s1") T nil))
    ((equal (car urilist) #\@) (if (equal state "s2") T nil))
    ((equal state "s0")
     (if (check-character (car urilist))
	 (progn (setf (uri-userinfo struttura) (mk-lst-usri (car urilist)))
			(setf urilist (cdr urilist)) 
		(userinfo-dfa "s1"))
         (error "userinfo errato 1"))
     )
    ((equal state "s1")
     (if (check-character (car urilist))
	 (if (equal (car urilist) #\@) (userinfo-dfa "s2")
	     (progn (setf (uri-userinfo struttura) (mk-lst-usri (car urilist))) 
				(setf urilist (cdr urilist)) 
		    (userinfo-dfa "s1"))
	     )
     )
    )
  )
)

;;; costruisce automa host
(defun host-dfa (state)
  (if (and (equal (car urilist) #\.) (equal (car (cdr urilist)) #\.))
      (error "host errato"))
  (if (and (equal (car urilist) #\.) (equal (cdr urilist) nil))
      (error "host errato"))
  (if (and (equal (car urilist) #\.)
           (not (check-character (car (cdr urilist)))) 
           (not (equal (cdr urilist) nil)))
      (error "host errato"))  
  (if (equal urilist nil) nil)

  (cond
    ((and (equal state "s1") (equal urilist nil) T))
    ((and (equal state "s1") (not (check-character (car urilist)))) T)
    ((equal state "s0")
     (if (check-character (car urilist))
	 (progn
           (setf (uri-host struttura) (mk-lst-host (car urilist))) 
           (setf urilist (cdr urilist)) 
           (host-dfa "s1")
         )
     )
    )
    ((equal state "s1")
     (if (check-character (car urilist))
         (progn
           (setf (uri-host struttura) (mk-lst-host (car urilist))) 
           (setf urilist (cdr urilist)) 
           (host-dfa "s1")
         )  
     )
    )
  )
)

;;; costruisce automa porta
(defun port-dfa ()
  (if (equal urilist nil) T)
  (if (is-digit (car urilist))
      (progn
        (mk-lst-port (car urilist))
        (setf urilist (cdr urilist))
        (port-dfa)
      )
  )
)

;;; costruisce automa path
(defun path-dfa ()
  (if (equal urilist nil) T)
  (if (and (equal (car urilist) #\/) (equal (car (cdr urilist)) #\/))
      (error "path errato")
  )
  (if (and (equal (car urilist) #\/)
           (not (equal urilist nil)))
      (progn
        (mk-lst-path (car urilist))
        (setf urilist (cdr urilist))
        (path-dfa)
      )
  )
  (if (and (equal (check-character (car urilist)) T)
           (not (equal urilist nil)))
      (progn
        (mk-lst-path (car urilist))
        (setf urilist (cdr urilist))
        (path-dfa)
      )
  )
)

;; costruisce query
(defun query-dfa ()
  (if (equal urilist nil) T)
  (if (and (not (equal (car urilist) #\#))
           (not (equal urilist nil)))
      (progn
        (mk-lst-query (car urilist))
        (setf urilist (cdr urilist))
        (query-dfa)
      )
  )
)

;; costruisce fragment
(defun fragment-dfa ()
  (if (not (equal urilist nil))
      (progn
        (mk-lst-fragment (car urilist))
        (setf urilist (cdr urilist))
        (fragment-dfa)
      )
  )
)

;;; controlla se parametro 0 <= digit <= 9
(defun is-digit (digit)
  (if (or (equal digit #\0) (equal digit #\1) (equal digit #\2)
          (equal digit #\3) (equal digit #\4) (equal digit #\5)
          (equal digit #\6) (equal digit #\7) (equal digit #\8)
          (equal digit #\9)) T nil)
)

;;; controlla sintassi identificatore
;;; se trova un "/", "?", "#", "@", ":" ritorna "nil", altrimenti "T"
(defun check-character (carattere)
  (if (and (not (equal carattere #\/)) (not (equal carattere #\?)) 
           (not (equal carattere #\#)) (not (equal carattere #\@))
           (not (equal carattere #\:)))
      T nil)
)

;;; append uri-scheme
(defun mk-lst (elem)
  (cond
    ((equal (uri-scheme struttura) nil) (setf (uri-scheme struttura) 
											  (list elem)))
    ((atom (uri-scheme struttura)) (setf (uri-scheme struttura) 
										 (list (uri-scheme struttura) elem)))
    (T (setf (uri-scheme struttura) (append (uri-scheme struttura) 
											(list elem))))
  )
)

;;; append uri-userinfo
(defun mk-lst-usri (elem)
  (cond
   ((equal (uri-userinfo struttura) nil) (setf (uri-userinfo struttura) 
											   (list elem)))
   ((atom (uri-userinfo struttura)) (setf (uri-userinfo struttura)
										(list (uri-userinfo struttura) elem)))
   (T (setf (uri-userinfo struttura) (append (uri-userinfo struttura) 
											 (list elem))))
    )
)

;;; append uri-host
(defun mk-lst-host (elem)
  (cond
   ((equal (uri-host struttura) nil) (setf (uri-host struttura) 
										   (list elem)))
   ((atom (uri-host struttura)) (setf (uri-host struttura) 
									  (list (uri-host struttura) elem)))
   (T (setf (uri-host struttura) (append (uri-host struttura) (list elem))))
    )
)

;;; append uri-port
(defun mk-lst-port (elem)
  (cond
   ((equal (uri-port struttura) nil) (setf (uri-port struttura) (list elem)))
   ((atom (uri-port struttura)) (setf (uri-port struttura) 
									  (list (uri-port struttura) elem)))
   (T (setf (uri-port struttura) (append (uri-port struttura) (list elem))))
    )
)

;;; append uri-path
(defun mk-lst-path (elem)
  (cond
   ((equal (uri-path struttura) nil) (setf (uri-path struttura) (list elem)))
   ((atom (uri-path struttura)) (setf (uri-path struttura) 
									  (list (uri-path struttura) elem)))
   (T (setf (uri-path struttura) (append (uri-path struttura) (list elem))))
    )
)

;;; append uri-query
(defun mk-lst-query (elem)
  (cond
   ((equal (uri-query struttura) nil) (setf (uri-query struttura) (list elem)))
   ((atom (uri-query struttura)) (setf (uri-query struttura) 
									   (list (uri-query struttura) elem)))
   (T (setf (uri-query struttura) (append (uri-query struttura) (list elem))))
    )
)

;;; append uri-query
(defun mk-lst-fragment (elem)
  (cond
   ((equal (uri-fragment struttura) nil) (setf (uri-fragment struttura) 
											   (list elem)))
   ((atom (uri-fragment struttura)) (setf (uri-fragment struttura) 
										(list (uri-fragment struttura) elem)))
   (T (setf (uri-fragment struttura) (append (uri-fragment struttura) 
											 (list elem))))
    )
)

And remember, like my prof says: a true programmer write code only past 3 am.

Cheers, Fabio

Leave a Comment

Your email address will not be published. Required fields are marked *