User:Tony Sidaway/Feeds/source/feeds-1.0

This is an RSS news aggregator used by me in connection with some Wikipedia-related discussions.

All code is Copyright Tony Sidaway, 2008.

For code licensing purposes, it can be used under the Apache license. This is in addition to the Wikipedia site license which applies to the use of the text of the code.

Using this code.

This code will compile and run under any reasonably up-to-date installation of the Chicken Scheme compiler. It requires a small tweak to the http egg, which I discuss in this thread on the chicken-users mailing list.

(require-extension srfi-1)
(require-extension srfi-13)
(require-extension posix)
(require-extension irnc-base)

(define verbose #f)
(define posting #t)
(define filter-string #f)
(define where 11)
(define posting-period 10)

;;;(define base-url "http://www.ubbcentral.com")
(define base-url "http://www.wikback.com")
(define action "/forums/ubbthreads.php")
;;;(define post-template "/forums/ubbthreads.php/ubb/newpost/Board/~a.html")
(define post-template "/forums/ubbthreads.php?ubb=newpost&Board=~a")

(define (usage)
  (printf "Parameters:~%")
  (printf " --no-post~%")
  (printf " -n~%")
  (printf "   Don't post to any forum~%")
  (printf " --filter STRING~%")
  (printf " -f STRING~%")
  (printf "   Filter feeds, selecting only those containing the string%")
  (printf " --where FORUMID~%")
  (printf " -w FORUMID~%")
  (printf "   Post on the forum identified by the numerical FORUMID%")
  (printf " --where FORUMID~%")
  (printf " -w FORUMID~%")
  (printf "   Post on the forum identified by the numerical FORUMID%")
  (printf " --base-url URL~%")
  (printf " -b URL~%")
  (printf "   Use URL as the base url for posting~%")
  (printf " --action STRING~%")
  (printf " -a STRING~%")
  (printf "   Use the string to append to the base URL to obtain the action target for all form post actions~%")
  (printf " --post-template STRING~%")
  (printf " -p STRING~%")
  (printf "   Use the string appended to the base URL as a format template to be used in combination with the --where parameter to construct the forum posting URL~%")
  (printf " --help~%")
  (printf " -h~%")
  (printf " --usage~%")
  (printf "   Display this usage information and terminate~%")
  (printf " --verbose~%")
  (printf " -v~%")
  (printf "   Send diagnostic trace information to standard error stream~%"))

(define (with-fallback fallback thunk)
  (call/cc
   (lambda (k)
     (with-exception-handler
       (lambda (x) (k (fallback)))
       (lambda ()
	 (fluid-let ((##sys#signal-hook
		      (lambda (mode msg . args)
			(k (fallback)))))
	     (thunk)))))))

(let loop ((arguments (command-line-arguments)))
  (or (null? arguments)
      (case (string->symbol (car arguments))
        [(--no-post -n)
         (set! posting #f)
         (loop (cdr arguments))]
        [(--verbose -v)
         (set! verbose #t)
         (loop (cdr arguments))]
        [(--filter -f)
         (if (< (length arguments) 2)
             (error "Missing value for --filter argument"))
         (set! filter-string (second arguments))
         (loop (cddr arguments))]
        [(--where -w)
         (if (< (length arguments) 2)
             (error "Missing value for --where argument"))
         (set! where (second arguments))
         (loop (cddr arguments))]
        [(--base-url -b)
         (if (< (length arguments) 2)
             (error "Missing value for --base-url argument"))
         (set! base-url (second arguments))
         (loop (cddr arguments))]
        [(--action -a)
         (if (< (length arguments) 2)
             (error "Missing value for --action argument"))
         (set! action (second arguments))
         (loop (cddr arguments))]
        [(--post-template -p)
         (if (< (length arguments) 2)
             (error "Missing value for --post-template argument"))
         (set! action (second arguments))
         (loop (cddr arguments))]
        [(--help -h --usage)
         (usage)
         (exit)]
        [else
         (usage)
         (error (format "Error: ~s not recognised" (car arguments)))])))

(or filter-string
    (error "Please specify a --filter-string argument, or --usage for help"))

; get-cookies: return contents of cookie attribute for a request.
(define get-cookies
  (let ([http:request-attributes http:request-attributes])
    (lambda (req)
      (alist-ref "cookie" (http:request-attributes req) string-ci=?))))

; ubb-http: perform a http GET or POST
(define ubb-http
  (let ([get-cookies get-cookies])
    (lambda (url body method . cookies)
      (with-fallback (lambda () (list #f #f))
                     (lambda ()
                       (let* ([attributes (append
                                           `(("Content-Type" . "application/x-www-form-urlencoded")
                                             ("User-Agent" . ,(irnc-user-agent))
                                             ("Referer" . ,url))
                                           (map (lambda (x) (cons "Cookie" x)) cookies))]
                              [request (http:make-request method url attributes)]
                              [response (if (eqv? 'POST method)
                                            (http:POST request body)
                                            (http:GET request))]
                              [returned-cookies (get-cookies request)])
                         (list response returned-cookies)))))))

(define (sort-cookies cookie-line)
  (filter (lambda (x)
            (not (string=? (cadr (string-split x "=")) "deleted")))
          (string-split cookie-line ";")))


(define (ubb-login base-url action username password)
  (with-fallback (lambda () #f)
                 (lambda ()
                   (let* ((action-url (string-append base-url action))
                          (login-keys `(("Loginname" . ,username)
                                        ("Loginpass" . ,password)
                                        ("firstlogin" . "1")
                                        ("ON_COMPLETION_URL" . "")
                                        ("rememberme" . "1")
                                        ("ubb" . "start_page")
                                        ("buttlogin" . "Log In")))
                          (login-result (ubb-http action-url login-keys 'POST)))
                     (sort-cookies (cadr login-result))))))
  

(define (ubb-post cookies base-url post action forum subject content)
  (with-fallback (lambda () #f)
                 (lambda ()
                   (let* ((post-url (string-append base-url post))
                          (action-url (string-append base-url action))
                          (post-page (ubb-http post-url "" 'GET (string-intersperse cookies ";")))
                          (sexp-post-page (html->shtml (car post-page)))
                          (form (begin
                                  (if verbose (fprintf (current-error-port) "sexp-post-page is ~s~%" sexp-post-page))
                                  ((sxpath "//form[@name='replier']") sexp-post-page)))
                          (md5 (car ((sxpath "//input[@name='md5_stamp']/@value/text()") form)))
                          (body (format "ubb=addpost&Board=~a&Subject=~a&what=&textcont=Submit&Body=~a&md5_stamp=~a"
                                        forum (uri-encode subject) (uri-encode content) md5))
                          (post-result (ubb-http action-url body 'POST (string-intersperse cookies ";"))))
                     (html->shtml (car post-result))))))


(define post (format post-template where))

(define usernamepass (with-input-from-file "~/.ubb" read))
(define feeds (with-input-from-file "~/.feeds" read))
(define ids (with-input-from-file "~/.feed-post-ids" read))

;(display (format "There are ~a feeds~%" (length feeds)))
;(pretty-print feeds)

(define cookies (ubb-login base-url action (car usernamepass) (cadr usernamepass)))

(define (string-clean s)
  (string-substitute "\\s+$" "" (string-substitute "^\\s+" "" s)))

(define (cadar-q x)
  (if (and (> (length x) 0)
           (> (length (car x)) 1))
      (cadar x) ""))

(define (car-q x)
  (if (> (length x) 0) (car x) ""))

(define (clean-borked-feed feed)
  (fprintf (current-error-port) "Cleaning borked feed~%")
  (string-intersperse
   (find-tail
    (lambda (x) (char=? (string-ref x 0) #\<))
    (string-split feed "\r\n"))
   "\r\n"))

(define (parse-feed feed-qualifiers feed)
  (with-fallback
   (lambda () '())
   (lambda ()
     (fprintf (current-error-port) "feed-qualifiers are ~s, borked is ~s~%" feed-qualifiers
              (memq 'borked-feed feed-qualifiers))
     (and (> (string-length feed) 0)
          (let ((clean-feed
                 (if (memq 'borked-feed feed-qualifiers) (clean-borked-feed feed) feed)))
            (with-input-from-string clean-feed
              (lambda () (SSAX:XML->SXML (current-input-port) '()))))))))

(define (extract-items feed-type sfeed)
  (case feed-type
    [(rss-2.0)
     ((sxpath "//rss/channel/item") sfeed)]
    [else
      '()]))

(for-each
 (lambda (feed-record)
   (and-let* ((feed-name (first feed-record))
	      (feed-description-url (second feed-record))
              (feed-type (third feed-record))
              (feed-qualifiers (fourth feed-record))
              (feed-url (fifth feed-record))
              (feed
               (with-fallback
                (lambda () (list "" #f))
                (lambda () (irnc-http feed-url "" 'GET))))
              (sfeed
               (begin
                 (if verbose (fprintf (current-error-port) "feed is ~s~%" feed))
                 (parse-feed feed-qualifiers (car feed))))
              (items (extract-items feed-type sfeed))
              (filtered-items
               (begin
                 (fprintf (current-error-port) "Feed type is ~s~%" feed-type)
                 (if verbose (fprintf (current-error-port)
                                      "Items are ~s~%" items))
                 (filter
                  (lambda (x)
                    (string-contains-ci
                     (string-intersperse
                      (list (string-clean
                             (cadar-q ((sxpath "//title") x)))
                            (string-clean
                             (cadar-q ((sxpath "//description") x))))
                      " ")
                     filter-string))
                  items))))
             (if verbose
                 (fprintf (current-error-port) "Feed ~a, filtered-items are ~s~%"
                          feed-name filtered-items))
             (for-each
              (lambda (x)
                (let ((title (string-clean (cadar-q ((sxpath "//title") x))))
                      (link (string-clean (cadar-q ((sxpath "//link") x))))
                      (guid (string-clean (car-q ((sxpath "//guid/text()") x))))
                      (pubDate (string-clean
                                (cadar-q ((sxpath "//pubDate") x))))
                      (description (string-clean
                                    (apply string-append
                                           (filter string?
                                                   (html->shtml
                                                    (cadar-q
                                                     ((sxpath "//description") x))))))))
                  (let ((id (if (string=? guid "") (string-append pubDate title) guid)))
                    (if verbose (fprintf (current-error-port) "id is ~s~%" id))
                    (if (not (member id ids))
                        (begin
                          (set! ids (cons id ids))
                          (if verbose (fprintf (current-error-port) "ids is ~s~%" ids))
                          (with-output-to-file "~/.feed-post-ids" (lambda () (write ids)))
                          ((if posting
                               (lambda (x)
                                 (let ((result (ubb-post cookies base-url post
                                                         action where
                                                         (string-append feed-name ": " title) x)))
                                   (sleep posting-period)
                                   result))
                               display)
                           (format "[url=~a]~a[/url], ~a~%~%[url=~a]~a[/url]~%~%~a~%"
                                   feed-description-url feed-name pubDate link
                                   title description)))))))
              filtered-items)))
             feeds)