;;; url-queue.el --- Fetching web pages in parallel

;; Copyright (C) 2011-2012 Free Software Foundation, Inc.

;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: comm

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; The point of this package is to allow fetching web pages in
;; parallel -- but control the level of parallelism to avoid DoS-ing
;; web servers and Emacs.

;;; Code:

(eval-when-compile (require 'cl))
(require 'browse-url)
(require 'url-parse)

(defcustom url-queue-parallel-processes 6
  "The number of concurrent processes."
  :version "24.1"
  :type 'integer
  :group 'url)

(defcustom url-queue-timeout 5
  "How long to let a job live once it's started (in seconds)."
  :version "24.1"
  :type 'integer
  :group 'url)

;;; Internal variables.

;; The list of pending and running jobs, oldest first.  Each element
;; is a `url-queue' struct.
(defvar url-queue nil)

;; A job in the queue.  `start-time' is nil until the job's fetch has
;; actually been started; `pre-triggered' is set when an idle timer
;; has been scheduled to start it; `buffer' is the buffer returned by
;; `url-retrieve' once the job is running.
(defstruct url-queue
  url callback cbargs silentp
  buffer start-time pre-triggered
  inhibit-cookiesp)

;;;###autoload
(defun url-queue-retrieve (url callback &optional cbargs silent inhibit-cookies)
  "Retrieve URL asynchronously and call CALLBACK with CBARGS when finished.
This is like `url-retrieve' (which see for details of the arguments),
but with limits on the degree of parallelism.  The variable
`url-queue-parallel-processes' sets the number of concurrent processes.
The variable `url-queue-timeout' sets a timeout."
  ;; Append (rather than push) so that jobs are fetched in the order
  ;; they were requested.
  (setq url-queue
	(append url-queue
		(list (make-url-queue :url url
				      :callback callback
				      :cbargs cbargs
				      :silentp silent
				      :inhibit-cookiesp inhibit-cookies))))
  (url-queue-setup-runners))

;; To ensure asynch behaviour, we start the required number of queue
;; runners from `run-with-idle-timer'.  So we're basically going
;; through the queue in two ways: 1) synchronously when a program
;; calls `url-queue-retrieve' (which will then start the required
;; number of queue runners), and 2) at the exit of each job, which
;; will then not start any further threads, but just reuse the
;; previous "slot".

(defun url-queue-setup-runners ()
  "Schedule an idle-timer queue runner if a slot is free.
Counts jobs that are running (have a start time) or already have a
runner scheduled (`pre-triggered'); if that count is below
`url-queue-parallel-processes' and a job is waiting, mark the first
waiting job as pre-triggered and schedule `url-queue-run-queue'."
  (let ((running 0)
	waiting)
    (dolist (entry url-queue)
      (cond
       ((or (url-queue-start-time entry)
	    (url-queue-pre-triggered entry))
	(incf running))
       ((not waiting)
	(setq waiting entry))))
    (when (and waiting
	       (< running url-queue-parallel-processes))
      (setf (url-queue-pre-triggered waiting) t)
      (run-with-idle-timer 0.01 nil 'url-queue-run-queue))))

(defun url-queue-run-queue ()
  "Prune timed-out jobs, then start the first waiting job if a slot is free."
  (url-queue-prune-old-entries)
  (let ((running 0)
	waiting)
    (dolist (entry url-queue)
      (cond
       ((url-queue-start-time entry)
	(incf running))
       ((not waiting)
	(setq waiting entry))))
    (when (and waiting
	       (< running url-queue-parallel-processes))
      (setf (url-queue-start-time waiting) (float-time))
      (url-queue-start-retrieve waiting))))

(defun url-queue-callback-function (status job)
  "Called by `url-retrieve' when JOB finishes with STATUS.
Removes JOB from the queue, kicks the queue to start the next job,
and finally invokes JOB's user callback with STATUS and its cbargs."
  (setq url-queue (delq job url-queue))
  (when (and (eq (car status) :error)
	     (eq (cadr (cadr status)) 'connection-failed))
    ;; If we get a connection error, then flush all other jobs from
    ;; the host from the queue.  This particularly makes sense if the
    ;; error really is a DNS resolver issue, which happens
    ;; synchronously and totally halts Emacs.
    (url-queue-remove-jobs-from-host
     (plist-get (nthcdr 3 (cadr status)) :host)))
  (url-queue-run-queue)
  (apply (url-queue-callback job) (cons status (url-queue-cbargs job))))

(defun url-queue-remove-jobs-from-host (host)
  "Kill and drop every queued job whose URL points at HOST."
  (let ((jobs nil))
    (dolist (job url-queue)
      (when (equal (url-host (url-generic-parse-url (url-queue-url job)))
		   host)
	(push job jobs)))
    (dolist (job jobs)
      (url-queue-kill-job job)
      (setq url-queue (delq job url-queue)))))

(defun url-queue-start-retrieve (job)
  "Start fetching JOB's URL via `url-retrieve', recording its buffer.
Errors from `url-retrieve' are ignored, leaving the buffer slot nil."
  (setf (url-queue-buffer job)
	(ignore-errors
	  (url-retrieve (url-queue-url job)
			#'url-queue-callback-function (list job)
			(url-queue-silentp job)
			(url-queue-inhibit-cookiesp job)))))

(defun url-queue-prune-old-entries ()
  "Kill and drop jobs that have been running longer than `url-queue-timeout'."
  (let (dead-jobs)
    (dolist (job url-queue)
      ;; Kill jobs that have lasted longer than the timeout.
      (when (and (url-queue-start-time job)
		 (> (- (float-time) (url-queue-start-time job))
		    url-queue-timeout))
	(push job dead-jobs)))
    (dolist (job dead-jobs)
      (url-queue-kill-job job)
      (setq url-queue (delq job url-queue)))))

(defun url-queue-kill-job (job)
  "Forcibly terminate JOB's fetch and notify its callback of the failure.
Silences and deletes any process attached to JOB's buffer, then calls
JOB's callback with a timeout error status so the caller learns the
job failed."
  (when (bufferp (url-queue-buffer job))
    (let (process)
      (while (setq process (get-buffer-process (url-queue-buffer job)))
	;; Silence the sentinel so killing the process doesn't trigger
	;; the normal completion machinery.
	(set-process-sentinel process 'ignore)
	(ignore-errors
	  (delete-process process)))))
  ;; Call the callback with an error message to ensure that the caller
  ;; is notified that the job has failed.
  (with-current-buffer
      (if (and (bufferp (url-queue-buffer job))
	       (buffer-live-p (url-queue-buffer job)))
	  ;; Use the (partially filled) process buffer if it exists.
	  (url-queue-buffer job)
	;; If not, just create a new buffer, which will probably be
	;; killed again by the caller.
	(generate-new-buffer " *temp*"))
    (apply (url-queue-callback job)
	   (cons (list :error (list 'error 'url-queue-timeout
				    "Queue timeout exceeded"))
		 (url-queue-cbargs job)))))

;; Compatibility fallback for Emacsen lacking `float-time'.
(if (not (fboundp 'float-time))
    (defun float-time (&optional specified-time)
      "Convert time value SPECIFIED-TIME to a floating point number.
See `current-time'.  Since the result is a floating-point number, this may
not have the same accuracy as does the result of `current-time'.
If not supplied, SPECIFIED-TIME defaults to the result of `current-time'."
      (or specified-time (setq specified-time (current-time)))
      ;; Time values are (HIGH LOW . USEC); combine the 16-bit-shifted
      ;; HIGH part, the LOW part, and the microseconds, if present.
      (+ (* (pop specified-time) (+ #x10000 0.0))
	 (if (consp specified-time)
	     (pop specified-time)
	   (prog1
	       specified-time
	     (setq specified-time nil)))
	 (or (and specified-time
		  (/ (car specified-time) 1000000.0))
	     0.0))))

(provide 'url-queue)

;;; url-queue.el ends here