openIt_Types;openDbg;openIt_Ops;moduleIt_http(IO:MonadIO)=structopenIteratees;moduleI=Make(IO);openI;(* Combining the primitive iteratees to solve the running problem: Reading headers and the content from an HTTP-like stream*)typeline=string(* The line of text, terminators are not included *);(* Read the line of text from the stream The line can be terminated by CR, LF or CRLF. Return (Right Line) if successful. Return (Left Line) if EOF or a stream error were encountered before the terminator is seen. The returned line is the string read so far. This is a totally high-level Iteratee, built by composing low-level ones. It knows nothing about the representation of Iteratees.*)value(line:iterateechar([=`No_term|`Term]*line))=letlf=['\n']inletcrlf=['\r';'\n']inletchecklts=return&((ifts=0then`No_termelse`Term),l)inletterminators=headscrlf>>=funn->ifn==0thenheadslfelsereturnninbreak_chars(func->c=='\r'||c=='\n')>>=funl->let()=dbg"http_line: %S\n"linterminators>>=funts->checklts;(* Line iteratees: processors of a stream whose elements are made of Lines Print lines as they are received. This is the first `impure' iteratee with non-trivial actions during chunk processing*)value(print_lines:iterateelineunit)=letpr_linel=print_line(">> read line: "^l)inie_contstepwhererecsteps=matchswith[Chunkc->letlst=S.to_listciniflst=[]thenie_contMstepelseio_iterpr_linelst>>%fun()->ie_contMstep|EOFe->pr_line(ife=Nonethen">> natural end"else">> unnatural end")>>%fun()->ie_doneM()s];(* Combining the primitive iteratees to solve the running problem: Reading headers and the content from an HTTP-like stream Convert the stream of characters to the stream of lines, and apply the given iteratee to enumerate the latter. The stream of lines is normally terminated by the empty line. When the stream of characters is terminated, the stream of lines is also terminated, abnormally. This is the first proper Enumeratee: it is the iteratee of the character stream and the enumerator of the line stream. More generally, we could have used sequence_stream to implement enum_lines.*)exceptionNon_terminated_lines;valuerec(enum_lines:enumerateecharstring'a)i=matchiwith[IE_contNonek->let()=dbg"enum_lines: IE_cont\n"inline>>=funterm_line->matchterm_linewith[(`Term,"")->let()=dbg"enum_lines: empty line\n"inreturni(* empty line, normal exit *)|(`Term,l)->let()=dbg"enum_lines: term: %S\n"linliftI(k(chunk_ofl)>>%fun(i,_s)->IO.return(enum_linesi))|(`No_term,l)->let()=dbg"enum_lines: non-term: %S\n"lin(lift:_)(k(ifl=""thenEOF(SomeEnd_of_file)elsechunk_ofl)>>%fun(i,_s)->enum_errEnd_of_filei)]|IE_cont(Some_)_->let()=dbg"enum_lines: error\n"inreturni|IE_done_->let()=dbg"enum_lines: done\n"inreturni];(* HTTP chunk decoding Each chunk has the following format: <chunk-size> CRLF <chunk-data> CRLF where <chunk-size> is the hexadecimal number; <chunk-data> is a sequence of <chunk-size> bytes. The last chunk (so-called EOF chunk) has the format 0 CRLF CRLF (where 0 is an ASCII zero, a character with the decimal code 48). For more detail, see "Chunked Transfer Coding", Sec 3.6.1 of the HTTP/1.1 standard: http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1 The following enum_chunk_decoded has the signature of the enumerator of the nested (encapsulated and chunk-encoded) stream. It receives an iteratee for the embedded stream and returns the iteratee for the base, embedding stream. Thus what is an enumerator and what is an iteratee may be a matter of perspective. We have a decision to make: Suppose an iteratee has finished (either because it obtained all needed data or encountered an error that makes further processing meaningless). While skipping the rest of the stream/the trailer, we encountered a framing error (e.g., missing CRLF after chunk data). What do we do? We chose to disregard the latter problem. Rationale: when the iteratee has finished, we are in the process of skipping up to the EOF (draining the source). Disregarding the errors seems OK then. Also, the iteratee may have found an error and decided to abort further processing. Flushing the remainder of the input is reasonable then. One can make a different choice...*)exceptionEChunkedofstring;value(enum_chunk_decoded:enumerateecharchar'a)iter=letrec(enum_chunk_decoded:enumerateecharchar'a)iter=break_chars((=)'\r')>>=funsize_str->matchsize_strwith[""->frame_err(exc"Error reading chunk size")iter|str->(* todo: ptso *)matchread_hexstrwith[None->frame_err(exc("Bad chunk size: "^str))iter|Somesize->let()=dbg"enum_chunk_decoded: frame %i (%x) bytes\n"sizesizeingetCRLFiter(takesizeiter>>=funr->getCRLFr(ifsize=0thenreturnrelseenum_chunk_decodedr))]]andgetCRLFiterm=heads['\r';'\n']>>=funn->ifn=2thenmelseframe_err(exc"Bad Chunk: no CRLF")iterandread_hexx=tryScanf.sscanfx"%x%!"(funi->Somei)with[Scanf.Scan_failure_->None]andexcmsg=EChunkedmsg(* If the processing is restarted, we report the frame error to the inner Iteratee, and exit *)andframe_erreiter=throw_recoverable_err(exc"Frame error")(funs->enum_erreiter>>%funi->IO.return(returni,Sl.ones))inenum_chunk_decodediter;valueit_dbgit=matchitwith[IE_contNone_->"IE_cont None _"|IE_cont(Somee)_->Printf.sprintf"IE_cont (Some %S) _"(Printexc.to_stringe)|IE_done_->"IE_done _"];exceptionMultipart_errorofstring;valuemultipart_errorfmt=Printf.ksprintf(funs->throw_err(Multipart_errors))fmt;valuemultipart_max_header_size=ref1024andmultipart_max_headers_count=ref10;valueit_multipart:string->(liststring->iterateechar'a)->iteratee'a'r->iterateechar'r=fun(typea)(typer)boundaryit_part(it_fold:iterateear)->letread_line=break_chars((=)'\r')>>=funline->letret()=returnlineinpeek>>=funexp_cr_opt->matchexp_cr_optwith[None->ret()|Someexp_cr->let()=assert(exp_cr='\r')injunk>>=fun()->peek>>=funexp_lf_opt->matchexp_lf_optwith[None->ret()|Someexp_lf->junk>>=fun()->matchexp_lfwith['\n'->ret()|c->multipart_error"bad line ending: expected %C, found %C"'\n'c]]]inletread_part_headers=letrecinnercountacc=ifcount>multipart_max_headers_count.valthenmultipart_error"too many multipart headers (more than %i)"multipart_max_headers_count.valelselimitmultipart_max_header_size.valread_line>>=funit_line->matchit_linewith[IE_contNone_->multipart_error"multipart header is longer than %i bytes"multipart_max_header_size.val|IE_cont(Some_)_|IE_done_->it_line>>=funline->let()=fdbg"read_part_headers: line = %S"lineinifline=""thenreturn&List.revaccelseinner(count+1)[line::acc]]ininner0[]inletafter_boundary=head>>=func1->head>>=func2->match(c1,c2)with[('-','-')->head>>=func3->head>>=func4->match(c3,c4)with[('\r','\n')->return`Finished|_->multipart_error"after closing boundary: expected CR LF, \ found %C %C"c3c4]|('\r','\n')->return`Next|_->multipart_error"after boundary: expected either CR LF or \"--\", \ found %C %C"c1c2]inletcrlf_boundary="\r\n"^boundaryinletproc_of_it_foldit_fold=matchit_foldwith[IE_done_|IE_cont(Some_)_->`Skip|IE_contNone_->`Proc]inletfdbg_stream_chartitle=ie_cont(funs->let()=fdbg"stream: %s: %s"title(dbgstream_char~body:10s)inie_doneM()s)inletrecloop_boundaries(it_fold:iterateear)=let()=fdbg"loop"infdbg_stream_char"before loop">>=fun()->after_boundary>>=funab->matchabwith[`Finished->let()=fdbg" `Finished"in(* fdbg_stream_char "after finished" >>= fun () -> *)it_ignore(* must ignore, RFC 2046 *)>>=fun()->letres=feed_itit_fold(EOFNone)inlet()=fdbg" loop_boundaries: res = %s"(it_dbgres)inmap_readyres|`Next->let()=fdbg" `Next"inread_part_headers>>=funpart_headers->letproc=proc_of_it_foldit_foldinbreak_subsequence(probe_stringcrlf_boundary)(matchprocwith[`Proc->let()=fdbg"will read this part with it_part"in(it_partpart_headers)>>=funp->let()=fdbg"it_part ok"inreturn&Somep|`Skip->let()=fdbg"will skip this part"init_ignore>>=fun()->let()=fdbg"part was skipped"inreturnNone])>>=fun(opt_boundary,opt_it_part)->fdbg_stream_char"after it_part">>=fun()->matchopt_boundarywith[None->(* если границу не нашли *)(* если opt_it_part = None -- не собирали результаты, значит даём it_fold'у multipart_error, вылетаем. если Some -- итерату it_part уже дали EOF в break_sequence, деинит сделан, значит даём .. см выше .. *)leterr_msg=Multipart_error"expected boundary, found EOF"inmap_ready&feed_itit_fold(EOF(Someerr_msg))|Some()->(* если границу нашли (и прочитали) *)eof_to_resopt_it_partNone>>=funres_part->matchres_partwith[`OkNone->loop_boundariesit_fold|`Ok(Somep)->letit_fold=feed_itit_fold(chunk_ofp)inloop_boundariesit_fold|`Errore->(* часть собрать не смогли по вине it_proc -- передаём ошибку в it_fold и допринять части *)loop_boundaries&feed_itit_fold(EOF(Somee))]]]inletrecsearch_for_beginning()=break_subsequence(probe_stringboundary)(it_last2)>>=fun(opt_boundary,it_last2)->matchopt_boundarywith[None->let()=fdbg"sfb: None"inmap_ready&feed_itit_fold(EOFNone)|Some()->let()=fdbg"sfb: Some"ineof_to_resit_last2None>>=funres_last2->matchres_last2with[`Ok([]|['\r';'\n'])->let()=fdbg"sfb: found good prefix"inloop_boundariesit_fold|`Ok_->let()=fdbg"sfb: found bad prefix"infdbg_stream_char"sfb/bad">>=fun()->search_for_beginning()|`Error_->throw_err(Failure"unexpected error from it_last 2")]]insearch_for_beginning();end;