@@ -179,37 +179,30 @@ def comments(self):
179179
180180
class UnstructuredTokenList(TokenList):
    """Token list whose ``token_type`` is ``'unstructured'``."""

    token_type = 'unstructured'
184183
185184
class Phrase(TokenList):
    """Token list whose ``token_type`` is ``'phrase'``."""

    token_type = 'phrase'
189187
class Word(TokenList):
    """Token list whose ``token_type`` is ``'word'``."""

    token_type = 'word'
193190
194191
class CFWSList(WhiteSpaceTokenList):
    """White-space token list whose ``token_type`` is ``'cfws'``."""

    token_type = 'cfws'
198194
199195
class Atom(TokenList):
    """Token list whose ``token_type`` is ``'atom'``."""

    token_type = 'atom'
203198
204199
class Token(TokenList):
    """Token list whose ``token_type`` is ``'token'``.

    The class-level ``encode_as_ew`` flag is set to False.
    """

    token_type = 'token'
    encode_as_ew = False
209203
210204
211205class EncodedWord (TokenList ):
212-
213206 token_type = 'encoded-word'
214207 cte = None
215208 charset = None
@@ -496,16 +489,19 @@ def domain(self):
496489
497490
class DotAtom(TokenList):
    """Token list whose ``token_type`` is ``'dot-atom'``."""

    token_type = 'dot-atom'
501493
502494
class DotAtomText(TokenList):
    """Token list whose ``token_type`` is ``'dot-atom-text'``.

    The class-level ``as_ew_allowed`` flag is set to True.
    """

    token_type = 'dot-atom-text'
    as_ew_allowed = True
507498
508499
class NoFoldLiteral(TokenList):
    """Token list whose ``token_type`` is ``'no-fold-literal'``.

    The class-level ``as_ew_allowed`` flag is set to False.
    """

    token_type = 'no-fold-literal'
    as_ew_allowed = False
503+
504+
509505class AddrSpec (TokenList ):
510506
511507 token_type = 'addr-spec'
@@ -809,35 +805,42 @@ def params(self):
809805
810806
class ContentType(ParameterizedHeaderValue):
    """Parameterized header value whose ``token_type`` is ``'content-type'``.

    ``maintype`` and ``subtype`` carry the class-level defaults
    ``'text'`` and ``'plain'``; ``as_ew_allowed`` is False.
    """

    token_type = 'content-type'
    as_ew_allowed = False
    # Class-level defaults for the two halves of the content type.
    maintype = 'text'
    subtype = 'plain'
817812
818813
class ContentDisposition(ParameterizedHeaderValue):
    """Parameterized header value tagged ``'content-disposition'``.

    ``content_disposition`` defaults to None at class level and
    ``as_ew_allowed`` is False.
    """

    token_type = 'content-disposition'
    as_ew_allowed = False
    content_disposition = None
824818
825819
class ContentTransferEncoding(TokenList):
    """Token list whose ``token_type`` is ``'content-transfer-encoding'``.

    ``cte`` defaults to ``'7bit'`` at class level and ``as_ew_allowed``
    is False.
    """

    token_type = 'content-transfer-encoding'
    as_ew_allowed = False
    cte = '7bit'
831824
832825
class HeaderLabel(TokenList):
    """Token list whose ``token_type`` is ``'header-label'``.

    The class-level ``as_ew_allowed`` flag is set to False.
    """

    token_type = 'header-label'
    as_ew_allowed = False
837829
838830
839- class Header (TokenList ):
class MsgID(TokenList):
    """Token list whose ``token_type`` is ``'msg-id'``.

    The class-level ``as_ew_allowed`` flag is False, and ``fold`` is
    overridden so the token is emitted on a single line.
    """

    token_type = 'msg-id'
    as_ew_allowed = False

    def fold(self, policy):
        """Return ``str(self)`` followed by ``policy.linesep``, unfolded."""
        # message-id tokens may not be folded.
        unfolded = str(self)
        return unfolded + policy.linesep
838+
class MessageID(MsgID):
    """``MsgID`` subclass whose ``token_type`` is ``'message-id'``."""

    token_type = 'message-id'
840841
842+
class Header(TokenList):
    """Token list whose ``token_type`` is ``'header'``."""

    token_type = 'header'
842845
843846
@@ -1583,7 +1586,7 @@ def get_addr_spec(value):
15831586 addr_spec .append (token )
15841587 if not value or value [0 ] != '@' :
15851588 addr_spec .defects .append (errors .InvalidHeaderDefect (
1586- "add -spec local part with no domain" ))
1589+ "addr -spec local part with no domain" ))
15871590 return addr_spec , value
15881591 addr_spec .append (ValueTerminal ('@' , 'address-at-symbol' ))
15891592 token , value = get_domain (value [1 :])
@@ -1968,6 +1971,110 @@ def get_address_list(value):
19681971 value = value [1 :]
19691972 return address_list , value
19701973
1974+
def get_no_fold_literal(value):
    """no-fold-literal = "[" *dtext "]"

    Parse a no-fold-literal from the front of *value*.

    Returns a ``(NoFoldLiteral, remainder)`` pair, where *remainder* is
    the text following the closing ``]``.

    Raises ``errors.HeaderParseError`` if *value* is empty, does not
    start with ``[``, or the dtext run is not followed by ``]``.
    """
    literal = NoFoldLiteral()
    if not value:
        raise errors.HeaderParseError(
            "expected no-fold-literal but found '{}'".format(value))
    if not value.startswith('['):
        raise errors.HeaderParseError(
            "expected '[' at the start of no-fold-literal "
            "but found '{}'".format(value))
    literal.append(ValueTerminal('[', 'no-fold-literal-start'))
    # Everything between the brackets is handed to get_dtext().
    dtext, rest = get_dtext(value[1:])
    literal.append(dtext)
    if not rest or rest[0] != ']':
        raise errors.HeaderParseError(
            "expected ']' at the end of no-fold-literal "
            "but found '{}'".format(rest))
    literal.append(ValueTerminal(']', 'no-fold-literal-end'))
    return literal, rest[1:]
1996+
def get_msg_id(value):
    """msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS]
       id-left = dot-atom-text / obs-id-left
       id-right = dot-atom-text / no-fold-literal / obs-id-right
       no-fold-literal = "[" *dtext "]"

    Parse a msg-id from the front of *value*.

    Returns a ``(MsgID, remainder)`` pair.  Recoverable problems are
    recorded on ``msg_id.defects`` rather than raised: an obsolete
    id-left or id-right, a missing id-right, and a missing trailing
    ``>``.

    Raises ``errors.HeaderParseError`` if *value* is empty or does not
    contain ``<`` after optional CFWS, or if neither id-left nor
    id-right can be parsed by any of the alternatives tried.
    """
    msg_id = MsgID()
    # Check for emptiness before indexing: an empty value must fall
    # through to the HeaderParseError below, not raise IndexError.
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        msg_id.append(token)
    if not value or value[0] != '<':
        raise errors.HeaderParseError(
            "expected msg-id but found '{}'".format(value))
    msg_id.append(ValueTerminal('<', 'msg-id-start'))
    value = value[1:]
    # Parse id-left.
    try:
        token, value = get_dot_atom_text(value)
    except errors.HeaderParseError:
        try:
            # obs-id-left is same as local-part of addr-spec.
            token, value = get_obs_local_part(value)
            msg_id.defects.append(errors.ObsoleteHeaderDefect(
                "obsolete id-left in msg-id"))
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected dot-atom-text or obs-id-left"
                " but found '{}'".format(value))
    msg_id.append(token)
    if not value or value[0] != '@':
        msg_id.defects.append(errors.InvalidHeaderDefect(
            "msg-id with no id-right"))
        # Even though there is no id-right, if the local part
        # ends with `>` let's just parse it too and return
        # along with the defect.
        if value and value[0] == '>':
            msg_id.append(ValueTerminal('>', 'msg-id-end'))
            value = value[1:]
        return msg_id, value
    msg_id.append(ValueTerminal('@', 'address-at-symbol'))
    value = value[1:]
    # Parse id-right, trying each alternative of the grammar in turn.
    try:
        token, value = get_dot_atom_text(value)
    except errors.HeaderParseError:
        try:
            token, value = get_no_fold_literal(value)
        except errors.HeaderParseError:
            try:
                token, value = get_domain(value)
                msg_id.defects.append(errors.ObsoleteHeaderDefect(
                    "obsolete id-right in msg-id"))
            except errors.HeaderParseError:
                raise errors.HeaderParseError(
                    "expected dot-atom-text, no-fold-literal or obs-id-right"
                    " but found '{}'".format(value))
    msg_id.append(token)
    if value and value[0] == '>':
        value = value[1:]
    else:
        msg_id.defects.append(errors.InvalidHeaderDefect(
            "missing trailing '>' on msg-id"))
    # The closing terminal is appended even when it was missing from
    # the input; the defect above records that case.
    msg_id.append(ValueTerminal('>', 'msg-id-end'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        msg_id.append(token)
    return msg_id, value
2064+
2065+
def parse_message_id(value):
    """message-id = "Message-ID:" msg-id CRLF

    Parse *value* as the body of a Message-ID header and return a
    ``MessageID`` token list.

    This function does not let ``errors.HeaderParseError`` escape: if
    ``get_msg_id`` raises it, the returned ``MessageID`` contains no
    msg-id token and carries an ``InvalidHeaderDefect`` describing the
    unparseable value.
    """
    message_id = MessageID()
    try:
        token, value = get_msg_id(value)
    except errors.HeaderParseError:
        # ``value`` is still the original input here because the tuple
        # assignment above never completed.
        message_id.defects.append(errors.InvalidHeaderDefect(
            "Expected msg-id but found {!r}".format(value)))
    else:
        # Append only on success: ``token`` is unbound when parsing
        # fails, and using it would raise UnboundLocalError.
        message_id.append(token)
    return message_id
2077+
19712078#
19722079# XXX: As I begin to add additional header parsers, I'm realizing we probably
19732080# have two levels of parser routines: the get_XXX methods that get a token in
0 commit comments