logo

oasis-root

Compiled tree of Oasis Linux, based on own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

_parseaddr.py (17689B)


  1. # Copyright (C) 2002-2007 Python Software Foundation
  2. # Contact: email-sig@python.org
  3. """Email address parsing code.
  4. Lifted directly from rfc822.py. This should eventually be rewritten.
  5. """
  6. __all__ = [
  7. 'mktime_tz',
  8. 'parsedate',
  9. 'parsedate_tz',
  10. 'quote',
  11. ]
  12. import time, calendar
  13. SPACE = ' '
  14. EMPTYSTRING = ''
  15. COMMASPACE = ', '
  16. # Parse a date field
  17. _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
  18. 'aug', 'sep', 'oct', 'nov', 'dec',
  19. 'january', 'february', 'march', 'april', 'may', 'june', 'july',
  20. 'august', 'september', 'october', 'november', 'december']
  21. _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
  22. # The timezone table does not include the military time zones defined
  23. # in RFC822, other than Z. According to RFC1123, the description in
  24. # RFC822 gets the signs wrong, so we can't rely on any such time
  25. # zones. RFC1123 recommends that numeric timezone indicators be used
  26. # instead of timezone names.
  27. _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
  28. 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
  29. 'EST': -500, 'EDT': -400, # Eastern
  30. 'CST': -600, 'CDT': -500, # Central
  31. 'MST': -700, 'MDT': -600, # Mountain
  32. 'PST': -800, 'PDT': -700 # Pacific
  33. }
  34. def parsedate_tz(data):
  35. """Convert a date string to a time tuple.
  36. Accounts for military timezones.
  37. """
  38. res = _parsedate_tz(data)
  39. if not res:
  40. return
  41. if res[9] is None:
  42. res[9] = 0
  43. return tuple(res)
  44. def _parsedate_tz(data):
  45. """Convert date to extended time tuple.
  46. The last (additional) element is the time zone offset in seconds, except if
  47. the timezone was specified as -0000. In that case the last element is
  48. None. This indicates a UTC timestamp that explicitly declaims knowledge of
  49. the source timezone, as opposed to a +0000 timestamp that indicates the
  50. source timezone really was UTC.
  51. """
  52. if not data:
  53. return None
  54. data = data.split()
  55. if not data: # This happens for whitespace-only input.
  56. return None
  57. # The FWS after the comma after the day-of-week is optional, so search and
  58. # adjust for this.
  59. if data[0].endswith(',') or data[0].lower() in _daynames:
  60. # There's a dayname here. Skip it
  61. del data[0]
  62. else:
  63. i = data[0].rfind(',')
  64. if i >= 0:
  65. data[0] = data[0][i+1:]
  66. if len(data) == 3: # RFC 850 date, deprecated
  67. stuff = data[0].split('-')
  68. if len(stuff) == 3:
  69. data = stuff + data[1:]
  70. if len(data) == 4:
  71. s = data[3]
  72. i = s.find('+')
  73. if i == -1:
  74. i = s.find('-')
  75. if i > 0:
  76. data[3:] = [s[:i], s[i:]]
  77. else:
  78. data.append('') # Dummy tz
  79. if len(data) < 5:
  80. return None
  81. data = data[:5]
  82. [dd, mm, yy, tm, tz] = data
  83. mm = mm.lower()
  84. if mm not in _monthnames:
  85. dd, mm = mm, dd.lower()
  86. if mm not in _monthnames:
  87. return None
  88. mm = _monthnames.index(mm) + 1
  89. if mm > 12:
  90. mm -= 12
  91. if dd[-1] == ',':
  92. dd = dd[:-1]
  93. i = yy.find(':')
  94. if i > 0:
  95. yy, tm = tm, yy
  96. if yy[-1] == ',':
  97. yy = yy[:-1]
  98. if not yy[0].isdigit():
  99. yy, tz = tz, yy
  100. if tm[-1] == ',':
  101. tm = tm[:-1]
  102. tm = tm.split(':')
  103. if len(tm) == 2:
  104. [thh, tmm] = tm
  105. tss = '0'
  106. elif len(tm) == 3:
  107. [thh, tmm, tss] = tm
  108. elif len(tm) == 1 and '.' in tm[0]:
  109. # Some non-compliant MUAs use '.' to separate time elements.
  110. tm = tm[0].split('.')
  111. if len(tm) == 2:
  112. [thh, tmm] = tm
  113. tss = 0
  114. elif len(tm) == 3:
  115. [thh, tmm, tss] = tm
  116. else:
  117. return None
  118. try:
  119. yy = int(yy)
  120. dd = int(dd)
  121. thh = int(thh)
  122. tmm = int(tmm)
  123. tss = int(tss)
  124. except ValueError:
  125. return None
  126. # Check for a yy specified in two-digit format, then convert it to the
  127. # appropriate four-digit format, according to the POSIX standard. RFC 822
  128. # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
  129. # mandates a 4-digit yy. For more information, see the documentation for
  130. # the time module.
  131. if yy < 100:
  132. # The year is between 1969 and 1999 (inclusive).
  133. if yy > 68:
  134. yy += 1900
  135. # The year is between 2000 and 2068 (inclusive).
  136. else:
  137. yy += 2000
  138. tzoffset = None
  139. tz = tz.upper()
  140. if tz in _timezones:
  141. tzoffset = _timezones[tz]
  142. else:
  143. try:
  144. tzoffset = int(tz)
  145. except ValueError:
  146. pass
  147. if tzoffset==0 and tz.startswith('-'):
  148. tzoffset = None
  149. # Convert a timezone offset into seconds ; -0500 -> -18000
  150. if tzoffset:
  151. if tzoffset < 0:
  152. tzsign = -1
  153. tzoffset = -tzoffset
  154. else:
  155. tzsign = 1
  156. tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
  157. # Daylight Saving Time flag is set to -1, since DST is unknown.
  158. return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
  159. def parsedate(data):
  160. """Convert a time string to a time tuple."""
  161. t = parsedate_tz(data)
  162. if isinstance(t, tuple):
  163. return t[:9]
  164. else:
  165. return t
  166. def mktime_tz(data):
  167. """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
  168. if data[9] is None:
  169. # No zone info, so localtime is better assumption than GMT
  170. return time.mktime(data[:8] + (-1,))
  171. else:
  172. t = calendar.timegm(data)
  173. return t - data[9]
  174. def quote(str):
  175. """Prepare string to be used in a quoted string.
  176. Turns backslash and double quote characters into quoted pairs. These
  177. are the only characters that need to be quoted inside a quoted string.
  178. Does not add the surrounding double quotes.
  179. """
  180. return str.replace('\\', '\\\\').replace('"', '\\"')
  181. class AddrlistClass:
  182. """Address parser class by Ben Escoto.
  183. To understand what this class does, it helps to have a copy of RFC 2822 in
  184. front of you.
  185. Note: this class interface is deprecated and may be removed in the future.
  186. Use email.utils.AddressList instead.
  187. """
  188. def __init__(self, field):
  189. """Initialize a new instance.
  190. `field' is an unparsed address header field, containing
  191. one or more addresses.
  192. """
  193. self.specials = '()<>@,:;.\"[]'
  194. self.pos = 0
  195. self.LWS = ' \t'
  196. self.CR = '\r\n'
  197. self.FWS = self.LWS + self.CR
  198. self.atomends = self.specials + self.LWS + self.CR
  199. # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
  200. # is obsolete syntax. RFC 2822 requires that we recognize obsolete
  201. # syntax, so allow dots in phrases.
  202. self.phraseends = self.atomends.replace('.', '')
  203. self.field = field
  204. self.commentlist = []
  205. def gotonext(self):
  206. """Skip white space and extract comments."""
  207. wslist = []
  208. while self.pos < len(self.field):
  209. if self.field[self.pos] in self.LWS + '\n\r':
  210. if self.field[self.pos] not in '\n\r':
  211. wslist.append(self.field[self.pos])
  212. self.pos += 1
  213. elif self.field[self.pos] == '(':
  214. self.commentlist.append(self.getcomment())
  215. else:
  216. break
  217. return EMPTYSTRING.join(wslist)
  218. def getaddrlist(self):
  219. """Parse all addresses.
  220. Returns a list containing all of the addresses.
  221. """
  222. result = []
  223. while self.pos < len(self.field):
  224. ad = self.getaddress()
  225. if ad:
  226. result += ad
  227. else:
  228. result.append(('', ''))
  229. return result
  230. def getaddress(self):
  231. """Parse the next address."""
  232. self.commentlist = []
  233. self.gotonext()
  234. oldpos = self.pos
  235. oldcl = self.commentlist
  236. plist = self.getphraselist()
  237. self.gotonext()
  238. returnlist = []
  239. if self.pos >= len(self.field):
  240. # Bad email address technically, no domain.
  241. if plist:
  242. returnlist = [(SPACE.join(self.commentlist), plist[0])]
  243. elif self.field[self.pos] in '.@':
  244. # email address is just an addrspec
  245. # this isn't very efficient since we start over
  246. self.pos = oldpos
  247. self.commentlist = oldcl
  248. addrspec = self.getaddrspec()
  249. returnlist = [(SPACE.join(self.commentlist), addrspec)]
  250. elif self.field[self.pos] == ':':
  251. # address is a group
  252. returnlist = []
  253. fieldlen = len(self.field)
  254. self.pos += 1
  255. while self.pos < len(self.field):
  256. self.gotonext()
  257. if self.pos < fieldlen and self.field[self.pos] == ';':
  258. self.pos += 1
  259. break
  260. returnlist = returnlist + self.getaddress()
  261. elif self.field[self.pos] == '<':
  262. # Address is a phrase then a route addr
  263. routeaddr = self.getrouteaddr()
  264. if self.commentlist:
  265. returnlist = [(SPACE.join(plist) + ' (' +
  266. ' '.join(self.commentlist) + ')', routeaddr)]
  267. else:
  268. returnlist = [(SPACE.join(plist), routeaddr)]
  269. else:
  270. if plist:
  271. returnlist = [(SPACE.join(self.commentlist), plist[0])]
  272. elif self.field[self.pos] in self.specials:
  273. self.pos += 1
  274. self.gotonext()
  275. if self.pos < len(self.field) and self.field[self.pos] == ',':
  276. self.pos += 1
  277. return returnlist
  278. def getrouteaddr(self):
  279. """Parse a route address (Return-path value).
  280. This method just skips all the route stuff and returns the addrspec.
  281. """
  282. if self.field[self.pos] != '<':
  283. return
  284. expectroute = False
  285. self.pos += 1
  286. self.gotonext()
  287. adlist = ''
  288. while self.pos < len(self.field):
  289. if expectroute:
  290. self.getdomain()
  291. expectroute = False
  292. elif self.field[self.pos] == '>':
  293. self.pos += 1
  294. break
  295. elif self.field[self.pos] == '@':
  296. self.pos += 1
  297. expectroute = True
  298. elif self.field[self.pos] == ':':
  299. self.pos += 1
  300. else:
  301. adlist = self.getaddrspec()
  302. self.pos += 1
  303. break
  304. self.gotonext()
  305. return adlist
  306. def getaddrspec(self):
  307. """Parse an RFC 2822 addr-spec."""
  308. aslist = []
  309. self.gotonext()
  310. while self.pos < len(self.field):
  311. preserve_ws = True
  312. if self.field[self.pos] == '.':
  313. if aslist and not aslist[-1].strip():
  314. aslist.pop()
  315. aslist.append('.')
  316. self.pos += 1
  317. preserve_ws = False
  318. elif self.field[self.pos] == '"':
  319. aslist.append('"%s"' % quote(self.getquote()))
  320. elif self.field[self.pos] in self.atomends:
  321. if aslist and not aslist[-1].strip():
  322. aslist.pop()
  323. break
  324. else:
  325. aslist.append(self.getatom())
  326. ws = self.gotonext()
  327. if preserve_ws and ws:
  328. aslist.append(ws)
  329. if self.pos >= len(self.field) or self.field[self.pos] != '@':
  330. return EMPTYSTRING.join(aslist)
  331. aslist.append('@')
  332. self.pos += 1
  333. self.gotonext()
  334. domain = self.getdomain()
  335. if not domain:
  336. # Invalid domain, return an empty address instead of returning a
  337. # local part to denote failed parsing.
  338. return EMPTYSTRING
  339. return EMPTYSTRING.join(aslist) + domain
  340. def getdomain(self):
  341. """Get the complete domain name from an address."""
  342. sdlist = []
  343. while self.pos < len(self.field):
  344. if self.field[self.pos] in self.LWS:
  345. self.pos += 1
  346. elif self.field[self.pos] == '(':
  347. self.commentlist.append(self.getcomment())
  348. elif self.field[self.pos] == '[':
  349. sdlist.append(self.getdomainliteral())
  350. elif self.field[self.pos] == '.':
  351. self.pos += 1
  352. sdlist.append('.')
  353. elif self.field[self.pos] == '@':
  354. # bpo-34155: Don't parse domains with two `@` like
  355. # `a@malicious.org@important.com`.
  356. return EMPTYSTRING
  357. elif self.field[self.pos] in self.atomends:
  358. break
  359. else:
  360. sdlist.append(self.getatom())
  361. return EMPTYSTRING.join(sdlist)
  362. def getdelimited(self, beginchar, endchars, allowcomments=True):
  363. """Parse a header fragment delimited by special characters.
  364. `beginchar' is the start character for the fragment.
  365. If self is not looking at an instance of `beginchar' then
  366. getdelimited returns the empty string.
  367. `endchars' is a sequence of allowable end-delimiting characters.
  368. Parsing stops when one of these is encountered.
  369. If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
  370. within the parsed fragment.
  371. """
  372. if self.field[self.pos] != beginchar:
  373. return ''
  374. slist = ['']
  375. quote = False
  376. self.pos += 1
  377. while self.pos < len(self.field):
  378. if quote:
  379. slist.append(self.field[self.pos])
  380. quote = False
  381. elif self.field[self.pos] in endchars:
  382. self.pos += 1
  383. break
  384. elif allowcomments and self.field[self.pos] == '(':
  385. slist.append(self.getcomment())
  386. continue # have already advanced pos from getcomment
  387. elif self.field[self.pos] == '\\':
  388. quote = True
  389. else:
  390. slist.append(self.field[self.pos])
  391. self.pos += 1
  392. return EMPTYSTRING.join(slist)
  393. def getquote(self):
  394. """Get a quote-delimited fragment from self's field."""
  395. return self.getdelimited('"', '"\r', False)
  396. def getcomment(self):
  397. """Get a parenthesis-delimited fragment from self's field."""
  398. return self.getdelimited('(', ')\r', True)
  399. def getdomainliteral(self):
  400. """Parse an RFC 2822 domain-literal."""
  401. return '[%s]' % self.getdelimited('[', ']\r', False)
  402. def getatom(self, atomends=None):
  403. """Parse an RFC 2822 atom.
  404. Optional atomends specifies a different set of end token delimiters
  405. (the default is to use self.atomends). This is used e.g. in
  406. getphraselist() since phrase endings must not include the `.' (which
  407. is legal in phrases)."""
  408. atomlist = ['']
  409. if atomends is None:
  410. atomends = self.atomends
  411. while self.pos < len(self.field):
  412. if self.field[self.pos] in atomends:
  413. break
  414. else:
  415. atomlist.append(self.field[self.pos])
  416. self.pos += 1
  417. return EMPTYSTRING.join(atomlist)
  418. def getphraselist(self):
  419. """Parse a sequence of RFC 2822 phrases.
  420. A phrase is a sequence of words, which are in turn either RFC 2822
  421. atoms or quoted-strings. Phrases are canonicalized by squeezing all
  422. runs of continuous whitespace into one space.
  423. """
  424. plist = []
  425. while self.pos < len(self.field):
  426. if self.field[self.pos] in self.FWS:
  427. self.pos += 1
  428. elif self.field[self.pos] == '"':
  429. plist.append(self.getquote())
  430. elif self.field[self.pos] == '(':
  431. self.commentlist.append(self.getcomment())
  432. elif self.field[self.pos] in self.phraseends:
  433. break
  434. else:
  435. plist.append(self.getatom(self.phraseends))
  436. return plist
  437. class AddressList(AddrlistClass):
  438. """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
  439. def __init__(self, field):
  440. AddrlistClass.__init__(self, field)
  441. if field:
  442. self.addresslist = self.getaddrlist()
  443. else:
  444. self.addresslist = []
  445. def __len__(self):
  446. return len(self.addresslist)
  447. def __add__(self, other):
  448. # Set union
  449. newaddr = AddressList(None)
  450. newaddr.addresslist = self.addresslist[:]
  451. for x in other.addresslist:
  452. if not x in self.addresslist:
  453. newaddr.addresslist.append(x)
  454. return newaddr
  455. def __iadd__(self, other):
  456. # Set union, in-place
  457. for x in other.addresslist:
  458. if not x in self.addresslist:
  459. self.addresslist.append(x)
  460. return self
  461. def __sub__(self, other):
  462. # Set difference
  463. newaddr = AddressList(None)
  464. for x in self.addresslist:
  465. if not x in other.addresslist:
  466. newaddr.addresslist.append(x)
  467. return newaddr
  468. def __isub__(self, other):
  469. # Set difference, in-place
  470. for x in other.addresslist:
  471. if x in self.addresslist:
  472. self.addresslist.remove(x)
  473. return self
  474. def __getitem__(self, index):
  475. # Make indexing, slices, and 'in' work
  476. return self.addresslist[index]