logo

oasis-root

Compiled tree of Oasis Linux, based on own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

ElementPath.py (14007B)


  1. #
  2. # ElementTree
  3. # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
  4. #
  5. # limited xpath support for element trees
  6. #
  7. # history:
  8. # 2003-05-23 fl created
  9. # 2003-05-28 fl added support for // etc
  10. # 2003-08-27 fl fixed parsing of periods in element names
  11. # 2007-09-10 fl new selection engine
  12. # 2007-09-12 fl fixed parent selector
  13. # 2007-09-13 fl added iterfind; changed findall to return a list
  14. # 2007-11-30 fl added namespaces support
  15. # 2009-10-30 fl added child element value filter
  16. #
  17. # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
  18. #
  19. # fredrik@pythonware.com
  20. # http://www.pythonware.com
  21. #
  22. # --------------------------------------------------------------------
  23. # The ElementTree toolkit is
  24. #
  25. # Copyright (c) 1999-2009 by Fredrik Lundh
  26. #
  27. # By obtaining, using, and/or copying this software and/or its
  28. # associated documentation, you agree that you have read, understood,
  29. # and will comply with the following terms and conditions:
  30. #
  31. # Permission to use, copy, modify, and distribute this software and
  32. # its associated documentation for any purpose and without fee is
  33. # hereby granted, provided that the above copyright notice appears in
  34. # all copies, and that both that copyright notice and this permission
  35. # notice appear in supporting documentation, and that the name of
  36. # Secret Labs AB or the author not be used in advertising or publicity
  37. # pertaining to distribution of the software without specific, written
  38. # prior permission.
  39. #
  40. # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  41. # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
  42. # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
  43. # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  44. # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  45. # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  46. # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  47. # OF THIS SOFTWARE.
  48. # --------------------------------------------------------------------
  49. # Licensed to PSF under a Contributor Agreement.
  50. # See https://www.python.org/psf/license for licensing details.
  51. ##
  52. # Implementation module for XPath support. There's usually no reason
  53. # to import this module directly; the <b>ElementTree</b> does this for
  54. # you, if needed.
  55. ##
  56. import re
# Tokenizer for the supported XPath subset.  The pattern has two capture
# groups, so findall() returns 2-tuples: group 1 holds an operator or a
# quoted string, group 2 holds a name (optionally prefixed with "{uri}").
# A run of whitespace matches without filling either group.
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|\"[^\"]*\"|"  # single- or double-quoted string literal
    r"::|"
    r"//?|"  # "/" or "//"
    r"\.\.|"  # ".."
    r"\(\)|"  # "()"
    r"!=|"
    r"[/.*:\[\]\(\)@=])|"  # any remaining single-character operator
    r"((?:\{[^}]+\})?[^/\[\]\(\)@!=\s]+)|"  # name with optional "{uri}" prefix
    r"\s+"  # whitespace; neither group captures
    )
  69. def xpath_tokenizer(pattern, namespaces=None):
  70. default_namespace = namespaces.get('') if namespaces else None
  71. parsing_attribute = False
  72. for token in xpath_tokenizer_re.findall(pattern):
  73. ttype, tag = token
  74. if tag and tag[0] != "{":
  75. if ":" in tag:
  76. prefix, uri = tag.split(":", 1)
  77. try:
  78. if not namespaces:
  79. raise KeyError
  80. yield ttype, "{%s}%s" % (namespaces[prefix], uri)
  81. except KeyError:
  82. raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
  83. elif default_namespace and not parsing_attribute:
  84. yield ttype, "{%s}%s" % (default_namespace, tag)
  85. else:
  86. yield token
  87. parsing_attribute = False
  88. else:
  89. yield token
  90. parsing_attribute = ttype == '@'
  91. def get_parent_map(context):
  92. parent_map = context.parent_map
  93. if parent_map is None:
  94. context.parent_map = parent_map = {}
  95. for p in context.root.iter():
  96. for e in p:
  97. parent_map[e] = p
  98. return parent_map
  99. def _is_wildcard_tag(tag):
  100. return tag[:3] == '{*}' or tag[-2:] == '}*'
  101. def _prepare_tag(tag):
  102. _isinstance, _str = isinstance, str
  103. if tag == '{*}*':
  104. # Same as '*', but no comments or processing instructions.
  105. # It can be a surprise that '*' includes those, but there is no
  106. # justification for '{*}*' doing the same.
  107. def select(context, result):
  108. for elem in result:
  109. if _isinstance(elem.tag, _str):
  110. yield elem
  111. elif tag == '{}*':
  112. # Any tag that is not in a namespace.
  113. def select(context, result):
  114. for elem in result:
  115. el_tag = elem.tag
  116. if _isinstance(el_tag, _str) and el_tag[0] != '{':
  117. yield elem
  118. elif tag[:3] == '{*}':
  119. # The tag in any (or no) namespace.
  120. suffix = tag[2:] # '}name'
  121. no_ns = slice(-len(suffix), None)
  122. tag = tag[3:]
  123. def select(context, result):
  124. for elem in result:
  125. el_tag = elem.tag
  126. if el_tag == tag or _isinstance(el_tag, _str) and el_tag[no_ns] == suffix:
  127. yield elem
  128. elif tag[-2:] == '}*':
  129. # Any tag in the given namespace.
  130. ns = tag[:-1]
  131. ns_only = slice(None, len(ns))
  132. def select(context, result):
  133. for elem in result:
  134. el_tag = elem.tag
  135. if _isinstance(el_tag, _str) and el_tag[ns_only] == ns:
  136. yield elem
  137. else:
  138. raise RuntimeError(f"internal parser error, got {tag}")
  139. return select
  140. def prepare_child(next, token):
  141. tag = token[1]
  142. if _is_wildcard_tag(tag):
  143. select_tag = _prepare_tag(tag)
  144. def select(context, result):
  145. def select_child(result):
  146. for elem in result:
  147. yield from elem
  148. return select_tag(context, select_child(result))
  149. else:
  150. if tag[:2] == '{}':
  151. tag = tag[2:] # '{}tag' == 'tag'
  152. def select(context, result):
  153. for elem in result:
  154. for e in elem:
  155. if e.tag == tag:
  156. yield e
  157. return select
  158. def prepare_star(next, token):
  159. def select(context, result):
  160. for elem in result:
  161. yield from elem
  162. return select
  163. def prepare_self(next, token):
  164. def select(context, result):
  165. yield from result
  166. return select
  167. def prepare_descendant(next, token):
  168. try:
  169. token = next()
  170. except StopIteration:
  171. return
  172. if token[0] == "*":
  173. tag = "*"
  174. elif not token[0]:
  175. tag = token[1]
  176. else:
  177. raise SyntaxError("invalid descendant")
  178. if _is_wildcard_tag(tag):
  179. select_tag = _prepare_tag(tag)
  180. def select(context, result):
  181. def select_child(result):
  182. for elem in result:
  183. for e in elem.iter():
  184. if e is not elem:
  185. yield e
  186. return select_tag(context, select_child(result))
  187. else:
  188. if tag[:2] == '{}':
  189. tag = tag[2:] # '{}tag' == 'tag'
  190. def select(context, result):
  191. for elem in result:
  192. for e in elem.iter(tag):
  193. if e is not elem:
  194. yield e
  195. return select
  196. def prepare_parent(next, token):
  197. def select(context, result):
  198. # FIXME: raise error if .. is applied at toplevel?
  199. parent_map = get_parent_map(context)
  200. result_map = {}
  201. for elem in result:
  202. if elem in parent_map:
  203. parent = parent_map[elem]
  204. if parent not in result_map:
  205. result_map[parent] = None
  206. yield parent
  207. return select
  208. def prepare_predicate(next, token):
  209. # FIXME: replace with real parser!!! refs:
  210. # http://effbot.org/zone/simple-iterator-parser.htm
  211. # http://javascript.crockford.com/tdop/tdop.html
  212. signature = []
  213. predicate = []
  214. while 1:
  215. try:
  216. token = next()
  217. except StopIteration:
  218. return
  219. if token[0] == "]":
  220. break
  221. if token == ('', ''):
  222. # ignore whitespace
  223. continue
  224. if token[0] and token[0][:1] in "'\"":
  225. token = "'", token[0][1:-1]
  226. signature.append(token[0] or "-")
  227. predicate.append(token[1])
  228. signature = "".join(signature)
  229. # use signature to determine predicate type
  230. if signature == "@-":
  231. # [@attribute] predicate
  232. key = predicate[1]
  233. def select(context, result):
  234. for elem in result:
  235. if elem.get(key) is not None:
  236. yield elem
  237. return select
  238. if signature == "@-='" or signature == "@-!='":
  239. # [@attribute='value'] or [@attribute!='value']
  240. key = predicate[1]
  241. value = predicate[-1]
  242. def select(context, result):
  243. for elem in result:
  244. if elem.get(key) == value:
  245. yield elem
  246. def select_negated(context, result):
  247. for elem in result:
  248. if (attr_value := elem.get(key)) is not None and attr_value != value:
  249. yield elem
  250. return select_negated if '!=' in signature else select
  251. if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
  252. # [tag]
  253. tag = predicate[0]
  254. def select(context, result):
  255. for elem in result:
  256. if elem.find(tag) is not None:
  257. yield elem
  258. return select
  259. if signature == ".='" or signature == ".!='" or (
  260. (signature == "-='" or signature == "-!='")
  261. and not re.match(r"\-?\d+$", predicate[0])):
  262. # [.='value'] or [tag='value'] or [.!='value'] or [tag!='value']
  263. tag = predicate[0]
  264. value = predicate[-1]
  265. if tag:
  266. def select(context, result):
  267. for elem in result:
  268. for e in elem.findall(tag):
  269. if "".join(e.itertext()) == value:
  270. yield elem
  271. break
  272. def select_negated(context, result):
  273. for elem in result:
  274. for e in elem.iterfind(tag):
  275. if "".join(e.itertext()) != value:
  276. yield elem
  277. break
  278. else:
  279. def select(context, result):
  280. for elem in result:
  281. if "".join(elem.itertext()) == value:
  282. yield elem
  283. def select_negated(context, result):
  284. for elem in result:
  285. if "".join(elem.itertext()) != value:
  286. yield elem
  287. return select_negated if '!=' in signature else select
  288. if signature == "-" or signature == "-()" or signature == "-()-":
  289. # [index] or [last()] or [last()-index]
  290. if signature == "-":
  291. # [index]
  292. index = int(predicate[0]) - 1
  293. if index < 0:
  294. raise SyntaxError("XPath position >= 1 expected")
  295. else:
  296. if predicate[0] != "last":
  297. raise SyntaxError("unsupported function")
  298. if signature == "-()-":
  299. try:
  300. index = int(predicate[2]) - 1
  301. except ValueError:
  302. raise SyntaxError("unsupported expression")
  303. if index > -2:
  304. raise SyntaxError("XPath offset from last() must be negative")
  305. else:
  306. index = -1
  307. def select(context, result):
  308. parent_map = get_parent_map(context)
  309. for elem in result:
  310. try:
  311. parent = parent_map[elem]
  312. # FIXME: what if the selector is "*" ?
  313. elems = list(parent.findall(elem.tag))
  314. if elems[index] is elem:
  315. yield elem
  316. except (IndexError, KeyError):
  317. pass
  318. return select
  319. raise SyntaxError("invalid predicate")
# Dispatch table used by iterfind(): maps a token's type string to the
# function that builds the selector step for it.
ops = {
    "": prepare_child,  # plain name token
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
    }
# Compiled selectors, keyed by the path plus the sorted namespace items.
# iterfind() empties it once it holds more than 100 entries.
_cache = {}
class _SelectorContext:
    """State handed to every selector step while one search is executed."""
    # Child -> parent mapping; stays None until get_parent_map() builds it.
    parent_map = None
    def __init__(self, root):
        # Element the search starts from; get_parent_map() walks it.
        self.root = root
  333. # --------------------------------------------------------------------
  334. ##
  335. # Generate all matching objects.
  336. def iterfind(elem, path, namespaces=None):
  337. # compile selector pattern
  338. if path[-1:] == "/":
  339. path = path + "*" # implicit all (FIXME: keep this?)
  340. cache_key = (path,)
  341. if namespaces:
  342. cache_key += tuple(sorted(namespaces.items()))
  343. try:
  344. selector = _cache[cache_key]
  345. except KeyError:
  346. if len(_cache) > 100:
  347. _cache.clear()
  348. if path[:1] == "/":
  349. raise SyntaxError("cannot use absolute path on element")
  350. next = iter(xpath_tokenizer(path, namespaces)).__next__
  351. try:
  352. token = next()
  353. except StopIteration:
  354. return
  355. selector = []
  356. while 1:
  357. try:
  358. selector.append(ops[token[0]](next, token))
  359. except StopIteration:
  360. raise SyntaxError("invalid path") from None
  361. try:
  362. token = next()
  363. if token[0] == "/":
  364. token = next()
  365. except StopIteration:
  366. break
  367. _cache[cache_key] = selector
  368. # execute selector pattern
  369. result = [elem]
  370. context = _SelectorContext(elem)
  371. for select in selector:
  372. result = select(context, result)
  373. return result
  374. ##
  375. # Find first matching object.
  376. def find(elem, path, namespaces=None):
  377. return next(iterfind(elem, path, namespaces), None)
  378. ##
  379. # Find all matching objects.
  380. def findall(elem, path, namespaces=None):
  381. return list(iterfind(elem, path, namespaces))
  382. ##
  383. # Find text for first matching object.
  384. def findtext(elem, path, default=None, namespaces=None):
  385. try:
  386. elem = next(iterfind(elem, path, namespaces))
  387. return elem.text or ""
  388. except StopIteration:
  389. return default