Use lxml in scripts/compile_tests when it is available (#120633)

Parsing the test report XML files with lxml is around 30x faster (300s -> 10s) than with xml.etree.ElementTree. Pull Request resolved: https://github.com/pytorch/pytorch/pull/120633 Approved by: https://github.com/oulgen
2025-12-06 12:20:52 +01:00 · 2024-02-26 12:48:02 -08:00 · 2024-02-26 12:48:02 -08:00 · 7b1cc140aa
commit 7b1cc140aa
parent 5a0a964444
2 changed files with 17 additions and 3 deletions
--- a/scripts/compile_tests/common.py
+++ b/scripts/compile_tests/common.py
@@ -1,6 +1,19 @@
+import functools
 import os
-import xml.etree.ElementTree as ET
+import warnings

+try:
+    import lxml.etree
+
+    p = lxml.etree.XMLParser(huge_tree=True)
+    parse = functools.partial(lxml.etree.parse, parser=p)
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+    parse = ET.parse
+    warnings.warn(
+        "lxml was not found. `pip install lxml` to make this script run much faster"
+    )
 from download_reports import download_reports


@@ -9,7 +22,7 @@ def open_test_results(directory):
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".xml"):
-                tree = ET.parse(f"{root}/{file}")
+                tree = parse(f"{root}/{file}")
                xmls.append(tree)
    return xmls

--- a/torch/testing/_internal/dynamo_test_failures.py
+++ b/torch/testing/_internal/dynamo_test_failures.py
@@ -11,7 +11,8 @@ import sys
 # see scripts/compile_tests/update_failures.py
 #
 # If you're adding a new test, and it's failing PYTORCH_TEST_WITH_DYNAMO=1,
-# either add the appropriate decorators to your test or list them in this file.
+# either add the appropriate decorators to your test or add skips for them
+# via test/dynamo_skips and test/dynamo_expected_failures.
 #
 # *These are not exactly unittest.expectedFailure and unittest.skip. We'll
 # always execute the test and then suppress the signal, if necessary.