Add collapsed sample profiler output format

Implements collapsed stack trace output for the sampling profiler. This format represents each complete call stack as a semicolon-delimited string followed by its sample count, making it compatible with external flamegraph generation tools such as flamegraph.pl. Each frame is written in filename:function:line notation, and call trees are stored during sampling so they can be post-processed efficiently into the collapsed format.
python · pablogsal · Jul 10, 2025 · Jun 4, 2025 · Jul 3, 2025 · Jun 22, 2025
commit 2e8e0a9e1628290950c1e80e6c41dbecfb02a068
diff --git a/Lib/profile/sample.py b/Lib/profile/sample.py
@@ -6,6 +6,7 @@
 from _colorize import ANSIColors
 
 from .pstats_collector import PstatsCollector
+from .stack_collectors import CollapsedStackCollector
 
 
 class SampleProfiler:
@@ -255,6 +256,9 @@ def sample(
     match output_format:
         case "pstats":
             collector = PstatsCollector(sample_interval_usec)
+        case "collapsed":
+            collector = CollapsedStackCollector()
+            filename = filename or f"collapsed.{pid}.txt"
         case _:
             raise ValueError(f"Invalid output format: {output_format}")
 
@@ -281,6 +285,8 @@ def main():
             "The default sort is by cumulative time (--sort-cumulative)."
             "Format descriptions:\n"
             "  pstats     Standard Python profiler output format\n"
+            "  collapsed  Stack traces in collapsed format (file:function:line;file:function:line;... count)\n"
+            "             Useful for generating flamegraphs with tools like flamegraph.pl"
         ),
         formatter_class=argparse.RawDescriptionHelpFormatter,
         color=True,
@@ -325,7 +331,7 @@ def main():
     )
     parser.add_argument(
         "--format",
-        choices=["pstats"],
+        choices=["pstats", "collapsed"],
         default="pstats",
         help="Output format (default: pstats)",
     )

diff --git a/Lib/profile/stack_collectors.py b/Lib/profile/stack_collectors.py
@@ -0,0 +1,37 @@
+import collections
+import os
+
+from .collector import Collector
+
+
+class StackTraceCollector(Collector):  # Base collector that keeps every sampled call stack in memory.
+    def __init__(self):
+        self.call_trees = []  # one entry per sampled thread stack, each a list of frames ordered root -> leaf
+        self.function_samples = collections.defaultdict(int)  # frame -> number of times it appeared in a sampled stack
+
+    def collect(self, stack_frames):  # stack_frames: iterable of (thread_id, frames) pairs; thread_id is not used
+        for thread_id, frames in stack_frames:
+            if frames and len(frames) > 0:  # skip empty stacks; NOTE(review): for a list the len() test duplicates truthiness
+                # Store the complete call stack, reversed so the stored list runs root first, leaf last
+                call_tree = list(reversed(frames))
+                self.call_trees.append(call_tree)
+
+                # Count samples per frame entry; a frame that occurs twice in one stack is counted twice
+                for frame in frames:
+                    self.function_samples[frame] += 1
+
+
+class CollapsedStackCollector(StackTraceCollector):  # Exports the stored stacks as "frame;frame;... count" text lines.
+    def export(self, filename):  # filename: path of the text file to create or overwrite
+        stack_counter = collections.Counter()  # collapsed stack string -> number of identical sampled stacks
+        for call_tree in self.call_trees:
+            # Call tree is already in root->leaf order (it was reversed when collect() stored it)
+            stack_str = ";".join(
+                f"{os.path.basename(f[0])}:{f[2]}:{f[1]}" for f in call_tree  # basename(f[0]):f[2]:f[1], i.e. file:function:line
+            )
+            stack_counter[stack_str] += 1
+
+        with open(filename, "w") as f:  # one "<stack> <count>" line per distinct stack, in first-seen order
+            for stack, count in stack_counter.items():
+                f.write(f"{stack} {count}\n")
+        print(f"Collapsed stack output written to {filename}")