Skip to content
This repository was archived by the owner on Sep 9, 2025. It is now read-only.

Commit c0a49c4

Browse files
committed
Determine which nodes in a file are “local”
A node is local if it cannot possibly participate in a name binding that leaves its file. In a stack graph, that means that there is no sequence of partial paths that connects the node to the root node. We have two methods for “setting” the set of local nodes. The first is meant to be called at index time, and actually analyzes the partial paths in the database to determine which nodes are local. The second is meant to be called at query time, where you've stored the locality information in your storage layer, and just need to load that back into our internal view of the stack graph.
1 parent d37f1d9 commit c0a49c4

9 files changed

Lines changed: 506 additions & 0 deletions

File tree

stack-graphs/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ copious-debugging = []
2020
test = false
2121

2222
[dependencies]
23+
bitvec = "0.22"
2324
controlled-option = "0.4"
2425
either = "1.6"
2526
fxhash = "0.2"

stack-graphs/include/stack-graphs.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,25 @@ struct sg_partial_paths {
548548
// partial path.
549549
typedef uint32_t sg_partial_path_handle;
550550

551+
// Encodes a set of node handles.
552+
//
553+
// The elements are encoded in a bit set. Use the traditional mask and shift pattern to determine
554+
// if a particular handle is in the set:
555+
//
556+
// ``` c
557+
// size_t element_index = handle / 32;
558+
// size_t bit_index = handle % 32;
559+
// size_t bit_mask = 1 << bit_index;
560+
// bool bit_is_set =
561+
// element_index < set.length &&
562+
// (set.elements[element_index] & bit_mask) != 0;
563+
// ```
564+
struct sg_node_handle_set {
565+
const uint32_t *elements;
566+
// Note that this is the number of uint32_t's in `elements`, NOT the number of bits in the set.
567+
size_t length;
568+
};
569+
551570
// Implements a phased forward path-stitching algorithm.
552571
//
553572
// Our overall goal is to start with a set of _seed_ paths, and to repeatedly extend each path by
@@ -936,6 +955,31 @@ void sg_partial_path_database_add_partial_paths(const struct sg_stack_graph *gra
936955
const struct sg_partial_path *paths,
937956
sg_partial_path_handle *out);
938957

958+
// Determines which nodes in the stack graph are “local”, taking into account the partial paths in
959+
// this database. The result is valid until the next call to this function, or until the database
960+
// is freed.
961+
//
962+
// A local node has no partial path that connects it to the root node in either direction. That
963+
// means that it cannot participate in any paths that leave the file.
964+
//
965+
// This method is meant to be used at index time, to calculate the set of nodes that are local
966+
// after having just calculated the set of partial paths for the file.
967+
void sg_partial_path_database_find_local_nodes(struct sg_partial_path_database *db);
968+
969+
// Marks that a list of stack graph nodes are local.
970+
//
971+
// This method is meant to be used at query time. You will have precalculated the set of local
972+
// nodes for a file at index time; at query time, you will load this information from your storage
973+
// layer and use this method to update our internal view of which nodes are local.
974+
void sg_partial_path_database_mark_local_nodes(struct sg_partial_path_database *db,
975+
size_t count,
976+
const sg_node_handle *nodes);
977+
978+
// Returns a reference to the set of stack graph nodes that are local, according to this database
979+
// of partial paths. The resulting set is only valid until the next call to any function that
980+
// mutates the partial path database.
981+
struct sg_node_handle_set sg_partial_path_database_local_nodes(const struct sg_partial_path_database *db);
982+
939983
// Creates a new forward path stitcher that is "seeded" with a set of starting stack graph nodes.
940984
//
941985
// Before calling this method, you must ensure that `db` contains all of the possible partial

stack-graphs/src/arena.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use std::num::NonZeroU32;
3939
use std::ops::Index;
4040
use std::ops::IndexMut;
4141

42+
use bitvec::vec::BitVec;
4243
use controlled_option::Niche;
4344

4445
use crate::utils::cmp_option;
@@ -354,6 +355,79 @@ where
354355
}
355356
}
356357

358+
//-------------------------------------------------------------------------------------------------
359+
// Handle sets
360+
361+
/// Contains a set of handles, encoded efficiently using a bit set.
362+
#[repr(C)]
363+
pub struct HandleSet<T> {
364+
elements: BitVec<bitvec::order::Lsb0, u32>,
365+
_phantom: PhantomData<T>,
366+
}
367+
368+
impl<T> HandleSet<T> {
369+
/// Creates a new, empty handle set.
370+
pub fn new() -> HandleSet<T> {
371+
HandleSet::default()
372+
}
373+
374+
/// Removes all elements from this handle set.
375+
pub fn clear(&mut self) {
376+
self.elements.clear();
377+
}
378+
379+
/// Returns whether this set contains a particular handle.
380+
pub fn contains(&self, handle: Handle<T>) -> bool {
381+
let index = handle.as_usize();
382+
self.elements.get(index).map(|bit| *bit).unwrap_or(false)
383+
}
384+
385+
/// Adds a handle to this set.
386+
pub fn add(&mut self, handle: Handle<T>) {
387+
let index = handle.as_usize();
388+
if self.elements.len() <= index {
389+
self.elements.resize(index + 1, false);
390+
}
391+
let mut bit = unsafe { self.elements.get_unchecked_mut(index) };
392+
*bit = true;
393+
}
394+
395+
/// Removes a handle from this set.
396+
pub fn remove(&mut self, handle: Handle<T>) {
397+
let index = handle.as_usize();
398+
if let Some(mut bit) = self.elements.get_mut(index) {
399+
*bit = false;
400+
}
401+
}
402+
403+
/// Returns an iterator of all of the handles in this set.
404+
pub fn iter(&self) -> impl Iterator<Item = Handle<T>> + '_ {
405+
self.elements
406+
.iter_ones()
407+
.map(|index| Handle::new(unsafe { NonZeroU32::new_unchecked(index as u32) }))
408+
}
409+
410+
/// Returns a pointer to this set's storage.
411+
pub(crate) fn as_ptr(&self) -> *const u32 {
412+
self.elements.as_raw_ptr()
413+
}
414+
415+
/// Returns the number of instances stored in this arena.
416+
#[inline(always)]
417+
pub(crate) fn len(&self) -> usize {
418+
self.elements.as_raw_slice().len()
419+
}
420+
}
421+
422+
impl<T> Default for HandleSet<T> {
423+
fn default() -> HandleSet<T> {
424+
HandleSet {
425+
elements: BitVec::default(),
426+
_phantom: PhantomData,
427+
}
428+
}
429+
}
430+
357431
//-------------------------------------------------------------------------------------------------
358432
// Arena-allocated lists
359433

stack-graphs/src/c.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,6 +1606,76 @@ pub extern "C" fn sg_partial_path_database_add_partial_paths(
16061606
}
16071607
}
16081608

1609+
//-------------------------------------------------------------------------------------------------
1610+
// Local nodes
1611+
1612+
/// Encodes a set of node handles.
1613+
///
1614+
/// The elements are encoded in a bit set. Use the traditional mask and shift pattern to determine
1615+
/// if a particular handle is in the set:
1616+
///
1617+
/// ``` c
1618+
/// size_t element_index = handle / 32;
1619+
/// size_t bit_index = handle % 32;
1620+
/// size_t bit_mask = 1 << bit_index;
1621+
/// bool bit_is_set =
1622+
/// element_index < set.length &&
1623+
/// (set.elements[element_index] & bit_mask) != 0;
1624+
/// ```
1625+
#[repr(C)]
1626+
pub struct sg_node_handle_set {
1627+
pub elements: *const u32,
1628+
/// Note that this is the number of uint32_t's in `elements`, NOT the number of bits in the set.
1629+
pub length: usize,
1630+
}
1631+
1632+
/// Determines which nodes in the stack graph are “local”, taking into account the partial paths in
1633+
/// this database. The result is valid until the next call to this function, or until the database
1634+
/// is freed.
1635+
///
1636+
/// A local node has no partial path that connects it to the root node in either direction. That
1637+
/// means that it cannot participate in any paths that leave the file.
1638+
///
1639+
/// This method is meant to be used at index time, to calculate the set of nodes that are local
1640+
/// after having just calculated the set of partial paths for the file.
1641+
#[no_mangle]
1642+
pub extern "C" fn sg_partial_path_database_find_local_nodes(db: *mut sg_partial_path_database) {
1643+
let db = unsafe { &mut (*db).inner };
1644+
db.find_local_nodes();
1645+
}
1646+
1647+
/// Marks that a list of stack graph nodes are local.
1648+
///
1649+
/// This method is meant to be used at query time. You will have precalculated the set of local
1650+
/// nodes for a file at index time; at query time, you will load this information from your storage
1651+
/// layer and use this method to update our internal view of which nodes are local.
1652+
#[no_mangle]
1653+
pub extern "C" fn sg_partial_path_database_mark_local_nodes(
1654+
db: *mut sg_partial_path_database,
1655+
count: usize,
1656+
nodes: *const sg_node_handle,
1657+
) {
1658+
let db = unsafe { &mut (*db).inner };
1659+
let nodes = unsafe { std::slice::from_raw_parts(nodes, count) };
1660+
for node in nodes {
1661+
db.mark_local_node(node.clone().into());
1662+
}
1663+
}
1664+
1665+
/// Returns a reference to the set of stack graph nodes that are local, according to this database
1666+
/// of partial paths. The resulting set is only valid until the next call to any function that
1667+
/// mutates the partial path database.
1668+
#[no_mangle]
1669+
pub extern "C" fn sg_partial_path_database_local_nodes(
1670+
db: *const sg_partial_path_database,
1671+
) -> sg_node_handle_set {
1672+
let db = unsafe { &(*db).inner };
1673+
sg_node_handle_set {
1674+
elements: db.local_nodes.as_ptr(),
1675+
length: db.local_nodes.len(),
1676+
}
1677+
}
1678+
16091679
//-------------------------------------------------------------------------------------------------
16101680
// Path stitching
16111681

stack-graphs/src/stitching.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ use std::ops::Index;
4343

4444
use crate::arena::Arena;
4545
use crate::arena::Handle;
46+
use crate::arena::HandleSet;
4647
use crate::arena::List;
4748
use crate::arena::ListArena;
4849
use crate::arena::ListCell;
@@ -72,6 +73,7 @@ use crate::paths::SymbolStack;
7273
/// needed.
7374
pub struct Database {
7475
pub(crate) partial_paths: Arena<PartialPath>,
76+
pub(crate) local_nodes: HandleSet<Node>,
7577
symbol_stack_keys: ListArena<Handle<Symbol>>,
7678
symbol_stack_key_cache: HashMap<SymbolStackCacheKey, SymbolStackKeyHandle>,
7779
paths_by_start_node: SupplementalArena<Node, Vec<Handle<PartialPath>>>,
@@ -83,6 +85,7 @@ impl Database {
8385
pub fn new() -> Database {
8486
Database {
8587
partial_paths: Arena::new(),
88+
local_nodes: HandleSet::new(),
8689
symbol_stack_keys: List::new_arena(),
8790
symbol_stack_key_cache: HashMap::new(),
8891
paths_by_start_node: SupplementalArena::new(),
@@ -196,6 +199,82 @@ impl Database {
196199
}
197200
}
198201

202+
/// Determines which nodes in the stack graph are “local”, taking into account the partial
203+
/// paths in this database.
204+
///
205+
/// A local node has no partial path that connects it to the root node in either direction.
206+
/// That means that it cannot participate in any paths that leave the file.
207+
///
208+
/// This method is meant to be used at index time, to calculate the set of nodes that are local
209+
/// after having just calculated the set of partial paths for the file.
210+
pub fn find_local_nodes(&mut self) {
211+
// Assume that any node that is the start or end of a partial path is local to this file
212+
// until we see a path connecting the root node to it (in either direction).
213+
self.local_nodes.clear();
214+
for handle in self.iter_partial_paths() {
215+
self.local_nodes.add(self[handle].start_node);
216+
self.local_nodes.add(self[handle].end_node);
217+
}
218+
219+
// The root node and jump-to-scope node are the most obvious non-local nodes.
220+
let mut nonlocal_start_nodes = HandleSet::new();
221+
let mut nonlocal_end_nodes = HandleSet::new();
222+
self.local_nodes.remove(StackGraph::root_node());
223+
nonlocal_start_nodes.add(StackGraph::root_node());
224+
nonlocal_end_nodes.add(StackGraph::root_node());
225+
self.local_nodes.remove(StackGraph::jump_to_node());
226+
nonlocal_start_nodes.add(StackGraph::jump_to_node());
227+
nonlocal_end_nodes.add(StackGraph::jump_to_node());
228+
229+
// Other nodes are non-local if we see any partial path that connects it to another
230+
// non-local node. Repeat until we reach a fixed point.
231+
let mut keep_checking = true;
232+
while keep_checking {
233+
keep_checking = false;
234+
for handle in self.iter_partial_paths() {
235+
let start_node = self[handle].start_node;
236+
let end_node = self[handle].end_node;
237+
238+
// First check forwards paths, where non-localness propagates from the start node
239+
// of each path.
240+
let start_node_is_nonlocal = nonlocal_start_nodes.contains(start_node);
241+
let end_node_is_nonlocal = nonlocal_start_nodes.contains(end_node);
242+
if start_node_is_nonlocal && !end_node_is_nonlocal {
243+
keep_checking = true;
244+
nonlocal_start_nodes.add(end_node);
245+
self.local_nodes.remove(end_node);
246+
}
247+
248+
// Then check reverse paths, where non-localness propagates from the end node of
249+
// each path.
250+
let start_node_is_nonlocal = nonlocal_end_nodes.contains(start_node);
251+
let end_node_is_nonlocal = nonlocal_end_nodes.contains(end_node);
252+
if !start_node_is_nonlocal && end_node_is_nonlocal {
253+
keep_checking = true;
254+
nonlocal_end_nodes.add(start_node);
255+
self.local_nodes.remove(start_node);
256+
}
257+
}
258+
}
259+
}
260+
261+
/// Marks that a stack graph node is local.
262+
///
263+
/// This method is meant to be used at query time. You will have precalculated the set of
264+
/// local nodes for a file at index time; at query time, you will load this information from
265+
/// your storage layer and use this method to update our internal view of which nodes are
266+
/// local.
267+
pub fn mark_local_node(&mut self, node: Handle<Node>) {
268+
self.local_nodes.add(node);
269+
}
270+
271+
/// Returns whether a node is local according to the partial paths in this database. You must
272+
/// have already called [`find_local_nodes`][] or [`mark_local_node`][], depending on whether
273+
/// it is index time or query time.
274+
pub fn node_is_local(&self, node: Handle<Node>) -> bool {
275+
self.local_nodes.contains(node)
276+
}
277+
199278
/// Returns an iterator over all of the handles of all of the partial paths in this database.
200279
/// (Note that because we're only returning _handles_, this iterator does not retain a
201280
/// reference to the `Database`.)

0 commit comments

Comments
 (0)