Skip to content

Commit 8d4d4b4

Browse files
committed
Auto merge of #119912 - notriddle:notriddle/reexport-dedup, r=<try>
rustdoc-search: single result for items with multiple paths. Part of #15723. Preview: https://notriddle.com/rustdoc-html-demo-9/reexport-dup/std/index.html?search=hashmap. This change uses the same "exact" paths as trait implementors and type alias inlining to track items with multiple reachable paths. This way, if you search for `vec`, you get only the `std` export of it, and not the one from `alloc`. It still includes all the items in the search index, so that you can search for them by all available paths. For example, try `core::option` and `std::option`, and notice that the results page doesn't show duplicates, but still shows all the items in their respective crates.
2 parents 23148b1 + f6f69e8 commit 8d4d4b4

13 files changed

+315
-26
lines changed

src/librustdoc/formats/cache.rs

+12
Original file line numberDiff line numberDiff line change
@@ -348,16 +348,28 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> {
348348
{
349349
let desc =
350350
short_markdown_summary(&item.doc_value(), &item.link_names(self.cache));
351+
// For searching purposes, a re-export is a duplicate if:
352+
//
353+
// - It's either an inline, or a true re-export
354+
// - It's got the same name
355+
// - Both of them have the same exact path
356+
let defid = (match &*item.kind {
357+
&clean::ItemKind::ImportItem(ref import) => import.source.did,
358+
_ => None,
359+
})
360+
.or_else(|| item.item_id.as_def_id());
351361
// In case this is a field from a tuple struct, we don't add it into
352362
// the search index because its name is something like "0", which is
353363
// not useful for rustdoc search.
354364
self.cache.search_index.push(IndexItem {
355365
ty,
366+
defid,
356367
name: s,
357368
path: join_with_double_colon(path),
358369
desc,
359370
parent,
360371
parent_idx: None,
372+
exact_path: None,
361373
impl_id: if let Some(ParentStackItem::Impl { item_id, .. }) =
362374
self.cache.parent_stack.last()
363375
{

src/librustdoc/html/render/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,13 @@ pub(crate) enum RenderMode {
111111
#[derive(Debug)]
112112
pub(crate) struct IndexItem {
113113
pub(crate) ty: ItemType,
114+
pub(crate) defid: Option<DefId>,
114115
pub(crate) name: Symbol,
115116
pub(crate) path: String,
116117
pub(crate) desc: String,
117118
pub(crate) parent: Option<DefId>,
118119
pub(crate) parent_idx: Option<isize>,
120+
pub(crate) exact_path: Option<String>,
119121
pub(crate) impl_id: Option<DefId>,
120122
pub(crate) search_type: Option<IndexItemFunctionType>,
121123
pub(crate) aliases: Box<[Symbol]>,

src/librustdoc/html/render/search_index.rs

+135-16
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use std::collections::{BTreeMap, VecDeque};
44
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
55
use rustc_middle::ty::TyCtxt;
66
use rustc_span::def_id::DefId;
7+
use rustc_span::sym;
78
use rustc_span::symbol::Symbol;
89
use serde::ser::{Serialize, SerializeSeq, SerializeStruct, Serializer};
910
use thin_vec::ThinVec;
@@ -22,10 +23,13 @@ pub(crate) fn build_index<'tcx>(
2223
cache: &mut Cache,
2324
tcx: TyCtxt<'tcx>,
2425
) -> String {
26+
// Maps from ID to position in the `crate_paths` array.
2527
let mut itemid_to_pathid = FxHashMap::default();
2628
let mut primitives = FxHashMap::default();
2729
let mut associated_types = FxHashMap::default();
28-
let mut crate_paths = vec![];
30+
31+
// item type, display path, re-exported internal path
32+
let mut crate_paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)> = vec![];
2933

3034
// Attach all orphan items to the type's definition if the type
3135
// has since been learned.
@@ -35,11 +39,13 @@ pub(crate) fn build_index<'tcx>(
3539
let desc = short_markdown_summary(&item.doc_value(), &item.link_names(cache));
3640
cache.search_index.push(IndexItem {
3741
ty: item.type_(),
42+
defid: item.item_id.as_def_id(),
3843
name: item.name.unwrap(),
3944
path: join_with_double_colon(&fqp[..fqp.len() - 1]),
4045
desc,
4146
parent: Some(parent),
4247
parent_idx: None,
48+
exact_path: None,
4349
impl_id,
4450
search_type: get_function_type_for_search(
4551
item,
@@ -88,17 +94,22 @@ pub(crate) fn build_index<'tcx>(
8894
map: &mut FxHashMap<F, isize>,
8995
itemid: F,
9096
lastpathid: &mut isize,
91-
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
97+
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
9298
item_type: ItemType,
9399
path: &[Symbol],
100+
exact_path: Option<&[Symbol]>,
94101
) -> RenderTypeId {
95102
match map.entry(itemid) {
96103
Entry::Occupied(entry) => RenderTypeId::Index(*entry.get()),
97104
Entry::Vacant(entry) => {
98105
let pathid = *lastpathid;
99106
entry.insert(pathid);
100107
*lastpathid += 1;
101-
crate_paths.push((item_type, path.to_vec()));
108+
crate_paths.push((
109+
item_type,
110+
path.to_vec(),
111+
exact_path.map(|path| path.to_vec()),
112+
));
102113
RenderTypeId::Index(pathid)
103114
}
104115
}
@@ -111,21 +122,30 @@ pub(crate) fn build_index<'tcx>(
111122
primitives: &mut FxHashMap<Symbol, isize>,
112123
associated_types: &mut FxHashMap<Symbol, isize>,
113124
lastpathid: &mut isize,
114-
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
125+
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
115126
) -> Option<RenderTypeId> {
116-
let Cache { ref paths, ref external_paths, .. } = *cache;
127+
let Cache { ref paths, ref external_paths, ref exact_paths, .. } = *cache;
117128
match id {
118129
RenderTypeId::DefId(defid) => {
119130
if let Some(&(ref fqp, item_type)) =
120131
paths.get(&defid).or_else(|| external_paths.get(&defid))
121132
{
133+
let exact_fqp = exact_paths
134+
.get(&defid)
135+
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
136+
// Re-exports only count if the item name (last path segment) matches the
137+
// display path's name. This is a size optimization, as well as a DWIM attempt:
138+
// if the names are not the same, the intent probably isn't, either.
139+
// The closure parameter must not shadow the outer `fqp`, or this is always true.
140+
.filter(|this_fqp| this_fqp.last() == fqp.last());
122141
Some(insert_into_map(
123142
itemid_to_pathid,
124143
ItemId::DefId(defid),
125144
lastpathid,
126145
crate_paths,
127146
item_type,
128147
fqp,
148+
exact_fqp.map(|x| &x[..]).filter(|exact_fqp| exact_fqp != fqp),
129149
))
130150
} else {
131151
None
@@ -140,6 +160,7 @@ pub(crate) fn build_index<'tcx>(
140160
crate_paths,
141161
ItemType::Primitive,
142162
&[sym],
163+
None,
143164
))
144165
}
145166
RenderTypeId::Index(_) => Some(id),
@@ -150,6 +171,7 @@ pub(crate) fn build_index<'tcx>(
150171
crate_paths,
151172
ItemType::AssocType,
152173
&[sym],
174+
None,
153175
)),
154176
}
155177
}
@@ -161,7 +183,7 @@ pub(crate) fn build_index<'tcx>(
161183
primitives: &mut FxHashMap<Symbol, isize>,
162184
associated_types: &mut FxHashMap<Symbol, isize>,
163185
lastpathid: &mut isize,
164-
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
186+
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
165187
) {
166188
if let Some(generics) = &mut ty.generics {
167189
for item in generics {
@@ -258,7 +280,7 @@ pub(crate) fn build_index<'tcx>(
258280
}
259281
}
260282

261-
let Cache { ref paths, .. } = *cache;
283+
let Cache { ref paths, ref exact_paths, ref external_paths, .. } = *cache;
262284

263285
// Then, on parent modules
264286
let crate_items: Vec<&IndexItem> = search_index
@@ -273,14 +295,54 @@ pub(crate) fn build_index<'tcx>(
273295
lastpathid += 1;
274296

275297
if let Some(&(ref fqp, short)) = paths.get(&defid) {
276-
crate_paths.push((short, fqp.clone()));
298+
let exact_fqp = exact_paths
299+
.get(&defid)
300+
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
301+
.filter(|exact_fqp| {
302+
exact_fqp.last() == Some(&item.name) && *exact_fqp != fqp
303+
});
304+
crate_paths.push((short, fqp.clone(), exact_fqp.cloned()));
277305
Some(pathid)
278306
} else {
279307
None
280308
}
281309
}
282310
});
283311

312+
if let Some(defid) = item.defid
313+
&& item.parent_idx.is_none()
314+
{
315+
// If this is a re-export, retain the original path.
316+
// Associated items don't use this.
317+
// Their parent carries the exact fqp instead.
318+
let exact_fqp = exact_paths
319+
.get(&defid)
320+
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp));
321+
item.exact_path = exact_fqp.and_then(|fqp| {
322+
// re-exports only count if the name is exactly the same
323+
// this is a size optimization, as well as a DWIM attempt
324+
// since if the names are not the same, the intent probably
325+
// isn't, either
326+
if fqp.last() != Some(&item.name) {
327+
return None;
328+
}
329+
let path =
330+
if item.ty == ItemType::Macro && tcx.has_attr(defid, sym::macro_export) {
331+
// `#[macro_export]` always exports to the crate root.
332+
tcx.crate_name(defid.krate).to_string()
333+
} else {
334+
if fqp.len() < 2 {
335+
return None;
336+
}
337+
join_with_double_colon(&fqp[..fqp.len() - 1])
338+
};
339+
if path == item.path {
340+
return None;
341+
}
342+
Some(path)
343+
});
344+
}
345+
284346
// Omit the parent path if it is the same as that of the prior item.
285347
if lastpath == &item.path {
286348
item.path.clear();
@@ -319,7 +381,7 @@ pub(crate) fn build_index<'tcx>(
319381
struct CrateData<'a> {
320382
doc: String,
321383
items: Vec<&'a IndexItem>,
322-
paths: Vec<(ItemType, Vec<Symbol>)>,
384+
paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
323385
// The String is alias name and the vec is the list of the elements with this alias.
324386
//
325387
// To be noted: the `usize` elements are indexes to `items`.
@@ -332,6 +394,7 @@ pub(crate) fn build_index<'tcx>(
332394
ty: ItemType,
333395
name: Symbol,
334396
path: Option<usize>,
397+
exact_path: Option<usize>,
335398
}
336399

337400
impl Serialize for Paths {
@@ -345,6 +408,10 @@ pub(crate) fn build_index<'tcx>(
345408
if let Some(ref path) = self.path {
346409
seq.serialize_element(path)?;
347410
}
411+
if let Some(ref path) = self.exact_path {
412+
assert!(self.path.is_some());
413+
seq.serialize_element(path)?;
414+
}
348415
seq.end()
349416
}
350417
}
@@ -367,43 +434,94 @@ pub(crate) fn build_index<'tcx>(
367434
mod_paths.insert(&item.path, index);
368435
}
369436
let mut paths = Vec::with_capacity(self.paths.len());
370-
for (ty, path) in &self.paths {
437+
for (ty, path, exact) in &self.paths {
371438
if path.len() < 2 {
372-
paths.push(Paths { ty: *ty, name: path[0], path: None });
439+
paths.push(Paths { ty: *ty, name: path[0], path: None, exact_path: None });
373440
continue;
374441
}
375442
let full_path = join_with_double_colon(&path[..path.len() - 1]);
443+
let full_exact_path = exact
444+
.as_ref()
445+
.filter(|exact| exact.last() == path.last() && exact.len() >= 2)
446+
.map(|exact| join_with_double_colon(&exact[..exact.len() - 1]));
447+
let exact_path = extra_paths.len() + self.items.len();
448+
let exact_path = full_exact_path.as_ref().map(|full_exact_path| match extra_paths
449+
.entry(full_exact_path.clone())
450+
{
451+
Entry::Occupied(entry) => *entry.get(),
452+
Entry::Vacant(entry) => {
453+
if let Some(index) = mod_paths.get(&full_exact_path) {
454+
return *index;
455+
}
456+
entry.insert(exact_path);
457+
if !revert_extra_paths.contains_key(&exact_path) {
458+
revert_extra_paths.insert(exact_path, full_exact_path.clone());
459+
}
460+
exact_path
461+
}
462+
});
376463
if let Some(index) = mod_paths.get(&full_path) {
377-
paths.push(Paths { ty: *ty, name: *path.last().unwrap(), path: Some(*index) });
464+
paths.push(Paths {
465+
ty: *ty,
466+
name: *path.last().unwrap(),
467+
path: Some(*index),
468+
exact_path,
469+
});
378470
continue;
379471
}
380472
// It means it comes from an external crate so the item and its path will be
381473
// stored into another array.
382474
//
383475
// `index` is put after the last `mod_paths`
384476
let index = extra_paths.len() + self.items.len();
385-
if !revert_extra_paths.contains_key(&index) {
386-
revert_extra_paths.insert(index, full_path.clone());
387-
}
388-
match extra_paths.entry(full_path) {
477+
match extra_paths.entry(full_path.clone()) {
389478
Entry::Occupied(entry) => {
390479
paths.push(Paths {
391480
ty: *ty,
392481
name: *path.last().unwrap(),
393482
path: Some(*entry.get()),
483+
exact_path,
394484
});
395485
}
396486
Entry::Vacant(entry) => {
397487
entry.insert(index);
488+
if !revert_extra_paths.contains_key(&index) {
489+
revert_extra_paths.insert(index, full_path);
490+
}
398491
paths.push(Paths {
399492
ty: *ty,
400493
name: *path.last().unwrap(),
401494
path: Some(index),
495+
exact_path,
402496
});
403497
}
404498
}
405499
}
406500

501+
// Direct exports use adjacent arrays for the current crate's items,
502+
// but re-exported exact paths don't.
503+
let mut re_exports = Vec::new();
504+
for (item_index, item) in self.items.iter().enumerate() {
505+
if let Some(exact_path) = item.exact_path.as_ref() {
506+
if let Some(path_index) = mod_paths.get(&exact_path) {
507+
re_exports.push((item_index, *path_index));
508+
} else {
509+
let path_index = extra_paths.len() + self.items.len();
510+
let path_index = match extra_paths.entry(exact_path.clone()) {
511+
Entry::Occupied(entry) => *entry.get(),
512+
Entry::Vacant(entry) => {
513+
entry.insert(path_index);
514+
if !revert_extra_paths.contains_key(&path_index) {
515+
revert_extra_paths.insert(path_index, exact_path.clone());
516+
}
517+
path_index
518+
}
519+
};
520+
re_exports.push((item_index, path_index));
521+
}
522+
}
523+
}
524+
407525
let mut names = Vec::with_capacity(self.items.len());
408526
let mut types = String::with_capacity(self.items.len());
409527
let mut full_paths = Vec::with_capacity(self.items.len());
@@ -463,6 +581,7 @@ pub(crate) fn build_index<'tcx>(
463581
crate_data.serialize_field("f", &functions)?;
464582
crate_data.serialize_field("c", &deprecated)?;
465583
crate_data.serialize_field("p", &paths)?;
584+
crate_data.serialize_field("r", &re_exports)?;
466585
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
467586
if has_aliases {
468587
crate_data.serialize_field("a", &self.aliases)?;

0 commit comments

Comments
 (0)