diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index 71ec1c5042fda..e834d0421d920 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -3394,6 +3394,9 @@ pub enum AttrKind { /// Doc attributes (e.g. `#[doc="..."]`) are represented with the `Normal` /// variant (which is much less compact and thus more expensive). DocComment(CommentKind, Symbol), + + /// A regular (non-doc) comment. + Comment(CommentKind, Symbol), } #[derive(Clone, Encodable, Decodable, Debug, Walkable)] diff --git a/compiler/rustc_ast/src/ast_traits.rs b/compiler/rustc_ast/src/ast_traits.rs index 73bfa0ba7ab6d..20a5b0088335a 100644 --- a/compiler/rustc_ast/src/ast_traits.rs +++ b/compiler/rustc_ast/src/ast_traits.rs @@ -173,16 +173,16 @@ impl HasTokens for Attribute { fn tokens(&self) -> Option<&LazyAttrTokenStream> { match &self.kind { AttrKind::Normal(normal) => normal.tokens.as_ref(), - kind @ AttrKind::DocComment(..) => { - panic!("Called tokens on doc comment attr {kind:?}") + kind @ (AttrKind::DocComment(..) | AttrKind::Comment(..)) => { + panic!("Called tokens on (doc) comment attr {kind:?}") } } } fn tokens_mut(&mut self) -> Option<&mut Option> { Some(match &mut self.kind { AttrKind::Normal(normal) => &mut normal.tokens, - kind @ AttrKind::DocComment(..) => { - panic!("Called tokens_mut on doc comment attr {kind:?}") + kind @ (AttrKind::DocComment(..) | AttrKind::Comment(..)) => { + panic!("Called tokens_mut on (doc) comment attr {kind:?}") } }) } diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 369fe12539fa8..ee94e3c4d3c69 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -62,7 +62,7 @@ impl Attribute { pub fn get_normal_item(&self) -> &AttrItem { match &self.kind { AttrKind::Normal(normal) => &normal.item, - AttrKind::DocComment(..) => panic!("unexpected doc comment"), + AttrKind::DocComment(..) | AttrKind::Comment(..) 
=> panic!("unexpected (doc) comment"), } } @@ -71,14 +71,14 @@ impl Attribute { pub fn replace_args(&mut self, new_args: AttrItemKind) { match &mut self.kind { AttrKind::Normal(normal) => normal.item.args = new_args, - AttrKind::DocComment(..) => panic!("unexpected doc comment"), + AttrKind::DocComment(..) | AttrKind::Comment(..) => panic!("unexpected (doc) comment"), } } pub fn unwrap_normal_item(self) -> AttrItem { match self.kind { AttrKind::Normal(normal) => normal.item, - AttrKind::DocComment(..) => panic!("unexpected doc comment"), + AttrKind::DocComment(..) | AttrKind::Comment(..) => panic!("unexpected (doc) comment"), } } } @@ -94,7 +94,7 @@ impl AttributeExt for Attribute { AttrArgs::Eq { expr, .. } => Some(expr.span), _ => None, }, - AttrKind::DocComment(..) => None, + AttrKind::DocComment(..) | AttrKind::Comment(..) => None, } } @@ -103,7 +103,7 @@ impl AttributeExt for Attribute { /// a doc comment) will return `false`. fn is_doc_comment(&self) -> Option { match self.kind { - AttrKind::Normal(..) => None, + AttrKind::Normal(..) | AttrKind::Comment(..) => None, AttrKind::DocComment(..) => Some(self.span), } } @@ -118,7 +118,7 @@ impl AttributeExt for Attribute { None } } - AttrKind::DocComment(..) => None, + AttrKind::DocComment(..) | AttrKind::Comment(..) => None, } } @@ -127,14 +127,14 @@ impl AttributeExt for Attribute { AttrKind::Normal(p) => { Some(p.item.path.segments.iter().map(|i| i.ident.name).collect()) } - AttrKind::DocComment(_, _) => None, + AttrKind::DocComment(_, _) | AttrKind::Comment(_, _) => None, } } fn path_span(&self) -> Option { match &self.kind { AttrKind::Normal(attr) => Some(attr.item.path.span), - AttrKind::DocComment(_, _) => None, + AttrKind::DocComment(_, _) | AttrKind::Comment(_, _) => None, } } @@ -150,7 +150,7 @@ impl AttributeExt for Attribute { .zip(name) .all(|(s, n)| s.args.is_none() && s.ident.name == *n) } - AttrKind::DocComment(..) => false, + AttrKind::DocComment(..) | AttrKind::Comment(..) 
=> false, } } @@ -176,7 +176,7 @@ impl AttributeExt for Attribute { fn meta_item_list(&self) -> Option> { match &self.kind { AttrKind::Normal(normal) => normal.item.meta_item_list(), - AttrKind::DocComment(..) => None, + AttrKind::DocComment(..) | AttrKind::Comment(..) => None, } } @@ -198,7 +198,7 @@ impl AttributeExt for Attribute { fn value_str(&self) -> Option { match &self.kind { AttrKind::Normal(normal) => normal.item.value_str(), - AttrKind::DocComment(..) => None, + AttrKind::DocComment(..) | AttrKind::Comment(..) => None, } } @@ -266,6 +266,7 @@ impl AttributeExt for Attribute { fn doc_resolution_scope(&self) -> Option { match &self.kind { AttrKind::DocComment(..) => Some(self.style), + AttrKind::Comment(..) => None, AttrKind::Normal(normal) if normal.item.path == sym::doc && normal.item.value_str().is_some() => { @@ -307,6 +308,11 @@ impl Attribute { self.style } + /// Returns `true` if this is a regular (non-doc) comment (`//` or `/* */`). + pub fn is_comment(&self) -> bool { + matches!(self.kind, AttrKind::Comment(..)) + } + pub fn may_have_doc_links(&self) -> bool { self.doc_str().is_some_and(|s| comments::may_have_doc_links(s.as_str())) || self.deprecation_note().is_some_and(|s| comments::may_have_doc_links(s.as_str())) @@ -316,14 +322,14 @@ impl Attribute { pub fn meta(&self) -> Option { match &self.kind { AttrKind::Normal(normal) => normal.item.meta(self.span), - AttrKind::DocComment(..) => None, + AttrKind::DocComment(..) | AttrKind::Comment(..) => None, } } pub fn meta_kind(&self) -> Option { match &self.kind { AttrKind::Normal(normal) => normal.item.meta_kind(), - AttrKind::DocComment(..) => None, + AttrKind::DocComment(..) | AttrKind::Comment(..) => None, } } @@ -339,6 +345,9 @@ impl Attribute { token::DocComment(comment_kind, self.style, data), self.span, )], + // Regular comments are never part of any real token stream; returning + // an empty vec prevents them from being injected into macro inputs. + AttrKind::Comment(..) 
=> vec![], } } } @@ -737,6 +746,20 @@ pub fn mk_doc_comment( Attribute { kind: AttrKind::DocComment(comment_kind, data), id: g.mk_attr_id(), style, span } } +pub fn mk_comment( + g: &AttrIdGenerator, + comment_kind: CommentKind, + data: Symbol, + span: Span, +) -> Attribute { + Attribute { + kind: AttrKind::Comment(comment_kind, data), + id: g.mk_attr_id(), + style: AttrStyle::Outer, + span, + } +} + fn mk_attr( g: &AttrIdGenerator, style: AttrStyle, diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 62ec063585171..de7fd4a67b396 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -524,6 +524,10 @@ pub enum TokenKind { /// similarly to symbols in string literal tokens. DocComment(CommentKind, ast::AttrStyle, Symbol), + /// A regular (non-doc) comment token. + /// `Symbol` is the comment's data excluding its delimiters (`//`, `/*`, `*/`). + Comment(CommentKind, Symbol), + /// End Of File Eof, } @@ -654,8 +658,8 @@ impl Token { | FatArrow | Pound | Dollar | Question | SingleQuote => true, OpenParen | CloseParen | OpenBrace | CloseBrace | OpenBracket | CloseBracket - | OpenInvisible(_) | CloseInvisible(_) | Literal(..) | DocComment(..) | Ident(..) - | NtIdent(..) | Lifetime(..) | NtLifetime(..) | Eof => false, + | OpenInvisible(_) | CloseInvisible(_) | Literal(..) | DocComment(..) | Comment(..) + | Ident(..) | NtIdent(..) | Lifetime(..) | NtLifetime(..) | Eof => false, } } @@ -1072,7 +1076,7 @@ impl Token { | Comma | Semi | PathSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question | OpenParen | CloseParen | OpenBrace | CloseBrace | OpenBracket | CloseBracket | OpenInvisible(_) | CloseInvisible(_) | Literal(..) | Ident(..) | NtIdent(..) - | Lifetime(..) | NtLifetime(..) | DocComment(..) | Eof, + | Lifetime(..) | NtLifetime(..) | DocComment(..) | Comment(..) 
| Eof, _, ) => { return None; diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index f46ce8fd76865..6e9665e3a2710 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -696,6 +696,22 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere )); self.hardbreak() } + ast::AttrKind::Comment(comment_kind, data) => { + // Printing here too would duplicate every comment. + // Only emit from the AST attribute when there is no source text available. + if self.comments().is_some() { + return false; + } + match comment_kind { + ast::token::CommentKind::Line => { + self.word(format!("//{}", data)); + } + ast::token::CommentKind::Block => { + self.word(format!("/*{}*/", data)); + } + } + self.hardbreak() + } } true } @@ -1095,6 +1111,10 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere doc_comment_to_string(DocFragmentKind::Sugared(comment_kind), attr_style, data) .into() } + token::Comment(comment_kind, data) => match comment_kind { + token::CommentKind::Line => format!("//{data}").into(), + token::CommentKind::Block => format!("/*{data}*/").into(), + }, token::Eof => "".into(), } } diff --git a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs index ad602d5196dc1..c3246c3b56c02 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs @@ -420,7 +420,7 @@ impl<'a> State<'a> { // let _ = add_attr!(1 + 1); // // We must pretty-print `#[attr] (1 + 1)` not `#[attr] 1 + 1`. - !attrs.is_empty() + attrs.iter().any(|a| !a.is_comment()) && matches!( expr.kind, ast::ExprKind::Binary(..) 
diff --git a/compiler/rustc_attr_parsing/src/interface.rs b/compiler/rustc_attr_parsing/src/interface.rs index 7305c4b7c2fa8..f2ab0b2088ba2 100644 --- a/compiler/rustc_attr_parsing/src/interface.rs +++ b/compiler/rustc_attr_parsing/src/interface.rs @@ -311,6 +311,9 @@ impl<'sess, S: Stage> AttributeParser<'sess, S> { comment: *symbol, })); } + // Regular comments have no semantic meaning + // for attribute parsing; skip them. + ast::AttrKind::Comment(..) => continue, ast::AttrKind::Normal(n) => { attr_paths.push(PathParser(&n.item.path)); let attr_path = AttrPath::from_ast(&n.item.path, lower_span); diff --git a/compiler/rustc_attr_parsing/src/validate_attr.rs b/compiler/rustc_attr_parsing/src/validate_attr.rs index f56e85b110610..6290d39001828 100644 --- a/compiler/rustc_attr_parsing/src/validate_attr.rs +++ b/compiler/rustc_attr_parsing/src/validate_attr.rs @@ -22,7 +22,10 @@ use rustc_span::{Span, Symbol, sym}; use crate::{AttributeParser, Late, session_diagnostics as errors}; pub fn check_attr(psess: &ParseSess, attr: &Attribute) { - if attr.is_doc_comment() || attr.has_name(sym::cfg_trace) || attr.has_name(sym::cfg_attr_trace) + if attr.is_doc_comment() + || attr.is_comment() + || attr.has_name(sym::cfg_trace) + || attr.has_name(sym::cfg_attr_trace) { return; } diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs index a1e14b5245137..e4598cfb97c94 100644 --- a/compiler/rustc_builtin_macros/src/asm.rs +++ b/compiler/rustc_builtin_macros/src/asm.rs @@ -64,6 +64,10 @@ fn validate_asm_args<'a>( for arg in args { for attr in arg.attributes.0.iter() { + // FIXME: can we simply skip them? 
+ if attr.is_comment() { + continue; + } if !matches!(attr.name(), Some(sym::cfg | sym::cfg_attr)) { ecx.dcx().emit_err(errors::AsmAttributeNotSupported { span: attr.span() }); } diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index ec5951e50e3a8..37a4e5292ded2 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -167,7 +167,7 @@ pub(crate) fn attr_into_trace(mut attr: Attribute, trace_name: Symbol) -> Attrib // This makes the trace attributes unobservable to token-based proc macros. *tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::default())); } - AttrKind::DocComment(..) => unreachable!(), + AttrKind::DocComment(..) | AttrKind::Comment(..) => unreachable!(), } attr } @@ -415,6 +415,10 @@ impl<'a> StripUnconfigured<'a> { /// If attributes are not allowed on expressions, emit an error for `attr` #[instrument(level = "trace", skip(self))] pub(crate) fn maybe_emit_expr_attr_err(&self, attr: &Attribute) { + // Regular comments are never "real" attributes. 
+ if attr.is_comment() { + return; + } if self.features.is_some_and(|features| !features.stmt_expr_attributes()) && !attr.span.allows_unstable(sym::stmt_expr_attributes) { diff --git a/compiler/rustc_expand/src/expand.rs b/compiler/rustc_expand/src/expand.rs index c8ef295b2a79d..6c3835d45497c 100644 --- a/compiler/rustc_expand/src/expand.rs +++ b/compiler/rustc_expand/src/expand.rs @@ -2114,7 +2114,10 @@ impl<'a, 'b> InvocationCollector<'a, 'b> { let mut cfg_pos = None; let mut attr_pos = None; for (pos, attr) in item.attrs().iter().enumerate() { - if !attr.is_doc_comment() && !self.cx.expanded_inert_attrs.is_marked(attr) { + if !attr.is_doc_comment() + && !attr.is_comment() + && !self.cx.expanded_inert_attrs.is_marked(attr) + { let name = attr.name(); if name == Some(sym::cfg) || name == Some(sym::cfg_attr) { cfg_pos = Some(pos); // a cfg attr found, no need to search anymore diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index fd5dac3cd9263..c446592643101 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -925,10 +925,10 @@ fn is_empty_token_tree(sess: &Session, seq: &mbe::SequenceRepetition) -> bool { while let Some(tt) = iter.next() { match tt { mbe::TokenTree::MetaVarDecl { kind: NonterminalKind::Vis, .. } => {} - mbe::TokenTree::Token(t @ Token { kind: DocComment(..), .. }) => { + mbe::TokenTree::Token(t @ Token { kind: DocComment(..) | Comment(..), .. }) => { let mut now = t; while let Some(&mbe::TokenTree::Token( - next @ Token { kind: DocComment(..), .. }, + next @ Token { kind: DocComment(..) | Comment(..), .. }, )) = iter.peek() { now = next; diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs index eb874a27cece5..cef66cf875437 100644 --- a/compiler/rustc_expand/src/mbe/quoted.rs +++ b/compiler/rustc_expand/src/mbe/quoted.rs @@ -72,6 +72,12 @@ fn parse( // additional trees if need be. 
let mut iter = input.iter(); while let Some(tree) = iter.next() { + // Skip regular comments. + // They have no semantic meaning in macro rules. + if let tokenstream::TokenTree::Token(Token { kind: token::Comment(..), .. }, _) = tree { + continue; + } + // Given the parsed tree, if there is a metavar and we are expecting matchers, actually // parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`). let tree = parse_tree(tree, &mut iter, part, sess, node_id, features, edition); diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 037afbb9f550b..9489ef931d9d0 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -297,6 +297,8 @@ impl FromInternal for Vec> { span: DelimSpan::from_single(span), })); } + // FIXME: would it be safe to put an `unreachable!()` here? + Comment(..) => {} OpenParen | CloseParen | OpenBrace | CloseBrace | OpenBracket | CloseBracket | OpenInvisible(_) | CloseInvisible(_) | Eof => unreachable!(), diff --git a/compiler/rustc_lint/src/builtin.rs b/compiler/rustc_lint/src/builtin.rs index af590d98c301c..0cb709fefd4b4 100644 --- a/compiler/rustc_lint/src/builtin.rs +++ b/compiler/rustc_lint/src/builtin.rs @@ -825,12 +825,11 @@ fn warn_if_doc(cx: &EarlyContext<'_>, node_span: Span, node_kind: &str, attrs: & if is_doc_comment || is_doc_attribute { let sub = match attr.kind { - AttrKind::DocComment(CommentKind::Line, _) | AttrKind::Normal(..) => { - BuiltinUnusedDocCommentSub::PlainHelp - } - AttrKind::DocComment(CommentKind::Block, _) => { - BuiltinUnusedDocCommentSub::BlockHelp - } + AttrKind::DocComment(CommentKind::Line, _) + | AttrKind::Normal(..) 
+ | AttrKind::Comment(CommentKind::Line, _) => BuiltinUnusedDocCommentSub::PlainHelp, + AttrKind::DocComment(CommentKind::Block, _) + | AttrKind::Comment(CommentKind::Block, _) => BuiltinUnusedDocCommentSub::BlockHelp, }; cx.emit_span_lint( UNUSED_DOC_COMMENTS, diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 5766d25bc86ce..d8a49f7bbabde 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -187,9 +187,19 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // additional validation. let kind = match token.kind { rustc_lexer::TokenKind::LineComment { doc_style } => { - // Skip non-doc comments + // Skip non-doc comments, + // but record them to later attach to AST nodes. let Some(doc_style) = doc_style else { self.lint_unicode_text_flow(start); + // Exclude the opening `//` (2 bytes). + let content_start = start + BytePos(2); + let content = self.str_from(content_start); + let span = self.mk_sp(start, self.pos); + self.psess.all_comments.lock().push(( + span.lo(), + CommentKind::Line, + Symbol::intern(content), + )); preceded_by_whitespace = true; continue; }; @@ -205,9 +215,20 @@ impl<'psess, 'src> Lexer<'psess, 'src> { self.report_unterminated_block_comment(start, doc_style); } - // Skip non-doc comments + // Skip non-doc comments, + // but record them to later attach to AST nodes. let Some(doc_style) = doc_style else { self.lint_unicode_text_flow(start); + // Exclude the opening `/*` and closing `*/` (2 bytes each). 
+ let content_start = start + BytePos(2); + let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); + let content = self.str_from_to(content_start, content_end); + let span = self.mk_sp(start, self.pos); + self.psess.all_comments.lock().push(( + span.lo(), + CommentKind::Block, + Symbol::intern(content), + )); preceded_by_whitespace = true; continue; }; diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index 78b42ee11e2dc..e7c95688e8fa0 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -1,5 +1,5 @@ use rustc_ast as ast; -use rustc_ast::token::{self, MetaVarKind}; +use rustc_ast::token::{self, CommentKind, MetaVarKind}; use rustc_ast::tokenstream::ParserRange; use rustc_ast::{AttrItemKind, Attribute, attr}; use rustc_errors::codes::*; @@ -47,6 +47,32 @@ impl<'a> Parser<'a> { let mut outer_attrs = ast::AttrVec::new(); let mut just_parsed_doc_comment = false; let start_pos = self.num_bump_calls; + + if !self.token.span.is_dummy() && !self.prev_token.span.is_dummy() { + let prev_hi: BytePos = self.prev_token.span.hi(); + let curr_lo: BytePos = self.token.span.lo(); + if prev_hi <= curr_lo { + let curr_span = self.token.span; + let all_comments = self.psess.all_comments.lock(); + for &(pos, comment_kind, data) in all_comments.iter() { + if pos >= prev_hi && pos < curr_lo { + let comment_len = match comment_kind { + CommentKind::Line => 2 + data.as_str().len(), + CommentKind::Block => 4 + data.as_str().len(), + }; + let end = BytePos(pos.0 + comment_len as u32); + let span = curr_span.with_lo(pos).with_hi(end); + outer_attrs.push(attr::mk_comment( + &self.psess.attr_id_generator, + comment_kind, + data, + span, + )); + } + } + } + } + loop { let attr = if self.check(exp!(Pound)) { let prev_outer_attr_sp = outer_attrs.last().map(|attr: &Attribute| attr.span); diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs 
b/compiler/rustc_parse/src/parser/attr_wrapper.rs index e04178645fdd8..1c34176e90754 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -79,6 +79,12 @@ impl AttrWrapper { pub(super) fn is_empty(&self) -> bool { self.attrs.is_empty() } + + /// Returns `true` if this wrapper is non-empty and holds only regular comments. + pub(super) fn is_all_comments(&self) -> bool { + // The emptiness check is required: `all` is vacuously true on an empty list. + !self.attrs.is_empty() && self.attrs.iter().all(|a| a.is_comment()) + } } /// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute @@ -399,7 +405,9 @@ impl<'a> Parser<'a> { /// `test`, `global_allocator`. fn needs_tokens(attrs: &[ast::Attribute]) -> bool { attrs.iter().any(|attr| match attr.name() { - None => !attr.is_doc_comment(), + // Regular comments (`AttrKind::Comment`) are never real attributes. + // So, they should not trigger token collection; same treatment as doc comments. + None => !attr.is_doc_comment() && !attr.is_comment(), Some(name) => name == sym::cfg_attr || !rustc_feature::is_builtin_attr_name(name), }) } diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index c18e8c631fecc..d4ba24bffb17f 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -455,7 +455,7 @@ impl<'a> Parser<'a> { /// Parses prefix-forms of range notation: `..expr`, `..`, `..=expr`. 
fn parse_expr_prefix_range(&mut self, attrs: AttrWrapper) -> PResult<'a, Box> { - if !attrs.is_empty() { + if !attrs.is_empty() && !attrs.is_all_comments() { let err = errors::DotDotRangeAttribute { span: self.token.span }; self.dcx().emit_err(err); } @@ -2890,6 +2890,9 @@ impl<'a> Parser<'a> { branch_span: Span, attrs: AttrWrapper, ) { + if attrs.is_all_comments() { + return; + } if !attrs.is_empty() && let [x0 @ xn] | [x0, .., xn] = &*attrs.take_for_recovery(self.psess) { diff --git a/compiler/rustc_parse/src/parser/generics.rs b/compiler/rustc_parse/src/parser/generics.rs index 8c02092fd6788..8183d6ef79964 100644 --- a/compiler/rustc_parse/src/parser/generics.rs +++ b/compiler/rustc_parse/src/parser/generics.rs @@ -274,12 +274,15 @@ impl<'a> Parser<'a> { return Ok((None, Trailing::No, UsePreAttrPos::No)); } else { // Check for trailing attributes and stop parsing. - if !attrs.is_empty() { + let non_comment: Vec<_> = attrs.iter().filter(|a| !a.is_comment()).collect(); + if !non_comment.is_empty() { if !params.is_empty() { - this.dcx().emit_err(errors::AttrAfterGeneric { span: attrs[0].span }); - } else { this.dcx() - .emit_err(errors::AttrWithoutGenerics { span: attrs[0].span }); + .emit_err(errors::AttrAfterGeneric { span: non_comment[0].span }); + } else { + this.dcx().emit_err(errors::AttrWithoutGenerics { + span: non_comment[0].span, + }); } } return Ok((None, Trailing::No, UsePreAttrPos::No)); @@ -451,7 +454,10 @@ impl<'a> Parser<'a> { let pred_lo = self.token.span; let predicate = self.collect_tokens(None, attrs, ForceCollect::No, |this, attrs| { for attr in &attrs { - self.psess.gated_spans.gate(sym::where_clause_attrs, attr.span); + // FIXME: investigate how to avoid this if. 
+ if !attr.is_comment() { + self.psess.gated_spans.gate(sym::where_clause_attrs, attr.span); + } } let kind = if this.check_lifetime() && this.look_ahead(1, |t| !t.is_like_plus()) { let lifetime = this.expect_lifetime(); diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index 0f4927432f6fa..b6c9638986eea 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -3,6 +3,7 @@ use std::mem; use ast::token::IdentIsRaw; use rustc_ast as ast; +use rustc_ast::HasAttrs; use rustc_ast::ast::*; use rustc_ast::token::{self, Delimiter, InvisibleOrigin, MetaVarKind, TokenKind}; use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree}; @@ -63,6 +64,20 @@ impl<'a> Parser<'a> { let post_attr_lo = self.token.span; let mut items: ThinVec> = ThinVec::new(); + // Collect file/module-level leading comments. + let leading_comments: Vec = if !post_attr_lo.is_dummy() { + let file_start = + self.psess.source_map().lookup_source_file(post_attr_lo.lo()).start_pos; + let first_lo = post_attr_lo.lo(); + if file_start < first_lo { + self.collect_comments_in_range(file_start, first_lo, post_attr_lo) + } else { + Vec::new() + } + } else { + Vec::new() + }; + // There shouldn't be any stray semicolons before or after items. // `parse_item` consumes the appropriate semicolons so any leftover is an error. loop { @@ -73,6 +88,37 @@ impl<'a> Parser<'a> { items.push(item); } + // Prepend the first comments in a file to the first item. + if !leading_comments.is_empty() { + if let Some(first_item) = items.first_mut() { + (**first_item).visit_attrs(|item_attrs| { + let mut new_attrs = rustc_ast::AttrVec::from(leading_comments.clone()); + new_attrs.extend(item_attrs.drain(..)); + *item_attrs = new_attrs; + }); + } + } + + // Collect trailing comments and attach them to the last item's attrs so + // they are preserved in the AST. 
+ if !self.prev_token.span.is_dummy() { + let anchor = self.prev_token.span; + let term_hi = if self.token.kind == token::Eof { + self.psess.source_map().lookup_source_file(anchor.lo()).end_position() + } else { + self.token.span.lo() + }; + let from = items.last().map_or(post_attr_lo.hi(), |i| i.span.hi()); + if from < term_hi { + let trailing = self.collect_comments_in_range(from, term_hi, anchor); + if !trailing.is_empty() { + if let Some(last_item) = items.last_mut() { + (**last_item).visit_attrs(|item_attrs| item_attrs.extend(trailing)); + } + } + } + } + if !self.eat(term) { let token_str = super::token_descr(&self.token); if !self.maybe_consume_incorrect_semicolon(items.last().map(|x| &**x)) { @@ -571,7 +617,8 @@ impl<'a> Parser<'a> { /// Recover if we parsed attributes and expected an item but there was none. fn recover_attrs_no_item(&mut self, attrs: &[Attribute]) -> PResult<'a, ()> { - let ([start @ end] | [start, .., end]) = attrs else { + let real_attrs: Vec<&Attribute> = attrs.iter().filter(|a| !a.is_comment()).collect(); + let ([start @ end] | [start, .., end]) = real_attrs.as_slice() else { return Ok(()); }; let msg = if end.is_doc_comment() { diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 8c1c3c7025f5e..7456822b3453c 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -44,7 +44,7 @@ use rustc_data_structures::fx::FxHashMap; use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult}; use rustc_index::interval::IntervalSet; use rustc_session::parse::ParseSess; -use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw, sym}; +use rustc_span::{BytePos, ErrorGuaranteed, Ident, Span, Symbol, kw, sym}; use thin_vec::ThinVec; use token_type::TokenTypeSet; pub use token_type::{ExpKeywordPair, ExpTokenPair, TokenType}; @@ -1111,6 +1111,31 @@ impl<'a> Parser<'a> { self.expected_token_types.clear(); } + pub(crate) fn collect_comments_in_range( + 
&self, + from: BytePos, + to: BytePos, + anchor: Span, + ) -> Vec { + let all = self.psess.all_comments.lock(); + all.iter() + .filter(|&&(pos, _, _)| pos >= from && pos < to) + .map(|&(pos, kind, data)| { + let comment_len = match kind { + rustc_ast::token::CommentKind::Line => 2 + data.as_str().len(), + rustc_ast::token::CommentKind::Block => 4 + data.as_str().len(), + }; + let end = BytePos(pos.0 + comment_len as u32); + rustc_ast::attr::mk_comment( + &self.psess.attr_id_generator, + kind, + data, + anchor.with_lo(pos).with_hi(end), + ) + }) + .collect() + } + /// Advance the parser by one token. pub fn bump(&mut self) { // Note: destructuring here would give nicer code, but it was found in #96210 to be slower diff --git a/compiler/rustc_parse/src/parser/path.rs b/compiler/rustc_parse/src/parser/path.rs index e514a08c8fb3a..06b1451822cfc 100644 --- a/compiler/rustc_parse/src/parser/path.rs +++ b/compiler/rustc_parse/src/parser/path.rs @@ -418,9 +418,11 @@ impl<'a> Parser<'a> { if matches!(param.ty.kind, TyKind::CVarArgs) { dcx.emit_err(PathFoundCVariadicParams { span: param.pat.span }); } - if !param.attrs.is_empty() { + let non_comment_attrs: Vec<_> = + param.attrs.iter().filter(|a| !a.is_comment()).collect(); + if !non_comment_attrs.is_empty() { dcx.emit_err(PathFoundAttributeInParams { - span: param.attrs[0].span, + span: non_comment_attrs[0].span, }); } param.ty diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index 5bd2ca3139228..329e75300c6a0 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -273,14 +273,21 @@ impl<'a> Parser<'a> { /// Also error if the previous token was a doc comment. 
fn error_outer_attrs(&self, attrs: AttrWrapper) { if !attrs.is_empty() - && let attrs @ [.., last] = &*attrs.take_for_recovery(self.psess) + && !attrs.is_all_comments() + && let attrs @ [.., _last] = &*attrs.take_for_recovery(self.psess) { + // Filter to non-comment attrs for the real checks. + let real_attrs: Vec<_> = attrs.iter().filter(|a| !a.is_comment()).collect(); + if real_attrs.is_empty() { + return; + } + let last = real_attrs.last().unwrap(); if last.is_doc_comment() { self.dcx().emit_err(errors::DocCommentDoesNotDocumentAnything { span: last.span, missing_comma: None, }); - } else if attrs.iter().any(|a| a.style == AttrStyle::Outer) { + } else if real_attrs.iter().any(|a| a.style == AttrStyle::Outer) { self.dcx().emit_err(errors::ExpectedStatementAfterOuterAttr { span: last.span }); } } @@ -782,6 +789,19 @@ impl<'a> Parser<'a> { continue; }; } + if !self.prev_token.span.is_dummy() { + let closing_brace_lo = self.prev_token.span.lo(); + let from = stmts.last().map_or(lo.hi(), |s| s.span.hi()); + if from < closing_brace_lo { + let trailing = + self.collect_comments_in_range(from, closing_brace_lo, self.prev_token.span); + if !trailing.is_empty() { + if let Some(last_stmt) = stmts.last_mut() { + last_stmt.visit_attrs(|attrs| attrs.extend(trailing)); + } + } + } + } Ok(self.mk_block(stmts, s, lo.to(self.prev_token.span))) } @@ -972,7 +992,7 @@ impl<'a> Parser<'a> { // Expression without semicolon. StmtKind::Expr(expr) if classify::expr_requires_semi_to_be_stmt(expr) - && !expr.attrs.is_empty() + && expr.attrs.iter().any(|a| !a.is_comment()) && !matches!(self.token.kind, token::Eof | token::Semi | token::CloseBrace) => { // The user has written `#[attr] expr` which is unsupported. 
(#106020) diff --git a/compiler/rustc_passes/src/input_stats.rs b/compiler/rustc_passes/src/input_stats.rs index e424cc09fb607..3b2368bec5619 100644 --- a/compiler/rustc_passes/src/input_stats.rs +++ b/compiler/rustc_passes/src/input_stats.rs @@ -770,7 +770,7 @@ impl<'v> ast_visit::Visitor<'v> for StatCollector<'v> { fn visit_attribute(&mut self, attr: &'v ast::Attribute) { record_variants!( (self, attr, attr.kind, None, ast, Attribute, AttrKind), - [Normal, DocComment] + [Normal, DocComment, Comment] ); ast_visit::walk_attribute(self, attr) } diff --git a/compiler/rustc_session/src/parse.rs b/compiler/rustc_session/src/parse.rs index 65a15dba42873..89227be845652 100644 --- a/compiler/rustc_session/src/parse.rs +++ b/compiler/rustc_session/src/parse.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use rustc_ast::attr::AttrIdGenerator; use rustc_ast::node_id::NodeId; +use rustc_ast::token::CommentKind; use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; use rustc_data_structures::sync::{AppendOnlyVec, DynSend, DynSync, Lock}; use rustc_errors::annotate_snippet_emitter_writer::AnnotateSnippetEmitter; @@ -18,7 +19,7 @@ use rustc_feature::{GateIssue, UnstableFeatures, find_feature_issue}; use rustc_span::edition::Edition; use rustc_span::hygiene::ExpnId; use rustc_span::source_map::{FilePathMapping, SourceMap}; -use rustc_span::{Span, Symbol, sym}; +use rustc_span::{BytePos, Span, Symbol, sym}; use crate::Session; use crate::config::{Cfg, CheckCfg}; @@ -271,6 +272,9 @@ pub struct ParseSess { proc_macro_quoted_spans: AppendOnlyVec, /// Used to generate new `AttrId`s. Every `AttrId` is unique. pub attr_id_generator: AttrIdGenerator, + /// All regular (non-doc) comments encountered during lexing. + /// Used by the parser to attach comments to the nearest following AST node. 
+ pub all_comments: Lock<Vec<(BytePos, CommentKind, Symbol)>>, } impl ParseSess { @@ -302,6 +306,7 @@ impl ParseSess { assume_incomplete_release: false, proc_macro_quoted_spans: Default::default(), attr_id_generator: AttrIdGenerator::new(), + all_comments: Default::default(), } } diff --git a/src/tools/clippy/clippy_lints/src/attrs/mixed_attributes_style.rs b/src/tools/clippy/clippy_lints/src/attrs/mixed_attributes_style.rs index d71c8e9894bf7..f901b0d9df555 100644 --- a/src/tools/clippy/clippy_lints/src/attrs/mixed_attributes_style.rs +++ b/src/tools/clippy/clippy_lints/src/attrs/mixed_attributes_style.rs @@ -28,6 +28,7 @@ impl From<&AttrKind> for SimpleAttrKind { Self::Normal(path_symbols) }, AttrKind::DocComment(..) => Self::Doc, + AttrKind::Comment(..) => Self::Doc, } } } diff --git a/src/tools/clippy/clippy_lints/src/attrs/mod.rs b/src/tools/clippy/clippy_lints/src/attrs/mod.rs index c15a378053e39..8e12b985b128a 100644 --- a/src/tools/clippy/clippy_lints/src/attrs/mod.rs +++ b/src/tools/clippy/clippy_lints/src/attrs/mod.rs @@ -608,7 +608,7 @@ impl EarlyLintPass for PostExpansionEarlyAttributes { AttrKind::Normal(normal_attr) => { !matches!(normal_attr.item.args, AttrItemKind::Unparsed(AttrArgs::Eq { .. })) }, - AttrKind::DocComment(..) => true, + AttrKind::DocComment(..) | AttrKind::Comment(..) => true, } { span_lint_and_help( diff --git a/src/tools/clippy/clippy_lints/src/empty_line_after.rs b/src/tools/clippy/clippy_lints/src/empty_line_after.rs index b7b84c173f418..cb478ffca8c59 100644 --- a/src/tools/clippy/clippy_lints/src/empty_line_after.rs +++ b/src/tools/clippy/clippy_lints/src/empty_line_after.rs @@ -214,11 +214,18 @@ impl Stop { let SpanData { lo, hi, .. } = attr.span.data(); let file = cx.sess().source_map().lookup_source_file(lo); + // Regular comments are not doc comments and should trigger + // neither the doc-comment nor the outer-attribute empty-line lints. 
+ if matches!(attr.kind, AttrKind::Comment(..)) { + return None; + } + Some(Self { span: attr.span, kind: match attr.kind { AttrKind::Normal(_) => StopKind::Attr, AttrKind::DocComment(comment_kind, _) => StopKind::Doc(comment_kind), + AttrKind::Comment(..) => unreachable!("regular comments are skipped above"), }, first: file.lookup_line(file.relative_position(lo))?, last: file.lookup_line(file.relative_position(hi))?, diff --git a/src/tools/clippy/clippy_utils/src/check_proc_macro.rs b/src/tools/clippy/clippy_utils/src/check_proc_macro.rs index 601cf564062bb..2d338bec1238c 100644 --- a/src/tools/clippy/clippy_utils/src/check_proc_macro.rs +++ b/src/tools/clippy/clippy_utils/src/check_proc_macro.rs @@ -379,6 +379,8 @@ fn attr_search_pat(attr: &Attribute) -> (Pat, Pat) { (Pat::Str("/*!"), Pat::Str("*/")) } }, + AttrKind::Comment(CommentKind::Line, ..) => (Pat::Str("//"), Pat::Str("")), + AttrKind::Comment(CommentKind::Block, ..) => (Pat::Str("/*"), Pat::Str("*/")), } }