diff --git a/_typos.toml b/_typos.toml index 583ebdb..45b430f 100755 --- a/_typos.toml +++ b/_typos.toml @@ -30,23 +30,13 @@ extend-ignore-identifiers-re = [ "prev", "normalises", "goes", - "Bare", - "inout", - "ba", - "ede", ] -[default.extend-words] -Bare = "Bare" -Supress = "Supress" -teh = "teh" -Teh = "Teh" - [files] ignore-hidden = false ignore-files = true extend-exclude = [ - "CHANGELOG.md", + "./CHANGELOG.md", "/usr/**/*", "/tmp/**/*", "/**/node_modules/**", diff --git a/classifications/_universal_rules.json b/classifications/_universal_rules.json index 9a3c5ce..702feeb 100644 --- a/classifications/_universal_rules.json +++ b/classifications/_universal_rules.json @@ -1189,7 +1189,7 @@ "inner": "syntax_punctuation", "inner_attribute_item": "syntax_annotation", "inner_doc_comment_marker": "syntax_literal", - "input": "syntax_keyword", + "inout": "syntax_keyword", "instance": "syntax_keyword", "instance_declarations": "definition_type", "instance_expression": "operation_operator", diff --git a/crates/ast-engine/src/language.rs b/crates/ast-engine/src/language.rs index bf91261..c120dc1 100644 --- a/crates/ast-engine/src/language.rs +++ b/crates/ast-engine/src/language.rs @@ -73,11 +73,8 @@ pub trait Language: Clone + std::fmt::Debug + Send + Sync + 'static { /// Implementors should override this method and return `Some(Self)` when the /// file type is supported and `None` when it is not. fn from_path>(_path: P) -> Option { - unimplemented!( - "Language::from_path is not implemented for type `{}`. \ - Override Language::from_path for this type if path-based detection is required.", - std::any::type_name::() - ) + // TODO: throw panic here if not implemented properly? + None } fn kind_to_id(&self, kind: &str) -> u16; diff --git a/crates/ast-engine/src/replacer/indent.rs b/crates/ast-engine/src/replacer/indent.rs index e53cb01..59262cd 100644 --- a/crates/ast-engine/src/replacer/indent.rs +++ b/crates/ast-engine/src/replacer/indent.rs @@ -101,9 +101,6 @@ fn get_new_line() -> C::Underlying { fn get_space() -> C::Underlying { C::decode_str(" ")[0].clone() } -fn get_tab() -> C::Underlying { - C::decode_str("\t")[0].clone() -} const MAX_LOOK_AHEAD: usize = 512; @@ -186,16 +183,21 @@ pub fn formatted_slice<'a, C: Content>( if !slice.contains(&get_new_line::()) { return Cow::Borrowed(slice); } - let (indent, is_tab) = get_indent_at_offset_with_tab::(content.get_range(0..start)); Cow::Owned( - indent_lines::(0, &DeindentedExtract::MultiLine(slice, indent), is_tab).into_owned(), + indent_lines::( + 0, + &DeindentedExtract::MultiLine( + slice, + get_indent_at_offset::(content.get_range(0..start)), + ), + ) + .into_owned(), ) } pub fn indent_lines<'a, C: Content>( indent: usize, extract: &'a DeindentedExtract<'a, C>, - is_tab: bool, ) -> Cow<'a, [C::Underlying]> { use DeindentedExtract::{MultiLine, SingleLine}; let (lines, original_indent) = match extract { @@ -211,27 +213,18 @@ pub fn indent_lines<'a, C: Content>( Ordering::Less => Cow::Owned(indent_lines_impl::( indent - original_indent, lines.split(|b| *b == get_new_line::()), - is_tab, )), } } -fn indent_lines_impl<'a, C, Lines>( - indent: usize, - mut lines: Lines, - is_tab: bool, -) -> Vec +fn indent_lines_impl<'a, C, Lines>(indent: usize, mut lines: Lines) -> Vec where C: Content + 'a, Lines: Iterator, { let mut ret = vec![]; - let indent_char = if is_tab { - get_tab::() - } else { - get_space::() - }; - let leading: Vec<_> = std::iter::repeat_n(indent_char, indent).collect(); + let space = get_space::(); + let leading: Vec<_> = std::iter::repeat_n(space, indent).collect(); // first line wasn't indented, so we don't add leading spaces if let Some(line) = lines.next() { ret.extend(line.iter().cloned()); @@ -248,62 +241,40 @@ where /// returns 0 if no indent is found before the offset /// either truly no indent exists, or the offset is in a long line pub fn get_indent_at_offset(src: &[C::Underlying]) -> usize { - get_indent_at_offset_with_tab::(src).0 -} - -/// returns (indent, `is_tab`) -pub fn get_indent_at_offset_with_tab(src: &[C::Underlying]) -> (usize, bool) { let lookahead = src.len().max(MAX_LOOK_AHEAD) - MAX_LOOK_AHEAD; let mut indent = 0; - let mut is_tab = false; let new_line = get_new_line::(); let space = get_space::(); - let tab = get_tab::(); + // TODO: support TAB. only whitespace is supported now for c in src[lookahead..].iter().rev() { if *c == new_line { - return (indent, is_tab); + return indent; } if *c == space { indent += 1; - } else if *c == tab { - indent += 1; - is_tab = true; } else { indent = 0; - is_tab = false; } } // lookahead == 0 means we have indentation at first line. if lookahead == 0 && indent != 0 { - (indent, is_tab) + indent } else { - (0, false) + 0 } } // NOTE: we assume input is well indented. // following lines should have fewer indentations than initial line fn remove_indent(indent: usize, src: &[C::Underlying]) -> Vec { + let indentation: Vec<_> = std::iter::repeat_n(get_space::(), indent).collect(); let new_line = get_new_line::(); - let space = get_space::(); - let tab = get_tab::(); let lines: Vec<_> = src .split(|b| *b == new_line) - .map(|line| { - let mut stripped = line; - let mut count = 0; - while count < indent { - if let Some(rest) = stripped.strip_prefix(std::slice::from_ref(&space)) { - stripped = rest; - } else if let Some(rest) = stripped.strip_prefix(std::slice::from_ref(&tab)) { - stripped = rest; - } else { - break; - } - count += 1; - } - stripped + .map(|line| match line.strip_prefix(&*indentation) { + Some(stripped) => stripped, + None => line, }) .collect(); lines.join(&new_line).clone() @@ -328,7 +299,7 @@ mod test { .count(); let end = source.chars().count() - trailing_white; let extracted = extract_with_deindent(&source, start..end); - let result_bytes = indent_lines::(0, &extracted, source.contains('\t')); + let result_bytes = indent_lines::(0, &extracted); let actual = std::str::from_utf8(&result_bytes).unwrap(); assert_eq!(actual, expected); } @@ -420,8 +391,8 @@ pass fn test_replace_with_indent(target: &str, start: usize, inserted: &str) -> String { let target = target.to_string(); let replace_lines = DeindentedExtract::MultiLine(inserted.as_bytes(), 0); - let (indent, is_tab) = get_indent_at_offset_with_tab::(&target.as_bytes()[..start]); - let ret = indent_lines::(indent, &replace_lines, is_tab); + let indent = get_indent_at_offset::(&target.as_bytes()[..start]); + let ret = indent_lines::(indent, &replace_lines); String::from_utf8(ret.to_vec()).unwrap() } @@ -474,26 +445,4 @@ pass let actual = test_replace_with_indent(target, 6, inserted); assert_eq!(actual, "def abc():\n pass"); } - - #[test] - fn test_tab_indent() { - let src = "\n\t\tdef test():\n\t\t\tpass"; - let expected = "def test():\n\tpass"; - test_deindent(src, expected, 0); - } - - #[test] - fn test_tab_replace() { - let target = "\t\t"; - let inserted = "def abc(): pass"; - let actual = test_replace_with_indent(target, 2, inserted); - assert_eq!(actual, "def abc(): pass"); - let inserted = "def abc():\n\tpass"; - let actual = test_replace_with_indent(target, 2, inserted); - assert_eq!(actual, "def abc():\n\t\t\tpass"); - - let target = "\t\tdef abc():\n\t\t\t"; - let actual = test_replace_with_indent(target, 14, inserted); - assert_eq!(actual, "def abc():\n\t\tpass"); - } } diff --git a/crates/ast-engine/src/replacer/template.rs b/crates/ast-engine/src/replacer/template.rs index 72423c0..e95d843 100644 --- a/crates/ast-engine/src/replacer/template.rs +++ b/crates/ast-engine/src/replacer/template.rs @@ -4,7 +4,7 @@ // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT -use super::indent::{DeindentedExtract, extract_with_deindent, indent_lines}; +use super::indent::{DeindentedExtract, extract_with_deindent, get_indent_at_offset, indent_lines}; use super::{MetaVarExtract, Replacer, split_first_meta_var}; use crate::NodeMatch; use crate::language::Language; @@ -52,10 +52,10 @@ impl TemplateFix { impl Replacer for TemplateFix { fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying { let leading = nm.get_doc().get_source().get_range(0..nm.range().start); - let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::(leading); + let indent = get_indent_at_offset::(leading); let bytes = replace_fixer(self, nm.get_env()); let replaced = DeindentedExtract::MultiLine(&bytes, 0); - indent_lines::(indent, &replaced, is_tab).to_vec() + indent_lines::(indent, &replaced).to_vec() } } @@ -64,7 +64,7 @@ type Indent = usize; #[derive(Debug, Clone)] pub struct Template { fragments: Vec, - vars: Vec<(MetaVarExtract, Indent, bool)>, // the third element is is_tab + vars: Vec<(MetaVarExtract, Indent)>, } fn create_template( @@ -82,10 +82,8 @@ fn create_template( { fragments.push(tmpl[len..len + offset + i].to_string()); // NB we have to count ident of the full string - let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::( - &tmpl.as_bytes()[..len + offset + i], - ); - vars.push((meta_var, indent, is_tab)); + let indent = get_indent_at_offset::(&tmpl.as_bytes()[..len + offset + i]); + vars.push((meta_var, indent)); len += skipped + offset + i; offset = 0; continue; @@ -115,8 +113,8 @@ fn replace_fixer(fixer: &TemplateFix, env: &MetaVarEnv<'_, D>) -> Underl if let Some(frag) = frags.next() { ret.extend_from_slice(&D::Source::decode_str(frag)); } - for ((var, indent, is_tab), frag) in vars.zip(frags) { - if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) { + for ((var, indent), frag) in vars.zip(frags) { + if let Some(bytes) = maybe_get_var(env, var, indent.to_owned()) { ret.extend_from_slice(&bytes); } ret.extend_from_slice(&D::Source::decode_str(frag)); @@ -128,7 +126,6 @@ fn maybe_get_var<'e, 't, C, D>( env: &'e MetaVarEnv<'t, D>, var: &MetaVarExtract, indent: usize, - is_tab: bool, ) -> Option> where C: Content + 'e, @@ -139,7 +136,7 @@ where // transformed source does not have range, directly return bytes let source = env.get_transformed(name)?; let de_intended = DeindentedExtract::MultiLine(source, 0); - let bytes = indent_lines::(indent, &de_intended, is_tab); + let bytes = indent_lines::(indent, &de_intended); return Some(Cow::Owned(bytes.into())); } MetaVarExtract::Single(name) => { @@ -163,7 +160,7 @@ where } }; let extracted = extract_with_deindent(source, range); - let bytes = indent_lines::(indent, &extracted, is_tab); + let bytes = indent_lines::(indent, &extracted); Some(Cow::Owned(bytes.into())) } diff --git a/crates/flow/src/incremental/analyzer.rs b/crates/flow/src/incremental/analyzer.rs index 9b33262..e6cb845 100644 --- a/crates/flow/src/incremental/analyzer.rs +++ b/crates/flow/src/incremental/analyzer.rs @@ -471,21 +471,21 @@ impl IncrementalAnalyzer { } // Save edges to storage in batch - if !edges_to_save.is_empty() - && let Err(e) = self.storage.save_edges_batch(&edges_to_save).await - { - warn!( - error = %e, - "batch save failed, falling back to individual saves" - ); - for edge in &edges_to_save { - if let Err(e) = self.storage.save_edge(edge).await { - warn!( - file_from = ?edge.from, - file_to = ?edge.to, - error = %e, - "failed to save edge individually" - ); + if !edges_to_save.is_empty() { + if let Err(e) = self.storage.save_edges_batch(&edges_to_save).await { + warn!( + error = %e, + "batch save failed, falling back to individual saves" + ); + for edge in &edges_to_save { + if let Err(e) = self.storage.save_edge(edge).await { + warn!( + file_from = ?edge.from, + file_to = ?edge.to, + error = %e, + "failed to save edge individually" + ); + } } } } diff --git a/crates/language/src/lib.rs b/crates/language/src/lib.rs index 721ddd6..1e26f25 100644 --- a/crates/language/src/lib.rs +++ b/crates/language/src/lib.rs @@ -1721,23 +1721,20 @@ pub fn from_extension(path: &Path) -> Option { } // Handle extensionless files or files with unknown extensions - if let Some(_file_name) = path.file_name().and_then(|n| n.to_str()) { + if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) { // 1. Check if the full filename matches a known extension (e.g. .bashrc) #[cfg(any(feature = "bash", feature = "all-parsers"))] - if constants::BASH_EXTS.contains(&_file_name) { + if constants::BASH_EXTS.contains(&file_name) { return Some(SupportLang::Bash); } // 2. Check known extensionless file names #[cfg(any(feature = "bash", feature = "all-parsers", feature = "ruby"))] for (name, lang) in constants::LANG_RELATIONSHIPS_WITH_NO_EXTENSION { - if *name == _file_name { + if *name == file_name { return Some(*lang); } } - - // Silence unused variable warning if bash and ruby and all-parsers are not enabled - let _ = file_name; } // 3. Try shebang check as last resort diff --git a/crates/rule-engine/src/rule/deserialize_env.rs b/crates/rule-engine/src/rule/deserialize_env.rs index ecebb62..8c1cfa6 100644 --- a/crates/rule-engine/src/rule/deserialize_env.rs +++ b/crates/rule-engine/src/rule/deserialize_env.rs @@ -38,7 +38,7 @@ fn into_map( .collect() } -type OrderResult = Result; +type OrderResult = Result; /// A struct to store information to deserialize rules. #[derive(Clone, Debug)] @@ -79,22 +79,27 @@ struct TopologicalSort<'a, T: DependentRule> { order: Vec<&'a str>, // bool stands for if the rule has completed visit seen: RapidMap<&'a str, bool>, + env: Option<&'a RuleRegistration>, } impl<'a, T: DependentRule> TopologicalSort<'a, T> { - fn get_order(maps: &RapidMap) -> OrderResult> { - let mut top_sort = TopologicalSort::new(maps); + fn get_order( + maps: &'a RapidMap, + env: Option<&'a RuleRegistration>, + ) -> OrderResult> { + let mut top_sort = TopologicalSort::new(maps, env); for key in maps.keys() { top_sort.visit(key)?; } Ok(top_sort.order) } - fn new(maps: &'a RapidMap) -> Self { + fn new(maps: &'a RapidMap, env: Option<&'a RuleRegistration>) -> Self { Self { maps, order: vec![], seen: RapidMap::default(), + env, } } @@ -105,7 +110,7 @@ impl<'a, T: DependentRule> TopologicalSort<'a, T> { return if completed { Ok(()) } else { - Err(key.to_string()) + Err(ReferentRuleError::CyclicRule(key.to_string())) }; } let Some(item) = self.maps.get(key) else { @@ -113,7 +118,12 @@ impl<'a, T: DependentRule> TopologicalSort<'a, T> { // e.g. if key is rule_id // if rule_id not found in global, it can be a local rule // if rule_id not found in local, it can be a global rule - // TODO: add check here and return Err if rule not found + if let Some(env) = self.env { + // Note: We only check if the key is completely missing + if !env.contains_match_rule(key) { + return Err(ReferentRuleError::UndefinedUtil(key.to_string())); + } + } return Ok(()); }; // mark the id as seen but not completed @@ -165,8 +175,7 @@ impl DeserializeEnv { self, utils: &RapidMap, ) -> Result { - let order = TopologicalSort::get_order(utils) - .map_err(ReferentRuleError::CyclicRule) + let order = TopologicalSort::get_order(utils, Some(&self.registration)) .map_err(RuleSerializeError::MatchesReference)?; for id in order { let rule = utils.get(id).expect("must exist"); @@ -182,8 +191,8 @@ impl DeserializeEnv { ) -> Result { let registration = GlobalRules::default(); let utils = into_map(utils); - let order = TopologicalSort::get_order(&utils) - .map_err(ReferentRuleError::CyclicRule) + let temp_env = RuleRegistration::from_globals(®istration); + let order = TopologicalSort::get_order(&utils, Some(&temp_env)) .map_err(RuleSerializeError::from)?; for id in order { let (lang, core) = utils.get(id).expect("must exist"); @@ -204,10 +213,11 @@ impl DeserializeEnv { } pub(crate) fn get_transform_order<'a>( - &self, + &'a self, trans: &'a RapidMap>, - ) -> Result, String> { - TopologicalSort::get_order(trans) + ) -> Result, ReferentRuleError> { + // Transformations don't need env rule registration checks, pass None + TopologicalSort::get_order(trans, None) } pub fn with_globals(self, globals: &GlobalRules) -> Self { @@ -277,7 +287,16 @@ local-rule: ) .expect("failed to parse utils"); // should not panic - DeserializeEnv::new(TypeScript::Tsx).with_utils(&utils)?; + let registration = GlobalRules::default(); + let core: crate::rule_core::SerializableRuleCore = + from_str("rule: {pattern: '123'}").unwrap(); + let env_dummy = DeserializeEnv::new(TypeScript::Tsx).with_globals(®istration); + registration + .insert("global-rule", core.get_matcher(env_dummy).unwrap()) + .unwrap(); + DeserializeEnv::new(TypeScript::Tsx) + .with_globals(®istration) + .with_utils(&utils)?; Ok(()) } diff --git a/crates/rule-engine/src/rule/referent_rule.rs b/crates/rule-engine/src/rule/referent_rule.rs index 872e7ed..d9e66b5 100644 --- a/crates/rule-engine/src/rule/referent_rule.rs +++ b/crates/rule-engine/src/rule/referent_rule.rs @@ -32,6 +32,10 @@ impl Registration { // it only insert new item to the RapidMap. It is safe to cast the raw ptr. unsafe { &mut *(Arc::as_ptr(&self.0) as *mut RapidMap) } } + + pub(crate) fn contains_key(&self, id: &str) -> bool { + self.0.contains_key(id) + } } pub type GlobalRules = Registration; @@ -83,6 +87,10 @@ impl RuleRegistration { RegistrationRef { local, global } } + pub(crate) fn contains_match_rule(&self, id: &str) -> bool { + self.local.contains_key(id) || self.global.contains_key(id) + } + pub(crate) fn insert_local(&self, id: &str, rule: Rule) -> Result<(), ReferentRuleError> { if rule.check_cyclic(id) { return Err(ReferentRuleError::CyclicRule(id.into())); diff --git a/crates/rule-engine/src/transform/mod.rs b/crates/rule-engine/src/transform/mod.rs index c1cca50..68bf370 100644 --- a/crates/rule-engine/src/transform/mod.rs +++ b/crates/rule-engine/src/transform/mod.rs @@ -8,6 +8,7 @@ mod parse; mod rewrite; mod string_case; mod trans; +use crate::rule::referent_rule::ReferentRuleError; use crate::{DeserializeEnv, RuleCore}; @@ -70,9 +71,12 @@ impl Transform { .map(|(key, val)| val.parse(&env.lang).map(|t| (key.to_string(), t))) .collect(); let map = map?; - let order = env - .get_transform_order(&map) - .map_err(TransformError::Cyclic)?; + let order = env.get_transform_order(&map).map_err(|e| match e { + ReferentRuleError::CyclicRule(s) => TransformError::Cyclic(s), + _ => unreachable!( + "get_transform_order uses None for env, so only CyclicRule is possible" + ), + })?; let transforms = order .iter() .map(|&key| (key.to_string(), map[key].clone())) diff --git a/crates/rule-engine/src/transform/trans.rs b/crates/rule-engine/src/transform/trans.rs index cdf0f1e..0a80a89 100644 --- a/crates/rule-engine/src/transform/trans.rs +++ b/crates/rule-engine/src/transform/trans.rs @@ -551,26 +551,5 @@ if (true) { Ok(()) } - #[test] - fn test_rewrite() -> R { - let trans = parse( - r#" - rewrite: - source: "$A" - rewriters: ["re1", "re2"] - joinBy: ", " - "#, - )?; - let parsed = trans.parse(&TypeScript::Tsx).expect("should parse"); - match &parsed { - Trans::Rewrite(r) => { - assert_eq!(r.rewriters, vec!["re1", "re2"]); - assert_eq!(r.join_by, Some(", ".to_string())); - } - _ => panic!("should be rewrite"), - } - assert_eq!(parsed.used_rewriters(), &["re1", "re2"]); - assert_eq!(parsed.used_vars(), "A"); - Ok(()) - } + // TODO: add a symbolic test for Rewrite } diff --git a/crates/services/src/lib.rs b/crates/services/src/lib.rs index c932db7..06a9548 100644 --- a/crates/services/src/lib.rs +++ b/crates/services/src/lib.rs @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // SPDX-License-Identifier: AGPL-3.0-or-later -#![allow(unexpected_cfgs)] +#![feature(trait_alias)] //! # Thread Service Layer //! //! This crate provides the service layer interfaces for Thread that abstract over diff --git a/crates/services/src/types.rs b/crates/services/src/types.rs index c9fdd9a..b8857c3 100644 --- a/crates/services/src/types.rs +++ b/crates/services/src/types.rs @@ -52,9 +52,7 @@ pub use thread_ast_engine::{ pub use thread_language::{SupportLang, SupportLangErr}; #[cfg(not(feature = "ast-grep-backend"))] -pub trait Doc: Clone + 'static {} -#[cfg(not(feature = "ast-grep-backend"))] -impl Doc for T {} +pub trait Doc = Clone + 'static; #[cfg(not(feature = "ast-grep-backend"))] #[derive(Debug, Clone)]