diff --git a/assets/default_newlines.packdump b/assets/default_newlines.packdump index 62d0953d..2b757d87 100644 Binary files a/assets/default_newlines.packdump and b/assets/default_newlines.packdump differ diff --git a/assets/default_nonewlines.packdump b/assets/default_nonewlines.packdump index 3b5ee135..ca8b61ab 100644 Binary files a/assets/default_nonewlines.packdump and b/assets/default_nonewlines.packdump differ diff --git a/examples/syntest.rs b/examples/syntest.rs index 0e3d0164..12ee2622 100644 --- a/examples/syntest.rs +++ b/examples/syntest.rs @@ -41,7 +41,7 @@ lazy_static! { pub static ref SYNTAX_TEST_ASSERTION_PATTERN: Regex = Regex::new(r#"(?xm) \s*(?: (?P<-)|(?P\^+) - )(.+)$"#).unwrap(); + )(.*)$"#).unwrap(); } #[derive(Debug)] diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index f94de197..423f709e 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -52,7 +52,8 @@ impl ParseState { /// pointer to the main context of the syntax. pub fn new(syntax: &SyntaxDefinition) -> ParseState { let start_state = StateLevel { - context: syntax.contexts["main"].clone(), + // __start is a special context we add in yaml_load.rs + context: syntax.contexts["__start"].clone(), prototype: None, captures: None, }; @@ -128,7 +129,7 @@ impl ParseState { ctx_ref.prototype.clone() }; let context_chain = self.stack - .iter() + .iter().rev() // iterate the stack in top-down order to apply the prototypes .filter_map(|lvl| lvl.prototype.as_ref().cloned()) .chain(prototype.into_iter()) .chain(Some(cur_level.context.clone()).into_iter()); @@ -257,8 +258,8 @@ impl ParseState { // ex: ((bob)|(hi))* could match hibob in wrong order, and outer has to push first // we don't have to handle a capture matching multiple times, Sublime doesn't let mut map: Vec<((usize, i32), ScopeStackOp)> = Vec::new(); - for (cap_index, scopes) in capture_map.iter() { - if let Some((cap_start, cap_end)) = reg_match.regions.pos(*cap_index) { + for &(cap_index, ref scopes) in capture_map.iter() { + if let Some((cap_start, cap_end)) = reg_match.regions.pos(cap_index) { // marking up empty captures causes pops to be sorted wrong if cap_start == cap_end { continue; @@ -291,56 +292,101 @@ impl ParseState { cur_context: &Context, match_op: &MatchOperation, ops: &mut Vec<(usize, ScopeStackOp)>) { - let involves_pop = match *match_op { - MatchOperation::Pop | - MatchOperation::Set(_) => true, - MatchOperation::Push(_) | - MatchOperation::None => false, - }; - // println!("metas ops for {:?}, is pop: {}, initial: {}", + // println!("metas ops for {:?}, initial: {}", // match_op, - // involves_pop, // initial); // println!("{:?}", cur_context.meta_scope); - if involves_pop { - let v = if initial { - &cur_context.meta_content_scope - } else { - &cur_context.meta_scope - }; - if !v.is_empty() { - ops.push((index, ScopeStackOp::Pop(v.len()))); - } - - if !initial && cur_context.clear_scopes != None { - ops.push((index, ScopeStackOp::Restore)); - } - } match *match_op { + MatchOperation::Pop => { + let v = if initial { + &cur_context.meta_content_scope + } else { + &cur_context.meta_scope + }; + if !v.is_empty() { + ops.push((index, ScopeStackOp::Pop(v.len()))); + } + + // cleared scopes are restored after the scopes from match pattern that invoked the pop are applied + if !initial && cur_context.clear_scopes != None { + ops.push((index, ScopeStackOp::Restore)); + } + }, + // for some reason the ST3 behaviour of set is convoluted and is inconsistent with the docs and other ops + // - the meta_content_scope of the current context is applied to the matched thing, unlike pop + // - the clear_scopes are applied after the matched token, unlike push + // - the interaction with meta scopes means that the token has the meta scopes of both the current scope and the new scope. MatchOperation::Push(ref context_refs) | MatchOperation::Set(ref context_refs) => { - for r in context_refs { - let ctx_ptr = r.resolve(); - let ctx = ctx_ptr.borrow(); + let is_set = match *match_op { + MatchOperation::Set(_) => true, + _ => false + }; + // a match pattern that "set"s keeps the meta_content_scope and meta_scope from the previous context + if initial { + // add each context's meta scope + for r in context_refs.iter() { + let ctx_ptr = r.resolve(); + let ctx = ctx_ptr.borrow(); + + if !is_set { + if let Some(clear_amount) = ctx.clear_scopes { + ops.push((index, ScopeStackOp::Clear(clear_amount))); + } + } - if initial { - if let Some(clear_amount) = ctx.clear_scopes { - ops.push((index, ScopeStackOp::Clear(clear_amount))); + for scope in ctx.meta_scope.iter() { + ops.push((index, ScopeStackOp::Push(*scope))); } } + } else { + let repush = (is_set && (!cur_context.meta_scope.is_empty() || !cur_context.meta_content_scope.is_empty())) || context_refs.iter().any(|r| { + let ctx_ptr = r.resolve(); + let ctx = ctx_ptr.borrow(); + + !ctx.meta_content_scope.is_empty() || (ctx.clear_scopes.is_some() && is_set) + }); + if repush { + // remove previously pushed meta scopes, so that meta content scopes will be applied in the correct order + let mut num_to_pop : usize = context_refs.iter().map(|r| { + let ctx_ptr = r.resolve(); + let ctx = ctx_ptr.borrow(); + ctx.meta_scope.len() + }).sum(); + + // also pop off the original context's meta scopes + if is_set { + num_to_pop += cur_context.meta_content_scope.len() + cur_context.meta_scope.len(); + } - let v = if initial { - &ctx.meta_scope - } else { - &ctx.meta_content_scope - }; - for scope in v.iter() { - ops.push((index, ScopeStackOp::Push(*scope))); + // do all the popping as one operation + if num_to_pop > 0 { + ops.push((index, ScopeStackOp::Pop(num_to_pop))); + } + + // now we push meta scope and meta context scope for each context pushed + for r in context_refs { + let ctx_ptr = r.resolve(); + let ctx = ctx_ptr.borrow(); + + // for some reason, contrary to my reading of the docs, set does this after the token + if is_set { + if let Some(clear_amount) = ctx.clear_scopes { + ops.push((index, ScopeStackOp::Clear(clear_amount))); + } + } + + for scope in ctx.meta_scope.iter() { + ops.push((index, ScopeStackOp::Push(*scope))); + } + for scope in ctx.meta_content_scope.iter() { + ops.push((index, ScopeStackOp::Push(*scope))); + } + } } } - } - MatchOperation::None | - MatchOperation::Pop => (), + }, + MatchOperation::None => (), } } @@ -434,7 +480,6 @@ mod tests { (0, Push(Scope::new("keyword.control.def.ruby").unwrap())), (3, Pop(2)), (3, Push(Scope::new("meta.function.ruby").unwrap())), - (4, Pop(1)), (4, Push(Scope::new("entity.name.function.ruby").unwrap())), (7, Pop(1)) ]; @@ -466,6 +511,8 @@ mod tests { (6, Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())), (6, Push(Scope::new("punctuation.definition.string.begin.ruby").unwrap())), (12, Pop(1)), + (12, Pop(1)), + (12, Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())), (12, Push(Scope::new("text.sql.embedded.ruby").unwrap())), (12, Clear(ClearAmount::TopN(2))), (12, Restore), @@ -486,7 +533,7 @@ mod tests { fn expect_scope_stacks(line: &str, expect: &[&str]) { // check that each expected scope stack appears at least once while parsing the given test line - + //let syntax = SyntaxSet::load_syntax_file("testdata/parser_tests.sublime-syntax", true).unwrap(); use std::fs::File; use std::io::Read; @@ -495,7 +542,7 @@ mod tests { f.read_to_string(&mut s).unwrap(); let syntax = SyntaxDefinition::load_from_str(&s, true).unwrap(); - + let mut state = ParseState::new(&syntax); let mut ss = SyntaxSet::new(); diff --git a/src/parsing/syntax_definition.rs b/src/parsing/syntax_definition.rs index eec83745..48807ded 100644 --- a/src/parsing/syntax_definition.rs +++ b/src/parsing/syntax_definition.rs @@ -11,7 +11,7 @@ use super::scope::*; use regex_syntax::escape; use rustc_serialize::{Encodable, Encoder, Decodable, Decoder}; -pub type CaptureMapping = HashMap>; +pub type CaptureMapping = Vec<(usize, Vec)>; pub type ContextPtr = Rc>; /// The main data structure representing a syntax definition loaded from a diff --git a/src/parsing/syntax_set.rs b/src/parsing/syntax_set.rs index 9d1ed123..e84cfadc 100644 --- a/src/parsing/syntax_set.rs +++ b/src/parsing/syntax_set.rs @@ -176,8 +176,11 @@ impl SyntaxSet { /// some may specify a Packages/PackageName/SyntaxName.sublime-syntax path /// others may just have SyntaxName.sublime-syntax /// this caters for these by matching the end of the path of the loaded syntax definition files + // however, if a syntax name is provided without a folder, make sure we don't accidentally match the end of a different syntax definition's name - by checking a / comes before it or it is the full path pub fn find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxDefinition> { - return self.path_syntaxes.iter().find(|t| t.0.ends_with(path)).map(|&(_,i)| &self.syntaxes[i]); + let mut slash_path = "/".to_string(); + slash_path.push_str(&path); + return self.path_syntaxes.iter().find(|t| t.0.ends_with(&slash_path) || t.0 == path).map(|&(_,i)| &self.syntaxes[i]); } /// Convenience method that tries to find the syntax for a file path, diff --git a/src/parsing/yaml_load.rs b/src/parsing/yaml_load.rs index af31e7ac..4759e048 100644 --- a/src/parsing/yaml_load.rs +++ b/src/parsing/yaml_load.rs @@ -51,10 +51,17 @@ struct ParserState<'a> { variable_regex: Regex, backref_regex: Regex, short_multibyte_regex: Regex, - top_level_scope: Scope, lines_include_newline: bool, } +static START_CONTEXTS: &'static str = " +__start: + - match: '' + push: __main +__main: + - include: main +"; + impl SyntaxDefinition { /// In case you want to create your own SyntaxDefinition's in memory from strings. /// Generally you should use a `SyntaxSet` @@ -96,15 +103,16 @@ impl SyntaxDefinition { variable_regex: Regex::new(r"\{\{([A-Za-z0-9_]+)\}\}").unwrap(), backref_regex: Regex::new(r"\\\d").unwrap(), short_multibyte_regex: Regex::new(r"\\x([a-fA-F][a-fA-F0-9])").unwrap(), - top_level_scope: top_level_scope, lines_include_newline: lines_include_newline, }; - let contexts = try!(SyntaxDefinition::parse_contexts(contexts_hash, &mut state)); + let mut contexts = try!(SyntaxDefinition::parse_contexts(contexts_hash, &mut state)); if !contexts.contains_key("main") { return Err(ParseSyntaxError::MainMissing); } + SyntaxDefinition::add_initial_contexts(&mut contexts, &mut state, top_level_scope); + let defn = SyntaxDefinition { name: try!(get_key(h, "name", |x| x.as_str())).to_owned(), scope: top_level_scope, @@ -135,12 +143,6 @@ impl SyntaxDefinition { let is_prototype = name == "prototype"; let context_ptr = try!(SyntaxDefinition::parse_context(val_vec, state, is_prototype)); - if name == "main" { - let mut context = context_ptr.borrow_mut(); - if context.meta_content_scope.is_empty() { - context.meta_content_scope.push(state.top_level_scope) - } - } contexts.insert(name.to_owned(), context_ptr); } } @@ -276,11 +278,11 @@ impl SyntaxDefinition { .unwrap_or_else(|| Ok(vec![]))); let captures = if let Ok(map) = get_key(map, "captures", |x| x.as_hash()) { - let mut res_map = HashMap::new(); + let mut res_map = Vec::new(); for (key, value) in map.iter() { if let (Some(key_int), Some(val_str)) = (key.as_i64(), value.as_str()) { - res_map.insert(key_int as usize, - try!(str_to_scopes(val_str, state.scope_repo))); + res_map.push((key_int as usize, + try!(str_to_scopes(val_str, state.scope_repo)))); } } Some(res_map) @@ -335,6 +337,46 @@ impl SyntaxDefinition { Ok(vec![try!(SyntaxDefinition::parse_reference(y, state))]) } } + + /// Sublime treats the top level context slightly differently from + /// including the main context from other syntaxes. When main is popped + /// it is immediately re-added and when it is `set` over the file level + /// scope remains. This behaviour is emulated through some added contexts + /// that are the actual top level contexts used in parsing. + /// See https://github.com/trishume/syntect/issues/58 for more. + fn add_initial_contexts(contexts: &mut HashMap, + state: &mut ParserState, + top_level_scope: Scope) { + let yaml_docs = YamlLoader::load_from_str(START_CONTEXTS).unwrap(); + let yaml = &yaml_docs[0]; + + let start_yaml : &[Yaml] = yaml["__start"].as_vec().unwrap(); + let start = SyntaxDefinition::parse_context(start_yaml, state, false).unwrap(); + { + let mut start_b = start.borrow_mut(); + start_b.meta_content_scope = vec![top_level_scope]; + } + contexts.insert("__start".to_owned(), start); + + let main_yaml : &[Yaml] = yaml["__main"].as_vec().unwrap(); + let main = SyntaxDefinition::parse_context(main_yaml, state, false).unwrap(); + { + let real_main = contexts["main"].borrow(); + let mut main_b = main.borrow_mut(); + main_b.meta_include_prototype = real_main.meta_include_prototype; + main_b.meta_scope = real_main.meta_scope.clone(); + main_b.meta_content_scope = real_main.meta_content_scope.clone(); + } + contexts.insert("__main".to_owned(), main); + + // add the top_level_scope as a meta_content_scope to main so + // pushes from other syntaxes add the file scope + // TODO: this order is not quite correct if main also has a meta_scope + { + let mut real_main = contexts["main"].borrow_mut(); + real_main.meta_content_scope.insert(0,top_level_scope); + } + } } #[cfg(test)] @@ -388,7 +430,8 @@ mod tests { false) .unwrap(); assert_eq!(defn2.name, "C"); - assert_eq!(defn2.scope, Scope::new("source.c").unwrap()); + let top_level_scope = Scope::new("source.c").unwrap(); + assert_eq!(defn2.scope, top_level_scope); let exts: Vec = vec![String::from("c"), String::from("h")]; assert_eq!(defn2.file_extensions, exts); assert_eq!(defn2.hidden, true); @@ -397,15 +440,20 @@ mod tests { let n: Vec = Vec::new(); println!("{:?}", defn2); // assert!(false); + assert_eq!(defn2.contexts["main"].borrow().meta_content_scope, vec![top_level_scope]); assert_eq!(defn2.contexts["main"].borrow().meta_scope, n); assert_eq!(defn2.contexts["main"].borrow().meta_include_prototype, true); + + assert_eq!(defn2.contexts["__main"].borrow().meta_content_scope, n); + assert_eq!(defn2.contexts["__start"].borrow().meta_content_scope, vec![top_level_scope]); + assert_eq!(defn2.contexts["string"].borrow().meta_scope, vec![Scope::new("string.quoted.double.c").unwrap()]); let first_pattern: &Pattern = &defn2.contexts["main"].borrow().patterns[0]; match first_pattern { &Pattern::Match(ref match_pat) => { let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed"); - assert_eq!(&m[&1], &vec![Scope::new("meta.preprocessor.c++").unwrap()]); + assert_eq!(&m[0], &(1,vec![Scope::new("meta.preprocessor.c++").unwrap()])); use parsing::syntax_definition::ContextReference::*; // this is sadly necessary because Context is not Eq because of the Regex diff --git a/testdata/Testing.sublime-syntax b/testdata/Testing.sublime-syntax index 030036bf..14f62c2f 100644 --- a/testdata/Testing.sublime-syntax +++ b/testdata/Testing.sublime-syntax @@ -12,6 +12,8 @@ contexts: - include: zoom main: - meta_include_prototype: false + - match: =testset + push: testset - match: hi scope: comment - match: troll @@ -52,3 +54,14 @@ contexts: - meta_scope: bamf - match: bamf scope: keyword + testset: + - meta_scope: constant.testset.meta + - meta_content_scope: string.testset.content + - match: =doset + set: setto + setto: + - clear_scopes: 1 + - meta_scope: constant.setto.meta + - meta_content_scope: comment.setto.content + - match: =endset + pop: true diff --git a/testdata/minimized_tests/syntax_test_aspmini.asp b/testdata/minimized_tests/syntax_test_aspmini.asp new file mode 100644 index 00000000..94bf4929 --- /dev/null +++ b/testdata/minimized_tests/syntax_test_aspmini.asp @@ -0,0 +1,9 @@ +' SYNTAX TEST "Packages/ASP/HTML-ASP.sublime-syntax" +<% + Class TestClass2 Public Sub TestSub () Response.Write("wow") End Sub End Class +'^^^^^ meta.class.asp meta.class.identifier.asp storage.type.asp +' ^ meta.class.asp meta.class.body.asp meta.class.asp meta.class.identifier.asp +' ^ meta.class.asp meta.class.body.asp +%> +

foobar

+'^^^ text.html.asp meta.tag.block.any.html - source.asp.embedded.html diff --git a/testdata/minimized_tests/syntax_test_scalamini.scala b/testdata/minimized_tests/syntax_test_scalamini.scala new file mode 100644 index 00000000..35197d95 --- /dev/null +++ b/testdata/minimized_tests/syntax_test_scalamini.scala @@ -0,0 +1,4 @@ +// SYNTAX TEST "Packages/Scala/Scala.sublime-syntax" + + class Foo[A](a: Bar) +// ^^^^^^^^^ meta.class.identifier.scala