
Merged
Changes from all commits
24 commits
- 9f6f5eb WIP use arena and indices for contexts (robinst, Jun 1, 2018)
- 7a4f2f4 Flatten contexts when loading (robinst, Jun 16, 2018)
- 9e0daa9 WIP use map to look up context IDs (robinst, Jun 22, 2018)
- 2dbe0c1 Fix infinite loop with recursively_mark_no_prototype (robinst, Jun 23, 2018)
- 22c5084 Also link start context, add TODO (robinst, Jun 23, 2018)
- 8c906e4 Add more TODOs (robinst, Jun 24, 2018)
- ff756cd Remove unused uses (robinst, Jun 24, 2018)
- 6699b2b Change prototype and start_context to be indices (robinst, Jul 3, 2018)
- a85cbd1 Add SyntaxSetBuilder to get rid of manual linking (robinst, Jul 3, 2018)
- 822c472 Use a single Vec<Context> in SyntaxSet after linking (robinst, Jul 3, 2018)
- 0e39363 Go back to HashMap<String, Context> in SyntaxDefinition (robinst, Jul 6, 2018)
- 36fd179 Fix dump_is_deterministic test by sorting context map (robinst, Jul 12, 2018)
- 2c0281c Remove no longer needed "rc" feature from serde dep (robinst, Jul 12, 2018)
- 250d201 Implement Clone for SyntaxSet (robinst, Jul 26, 2018)
- 4c2e97e Add back into_builder() method to SyntaxSet (robinst, Jul 26, 2018)
- c180038 Add tests Sync/Send and threading with SyntaxSet (robinst, Jul 26, 2018)
- 26230eb Fix doctests (robinst, Jul 27, 2018)
- d610a88 Name anonymous contexts similar to Sublime (robinst, Jul 27, 2018)
- b05617f Make packs (robinst, Jul 27, 2018)
- 90254ad Simplify parsyncat example now that SyntaxSet is Send + Sync (robinst, Jul 27, 2018)
- 082a078 Use unstable sort because we can (robinst, Jul 27, 2018)
- 913aaf5 Store ContextId in ParseState instead of &Context (robinst, Jul 27, 2018)
- 6ee5cd7 Adjust documentation, remove no longer used methods (robinst, Jul 30, 2018)
- 9912326 README: Update "Parallelizing" section :) (robinst, Jul 30, 2018)
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -25,7 +25,7 @@ plist = "0.3"
bincode = { version = "1.0", optional = true }
flate2 = { version = "1.0", optional = true, default-features = false }
fnv = { version = "1.0", optional = true }
serde = { version = "1.0", features = ["rc"] }
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"

8 changes: 6 additions & 2 deletions Readme.md
@@ -122,9 +122,13 @@ Any time the file is changed the latest cached state is found, the cache is clea

### Parallelizing

`syntect` doesn't provide any built-in facilities to enable highlighting in parallel. Some of the important data structures are not thread-safe, either, most notably `SyntaxSet`. However, if you find yourself in need of highlighting lots of files in parallel, the recommendation is to use some sort of thread pooling, along with the `thread_local!` macro from `libstd`, so that each thread that needs, say, a `SyntaxSet`, will have one, while minimizing the amount of them that need to be initialized. For adding parallelism to a previously single-threaded program, the recommended thread pooling is [`rayon`](https://github.com/nikomatsakis/rayon). However, if you're working in an already-threaded context where there might be more threads than you want (such as writing a handler for an Iron request), the recommendation is to force all highlighting to be done within a fixed-size thread pool using [`rust-scoped-pool`](https://github.com/reem/rust-scoped-pool). An example of the former is in `examples/parsyncat.rs`.
Since 3.0, `syntect` can be used to do parsing/highlighting in parallel. `SyntaxSet` is both `Send` and `Sync` and so can easily be used from multiple threads. It is also `Clone`, which means you can construct a syntax set and then clone it to use for other threads if you prefer.

See [#20](https://github.com/trishume/syntect/issues/20) and [#78](https://github.com/trishume/syntect/pull/78) for more detail and discussion about why `syntect` doesn't provide parallelism by default.
Unlike older versions, nothing prevents serializing a `SyntaxSet` either. So you can directly deserialize a fully linked `SyntaxSet` and start using it for parsing/highlighting. Previously, it was always necessary to do linking first.

It is worth mentioning that regexes are compiled lazily, only when they are actually needed. Once a regex has been compiled, the compiled version is used by all threads from then on. Note that this is done using interior mutability, so if multiple threads happen to encounter the same uncompiled regex at the same time, it might get compiled multiple times; after that, one of the compiled versions is used. Currently, when a `SyntaxSet` is cloned, the regexes in the cloned set need to be recompiled.
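The compile-once, share-afterwards behavior described above can be sketched with `std::sync::OnceLock` from the standard library. This is an illustration only: `LazyRegex` and its string-based "compilation" are hypothetical stand-ins, and unlike `OnceLock` (which blocks so initialization runs exactly once), syntect's interior-mutability approach may compile the same regex more than once under contention.

```rust
use std::sync::OnceLock;

// Hypothetical stand-in for a rule that owns an uncompiled regex pattern.
// The "compiled" form is just a String here so the sketch stays
// self-contained; syntect's real internals differ.
struct LazyRegex {
    pattern: String,
    compiled: OnceLock<String>,
}

impl LazyRegex {
    fn new(pattern: &str) -> Self {
        LazyRegex {
            pattern: pattern.to_string(),
            compiled: OnceLock::new(),
        }
    }

    // First caller pays the compilation cost; every later caller, on any
    // thread, gets a reference to the already-compiled form.
    fn get(&self) -> &str {
        self.compiled
            .get_or_init(|| format!("compiled({})", self.pattern))
    }
}

fn main() {
    let r = LazyRegex::new("[a-z]+");
    assert_eq!(r.get(), "compiled([a-z]+)"); // compiles here
    assert_eq!(r.get(), "compiled([a-z]+)"); // cached from now on
    println!("ok");
}
```

Because `get` hands out shared references, no locking is needed on the hot path once compilation has happened.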

For adding parallelism to a previously single-threaded program, the recommended thread pooling is [`rayon`](https://github.com/nikomatsakis/rayon). However, if you're working in an already-threaded context where there might be more threads than you want (such as writing a handler for an Iron request), the recommendation is to force all highlighting to be done within a fixed-size thread pool using [`rust-scoped-pool`](https://github.com/reem/rust-scoped-pool). An example of the former is in `examples/parsyncat.rs`.
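Because the set is `Send + Sync`, the shape of such a parallel program with plain `std::thread` looks roughly like this (`Shared` is a hypothetical stand-in for a `SyntaxSet`-like structure; the real examples use `rayon` and syntect types):

```rust
use std::sync::Arc;
use std::thread;

// Hypothetical stand-in for a Send + Sync structure like SyntaxSet.
#[derive(Clone)]
struct Shared {
    names: Vec<String>,
}

fn main() {
    let set = Arc::new(Shared {
        names: vec!["Rust".to_string(), "Go".to_string()],
    });

    // Each worker gets a cheap Arc clone pointing at the same shared set.
    let handles: Vec<_> = (0..4usize)
        .map(|i| {
            let set = Arc::clone(&set);
            thread::spawn(move || set.names.len() + i)
        })
        .collect();

    let total: usize = handles.into_iter().map(|h| h.join().unwrap()).sum();
    assert_eq!(total, 14); // (2+0)+(2+1)+(2+2)+(2+3): all 4 threads saw both names
    println!("total = {}", total);
}
```

Cloning the whole structure per thread works too (it is `Clone`), but as noted above a clone currently has to recompile its regexes, so sharing one instance is usually preferable.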

## Examples Available

Binary file modified assets/default_newlines.packdump
Binary file modified assets/default_nonewlines.packdump
12 changes: 6 additions & 6 deletions benches/highlighting.rs
@@ -4,18 +4,18 @@ extern crate syntect;

use criterion::{Bencher, Criterion};

use syntect::parsing::{SyntaxSet, SyntaxDefinition, ScopeStack};
use syntect::parsing::{SyntaxSet, SyntaxReference, ScopeStack};
use syntect::highlighting::{ThemeSet, Theme};
use syntect::easy::HighlightLines;
use std::str::FromStr;
use std::fs::File;
use std::io::Read;

fn do_highlight(s: &str, syntax: &SyntaxDefinition, theme: &Theme) -> usize {
fn do_highlight(s: &str, syntax_set: &SyntaxSet, syntax: &SyntaxReference, theme: &Theme) -> usize {
let mut h = HighlightLines::new(syntax, theme);
let mut count = 0;
for line in s.lines() {
let regions = h.highlight(line);
let regions = h.highlight(line, syntax_set);
count += regions.len();
}
count
@@ -33,16 +33,16 @@ fn highlight_file(b: &mut Bencher, file: &str) {
};

// don't load from dump so we don't count lazy regex compilation time
let ps = SyntaxSet::load_defaults_nonewlines();
let ss = SyntaxSet::load_defaults_nonewlines();
let ts = ThemeSet::load_defaults();

let syntax = ps.find_syntax_for_file(path).unwrap().unwrap();
let syntax = ss.find_syntax_for_file(path).unwrap().unwrap();
let mut f = File::open(path).unwrap();
let mut s = String::new();
f.read_to_string(&mut s).unwrap();

b.iter(|| {
do_highlight(&s, syntax, &ts.themes["base16-ocean.dark"])
do_highlight(&s, &ss, syntax, &ts.themes["base16-ocean.dark"])
});
}

12 changes: 6 additions & 6 deletions benches/loading.rs
@@ -3,7 +3,7 @@ extern crate criterion;
extern crate syntect;

use criterion::{Bencher, Criterion};
use syntect::parsing::SyntaxSet;
use syntect::parsing::{SyntaxSet, SyntaxSetBuilder};
use syntect::highlighting::ThemeSet;


@@ -27,16 +27,16 @@ fn bench_load_theme(b: &mut Bencher) {

fn bench_load_syntaxes(b: &mut Bencher) {
b.iter(|| {
let mut ps = SyntaxSet::new();
ps.load_syntaxes("testdata/Packages", false).unwrap()
let mut builder = SyntaxSetBuilder::new();
builder.load_syntaxes("testdata/Packages", false).unwrap()
});
}

fn bench_link_syntaxes(b: &mut Bencher) {
let mut ps = SyntaxSet::new();
ps.load_syntaxes("testdata/Packages", false).unwrap();
let mut builder = SyntaxSetBuilder::new();
builder.load_syntaxes("testdata/Packages", false).unwrap();
b.iter(|| {
ps.link_syntaxes();
builder.clone().build();
});
}

12 changes: 6 additions & 6 deletions benches/parsing.rs
@@ -5,13 +5,13 @@ extern crate syntect;
use criterion::{Bencher, Criterion};
use std::fs::File;
use std::io::Read;
use syntect::parsing::{ParseState, SyntaxDefinition, SyntaxSet};
use syntect::parsing::{ParseState, SyntaxReference, SyntaxSet};

fn do_parse(s: &str, syntax: &SyntaxDefinition) -> usize {
fn do_parse(s: &str, ss: &SyntaxSet, syntax: &SyntaxReference) -> usize {
let mut state = ParseState::new(syntax);
let mut count = 0;
for line in s.lines() {
let ops = state.parse_line(line);
let ops = state.parse_line(line, ss);
count += ops.len();
}
count
@@ -29,14 +29,14 @@ fn parse_file(b: &mut Bencher, file: &str) {
};

// don't load from dump so we don't count lazy regex compilation time
let ps = SyntaxSet::load_defaults_nonewlines();
let ss = SyntaxSet::load_defaults_nonewlines();

let syntax = ps.find_syntax_for_file(path).unwrap().unwrap();
let syntax = ss.find_syntax_for_file(path).unwrap().unwrap();
let mut f = File::open(path).unwrap();
let mut s = String::new();
f.read_to_string(&mut s).unwrap();

b.iter(|| do_parse(&s, syntax));
b.iter(|| do_parse(&s, &ss, syntax));
}

fn parsing_benchmark(c: &mut Criterion) {
21 changes: 11 additions & 10 deletions examples/gendata.rs
@@ -3,7 +3,7 @@
//! Although it is a valid example for serializing syntaxes, you probably won't need
//! to do this yourself unless you want to cache your own compiled grammars.
extern crate syntect;
use syntect::parsing::SyntaxSet;
use syntect::parsing::SyntaxSetBuilder;
use syntect::highlighting::ThemeSet;
use syntect::dumps::*;
use std::env;
@@ -22,16 +22,17 @@ fn main() {
Some(ref package_dir),
Some(ref packpath_newlines),
Some(ref packpath_nonewlines)) if cmd == "synpack" => {
let mut ps = SyntaxSet::new();
ps.load_plain_text_syntax();
ps.load_syntaxes(package_dir, true).unwrap();
dump_to_file(&ps, packpath_newlines).unwrap();

ps = SyntaxSet::new();
ps.load_plain_text_syntax();
ps.load_syntaxes(package_dir, false).unwrap();
dump_to_file(&ps, packpath_nonewlines).unwrap();
let mut builder = SyntaxSetBuilder::new();
builder.load_plain_text_syntax();
builder.load_syntaxes(package_dir, true).unwrap();
let ss = builder.build();
dump_to_file(&ss, packpath_newlines).unwrap();

let mut builder_nonewlines = SyntaxSetBuilder::new();
builder_nonewlines.load_plain_text_syntax();
builder_nonewlines.load_syntaxes(package_dir, false).unwrap();
let ss_nonewlines = builder_nonewlines.build();
dump_to_file(&ss_nonewlines, packpath_nonewlines).unwrap();
}
(Some(ref s), Some(ref theme_dir), Some(ref packpath), None) if s == "themepack" => {
let ts = ThemeSet::load_from_folder(theme_dir).unwrap();
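The builder flow used in the gendata.rs change above — mutate a `SyntaxSetBuilder`, then call `build()` to obtain an immutable set — can be sketched generically. `SetBuilder` and `FrozenSet` below are illustrative names, not syntect's API:

```rust
// Illustrative builder-then-freeze pattern: the builder is mutable while
// loading, and build() consumes it into an immutable set. Names are
// hypothetical, not syntect's actual types.
#[derive(Default, Clone)]
struct SetBuilder {
    items: Vec<String>,
}

struct FrozenSet {
    items: Vec<String>,
}

impl SetBuilder {
    fn new() -> Self {
        SetBuilder::default()
    }

    fn add(&mut self, item: &str) {
        self.items.push(item.to_string());
    }

    // Consuming self means no further mutation is possible, which is what
    // makes the finished set safe to share across threads or serialize.
    fn build(self) -> FrozenSet {
        FrozenSet { items: self.items }
    }
}

fn main() {
    let mut builder = SetBuilder::new();
    builder.add("Plain Text");
    builder.add("Rust");
    let set = builder.build();
    assert_eq!(set.items.len(), 2);
    println!("built {} items", set.items.len());
}
```

Note that because `build()` takes ownership, benchmarking it requires cloning the builder first, which is exactly what the benches/loading.rs change does.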
29 changes: 11 additions & 18 deletions examples/parsyncat.rs
@@ -1,7 +1,6 @@
//! Highlights the files given on the command line, in parallel.
//! Prints the highlighted output to stdout.

#[macro_use] extern crate lazy_static;
extern crate rayon;
extern crate syntect;

@@ -13,13 +12,6 @@ use rayon::prelude::*;
use std::fs::File;
use std::io::{BufReader, BufRead};

thread_local! {
static SYNTAX_SET: SyntaxSet = SyntaxSet::load_defaults_newlines();
}

lazy_static! {
static ref THEME_SET: ThemeSet = ThemeSet::load_defaults();
}

fn main() {
let files: Vec<String> = std::env::args().skip(1).collect();
@@ -29,6 +21,9 @@ fn main() {
return;
}

let syntax_set = SyntaxSet::load_defaults_newlines();
let theme_set = ThemeSet::load_defaults();

// We first collect the contents of the files...
let contents: Vec<Vec<String>> = files.par_iter()
.map(|filename| {
@@ -49,19 +44,17 @@
let regions: Vec<Vec<(Style, &str)>> = files.par_iter()
.zip(&contents)
.map(|(filename, contents)| {
SYNTAX_SET.with(|ss| {
let mut regions = Vec::new();
let theme = &THEME_SET.themes["base16-ocean.dark"];
let mut highlighter = HighlightFile::new(filename, ss, theme).unwrap();
let mut regions = Vec::new();
let theme = &theme_set.themes["base16-ocean.dark"];
let mut highlighter = HighlightFile::new(filename, &syntax_set, theme).unwrap();

for line in contents {
for region in highlighter.highlight_lines.highlight(line) {
regions.push(region);
}
for line in contents {
for region in highlighter.highlight_lines.highlight(line, &syntax_set) {
regions.push(region);
}
}

regions
})
regions
})
.collect();

8 changes: 5 additions & 3 deletions examples/syncat.rs
@@ -57,8 +57,10 @@ fn main() {
};

if let Some(folder) = matches.opt_str("extra-syntaxes") {
ss.load_syntaxes(folder, !no_newlines).unwrap();
ss.link_syntaxes();
// TODO: no way to go back to builder anymore :/
let mut builder = ss.into_builder();
builder.load_syntaxes(folder, !no_newlines).unwrap();
ss = builder.build();
}

let ts = ThemeSet::load_defaults();
@@ -107,7 +109,7 @@ fn main() {
}

{
let regions: Vec<(Style, &str)> = highlighter.highlight_lines.highlight(&line);
let regions: Vec<(Style, &str)> = highlighter.highlight_lines.highlight(&line, &ss);
print!("{}", as_24_bit_terminal_escaped(&regions[..], true));
}
line.clear();
2 changes: 1 addition & 1 deletion examples/synstats.rs
@@ -135,7 +135,7 @@ fn count(ss: &SyntaxSet, path: &Path, stats: &mut Stats) {
let mut stack = ScopeStack::new();
while reader.read_line(&mut line).unwrap() > 0 {
{
let ops = state.parse_line(&line);
let ops = state.parse_line(&line, &ss);
stats.chars += line.len();
count_line(&ops, &line, &mut stack, stats);
}
10 changes: 5 additions & 5 deletions examples/syntest.rs
@@ -14,8 +14,7 @@ extern crate lazy_static
extern crate regex;
extern crate getopts;

//extern crate onig;
use syntect::parsing::{SyntaxSet, ParseState, ScopeStack, Scope};
use syntect::parsing::{SyntaxSet, SyntaxSetBuilder, ParseState, ScopeStack, Scope};
use syntect::highlighting::ScopeSelectors;
use syntect::easy::{ScopeRegionIterator};

@@ -215,7 +214,7 @@ fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool, out_opts: Outp
if out_opts.debug && !line_only_has_assertion {
println!("-- debugging line {} -- scope stack: {:?}", current_line_number, stack);
}
let ops = state.parse_line(&line);
let ops = state.parse_line(&line, &ss);
if out_opts.debug && !line_only_has_assertion {
if ops.is_empty() && !line.is_empty() {
println!("no operations for this line...");
@@ -305,8 +304,9 @@ fn main() {
};
if !syntaxes_path.is_empty() {
println!("loading syntax definitions from {}", syntaxes_path);
ss.load_syntaxes(&syntaxes_path, true).unwrap(); // note that we load the version with newlines
ss.link_syntaxes();
let mut builder = SyntaxSetBuilder::new();
builder.load_syntaxes(&syntaxes_path, true).unwrap(); // note that we load the version with newlines
ss = builder.build();
}

let out_opts = OutputOptions {
49 changes: 26 additions & 23 deletions src/dumps.rs
@@ -80,7 +80,7 @@ pub fn from_dump_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T>
#[cfg(all(feature = "parsing", feature = "assets", any(feature = "dump-load", feature = "dump-load-rs")))]
impl SyntaxSet {
/// Instantiates a new syntax set from a binary dump of
/// Sublime Text's default open source syntax definitions and then links it.
/// Sublime Text's default open source syntax definitions.
/// These dumps are included in this library's binary for convenience.
///
/// This method loads the version for parsing line strings with no `\n` characters at the end.
@@ -90,24 +90,21 @@ impl SyntaxSet {
/// This is the recommended way of creating a syntax set for
/// non-advanced use cases. It is also significantly faster than loading the YAML files.
///
/// Note that you can load additional syntaxes after doing this,
/// you'll just have to link again. If you want you can even
/// use the fact that SyntaxDefinitions are serializable with
/// Note that you can load additional syntaxes after doing this. If you want
/// you can even use the fact that SyntaxDefinitions are serializable with
/// the bincode crate to cache dumps of additional syntaxes yourself.
pub fn load_defaults_nonewlines() -> SyntaxSet {
let mut ps: SyntaxSet = from_binary(include_bytes!("../assets/default_nonewlines.\
let ss: SyntaxSet = from_binary(include_bytes!("../assets/default_nonewlines.\
packdump"));
ps.link_syntaxes();
ps
ss
}

/// Same as `load_defaults_nonewlines` but for parsing line strings with newlines at the end.
/// These are separate methods because thanks to linker garbage collection, only the serialized
/// dumps for the method(s) you call will be included in the binary (each is ~200kb for now).
pub fn load_defaults_newlines() -> SyntaxSet {
let mut ps: SyntaxSet = from_binary(include_bytes!("../assets/default_newlines.packdump"));
ps.link_syntaxes();
ps
let ss: SyntaxSet = from_binary(include_bytes!("../assets/default_newlines.packdump"));
ss
}
}

@@ -130,29 +127,35 @@ mod tests {
#[test]
fn can_dump_and_load() {
use super::*;
use parsing::SyntaxSet;
let mut ps = SyntaxSet::new();
ps.load_syntaxes("testdata/Packages", false).unwrap();
use parsing::SyntaxSetBuilder;
let mut builder = SyntaxSetBuilder::new();
builder.load_syntaxes("testdata/Packages", false).unwrap();
let ss = builder.build();

let bin = dump_binary(&ps);
let bin = dump_binary(&ss);
println!("{:?}", bin.len());
let ps2: SyntaxSet = from_binary(&bin[..]);
assert_eq!(ps.syntaxes().len(), ps2.syntaxes().len());
let ss2: SyntaxSet = from_binary(&bin[..]);
assert_eq!(ss.syntaxes().len(), ss2.syntaxes().len());
}

#[cfg(all(feature = "yaml-load", any(feature = "dump-create", feature = "dump-create-rs"), any(feature = "dump-load", feature = "dump-load-rs")))]
#[test]
fn dump_is_deterministic() {
use super::*;
use parsing::SyntaxSet;
use parsing::SyntaxSetBuilder;

let mut ps1 = SyntaxSet::new();
ps1.load_syntaxes("testdata/Packages", false).unwrap();
let bin1 = dump_binary(&ps1);
let mut builder1 = SyntaxSetBuilder::new();
builder1.load_syntaxes("testdata/Packages", false).unwrap();
let ss1 = builder1.build();
let bin1 = dump_binary(&ss1);

let mut ps2 = SyntaxSet::new();
ps2.load_syntaxes("testdata/Packages", false).unwrap();
let bin2 = dump_binary(&ps2);
let mut builder2 = SyntaxSetBuilder::new();
builder2.load_syntaxes("testdata/Packages", false).unwrap();
let ss2 = builder2.build();
let bin2 = dump_binary(&ss2);
// This is redundant, but assert_eq! can be really slow on a large
// vector, so check the length first to fail faster.
assert_eq!(bin1.len(), bin2.len());
assert_eq!(bin1, bin2);
}
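The determinism this test checks comes from sorting the context map before dumping (commit 36fd179). A minimal stdlib sketch of why ordered iteration yields byte-identical output — `dump` here is a hypothetical serializer, not syntect's:

```rust
use std::collections::BTreeMap;

// BTreeMap iterates in key order, so serializing its entries produces
// byte-identical output on every run. HashMap iteration order is
// randomized per process, which is what made the dump nondeterministic.
fn dump(map: &BTreeMap<String, u32>) -> String {
    map.iter().map(|(k, v)| format!("{}={};", k, v)).collect()
}

fn main() {
    let mut contexts = BTreeMap::new();
    contexts.insert("main".to_string(), 0);
    contexts.insert("comment".to_string(), 1);
    // Same contents always serialize identically, regardless of insertion order.
    assert_eq!(dump(&contexts), "comment=1;main=0;");
    println!("{}", dump(&contexts));
}
```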
