diff --git a/runtime/lua/vim/net.lua b/runtime/lua/vim/net.lua
index cd36e3027a5329..888f29b39354c1 100644
--- a/runtime/lua/vim/net.lua
+++ b/runtime/lua/vim/net.lua
@@ -25,7 +25,7 @@ function M.request(url, opts, on_response)
   local retry = opts.retry or 3
 
   -- Build curl command
-  local args = { 'curl' }
+  local args = { 'curl', '--max-time', '5' }
   if opts.verbose then
     table.insert(args, '--verbose')
   else
diff --git a/scripts/check_urls.lua b/scripts/check_urls.lua
new file mode 100755
index 00000000000000..05a3468fa90d86
--- /dev/null
+++ b/scripts/check_urls.lua
@@ -0,0 +1,92 @@
+#!/usr/bin/env -S nvim -l
+
+-- Finds unreachable URLs in help files.
+--
+-- Usage:
+--    $ ./scripts/check_urls.lua [DIR...]
+--
+-- [DIR...] defaults to all 'doc' directories in the runtimepath.
+
+local ts = vim.treesitter
+
+local query = ts.query.parse('vimdoc', '(url) @url')
+
+---Read and return full content of given file path.
+---@param path string
+---@return string
+local function read_file(path)
+  local fd = assert(vim.uv.fs_open(path, 'r', tonumber('644', 8)))
+  local stat = assert(vim.uv.fs_fstat(fd))
+  local data = assert(vim.uv.fs_read(fd, stat.size, 0))
+  assert(vim.uv.fs_close(fd))
+  return data
+end
+
+---Extract URLs from a vimdoc file using the vimdoc TS parser.
+---@param helpfile string Path to help file
+---@return string[] # list of URLs found in the document
+local function extract_urls(helpfile)
+  ---@type string[]
+  local urls = {}
+  local source = read_file(helpfile)
+  local tree = ts.get_string_parser(source, 'vimdoc'):parse()[1]
+
+  for id, node in query:iter_captures(tree:root(), source) do
+    if query.captures[id] == 'url' then
+      local url = ts.get_node_text(node, source)
+      -- tree-sitter-vimdoc parses these as part of the url
+      if vim.endswith(url, '.') or vim.endswith(url, ',') then
+        url = url:sub(1, -2)
+      end
+      urls[#urls + 1] = url
+    end
+  end
+
+  return urls
+end
+
+local function run()
+  local dirs = vim.list_slice(_G.arg, 1)
+  if #dirs < 1 then
+    dirs = vim.api.nvim_get_runtime_file('doc', true)
+  end
+
+  ---@type string[]
+  local help_files = {}
+  for _, dir in ipairs(dirs) do
+    vim.list_extend(
+      help_files,
+      vim.fs.find(function(name, _)
+        return vim.endswith(name, '.txt')
+      end, { path = dir, type = 'file', limit = math.huge })
+    )
+  end
+
+  ---@type table<string, string[]>
+  local all_urls = {}
+  local requests = 0
+  for _, file in ipairs(help_files) do
+    local urls = extract_urls(file)
+    requests = requests + #urls
+    all_urls[file] = urls
+  end
+
+  for file, file_urls in pairs(all_urls) do
+    for _, url in ipairs(file_urls) do
+      vim.net.request(url, { retry = 3 }, function(err, _)
+        if err then
+          vim.print(('Unreachable url %s in %s'):format(url, file))
+        end
+        requests = requests - 1
+        if requests <= 0 then
+          vim.uv.stop()
+        end
+      end)
+    end
+  end
+
+  -- wait for all pending async requests to finish (by calling vim.uv.stop())
+  vim.uv.run()
+end
+
+run()
diff --git a/scripts/check_urls.vim b/scripts/check_urls.vim
deleted file mode 100644
index b75dc29c48278e..00000000000000
--- a/scripts/check_urls.vim
+++ /dev/null
@@ -1,89 +0,0 @@
-" Test for URLs in help documents.
-" -" Opens a new window with all found URLS followed by return code from curl -" (anything other than 0 means unreachable) -" -" Written by Christian Brabandt. - -func Test_check_URLs() -"20.10.23, added by Restorer - if has("win32") - let s:outdev = 'nul' - else - let s:outdev = '/dev/null' - endif -" Restorer: For Windows users. If "curl" or "wget" is installed on the system -" but not in %PATH%, add the full path to them to %PATH% environment variable. - if executable('curl') - " Note: does not follow redirects! - let s:command1 = 'curl --silent --max-time 5 --fail --output ' ..s:outdev.. ' --head ' - let s:command2 = "" - elseif executable('wget') - " Note: only allow a couple of redirects - let s:command1 = 'wget --quiet -S --spider --max-redirect=2 --timeout=5 --tries=2 -O ' ..s:outdev.. ' ' - let s:command2 = "" - elseif has("win32") "20.10.23, added by Restorer - if executable('powershell') - if 2 == system('powershell -nologo -noprofile "$psversiontable.psversion.major"') - echoerr 'To work in OS Windows requires the program "PowerShell" version 3.0 or higher' - return - endif - let s:command1 = - \ "powershell -nologo -noprofile \"{[Net.ServicePointManager]::SecurityProtocol = 'Tls12, Tls11, Tls, Ssl3'};try{(Invoke-WebRequest -MaximumRedirection 2 -TimeoutSec 5 -Uri " - let s:command2 = ').StatusCode}catch{exit [int]$Error[0].Exception.Status}"' - endif - else - echoerr 'Only works when "curl" or "wget", or "powershell" is available' - return - endif - - " Do the testing. - set report =999 - set nomore shm +=s - - let pat='\(https\?\|ftp\)://[^\t* ]\+' - exe 'helpgrep' pat - helpclose - - let urls = map(getqflist(), 'v:val.text') - " do not use submatch(1)! 
-  let urls = map(urls, {key, val -> matchstr(val, pat)})
-  " remove examples like user@host (invalid urls)
-  let urls = filter(urls, 'v:val !~ "@"')
-  " Remove example URLs which are invalid
-  let urls = filter(urls, {key, val -> val !~ '\<\(\(my\|some\)\?host\|machine\|hostname\|file\)\>'})
-  new
-  put =urls
-  " remove some more invalid items
-  " empty lines
-  "20.10.23, Restorer: '_' is a little faster, see `:h global`
-  v/./d _
-  " remove # anchors
-  %s/#.*$//e
-  " remove trailing stuff (parenthesis, dot, comma, quotes), but only for HTTP
-  " links
-  g/^h/s#[.),'"`/>][:.,]\?$##
-  g#^[hf]t\?tp:/\(/\?\.*\)$#d _
-  silent! g/ftp://,$/d _
-  silent! g/=$/d _
-  let a = getline(1,'$')
-  let a = uniq(sort(a))
-  %d _
-  call setline(1, a)
-
-  %s/.*/\=TestURL(submatch(0))/
-
-  " highlight the failures
-  /.* \([0-9]*[1-9]\|[0-9]\{2,}\)$
-endfunc
-
-func TestURL(url)
-  " Relies on the return code to determine whether a page is valid
-  echom printf("Testing URL: %d/%d %s", line('.'), line('$'), a:url)
-  call system(s:command1 .. shellescape(a:url) .. s:command2)
-  return printf("%s %d", a:url, v:shell_error)
-endfunc
-
-call Test_check_URLs()
-
-" vim: sw=2 sts=2 et