Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 579cf4a

Browse files
authored
Merge pull request #19424 from github/tausbn/python-extract-hidden-file-by-default
Python: Extract files in hidden dirs by default
2 parents cadcb20 + 9ee3e4c commit 579cf4a

File tree

15 files changed

+69
-37
lines changed

15 files changed

+69
-37
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
name: Test Config
2+
paths-ignore:
3+
- "**/.*/**"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
| name |
2+
+-------------------------------+
3+
| .hidden_file.py |
4+
| another_non_hidden.py |
5+
| foo.py |
6+
| visible_file_in_hidden_dir.py |
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
| name |
2+
+-----------------+
3+
| .hidden_file.py |
4+
| foo.py |
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import python
2+
3+
select any(File f).getShortName() as name order by name

python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/internal_non_hidden/another_non_hidden.py

Whitespace-only changes.

python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/visible_file_in_hidden_dir.py

Whitespace-only changes.

python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_file.py

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
print(42)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
2+
3+
set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
4+
5+
set -x
6+
7+
CODEQL=${CODEQL:-codeql}
8+
9+
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
10+
cd "$SCRIPTDIR"
11+
12+
rm -rf db db-skipped
13+
14+
# Test 1: Default behavior should be to extract files in hidden directories
15+
$CODEQL database create db --language python --source-root repo_dir/
16+
$CODEQL query run --database db query.ql > query-default.actual
17+
diff query-default.expected query-default.actual
18+
19+
# Test 2: The default behavior can be overridden by setting `paths-ignore` in the config file
20+
$CODEQL database create db-skipped --language python --source-root repo_dir/ --codescanning-config=config.yml
21+
$CODEQL query run --database db-skipped query.ql > query-skipped.actual
22+
diff query-skipped.expected query-skipped.actual
23+
24+
rm -rf db db-skipped

python/extractor/semmle/path_filters.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,27 @@ def glob_part_to_regex(glob, add_sep):
4141

4242
def glob_to_regex(glob, prefix=""):
4343
'''Convert entire glob to a compiled regex'''
44+
# When the glob ends in `/`, we need to remember this so that we don't accidentally add an
45+
# extra separator to the final regex.
46+
end_sep = "" if glob.endswith("/") else SEP
4447
glob = glob.strip().strip("/")
4548
parts = glob.split("/")
4649
#Trailing '**' is redundant, so strip it off.
4750
if parts[-1] == "**":
4851
parts = parts[:-1]
4952
if not parts:
5053
return ".*"
54+
# The `glob.strip("/")` call above will have removed all trailing slashes, but if there was at
55+
# least one trailing slash, we want there to be an extra part, so we add it explicitly here in
56+
# that case, using the emptiness of `end_sep` as a proxy.
57+
if end_sep == "":
58+
parts += [""]
5159
parts = [ glob_part_to_regex(escape(p), True) for p in parts[:-1] ] + [ glob_part_to_regex(escape(parts[-1]), False) ]
5260
# we need to escape the prefix, specifically because on windows the prefix will be
5361
# something like `C:\\folder\\subfolder\\` and without escaping the
5462
# backslash-path-separators will get interpreted as regex escapes (which might be
5563
# invalid sequences, causing the extractor to crash)
56-
full_pattern = escape(prefix) + ''.join(parts) + "(?:" + SEP + ".*|$)"
64+
full_pattern = escape(prefix) + ''.join(parts) + "(?:" + end_sep + ".*|$)"
5765
return re.compile(full_pattern)
5866

5967
def filter_from_pattern(pattern, prev_filter, prefix):

python/extractor/semmle/traverser.py

Lines changed: 10 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -83,46 +83,21 @@ def _treewalk(self, path):
8383
self.logger.debug("Ignoring %s (symlink)", fullpath)
8484
continue
8585
if isdir(fullpath):
86-
if fullpath in self.exclude_paths or is_hidden(fullpath):
87-
if is_hidden(fullpath):
88-
self.logger.debug("Ignoring %s (hidden)", fullpath)
89-
else:
90-
self.logger.debug("Ignoring %s (excluded)", fullpath)
91-
else:
92-
empty = True
93-
for item in self._treewalk(fullpath):
94-
yield item
95-
empty = False
96-
if not empty:
97-
yield fullpath
86+
if fullpath in self.exclude_paths:
87+
self.logger.debug("Ignoring %s (excluded)", fullpath)
88+
continue
89+
90+
empty = True
91+
for item in self._treewalk(fullpath):
92+
yield item
93+
empty = False
94+
if not empty:
95+
yield fullpath
9896
elif self.filter(fullpath):
9997
yield fullpath
10098
else:
10199
self.logger.debug("Ignoring %s (filter)", fullpath)
102100

103-
104-
if os.name== 'nt':
105-
import ctypes
106-
107-
def is_hidden(path):
108-
#Magical windows code
109-
try:
110-
attrs = ctypes.windll.kernel32.GetFileAttributesW(str(path))
111-
if attrs == -1:
112-
return False
113-
if attrs&2:
114-
return True
115-
except Exception:
116-
#Not sure what to log here, probably best to carry on.
117-
pass
118-
return os.path.basename(path).startswith(".")
119-
120-
else:
121-
122-
def is_hidden(path):
123-
return os.path.basename(path).startswith(".")
124-
125-
126101
def exclude_filter_from_options(options):
127102
if options.exclude_package:
128103
choices = '|'.join(mod.replace('.', r'\.') for mod in options.exclude_package)

python/extractor/semmle/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
#Semantic version of extractor.
1212
#Update this if any changes are made
13-
VERSION = "7.1.2"
13+
VERSION = "7.1.3"
1414

1515
PY_EXTENSIONS = ".py", ".pyw"
1616

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
5+
- The Python extractor now extracts files in hidden directories by default. If you would like to skip files in hidden directories, add `paths-ignore: ["**/.*/**"]` to your [Code Scanning config](https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#specifying-directories-to-scan). If you would like to skip all hidden files, you can use `paths-ignore: ["**/.*"]`. When using the CodeQL CLI for extraction, specify the configuration (creating the configuration file if necessary) using the `--codescanning-config` option.

python/ql/test/2/extractor-tests/hidden/test.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
| .hidden/inner/test.py |
2+
| .hidden/module.py |
13
| folder/module.py |
24
| package |
35
| package/__init__.py |

python/ql/test/extractor-tests/filter-option/Test.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
| Module foo.bar |
44
| Module foo.include_test |
55
| Package foo |
6+
| Script hidden_foo.py |

0 commit comments

Comments
 (0)