Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 9420773

Browse files
committed
cut: Add cut.py and test script
1 parent 393752c commit 9420773

File tree

3 files changed

+296
-1
lines changed

3 files changed

+296
-1
lines changed

tests/cut.sh

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env sh

# Smoke tests for `python -m userland cut`.
# -e aborts on the first failing check, -u rejects unset variables and
# -x traces every command as it runs.
set -eux

# A literal newline, used below as a field delimiter.
newline='
'

## bytes: prefix, closed and open-ended ranges in one list

output="$(echo 'abcdefghi' | python -m userland cut -b -3,5-6,8-)"

[ "${output}" = 'abcefhi' ]

## bytes: NUL-terminated records
## (the expected value contains no NULs, so this relies on the command
## substitution discarding them)

output="$(printf 'foo\0bar' | python -m userland cut -b 3 -z)"

[ "${output}" = 'or' ]

## fields: explicit delimiter

output="$(echo 'foo:bar' | python -m userland cut -f 2 -d ':')"

[ "${output}" = 'bar' ]

## fields: inverted selection

output="$(echo 'foo:bar' | python -m userland cut -f 2 -d ':' --complement)"

[ "${output}" = 'foo' ]

## fields: lines without the delimiter are suppressed by -s

output="$(printf 'foo\tbar\naaa\n' | python -m userland cut -f 2 -s)"

[ "${output}" = 'bar' ]

## fields: custom output delimiter

output="$(echo 'foo:bar' | python -m userland cut -f 1,2 -d ':' \
    --output-delimiter='d')"

[ "${output}" = 'foodbar' ]

## fields: newline used as the field delimiter

output="$(printf 'foo\nbar' | python -m userland cut -f 2 -d "${newline}")"

[ "${output}" = 'bar' ]

## fields: newline delimiter with NUL-terminated records and -s

output="$(printf 'foo\0bar\nx' | python -m userland cut -f 2 -d "${newline}" -s -z)"

[ "${output}" = 'x' ]

exit 0

userland/core/io.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
import sys
3-
from typing import Any, Generator
3+
from typing import Any, Generator, IO
44

55

66
def perror(*errors: Any) -> None:
@@ -18,3 +18,21 @@ def readwords_stdin() -> Generator[str]:
1818
def readwords_stdin_raw() -> Generator[bytes]:
    """Yield each whitespace-separated word of standard input as bytes."""
    for raw_line in sys.stdin.buffer:
        # bytes.split() with no argument splits on runs of ASCII whitespace.
        for word in raw_line.split():
            yield word
21+
22+
23+
def get_lines_by_delimiter[T: (
24+
str,
25+
bytes,
26+
)](stream: IO[T], delimiter: T) -> Generator[T]:
27+
joiner = type(delimiter)()
28+
line = []
29+
30+
while char := stream.read(1):
31+
if char == delimiter:
32+
yield joiner.join(line)
33+
line.clear()
34+
else:
35+
line.append(char)
36+
37+
if line:
38+
yield joiner.join(line)

userland/utilities/cut.py

+221
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
import sys
2+
from typing import BinaryIO, Callable, Iterable, cast
3+
4+
from .. import core
5+
6+
7+
# A parsed LIST argument: each entry is either a single position or a
# (start, end) pair, where an end of None means the range is open-ended.
type Ranges = list[int | tuple[int, int | None]]
8+
# Predicate over a position; the cutters in this module call it with 1-based
# byte/field numbers and keep the item when it returns True.
type RangeChecker = Callable[[int], bool]
9+
10+
# Transforms one input record into the bytes to print, or None when nothing
# should be printed for that record.
type Cutter = Callable[[bytes], bytes | None]
11+
12+
13+
def get_check_range(ranges: Ranges, complement: bool) -> RangeChecker:
14+
def check_range(pos: int) -> bool:
15+
for r in ranges:
16+
match r:
17+
case [min_pos, None]:
18+
if pos >= min_pos:
19+
return True
20+
case [min_pos, max_pos]:
21+
if min_pos <= pos <= max_pos:
22+
return True
23+
case wanted_pos:
24+
if pos == wanted_pos:
25+
return True
26+
27+
return False
28+
29+
return (lambda pos: not check_range(pos)) if complement else check_range
30+
31+
32+
def get_cut_by_bytes(check_range: RangeChecker, line_terminator: bytes) -> Cutter:
    """Build a cutter that keeps the selected bytes of each record.

    Args:
        check_range: Predicate called with 1-based byte positions; bytes for
            which it returns true are kept.
        line_terminator: Bytes appended to every output record.  If the
            incoming record still ends with this terminator it is removed
            first, so the terminator is never counted as a byte position and
            never emitted twice.

    Returns:
        A callable mapping one input record to its terminated output record.
    """

    def cut_by_bytes(data: bytes) -> bytes:
        # Records read by iterating a binary stream keep their trailing
        # newline; it is framing, not data, so drop it before selecting.
        record = data.removesuffix(line_terminator)

        selected = bytes(
            byte for position, byte in enumerate(record, start=1) if check_range(position)
        )

        return selected + line_terminator

    return cut_by_bytes
40+
41+
42+
def get_cut_by_fields(
    check_range: RangeChecker,
    input_delimiter: bytes,
    output_delimiter: bytes,
    only_delimited: bool,
) -> Cutter:
    """Build a cutter that keeps the selected delimiter-separated fields.

    Args:
        check_range: Predicate called with 1-based field numbers; fields for
            which it returns true are kept.
        input_delimiter: Separator used to split each record into fields.
        output_delimiter: Separator placed between the kept fields.
        only_delimited: When true, records that do not contain
            ``input_delimiter`` produce ``None`` instead of being passed
            through unchanged.

    Returns:
        A callable mapping one input record to the joined kept fields, to
        the unchanged record, or to ``None``.
    """

    def cut_by_fields(data: bytes) -> bytes | None:
        columns = data.split(input_delimiter)

        if len(columns) >= 2:
            kept = [
                column
                for number, column in enumerate(columns, start=1)
                if check_range(number)
            ]
            return output_delimiter.join(kept)

        # The record has no delimiter at all.
        if only_delimited:
            return None

        return data

    return cut_by_fields
59+
60+
61+
def cut_and_print_stream(stream: Iterable[bytes], cutter: Cutter) -> None:
    """Apply ``cutter`` to every record of ``stream`` and write the results.

    Output goes to the binary layer of standard output and is flushed after
    each written record.  Records for which ``cutter`` returns ``None`` are
    skipped.

    Args:
        stream: Iterable of input records.
        cutter: Function turning one record into output bytes or ``None``.
    """
    for record in stream:
        chunk = cutter(record)

        if chunk is None:
            continue

        sys.stdout.buffer.write(chunk)
        sys.stdout.buffer.flush()
66+
67+
68+
# Command-line interface of ``cut``.  Exactly one of -b, -c and -f must be
# given; that rule is enforced by the command entry point, not by the parser.
parser = core.ExtendedOptionParser(
    usage="%prog OPTION... [FILE]...",
    description="Print selected parts of lines from each FILE to standard output.",
)

# Selection lists.  -c is handled exactly like -b.
parser.add_option("-b", "--bytes", metavar="LIST", help="select bytes in LIST")
parser.add_option("-c", "--characters", metavar="LIST", help="identical to -b")
parser.add_option("-f", "--fields", metavar="LIST", help="select fields in LIST")

parser.add_option("--complement", action="store_true", help="invert selection")

# Field-mode options.
parser.add_option(
    "-s",
    "--only-delimited",
    action="store_true",
    help="ignore lines not containing the delimiter",
)
parser.add_option(
    "-d",
    "--delimiter",
    metavar="STRING",
    help="use STRING instead of TAB as field delimiter",
)
parser.add_option(
    "--output-delimiter",
    metavar="STRING",
    help="use STRING instead of input delimiter as output delimiter",
)

parser.add_option(
    "-z",
    "--zero-terminated",
    action="store_true",
    help="line delimiter is NUL instead of newline",
)

parser.add_option(
    "-n", action="store_true", help="(ignored; present for POSIX compatibility)"
)
104+
105+
106+
def parse_range(range_specs: str) -> Ranges:
107+
ranges: Ranges = []
108+
109+
for range_spec in range_specs.split(","):
110+
parts = range_spec.split("-")
111+
112+
try:
113+
match parts:
114+
case [n]:
115+
ranges.append(int(n))
116+
case [n, ""]:
117+
ranges.append((int(n), None))
118+
case ["", m]:
119+
ranges.append((0, int(m)))
120+
case [n, m]:
121+
ranges.append((int(n), int(m)))
122+
case _:
123+
raise ValueError
124+
except ValueError:
125+
parser.error(f"invalid range specification: {range_specs}")
126+
127+
return ranges
128+
129+
130+
@core.command(parser)
131+
def python_userland_cut(opts, args: list[str]) -> int:
132+
cutter: Cutter
133+
134+
match (opts.bytes, opts.characters, opts.fields):
135+
case (None, None, None):
136+
parser.error("expected one of --bytes, --characters or --fields")
137+
case (byte_range_spec, None, None) | (None, byte_range_spec, None):
138+
if opts.delimiter:
139+
parser.error("--delimiter is only allowed with --fields")
140+
141+
if opts.only_delimited:
142+
parser.error("--only-delimited is only allowed with --fields")
143+
144+
cutter = get_cut_by_bytes(
145+
check_range=get_check_range(
146+
parse_range(cast(str, byte_range_spec)), opts.complement
147+
),
148+
line_terminator=b"\0" if opts.zero_terminated else b"\n",
149+
)
150+
case (None, None, field_range_spec):
151+
opts.delimiter = opts.delimiter or "\t"
152+
153+
if len(opts.delimiter) > 1:
154+
parser.error("the delimiter must be a single character")
155+
156+
cutter = get_cut_by_fields(
157+
check_range=get_check_range(
158+
parse_range(field_range_spec), opts.complement
159+
),
160+
input_delimiter=(input_delimiter := opts.delimiter.encode()),
161+
output_delimiter=(
162+
opts.output_delimiter.encode()
163+
if opts.output_delimiter is not None
164+
else input_delimiter
165+
),
166+
only_delimited=opts.only_delimited,
167+
)
168+
case _:
169+
parser.error("only one list may be specified")
170+
171+
append_newline = False
172+
173+
# This is a hack to handle "\n" as a field delimiter.
174+
def process_line_stream(stream: BinaryIO) -> Iterable[bytes]:
175+
nonlocal append_newline
176+
177+
if not (opts.fields and opts.delimiter == "\n"):
178+
return stream
179+
180+
data = stream.read()
181+
if data and data[-1] == ord(b"\n"):
182+
# Don't treat the last newline as a delimiter.
183+
data = data[:-1]
184+
append_newline = True
185+
186+
return (data for _ in (None,))
187+
188+
failed = False
189+
190+
for name in args or ["-"]:
191+
append_newline = False
192+
193+
if name == "-":
194+
cut_and_print_stream(
195+
(
196+
core.get_lines_by_delimiter(sys.stdin.buffer, b"\0")
197+
if opts.zero_terminated
198+
else process_line_stream(sys.stdin.buffer)
199+
),
200+
cutter,
201+
)
202+
else:
203+
try:
204+
with open(name, "rb") as f:
205+
cut_and_print_stream(
206+
(
207+
core.get_lines_by_delimiter(f, b"\0")
208+
if opts.zero_terminated
209+
else process_line_stream(f)
210+
),
211+
cutter,
212+
)
213+
except OSError as e:
214+
failed = True
215+
core.perror(e)
216+
continue
217+
218+
if append_newline:
219+
print()
220+
221+
return int(failed)

0 commit comments

Comments
 (0)