Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 36b44b1

Browse files
committed
add zip-byte-offset support to zip utils
Signed-off-by: Alex Goodman <[email protected]>
1 parent 8f85c8a commit 36b44b1

File tree

1 file changed

+149
-0
lines changed

1 file changed

+149
-0
lines changed

internal/file/zip_read_closer.go

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
package file
2+
3+
import (
4+
"archive/zip"
5+
"encoding/binary"
6+
"fmt"
7+
"io"
8+
"os"
9+
)
10+
11+
// directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
12+
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
13+
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
14+
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
15+
16+
// directoryEndLen is the byte length of the fixed part of the end-of-central-directory record as parsed in this
// file: a 4-byte signature, 16 bytes of header fields, and the trailing 2-byte comment length.
const directoryEndLen = 22
17+
18+
// ZipReadCloser pairs a zip.Reader with the io.Closer that owns the underlying data. It can be used wherever a
// zip.ReadCloser (as returned by zip.OpenReader) would be, and it also handles archives that have arbitrary bytes
// in front of the zip data (common with self-extracting jars).
type ZipReadCloser struct {
	// Reader gives access to the archive contents.
	*zip.Reader
	// Closer releases the resource backing the Reader.
	io.Closer
}
24+
25+
// OpenZip provides a ZipReadCloser for the given filepath.
26+
func OpenZip(filepath string) (*ZipReadCloser, error) {
27+
f, err := os.Open(filepath)
28+
if err != nil {
29+
return nil, err
30+
}
31+
fi, err := f.Stat()
32+
if err != nil {
33+
f.Close()
34+
return nil, err
35+
}
36+
37+
// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
38+
// need to find the start of the archive and keep track of this offset.
39+
offset, err := findArchiveStartOffset(f, fi.Size())
40+
if err != nil {
41+
return nil, fmt.Errorf("cannot find beginning of zip archive=%q : %w", filepath, err)
42+
}
43+
44+
if _, err := f.Seek(0, io.SeekStart); err != nil {
45+
return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
46+
}
47+
48+
size := fi.Size() - int64(offset)
49+
50+
r, err := zip.NewReader(io.NewSectionReader(f, int64(offset), size), size)
51+
if err != nil {
52+
return nil, fmt.Errorf("unable to open ZipReadCloser @ %q: %w", filepath, err)
53+
}
54+
55+
return &ZipReadCloser{
56+
Reader: r,
57+
Closer: f,
58+
}, nil
59+
}
60+
61+
// readBuf is a byte slice that is consumed from the front as little-endian values are read out of it.
type readBuf []byte

// uint16 decodes the first two bytes as a little-endian value and advances the buffer past them.
func (b *readBuf) uint16() uint16 {
	data := *b
	value := binary.LittleEndian.Uint16(data)
	*b = data[2:]
	return value
}

// uint32 decodes the first four bytes as a little-endian value and advances the buffer past them.
func (b *readBuf) uint32() uint32 {
	data := *b
	value := binary.LittleEndian.Uint32(data)
	*b = data[4:]
	return value
}
74+
75+
// directoryEnd holds the header fields decoded from a zip end-of-central-directory record.
type directoryEnd struct {
	// unused
	diskNbr uint32
	// unused
	dirDiskNbr uint32
	// unused
	dirRecordsThisDisk uint64

	directoryRecords uint64
	directorySize    uint64
	// relative to file
	directoryOffset uint64
}
83+
84+
// note: this is derived from readDirectoryEnd within the archive/zip package
85+
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
86+
// look for directoryEndSignature in the last 1k, then in the last 65k
87+
var buf []byte
88+
var directoryEndOffset int64
89+
for i, bLen := range []int64{1024, 65 * 1024} {
90+
if bLen > size {
91+
bLen = size
92+
}
93+
buf = make([]byte, int(bLen))
94+
if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
95+
return 0, err
96+
}
97+
if p := findSignatureInBlock(buf); p >= 0 {
98+
buf = buf[p:]
99+
directoryEndOffset = size - bLen + int64(p)
100+
break
101+
}
102+
if i == 1 || bLen == size {
103+
return 0, zip.ErrFormat
104+
}
105+
}
106+
107+
if buf == nil {
108+
// we were unable to find the directoryEndSignature block
109+
return 0, zip.ErrFormat
110+
}
111+
112+
// read header into struct
113+
b := readBuf(buf[4:]) // skip signature
114+
d := &directoryEnd{
115+
diskNbr: uint32(b.uint16()),
116+
dirDiskNbr: uint32(b.uint16()),
117+
dirRecordsThisDisk: uint64(b.uint16()),
118+
directoryRecords: uint64(b.uint16()),
119+
directorySize: uint64(b.uint32()),
120+
directoryOffset: uint64(b.uint32()),
121+
}
122+
// Calculate where the zip data actually begins
123+
startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
124+
125+
// These values mean that the file can be a zip64 file
126+
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
127+
startOfArchive = 0 // Prefixed data not supported
128+
}
129+
130+
// Make sure directoryOffset points to somewhere in our file.
131+
if o := int64(d.directoryOffset); o < 0 || o >= size {
132+
return 0, zip.ErrFormat
133+
}
134+
return startOfArchive, nil
135+
}
136+
137+
func findSignatureInBlock(b []byte) int {
138+
for i := len(b) - directoryEndLen; i >= 0; i-- {
139+
// defined from directoryEndSignature
140+
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
141+
// n is length of comment
142+
n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
143+
if n+directoryEndLen+i <= len(b) {
144+
return i
145+
}
146+
}
147+
}
148+
return -1
149+
}

0 commit comments

Comments
 (0)