diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a576ed9..af8cec4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,16 +18,18 @@ jobs: strategy: fail-fast: false matrix: - go-version: [1.19.x] + go-version: [1.20.x] platform: [ubuntu-latest] runs-on: ${{ matrix.platform }} steps: - name: Checkout - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 - name: Setup Go - uses: actions/setup-go@c4a742cab115ed795e34d4513e2cf7d472deb55f # v3.3.1 + uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 with: go-version: ${{ matrix.go-version }} + - name: Run Verify + run: make verify - name: Run Cross Build run: make cross-build \ No newline at end of file diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 241656e..6b72734 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -30,11 +30,11 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@678fc3afe258fb2e0cdc165ccf77b85719de7b3c # v2.1.33 + uses: github/codeql-action/init@17573ee1cc1b9d061760f3a006fc4aac4f944fd5 # v2.2.4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -48,7 +48,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@678fc3afe258fb2e0cdc165ccf77b85719de7b3c # v2.1.33 + uses: github/codeql-action/autobuild@17573ee1cc1b9d061760f3a006fc4aac4f944fd5 # v2.2.4 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -61,6 +61,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@678fc3afe258fb2e0cdc165ccf77b85719de7b3c # v2.1.33 + uses: github/codeql-action/analyze@17573ee1cc1b9d061760f3a006fc4aac4f944fd5 # v2.2.4 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 1689b5d..c61b8d0 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -14,11 +14,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 - name: Setup Go - uses: actions/setup-go@c4a742cab115ed795e34d4513e2cf7d472deb55f # v3.3.1 + uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 with: - go-version: 1.19.x + go-version: 1.20.x - name: Run fuzzers env: FUZZ_TIME: 30m diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 39a6c9f..eab7647 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,15 +21,15 @@ jobs: strategy: fail-fast: false matrix: - go-version: [1.15.x, 1.16.x, 1.17.x, 1.18.x, 1.19.x] + go-version: [1.16.x, 1.17.x, 1.18.x, 1.19.x, 1.20.x] platform: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: - name: Checkout - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: 
actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 - name: Setup Go - uses: actions/setup-go@c4a742cab115ed795e34d4513e2cf7d472deb55f # v3.3.1 + uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 with: go-version: ${{ matrix.go-version }} - name: Run Tests @@ -39,11 +39,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0 + uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 - name: Setup Go - uses: actions/setup-go@c4a742cab115ed795e34d4513e2cf7d472deb55f # v3.3.1 + uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 with: - go-version: 1.19.x + go-version: 1.20.x - name: Run benchmarks run: make bench - name: Run fuzzers diff --git a/Dockerfile.arm b/Dockerfile.arm index babfdfe..9976129 100644 --- a/Dockerfile.arm +++ b/Dockerfile.arm @@ -1,4 +1,4 @@ -FROM golang:1.19@sha256:dc76ef03e54c34a00dcdca81e55c242d24b34d231637776c4bb5c1a8e8514253 +FROM golang:1.20@sha256:2edf6aab2d57644f3fe7407132a0d1770846867465a39c2083770cf62734b05d ENV GOOS=linux ENV GOARCH=arm diff --git a/Dockerfile.arm64 b/Dockerfile.arm64 index 1a0dc59..66bd094 100644 --- a/Dockerfile.arm64 +++ b/Dockerfile.arm64 @@ -1,4 +1,4 @@ -FROM golang:1.19@sha256:dc76ef03e54c34a00dcdca81e55c242d24b34d231637776c4bb5c1a8e8514253 +FROM golang:1.20@sha256:2edf6aab2d57644f3fe7407132a0d1770846867465a39c2083770cf62734b05d ENV GOOS=linux ENV GOARCH=arm64 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile index d1eac44..b24f2cb 100644 --- a/Makefile +++ b/Makefile @@ -30,3 +30,11 @@ build-nocgo: # Run cross-compilation to assure supported architectures. cross-build: build-arm build-arm64 build-nocgo + +generate: + go run sha1cdblock_amd64_asm.go -out sha1cdblock_amd64.s + sed -i 's;&\samd64;&\n// +build !noasm,gc,amd64;g' sha1cdblock_amd64.s + +verify: generate + git diff --exit-code + go vet ./... diff --git a/cgo/fallback_no_cgo.go b/cgo/fallback_no_cgo.go index 6e8bef2..c6b91ee 100644 --- a/cgo/fallback_no_cgo.go +++ b/cgo/fallback_no_cgo.go @@ -12,7 +12,7 @@ import ( // CalculateDvMask falls back to github.com/pjbgf/sha1cd/ubc implementation // due to CGO being disabled at compilation time. 
-func CalculateDvMask(W []uint32) (uint32, error) { +func CalculateDvMask(W [80]uint32) uint32 { return ubc.CalculateDvMask(W) } diff --git a/cgo/sha1.go b/cgo/sha1.go index b32dada..29f9699 100644 --- a/cgo/sha1.go +++ b/cgo/sha1.go @@ -27,28 +27,28 @@ func New() hash.Hash { type digest struct { ctx C.SHA1_CTX + h [Size]byte } -func (d *digest) sum() ([]byte, bool) { - b := make([]byte, Size) - c := C.SHA1DCFinal((*C.uchar)(unsafe.Pointer(&b[0])), &d.ctx) +func (d *digest) sum() ([Size]byte, bool) { + c := C.SHA1DCFinal((*C.uchar)(unsafe.Pointer(&d.h[0])), &d.ctx) if c != 0 { - return b, true + return d.h, true } - return b, false + return d.h, false } func (d *digest) Sum(in []byte) []byte { d0 := *d // use a copy of d to avoid race conditions. - h, _ := d0.sum() - return append(in, h...) + h, _ := d0.CollisionResistantSum(in) + return h } func (d *digest) CollisionResistantSum(in []byte) ([]byte, bool) { d0 := *d // use a copy of d to avoid race conditions. h, c := d0.sum() - return append(in, h...), c + return append(in, h[:]...), c } func (d *digest) Reset() { @@ -63,7 +63,8 @@ func Sum(data []byte) ([]byte, bool) { d := New().(*digest) d.Write(data) - return d.sum() + h, c := d.sum() + return h[:], c } func (d *digest) Write(p []byte) (nn int, err error) { diff --git a/cgo/ubc_check.go b/cgo/ubc_check.go index f8d8c6d..1d10c09 100644 --- a/cgo/ubc_check.go +++ b/cgo/ubc_check.go @@ -11,7 +11,6 @@ package cgo // } import "C" import ( - "fmt" "unsafe" ) @@ -19,10 +18,6 @@ import ( // bitconditions for all listed DVs. It returns a dvmask where each bit belonging to a DV // is set if all unavoidable bitconditions for that DV have been met. // Thus, one needs to do the recompression check for each DV that has its bit set. 
-func CalculateDvMask(W []uint32) (uint32, error) { - if len(W) < 80 { - return 0, fmt.Errorf("invalid input: len(W) must be 80, was %d", len(W)) - } - - return uint32(C.check((*C.uint32_t)(unsafe.Pointer(&W[0])))), nil +func CalculateDvMask(W [80]uint32) uint32 { + return uint32(C.check((*C.uint32_t)(unsafe.Pointer(&W[0])))) } diff --git a/go.mod b/go.mod index b62571e..a158cab 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,10 @@ module github.com/pjbgf/sha1cd -go 1.15 +go 1.19 + +require ( + github.com/mmcloughlin/avo v0.5.0 // indirect + golang.org/x/mod v0.6.0 // indirect + golang.org/x/sys v0.1.0 // indirect + golang.org/x/tools v0.2.0 // indirect +) diff --git a/go.sum b/go.sum index e69de29..8befd00 100644 --- a/go.sum +++ b/go.sum @@ -0,0 +1,37 @@ +github.com/mmcloughlin/avo v0.5.0 h1:nAco9/aI9Lg2kiuROBY6BhCI/z0t5jEvJfjWbL8qXLU= +github.com/mmcloughlin/avo v0.5.0/go.mod h1:ChHFdoV7ql95Wi7vuq2YT1bwCJqiWdZrQ1im3VujLYM= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/arch v0.1.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.6.0 h1:b9gGHsz9/HhJ3HF5DHQytPpuwocVTChQJK3AvoLRD5I= +golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod 
h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.2.0 
h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE= +golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/internal/const.go b/internal/const.go new file mode 100644 index 0000000..944a131 --- /dev/null +++ b/internal/const.go @@ -0,0 +1,42 @@ +package shared + +const ( + // Constants for the SHA-1 hash function. + K0 = 0x5A827999 + K1 = 0x6ED9EBA1 + K2 = 0x8F1BBCDC + K3 = 0xCA62C1D6 + + // Initial values for the buffer variables: h0, h1, h2, h3, h4. + Init0 = 0x67452301 + Init1 = 0xEFCDAB89 + Init2 = 0x98BADCFE + Init3 = 0x10325476 + Init4 = 0xC3D2E1F0 + + // Initial values for the temporary variables (ihvtmp0, ihvtmp1, ihvtmp2, ihvtmp3, ihvtmp4) during the SHA recompression step. + InitTmp0 = 0xD5 + InitTmp1 = 0x394 + InitTmp2 = 0x8152A8 + InitTmp3 = 0x0 + InitTmp4 = 0xA7ECE0 + + // SHA1 contains 2 buffers, each based off 5 32-bit words. + WordBuffers = 5 + + // The output of SHA1 is 20 bytes (160 bits). + Size = 20 + + // Rounds represents the number of steps required to process each chunk. + Rounds = 80 + + // SHA1 processes the input data in chunks. Each chunk contains 64 bytes. + Chunk = 64 + + // The number of pre-step compression state to store. + // Currently there are 3 pre-step compression states required: 0, 58, 65. + PreStepState = 3 + + Magic = "shacd\x01" + MarshaledSize = len(Magic) + 5*4 + Chunk + 8 +) diff --git a/sha1block.go b/sha1block.go deleted file mode 100644 index 156e1c7..0000000 --- a/sha1block.go +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -// Originally from: https://github.com/go/blob/master/src/crypto/sha1/sha1block.go - -package sha1cd - -import ( - "math/bits" - - "github.com/pjbgf/sha1cd/ubc" -) - -const ( - msize = 80 - - _K0 = 0x5A827999 - _K1 = 0x6ED9EBA1 - _K2 = 0x8F1BBCDC - _K3 = 0xCA62C1D6 -) - -// TODO: Implement SIMD support. -func block(dig *digest, p []byte) { - blockGeneric(dig, p) -} - -// blockGeneric is a portable, pure Go version of the SHA-1 block step. -// It's used by sha1block_generic.go and tests. -func blockGeneric(dig *digest, p []byte) { - var w [16]uint32 - - h0, h1, h2, h3, h4 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] - for len(p) >= chunk { - m1 := make([]uint32, msize) - bcol := false - - // Can interlace the computation of w with the - // rounds below if needed for speed. - for i := 0; i < 16; i++ { - j := i * 4 - w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) - } - - a, b, c, d, e := h0, h1, h2, h3, h4 - - // Each of the four 20-iteration rounds - // differs only in the computation of f and - // the choice of K (_K0, _K1, etc). - i := 0 - for ; i < 16; i++ { - // Store pre-step compression state for the collision detection. - dig.cs[i] = [5]uint32{a, b, c, d, e} - - f := b&c | (^b)&d - t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + _K0 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - - // Store compression state for the collision detection. - m1[i] = w[i&0xf] - } - for ; i < 20; i++ { - // Store pre-step compression state for the collision detection. - dig.cs[i] = [5]uint32{a, b, c, d, e} - - tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] - w[i&0xf] = tmp<<1 | tmp>>(32-1) - - f := b&c | (^b)&d - t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + _K0 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - - // Store compression state for the collision detection. - m1[i] = w[i&0xf] - } - for ; i < 40; i++ { - // Store pre-step compression state for the collision detection. 
- dig.cs[i] = [5]uint32{a, b, c, d, e} - - tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] - w[i&0xf] = tmp<<1 | tmp>>(32-1) - - f := b ^ c ^ d - t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + _K1 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - - // Store compression state for the collision detection. - m1[i] = w[i&0xf] - } - for ; i < 60; i++ { - // Store pre-step compression state for the collision detection. - dig.cs[i] = [5]uint32{a, b, c, d, e} - - tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] - w[i&0xf] = tmp<<1 | tmp>>(32-1) - - f := ((b | c) & d) | (b & c) - t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + _K2 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - - // Store compression state for the collision detection. - m1[i] = w[i&0xf] - } - for ; i < 80; i++ { - // Store pre-step compression state for the collision detection. - dig.cs[i] = [5]uint32{a, b, c, d, e} - - tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] - w[i&0xf] = tmp<<1 | tmp>>(32-1) - - f := b ^ c ^ d - t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + _K3 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - - // Store compression state for the collision detection. - m1[i] = w[i&0xf] - } - - h0 += a - h1 += b - h2 += c - h3 += d - h4 += e - - if mask, err := ubc.CalculateDvMask(m1); err == nil && mask != 0 { - dvs := ubc.SHA1_dvs() - for i := 0; dvs[i].DvType != 0; i++ { - if (mask & ((uint32)(1) << uint32(dvs[i].MaskB))) != 0 { - for j := 0; j < msize; j++ { - dig.m2[j] = m1[j] ^ dvs[i].Dm[j] - } - - recompressionStep(dvs[i].TestT, &dig.ihv2, &dig.ihvtmp, dig.m2, dig.cs[dvs[i].TestT]) - - if 0 == ((dig.ihvtmp[0] ^ h0) | (dig.ihvtmp[1] ^ h1) | - (dig.ihvtmp[2] ^ h2) | (dig.ihvtmp[3] ^ h3) | (dig.ihvtmp[4] ^ h4)) { - dig.col = true - bcol = true - } - } - } - } - - // Collision attacks are thwarted by hashing a detected near-collision block 3 times. 
- // Think of it as extending SHA-1 from 80-steps to 240-steps for such blocks: - // The best collision attacks against SHA-1 have complexity about 2^60, - // thus for 240-steps an immediate lower-bound for the best cryptanalytic attacks would be 2^180. - // An attacker would be better off using a generic birthday search of complexity 2^80. - if bcol { - for j := 0; j < 2; j++ { - a, b, c, d, e := h0, h1, h2, h3, h4 - - i := 0 - for ; i < 20; i++ { - f := b&c | (^b)&d - t := bits.RotateLeft32(a, 5) + f + e + m1[i] + _K0 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - } - for ; i < 40; i++ { - f := b ^ c ^ d - t := bits.RotateLeft32(a, 5) + f + e + m1[i] + _K1 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - } - for ; i < 60; i++ { - f := ((b | c) & d) | (b & c) - t := bits.RotateLeft32(a, 5) + f + e + m1[i] + _K2 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - } - for ; i < 80; i++ { - f := b ^ c ^ d - t := bits.RotateLeft32(a, 5) + f + e + m1[i] + _K3 - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - } - - h0 += a - h1 += b - h2 += c - h3 += d - h4 += e - } - } - - p = p[chunk:] - } - - dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] = h0, h1, h2, h3, h4 -} - -func recompressionStep(step int, ihvin, ihvout *[5]uint32, m2 [msize]uint32, state [5]uint32) { - a, b, c, d, e := state[0], state[1], state[2], state[3], state[4] - - // Walk backwards from current step to undo previous compression. 
- for i := 79; i >= 60; i-- { - a, b, c, d, e = b, c, d, e, a - if step > i { - b = bits.RotateLeft32(b, -30) - f := b ^ c ^ d - e -= bits.RotateLeft32(a, 5) + f + _K3 + m2[i] - } - } - for i := 59; i >= 40; i-- { - a, b, c, d, e = b, c, d, e, a - if step > i { - b = bits.RotateLeft32(b, -30) - f := ((b | c) & d) | (b & c) - e -= bits.RotateLeft32(a, 5) + f + _K2 + m2[i] - } - } - for i := 39; i >= 20; i-- { - a, b, c, d, e = b, c, d, e, a - if step > i { - b = bits.RotateLeft32(b, -30) - f := b ^ c ^ d - e -= bits.RotateLeft32(a, 5) + f + _K1 + m2[i] - } - } - for i := 19; i >= 0; i-- { - a, b, c, d, e = b, c, d, e, a - if step > i { - b = bits.RotateLeft32(b, -30) - f := b&c | (^b)&d - e -= bits.RotateLeft32(a, 5) + f + _K0 + m2[i] - } - } - - ihvin[0] = a - ihvin[1] = b - ihvin[2] = c - ihvin[3] = d - ihvin[4] = e - a = state[0] - b = state[1] - c = state[2] - d = state[3] - e = state[4] - - // Recompress blocks based on the current step. - for i := 0; i < 20; i++ { - if step <= i { - f := b&c | (^b)&d - t := bits.RotateLeft32(a, 5) + f + e + _K0 + m2[i] - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - } - } - for i := 20; i < 40; i++ { - if step <= i { - f := b ^ c ^ d - t := bits.RotateLeft32(a, 5) + f + e + _K1 + m2[i] - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - } - } - for i := 40; i < 60; i++ { - if step <= i { - f := ((b | c) & d) | (b & c) - t := bits.RotateLeft32(a, 5) + f + e + _K2 + m2[i] - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - } - } - for i := 60; i < 80; i++ { - if step <= i { - f := b ^ c ^ d - t := bits.RotateLeft32(a, 5) + f + e + _K3 + m2[i] - a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d - } - } - - ihvout[0] = ihvin[0] + a - ihvout[1] = ihvin[1] + b - ihvout[2] = ihvin[2] + c - ihvout[3] = ihvin[3] + d - ihvout[4] = ihvin[4] + e -} diff --git a/sha1cd.go b/sha1cd.go index f897e21..a69e480 100644 --- a/sha1cd.go +++ b/sha1cd.go @@ -16,6 +16,8 @@ import ( "encoding/binary" "errors" "hash" + + shared 
"github.com/pjbgf/sha1cd/internal" ) func init() { @@ -23,47 +25,26 @@ func init() { } // The size of a SHA-1 checksum in bytes. -const Size = 20 +const Size = shared.Size // The blocksize of SHA-1 in bytes. -const BlockSize = 64 - -const ( - chunk = 64 - init0 = 0x67452301 - init1 = 0xEFCDAB89 - init2 = 0x98BADCFE - init3 = 0x10325476 - init4 = 0xC3D2E1F0 -) +const BlockSize = shared.Chunk // digest represents the partial evaluation of a checksum. type digest struct { - h [5]uint32 - x [chunk]byte + h [shared.WordBuffers]uint32 + x [shared.Chunk]byte nx int len uint64 // col defines whether a collision has been found. - col bool - // cs stores the compression state for each of the SHA1's 80 steps. - cs map[int][5]uint32 - // m2 is a secondary message created XORing with ubc's DM prior to the SHA recompression step. - m2 [msize]uint32 - // ihv2 is an Intermediary Hash Value created during the SHA recompression step. - ihv2 [5]uint32 - // ihvtmp is an Intermediary Hash Value created during the SHA recompression step. - ihvtmp [5]uint32 + col bool + blockFunc func(dig *digest, p []byte) } -const ( - magic = "shacd\x01" - marshaledSize = len(magic) + 5*4 + chunk + 8 -) - func (d *digest) MarshalBinary() ([]byte, error) { - b := make([]byte, 0, marshaledSize) - b = append(b, magic...) + b := make([]byte, 0, shared.MarshaledSize) + b = append(b, shared.Magic...) 
b = appendUint32(b, d.h[0]) b = appendUint32(b, d.h[1]) b = appendUint32(b, d.h[2]) @@ -98,13 +79,13 @@ func appendUint64(b []byte, v uint64) []byte { } func (d *digest) UnmarshalBinary(b []byte) error { - if len(b) < len(magic) || string(b[:len(magic)]) != magic { + if len(b) < len(shared.Magic) || string(b[:len(shared.Magic)]) != shared.Magic { return errors.New("crypto/sha1: invalid hash state identifier") } - if len(b) != marshaledSize { + if len(b) != shared.MarshaledSize { return errors.New("crypto/sha1: invalid hash state size") } - b = b[len(magic):] + b = b[len(shared.Magic):] b, d.h[0] = consumeUint32(b) b, d.h[1] = consumeUint32(b) b, d.h[2] = consumeUint32(b) @@ -112,13 +93,13 @@ func (d *digest) UnmarshalBinary(b []byte) error { b, d.h[4] = consumeUint32(b) b = b[copy(d.x[:], b):] b, d.len = consumeUint64(b) - d.nx = int(d.len % chunk) + d.nx = int(d.len % shared.Chunk) return nil } func consumeUint64(b []byte) ([]byte, uint64) { _ = b[7] x := uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56 return b[8:], x } @@ -130,34 +111,15 @@ func consumeUint32(b []byte) ([]byte, uint32) { } func (d *digest) Reset() { - d.h[0] = init0 - d.h[1] = init1 - d.h[2] = init2 - d.h[3] = init3 - d.h[4] = init4 + d.h[0] = shared.Init0 + d.h[1] = shared.Init1 + d.h[2] = shared.Init2 + d.h[3] = shared.Init3 + d.h[4] = shared.Init4 d.nx = 0 d.len = 0 d.col = false - d.ihv2[0] = 0x0 - d.ihv2[1] = 0x0 - d.ihv2[2] = 0x0 - d.ihv2[3] = 0x0 - d.ihv2[4] = 0x0 - - d.ihvtmp[0] = 0xD5 - d.ihvtmp[1] = 0x394 - d.ihvtmp[2] = 0x8152A8 - d.ihvtmp[3] = 0x0 - d.ihvtmp[4] = 0xA7ECE0 - - for i := range d.m2 { - d.m2[i] = 0x0 - } - - for k := range d.cs { - delete(d.cs, k) - } } // New returns a new hash.Hash computing the SHA1 checksum.
The Hash also @@ -166,9 +128,17 @@ func (d *digest) Reset() { func New() hash.Hash { d := new(digest) - d.cs = map[int][5]uint32{} - d.m2 = [msize]uint32{} + d.blockFunc = block + d.Reset() + return d +} +// NewGeneric is equivalent to New but uses the Go generic implementation, +// avoiding any processor-specific optimizations. +func NewGeneric() hash.Hash { + d := new(digest) + + d.blockFunc = blockGeneric d.Reset() return d } @@ -187,15 +157,15 @@ func (d *digest) Write(p []byte) (nn int, err error) { if d.nx > 0 { n := copy(d.x[d.nx:], p) d.nx += n - if d.nx == chunk { - block(d, d.x[:]) + if d.nx == shared.Chunk { + d.blockFunc(d, d.x[:]) d.nx = 0 } p = p[n:] } - if len(p) >= chunk { - n := len(p) &^ (chunk - 1) - block(d, p[:n]) + if len(p) >= shared.Chunk { + n := len(p) &^ (shared.Chunk - 1) + d.blockFunc(d, p[:n]) p = p[n:] } if len(p) > 0 { @@ -242,77 +212,6 @@ func (d *digest) checkSum() [Size]byte { return digest } -// ConstantTimeSum computes the same result of Sum() but in constant time -func (d *digest) ConstantTimeSum(in []byte) ([]byte, error) { - d0 := *d - hash, err := d0.constSum() - if err != nil { - return nil, err - } - return append(in, hash[:]...), nil -} - -func (d *digest) constSum() ([Size]byte, error) { - var length [8]byte - l := d.len << 3 - for i := uint(0); i < 8; i++ { - length[i] = byte(l >> (56 - 8*i)) - } - - nx := byte(d.nx) - t := nx - 56 // if nx < 56 then the MSB of t is one - mask1b := byte(int8(t) >> 7) // mask1b is 0xFF iff one block is enough - - separator := byte(0x80) // gets reset to 0x00 once used - for i := byte(0); i < chunk; i++ { - mask := byte(int8(i-nx) >> 7) // 0x00 after the end of data - - // if we reached the end of the data, replace with 0x80 or 0x00 - d.x[i] = (^mask & separator) | (mask & d.x[i]) - - // zero the separator once used - separator &= mask - - if i >= 56 { - // we might have to write the length here if all fit in one block - d.x[i] |= mask1b & length[i-56] - } - } - - // compress, and only 
keep the digest if all fit in one block - block(d, d.x[:]) - - var digest [Size]byte - for i, s := range d.h { - digest[i*4] = mask1b & byte(s>>24) - digest[i*4+1] = mask1b & byte(s>>16) - digest[i*4+2] = mask1b & byte(s>>8) - digest[i*4+3] = mask1b & byte(s) - } - - for i := byte(0); i < chunk; i++ { - // second block, it's always past the end of data, might start with 0x80 - if i < 56 { - d.x[i] = separator - separator = 0 - } else { - d.x[i] = length[i-56] - } - } - - // compress, and only keep the digest if we actually needed the second block - block(d, d.x[:]) - - for i, s := range d.h { - digest[i*4] |= ^mask1b & byte(s>>24) - digest[i*4+1] |= ^mask1b & byte(s>>16) - digest[i*4+2] |= ^mask1b & byte(s>>8) - digest[i*4+3] |= ^mask1b & byte(s) - } - - return digest, nil -} - // Sum returns the SHA-1 checksum of the data. func Sum(data []byte) ([Size]byte, bool) { d := New().(*digest) diff --git a/sha1cd_test.go b/sha1cd_test.go index 4aea8c4..b6efd47 100644 --- a/sha1cd_test.go +++ b/sha1cd_test.go @@ -193,17 +193,25 @@ func TestLargeHashes(t *testing.T) { } func TestAllocations(t *testing.T) { + t.Run("generic", func(t *testing.T) { + testAllocations(NewGeneric(), t) + }) + + t.Run("native", func(t *testing.T) { + testAllocations(New(), t) + }) +} + +func testAllocations(h hash.Hash, t *testing.T) { in := []byte("hello, world!") out := make([]byte, 0, Size) - h := New() n := int(testing.AllocsPerRun(10, func() { h.Reset() h.Write(in) out = h.Sum(out[:0]) })) - //TODO: Optimise resetting state to enforce 0 allocs. 
- if n > 4 { - t.Errorf("allocs = %d, want < 5", n) + if n > 2 { + t.Errorf("allocs = %d, want < 3", n) } } diff --git a/sha1cdblock_amd64.go b/sha1cdblock_amd64.go new file mode 100644 index 0000000..95e0830 --- /dev/null +++ b/sha1cdblock_amd64.go @@ -0,0 +1,55 @@ +//go:build !noasm && gc && amd64 +// +build !noasm,gc,amd64 + +package sha1cd + +import ( + "unsafe" + + shared "github.com/pjbgf/sha1cd/internal" +) + +// sliceHeader holds the base address, length and capacity of the chunk that +// block hands over to blockAMD64. +type sliceHeader struct { + base uintptr + len int + cap int +} + +// blockAMD64 hashes the message p into the current state in dig. +// Both m1 and cs are used to store intermediate results which are used by the collision detection logic. +// +//go:noescape +func blockAMD64(dig *digest, p sliceHeader, m1 []uint32, cs [][5]uint32) + +// block feeds p to blockAMD64 one shared.Chunk-sized block at a time. After each +// block it runs checkCollision on m1 and cs; when a collision is reported it sets +// dig.col and invokes blockAMD64 two more times for the same block. +func block(dig *digest, p []byte) { + m1 := [shared.Rounds]uint32{} + cs := [shared.PreStepState][shared.WordBuffers]uint32{} + + for len(p) >= shared.Chunk { + // Only send a block to be processed, as the collision detection + // works on a block by block basis. + ips := sliceHeader{ + base: uintptr(unsafe.Pointer(&p[0])), + // The loop condition guarantees a full chunk is available. + len: shared.Chunk, + cap: shared.Chunk, + } + + blockAMD64(dig, ips, m1[:], cs[:]) + + col := checkCollision(m1, cs, dig.h) + if col { + dig.col = true + + blockAMD64(dig, ips, m1[:], cs[:]) + blockAMD64(dig, ips, m1[:], cs[:]) + } + + p = p[shared.Chunk:] + } +} diff --git a/sha1cdblock_amd64.s b/sha1cdblock_amd64.s new file mode 100644 index 0000000..86f9821 --- /dev/null +++ b/sha1cdblock_amd64.s @@ -0,0 +1,2274 @@ +// Code generated by command: go run sha1cdblock_amd64_asm.go -out sha1cdblock_amd64.s. DO NOT EDIT.
+ +//go:build !noasm && gc && amd64 +// +build !noasm,gc,amd64 + +#include "textflag.h" + +// func blockAMD64(dig *digest, p []byte, m1 []uint32, cs [][5]uint32) +TEXT ·blockAMD64(SB), NOSPLIT, $64-80 + MOVQ dig+0(FP), R8 + MOVQ p_base+8(FP), DI + MOVQ p_len+16(FP), DX + SHRQ $+6, DX + SHLQ $+6, DX + LEAQ (DI)(DX*1), SI + + // Load h0, h1, h2, h3, h4. + MOVL (R8), AX + MOVL 4(R8), BX + MOVL 8(R8), CX + MOVL 12(R8), DX + MOVL 16(R8), BP + + // len(p) >= chunk + CMPQ DI, SI + JEQ end + +loop: + // Initialize registers a, b, c, d, e. + MOVL AX, R10 + MOVL BX, R11 + MOVL CX, R12 + MOVL DX, R13 + MOVL BP, R14 + + // ROUND1 (steps 0-15) + // Load cs + MOVQ cs_base+56(FP), R8 + MOVL R10, (R8) + MOVL R11, 4(R8) + MOVL R12, 8(R8) + MOVL R13, 12(R8) + MOVL R14, 16(R8) + + // ROUND1(0) + // LOAD + MOVL (DI), R9 + BSWAPL R9 + MOVL R9, (SP) + + // FUNC1 + MOVL R13, R15 + XORL R12, R15 + ANDL R11, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 1518500249(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL (SP), R9 + MOVL R9, (R8) + + // ROUND1(1) + // LOAD + MOVL 4(DI), R9 + BSWAPL R9 + MOVL R9, 4(SP) + + // FUNC1 + MOVL R12, R15 + XORL R11, R15 + ANDL R10, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 1518500249(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 4(SP), R9 + MOVL R9, 4(R8) + + // ROUND1(2) + // LOAD + MOVL 8(DI), R9 + BSWAPL R9 + MOVL R9, 8(SP) + + // FUNC1 + MOVL R11, R15 + XORL R10, R15 + ANDL R14, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 1518500249(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 8(SP), R9 + MOVL R9, 8(R8) + + // ROUND1(3) + // LOAD + MOVL 12(DI), R9 + BSWAPL R9 + MOVL R9, 12(SP) + + // FUNC1 + MOVL R10, R15 + XORL R14, R15 + ANDL R13, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL 
R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 1518500249(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 12(SP), R9 + MOVL R9, 12(R8) + + // ROUND1(4) + // LOAD + MOVL 16(DI), R9 + BSWAPL R9 + MOVL R9, 16(SP) + + // FUNC1 + MOVL R14, R15 + XORL R13, R15 + ANDL R12, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 1518500249(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 16(SP), R9 + MOVL R9, 16(R8) + + // ROUND1(5) + // LOAD + MOVL 20(DI), R9 + BSWAPL R9 + MOVL R9, 20(SP) + + // FUNC1 + MOVL R13, R15 + XORL R12, R15 + ANDL R11, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 1518500249(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 20(SP), R9 + MOVL R9, 20(R8) + + // ROUND1(6) + // LOAD + MOVL 24(DI), R9 + BSWAPL R9 + MOVL R9, 24(SP) + + // FUNC1 + MOVL R12, R15 + XORL R11, R15 + ANDL R10, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 1518500249(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 24(SP), R9 + MOVL R9, 24(R8) + + // ROUND1(7) + // LOAD + MOVL 28(DI), R9 + BSWAPL R9 + MOVL R9, 28(SP) + + // FUNC1 + MOVL R11, R15 + XORL R10, R15 + ANDL R14, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 1518500249(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 28(SP), R9 + MOVL R9, 28(R8) + + // ROUND1(8) + // LOAD + MOVL 32(DI), R9 + BSWAPL R9 + MOVL R9, 32(SP) + + // FUNC1 + MOVL R10, R15 + XORL R14, R15 + ANDL R13, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 1518500249(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 32(SP), R9 + MOVL R9, 32(R8) + + // ROUND1(9) + // LOAD + MOVL 36(DI), R9 + BSWAPL R9 + MOVL R9, 36(SP) 
+ + // FUNC1 + MOVL R14, R15 + XORL R13, R15 + ANDL R12, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 1518500249(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 36(SP), R9 + MOVL R9, 36(R8) + + // ROUND1(10) + // LOAD + MOVL 40(DI), R9 + BSWAPL R9 + MOVL R9, 40(SP) + + // FUNC1 + MOVL R13, R15 + XORL R12, R15 + ANDL R11, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 1518500249(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 40(SP), R9 + MOVL R9, 40(R8) + + // ROUND1(11) + // LOAD + MOVL 44(DI), R9 + BSWAPL R9 + MOVL R9, 44(SP) + + // FUNC1 + MOVL R12, R15 + XORL R11, R15 + ANDL R10, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 1518500249(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 44(SP), R9 + MOVL R9, 44(R8) + + // ROUND1(12) + // LOAD + MOVL 48(DI), R9 + BSWAPL R9 + MOVL R9, 48(SP) + + // FUNC1 + MOVL R11, R15 + XORL R10, R15 + ANDL R14, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 1518500249(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 48(SP), R9 + MOVL R9, 48(R8) + + // ROUND1(13) + // LOAD + MOVL 52(DI), R9 + BSWAPL R9 + MOVL R9, 52(SP) + + // FUNC1 + MOVL R10, R15 + XORL R14, R15 + ANDL R13, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 1518500249(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 52(SP), R9 + MOVL R9, 52(R8) + + // ROUND1(14) + // LOAD + MOVL 56(DI), R9 + BSWAPL R9 + MOVL R9, 56(SP) + + // FUNC1 + MOVL R14, R15 + XORL R13, R15 + ANDL R12, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 1518500249(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ 
m1_base+32(FP), R8 + MOVL 56(SP), R9 + MOVL R9, 56(R8) + + // ROUND1(15) + // LOAD + MOVL 60(DI), R9 + BSWAPL R9 + MOVL R9, 60(SP) + + // FUNC1 + MOVL R13, R15 + XORL R12, R15 + ANDL R11, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 1518500249(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 60(SP), R9 + MOVL R9, 60(R8) + + // ROUND1x (steps 16-19) - same as ROUND1 but with no data load. + // ROUND1x(16) + // SHUFFLE + MOVL (SP), R9 + XORL 52(SP), R9 + XORL 32(SP), R9 + XORL 8(SP), R9 + ROLL $+1, R9 + MOVL R9, (SP) + + // FUNC1 + MOVL R12, R15 + XORL R11, R15 + ANDL R10, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 1518500249(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL (SP), R9 + MOVL R9, 64(R8) + + // ROUND1x(17) + // SHUFFLE + MOVL 4(SP), R9 + XORL 56(SP), R9 + XORL 36(SP), R9 + XORL 12(SP), R9 + ROLL $+1, R9 + MOVL R9, 4(SP) + + // FUNC1 + MOVL R11, R15 + XORL R10, R15 + ANDL R14, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 1518500249(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 4(SP), R9 + MOVL R9, 68(R8) + + // ROUND1x(18) + // SHUFFLE + MOVL 8(SP), R9 + XORL 60(SP), R9 + XORL 40(SP), R9 + XORL 16(SP), R9 + ROLL $+1, R9 + MOVL R9, 8(SP) + + // FUNC1 + MOVL R10, R15 + XORL R14, R15 + ANDL R13, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 1518500249(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 8(SP), R9 + MOVL R9, 72(R8) + + // ROUND1x(19) + // SHUFFLE + MOVL 12(SP), R9 + XORL (SP), R9 + XORL 44(SP), R9 + XORL 20(SP), R9 + ROLL $+1, R9 + MOVL R9, 12(SP) + + // FUNC1 + MOVL R14, R15 + XORL R13, R15 + ANDL R12, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 
1518500249(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 12(SP), R9 + MOVL R9, 76(R8) + + // ROUND2 (steps 20-39) + // ROUND2(20) + // SHUFFLE + MOVL 16(SP), R9 + XORL 4(SP), R9 + XORL 48(SP), R9 + XORL 24(SP), R9 + ROLL $+1, R9 + MOVL R9, 16(SP) + + // FUNC2 + MOVL R11, R15 + XORL R12, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 1859775393(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 16(SP), R9 + MOVL R9, 80(R8) + + // ROUND2(21) + // SHUFFLE + MOVL 20(SP), R9 + XORL 8(SP), R9 + XORL 52(SP), R9 + XORL 28(SP), R9 + ROLL $+1, R9 + MOVL R9, 20(SP) + + // FUNC2 + MOVL R10, R15 + XORL R11, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 1859775393(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 20(SP), R9 + MOVL R9, 84(R8) + + // ROUND2(22) + // SHUFFLE + MOVL 24(SP), R9 + XORL 12(SP), R9 + XORL 56(SP), R9 + XORL 32(SP), R9 + ROLL $+1, R9 + MOVL R9, 24(SP) + + // FUNC2 + MOVL R14, R15 + XORL R10, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 1859775393(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 24(SP), R9 + MOVL R9, 88(R8) + + // ROUND2(23) + // SHUFFLE + MOVL 28(SP), R9 + XORL 16(SP), R9 + XORL 60(SP), R9 + XORL 36(SP), R9 + ROLL $+1, R9 + MOVL R9, 28(SP) + + // FUNC2 + MOVL R13, R15 + XORL R14, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 1859775393(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 28(SP), R9 + MOVL R9, 92(R8) + + // ROUND2(24) + // SHUFFLE + MOVL 32(SP), R9 + XORL 20(SP), R9 + XORL (SP), R9 + XORL 40(SP), R9 + ROLL $+1, R9 + MOVL R9, 32(SP) + + // FUNC2 + MOVL R12, R15 + XORL R13, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 
+ LEAL 1859775393(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 32(SP), R9 + MOVL R9, 96(R8) + + // ROUND2(25) + // SHUFFLE + MOVL 36(SP), R9 + XORL 24(SP), R9 + XORL 4(SP), R9 + XORL 44(SP), R9 + ROLL $+1, R9 + MOVL R9, 36(SP) + + // FUNC2 + MOVL R11, R15 + XORL R12, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 1859775393(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 36(SP), R9 + MOVL R9, 100(R8) + + // ROUND2(26) + // SHUFFLE + MOVL 40(SP), R9 + XORL 28(SP), R9 + XORL 8(SP), R9 + XORL 48(SP), R9 + ROLL $+1, R9 + MOVL R9, 40(SP) + + // FUNC2 + MOVL R10, R15 + XORL R11, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 1859775393(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 40(SP), R9 + MOVL R9, 104(R8) + + // ROUND2(27) + // SHUFFLE + MOVL 44(SP), R9 + XORL 32(SP), R9 + XORL 12(SP), R9 + XORL 52(SP), R9 + ROLL $+1, R9 + MOVL R9, 44(SP) + + // FUNC2 + MOVL R14, R15 + XORL R10, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 1859775393(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 44(SP), R9 + MOVL R9, 108(R8) + + // ROUND2(28) + // SHUFFLE + MOVL 48(SP), R9 + XORL 36(SP), R9 + XORL 16(SP), R9 + XORL 56(SP), R9 + ROLL $+1, R9 + MOVL R9, 48(SP) + + // FUNC2 + MOVL R13, R15 + XORL R14, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 1859775393(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 48(SP), R9 + MOVL R9, 112(R8) + + // ROUND2(29) + // SHUFFLE + MOVL 52(SP), R9 + XORL 40(SP), R9 + XORL 20(SP), R9 + XORL 60(SP), R9 + ROLL $+1, R9 + MOVL R9, 52(SP) + + // FUNC2 + MOVL R12, R15 + XORL R13, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 
1859775393(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 52(SP), R9 + MOVL R9, 116(R8) + + // ROUND2(30) + // SHUFFLE + MOVL 56(SP), R9 + XORL 44(SP), R9 + XORL 24(SP), R9 + XORL (SP), R9 + ROLL $+1, R9 + MOVL R9, 56(SP) + + // FUNC2 + MOVL R11, R15 + XORL R12, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 1859775393(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 56(SP), R9 + MOVL R9, 120(R8) + + // ROUND2(31) + // SHUFFLE + MOVL 60(SP), R9 + XORL 48(SP), R9 + XORL 28(SP), R9 + XORL 4(SP), R9 + ROLL $+1, R9 + MOVL R9, 60(SP) + + // FUNC2 + MOVL R10, R15 + XORL R11, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 1859775393(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 60(SP), R9 + MOVL R9, 124(R8) + + // ROUND2(32) + // SHUFFLE + MOVL (SP), R9 + XORL 52(SP), R9 + XORL 32(SP), R9 + XORL 8(SP), R9 + ROLL $+1, R9 + MOVL R9, (SP) + + // FUNC2 + MOVL R14, R15 + XORL R10, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 1859775393(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL (SP), R9 + MOVL R9, 128(R8) + + // ROUND2(33) + // SHUFFLE + MOVL 4(SP), R9 + XORL 56(SP), R9 + XORL 36(SP), R9 + XORL 12(SP), R9 + ROLL $+1, R9 + MOVL R9, 4(SP) + + // FUNC2 + MOVL R13, R15 + XORL R14, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 1859775393(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 4(SP), R9 + MOVL R9, 132(R8) + + // ROUND2(34) + // SHUFFLE + MOVL 8(SP), R9 + XORL 60(SP), R9 + XORL 40(SP), R9 + XORL 16(SP), R9 + ROLL $+1, R9 + MOVL R9, 8(SP) + + // FUNC2 + MOVL R12, R15 + XORL R13, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 1859775393(R10)(R9*1), 
R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 8(SP), R9 + MOVL R9, 136(R8) + + // ROUND2(35) + // SHUFFLE + MOVL 12(SP), R9 + XORL (SP), R9 + XORL 44(SP), R9 + XORL 20(SP), R9 + ROLL $+1, R9 + MOVL R9, 12(SP) + + // FUNC2 + MOVL R11, R15 + XORL R12, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 1859775393(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 12(SP), R9 + MOVL R9, 140(R8) + + // ROUND2(36) + // SHUFFLE + MOVL 16(SP), R9 + XORL 4(SP), R9 + XORL 48(SP), R9 + XORL 24(SP), R9 + ROLL $+1, R9 + MOVL R9, 16(SP) + + // FUNC2 + MOVL R10, R15 + XORL R11, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 1859775393(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 16(SP), R9 + MOVL R9, 144(R8) + + // ROUND2(37) + // SHUFFLE + MOVL 20(SP), R9 + XORL 8(SP), R9 + XORL 52(SP), R9 + XORL 28(SP), R9 + ROLL $+1, R9 + MOVL R9, 20(SP) + + // FUNC2 + MOVL R14, R15 + XORL R10, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 1859775393(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 20(SP), R9 + MOVL R9, 148(R8) + + // ROUND2(38) + // SHUFFLE + MOVL 24(SP), R9 + XORL 12(SP), R9 + XORL 56(SP), R9 + XORL 32(SP), R9 + ROLL $+1, R9 + MOVL R9, 24(SP) + + // FUNC2 + MOVL R13, R15 + XORL R14, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 1859775393(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 24(SP), R9 + MOVL R9, 152(R8) + + // ROUND2(39) + // SHUFFLE + MOVL 28(SP), R9 + XORL 16(SP), R9 + XORL 60(SP), R9 + XORL 36(SP), R9 + ROLL $+1, R9 + MOVL R9, 28(SP) + + // FUNC2 + MOVL R12, R15 + XORL R13, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 1859775393(R10)(R9*1), R10 + ADDL R8, 
R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 28(SP), R9 + MOVL R9, 156(R8) + + // ROUND3 (steps 40-59) + // ROUND3(40) + // SHUFFLE + MOVL 32(SP), R9 + XORL 20(SP), R9 + XORL (SP), R9 + XORL 40(SP), R9 + ROLL $+1, R9 + MOVL R9, 32(SP) + + // FUNC3 + MOVL R11, R8 + ORL R12, R8 + ANDL R13, R8 + MOVL R11, R15 + ANDL R12, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 2400959708(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 32(SP), R9 + MOVL R9, 160(R8) + + // ROUND3(41) + // SHUFFLE + MOVL 36(SP), R9 + XORL 24(SP), R9 + XORL 4(SP), R9 + XORL 44(SP), R9 + ROLL $+1, R9 + MOVL R9, 36(SP) + + // FUNC3 + MOVL R10, R8 + ORL R11, R8 + ANDL R12, R8 + MOVL R10, R15 + ANDL R11, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 2400959708(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 36(SP), R9 + MOVL R9, 164(R8) + + // ROUND3(42) + // SHUFFLE + MOVL 40(SP), R9 + XORL 28(SP), R9 + XORL 8(SP), R9 + XORL 48(SP), R9 + ROLL $+1, R9 + MOVL R9, 40(SP) + + // FUNC3 + MOVL R14, R8 + ORL R10, R8 + ANDL R11, R8 + MOVL R14, R15 + ANDL R10, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 2400959708(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 40(SP), R9 + MOVL R9, 168(R8) + + // ROUND3(43) + // SHUFFLE + MOVL 44(SP), R9 + XORL 32(SP), R9 + XORL 12(SP), R9 + XORL 52(SP), R9 + ROLL $+1, R9 + MOVL R9, 44(SP) + + // FUNC3 + MOVL R13, R8 + ORL R14, R8 + ANDL R10, R8 + MOVL R13, R15 + ANDL R14, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 2400959708(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 44(SP), R9 + MOVL R9, 172(R8) + + // ROUND3(44) + // SHUFFLE + MOVL 48(SP), R9 + XORL 36(SP), R9 + XORL 16(SP), R9 + XORL 56(SP), R9 + ROLL $+1, R9 + MOVL R9, 48(SP) 
+ + // FUNC3 + MOVL R12, R8 + ORL R13, R8 + ANDL R14, R8 + MOVL R12, R15 + ANDL R13, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 2400959708(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 48(SP), R9 + MOVL R9, 176(R8) + + // ROUND3(45) + // SHUFFLE + MOVL 52(SP), R9 + XORL 40(SP), R9 + XORL 20(SP), R9 + XORL 60(SP), R9 + ROLL $+1, R9 + MOVL R9, 52(SP) + + // FUNC3 + MOVL R11, R8 + ORL R12, R8 + ANDL R13, R8 + MOVL R11, R15 + ANDL R12, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 2400959708(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 52(SP), R9 + MOVL R9, 180(R8) + + // ROUND3(46) + // SHUFFLE + MOVL 56(SP), R9 + XORL 44(SP), R9 + XORL 24(SP), R9 + XORL (SP), R9 + ROLL $+1, R9 + MOVL R9, 56(SP) + + // FUNC3 + MOVL R10, R8 + ORL R11, R8 + ANDL R12, R8 + MOVL R10, R15 + ANDL R11, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 2400959708(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 56(SP), R9 + MOVL R9, 184(R8) + + // ROUND3(47) + // SHUFFLE + MOVL 60(SP), R9 + XORL 48(SP), R9 + XORL 28(SP), R9 + XORL 4(SP), R9 + ROLL $+1, R9 + MOVL R9, 60(SP) + + // FUNC3 + MOVL R14, R8 + ORL R10, R8 + ANDL R11, R8 + MOVL R14, R15 + ANDL R10, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 2400959708(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 60(SP), R9 + MOVL R9, 188(R8) + + // ROUND3(48) + // SHUFFLE + MOVL (SP), R9 + XORL 52(SP), R9 + XORL 32(SP), R9 + XORL 8(SP), R9 + ROLL $+1, R9 + MOVL R9, (SP) + + // FUNC3 + MOVL R13, R8 + ORL R14, R8 + ANDL R10, R8 + MOVL R13, R15 + ANDL R14, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 2400959708(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ 
m1_base+32(FP), R8 + MOVL (SP), R9 + MOVL R9, 192(R8) + + // ROUND3(49) + // SHUFFLE + MOVL 4(SP), R9 + XORL 56(SP), R9 + XORL 36(SP), R9 + XORL 12(SP), R9 + ROLL $+1, R9 + MOVL R9, 4(SP) + + // FUNC3 + MOVL R12, R8 + ORL R13, R8 + ANDL R14, R8 + MOVL R12, R15 + ANDL R13, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 2400959708(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 4(SP), R9 + MOVL R9, 196(R8) + + // ROUND3(50) + // SHUFFLE + MOVL 8(SP), R9 + XORL 60(SP), R9 + XORL 40(SP), R9 + XORL 16(SP), R9 + ROLL $+1, R9 + MOVL R9, 8(SP) + + // FUNC3 + MOVL R11, R8 + ORL R12, R8 + ANDL R13, R8 + MOVL R11, R15 + ANDL R12, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 2400959708(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 8(SP), R9 + MOVL R9, 200(R8) + + // ROUND3(51) + // SHUFFLE + MOVL 12(SP), R9 + XORL (SP), R9 + XORL 44(SP), R9 + XORL 20(SP), R9 + ROLL $+1, R9 + MOVL R9, 12(SP) + + // FUNC3 + MOVL R10, R8 + ORL R11, R8 + ANDL R12, R8 + MOVL R10, R15 + ANDL R11, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 2400959708(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 12(SP), R9 + MOVL R9, 204(R8) + + // ROUND3(52) + // SHUFFLE + MOVL 16(SP), R9 + XORL 4(SP), R9 + XORL 48(SP), R9 + XORL 24(SP), R9 + ROLL $+1, R9 + MOVL R9, 16(SP) + + // FUNC3 + MOVL R14, R8 + ORL R10, R8 + ANDL R11, R8 + MOVL R14, R15 + ANDL R10, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 2400959708(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 16(SP), R9 + MOVL R9, 208(R8) + + // ROUND3(53) + // SHUFFLE + MOVL 20(SP), R9 + XORL 8(SP), R9 + XORL 52(SP), R9 + XORL 28(SP), R9 + ROLL $+1, R9 + MOVL R9, 20(SP) + + // FUNC3 + MOVL R13, R8 + ORL R14, R8 + ANDL R10, R8 + 
MOVL R13, R15 + ANDL R14, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 2400959708(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 20(SP), R9 + MOVL R9, 212(R8) + + // ROUND3(54) + // SHUFFLE + MOVL 24(SP), R9 + XORL 12(SP), R9 + XORL 56(SP), R9 + XORL 32(SP), R9 + ROLL $+1, R9 + MOVL R9, 24(SP) + + // FUNC3 + MOVL R12, R8 + ORL R13, R8 + ANDL R14, R8 + MOVL R12, R15 + ANDL R13, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 2400959708(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 24(SP), R9 + MOVL R9, 216(R8) + + // ROUND3(55) + // SHUFFLE + MOVL 28(SP), R9 + XORL 16(SP), R9 + XORL 60(SP), R9 + XORL 36(SP), R9 + ROLL $+1, R9 + MOVL R9, 28(SP) + + // FUNC3 + MOVL R11, R8 + ORL R12, R8 + ANDL R13, R8 + MOVL R11, R15 + ANDL R12, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 2400959708(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 28(SP), R9 + MOVL R9, 220(R8) + + // ROUND3(56) + // SHUFFLE + MOVL 32(SP), R9 + XORL 20(SP), R9 + XORL (SP), R9 + XORL 40(SP), R9 + ROLL $+1, R9 + MOVL R9, 32(SP) + + // FUNC3 + MOVL R10, R8 + ORL R11, R8 + ANDL R12, R8 + MOVL R10, R15 + ANDL R11, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 2400959708(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 32(SP), R9 + MOVL R9, 224(R8) + + // ROUND3(57) + // SHUFFLE + MOVL 36(SP), R9 + XORL 24(SP), R9 + XORL 4(SP), R9 + XORL 44(SP), R9 + ROLL $+1, R9 + MOVL R9, 36(SP) + + // FUNC3 + MOVL R14, R8 + ORL R10, R8 + ANDL R11, R8 + MOVL R14, R15 + ANDL R10, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 2400959708(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 36(SP), R9 + MOVL R9, 
228(R8) + + // Load cs + MOVQ cs_base+56(FP), R8 + MOVL R12, 20(R8) + MOVL R13, 24(R8) + MOVL R14, 28(R8) + MOVL R10, 32(R8) + MOVL R11, 36(R8) + + // ROUND3(58) + // SHUFFLE + MOVL 40(SP), R9 + XORL 28(SP), R9 + XORL 8(SP), R9 + XORL 48(SP), R9 + ROLL $+1, R9 + MOVL R9, 40(SP) + + // FUNC3 + MOVL R13, R8 + ORL R14, R8 + ANDL R10, R8 + MOVL R13, R15 + ANDL R14, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 2400959708(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 40(SP), R9 + MOVL R9, 232(R8) + + // ROUND3(59) + // SHUFFLE + MOVL 44(SP), R9 + XORL 32(SP), R9 + XORL 12(SP), R9 + XORL 52(SP), R9 + ROLL $+1, R9 + MOVL R9, 44(SP) + + // FUNC3 + MOVL R12, R8 + ORL R13, R8 + ANDL R14, R8 + MOVL R12, R15 + ANDL R13, R15 + ORL R8, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 2400959708(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 44(SP), R9 + MOVL R9, 236(R8) + + // ROUND4 (steps 60-79) + // ROUND4(60) + // SHUFFLE + MOVL 48(SP), R9 + XORL 36(SP), R9 + XORL 16(SP), R9 + XORL 56(SP), R9 + ROLL $+1, R9 + MOVL R9, 48(SP) + + // FUNC2 + MOVL R11, R15 + XORL R12, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 3395469782(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 48(SP), R9 + MOVL R9, 240(R8) + + // ROUND4(61) + // SHUFFLE + MOVL 52(SP), R9 + XORL 40(SP), R9 + XORL 20(SP), R9 + XORL 60(SP), R9 + ROLL $+1, R9 + MOVL R9, 52(SP) + + // FUNC2 + MOVL R10, R15 + XORL R11, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 3395469782(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 52(SP), R9 + MOVL R9, 244(R8) + + // ROUND4(62) + // SHUFFLE + MOVL 56(SP), R9 + XORL 44(SP), R9 + XORL 24(SP), R9 + XORL (SP), R9 + ROLL $+1, R9 + MOVL R9, 56(SP) + + // FUNC2 + MOVL 
R14, R15 + XORL R10, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 3395469782(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 56(SP), R9 + MOVL R9, 248(R8) + + // ROUND4(63) + // SHUFFLE + MOVL 60(SP), R9 + XORL 48(SP), R9 + XORL 28(SP), R9 + XORL 4(SP), R9 + ROLL $+1, R9 + MOVL R9, 60(SP) + + // FUNC2 + MOVL R13, R15 + XORL R14, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 3395469782(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 60(SP), R9 + MOVL R9, 252(R8) + + // ROUND4(64) + // SHUFFLE + MOVL (SP), R9 + XORL 52(SP), R9 + XORL 32(SP), R9 + XORL 8(SP), R9 + ROLL $+1, R9 + MOVL R9, (SP) + + // FUNC2 + MOVL R12, R15 + XORL R13, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 3395469782(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL (SP), R9 + MOVL R9, 256(R8) + + // Load cs + MOVQ cs_base+56(FP), R8 + MOVL R10, 40(R8) + MOVL R11, 44(R8) + MOVL R12, 48(R8) + MOVL R13, 52(R8) + MOVL R14, 56(R8) + + // ROUND4(65) + // SHUFFLE + MOVL 4(SP), R9 + XORL 56(SP), R9 + XORL 36(SP), R9 + XORL 12(SP), R9 + ROLL $+1, R9 + MOVL R9, 4(SP) + + // FUNC2 + MOVL R11, R15 + XORL R12, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 3395469782(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 4(SP), R9 + MOVL R9, 260(R8) + + // ROUND4(66) + // SHUFFLE + MOVL 8(SP), R9 + XORL 60(SP), R9 + XORL 40(SP), R9 + XORL 16(SP), R9 + ROLL $+1, R9 + MOVL R9, 8(SP) + + // FUNC2 + MOVL R10, R15 + XORL R11, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 3395469782(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 8(SP), R9 + MOVL R9, 264(R8) + + // ROUND4(67) + // SHUFFLE + MOVL 
12(SP), R9 + XORL (SP), R9 + XORL 44(SP), R9 + XORL 20(SP), R9 + ROLL $+1, R9 + MOVL R9, 12(SP) + + // FUNC2 + MOVL R14, R15 + XORL R10, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 3395469782(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 12(SP), R9 + MOVL R9, 268(R8) + + // ROUND4(68) + // SHUFFLE + MOVL 16(SP), R9 + XORL 4(SP), R9 + XORL 48(SP), R9 + XORL 24(SP), R9 + ROLL $+1, R9 + MOVL R9, 16(SP) + + // FUNC2 + MOVL R13, R15 + XORL R14, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 3395469782(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 16(SP), R9 + MOVL R9, 272(R8) + + // ROUND4(69) + // SHUFFLE + MOVL 20(SP), R9 + XORL 8(SP), R9 + XORL 52(SP), R9 + XORL 28(SP), R9 + ROLL $+1, R9 + MOVL R9, 20(SP) + + // FUNC2 + MOVL R12, R15 + XORL R13, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 3395469782(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 20(SP), R9 + MOVL R9, 276(R8) + + // ROUND4(70) + // SHUFFLE + MOVL 24(SP), R9 + XORL 12(SP), R9 + XORL 56(SP), R9 + XORL 32(SP), R9 + ROLL $+1, R9 + MOVL R9, 24(SP) + + // FUNC2 + MOVL R11, R15 + XORL R12, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 3395469782(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 24(SP), R9 + MOVL R9, 280(R8) + + // ROUND4(71) + // SHUFFLE + MOVL 28(SP), R9 + XORL 16(SP), R9 + XORL 60(SP), R9 + XORL 36(SP), R9 + ROLL $+1, R9 + MOVL R9, 28(SP) + + // FUNC2 + MOVL R10, R15 + XORL R11, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 3395469782(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 28(SP), R9 + MOVL R9, 284(R8) + + // ROUND4(72) + // SHUFFLE + MOVL 32(SP), R9 + 
XORL 20(SP), R9 + XORL (SP), R9 + XORL 40(SP), R9 + ROLL $+1, R9 + MOVL R9, 32(SP) + + // FUNC2 + MOVL R14, R15 + XORL R10, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 3395469782(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 32(SP), R9 + MOVL R9, 288(R8) + + // ROUND4(73) + // SHUFFLE + MOVL 36(SP), R9 + XORL 24(SP), R9 + XORL 4(SP), R9 + XORL 44(SP), R9 + ROLL $+1, R9 + MOVL R9, 36(SP) + + // FUNC2 + MOVL R13, R15 + XORL R14, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 3395469782(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 36(SP), R9 + MOVL R9, 292(R8) + + // ROUND4(74) + // SHUFFLE + MOVL 40(SP), R9 + XORL 28(SP), R9 + XORL 8(SP), R9 + XORL 48(SP), R9 + ROLL $+1, R9 + MOVL R9, 40(SP) + + // FUNC2 + MOVL R12, R15 + XORL R13, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 3395469782(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 40(SP), R9 + MOVL R9, 296(R8) + + // ROUND4(75) + // SHUFFLE + MOVL 44(SP), R9 + XORL 32(SP), R9 + XORL 12(SP), R9 + XORL 52(SP), R9 + ROLL $+1, R9 + MOVL R9, 44(SP) + + // FUNC2 + MOVL R11, R15 + XORL R12, R15 + XORL R13, R15 + + // MIX + ROLL $+30, R11 + ADDL R15, R14 + MOVL R10, R8 + ROLL $+5, R8 + LEAL 3395469782(R14)(R9*1), R14 + ADDL R8, R14 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 44(SP), R9 + MOVL R9, 300(R8) + + // ROUND4(76) + // SHUFFLE + MOVL 48(SP), R9 + XORL 36(SP), R9 + XORL 16(SP), R9 + XORL 56(SP), R9 + ROLL $+1, R9 + MOVL R9, 48(SP) + + // FUNC2 + MOVL R10, R15 + XORL R11, R15 + XORL R12, R15 + + // MIX + ROLL $+30, R10 + ADDL R15, R13 + MOVL R14, R8 + ROLL $+5, R8 + LEAL 3395469782(R13)(R9*1), R13 + ADDL R8, R13 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 48(SP), R9 + MOVL R9, 304(R8) + + // ROUND4(77) + // SHUFFLE + MOVL 52(SP), R9 + XORL 40(SP), 
R9 + XORL 20(SP), R9 + XORL 60(SP), R9 + ROLL $+1, R9 + MOVL R9, 52(SP) + + // FUNC2 + MOVL R14, R15 + XORL R10, R15 + XORL R11, R15 + + // MIX + ROLL $+30, R14 + ADDL R15, R12 + MOVL R13, R8 + ROLL $+5, R8 + LEAL 3395469782(R12)(R9*1), R12 + ADDL R8, R12 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 52(SP), R9 + MOVL R9, 308(R8) + + // ROUND4(78) + // SHUFFLE + MOVL 56(SP), R9 + XORL 44(SP), R9 + XORL 24(SP), R9 + XORL (SP), R9 + ROLL $+1, R9 + MOVL R9, 56(SP) + + // FUNC2 + MOVL R13, R15 + XORL R14, R15 + XORL R10, R15 + + // MIX + ROLL $+30, R13 + ADDL R15, R11 + MOVL R12, R8 + ROLL $+5, R8 + LEAL 3395469782(R11)(R9*1), R11 + ADDL R8, R11 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 56(SP), R9 + MOVL R9, 312(R8) + + // ROUND4(79) + // SHUFFLE + MOVL 60(SP), R9 + XORL 48(SP), R9 + XORL 28(SP), R9 + XORL 4(SP), R9 + ROLL $+1, R9 + MOVL R9, 60(SP) + + // FUNC2 + MOVL R12, R15 + XORL R13, R15 + XORL R14, R15 + + // MIX + ROLL $+30, R12 + ADDL R15, R10 + MOVL R11, R8 + ROLL $+5, R8 + LEAL 3395469782(R10)(R9*1), R10 + ADDL R8, R10 + + // Load m1 + MOVQ m1_base+32(FP), R8 + MOVL 60(SP), R9 + MOVL R9, 316(R8) + + // Add registers to temp hash. + ADDL R10, AX + ADDL R11, BX + ADDL R12, CX + ADDL R13, DX + ADDL R14, BP + ADDQ $+64, DI + CMPQ DI, SI + JB loop + +end: + MOVQ dig+0(FP), SI + MOVL AX, (SI) + MOVL BX, 4(SI) + MOVL CX, 8(SI) + MOVL DX, 12(SI) + MOVL BP, 16(SI) + RET diff --git a/sha1cdblock_amd64_asm.go b/sha1cdblock_amd64_asm.go new file mode 100644 index 0000000..9570273 --- /dev/null +++ b/sha1cdblock_amd64_asm.go @@ -0,0 +1,312 @@ +//go:build ignore +// +build ignore + +package main + +import ( + . "github.com/mmcloughlin/avo/build" + "github.com/mmcloughlin/avo/buildtags" + . "github.com/mmcloughlin/avo/operand" + . 
"github.com/mmcloughlin/avo/reg" + shared "github.com/pjbgf/sha1cd/internal" +) + +//go:generate go run sha1cdblock_amd64_asm.go -out sha1cdblock_amd64.s + +func main() { + Constraint(buildtags.Not("noasm").ToConstraint()) + Constraint(buildtags.Term("gc").ToConstraint()) + Constraint(buildtags.Term("amd64").ToConstraint()) + + Package("github.com/pjbgf/sha1cd") + + TEXT("blockAMD64", NOSPLIT, "func(dig *digest, p []byte, m1 []uint32, cs [][5]uint32)") + Doc("blockAMD64 hashes the message p into the current state in dig.", + "Both m1 and cs are used to store intermediate results which are used by the collision detection logic.") + + // Using the same registers as the Go SHA1 implementation, for + // easier comparison. In the future this will be reviewed. + ax := GP32() + cx := GP32() + dx64 := GP64() + bx := GP32() + bp64 := GP64() + di64 := GP64() + si64 := GP64() + + r8 := GP32() + r9 := GP32() + r10 := GP32() + r11 := GP32() + r12 := GP32() + r13 := GP32() + r14 := GP32() + r15 := GP32() + + dx := dx64.As32() + bp := RBP.As32() + + dig := Load(Param("dig"), bp64) + p_base := Load(Param("p").Base(), si64) + p_len := Load(Param("p").Len(), dx64) + SHRQ(I8(6), p_len) + SHLQ(I8(6), p_len) + + LEAQ(Mem{Base: p_base, Index: p_len, Scale: 1}, di64) + + Comment("Load h0, h1, h2, h3, h4.") + hash := [5]Register{ax, bx, cx, dx, bp} + for i, r := range hash { + MOVL(Mem{Base: dig}.Offset(4*i), r) + } + + // Store message values on the stack. 
+ w := AllocLocal(shared.Chunk) + W := func(r int) Mem { return w.Offset((r % 16) * 4) } + + Comment("len(p) >= chunk") + CMPQ(p_base, di64) + JEQ(LabelRef("end")) + + Label("loop") + Comment("Initialize registers a, b, c, d, e.") + + a, b, c, d, e := r11, r12, r13, r14, r15 + for i, r := range []Register{a, b, c, d, e} { + MOVL(hash[i], r) + } + + LOAD := func(index int) { + Comment("LOAD") + + MOVL(Mem{Base: si64}.Offset(4*index), r10) + BSWAPL(r10) + MOVL(r10, W(index)) + } + + SHUFFLE := func(index int) { + Comment("SHUFFLE") + sp := Mem{Base: StackPointer} + MOVL(sp.Offset(((index)&0xf)*4), r10) + XORL(sp.Offset(((index-3)&0xf)*4), r10) + XORL(sp.Offset(((index-8)&0xf)*4), r10) + XORL(sp.Offset(((index - 14) & 0xf * 4)), r10) + ROLL(I8(1), r10) + MOVL(r10, sp.Offset(((index)&0xf)*4)) + } + + FUNC1 := func(a, b, c, d, e GPVirtual) { + Comment("FUNC1") + MOVL(d, r9) + XORL(c, r9) + ANDL(b, r9) + XORL(d, r9) + } + + FUNC2 := func(a, b, c, d, e GPVirtual) { + Comment("FUNC2") + MOVL(b, r9) + XORL(c, r9) + XORL(d, r9) + } + + FUNC3 := func(a, b, c, d, e GPVirtual) { + Comment("FUNC3") + MOVL(b, r8) + ORL(c, r8) + ANDL(d, r8) + + MOVL(b, r9) + ANDL(c, r9) + ORL(r8, r9) + } + + FUNC4 := FUNC2 + + MIX := func(a, b, c, d, e GPVirtual, k int) { + Comment("MIX") + ROLL(I8(30), b) + ADDL(r9, e) + MOVL(a, r8) + ROLL(I8(5), r8) + LEAL(Mem{Base: e, Disp: k, Index: r10, Scale: 1}, e) + ADDL(r8, e) + } + + LOADM1 := func(index int) { + Comment("Load m1") + m1_base := Load(Param("m1").Base(), R8) + m1 := Mem{Base: m1_base, Scale: 1} + MOVL(W(index&0xf), r10) + MOVL(r10, m1.Offset(index*4)) + } + + csOffset := 0 + // Load the current compression state into cs, so it can be used later. + // This must be done before shuffles or changes to the buffer.
+ LOADCS := func(a, b, c, d, e GPVirtual, index int) { + Comment("Load cs") + cs_base := Load(Param("cs").Base(), R8) + cs := Mem{Base: cs_base, Scale: 1} + + MOVL(a, cs.Offset(csOffset)) + MOVL(b, cs.Offset(csOffset+4)) + MOVL(c, cs.Offset(csOffset+8)) + MOVL(d, cs.Offset(csOffset+12)) + MOVL(e, cs.Offset(csOffset+16)) + csOffset += 5 * 4 + } + + ROUND1 := func(a, b, c, d, e GPVirtual, index int) { + Commentf("ROUND1(%d)", index) + LOAD(index) + FUNC1(a, b, c, d, e) + MIX(a, b, c, d, e, shared.K0) + LOADM1(index) + } + + ROUND1x := func(a, b, c, d, e GPVirtual, index int) { + Commentf("ROUND1x(%d)", index) + SHUFFLE(index) + FUNC1(a, b, c, d, e) + MIX(a, b, c, d, e, shared.K0) + LOADM1(index) + } + + ROUND2 := func(a, b, c, d, e GPVirtual, index int) { + Commentf("ROUND2(%d)", index) + SHUFFLE(index) + FUNC2(a, b, c, d, e) + MIX(a, b, c, d, e, shared.K1) + LOADM1(index) + } + + ROUND3 := func(a, b, c, d, e GPVirtual, index int) { + Commentf("ROUND3(%d)", index) + SHUFFLE(index) + FUNC3(a, b, c, d, e) + MIX(a, b, c, d, e, shared.K2) + LOADM1(index) + } + + ROUND4 := func(a, b, c, d, e GPVirtual, index int) { + Commentf("ROUND4(%d)", index) + SHUFFLE(index) + FUNC4(a, b, c, d, e) + MIX(a, b, c, d, e, shared.K3) + LOADM1(index) + } + + Comment("ROUND1 (steps 0-15)") + LOADCS(a, b, c, d, e, 0) + ROUND1(a, b, c, d, e, 0) + ROUND1(e, a, b, c, d, 1) + ROUND1(d, e, a, b, c, 2) + ROUND1(c, d, e, a, b, 3) + ROUND1(b, c, d, e, a, 4) + ROUND1(a, b, c, d, e, 5) + ROUND1(e, a, b, c, d, 6) + ROUND1(d, e, a, b, c, 7) + ROUND1(c, d, e, a, b, 8) + ROUND1(b, c, d, e, a, 9) + ROUND1(a, b, c, d, e, 10) + ROUND1(e, a, b, c, d, 11) + ROUND1(d, e, a, b, c, 12) + ROUND1(c, d, e, a, b, 13) + ROUND1(b, c, d, e, a, 14) + ROUND1(a, b, c, d, e, 15) + + Comment("ROUND1x (steps 16-19) - same as ROUND1 but with no data load.") + ROUND1x(e, a, b, c, d, 16) + ROUND1x(d, e, a, b, c, 17) + ROUND1x(c, d, e, a, b, 18) + ROUND1x(b, c, d, e, a, 19) + + Comment("ROUND2 (steps 20-39)") + ROUND2(a, b, c, d, 
e, 20) + ROUND2(e, a, b, c, d, 21) + ROUND2(d, e, a, b, c, 22) + ROUND2(c, d, e, a, b, 23) + ROUND2(b, c, d, e, a, 24) + ROUND2(a, b, c, d, e, 25) + ROUND2(e, a, b, c, d, 26) + ROUND2(d, e, a, b, c, 27) + ROUND2(c, d, e, a, b, 28) + ROUND2(b, c, d, e, a, 29) + ROUND2(a, b, c, d, e, 30) + ROUND2(e, a, b, c, d, 31) + ROUND2(d, e, a, b, c, 32) + ROUND2(c, d, e, a, b, 33) + ROUND2(b, c, d, e, a, 34) + ROUND2(a, b, c, d, e, 35) + ROUND2(e, a, b, c, d, 36) + ROUND2(d, e, a, b, c, 37) + ROUND2(c, d, e, a, b, 38) + ROUND2(b, c, d, e, a, 39) + + Comment("ROUND3 (steps 40-59)") + ROUND3(a, b, c, d, e, 40) + ROUND3(e, a, b, c, d, 41) + ROUND3(d, e, a, b, c, 42) + ROUND3(c, d, e, a, b, 43) + ROUND3(b, c, d, e, a, 44) + ROUND3(a, b, c, d, e, 45) + ROUND3(e, a, b, c, d, 46) + ROUND3(d, e, a, b, c, 47) + ROUND3(c, d, e, a, b, 48) + ROUND3(b, c, d, e, a, 49) + ROUND3(a, b, c, d, e, 50) + ROUND3(e, a, b, c, d, 51) + ROUND3(d, e, a, b, c, 52) + ROUND3(c, d, e, a, b, 53) + ROUND3(b, c, d, e, a, 54) + ROUND3(a, b, c, d, e, 55) + ROUND3(e, a, b, c, d, 56) + ROUND3(d, e, a, b, c, 57) + + LOADCS(c, d, e, a, b, 58) + ROUND3(c, d, e, a, b, 58) + ROUND3(b, c, d, e, a, 59) + + Comment("ROUND4 (steps 60-79)") + ROUND4(a, b, c, d, e, 60) + ROUND4(e, a, b, c, d, 61) + ROUND4(d, e, a, b, c, 62) + ROUND4(c, d, e, a, b, 63) + ROUND4(b, c, d, e, a, 64) + + LOADCS(a, b, c, d, e, 65) + ROUND4(a, b, c, d, e, 65) + ROUND4(e, a, b, c, d, 66) + ROUND4(d, e, a, b, c, 67) + ROUND4(c, d, e, a, b, 68) + ROUND4(b, c, d, e, a, 69) + ROUND4(a, b, c, d, e, 70) + ROUND4(e, a, b, c, d, 71) + ROUND4(d, e, a, b, c, 72) + ROUND4(c, d, e, a, b, 73) + ROUND4(b, c, d, e, a, 74) + ROUND4(a, b, c, d, e, 75) + ROUND4(e, a, b, c, d, 76) + ROUND4(d, e, a, b, c, 77) + ROUND4(c, d, e, a, b, 78) + ROUND4(b, c, d, e, a, 79) + + Comment("Add registers to temp hash.") + for i, r := range []Register{a, b, c, d, e} { + ADDL(r, hash[i]) + } + + ADDQ(I8(shared.Chunk), p_base) + CMPQ(p_base, di64) + JB(LabelRef("loop")) + + 
Label("end") + dig = Load(Param("dig"), di64) + for i, r := range hash { + MOVL(r, Mem{Base: dig}.Offset(4*i)) + } + + RET() + Generate() +} diff --git a/sha1cdblock_generic.go b/sha1cdblock_generic.go new file mode 100644 index 0000000..ba8b96e --- /dev/null +++ b/sha1cdblock_generic.go @@ -0,0 +1,268 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Originally from: https://github.com/golang/go/blob/master/src/crypto/sha1/sha1block.go +// It has been modified to support collision detection. + +package sha1cd + +import ( + "fmt" + "math/bits" + + shared "github.com/pjbgf/sha1cd/internal" + "github.com/pjbgf/sha1cd/ubc" +) + +// blockGeneric is a portable, pure Go version of the SHA-1 block step. +// It's used by sha1block_generic.go and tests. +func blockGeneric(dig *digest, p []byte) { + var w [16]uint32 + + // cs stores the pre-step compression state for only the steps required for the + // collision detection, which are 0, 58 and 65. + // Refer to ubc/const.go for more details. + cs := [shared.PreStepState][shared.WordBuffers]uint32{} + + h0, h1, h2, h3, h4 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] + for len(p) >= shared.Chunk { + m1 := [shared.Rounds]uint32{} + hi := 1 + + // Collision attacks are thwarted by hashing a detected near-collision block 3 times. + // Think of it as extending SHA-1 from 80-steps to 240-steps for such blocks: + // The best collision attacks against SHA-1 have complexity about 2^60, + // thus for 240-steps an immediate lower-bound for the best cryptanalytic attacks would be 2^180. + // An attacker would be better off using a generic birthday search of complexity 2^80. + rehash: + a, b, c, d, e := h0, h1, h2, h3, h4 + + // Each of the four 20-iteration rounds + // differs only in the computation of f and + // the choice of K (K0, K1, etc).
+ i := 0 + + // Store pre-step compression state for the collision detection. + cs[0] = [shared.WordBuffers]uint32{a, b, c, d, e} + + for ; i < 16; i++ { + // load step + j := i * 4 + w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) + + f := b&c | (^b)&d + t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + shared.K0 + a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d + + // Store compression state for the collision detection. + m1[i] = w[i&0xf] + } + for ; i < 20; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + + f := b&c | (^b)&d + t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + shared.K0 + a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d + + // Store compression state for the collision detection. + m1[i] = w[i&0xf] + } + for ; i < 40; i++ { + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + + f := b ^ c ^ d + t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + shared.K1 + a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d + + // Store compression state for the collision detection. + m1[i] = w[i&0xf] + } + for ; i < 60; i++ { + if i == 58 { + // Store pre-step compression state for the collision detection. + cs[1] = [shared.WordBuffers]uint32{a, b, c, d, e} + } + + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + + f := ((b | c) & d) | (b & c) + t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + shared.K2 + a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d + + // Store compression state for the collision detection. + m1[i] = w[i&0xf] + } + for ; i < 80; i++ { + if i == 65 { + // Store pre-step compression state for the collision detection. 
+ cs[2] = [shared.WordBuffers]uint32{a, b, c, d, e} + } + + tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf] + w[i&0xf] = tmp<<1 | tmp>>(32-1) + + f := b ^ c ^ d + t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + shared.K3 + a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d + + // Store compression state for the collision detection. + m1[i] = w[i&0xf] + } + + h0 += a + h1 += b + h2 += c + h3 += d + h4 += e + + if hi == 2 { + hi++ + goto rehash + } + + if hi == 1 { + col := checkCollision(m1, cs, [shared.WordBuffers]uint32{h0, h1, h2, h3, h4}) + if col { + dig.col = true + hi++ + goto rehash + } + } + + p = p[shared.Chunk:] + } + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4] = h0, h1, h2, h3, h4 +} + +func checkCollision( + m1 [shared.Rounds]uint32, + cs [shared.PreStepState][shared.WordBuffers]uint32, + state [shared.WordBuffers]uint32) bool { + + if mask := ubc.CalculateDvMask(m1); mask != 0 { + dvs := ubc.SHA1_dvs() + + for i := 0; dvs[i].DvType != 0; i++ { + if (mask & ((uint32)(1) << uint32(dvs[i].MaskB))) != 0 { + var csState [shared.WordBuffers]uint32 + switch dvs[i].TestT { + case 58: + csState = cs[1] + case 65: + csState = cs[2] + case 0: + csState = cs[0] + default: + panic(fmt.Sprintf("dvs data is trying to use a testT that isn't available: %d", dvs[i].TestT)) + } + + col := hasCollided( + dvs[i].TestT, // testT is the step number + // m2 is a secondary message created XORing with + // ubc's DM prior to the SHA recompression step. + m1, dvs[i].Dm, + csState, + state) + + if col { + return true + } + } + } + } + return false +} + +func hasCollided(step uint32, m1, dm [shared.Rounds]uint32, + state [shared.WordBuffers]uint32, h [shared.WordBuffers]uint32) bool { + // Intermediary Hash Value. + ihv := [shared.WordBuffers]uint32{} + + a, b, c, d, e := state[0], state[1], state[2], state[3], state[4] + + // Walk backwards from current step to undo previous compression. 
+ // The existing collision detection does not have dvs higher than 65, + // start value of i accordingly. + for i := uint32(64); i >= 60; i-- { + a, b, c, d, e = b, c, d, e, a + if step > i { + b = bits.RotateLeft32(b, -30) + f := b ^ c ^ d + e -= bits.RotateLeft32(a, 5) + f + shared.K3 + (m1[i] ^ dm[i]) // m2 = m1 ^ dm. + } + } + for i := uint32(59); i >= 40; i-- { + a, b, c, d, e = b, c, d, e, a + if step > i { + b = bits.RotateLeft32(b, -30) + f := ((b | c) & d) | (b & c) + e -= bits.RotateLeft32(a, 5) + f + shared.K2 + (m1[i] ^ dm[i]) + } + } + for i := uint32(39); i >= 20; i-- { + a, b, c, d, e = b, c, d, e, a + if step > i { + b = bits.RotateLeft32(b, -30) + f := b ^ c ^ d + e -= bits.RotateLeft32(a, 5) + f + shared.K1 + (m1[i] ^ dm[i]) + } + } + for i := uint32(20); i > 0; i-- { + j := i - 1 + a, b, c, d, e = b, c, d, e, a + if step > j { + b = bits.RotateLeft32(b, -30) // undo the rotate left + f := b&c | (^b)&d + // subtract from e + e -= bits.RotateLeft32(a, 5) + f + shared.K0 + (m1[j] ^ dm[j]) + } + } + + ihv[0] = a + ihv[1] = b + ihv[2] = c + ihv[3] = d + ihv[4] = e + a = state[0] + b = state[1] + c = state[2] + d = state[3] + e = state[4] + + // Recompress blocks based on the current step. + // The existing collision detection does not have dvs below 58, so they have been removed + // from the source code. If new dvs are added which target rounds below 40, that logic + // will need to be readded here. 
+ for i := uint32(40); i < 60; i++ { + if step <= i { + f := ((b | c) & d) | (b & c) + t := bits.RotateLeft32(a, 5) + f + e + shared.K2 + (m1[i] ^ dm[i]) + a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d + } + } + for i := uint32(60); i < 80; i++ { + if step <= i { + f := b ^ c ^ d + t := bits.RotateLeft32(a, 5) + f + e + shared.K3 + (m1[i] ^ dm[i]) + a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d + } + } + + ihv[0] += a + ihv[1] += b + ihv[2] += c + ihv[3] += d + ihv[4] += e + + if ((ihv[0] ^ h[0]) | (ihv[1] ^ h[1]) | + (ihv[2] ^ h[2]) | (ihv[3] ^ h[3]) | (ihv[4] ^ h[4])) == 0 { + return true + } + + return false +} diff --git a/sha1cdblock_noasm.go b/sha1cdblock_noasm.go new file mode 100644 index 0000000..15bae5a --- /dev/null +++ b/sha1cdblock_noasm.go @@ -0,0 +1,8 @@ +//go:build !amd64 || noasm || !gc +// +build !amd64 noasm !gc + +package sha1cd + +func block(dig *digest, p []byte) { + blockGeneric(dig, p) +} diff --git a/test/bench_test.go b/test/bench_test.go index c069fd8..f4127ca 100644 --- a/test/bench_test.go +++ b/test/bench_test.go @@ -3,16 +3,16 @@ package test import ( "crypto/sha1" "hash" + "os" "testing" "github.com/pjbgf/sha1cd" "github.com/pjbgf/sha1cd/cgo" - "github.com/pjbgf/sha1cd/testdata" "github.com/pjbgf/sha1cd/ubc" ) func BenchmarkCalculateDvMask(b *testing.B) { - data := testdata.Shattered1M1s[0] + data := shattered1M1s[0] b.Run("go", func(b *testing.B) { b.ReportAllocs() @@ -41,26 +41,52 @@ func benchmarkSize(b *testing.B, n string, d hash.Hash, size int) { }) } +func benchmarkContent(b *testing.B, n string, d hash.Hash, data []byte) { + b.Run(n, func(b *testing.B) { + b.ReportAllocs() + b.SetBytes(int64(len(data))) + for i := 0; i < b.N; i++ { + d.Reset() + d.Write(data) + d.Sum(data[:0]) + } + }) +} + func BenchmarkHash8Bytes(b *testing.B) { benchmarkSize(b, "sha1", sha1.New(), 8) - benchmarkSize(b, "sha1cd", sha1cd.New(), 8) + benchmarkSize(b, "sha1cd_native", sha1cd.New(), 8) + benchmarkSize(b, "sha1cd_generic", 
sha1cd.NewGeneric(), 8) benchmarkSize(b, "sha1cd_cgo", cgo.New(), 8) } func BenchmarkHash320Bytes(b *testing.B) { benchmarkSize(b, "sha1", sha1.New(), 320) - benchmarkSize(b, "sha1cd", sha1cd.New(), 320) + benchmarkSize(b, "sha1cd_native", sha1cd.New(), 320) + benchmarkSize(b, "sha1cd_generic", sha1cd.NewGeneric(), 320) benchmarkSize(b, "sha1cd_cgo", cgo.New(), 320) } func BenchmarkHash1K(b *testing.B) { benchmarkSize(b, "sha1", sha1.New(), 1024) - benchmarkSize(b, "sha1cd", sha1cd.New(), 1024) + benchmarkSize(b, "sha1cd_native", sha1cd.New(), 1024) + benchmarkSize(b, "sha1cd_generic", sha1cd.NewGeneric(), 1024) benchmarkSize(b, "sha1cd_cgo", cgo.New(), 1024) } func BenchmarkHash8K(b *testing.B) { benchmarkSize(b, "sha1", sha1.New(), 8192) - benchmarkSize(b, "sha1cd", sha1cd.New(), 8192) + benchmarkSize(b, "sha1cd_native", sha1cd.New(), 8192) + benchmarkSize(b, "sha1cd_generic", sha1cd.NewGeneric(), 8192) benchmarkSize(b, "sha1cd_cgo", cgo.New(), 8192) } + +func BenchmarkHashWithCollision(b *testing.B) { + shambles, err := os.ReadFile("testdata/files/sha-mbles-1.bin") + if err != nil { + b.Fatal(err) + } + benchmarkContent(b, "sha1cd_native", sha1cd.New(), shambles) + benchmarkContent(b, "sha1cd_generic", sha1cd.NewGeneric(), shambles) + benchmarkContent(b, "sha1cd_cgo", cgo.New(), shambles) +} diff --git a/test/collisiondetection_test.go b/test/collisiondetection_test.go index 2ec8b96..ee1b7d5 100644 --- a/test/collisiondetection_test.go +++ b/test/collisiondetection_test.go @@ -4,19 +4,21 @@ import ( "encoding/hex" "fmt" "io/ioutil" - "strings" "testing" "github.com/pjbgf/sha1cd" "github.com/pjbgf/sha1cd/cgo" - "github.com/pjbgf/sha1cd/testdata" "github.com/pjbgf/sha1cd/ubc" ) func TestCollisionDetection(t *testing.T) { - defaultHashers := []sha1cd.CollisionResistantHash{ - cgo.New().(sha1cd.CollisionResistantHash), - sha1cd.New().(sha1cd.CollisionResistantHash), + hashers := []struct { + name string + hasher sha1cd.CollisionResistantHash + }{ + {name: 
"sha1cd_cgo", hasher: cgo.New().(sha1cd.CollisionResistantHash)}, + {name: "sha1cd_native", hasher: sha1cd.New().(sha1cd.CollisionResistantHash)}, + {name: "sha1cd_generic", hasher: sha1cd.NewGeneric().(sha1cd.CollisionResistantHash)}, } tests := []struct { @@ -24,52 +26,47 @@ func TestCollisionDetection(t *testing.T) { inputFile string wantHash string wantCollision bool - hashers []sha1cd.CollisionResistantHash }{ { name: "shattered-1 ", - inputFile: "../testdata/files/shattered-1.pdf", + inputFile: "testdata/files/shattered-1.pdf", wantCollision: true, wantHash: "16e96b70000dd1e7c85b8368ee197754400e58ec", - hashers: defaultHashers, }, { name: "shattered-2", - inputFile: "../testdata/files/shattered-2.pdf", + inputFile: "testdata/files/shattered-2.pdf", wantCollision: true, wantHash: "e1761773e6a35916d99f891b77663e6405313587", - hashers: defaultHashers, }, { name: "sha-mbles-1", - inputFile: "../testdata/files/sha-mbles-1.bin", + inputFile: "testdata/files/sha-mbles-1.bin", wantCollision: true, wantHash: "4f3d9be4a472c4dae83c6314aa6c36a064c1fd14", - hashers: defaultHashers, }, { name: "sha-mbles-2", - inputFile: "../testdata/files/sha-mbles-2.bin", + inputFile: "testdata/files/sha-mbles-2.bin", wantCollision: true, wantHash: "9ed5d77a4f48be1dbf3e9e15650733eb850897f2", - hashers: defaultHashers, }, { name: "Valid File", - inputFile: "../testdata/files/valid-file.txt", + inputFile: "testdata/files/valid-file.txt", wantHash: "2b915da50f163514d390c9d87a4f3e23eb663f8a", - hashers: defaultHashers, }, } for _, tt := range tests { - for i, d := range tt.hashers { - t.Run(fmt.Sprintf("%s[%d]", tt.name, i), func(t *testing.T) { + for _, hasher := range hashers { + t.Run(fmt.Sprintf("%s[%s]", tt.name, hasher.name), func(t *testing.T) { data, err := ioutil.ReadFile(tt.inputFile) if err != nil { t.Fatalf("unexpected error: %v", err) } + d := hasher.hasher d.Reset() d.Write(data) @@ -86,64 +83,13 @@ func TestCollisionDetection(t *testing.T) { } func 
TestCalculateDvMask_Shattered1(t *testing.T) { - for i := range testdata.Shattered1M1s { + for i := range shattered1M1s { t.Run(fmt.Sprintf("m1[%d]", i), func(t *testing.T) { - got, gotErr := ubc.CalculateDvMask(testdata.Shattered1M1s[i]) - want, wantErr := cgo.CalculateDvMask(testdata.Shattered1M1s[i]) - - if want != got || gotErr != wantErr { - t.Fatalf("dvmask: %d %v\nwant %d %v", got, gotErr, want, wantErr) - } - }) - } -} + got := ubc.CalculateDvMask(shattered1M1s[i]) + want := cgo.CalculateDvMask(shattered1M1s[i]) -func TestCalculateDvMask(t *testing.T) { - tests := []struct { - name string - input []uint32 - want uint32 - wantErr string - }{ - { - name: "empty", - input: nil, - wantErr: "invalid input: len(W) must be 80, was 0", - }, - { - name: "[79]uint32{}", - input: make([]uint32, 79), - wantErr: "invalid input: len(W) must be 80, was 79", - }, - { - name: "[80]uint32{}", - input: make([]uint32, 80), - }, - } - - impls := []func(W []uint32) (uint32, error){ - cgo.CalculateDvMask, - ubc.CalculateDvMask, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - for _, impl := range impls { - got, err := impl(tt.input) - if tt.wantErr == "" && err != nil { - t.Errorf("unexpected error: %v", err) - } - if tt.wantErr != "" { - if err == nil { - t.Errorf("expected error: %q, got nil", tt.wantErr) - } else if !strings.Contains(err.Error(), tt.wantErr) { - t.Errorf("got: %q, want: %q", err.Error(), tt.wantErr) - } - } - - if got != tt.want { - t.Errorf(" got: %d\n want: %v", got, tt.want) - } + if want != got { + t.Fatalf("dvmask: %d\nwant %d", got, want) } }) } diff --git a/testdata/shattered1_m1s.go b/test/shattered1_m1s.go similarity index 99% rename from testdata/shattered1_m1s.go rename to test/shattered1_m1s.go index 47572f7..c0418c8 100644 --- a/testdata/shattered1_m1s.go +++ b/test/shattered1_m1s.go @@ -1,8 +1,8 @@ -package testdata +package test -// Shattered1M1s contains all m1 messages generated by the original sha1.c code +// 
shattered1M1s contains all m1 messages generated by the original sha1.c code // when calling ubc_check.c. Used to attest correctness of the Go implementation. -var Shattered1M1s = [][]uint32{ +var shattered1M1s = [][80]uint32{ { 0x25504446, 0x2D312E33, 0xA25E2E3, 0xCFD30A0A, 0xA312030, 0x206F626A, 0xA3C3C2F, 0x57696474, 0x68203220, 0x3020522F, 0x48656967, 0x68742033, 0x20302052, 0x2F547970, 0x65203420, 0x3020522F, 0xD003DBEA, 0x6FC4846D, 0xF0A3F336, 0xAF972772, diff --git a/testdata/files/sha-mbles-1.bin b/test/testdata/files/sha-mbles-1.bin similarity index 100% rename from testdata/files/sha-mbles-1.bin rename to test/testdata/files/sha-mbles-1.bin diff --git a/testdata/files/sha-mbles-2.bin b/test/testdata/files/sha-mbles-2.bin similarity index 100% rename from testdata/files/sha-mbles-2.bin rename to test/testdata/files/sha-mbles-2.bin diff --git a/testdata/files/shattered-1.pdf b/test/testdata/files/shattered-1.pdf similarity index 100% rename from testdata/files/shattered-1.pdf rename to test/testdata/files/shattered-1.pdf diff --git a/testdata/files/shattered-2.pdf b/test/testdata/files/shattered-2.pdf similarity index 100% rename from testdata/files/shattered-2.pdf rename to test/testdata/files/shattered-2.pdf diff --git a/testdata/files/valid-file.txt b/test/testdata/files/valid-file.txt similarity index 100% rename from testdata/files/valid-file.txt rename to test/testdata/files/valid-file.txt diff --git a/test/testdata/fuzz/Fuzz_DeviationDetection/6cfa486437287a4340595f43c88ee06028ccbbf64661768e9596789910b0b2c1 b/test/testdata/fuzz/Fuzz_DeviationDetection/6cfa486437287a4340595f43c88ee06028ccbbf64661768e9596789910b0b2c1 deleted file mode 100644 index 2fc5131..0000000 --- a/test/testdata/fuzz/Fuzz_DeviationDetection/6cfa486437287a4340595f43c88ee06028ccbbf64661768e9596789910b0b2c1 +++ /dev/null @@ -1,2 +0,0 @@ -go test fuzz v1 -[]byte("0009cA0$") diff --git a/ubc/check.go b/ubc/check.go index fc0659e..167a555 100644 --- a/ubc/check.go +++ b/ubc/check.go @@ 
-3,35 +3,29 @@ package ubc -import "fmt" - type DvInfo struct { // DvType, DvK and DvB define the DV: I(K,B) or II(K,B) (see the paper). // https://marc-stevens.nl/research/papers/C13-S.pdf - DvType int - DvK int - DvB int + DvType uint32 + DvK uint32 + DvB uint32 // TestT is the step to do the recompression from for collision detection. - TestT int + TestT uint32 // MaskI and MaskB define the bit to check for each DV in the dvmask returned by ubc_check. - MaskI int - MaskB int + MaskI uint32 + MaskB uint32 // Dm is the expanded message block XOR-difference defined by the DV. Dm [80]uint32 } -// Check takes as input an expanded message block and verifies the unavoidable bitconditions +// CalculateDvMask takes as input an expanded message block and verifies the unavoidable bitconditions // for all listed DVs. It returns a dvmask where each bit belonging to a DV is set if all // unavoidable bitconditions for that DV have been met. // Thus, one needs to do the recompression check for each DV that has its bit set. -func CalculateDvMask(W []uint32) (uint32, error) { - if len(W) < 80 { - return 0, fmt.Errorf("invalid input: len(W) must be 80, was %d", len(W)) - } - +func CalculateDvMask(W [80]uint32) uint32 { mask := uint32(0xFFFFFFFF) mask &= (((((W[44] ^ W[45]) >> 29) & 1) - 1) | ^(DV_I_48_0_bit | DV_I_51_0_bit | DV_I_52_0_bit | DV_II_45_0_bit | DV_II_46_0_bit | DV_II_50_0_bit | DV_II_51_0_bit)) mask &= (((((W[49] ^ W[50]) >> 29) & 1) - 1) | ^(DV_I_46_0_bit | DV_II_45_0_bit | DV_II_50_0_bit | DV_II_51_0_bit | DV_II_55_0_bit | DV_II_56_0_bit)) @@ -358,7 +352,7 @@ func CalculateDvMask(W []uint32) (uint32, error) { } } - return mask, nil + return mask } func not(x uint32) uint32 {