-
Notifications
You must be signed in to change notification settings - Fork 39
Support matching chunks using uncompressed digests in zck_delta_size and zckdl #112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -248,10 +248,16 @@ bool ZCK_PUBLIC_API zck_copy_chunks(zckCtx *src, zckCtx *tgt) { | |
| } | ||
| zckChunk *f = NULL; | ||
|
|
||
| HASH_FIND(hh, src_info->ht, tgt_idx->digest, tgt_idx->digest_size, f); | ||
| if(f && f->length == tgt_idx->length && | ||
| f->comp_length == tgt_idx->comp_length) | ||
| /* If both archives have uncompressed data digests, check them instead of the compressed ones */ | ||
| if(src->has_uncompressed_source && tgt->has_uncompressed_source){ | ||
| HASH_FIND(hhuncomp, src_info->htuncomp, tgt_idx->digest_uncompressed, tgt_idx->digest_size, f); | ||
| }else{ | ||
| HASH_FIND(hh, src_info->ht, tgt_idx->digest, tgt_idx->digest_size, f); | ||
| } | ||
|
|
||
| if(f && f->length == tgt_idx->length) | ||
| write_and_verify_chunk(src, tgt, f, tgt_idx); | ||
|
|
||
| tgt_idx = tgt_idx->next; | ||
| } | ||
| return true; | ||
|
|
@@ -279,13 +285,14 @@ bool ZCK_PUBLIC_API zck_find_matching_chunks(zckCtx *src, zckCtx *tgt) { | |
| } | ||
|
|
||
| /* | ||
| * Compare digest for compressed data if the same compressor | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you make the above change, is this full change necessary? Maybe it's better to just duplicate the logic I suggested above? |
||
| * Compare digest for uncompressed data if both archives support it, | ||
| * otherwise compare digest for compressed data if the same compressor | ||
| * was used | ||
| */ | ||
| if (src->comp.type == tgt->comp.type) { | ||
| HASH_FIND(hh, src_info->ht, tgt_idx->digest, tgt_idx->digest_size, f); | ||
| } else if (src->has_uncompressed_source && tgt->has_uncompressed_source) { | ||
| if (src->has_uncompressed_source && tgt->has_uncompressed_source) { | ||
| HASH_FIND(hhuncomp, src_info->htuncomp, tgt_idx->digest_uncompressed, tgt_idx->digest_size, f); | ||
| }else if (src->comp.type == tgt->comp.type) { | ||
| HASH_FIND(hh, src_info->ht, tgt_idx->digest, tgt_idx->digest_size, f); | ||
| } else { | ||
|
|
||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -271,11 +271,19 @@ bool ZCK_PUBLIC_API zck_compare_chunk_digest(zckChunk *a, zckChunk *b) { | |
| } else { | ||
| ALLOCD_BOOL(NULL, b); | ||
| } | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I prefer not to have spaces/tabs in empty lines. |
||
| if(a->digest_size != b->digest_size) | ||
| return false; | ||
| if(memcmp(a->digest, b->digest, a->digest_size) != 0) | ||
| return false; | ||
|
|
||
| if(a->zck->has_uncompressed_source && b->zck->has_uncompressed_source){ | ||
| /* If both archives have uncompressed digests, compare them instead */ | ||
| if(memcmp(a->digest_uncompressed, b->digest_uncompressed, a->digest_size) != 0) | ||
| return false; | ||
| }else { | ||
| if(memcmp(a->digest, b->digest, a->digest_size) != 0) | ||
| return false; | ||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think my preference here is to first check that the compression types are identical, and, if so, run HASH_FIND. If f is still NULL (either because the compression types weren't identical or because HASH_FIND didn't find a matching hash), then check if both have uncompressed sources, and if so, run HASH_FIND on the uncompressed sources. This ensures that matching compressed hashes are checked first, and we then only check for matching uncompressed sources if no matches were found.