diff --git a/Compression/bqof_file_compression.pl b/Compression/bqof_file_compression.pl new file mode 100644 index 00000000..fc73d030 --- /dev/null +++ b/Compression/bqof_file_compression.pl @@ -0,0 +1,273 @@ +#!/usr/bin/perl + +# A general purpose lossless compressor, based on ideas from the QOI compressor. (+BWT) + +# See also: +# https://qoiformat.org/ + +use 5.036; +use File::Basename qw(basename); +use Compression::Util qw(:all); +use List::Util qw(max); +use Getopt::Std qw(getopts); + +binmode(STDIN, ":raw"); +binmode(STDOUT, ":raw"); + +use constant { + PKGNAME => 'BQOF', + FORMAT => 'bqof', + VERSION => '0.01', + CHUNK_SIZE => 1 << 17, + }; + +# Container signature +use constant SIGNATURE => uc(FORMAT) . chr(1); + +sub version { + printf("%s %s\n", PKGNAME, VERSION); + exit; +} + +sub valid_archive { + my ($fh) = @_; + + if (read($fh, (my $sig), length(SIGNATURE), 0) == length(SIGNATURE)) { + $sig eq SIGNATURE || return; + } + + return 1; +} + +sub usage ($code = 0) { + print <<"EOH"; +usage: $0 [options] [input file] [output file] + +options: + -e : extract + -i : input filename + -o : output filename + -r : rewrite output + -h : this message + +examples: + $0 document.txt + $0 document.txt archive.${\FORMAT} + $0 archive.${\FORMAT} document.txt + $0 -e -i archive.${\FORMAT} -o document.txt + +EOH + + exit($code // 0); +} + +sub qof_encoder ($string) { + + use constant { + QOI_OP_RGB => 0b1111_1110, + QOI_OP_DIFF => 0b01_000_000, + QOI_OP_RUN => 0b11_000_000, + QOI_OP_LUMA => 0b10_000_000, + }; + + my $run = 0; + my $px = 0; + my $prev_px = -1; + + my $rle4 = rle4_encode(string2symbols($string)); + my ($bwt, $idx) = bwt_encode(symbols2string($rle4)); + + my @bytes; + my @table = (0) x 64; + my @chars = unpack('C*', $bwt); + + push @bytes, unpack('C*', pack('N', $idx)); + + while (@chars) { + + $px = shift(@chars); + + if ($px == $prev_px) { + if (++$run == 62) { + push @bytes, QOI_OP_RUN | ($run - 1); + $run = 0; + } + } + else { + + if ($run > 0) { + push @bytes, (QOI_OP_RUN | ($run - 1)); + $run = 0; + } + + my $hash = $px % 64; + my $index_px = $table[$hash]; + + if ($px == $index_px) { + push @bytes, $hash; + } + else { + + $table[$hash] = $px; + my $diff = $px - $prev_px; + + if ($diff > -33 and $diff < 32) { + push(@bytes, QOI_OP_DIFF | ($diff + 32)); + } + else { + push(@bytes, QOI_OP_RGB, $px); + } + } + } + + $prev_px = $px; + } + + if ($run > 0) { + push(@bytes, QOI_OP_RUN | ($run - 1)); + } + + create_huffman_entry(\@bytes); +} + +sub qof_decoder ($fh) { + + use constant { + QOI_OP_RGB => 0b1111_1110, + QOI_OP_DIFF => 0b01_000_000, + QOI_OP_RUN => 0b11_000_000, + QOI_OP_LUMA => 0b10_000_000, + QOI_OP_INDEX => 0b00_000_000, + }; + + my $run = 0; + my $px = -1; + + my @bytes; + my @table = ((0) x 64); + + my $index = 0; + my @symbols = @{decode_huffman_entry($fh)}; + + my $idx = unpack('N', pack('C*', map { $symbols[$index++] } 1 .. 4)); + + while (1) { + + if ($run > 0) { + --$run; + } + else { + my $byte = $symbols[$index++] // last; + + if ($byte == QOI_OP_RGB) { # OP RGB + $px = $symbols[$index++]; + } + elsif (($byte >> 6) == (QOI_OP_INDEX >> 6)) { # OP INDEX + $px = $table[$byte]; + } + elsif (($byte >> 6) == (QOI_OP_DIFF >> 6)) { # OP DIFF + $px += ($byte & 0b00_111_111) - 32; + } + elsif (($byte >> 6) == (QOI_OP_RUN >> 6)) { # OP RUN + $run = ($byte & 0b00_111_111); + } + + $table[$px % 64] = $px; + } + + push @bytes, $px; + } + + my $bwt = pack('C*', @bytes); + my $rle4 = string2symbols(bwt_decode($bwt, $idx)); + + return symbols2string(rle4_decode($rle4)); +} + +# Compress file +sub compress_file ($input, $output) { + + open my $fh, '<:raw', $input + or die "Can't open file <<$input>> for reading: $!"; + + my $header = SIGNATURE; + + # Open the output file for writing + open my $out_fh, '>:raw', $output + or die "Can't open file <<$output>> for write: $!"; + + # Print the header + print $out_fh $header; + + # Compress data + while (read($fh, (my $chunk), CHUNK_SIZE)) { + print $out_fh qof_encoder($chunk); + } + + # Close the file + close $out_fh; +} + +# Decompress file +sub decompress_file ($input, $output) { + + # Open and validate the input file + open my $fh, '<:raw', $input + or die "Can't open file <<$input>> for reading: $!"; + + valid_archive($fh) || die "$0: file `$input' is not a \U${\FORMAT}\E v${\VERSION} archive!\n"; + + # Open the output file + open my $out_fh, '>:raw', $output + or die "Can't open file <<$output>> for writing: $!"; + + while (!eof($fh)) { + print $out_fh qof_decoder($fh); + } + + # Close the file + close $fh; + close $out_fh; +} + +sub main { + my %opt; + getopts('ei:o:vhr', \%opt); + + $opt{h} && usage(0); + $opt{v} && version(); + + my ($input, $output) = @ARGV; + $input //= $opt{i} // usage(2); + $output //= $opt{o}; + + my $ext = qr{\.${\FORMAT}\z}io; + if ($opt{e} || $input =~ $ext) { + + if (not defined $output) { + ($output = basename($input)) =~ s{$ext}{} + || die "$0: no output file specified!\n"; + } + + if (not $opt{r} and -e $output) { + print "'$output' already exists! -- Replace? [y/N] "; + =~ /^y/i || exit 17; + } + + decompress_file($input, $output) + || die "$0: error: decompression failed!\n"; + } + elsif ($input !~ $ext || (defined($output) && $output =~ $ext)) { + $output //= basename($input) . '.' . FORMAT; + + compress_file($input, $output) + || die "$0: error: compression failed!\n"; + } + else { + warn "$0: don't know what to do...\n"; + usage(1); + } +} + +main(); +exit(0); diff --git a/Compression/qof_file_compression.pl b/Compression/qof_file_compression.pl new file mode 100644 index 00000000..dfc6dac1 --- /dev/null +++ b/Compression/qof_file_compression.pl @@ -0,0 +1,263 @@ +#!/usr/bin/perl + +# A general purpose lossless compressor, based on ideas from the QOI compressor. + +# See also: +# https://qoiformat.org/ + +use 5.036; +use File::Basename qw(basename); +use Compression::Util qw(:all); +use List::Util qw(max); +use Getopt::Std qw(getopts); + +binmode(STDIN, ":raw"); +binmode(STDOUT, ":raw"); + +use constant { + PKGNAME => 'QOF', + FORMAT => 'qof', + VERSION => '0.01', + CHUNK_SIZE => 1 << 14, + }; + +# Container signature +use constant SIGNATURE => uc(FORMAT) . chr(1); + +sub version { + printf("%s %s\n", PKGNAME, VERSION); + exit; +} + +sub valid_archive { + my ($fh) = @_; + + if (read($fh, (my $sig), length(SIGNATURE), 0) == length(SIGNATURE)) { + $sig eq SIGNATURE || return; + } + + return 1; +} + +sub usage ($code = 0) { + print <<"EOH"; +usage: $0 [options] [input file] [output file] + +options: + -e : extract + -i : input filename + -o : output filename + -r : rewrite output + -h : this message + +examples: + $0 document.txt + $0 document.txt archive.${\FORMAT} + $0 archive.${\FORMAT} document.txt + $0 -e -i archive.${\FORMAT} -o document.txt + +EOH + + exit($code // 0); +} + +sub qof_encoder ($string) { + + use constant { + QOI_OP_RGB => 0b1111_1110, + QOI_OP_DIFF => 0b01_000_000, + QOI_OP_RUN => 0b11_000_000, + QOI_OP_LUMA => 0b10_000_000, + }; + + my $run = 0; + my $px = 0; + my $prev_px = -1; + + my @bytes; + my @table = (0) x 64; + my @chars = unpack('C*', $string); + + while (@chars) { + + $px = shift(@chars); + + if ($px == $prev_px) { + if (++$run == 62) { + push @bytes, QOI_OP_RUN | ($run - 1); + $run = 0; + } + } + else { + + if ($run > 0) { + push @bytes, (QOI_OP_RUN | ($run - 1)); + $run = 0; + } + + my $hash = $px % 64; + my $index_px = $table[$hash]; + + if ($px == $index_px) { + push @bytes, $hash; + } + else { + + $table[$hash] = $px; + my $diff = $px - $prev_px; + + if ($diff > -33 and $diff < 32) { + push(@bytes, QOI_OP_DIFF | ($diff + 32)); + } + else { + push(@bytes, QOI_OP_RGB, $px); + } + } + } + + $prev_px = $px; + } + + if ($run > 0) { + push(@bytes, QOI_OP_RUN | ($run - 1)); + } + + create_huffman_entry(\@bytes); +} + +sub qof_decoder ($fh) { + + use constant { + QOI_OP_RGB => 0b1111_1110, + QOI_OP_DIFF => 0b01_000_000, + QOI_OP_RUN => 0b11_000_000, + QOI_OP_LUMA => 0b10_000_000, + QOI_OP_INDEX => 0b00_000_000, + }; + + my $run = 0; + my $px = -1; + + my @bytes; + my @table = ((0) x 64); + + my $index = 0; + my @symbols = @{decode_huffman_entry($fh)}; + + while (1) { + + if ($run > 0) { + --$run; + } + else { + my $byte = $symbols[$index++] // last; + + if ($byte == QOI_OP_RGB) { # OP RGB + $px = $symbols[$index++]; + } + elsif (($byte >> 6) == (QOI_OP_INDEX >> 6)) { # OP INDEX + $px = $table[$byte]; + } + elsif (($byte >> 6) == (QOI_OP_DIFF >> 6)) { # OP DIFF + $px += ($byte & 0b00_111_111) - 32; + } + elsif (($byte >> 6) == (QOI_OP_RUN >> 6)) { # OP RUN + $run = ($byte & 0b00_111_111); + } + + $table[$px % 64] = $px; + } + + push @bytes, $px; + } + + return pack('C*', @bytes); +} + +# Compress file +sub compress_file ($input, $output) { + + open my $fh, '<:raw', $input + or die "Can't open file <<$input>> for reading: $!"; + + my $header = SIGNATURE; + + # Open the output file for writing + open my $out_fh, '>:raw', $output + or die "Can't open file <<$output>> for write: $!"; + + # Print the header + print $out_fh $header; + + # Compress data + while (read($fh, (my $chunk), CHUNK_SIZE)) { + print $out_fh qof_encoder($chunk); + } + + # Close the file + close $out_fh; +} + +# Decompress file +sub decompress_file ($input, $output) { + + # Open and validate the input file + open my $fh, '<:raw', $input + or die "Can't open file <<$input>> for reading: $!"; + + valid_archive($fh) || die "$0: file `$input' is not a \U${\FORMAT}\E v${\VERSION} archive!\n"; + + # Open the output file + open my $out_fh, '>:raw', $output + or die "Can't open file <<$output>> for writing: $!"; + + while (!eof($fh)) { + print $out_fh qof_decoder($fh); + } + + # Close the file + close $fh; + close $out_fh; +} + +sub main { + my %opt; + getopts('ei:o:vhr', \%opt); + + $opt{h} && usage(0); + $opt{v} && version(); + + my ($input, $output) = @ARGV; + $input //= $opt{i} // usage(2); + $output //= $opt{o}; + + my $ext = qr{\.${\FORMAT}\z}io; + if ($opt{e} || $input =~ $ext) { + + if (not defined $output) { + ($output = basename($input)) =~ s{$ext}{} + || die "$0: no output file specified!\n"; + } + + if (not $opt{r} and -e $output) { + print "'$output' already exists! -- Replace? [y/N] "; + =~ /^y/i || exit 17; + } + + decompress_file($input, $output) + || die "$0: error: decompression failed!\n"; + } + elsif ($input !~ $ext || (defined($output) && $output =~ $ext)) { + $output //= basename($input) . '.' . FORMAT; + + compress_file($input, $output) + || die "$0: error: compression failed!\n"; + } + else { + warn "$0: don't know what to do...\n"; + usage(1); + } +} + +main(); +exit(0); diff --git a/Compression/unzip.pl b/Compression/unzip.pl new file mode 100644 index 00000000..7be6bf47 --- /dev/null +++ b/Compression/unzip.pl @@ -0,0 +1,206 @@ +#!/usr/bin/perl + +# Author: Daniel "Trizen" Șuteu +# Date: 20 November 2024 +# https://github.com/trizen + +# Basic implementation of a ZIP file extractor. + +# Reference: +# https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + +use 5.036; +use Compression::Util qw(:all); +use File::Path qw(make_path); +use File::Spec::Functions qw(catfile catdir); +use File::Basename qw(dirname); + +local $Compression::Util::LZ_MIN_LEN = 4; # minimum match length in LZ parsing +local $Compression::Util::LZ_MAX_LEN = 258; # maximum match length in LZ parsing +local $Compression::Util::LZ_MAX_DIST = 32768; # maximum allowed back-reference distance in LZ parsing + +my $output_directory = 'OUTPUT'; + +if (not -d $output_directory) { + make_path($output_directory); +} + +sub extract_file($fh) { + + my $version_needed = bytes2int_lsb($fh, 2); + my $general_purpose_bit_flag = bytes2int_lsb($fh, 2); + my $compression_method = bytes2int_lsb($fh, 2); + + my $last_mod_file_time = bytes2int_lsb($fh, 2); + my $last_mod_file_date = bytes2int_lsb($fh, 2); + my $crc32 = bytes2int_lsb($fh, 4); + my $compressed_size = bytes2int_lsb($fh, 4); + my $uncompressed_size = bytes2int_lsb($fh, 4); + my $file_name_length = bytes2int_lsb($fh, 2); + my $extra_field_length = bytes2int_lsb($fh, 2); + + my $skip_crc32 = 0; + + if ($general_purpose_bit_flag & 0b1000) { + $skip_crc32 = 1; + $crc32 == 0 or warn "[WARNING] Bit 3 is set, therefore CRC-32 must be set to zero (got: $crc32)\n"; + $compressed_size == 0 or warn "[WARNING] Bit 3 is set, thefore compressed size must be set to zero (got: $compressed_size)\n"; + $uncompressed_size == 0 or warn "[WARNING] Bit 3 is set, therefore uncompressed size must be set to zero (got: $uncompressed_size)\n"; + } + + read($fh, (my $file_name), $file_name_length); + read($fh, (my $extra_field), $extra_field_length); + + if ($general_purpose_bit_flag & 0x01) { + die "Encrypted file are currently not supported!\n"; + } + + say STDERR ":: Extracting: $file_name ($uncompressed_size bytes)"; + + # It's a directory + if ($uncompressed_size == 0 and substr($file_name, -1) eq '/') { + my $dir = catdir($output_directory, $file_name); + make_path($dir) if not -d $dir; + return 1; + } + + my $out_filename = catfile($output_directory, $file_name); + + my $out_dir = dirname($out_filename); + make_path($out_dir) if not -d $out_dir; + + open my $out_fh, '>:raw', $out_filename + or die "Can't create file <<$out_filename>>: $!\n"; + + my $actual_crc32 = 0; + my $buffer = ''; + my $search_window = ''; + my $actual_uncompressed_size = 0; + + if ($compression_method == 8) { # DEFLATE method + while (1) { + my $is_last = read_bit_lsb($fh, \$buffer); + my $chunk = deflate_extract_next_block($fh, \$buffer, \$search_window); + $actual_crc32 = crc32($chunk, $actual_crc32); + $actual_uncompressed_size += length($chunk); + print $out_fh $chunk; + last if $is_last; + } + } + elsif ($compression_method == 0) { # uncompressed (stored) + + # TODO: do not read the entire content at once (read in small chunks) + read($fh, (my $chunk), $uncompressed_size); + $actual_crc32 = crc32($chunk); + $actual_uncompressed_size += length($chunk); + print $out_fh $chunk; + } + else { + die "Unsupported compression method: $compression_method\n"; + } + + if (not $skip_crc32 and $crc32 != $actual_crc32) { + die "CRC32 error: $crc32 (stored) != $actual_crc32 (actual)\n"; + } + + if ($general_purpose_bit_flag & 0b100) { # TODO + die "Data descriptor is currently not supported!\n"; + } + + if ($skip_crc32) { + my $header_signature = bytes2int_lsb($fh, 4); + if ($header_signature == 0x8074b50) { + + my $stored_crc32 = bytes2int_lsb($fh, 4); + my $compressed_size = bytes2int_lsb($fh, 4); + my $uncompressed_size = bytes2int_lsb($fh, 4); + + if ($stored_crc32 != $actual_crc32) { + die "CRC32 error: $stored_crc32 (stored) != $actual_crc32 (actual)\n"; + } + + if ($uncompressed_size != $actual_uncompressed_size) { + die "Uncompressed size error: $uncompressed_size (stored) != $actual_uncompressed_size (actual)\n"; + } + } + else { + die "Unknown signature: $header_signature\n"; + } + } + + close $out_fh; + return $actual_crc32; +} + +sub extract_central_directory($fh) { # TODO + + my $version_made_by = bytes2int_lsb($fh, 2); + my $version_needed_to_extract = bytes2int_lsb($fh, 2); + my $general_purpose_bit_flag = bytes2int_lsb($fh, 2); + my $compression_method = bytes2int_lsb($fh, 2); + my $last_mod_file_time = bytes2int_lsb($fh, 2); + my $last_mod_file_date = bytes2int_lsb($fh, 2); + my $crc_32 = bytes2int_lsb($fh, 4); + my $compressed_size = bytes2int_lsb($fh, 4); + my $uncompressed_size = bytes2int_lsb($fh, 4); + my $file_name_length = bytes2int_lsb($fh, 2); + my $extra_field_length = bytes2int_lsb($fh, 2); + my $file_comment_length = bytes2int_lsb($fh, 2); + my $disk_number_start = bytes2int_lsb($fh, 2); + my $internal_file_attributes = bytes2int_lsb($fh, 2); + my $external_file_attributes = bytes2int_lsb($fh, 4); + my $relative_offset_of_local_header = bytes2int_lsb($fh, 4); + + read($fh, (my $file_name), $file_name_length); + read($fh, (my $extra_field), $extra_field_length); + read($fh, (my $file_comment), $file_comment_length); +} + +sub extract_end_of_file ($fh) { # TODO + + my $number_of_this_disk = bytes2int_lsb($fh, 2); + my $number_of_the_disk_central_dir = bytes2int_lsb($fh, 2); + my $start_of_central_dir = bytes2int_lsb($fh, 2); + my $total_number_of_entries = bytes2int_lsb($fh, 2); + my $size_of_the_central_directory = bytes2int_lsb($fh, 4); + my $offset = bytes2int_lsb($fh, 4); + my $ZIP_file_comment_length = bytes2int_lsb($fh, 2); + + read($fh, (my $ZIP_file_comment), $ZIP_file_comment_length); +} + +sub unzip($file) { + + open my $fh, '<:raw', $file + or die "Can't open file <<$file>> for reading: $!"; + + while (!eof($fh)) { + my $header_signature = bytes2int_lsb($fh, 4); + + if ($header_signature == 0x04034b50) { + extract_file($fh); + } + elsif ($header_signature == 0x02014b50) { + extract_central_directory($fh); + } + elsif ($header_signature == 0x05054b50) { # TODO + die "Digital signature is currently not supported!\n"; + } + elsif ($header_signature == 0x06064b50) { # TODO + die "ZIP64 is currently not supported!\n"; + } + elsif ($header_signature == 0x08064b50) { # TODO + die "Extra data record is currently not supported!\n"; + } + elsif ($header_signature == 0x06054b50) { + extract_end_of_file($fh); + } + else { + die "Unknown header signature: $header_signature\n"; + } + } +} + +foreach my $input_file (@ARGV) { + unzip($input_file); +} diff --git a/Compression/zip.pl b/Compression/zip.pl new file mode 100644 index 00000000..3db02274 --- /dev/null +++ b/Compression/zip.pl @@ -0,0 +1,227 @@ +#!/usr/bin/perl + +# Author: Daniel "Trizen" Șuteu +# Date: 03 February 2025 +# Edit: 04 February 2025 +# https://github.com/trizen + +# Basic implementation of a ZIP archiver. (WIP) + +# Reference: +# https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + +use 5.036; +use Compression::Util qw(:all); +use File::Path qw(make_path); +use File::Spec::Functions qw(catfile catdir); +use File::Basename qw(dirname); +use File::Find qw(find); + +use constant { + FORMAT => 'zip', + CHUNK_SIZE => (1 << 15) - 1, + }; + +local $Compression::Util::LZ_MIN_LEN = 4; # minimum match length in LZ parsing +local $Compression::Util::LZ_MAX_LEN = 258; # maximum match length in LZ parsing +local $Compression::Util::LZ_MAX_DIST = 32768; # maximum allowed back-reference distance in LZ parsing + +binmode(STDOUT, ':raw'); +binmode(STDIN, ':raw'); + +my $OFFSET = 0; + +sub zip_directory ($dir) { + + if (substr($dir, 0, -1) ne '/') { + $dir .= '/'; + } + + print STDOUT int2bytes_lsb(0x04034b50, 4); # header signature + print STDOUT int2bytes_lsb(20, 2); # version needed + print STDOUT int2bytes_lsb(0, 2); # general purpose bit + print STDOUT int2bytes_lsb(0, 2); # compression method (8 = DEFLATE) + print STDOUT int2bytes_lsb(0, 2); # last mod file time + print STDOUT int2bytes_lsb(0, 2); # last mod file date + print STDOUT int2bytes_lsb(0, 4); # CRC32 + print STDOUT int2bytes_lsb(0, 4); # compressed size + print STDOUT int2bytes_lsb(0, 4); # uncompressed size + print STDOUT int2bytes_lsb(length($dir), 2); # filename length + print STDOUT int2bytes_lsb(0, 2); # extra field length + + print STDOUT $dir; + + my $info = { + crc32 => 0, + name => $dir, + compressed_size => 0, + uncompressed_size => 0, + compression_method => 0, + offset => $OFFSET, + }; + + $OFFSET += 4 * 4 + 2 * 7 + length($dir); + + return $info; +} + +sub zip_file ($file) { + + if (-d $file) { + return zip_directory($file); + } + + print STDOUT int2bytes_lsb(0x04034b50, 4); # header signature + print STDOUT int2bytes_lsb(20, 2); # version needed + print STDOUT int2bytes_lsb(0b1000, 2); # general purpose bit + print STDOUT int2bytes_lsb(8, 2); # compression method (8 = DEFLATE) + print STDOUT int2bytes_lsb(0, 2); # last mod file time + print STDOUT int2bytes_lsb(0, 2); # last mod file date + print STDOUT int2bytes_lsb(0, 4); # CRC32 + print STDOUT int2bytes_lsb(0, 4); # compressed size + print STDOUT int2bytes_lsb(0, 4); # uncompressed size + print STDOUT int2bytes_lsb(length($file), 2); # filename length + print STDOUT int2bytes_lsb(0, 2); # extra field length + + print STDOUT $file; # filename + + my $crc32 = 0; + my $uncompressed_size = 0; + my $compressed_size = 0; + + my $bitstring = ''; + + open my $in_fh, '<:raw', $file; + + if (eof($in_fh)) { # empty file + $bitstring = '1' . '10' . '0000000'; + } + + while (read($in_fh, (my $chunk), CHUNK_SIZE)) { + + $crc32 = crc32($chunk, $crc32); + $uncompressed_size += length($chunk); + + my ($literals, $distances, $lengths) = lzss_encode($chunk); + + $bitstring .= eof($in_fh) ? '1' : '0'; + + my $bt1_bitstring = deflate_create_block_type_1($literals, $distances, $lengths); + + # When block type 1 is larger than the input, then we have random uncompressible data: use block type 0 + if ((length($bt1_bitstring) >> 3) > length($chunk) + 5) { + + say STDERR ":: Using block type: 0"; + + $bitstring .= '00'; + + my $comp = pack('b*', $bitstring); # pads to a byte + $comp .= pack('b*', deflate_create_block_type_0_header($chunk)); + $comp .= $chunk; + $compressed_size .= length($comp); + print STDOUT $comp; + + $bitstring = ''; + next; + } + + my $bt2_bitstring = deflate_create_block_type_2($literals, $distances, $lengths); + + # When block type 2 is larger than block type 1, then we may have very small data + if (length($bt2_bitstring) > length($bt1_bitstring)) { + say STDERR ":: Using block type: 1"; + $bitstring .= $bt1_bitstring; + } + else { + say STDERR ":: Using block type: 2"; + $bitstring .= $bt2_bitstring; + } + + my $comp = pack('b*', substr($bitstring, 0, length($bitstring) - (length($bitstring) % 8), '')); + $compressed_size += length($comp); + print STDOUT $comp; + } + + if ($bitstring ne '') { + my $comp = pack('b*', $bitstring); + $compressed_size += length($comp); + print STDOUT $comp; + } + + print STDOUT int2bytes_lsb(0x8074b50, 4); + print STDOUT int2bytes_lsb($crc32, 4); + print STDOUT int2bytes_lsb($compressed_size, 4); + print STDOUT int2bytes_lsb($uncompressed_size, 4); + + my $info = { + compression_method => 8, + crc32 => $crc32, + name => $file, + compressed_size => $compressed_size, + uncompressed_size => $uncompressed_size, + offset => $OFFSET, + }; + + $OFFSET += 4 * 8 + 2 * 7 + length($file) + $compressed_size; + + return $info; +} + +sub central_directory($entry) { + + # FIXME: the offset of the local header is incorrect + + print STDOUT int2bytes_lsb(0x02014b50, 4); # header signature + print STDOUT int2bytes_lsb(831, 2); # version made by + print STDOUT int2bytes_lsb(20, 2); # version needed to extract + print STDOUT int2bytes_lsb(0, 2); # general purpose bit + print STDOUT int2bytes_lsb($entry->{compression_method}, 2); # compression method + print STDOUT int2bytes_lsb(0, 2); # last mod file time + print STDOUT int2bytes_lsb(0, 2); # last mod file date + print STDOUT int2bytes_lsb($entry->{crc32}, 4); # crc32 + print STDOUT int2bytes_lsb($entry->{compressed_size}, 4); # compressed size + print STDOUT int2bytes_lsb($entry->{uncompressed_size}, 4); # uncompressed size + print STDOUT int2bytes_lsb(length($entry->{name}), 2); # file name length + print STDOUT int2bytes_lsb(0, 2); # extra field length + print STDOUT int2bytes_lsb(0, 2); # file comment length + print STDOUT int2bytes_lsb(0, 2); # disk number start + print STDOUT int2bytes_lsb(0, 2); # internal file attributes + print STDOUT int2bytes_lsb(0, 4); # external file attributes + print STDOUT int2bytes_lsb($entry->{offset}, 4); # relative offset of local header (TODO) + + print STDOUT $entry->{name}; +} + +sub end_of_zip_file (@entries) { + + print STDOUT int2bytes_lsb(0x06054b50, 4); # header signature + print STDOUT int2bytes_lsb(0, 2); # number of this disk + print STDOUT int2bytes_lsb(0, 2); # number of the disk central dir + print STDOUT int2bytes_lsb(0, 2); # start of central dir + print STDOUT int2bytes_lsb(scalar(@entries), 2); # total number of entries + print STDOUT int2bytes_lsb(0, 4); # size of the central directory + print STDOUT int2bytes_lsb(0, 4); # offset + print STDOUT int2bytes_lsb(0, 2); # zip file comment length +} + +my @entries; + +sub zip ($file) { + find( + { + no_chdir => 1, + wanted => sub { + push @entries, zip_file($_); + } + }, + $file + ); +} + +zip($ARGV[0]); + +#~ foreach my $entry(@entries) { +#~ central_directory($entry); +#~ } + +#~ end_of_zip_file(@entries); diff --git a/File Workers/arxiv_pdf_renamer.pl b/File Workers/arxiv_pdf_renamer.pl index 9d918141..f994762b 100644 --- a/File Workers/arxiv_pdf_renamer.pl +++ b/File Workers/arxiv_pdf_renamer.pl @@ -14,6 +14,7 @@ use File::Spec::Functions qw(catfile); my $mech = WWW::Mechanize->new( + autocheck => 0, show_progress => 0, stack_depth => 10, agent => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0', diff --git a/Image/image-unpack.pl b/Image/image-unpack.pl new file mode 100755 index 00000000..a1e6dc86 --- /dev/null +++ b/Image/image-unpack.pl @@ -0,0 +1,86 @@ +#!/usr/bin/perl + +# Author: Trizen +# Date: 29 April 2025 +# https://github.com/trizen + +# Extract the {R,G,B} channels of an image, as binary data. + +use 5.036; +use GD qw(); +use Getopt::Long qw(GetOptions); + +binmode(STDOUT, ':raw'); + +GD::Image->trueColor(1); + +my $size = 80; +my $red = 0; +my $green = 0; +my $blue = 0; + +sub help($code = 0) { + print <<"HELP"; +usage: $0 [options] [files] + +options: + -w --width=i : resize image to this width (default: $size) + -R --red : extract only the RED channel (default: $red) + -G --green : extract only the GREEN channel (default: $green) + -B --blue : extract only the BLUE channel (default: $blue) + +example: + perl $0 --width 200 --red image.png > red_channel.bin +HELP + exit($code); +} + +GetOptions( + 'w|width=s' => \$size, + 'R|red!' => \$red, + 'G|green!' => \$green, + 'B|blue!' => \$blue, + 'h|help' => sub { help(0) }, + ) + or die "Error in command-line arguments!"; + +sub img_unpack($image) { + + my $img = GD::Image->new($image) // return; + my ($width, $height) = $img->getBounds; + + if ($size != 0) { + my $scale_width = $size; + my $scale_height = int($height / ($width / ($size / 2))); + + my $resized = GD::Image->new($scale_width, $scale_height); + $resized->copyResampled($img, 0, 0, 0, 0, $scale_width, $scale_height, $width, $height); + + ($width, $height) = ($scale_width, $scale_height); + $img = $resized; + } + + my @values; + + foreach my $y (0 .. $height - 1) { + foreach my $x (0 .. $width - 1) { + my $index = $img->getPixel($x, $y); + my ($R, $G, $B) = $img->rgb($index); + + if ($red) { + push @values, $R; + } + if ($green) { + push @values, $G; + } + if ($blue) { + push @values, $B; + } + } + } + + my $output_width = $width * ($red + $green + $blue); + return unpack("(A$output_width)*", pack('C*', @values)); +} + +print for img_unpack($ARGV[0] // help(1)); diff --git a/Image/image2digits.pl b/Image/image2digits.pl index 68b980f7..6f8c8bd9 100644 --- a/Image/image2digits.pl +++ b/Image/image2digits.pl @@ -66,19 +66,18 @@ sub img2digits { $img = $resized; } - my $avg = 0; - my @averages; + my @values; foreach my $y (0 .. $height - 1) { foreach my $x (0 .. $width - 1) { my $index = $img->getPixel($x, $y); my ($r, $g, $b) = $img->rgb($index); my $value = max($r, $g, $b); - push @averages, $digits[map_value($value, 0, 255, 0, $#digits)]; + push @values, $digits[map_value($value, 0, 255, 0, $#digits)]; } } - unpack("(A$width)*", join('', @averages)); + unpack("(A$width)*", join('', @values)); } say for img2digits($ARGV[0] // help(1)); diff --git a/Image/slideshow.pl b/Image/slideshow.pl index 5be0daa4..2c8f2655 100644 --- a/Image/slideshow.pl +++ b/Image/slideshow.pl @@ -6,13 +6,35 @@ # perl slideshow.pl 'glob_pattern*.jpg' 'output.mp4' use 5.036; +use Getopt::Long qw(GetOptions); -@ARGV == 2 or die "usage: $0 [glob pattern] [output.mp4]\n"; +my $width = 1920; +my $height = 1080; +my $delay = 2; -system('ffmpeg', - qw(-framerate 1/2), +GetOptions( + "width=i" => \$width, + "height=i" => \$height, + "delay=i" => \$delay + ) + or die("Error in command line arguments\n"); + +@ARGV == 2 or die <<"USAGE"; +usage: $0 [options] [glob pattern] [output.mp4] + +options: + + --width=i : width of the video (default: $width) + --height=i : height of the video (default: $height) + --delay=i : delay in seconds between pictures (default: $delay) +USAGE + +system('ffmpeg', qw(-framerate), + join('/', 1, $delay), qw(-pattern_type glob -i), $ARGV[0], '-vf', - "scale=1920:1080:force_original_aspect_ratio=decrease,pad=1929:1080:(ow-iw)/2:(oh-ih)/2", - qw(-c:v libx264 -s 1920x1080 -crf 18 -tune stillimage -r 24), + "scale=${width}:${height}:force_original_aspect_ratio=decrease,pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2", + qw(-c:v libx264 -s), + join('x', $width, $height), + qw(-crf 18 -tune stillimage -r 24), $ARGV[1]); diff --git a/Math/count_of_k-almost_primes.pl b/Math/count_of_k-almost_primes.pl index 2fbb3fa6..85b64882 100755 --- a/Math/count_of_k-almost_primes.pl +++ b/Math/count_of_k-almost_primes.pl @@ -19,9 +19,8 @@ # https://oeis.org/A082996 -- count of 4-almost primes # https://oeis.org/A126280 -- Triangle read by rows: T(k,n) is number of numbers <= 10^n that are products of k primes. -use 5.020; +use 5.036; use ntheory qw(:all); -use experimental qw(signatures); sub k_prime_count ($n, $k) { @@ -41,14 +40,14 @@ ($n, $k) if ($k == 2) { - foreach my $q (@{primes($p, $s)}) { - $count += prime_count(divint($n, mulint($m, $q))) - $j++; - } + forprimes { + $count += prime_count(divint($n, mulint($m, $_))) - $j++; + } $p, $s; return; } - for (my $q = $p ; $q <= $s ; $q = next_prime($q)) { + foreach my $q (@{primes($p, $s)}) { __SUB__->($m * $q, $q, $k - 1, $j++); } }->(1, 2, $k); diff --git a/Math/count_of_squarefree_k-almost_primes.pl b/Math/count_of_squarefree_k-almost_primes.pl index 7232a6b6..153241e3 100644 --- a/Math/count_of_squarefree_k-almost_primes.pl +++ b/Math/count_of_squarefree_k-almost_primes.pl @@ -39,9 +39,9 @@ ($n, $k) if ($k == 2) { - for (; $p <= $s ; $p = next_prime($p)) { - $count += prime_count(divint($n, mulint($m, $p))) - $j++; - } + forprimes { + $count += prime_count(divint($n, mulint($m, $_))) - $j++; + } $p, $s; return; } @@ -63,7 +63,7 @@ ($n, $k) my $upto = pn_primorial($k) + int(rand(1e5)); my $x = squarefree_almost_prime_count($upto, $k); - my $y = scalar grep { is_square_free($_) } @{almost_primes($k, $upto)}; + my $y = scalar grep { is_square_free($_) } @{almost_primes($k, 1, $upto)}; say "Testing: $k with n = $upto -> $x"; diff --git a/Math/omega_prime_numbers_in_range.pl b/Math/omega_prime_numbers_in_range.pl index 8e92c41f..91514394 100644 --- a/Math/omega_prime_numbers_in_range.pl +++ b/Math/omega_prime_numbers_in_range.pl @@ -33,10 +33,8 @@ ($A, $B, $k, $callback) if ($k == 1) { $callback->($v) if ($v >= $A); } - else { - if (mulint($v, $r) <= $B) { - __SUB__->($v, $r, $k - 1); - } + elsif (mulint($v, $r) <= $B) { + __SUB__->($v, $r, $k - 1); } } } diff --git a/Math/omega_prime_numbers_in_range_simple.pl b/Math/omega_prime_numbers_in_range_simple.pl new file mode 100644 index 00000000..20dc7d5f --- /dev/null +++ b/Math/omega_prime_numbers_in_range_simple.pl @@ -0,0 +1,74 @@ +#!/usr/bin/perl + +# Daniel "Trizen" Șuteu +# Date: 14 March 2021 +# Edit: 25 March 2025 +# https://github.com/trizen + +# Generate k-omega primes in range [a,b]. (not in sorted order) + +# Definition: +# k-omega primes are numbers n such that omega(n) = k. + +# See also: +# https://en.wikipedia.org/wiki/Almost_prime +# https://en.wikipedia.org/wiki/Prime_omega_function + +use 5.020; +use integer; +use ntheory qw(:all); +use experimental qw(signatures); + +sub omega_prime_numbers ($A, $B, $k, $callback) { + + $A = vecmax($A, pn_primorial($k)); + + sub ($m, $p, $k) { + + my $s = rootint($B / $m, $k); + + foreach my $q (@{primes($p, $s)}) { + + my $r = next_prime($q); + + for (my $v = $m * $q ; $v <= $B ; $v *= $q) { + if ($k == 1) { + $callback->($v) if ($v >= $A); + } + elsif ($v * $r <= $B) { + __SUB__->($v, $r, $k - 1); + } + } + } + }->(1, 2, $k); +} + +# Generate 5-omega primes in the range [3000, 10000] + +my $k = 5; +my $from = 3000; +my $upto = 10000; + +my @arr; +omega_prime_numbers($from, $upto, $k, sub ($n) { push @arr, $n }); + +my @test = grep { prime_omega($_) == $k } $from .. $upto; # just for testing +join(' ', sort { $a <=> $b } @arr) eq join(' ', @test) or die "Error: not equal!"; + +say join(', ', @arr); + +# Run some tests + +foreach my $k (1 .. 6) { + + my $from = pn_primorial($k) + int(rand(1e4)); + my $upto = $from + int(rand(1e5)); + + say "Testing: $k with $from .. $upto"; + + my @arr; + omega_prime_numbers($from, $upto, $k, sub ($n) { push @arr, $n }); + + my @test = grep { prime_omega($_) == $k } $from .. $upto; + join(' ', sort { $a <=> $b } @arr) eq join(' ', @test) or die "Error: not equal!"; +} diff --git a/Math/partial_sums_of_euler_totient_function_fast.pl b/Math/partial_sums_of_euler_totient_function_fast.pl index eb30f3fd..06bfc94b 100755 --- a/Math/partial_sums_of_euler_totient_function_fast.pl +++ b/Math/partial_sums_of_euler_totient_function_fast.pl @@ -74,7 +74,7 @@ ($n) } foreach my $k (1 .. $s) { - $T -= (int($n / $k) - int($n / ($k + 1))) * __SUB__->($k); + $T -= (int($n / $k) - int($n / ($k + 1))) * $euler_sum_lookup[$k]; } $seen{$n} = $T; diff --git a/Math/partial_sums_of_euler_totient_function_times_k.pl b/Math/partial_sums_of_euler_totient_function_times_k.pl index 031d6ff1..938af9e8 100644 --- a/Math/partial_sums_of_euler_totient_function_times_k.pl +++ b/Math/partial_sums_of_euler_totient_function_times_k.pl @@ -91,7 +91,7 @@ ($n) foreach my $k (1 .. $s) { my $curr = triangular(divint($n, $k + 1)); - $T = subint($T, mulint(subint($prev, $curr), __SUB__->($k))); + $T = subint($T, mulint(subint($prev, $curr), $euler_sum_lookup[$k])); $prev = $curr; } diff --git a/Math/partial_sums_of_euler_totient_function_times_k_to_the_m.pl b/Math/partial_sums_of_euler_totient_function_times_k_to_the_m.pl index 0111d781..b2bcdd81 100755 --- a/Math/partial_sums_of_euler_totient_function_times_k_to_the_m.pl +++ b/Math/partial_sums_of_euler_totient_function_times_k_to_the_m.pl @@ -80,7 +80,7 @@ ($n, $m) } foreach my $k (1 .. $s) { - $T -= (faulhaber_sum(int($n / $k), $m) - faulhaber_sum(int($n / ($k + 1)), $m)) * __SUB__->($k); + $T -= (faulhaber_sum(int($n / $k), $m) - faulhaber_sum(int($n / ($k + 1)), $m)) * $euler_sum_lookup[$k]; } $seen{$n} = $T; diff --git a/Math/partial_sums_of_exponential_prime_omega_functions.pl b/Math/partial_sums_of_exponential_prime_omega_functions.pl index 34a53f8b..17a62238 100644 --- a/Math/partial_sums_of_exponential_prime_omega_functions.pl +++ b/Math/partial_sums_of_exponential_prime_omega_functions.pl @@ -9,11 +9,8 @@ # S2(n) = Sum_{k=1..n} v^omega(k) # S3(n) = Sum_{k=1..n} v^omega(k) * mu(k)^2 -use 5.020; -use warnings; - +use 5.036; use ntheory qw(:all); -use experimental qw(signatures); sub squarefree_almost_prime_count ($k, $n) { @@ -27,29 +24,23 @@ ($k, $n) my $count = 0; - sub ($m, $p, $k, $j = 0) { + sub ($m, $p, $k, $j = 1) { my $s = rootint(divint($n, $m), $k); if ($k == 2) { - foreach my $q (@{primes($p, $s)}) { - - ++$j; - - if (modint($m, $q) != 0) { - $count += prime_count(divint($n, mulint($m, $q))) - $j; - } - } + forprimes { + $count += prime_count(divint($n, mulint($m, $_))) - $j++; + } $p, $s; return; } - foreach my $p (@{primes($p, $s)}) { - if (modint($m, $p) != 0) { - __SUB__->(mulint($m, $p), $p, $k - 1, $j); - } - ++$j; + for (; $p <= $s ; ++$j) { + my $r = next_prime($p); + __SUB__->(mulint($m, $p), $r, $k - 1, $j + 1); + $p = $r; } }->(1, 2, $k); diff --git a/Math/partial_sums_of_gcd-sum_function.pl b/Math/partial_sums_of_gcd-sum_function.pl index 7b38ef8d..e35267e0 100755 --- a/Math/partial_sums_of_gcd-sum_function.pl +++ b/Math/partial_sums_of_gcd-sum_function.pl @@ -41,9 +41,9 @@ use strict; use warnings; -use Math::GMPz qw(); +use Math::GMPz qw(); use experimental qw(signatures); -use ntheory qw(euler_phi moebius mertens sqrtint forsquarefree); +use ntheory qw(euler_phi moebius mertens sqrtint forsquarefree); sub euler_totient_partial_sum ($n) { @@ -91,6 +91,23 @@ ($n) return $total / 2; } +sub gcd_sum_partial_sum_dirichlet($n) { + + my $total = Math::GMPz->new(0); + + my $s = sqrtint($n); + + for my $k (1 .. $s) { + my $t = int($n / $k); + $total += $k * euler_totient_partial_sum($t); + $total += euler_phi($k) * (($t * ($t + 1)) >> 1); + } + + $total -= euler_totient_partial_sum($s) * (($s * ($s + 1)) >> 1); + + return $total; +} + sub gcd_sum_partial_sum_test ($n) { # just for testing my $sum = Math::GMPz->new(0); @@ -107,9 +124,11 @@ ($n) my $n = int rand 10000; my $t1 = gcd_sum_partial_sum($n); - my $t2 = gcd_sum_partial_sum_test($n); + my $t2 = gcd_sum_partial_sum_dirichlet($n); + my $t3 = gcd_sum_partial_sum_test($n); die "error: $t1 != $t2" if ($t1 != $t2); + die "error: $t1 != $t3" if ($t1 != $t3); say "Sum_{k=1..$n} G(k) = $t1"; } diff --git a/Math/partial_sums_of_gcd-sum_function_faster.pl b/Math/partial_sums_of_gcd-sum_function_faster.pl index ecc7b80d..38b21635 100755 --- a/Math/partial_sums_of_gcd-sum_function_faster.pl +++ b/Math/partial_sums_of_gcd-sum_function_faster.pl @@ -76,7 +76,7 @@ ($n) } foreach my $k (1 .. $s) { - $T -= (int($n / $k) - int($n / ($k + 1))) * __SUB__->($k); + $T -= (int($n / $k) - int($n / ($k + 1))) * $euler_sum_lookup[$k]; } $seen{$n} = $T; diff --git a/Math/partial_sums_of_generalized_gcd-sum_function.pl b/Math/partial_sums_of_generalized_gcd-sum_function.pl new file mode 100644 index 00000000..1bcb0274 --- /dev/null +++ b/Math/partial_sums_of_generalized_gcd-sum_function.pl @@ -0,0 +1,131 @@ +#!/usr/bin/perl + +# Daniel "Trizen" Șuteu +# Date: 25 May 2025 +# https://github.com/trizen + +# A sublinear algorithm for computing the partial sums of the generalized gcd-sum function, using Dirichlet's hyperbola method. + +# Generalized Pillai's function: +# pillai(n,k) = Sum_{d|n} mu(n/d) * d^k * tau(d) + +# Multiplicative formula for Sum_{1 <= x_1, x_2, ..., x_k <= n} gcd(x_1, x_2, ..., x_k, n)^k: +# a(p^e) = (e - e/p^k + 1) * p^(k*e) = p^((e - 1) * k) * (p^k + e*(p^k - 1)) + +# The partial sums of the gcd-sum function is defined as: +# +# a(n) = Sum_{k=1..n} Sum_{d|k} d*phi(k/d) +# +# where phi(k) is the Euler totient function. + +# Also equivalent with: +# a(n) = Sum_{j=1..n} Sum_{i=1..j} gcd(i, j) + +# Based on the formula: +# a(n) = (1/2)*Sum_{k=1..n} phi(k) * floor(n/k) * floor(1+n/k) + +# Generalized formula: +# a(n,k) = Sum_{x=1..n} J_k(x) * F_k(floor(n/x)) +# where F_k(n) are the Faulhaber polynomials: F_k(n) = Sum_{x=1..n} x^k. + +# Example: +# a(10^1) = 122 +# a(10^2) = 18065 +# a(10^3) = 2475190 +# a(10^4) = 317257140 +# a(10^5) = 38717197452 +# a(10^6) = 4571629173912 +# a(10^7) = 527148712519016 +# a(10^8) = 59713873168012716 +# a(10^9) = 6671288261316915052 + +# a(10^1, 2) = 1106 +# a(10^2, 2) = 1598361 +# a(10^3, 2) = 2193987154 +# a(10^4, 2) = 2828894776292 +# a(10^5, 2) = 3466053625977000 +# a(10^6, 2) = 4104546122851466704 +# a(10^7, 2) = 4742992578252739471520 +# a(10^8, 2) = 5381500783126483704718848 +# a(10^9, 2) = 6020011093886996189443484608 + +# OEIS sequences: +# https://oeis.org/A272718 -- Partial sums of gcd-sum sequence A018804. +# https://oeis.org/A018804 -- Pillai's arithmetical function: Sum_{k=1..n} gcd(k, n). + +# See also: +# https://en.wikipedia.org/wiki/Dirichlet_hyperbola_method +# https://trizenx.blogspot.com/2018/11/partial-sums-of-arithmetical-functions.html + +use 5.020; +use strict; +use warnings; + +use experimental qw(signatures); +use Math::AnyNum qw(faulhaber_sum ipow); +use ntheory qw(jordan_totient sqrtint rootint); + +sub partial_sums_of_gcd_sum_function($n, $m) { + + my $s = sqrtint($n); + my @totient_sum_lookup = (0); + + my $lookup_size = 2 + 2 * rootint($n, 3)**2; + my @jordan_totient = (0); + + foreach my $x (1 .. $lookup_size) { + push @jordan_totient, jordan_totient($m, $x); + } + + foreach my $i (1 .. $lookup_size) { + $totient_sum_lookup[$i] = $totient_sum_lookup[$i - 1] + $jordan_totient[$i]; + } + + my %seen; + + my sub totient_partial_sum($n) { + + if ($n <= $lookup_size) { + return $totient_sum_lookup[$n]; + } + + if (exists $seen{$n}) { + return $seen{$n}; + } + + my $s = sqrtint($n); + my $T = ${faulhaber_sum($n, $m)}; + + foreach my $k (2 .. int($n / ($s + 1))) { + $T -= __SUB__->(int($n / $k)); + } + + foreach my $k (1 .. $s) { + $T -= (int($n / $k) - int($n / ($k + 1))) * $totient_sum_lookup[$k]; + } + + $seen{$n} = $T; + } + + my $A = 0; + + foreach my $k (1 .. $s) { + my $t = int($n / $k); + $A += ${ipow($k, $m)} * totient_partial_sum($t) + $jordan_totient[$k] * ${faulhaber_sum($t, $m)}; + } + + my $T = ${faulhaber_sum($s, $m)}; + my $C = totient_partial_sum($s); + + return ($A - $T * $C); +} + +foreach my $n (1 .. 8) { # takes less than 1 second + say "a(10^$n, 1) = ", partial_sums_of_gcd_sum_function(10**$n, 1); +} + +say ''; + +foreach my $n (1 .. 8) { # takes less than 1 second + say "a(10^$n, 2) = ", partial_sums_of_gcd_sum_function(10**$n, 2); +} diff --git a/Math/partial_sums_of_jordan_totient_function_fast.pl b/Math/partial_sums_of_jordan_totient_function_fast.pl index cbf89810..af05e28a 100755 --- a/Math/partial_sums_of_jordan_totient_function_fast.pl +++ b/Math/partial_sums_of_jordan_totient_function_fast.pl @@ -85,7 +85,7 @@ ($n, $m) } foreach my $k (1 .. $s) { - $A -= (int($n / $k) - int($n / ($k + 1))) * __SUB__->($k); + $A -= (int($n / $k) - int($n / ($k + 1))) * $jordan_sum_lookup[$k]; } $seen{$n} = $A; diff --git a/Math/partial_sums_of_jordan_totient_function_times_k_to_the_m.pl b/Math/partial_sums_of_jordan_totient_function_times_k_to_the_m.pl index 9392187a..b03e1913 100755 --- a/Math/partial_sums_of_jordan_totient_function_times_k_to_the_m.pl +++ b/Math/partial_sums_of_jordan_totient_function_times_k_to_the_m.pl @@ -79,7 +79,7 @@ ($n, $j, $m) } foreach my $k (1 .. $s) { - $T -= (faulhaber_sum(int($n / $k), $m) - faulhaber_sum(int($n / ($k + 1)), $m)) * __SUB__->($k); + $T -= (faulhaber_sum(int($n / $k), $m) - faulhaber_sum(int($n / ($k + 1)), $m)) * $jordan_sum_lookup[$k]; } $seen{$n} = $T; diff --git a/Math/partial_sums_of_sigma0_function.pl b/Math/partial_sums_of_sigma0_function.pl new file mode 100644 index 00000000..e0c6d555 --- /dev/null +++ b/Math/partial_sums_of_sigma0_function.pl @@ -0,0 +1,63 @@ +#!/usr/bin/perl + +# Author: Daniel "Trizen" Șuteu +# Date: 09 November 2018 +# Edit: 30 March 2025 +# https://github.com/trizen + +# Algorithm with O(sqrt(n)) complexity for computing the partial-sums of the `sigma_0(k)` function: +# Sum_{k=1..n} sigma_0(k) + +# See also: +# https://oeis.org/A006218 +# https://en.wikipedia.org/wiki/Divisor_function +# https://en.wikipedia.org/wiki/Faulhaber%27s_formula +# https://en.wikipedia.org/wiki/Bernoulli_polynomials +# https://trizenx.blogspot.com/2018/11/partial-sums-of-arithmetical-functions.html + +use 5.036; + +sub sigma0_partial_sum_faulhaber ($n) { + + my $s = int(sqrt($n)); + my $sum = 0; + + foreach my $k (1 .. $s) { + $sum += 2 * int($n / $k); + } + + return ($sum - $s * $s); +} + +sub sigma0_partial_sum_test ($n) { # just for testing + my $sum = 0; + foreach my $k (1 .. $n) { + $sum += int($n / $k); + } + return $sum; +} + +foreach my $m (0 .. 10) { + + my $n = int(rand(1 << (2 * $m))); + + my $t1 = sigma0_partial_sum_test($n); + my $t2 = sigma0_partial_sum_faulhaber($n); + + say "Sum_{k=1..$n} sigma_0(k) = $t2"; + + die "error: $t1 != $t2" if ($t1 != $t2); +} + +__END__ +Sum_{k=1..0} sigma_0(k) = 0 +Sum_{k=1..3} sigma_0(k) = 5 +Sum_{k=1..13} sigma_0(k) = 37 +Sum_{k=1..30} sigma_0(k) = 111 +Sum_{k=1..193} sigma_0(k) = 1049 +Sum_{k=1..51} sigma_0(k) = 211 +Sum_{k=1..2288} sigma_0(k) = 18059 +Sum_{k=1..15985} sigma_0(k) = 157208 +Sum_{k=1..10112} sigma_0(k) = 94818 +Sum_{k=1..152099} sigma_0(k) = 1838389 +Sum_{k=1..446108} sigma_0(k) = 5872025 diff --git a/Math/partial_sums_of_sigma_function.pl b/Math/partial_sums_of_sigma_function.pl index f518c20b..b503191e 100755 --- a/Math/partial_sums_of_sigma_function.pl +++ b/Math/partial_sums_of_sigma_function.pl @@ -2,6 +2,7 @@ # Author: Daniel "Trizen" Șuteu # Date: 09 November 2018 +# Edit: 30 March 2025 # https://github.com/trizen # A new generalized algorithm with O(sqrt(n)) complexity for computing the partial-sums of the `sigma_j(k)` function: @@ -16,15 +17,11 @@ # https://en.wikipedia.org/wiki/Bernoulli_polynomials # https://trizenx.blogspot.com/2018/11/partial-sums-of-arithmetical-functions.html -use 5.020; -use strict; -use warnings; - -use ntheory qw(divisors); -use experimental qw(signatures); +use 5.036; +use ntheory qw(divisors); use Math::AnyNum qw(faulhaber_sum bernoulli sum isqrt ipow); -sub sigma_partial_sum_faulhaber ($n, $m = 1) { # using Faulhaber's formula +sub sigma_partial_sum_faulhaber ($n, $m = 1) { # using Faulhaber's formula my $s = isqrt($n); my $u = int($n / ($s + 1)); @@ -32,7 +29,7 @@ ($n, $m = 1) my $sum = 0; foreach my $k (1 .. $s) { - $sum += $k * (faulhaber_sum(int($n/$k), $m) - faulhaber_sum(int($n/($k+1)), $m)); + $sum += $k * (faulhaber_sum(int($n / $k), $m) - faulhaber_sum(int($n / ($k + 1)), $m)); } foreach my $k (1 .. $u) { @@ -42,7 +39,22 @@ ($n, $m = 1) return $sum; } -sub sigma_partial_sum_bernoulli ($n, $m = 1) { # using Bernoulli polynomials +sub sigma_partial_sum_dirichlet ($n, $m = 1) { # using the Dirichlet hyperbola method + + my $total = 0; + my $s = isqrt($n); + + for my $k (1 .. $s) { + $total += faulhaber_sum(int($n / $k), $m); + $total += ipow($k, $m) * int($n / $k); + } + + $total -= $s * faulhaber_sum($s, $m); + + return $total; +} + +sub sigma_partial_sum_bernoulli ($n, $m = 1) { # using Bernoulli polynomials my $s = isqrt($n); my $u = int($n / ($s + 1)); @@ -50,7 +62,7 @@ ($n, $m = 1) my $sum = 0; foreach my $k (1 .. $s) { - $sum += $k * (bernoulli($m+1, 1+int($n/$k)) - bernoulli($m+1, 1+int($n/($k+1)))) / ($m+1); + $sum += $k * (bernoulli($m + 1, 1 + int($n / $k)) - bernoulli($m + 1, 1 + int($n / ($k + 1)))) / ($m + 1); } foreach my $k (1 .. $u) { @@ -61,7 +73,11 @@ ($n, $m = 1) } sub sigma_partial_sum_test ($n, $m = 1) { # just for testing - sum(map { sum(map { ipow($_, $m) } divisors($_)) } 1..$n); + sum( + map { + sum(map { ipow($_, $m) } divisors($_)) + } 1 .. $n + ); } foreach my $m (0 .. 10) { @@ -71,11 +87,13 @@ ($n, $m = 1) my $t1 = sigma_partial_sum_test($n, $m); my $t2 = sigma_partial_sum_faulhaber($n, $m); my $t3 = sigma_partial_sum_bernoulli($n, $m); + my $t4 = sigma_partial_sum_dirichlet($n, $m); say "Sum_{k=1..$n} sigma_$m(k) = $t2"; die "error: $t1 != $t2" if ($t1 != $t2); die "error: $t1 != $t3" if ($t1 != $t3); + die "error: $t1 != $t4" if ($t1 != $t4); } __END__ diff --git a/Math/prime_counting_liouville_formula.pl b/Math/prime_counting_liouville_formula.pl new file mode 100644 index 00000000..5bfb671a --- /dev/null +++ b/Math/prime_counting_liouville_formula.pl @@ -0,0 +1,78 @@ +#!/usr/bin/perl + +# Author: Trizen +# Date: 17 July 2025 +# https://github.com/trizen + +# A sublinear algorithm for computing the Prime Counting function `pi(n)`, +# based on the Liouville function and the number of k-almost primes <= n, for `k >= 2`. + +# See also: +# https://en.wikipedia.org/wiki/Mertens_function +# https://en.wikipedia.org/wiki/M%C3%B6bius_function + +use 5.036; +use ntheory qw(:all); + +sub k_prime_count ($k, $n) { + + if ($k == 1) { + return my_prime_count($n); + } + + my $count = 0; + + sub ($m, $p, $k, $j = 0) { + + my $s = rootint(divint($n, $m), $k); + + if ($k == 2) { + + forprimes { + $count += my_prime_count(divint($n, mulint($m, $_))) - $j++; + } $p, $s; + + return; + } + + foreach my $q (@{primes($p, $s)}) { + __SUB__->($m * $q, $q, $k - 1, $j++); + } + }->(1, 2, $k); + + return $count; +} + +sub my_prime_count ($n) { + + state $pi_table = [0, 0, 1, 2, 2]; # a larger lookup table helps a lot! + + if ($n < 0) { + return 0; + } + + if (defined($pi_table->[$n])) { + return $pi_table->[$n]; + } + + my $M = sumliouville($n); + + foreach my $k (2 .. logint($n, 2)) { + $M -= (-1)**$k * k_prime_count($k, $n); + } + + return ($pi_table->[$n] //= 1 - $M); +} + +foreach my $n (1..7) { # takes ~3 seconds + say "pi(10^$n) = ", my_prime_count(10**$n); +} + +__END__ +pi(10^1) = 4 +pi(10^2) = 25 +pi(10^3) = 168 +pi(10^4) = 1229 +pi(10^5) = 9592 +pi(10^6) = 78498 +pi(10^7) = 664579 diff --git a/Math/prime_counting_mertens_formula.pl b/Math/prime_counting_mertens_formula.pl new file mode 100644 index 00000000..521bf52e --- /dev/null +++ b/Math/prime_counting_mertens_formula.pl @@ -0,0 +1,83 @@ +#!/usr/bin/perl + +# Author: Trizen +# Date: 17 July 2025 +# https://github.com/trizen + +# A sublinear algorithm for computing the Prime Counting function `pi(n)`, based on the +# Mertens function and the number of squarefree k-almost primes <= n, for `k >= 2`. + +# See also: +# https://en.wikipedia.org/wiki/Mertens_function +# https://en.wikipedia.org/wiki/M%C3%B6bius_function + +use 5.036; +use ntheory qw(:all); + +sub squarefree_almost_prime_count ($k, $n) { + + if ($k == 0) { + return (($n <= 0) ? 0 : 1); + } + + if ($k == 1) { + return my_prime_count($n); + } + + my $count = 0; + + sub ($m, $p, $k, $j = 1) { + + my $s = rootint(divint($n, $m), $k); + + if ($k == 2) { + + forprimes { + $count += my_prime_count(divint($n, mulint($m, $_))) - $j++; + } $p, $s; + + return; + } + + foreach my $q (@{primes($p, $s)}) { + __SUB__->(mulint($m, $q), $q + 1, $k - 1, ++$j); + } + } + ->(1, 2, $k); + + return $count; +} + +sub my_prime_count ($n) { + + state $pi_table = [0, 0, 1, 2, 2]; # a larger lookup table helps a lot! + + if ($n < 0) { + return 0; + } + + if (defined($pi_table->[$n])) { + return $pi_table->[$n]; + } + + my $M = mertens($n); + + foreach my $k (2 .. logint($n, 2)) { + $M -= (-1)**$k * squarefree_almost_prime_count($k, $n); + } + + return ($pi_table->[$n] //= 1 - $M); +} + +foreach my $n (1 .. 7) { # takes ~1 second + say "pi(10^$n) = ", my_prime_count(10**$n); +} + +__END__ +pi(10^1) = 4 +pi(10^2) = 25 +pi(10^3) = 168 +pi(10^4) = 1229 +pi(10^5) = 9592 +pi(10^6) = 78498 +pi(10^7) = 664579 diff --git a/README.md b/README.md index acce8884..2585a94a 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ A nice collection of day-to-day Perl scripts. * [Update summary](./Book%20tools/update_summary.pl) * Compression * [Bbwr file compression](./Compression/bbwr_file_compression.pl) + * [Bqof file compression](./Compression/bqof_file_compression.pl) * [Bwac file compression](./Compression/bwac_file_compression.pl) * [Bwad file compression](./Compression/bwad_file_compression.pl) * [Bwaz file compression](./Compression/bwaz_file_compression.pl) @@ -187,6 +188,7 @@ A nice collection of day-to-day Perl scripts. * [Mrh file compression](./Compression/mrh_file_compression.pl) * [Mrlz file compression](./Compression/mrlz_file_compression.pl) * [Ppmh file compression](./Compression/ppmh_file_compression.pl) + * [Qof file compression](./Compression/qof_file_compression.pl) * [Rans file compression](./Compression/rans_file_compression.pl) * [Rlac file compression](./Compression/rlac_file_compression.pl) * [Rlh file compression](./Compression/rlh_file_compression.pl) @@ -195,6 +197,8 @@ A nice collection of day-to-day Perl scripts. * [Test compressors](./Compression/test_compressors.pl) * [Tzip2 file compression](./Compression/tzip2_file_compression.pl) * [Tzip file compression](./Compression/tzip_file_compression.pl) + * [Unzip](./Compression/unzip.pl) + * [Zip](./Compression/zip.pl) * [Zlib compressor](./Compression/zlib_compressor.pl) * [Zlib decompressor](./Compression/zlib_decompressor.pl) * [Zlib file compression](./Compression/zlib_file_compression.pl) @@ -535,6 +539,7 @@ A nice collection of day-to-day Perl scripts. * [Gd star trails](./Image/gd_star_trails.pl) * [Gif2webp](./Image/gif2webp.pl) * [Horizontal scrambler](./Image/horizontal_scrambler.pl) + * [Image-unpack](./Image/image-unpack.pl) * [Image2ascii](./Image/image2ascii.pl) * [Image2audio](./Image/image2audio.pl) * [Image2digits](./Image/image2digits.pl) @@ -995,6 +1000,7 @@ A nice collection of day-to-day Perl scripts. * [Omega prime divisors](./Math/omega_prime_divisors.pl) * [Omega prime numbers in range](./Math/omega_prime_numbers_in_range.pl) * [Omega prime numbers in range mpz](./Math/omega_prime_numbers_in_range_mpz.pl) + * [Omega prime numbers in range simple](./Math/omega_prime_numbers_in_range_simple.pl) * [Order factorization method](./Math/order_factorization_method.pl) * [Palindrome iteration](./Math/palindrome_iteration.pl) * [Partial sums of dedekind psi function](./Math/partial_sums_of_dedekind_psi_function.pl) @@ -1007,6 +1013,7 @@ A nice collection of day-to-day Perl scripts. * [Partial sums of gcd-sum function](./Math/partial_sums_of_gcd-sum_function.pl) * [Partial sums of gcd-sum function fast](./Math/partial_sums_of_gcd-sum_function_fast.pl) * [Partial sums of gcd-sum function faster](./Math/partial_sums_of_gcd-sum_function_faster.pl) + * [Partial sums of generalized gcd-sum function](./Math/partial_sums_of_generalized_gcd-sum_function.pl) * [Partial sums of gpf](./Math/partial_sums_of_gpf.pl) * [Partial sums of inverse moebius transform of dedekind function](./Math/partial_sums_of_inverse_moebius_transform_of_dedekind_function.pl) * [Partial sums of jordan totient function](./Math/partial_sums_of_jordan_totient_function.pl) @@ -1020,6 +1027,7 @@ A nice collection of day-to-day Perl scripts. * [Partial sums of powerfree part](./Math/partial_sums_of_powerfree_part.pl) * [Partial sums of prime bigomega function](./Math/partial_sums_of_prime_bigomega_function.pl) * [Partial sums of prime omega function](./Math/partial_sums_of_prime_omega_function.pl) + * [Partial sums of sigma0 function](./Math/partial_sums_of_sigma0_function.pl) * [Partial sums of sigma function](./Math/partial_sums_of_sigma_function.pl) * [Partial sums of sigma function times k](./Math/partial_sums_of_sigma_function_times_k.pl) * [Partial sums of sigma function times k to the m](./Math/partial_sums_of_sigma_function_times_k_to_the_m.pl) @@ -1057,6 +1065,8 @@ A nice collection of day-to-day Perl scripts. * [Prime 41](./Math/prime_41.pl) * [Prime abundant sequences](./Math/prime_abundant_sequences.pl) * [Prime count smooth sum](./Math/prime_count_smooth_sum.pl) + * [Prime counting liouville formula](./Math/prime_counting_liouville_formula.pl) + * [Prime counting mertens formula](./Math/prime_counting_mertens_formula.pl) * [Prime factorization concept](./Math/prime_factorization_concept.pl) * [Prime factors of binomial coefficients](./Math/prime_factors_of_binomial_coefficients.pl) * [Prime factors of binomial product](./Math/prime_factors_of_binomial_product.pl)