#!/usr/bin/env luajit -- Primarily written by ChatGPT using GPT-3.5, with corrections and modifications by me. -- Do whatever the hell you want with it. local lfs = require "lfs" -- Function to get the filesize of a given file function get_filesize(filepath) local file = io.open(filepath, "rb") if file then local size = file:seek("end") file:close() return size else return nil end end -- Function to recursively traverse directories and get file sizes function traverse_directory(path) local total_size = 0 local total_files = 0 local file_sizes = {} for entry in lfs.dir(path) do if entry ~= "." and entry ~= ".." then local full_path = path..'\\'..entry local attributes = lfs.attributes(full_path) if attributes and attributes.mode == "file" then local size = get_filesize(full_path) if size then print(full_path, size, "bytes") table.insert(file_sizes, size) total_size = total_size + size total_files = total_files + 1 else print(full_path, "File not found or inaccessible") end elseif attributes and attributes.mode == "directory" then local subdir_total_size, subdir_total_files, subdir_file_sizes = traverse_directory(full_path) total_size = total_size + subdir_total_size total_files = total_files + subdir_total_files while #subdir_file_sizes > 0 do table.insert(file_sizes, table.remove(subdir_file_sizes)) end end end end return total_size, total_files, file_sizes end -- Function to calculate evenly spaced percentiles function calculate_percentiles(data, num_percentiles) local result = {} table.sort(data) for i = 1, num_percentiles do local p = (i - 1) / (num_percentiles - 1) * 100 local index = math.ceil(#data * p / 100) if index == 0 then index = 1 end result[i] = data[index] end return result end -- Function to print percentiles table returned from calculate_percentiles function print_percentiles(percentiles) for i, value in pairs(percentiles) do local p = (i - 1) / (#percentiles - 1) * 100 if p == 50 then print(p .. "th percentile (median):", value, "bytes") else print(p .. "th percentile:", value, "bytes") end end end -- Function to calculate mode function calculate_mode(data) local freq_map = {} local max_freq = 0 local modes = {} for _, value in ipairs(data) do freq_map[value] = (freq_map[value] or 0) + 1 if freq_map[value] > max_freq then max_freq = freq_map[value] end end if max_freq == 1 then return modes, max_freq -- no mode end for value, freq in pairs(freq_map) do if freq == max_freq then table.insert(modes, value) end end table.sort(modes) return modes, max_freq end -- Function to print mode results function print_mode_results(modes, max_freq) if #modes == 0 then print("No mode found.") elseif #modes == 1 then print("Mode:", modes[1], "bytes") else print("Multiple modes:") for i, mode in ipairs(modes) do print("Mode " .. i .. ":", mode, "bytes") end end print("Frequency:", max_freq) end -- Function to calculate standard deviation function calculate_standard_deviation(data) local n = #data local sum = 0 local sum_of_squared_deviations = 0 if n < 1 then return 0 -- Standard deviation is undefined for small sample sizes end -- Calculate mean for _, value in ipairs(data) do sum = sum + value end local mean = sum / n -- Calculate sum of squared deviations for _, value in ipairs(data) do local deviation = value - mean sum_of_squared_deviations = sum_of_squared_deviations + deviation^2 end -- Calculate standard deviation local variance = sum_of_squared_deviations / (n - 1) local standard_deviation = math.sqrt(variance) return standard_deviation end -- Function to calculate a histogram function calculate_histogram(data, num_bins) local histogram = {} local min_value = math.min(unpack(data)) local max_value = math.max(unpack(data)) local bin_width = (max_value - min_value) / num_bins for i = 1, num_bins do local bin_start = min_value + (i - 1) * bin_width local bin_end = bin_start + bin_width histogram[i] = {bin_start, bin_end, 0} end for _, value in ipairs(data) do local bin_index = math.floor((value - min_value) / bin_width) + 1 if bin_index <= num_bins then histogram[bin_index][3] = histogram[bin_index][3] + 1 else -- the largest file always calculates to an nth + 1 bin histogram[num_bins][3] = histogram[num_bins][3] + 1 end end return histogram end -- Function to print histogram results function print_histogram(histogram) for i, bin in ipairs(histogram) do local bin_start, bin_end, count = unpack(bin) print(string.format("%.2f - %.2f:", bin_start, bin_end), count, "files") end end -- Function to print histogram results with logarithmic scaling and aligned graphical representation function print_histogram_graphical(histogram, graph_width) local max_count = 0 -- Find the maximum count to determine the scale for _, bin in ipairs(histogram) do local count = bin[3] if count > max_count then max_count = count end end local max_log_scaled = math.log(max_count + 1) -- Add 1 to avoid log(0) -- Print the histogram with graphical representation and aligned text data for _, bin in ipairs(histogram) do local bin_start, bin_end, count = unpack(bin) local log_scaled_count = math.log(count + 1) -- Add 1 to avoid log(0) local scaled_width = math.floor((log_scaled_count / max_log_scaled) * graph_width) -- Adjust the width as needed local bar = string.rep("#", scaled_width) local empty_spaces = string.rep(" ", graph_width - scaled_width) -- Add empty spaces for alignment print(string.format("[%s%s] %.2f - %.2f: %d files", bar, empty_spaces, bin_start, bin_end, count)) end end local root_directory = "." -- bodge to work in-place local total_size, total_files, total_file_sizes = traverse_directory(root_directory) if total_files > 0 then print("") print(total_files, "files found.") local average_size = total_size / total_files print("Average (mean) file size:", average_size, "bytes") local standard_deviation = calculate_standard_deviation(total_file_sizes) print("Standard deviation:", standard_deviation) local mode_results, max_freq = calculate_mode(total_file_sizes) print_mode_results(mode_results, max_freq) local bin_size = math.ceil(math.sqrt(total_files)) -- Square Root Rule local histogram_results = calculate_histogram(total_file_sizes, bin_size) print_histogram_graphical(histogram_results, 40) local percentiles = calculate_percentiles(total_file_sizes, 11) print_percentiles(percentiles) else print("No files found.") end