mirror of
https://github.com/TangentFoxy/.lua-files.git
synced 2024-11-20 21:34:23 +00:00
234 lines
7.3 KiB
Lua
234 lines
7.3 KiB
Lua
|
#!/usr/bin/env luajit
|
||
|
|
||
|
-- Primarily written by ChatGPT using GPT-3.5, with corrections and modifications by me.
|
||
|
-- Do whatever the hell you want with it.
|
||
|
|
||
|
local lfs = require "lfs"
|
||
|
|
||
|
-- Return the size of a file in bytes.
-- The file is opened in binary read mode and its size is taken from the
-- offset reported when seeking to the end of the file.
--   filepath: path of the file to measure
-- Returns the size as a number, or nil when the file cannot be opened.
function get_filesize(filepath)
  local handle = io.open(filepath, "rb")
  if not handle then
    return nil
  end

  local byte_count = handle:seek("end")
  handle:close()
  return byte_count
end
|
||
|
|
||
|
-- Recursively walk a directory tree, printing and collecting file sizes.
-- Every regular file found is printed together with its size (or a warning
-- when get_filesize cannot open it); sub-directories are descended into.
--   path: directory to scan
-- Returns three values:
--   total_size  - sum of the sizes of all files that could be measured
--   total_files - number of files that could be measured
--   file_sizes  - array holding the size of each measured file
function traverse_directory(path)
  -- The first character of package.config is the platform's directory
  -- separator ("\" on Windows, "/" elsewhere). A hard-coded backslash
  -- produces paths that cannot be resolved on non-Windows systems.
  local separator = package.config:sub(1, 1)

  local total_size = 0
  local total_files = 0
  local file_sizes = {}

  for entry in lfs.dir(path) do
    if entry ~= "." and entry ~= ".." then
      local full_path = path .. separator .. entry
      local attributes = lfs.attributes(full_path)
      -- attributes is nil when the entry cannot be inspected; such entries
      -- are silently skipped.
      local mode = attributes and attributes.mode

      if mode == "file" then
        local size = get_filesize(full_path)

        if size then
          print(full_path, size, "bytes")
          file_sizes[#file_sizes + 1] = size
          total_size = total_size + size
          total_files = total_files + 1
        else
          print(full_path, "File not found or inaccessible")
        end
      elseif mode == "directory" then
        local subdir_total_size, subdir_total_files, subdir_file_sizes = traverse_directory(full_path)
        total_size = total_size + subdir_total_size
        total_files = total_files + subdir_total_files
        -- Append the sub-directory's sizes last-to-first, which keeps the
        -- element order callers have always received.
        for i = #subdir_file_sizes, 1, -1 do
          file_sizes[#file_sizes + 1] = subdir_file_sizes[i]
        end
      end
    end
  end

  return total_size, total_files, file_sizes
end
|
||
|
|
||
|
-- Calculate evenly spaced percentiles (nearest-rank method).
-- The percentiles run from the 0th to the 100th in equal steps, so
-- num_percentiles = 11 yields the 0th, 10th, ..., 100th percentiles.
--   data:            array of numbers; it is NOT modified (a copy is sorted)
--   num_percentiles: how many percentiles to compute
-- Returns an array of num_percentiles values. With a single percentile the
-- 0th percentile (the minimum) is returned. With empty data the result is
-- an empty table.
function calculate_percentiles(data, num_percentiles)
  local result = {}

  -- Sort a copy so the caller's table keeps its original order.
  local sorted = {}
  for i = 1, #data do
    sorted[i] = data[i]
  end
  table.sort(sorted)

  local count = #sorted
  -- Number of intervals between the first and last percentile; clamped to 1
  -- so that a single requested percentile does not divide by zero.
  local steps = math.max(num_percentiles - 1, 1)

  for i = 1, num_percentiles do
    -- Rank = ceil(count * fraction). The numerator is kept as an exact
    -- integer product so floating-point noise from an intermediate
    -- percentage (e.g. 0.07 * 100) cannot push the rank up by one.
    local index = math.ceil(count * (i - 1) / steps)
    if index < 1 then
      index = 1 -- the 0th percentile maps to the smallest element
    end
    result[i] = sorted[index]
  end

  return result
end
|
||
|
|
||
|
-- Print the percentiles table returned from calculate_percentiles.
-- Entry i of an n-entry table is labelled as percentile (i-1)/(n-1)*100;
-- the 50th percentile is additionally marked as the median.
--   percentiles: array of percentile values, lowest percentile first
function print_percentiles(percentiles)
  -- Clamp to 1 so a single-entry table is labelled "0th" instead of
  -- dividing by zero and printing a NaN label.
  local steps = math.max(#percentiles - 1, 1)

  -- ipairs guarantees ascending order; pairs makes no ordering promise.
  for i, value in ipairs(percentiles) do
    local p = (i - 1) / steps * 100
    if p == 50 then
      print(p .. "th percentile (median):", value, "bytes")
    else
      print(p .. "th percentile:", value, "bytes")
    end
  end
end
|
||
|
|
||
|
-- Determine the mode(s) of a list of values.
--   data: array of values
-- Returns two values:
--   modes    - ascending array of the values that occur most often; empty
--              when every value occurs exactly once or data is empty
--   max_freq - the highest occurrence count seen (0 for empty data)
function calculate_mode(data)
  local occurrences = {}
  local highest = 0

  -- Tally each value and remember the largest tally seen so far.
  for _, item in ipairs(data) do
    local tally = (occurrences[item] or 0) + 1
    occurrences[item] = tally
    if tally > highest then
      highest = tally
    end
  end

  local modes = {}
  -- A highest tally of 1 means all values are unique: report no mode.
  if highest ~= 1 then
    for item, tally in pairs(occurrences) do
      if tally == highest then
        modes[#modes + 1] = item
      end
    end
    table.sort(modes)
  end

  return modes, highest
end
|
||
|
|
||
|
-- Print the result of calculate_mode.
--   modes:    array of mode values (may be empty)
--   max_freq: occurrence count shared by the modes
-- Prints one line per mode (or a "no mode" notice) followed by the frequency.
function print_mode_results(modes, max_freq)
  local mode_count = #modes

  if mode_count == 1 then
    print("Mode:", modes[1], "bytes")
  elseif mode_count == 0 then
    print("No mode found.")
  else
    print("Multiple modes:")
    for position, size in ipairs(modes) do
      print("Mode " .. position .. ":", size, "bytes")
    end
  end

  print("Frequency:", max_freq)
end
|
||
|
|
||
|
-- Calculate the sample standard deviation (n - 1 denominator).
--   data: array of numbers
-- Returns the standard deviation, or 0 when fewer than two values are given
-- (the sample standard deviation is undefined there, and dividing by
-- n - 1 == 0 would otherwise yield NaN).
function calculate_standard_deviation(data)
  local n = #data

  if n < 2 then
    return 0 -- Standard deviation is undefined for small sample sizes
  end

  -- Calculate mean
  local sum = 0
  for _, value in ipairs(data) do
    sum = sum + value
  end
  local mean = sum / n

  -- Calculate sum of squared deviations from the mean
  local sum_of_squared_deviations = 0
  for _, value in ipairs(data) do
    local deviation = value - mean
    sum_of_squared_deviations = sum_of_squared_deviations + deviation * deviation
  end

  -- Sample variance uses n - 1 (Bessel's correction)
  local variance = sum_of_squared_deviations / (n - 1)
  return math.sqrt(variance)
end
|
||
|
|
||
|
-- Calculate a histogram with equally wide bins.
--   data:     array of numbers
--   num_bins: number of bins to split the [min, max] range into
-- Returns an array of bins, each bin being {bin_start, bin_end, count}.
-- An empty table is returned when data is empty or num_bins is below 1.
function calculate_histogram(data, num_bins)
  local histogram = {}
  local count = #data

  if count == 0 or num_bins < 1 then
    return histogram
  end

  -- Find the range with a plain loop: passing every element through
  -- unpack() to math.min/math.max fails on large inputs with
  -- "too many results to unpack".
  local min_value, max_value = data[1], data[1]
  for i = 2, count do
    local value = data[i]
    if value < min_value then min_value = value end
    if value > max_value then max_value = value end
  end

  local bin_width = (max_value - min_value) / num_bins

  for i = 1, num_bins do
    local bin_start = min_value + (i - 1) * bin_width
    local bin_end = bin_start + bin_width
    histogram[i] = {bin_start, bin_end, 0}
  end

  for _, value in ipairs(data) do
    -- When all values are equal the bin width is zero; every value is then
    -- "the maximum" and is counted in the last bin, without dividing by zero.
    local bin_index = num_bins
    if bin_width > 0 then
      bin_index = math.floor((value - min_value) / bin_width) + 1
      if bin_index > num_bins then
        -- the largest value always calculates to an nth + 1 bin
        bin_index = num_bins
      end
    end
    histogram[bin_index][3] = histogram[bin_index][3] + 1
  end

  return histogram
end
|
||
|
|
||
|
-- Print a histogram as plain text, one line per bin.
--   histogram: array of {bin_start, bin_end, count} bins
-- Each line shows the bin range with two decimals and the bin's file count.
function print_histogram(histogram)
  for _, bin in ipairs(histogram) do
    local range_label = string.format("%.2f - %.2f:", bin[1], bin[2])
    print(range_label, bin[3], "files")
  end
end
|
||
|
|
||
|
-- Print a histogram with logarithmically scaled, aligned bars.
-- Each line has the form "[####    ] start - end: N files"; the bar length
-- is proportional to log(count + 1) relative to the fullest bin.
--   histogram:   array of {bin_start, bin_end, count} bins
--   graph_width: width of the bar area in characters (defaults to 40)
function print_histogram_graphical(histogram, graph_width)
  graph_width = graph_width or 40

  -- Find the maximum count to determine the scale
  local max_count = 0
  for _, bin in ipairs(histogram) do
    local count = bin[3]
    if count > max_count then
      max_count = count
    end
  end

  local max_log_scaled = math.log(max_count + 1) -- Add 1 to avoid log(0)

  for _, bin in ipairs(histogram) do
    local bin_start, bin_end, count = bin[1], bin[2], bin[3]

    -- When every bin is empty max_log_scaled is 0; draw empty bars rather
    -- than dividing by zero and ending up with a NaN bar width.
    local scaled_width = 0
    if max_log_scaled > 0 then
      local log_scaled_count = math.log(count + 1) -- Add 1 to avoid log(0)
      scaled_width = math.floor((log_scaled_count / max_log_scaled) * graph_width)
    end

    local bar = string.rep("#", scaled_width)
    local empty_spaces = string.rep(" ", graph_width - scaled_width) -- padding keeps the text columns aligned
    print(string.format("[%s%s] %.2f - %.2f: %d files", bar, empty_spaces, bin_start, bin_end, count))
  end
end
|
||
|
|
||
|
-- Script entry point: scan the current directory tree and print summary
-- statistics (mean, standard deviation, mode, histogram, percentiles) for
-- the sizes of all files found.
local root_directory = "." -- bodge to work in-place
local total_size, total_files, total_file_sizes = traverse_directory(root_directory)

if total_files <= 0 then
  print("No files found.")
else
  print("")
  print(total_files, "files found.")
  print("Average (mean) file size:", total_size / total_files, "bytes")
  print("Standard deviation:", calculate_standard_deviation(total_file_sizes))

  -- calculate_mode returns (modes, max_freq); both are forwarded as-is.
  print_mode_results(calculate_mode(total_file_sizes))

  -- Square Root Rule: use ceil(sqrt(n)) bins for n samples.
  local bin_count = math.ceil(math.sqrt(total_files))
  print_histogram_graphical(calculate_histogram(total_file_sizes, bin_count), 40)

  -- 11 evenly spaced percentiles: 0th, 10th, ..., 100th.
  print_percentiles(calculate_percentiles(total_file_sizes, 11))
end
|