mirror of
https://github.com/TangentFoxy/.lua-files.git
synced 2025-01-15 15:34:20 +00:00
lua_file_size imported as storage-statistics.lua
This commit is contained in:
parent
161e81bc70
commit
e9ecceb0b4
@ -5,6 +5,7 @@ It's like dotfiles, but no, it's just Lua scripts I find useful.
|
||||
- `2webm.lua`: Converts everything in the working directory to .webm files.
|
||||
- `llm.lua`: (Windows only!) A very WIP script for working with LLMs through WSL using [ollama](https://github.com/jmorganca/ollama).
|
||||
- `print-arguments.lua`: For testing how a Lua script receives arguments, because this can be platform-specific.
|
||||
- `storage-statistics.lua`: Prints a variety of statistics about the files within the current directory. Requires LFS.
|
||||
- `test.lua`: (Dev Test) Used repeatedly while working on these scripts to verify minor details I'm forgetful about.
|
||||
- `utility-functions.lua`: (Library) Required for many of these scripts to run.
|
||||
- `video-dl.lua`: A few premade command lines for using `yt-dlp` to download what I want quicker.
|
||||
|
233
storage-statistics.lua
Normal file
233
storage-statistics.lua
Normal file
@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env luajit
|
||||
|
||||
-- Primarily written by ChatGPT using GPT-3.5, with corrections and modifications by me.
|
||||
-- Do whatever the hell you want with it.
|
||||
|
||||
local lfs = require "lfs"
|
||||
|
||||
-- Function to get the filesize of a given file
|
||||
function get_filesize(filepath)
|
||||
local file = io.open(filepath, "rb")
|
||||
if file then
|
||||
local size = file:seek("end")
|
||||
file:close()
|
||||
return size
|
||||
else
|
||||
return nil
|
||||
end
|
||||
end
|
||||
|
||||
-- Function to recursively traverse directories and get file sizes
|
||||
function traverse_directory(path)
|
||||
local total_size = 0
|
||||
local total_files = 0
|
||||
local file_sizes = {}
|
||||
|
||||
for entry in lfs.dir(path) do
|
||||
if entry ~= "." and entry ~= ".." then
|
||||
local full_path = path..'\\'..entry
|
||||
local attributes = lfs.attributes(full_path)
|
||||
|
||||
if attributes and attributes.mode == "file" then
|
||||
local size = get_filesize(full_path)
|
||||
|
||||
if size then
|
||||
print(full_path, size, "bytes")
|
||||
table.insert(file_sizes, size)
|
||||
total_size = total_size + size
|
||||
total_files = total_files + 1
|
||||
else
|
||||
print(full_path, "File not found or inaccessible")
|
||||
end
|
||||
elseif attributes and attributes.mode == "directory" then
|
||||
local subdir_total_size, subdir_total_files, subdir_file_sizes = traverse_directory(full_path)
|
||||
total_size = total_size + subdir_total_size
|
||||
total_files = total_files + subdir_total_files
|
||||
while #subdir_file_sizes > 0 do
|
||||
table.insert(file_sizes, table.remove(subdir_file_sizes))
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return total_size, total_files, file_sizes
|
||||
end
|
||||
|
||||
-- Function to calculate evenly spaced percentiles
|
||||
function calculate_percentiles(data, num_percentiles)
|
||||
local result = {}
|
||||
table.sort(data)
|
||||
|
||||
for i = 1, num_percentiles do
|
||||
local p = (i - 1) / (num_percentiles - 1) * 100
|
||||
local index = math.ceil(#data * p / 100)
|
||||
if index == 0 then index = 1 end
|
||||
result[i] = data[index]
|
||||
end
|
||||
|
||||
return result
|
||||
end
|
||||
|
||||
-- Function to print percentiles table returned from calculate_percentiles
|
||||
function print_percentiles(percentiles)
|
||||
for i, value in pairs(percentiles) do
|
||||
local p = (i - 1) / (#percentiles - 1) * 100
|
||||
if p == 50 then
|
||||
print(p .. "th percentile (median):", value, "bytes")
|
||||
else
|
||||
print(p .. "th percentile:", value, "bytes")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
-- Function to calculate mode
|
||||
function calculate_mode(data)
|
||||
local freq_map = {}
|
||||
local max_freq = 0
|
||||
local modes = {}
|
||||
|
||||
for _, value in ipairs(data) do
|
||||
freq_map[value] = (freq_map[value] or 0) + 1
|
||||
if freq_map[value] > max_freq then
|
||||
max_freq = freq_map[value]
|
||||
end
|
||||
end
|
||||
|
||||
if max_freq == 1 then
|
||||
return modes, max_freq -- no mode
|
||||
end
|
||||
|
||||
for value, freq in pairs(freq_map) do
|
||||
if freq == max_freq then
|
||||
table.insert(modes, value)
|
||||
end
|
||||
end
|
||||
|
||||
table.sort(modes)
|
||||
return modes, max_freq
|
||||
end
|
||||
|
||||
-- Function to print mode results
|
||||
function print_mode_results(modes, max_freq)
|
||||
if #modes == 0 then
|
||||
print("No mode found.")
|
||||
elseif #modes == 1 then
|
||||
print("Mode:", modes[1], "bytes")
|
||||
else
|
||||
print("Multiple modes:")
|
||||
for i, mode in ipairs(modes) do
|
||||
print("Mode " .. i .. ":", mode, "bytes")
|
||||
end
|
||||
end
|
||||
print("Frequency:", max_freq)
|
||||
end
|
||||
|
||||
-- Function to calculate standard deviation
|
||||
function calculate_standard_deviation(data)
|
||||
local n = #data
|
||||
local sum = 0
|
||||
local sum_of_squared_deviations = 0
|
||||
|
||||
if n < 1 then
|
||||
return 0 -- Standard deviation is undefined for small sample sizes
|
||||
end
|
||||
|
||||
-- Calculate mean
|
||||
for _, value in ipairs(data) do
|
||||
sum = sum + value
|
||||
end
|
||||
local mean = sum / n
|
||||
|
||||
-- Calculate sum of squared deviations
|
||||
for _, value in ipairs(data) do
|
||||
local deviation = value - mean
|
||||
sum_of_squared_deviations = sum_of_squared_deviations + deviation^2
|
||||
end
|
||||
|
||||
-- Calculate standard deviation
|
||||
local variance = sum_of_squared_deviations / (n - 1)
|
||||
local standard_deviation = math.sqrt(variance)
|
||||
|
||||
return standard_deviation
|
||||
end
|
||||
|
||||
-- Function to calculate a histogram
|
||||
function calculate_histogram(data, num_bins)
|
||||
local histogram = {}
|
||||
local min_value = math.min(unpack(data))
|
||||
local max_value = math.max(unpack(data))
|
||||
local bin_width = (max_value - min_value) / num_bins
|
||||
|
||||
for i = 1, num_bins do
|
||||
local bin_start = min_value + (i - 1) * bin_width
|
||||
local bin_end = bin_start + bin_width
|
||||
histogram[i] = {bin_start, bin_end, 0}
|
||||
end
|
||||
|
||||
for _, value in ipairs(data) do
|
||||
local bin_index = math.floor((value - min_value) / bin_width) + 1
|
||||
if bin_index <= num_bins then
|
||||
histogram[bin_index][3] = histogram[bin_index][3] + 1
|
||||
else
|
||||
-- the largest file always calculates to an nth + 1 bin
|
||||
histogram[num_bins][3] = histogram[num_bins][3] + 1
|
||||
end
|
||||
end
|
||||
|
||||
return histogram
|
||||
end
|
||||
|
||||
-- Function to print histogram results
|
||||
function print_histogram(histogram)
|
||||
for i, bin in ipairs(histogram) do
|
||||
local bin_start, bin_end, count = unpack(bin)
|
||||
print(string.format("%.2f - %.2f:", bin_start, bin_end), count, "files")
|
||||
end
|
||||
end
|
||||
|
||||
-- Function to print histogram results with logarithmic scaling and aligned graphical representation
|
||||
function print_histogram_graphical(histogram, graph_width)
|
||||
local max_count = 0
|
||||
|
||||
-- Find the maximum count to determine the scale
|
||||
for _, bin in ipairs(histogram) do
|
||||
local count = bin[3]
|
||||
if count > max_count then
|
||||
max_count = count
|
||||
end
|
||||
end
|
||||
|
||||
local max_log_scaled = math.log(max_count + 1) -- Add 1 to avoid log(0)
|
||||
|
||||
-- Print the histogram with graphical representation and aligned text data
|
||||
for _, bin in ipairs(histogram) do
|
||||
local bin_start, bin_end, count = unpack(bin)
|
||||
local log_scaled_count = math.log(count + 1) -- Add 1 to avoid log(0)
|
||||
local scaled_width = math.floor((log_scaled_count / max_log_scaled) * graph_width) -- Adjust the width as needed
|
||||
|
||||
local bar = string.rep("#", scaled_width)
|
||||
local empty_spaces = string.rep(" ", graph_width - scaled_width) -- Add empty spaces for alignment
|
||||
print(string.format("[%s%s] %.2f - %.2f: %d files", bar, empty_spaces, bin_start, bin_end, count))
|
||||
end
|
||||
end
|
||||
|
||||
local root_directory = "." -- bodge to work in-place
|
||||
local total_size, total_files, total_file_sizes = traverse_directory(root_directory)
|
||||
|
||||
if total_files > 0 then
|
||||
print("")
|
||||
print(total_files, "files found.")
|
||||
local average_size = total_size / total_files
|
||||
print("Average (mean) file size:", average_size, "bytes")
|
||||
local standard_deviation = calculate_standard_deviation(total_file_sizes)
|
||||
print("Standard deviation:", standard_deviation)
|
||||
local mode_results, max_freq = calculate_mode(total_file_sizes)
|
||||
print_mode_results(mode_results, max_freq)
|
||||
local bin_size = math.ceil(math.sqrt(total_files)) -- Square Root Rule
|
||||
local histogram_results = calculate_histogram(total_file_sizes, bin_size)
|
||||
print_histogram_graphical(histogram_results, 40)
|
||||
local percentiles = calculate_percentiles(total_file_sizes, 11)
|
||||
print_percentiles(percentiles)
|
||||
else
|
||||
print("No files found.")
|
||||
end
|
Loading…
Reference in New Issue
Block a user