diff options
-rwxr-xr-x | src/main/python/create_measurements.py | 28 |
1 files changed, 12 insertions, 16 deletions
diff --git a/src/main/python/create_measurements.py b/src/main/python/create_measurements.py
index 26ec768..52e9fc1 100755
--- a/src/main/python/create_measurements.py
+++ b/src/main/python/create_measurements.py
@@ -84,22 +84,18 @@ def estimate_file_size(weather_station_names, num_rows_to_create):
     """
     Tries to estimate how large a file the test data will be
     """
-    max_string = float('-inf')
-    min_string = float('inf')
-    per_record_size = 0
-    record_size_unit = "bytes"
-
-    for station in weather_station_names:
-        if len(station) > max_string:
-            max_string = len(station)
-        if len(station) < min_string:
-            min_string = len(station)
-        per_record_size = ((max_string + min_string * 2) + len(",-123.4")) / 2
-
-    total_file_size = num_rows_to_create * per_record_size
-    human_file_size = convert_bytes(total_file_size)
-
-    return f"Estimated max file size is: {human_file_size}.\nTrue size is probably much smaller (around half)."
+    total_name_bytes = sum(len(s.encode("utf-8")) for s in weather_station_names)
+    avg_name_bytes = total_name_bytes / float(len(weather_station_names))
+
+    # avg_temp_bytes = sum(len(str(n / 10)) for n in range(-999, 1000)) / 1999
+    avg_temp_bytes = 4.400200100050025
+
+    # add 2 for separator and newline
+    avg_line_length = avg_name_bytes + avg_temp_bytes + 2
+
+    human_file_size = convert_bytes(num_rows_to_create * avg_line_length)
+
+    return f"Estimated max file size is: {human_file_size}."
 
 
 def build_test_data(weather_station_names, num_rows_to_create):