summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJurenIvan <43958858+JurenIvan@users.noreply.github.com>2024-02-01 14:30:22 +0100
committerGitHub <noreply@github.com>2024-02-01 14:30:22 +0100
commit1b23172afb069a84979c738595b9025e36b79816 (patch)
treebc8a54087f4c58ccca4221f2cb783bd33c86481f
parentdda3c3b3116cb7786173f453fb189e50a87763bc (diff)
My first submission (#697)
* Common sense implementation * fix filename * formatting * remove excess system.out.println * fix hash collisions * ajdust so taht segment size smaller than Integer.MAX_VALUE
-rwxr-xr-xcalculate_average_JurenIvan.sh19
-rw-r--r--src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java219
2 files changed, 238 insertions, 0 deletions
diff --git a/calculate_average_JurenIvan.sh b/calculate_average_JurenIvan.sh
new file mode 100755
index 0000000..73d956e
--- /dev/null
+++ b/calculate_average_JurenIvan.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+# Copyright 2023 The original authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+JAVA_OPTS="--enable-preview"
+java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_JurenIvan
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java b/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java
new file mode 100644
index 0000000..3f93068
--- /dev/null
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_JurenIvan.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright 2023 The original authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package dev.morling.onebrc;
+
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static java.lang.Math.round;
+import static java.nio.channels.FileChannel.MapMode.READ_ONLY;
+import static java.nio.file.StandardOpenOption.READ;
+
+public class CalculateAverage_JurenIvan {
+
+ private static final String FILE_NAME = "./measurements.txt";
+
+ public static void main(String[] args) throws IOException {
+ long[] segments = getSegments(Runtime.getRuntime().availableProcessors());
+
+ var result = IntStream.range(0, segments.length - 1)
+ .parallel()
+ .mapToObj(i -> processSegment(segments[i], segments[i + 1]))
+ .flatMap(m -> Arrays.stream(m.hashTable).filter(Objects::nonNull))
+ .collect(Collectors.toMap(m -> new String(m.city), m -> m, Measurement::merge, TreeMap::new));
+
+ System.out.println(result);
+ }
+
+ private static LinearProbingHashMap processSegment(long start, long end) {
+ var results = new LinearProbingHashMap(1 << 19);
+
+ try (var fileChannel = (FileChannel) Files.newByteChannel(Path.of(FILE_NAME), READ)) {
+ var bb = fileChannel.map(READ_ONLY, start, end - start);
+ var buffer = new byte[100];
+
+ int limit = bb.limit();
+ for (int startLine = bb.position(); startLine < limit; startLine = bb.position()) {
+ int currentPosition = startLine;
+
+ byte b;
+ int hash = 7;
+ int wordLen = 0;
+ while (currentPosition < end && (b = bb.get(currentPosition++)) != ';') {
+ buffer[wordLen++] = b;
+ hash = hash * 31 + b;
+ }
+
+ int temp;
+ int negative = 1;
+ if (bb.get(currentPosition) == '-') {
+ negative = -1;
+ currentPosition++;
+ }
+
+ if (bb.get(currentPosition + 1) == '.') {
+ temp = negative * ((bb.get(currentPosition) - '0') * 10 + (bb.get(currentPosition + 2) - '0'));
+ currentPosition += 3;
+ }
+ else {
+ temp = negative * ((bb.get(currentPosition) - '0') * 100 + ((bb.get(currentPosition + 1) - '0') * 10 + (bb.get(currentPosition + 3) - '0')));
+ currentPosition += 4;
+ }
+
+ currentPosition++;
+
+ results.put(hash, buffer, wordLen, temp);
+
+ bb.position(currentPosition);
+ }
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return results;
+ }
+
+ private static long[] getSegments(int segmentCount) throws IOException {
+ try (var raf = new RandomAccessFile(FILE_NAME, "r")) {
+ long fileSize = raf.length();
+
+ if (fileSize < 100000) {
+ long[] chunks = new long[2];
+ chunks[1] = fileSize;
+ return chunks;
+ }
+
+ while (fileSize / segmentCount >= (Integer.MAX_VALUE - 150)) {
+ segmentCount *= 2;
+ }
+
+ long[] chunks = new long[segmentCount + 1];
+
+ chunks[0] = 0;
+ long segmentSize = fileSize / segmentCount;
+
+ for (int i = 1; i < segmentCount; i++) {
+ long chunkOffset = chunks[i - 1] + segmentSize;
+ raf.seek(chunkOffset);
+ while (raf.readByte() != '\n') {
+ }
+ chunks[i] = raf.getFilePointer();
+ }
+ chunks[segmentCount] = fileSize;
+ return chunks;
+ }
+ }
+
+ public static class LinearProbingHashMap {
+ final Measurement[] hashTable;
+ int slots;
+
+ public LinearProbingHashMap(int slots) {
+ this.slots = slots;
+ this.hashTable = new Measurement[slots];
+ }
+
+ void put(int hash, byte[] key, int len, int temperature) {
+ hash = Math.abs(hash);
+ int index = hash & (slots - 1);
+
+ int i = index;
+ while (hashTable[i] != null) {
+ if (keyIsEqual(key, hashTable[i].city, len)) { // handling hash collisions
+ hashTable[i].add(temperature);
+ return;
+ }
+ i++;
+ if (i == slots) {
+ i = 0;
+ }
+ }
+
+ var cityArr = new byte[len];
+ System.arraycopy(key, 0, cityArr, 0, len);
+ hashTable[i] = new Measurement(cityArr, hash, temperature, temperature, 1, temperature);
+ }
+
+ private boolean keyIsEqual(byte[] one, byte[] other, int len) {
+ if (len != other.length)
+ return false;
+ for (int i = 0; i < len; i++) {
+ if (one[i] != other[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ }
+
+ static class Measurement {
+ byte[] city;
+ int hash;
+ int min;
+ int max;
+ int count;
+ long sum;
+
+ public Measurement(byte[] city, int hash, int min, int max, int count, long sum) {
+ this.city = city;
+ this.hash = hash;
+ this.min = min;
+ this.max = max;
+ this.count = count;
+ this.sum = sum;
+ }
+
+ public void add(int temperature) {
+ min = Math.min(min, temperature);
+ max = Math.max(max, temperature);
+ count++;
+ sum += temperature;
+ }
+
+ public Measurement merge(Measurement other) {
+ min = Math.min(min, other.min);
+ max = Math.max(max, other.max);
+ count += other.count;
+ sum += other.sum;
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ return (min * 1.0) / 10 + "/" + round((sum * 1.0) / count) / 10.0 + "/" + (max * 1.0) / 10;
+ }
+
+ @Override
+ public int hashCode() {
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return Arrays.equals(city, ((Measurement) obj).city);
+ }
+ }
+}