aboutsummaryrefslogtreecommitdiffstats
path: root/datasets/normalize_dataset.py
diff options
context:
space:
mode:
Diffstat (limited to 'datasets/normalize_dataset.py')
-rw-r--r--datasets/normalize_dataset.py35
1 files changed, 30 insertions, 5 deletions
diff --git a/datasets/normalize_dataset.py b/datasets/normalize_dataset.py
index befebee..95d8537 100644
--- a/datasets/normalize_dataset.py
+++ b/datasets/normalize_dataset.py
@@ -1,6 +1,31 @@
-"""
-Run the following script on a dataset!
+import numpy as np
+from itertools import izip
-If the nodes are not numbered 0 to number_of_nodes - 1, then it will print
-out the normalized version of the dataset in the same directory
-"""
def _parse_edge(line):
    """Return the (node_1, node_2) tokens of an edge line, or None to skip it.

    Lines containing "#" are treated as comments and blank lines carry no
    edge; both are skipped.  Any other line must hold exactly two
    whitespace-separated tokens, otherwise ValueError is raised.
    """
    if "#" in line or not line.strip():
        return None
    node_1, node_2 = line.split()
    return node_1, node_2


def normalize_file(filename):
    """
    Normalizes file:
    Relabels the nodes of an edge-list file to 0 .. number_of_nodes - 1 and
    writes the relabelled edges next to the input file.

    Each data line of `filename` must contain two whitespace-separated node
    identifiers.  Lines containing "#" (comments) and blank lines are ignored
    and are not copied to the output.

    New indices follow the sorted order of the node identifiers as strings
    (so "10" sorts before "2").

    The output path is `filename` with its last four characters (e.g. ".txt")
    replaced by "normalize.txt"; an existing file at that path is overwritten.

    Raises ValueError if a data line does not contain exactly two tokens.
    """
    # First pass: collect every node identifier that appears in an edge.
    nodes = []
    with open(filename, "r") as f:
        for line in f:
            edge = _parse_edge(line)
            if edge is not None:
                nodes.extend(edge)
    uniq_nodes = np.unique(nodes)

    # Map each identifier to an index between 0 and number_of_nodes - 1.
    hash_nodes = {}
    for idx, node in enumerate(uniq_nodes):
        hash_nodes[node] = idx

    # Second pass: rewrite the edges with the new indices.  The same line
    # filter as the first pass is used, so every token looked up here is
    # guaranteed to be in hash_nodes.
    with open(filename[:-4] + "normalize.txt", "w") as g:
        with open(filename, "r") as f:
            for line_f in f:
                edge = _parse_edge(line_f)
                if edge is None:
                    continue
                f_node_1, f_node_2 = edge
                g.write("%d %d\n" % (hash_nodes[f_node_1],
                                     hash_nodes[f_node_2]))