Finished writing for first gen filler

fenneltheloon · Jun 27, 2023 · 0e3e6e7 · 0e3e6e7
1 parent 039f89f
commit 0e3e6e7
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 5 deletions.
diff --git a/first-gen-filler.py b/first-gen-filler.py
@@ -21,15 +21,23 @@
 
 import argparse
 import sys
+import os
 import random
+import scipy
+import math
 import pymatgen
 
+def phi(x):
+    """Return the area under the standard normal density between 0 and x.
+
+    This is the zero-based integral
+    (1 / sqrt(2 * pi)) * integral_0^x exp(-t^2 / 2) dt,
+    so phi(0) == 0 and phi(x) approaches 0.5 as x grows.  It is not the
+    usual cumulative distribution function, which starts at -infinity.
+
+    The integral has the closed form 0.5 * erf(x / sqrt(2)), which is used
+    here instead of numerical quadrature.  The previous implementation
+    multiplied a float by the (value, error) tuple returned by
+    scipy.integrate.quad, which raises TypeError.
+    """
+    return 0.5 * math.erf(x / math.sqrt(2))
+
 CU_OCC = 18
 CU_AV = 108
 AG_OCC = 9
 BI_OCC = 9
 AG_BI_AV = 27
 I_AV_OCC = 54
+SPACE_GROUPS = 230
 
 arg_parser = argparse.ArgumentParser()
 
@@ -44,6 +52,11 @@
 arg_parser.add_argument("-n", "--number", type=int, default=100,
                         help="Specifies how many configurations there will be\
                         in the generation. Default 100.")
+arg_parser.add_argument("-a", "--aggressiveness", type=float, default=3, help=\
+                        "Specifies how aggressively biased the binning will be\
+                        towards higher order space groups. Mathematically, this\
+                        is specifying a z-score as a cutoff on the curve\
+                        that is being sampled. Default 3.")
 
 args = arg_parser.parse_args()
 
@@ -95,6 +108,18 @@
         case _:
             print("Syntax error on lines 6 and 7.")
 
+# Calculate the inclusive upper space-group number of each bin by sampling
+# the normal curve at args.bins evenly spaced points up to the z-score given
+# by args.aggressiveness.  The range runs from 1 to args.bins inclusive so
+# the list holds exactly one upper bound per bin; round() is the builtin
+# (the math module has no round function).
+Bins = [round(2 * SPACE_GROUPS * phi((args.aggressiveness * i) / args.bins))
+        for i in range(1, args.bins + 1)]
+# Force the last bound to the highest space group so rounding can never
+# leave a space group without a bin.
+Bins[-1] = SPACE_GROUPS
+
+# Create subdirectories for each bin.  exist_ok lets the script be re-run
+# against a directory that already contains the bin folders.
+parent_dir = os.path.dirname(args.index)
+for i in range(0, args.bins):
+    os.makedirs(os.path.join(parent_dir, str(i)), exist_ok=True)
+
 # Now generate the random config
 for i in range(args.number):
     AgBiOcc = random.choices(AgBiIndex, k=AG_OCC + BI_OCC)
@@ -107,19 +132,30 @@
     vasp_file.write(f"{i}\n")
     vasp_file.write(lattice_constant)
     vasp_file.writelines(LatticeMatrix)
-    vasp_file.write("{'Ag':<3}{'Bi':<3}{'Cu':<3}{'I':<3}\n")
+    vasp_file.write(f"{'Ag':<3}{'Bi':<3}{'Cu':<3}{'I':<3}\n")
     vasp_file.write(f"{AG_OCC:3d}{BI_OCC:3d}{CU_OCC:3d}{I_AV_OCC:3d}\n")
     vasp_file.writelines(AgOcc)
     vasp_file.writelines(BiOcc)
     vasp_file.writelines(CuOcc)
     vasp_file.writelines(IIndex)
-    vasp_file.close()
 
+    # Push buffered writes to disk so pymatgen parses the complete file; the
+    # handle itself is closed further down, once the space group is known.
+    vasp_file.flush()
+
+    # Calculate the space group and append it to the first line of the file
     poscar = pymatgen.io.vasp.inputs.Poscar\
         .from_file(f"{i}.vasp", check_for_POTCAR=False, read_velocities=False)
 
     spacegroup = pymatgen.symmetry.analyzer.SpacegroupAnalyzer(poscar)\
         .get_space_group_number()
 
-    # TODO: Figure out the mathings for the bin distribution of spacegroups
-    # over the entire generation
+    # Tag the first line (the configuration number) with the space group.
+    # The write handle is closed and the file rewritten as a whole: the
+    # tagged line is longer than the original, so overwriting in place from
+    # offset 0 would clobber the start of the following line.
+    vasp_file.close()
+    with open(f"{i}.vasp") as tagged_file:
+        vf_lines = tagged_file.readlines()
+    # Insert the tag before the line's newline rather than after it.
+    vf_lines[0] = vf_lines[0].rstrip("\n") + f" ({spacegroup})\n"
+    with open(f"{i}.vasp", "w") as tagged_file:
+        tagged_file.writelines(vf_lines)
+
+    # Now determine the proper bin and move file into that bin: the first
+    # bin whose upper bound is at least the space group number.
+    for j, upper_bound in enumerate(Bins):
+        if spacegroup <= upper_bound:
+            # os.path.join only accepts str/path components, not int.
+            new_path = os.path.join(parent_dir, str(j), f"{i}.vasp")
+            os.rename(f"{i}.vasp", new_path)
+            break
diff --git a/notes/first-gen-filler.md b/notes/first-gen-filler.md
@@ -36,4 +36,25 @@ Ag-n1-n2-n3...-n9_Bi-n1-n2-n3...n9_Cu-n1_n2_n3...n18
 
 Name of file that contains all indices should be called `INDEX.vasp` and it should be in the root of the directory that is passed into the program.
 
-## Formula for 
+## Algorithm for computing the number of space groups in each bin
+
+Compute the values of each $B_i$ for the following series:
+
+$$\sum_{i=1}^n{B_i} = a$$
+where:
+$$B_i = \text{round}\left[2a\left(\Phi\left(\frac{zi}{n}\right) - \Phi\left(\frac{z(i-1)}{n}\right)\right)\right]$$
+
+$$\Phi(x) = \frac{1}{\sqrt{2\pi}}\int_0^x{\exp\left(-\frac{t^2}{2}\right)dt}$$
+
+where:
+- $B_i$ is the i-th bin
+- $n$ is the total number of bins being divided into (positive integer)
+- $a$ is the number of space groups (positive integer)
+- $z$ is the number of standard deviations captured on the normal (Gaussian) curve (positive real); in practice it controls how aggressively we select for high space groups: the higher $z$, the more aggressive the selection
+
+For $B_n$, sum the values of the series up to and including $B_n$, then adjust $B_n$ up or down so that the total equals $a$ (the correction should be about $\pm 1$, since it is caused only by rounding error).
+
+Alternatively, record only the cumulative upper bound of each bin:
+$$B_i = \text{round}\left[2a\Phi\left(\frac{zi}{n}\right)\right]$$
+
+and let $B_n := a$; then, when sorting, put each space group into the first bin whose upper bound is greater than or equal to it.