-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Ross Devito
committed
Oct 31, 2024
1 parent
f1ad9a0
commit 2b69139
Showing
131 changed files
with
28,806 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
"""Convert CITRUS output CSV to plink phenotype file format. | ||
Example output: | ||
#IID gene1 phenotype | ||
1110 23.31 22.22 | ||
2202 34.12 18.23 | ||
... | ||
""" | ||
|
||
import os | ||
|
||
import pandas as pd | ||
|
||
# # Write a function to replace main | ||
# def citrus_out_to_plink_phenotype( | ||
# sample_id_col='' | ||
# ) | ||
|
||
import argparse | ||
|
||
|
||
if __name__ == "__main__": | ||
|
||
# args | ||
parser = argparse.ArgumentParser() | ||
|
||
parser.add_argument("desc") | ||
|
||
args = parser.parse_args() | ||
|
||
sample_id_col = 'sample_id' | ||
|
||
pheno_cols = ['phenotype'] | ||
|
||
csv_path = f'citrus_output/{args.desc}.csv' | ||
output_path = f'citrus_phenos/{args.desc}.pheno' | ||
|
||
# function | ||
df = pd.read_csv( | ||
csv_path, | ||
index_col=False | ||
) | ||
|
||
df = df[[sample_id_col] + pheno_cols] | ||
|
||
# Split FID and IID | ||
df['FID'] = df[sample_id_col].str.split('_').map(lambda x: x[0]) | ||
df['IID'] = df[sample_id_col].str.split('_').map(lambda x: x[1]) | ||
|
||
df[['FID', 'IID'] + pheno_cols].to_csv( | ||
output_path, | ||
sep='\t', | ||
index=False | ||
) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
"""Estimate heritability of simulated CITRUS phenotypes.""" | ||
|
||
import json | ||
import os | ||
|
||
from pheno_sim import PhenoSimulation | ||
from pheno_sim.heritability_estimation import broad_sense_heritability | ||
|
||
|
||
if __name__ == '__main__': | ||
|
||
sim_name = 'gepsi_lin_add_hFULL_hFULL' | ||
config_dir = 'citrus_output' | ||
|
||
print(sim_name) | ||
|
||
# Load simulation config | ||
with open(os.path.join(config_dir, sim_name + '_config.json'), 'r') as f: | ||
sim_config = json.load(f) | ||
|
||
# Create simulation | ||
sim = PhenoSimulation(sim_config) | ||
|
||
# Load genotype data | ||
input_vals = sim.run_input_step() | ||
|
||
# Compute heritability | ||
confidence_level = 95 | ||
|
||
heritability = broad_sense_heritability( | ||
sim, | ||
input_vals=input_vals, | ||
n_samples=0.1, | ||
confidence_level=confidence_level/100, | ||
) | ||
|
||
# Save results | ||
with open( | ||
os.path.join( | ||
config_dir, sim_name + f'_heritability_ci{confidence_level}.json' | ||
), | ||
'w' | ||
) as f: | ||
json.dump(heritability, f, indent=4) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
"""Generate linear additive phenotype CITRUS simulation configs.""" | ||
|
||
import copy | ||
import json | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
|
||
CONFIG_TEMPLATE = { | ||
"input": [ | ||
{ | ||
"file": "../data/ukb/vcf/phased_chr19.vcf", | ||
"file_format": "vcf", | ||
"reference_genome": "GRCh37", | ||
"input_nodes": [ | ||
{ | ||
"alias": "variants", | ||
"type": "SNP", | ||
"chr": "19", | ||
"pos": [] | ||
} | ||
] | ||
} | ||
], | ||
"simulation_steps": [ | ||
{ | ||
"type": "RandomConstant", | ||
"alias": "variants_betas", | ||
"input_match_size": "variants", | ||
"dist_name": "normal", | ||
"dist_kwargs": { | ||
"loc": 0.0, | ||
"scale": 1.0 | ||
}, | ||
"by_feat": True | ||
}, | ||
{ | ||
"type": "Product", | ||
"alias": "variant_effects", | ||
"input_aliases": [ | ||
"variants_betas", | ||
"variants" | ||
] | ||
}, | ||
{ | ||
"type": "AdditiveCombine", | ||
"alias": "combined_variant_effects", | ||
"input_alias": "variant_effects" | ||
}, | ||
{ | ||
"type": "SumReduce", | ||
"alias": "no_noise_phenotype", | ||
"input_alias": "combined_variant_effects" | ||
}, | ||
{ | ||
"type": "Heritability", | ||
"alias": "phenotype", | ||
"input_alias": "no_noise_phenotype", | ||
} | ||
] | ||
} | ||
|
||
|
||
class NpEncoder(json.JSONEncoder): | ||
def default(self, obj): | ||
if isinstance(obj, np.integer): | ||
return int(obj) | ||
if isinstance(obj, np.floating): | ||
return float(obj) | ||
if isinstance(obj, np.ndarray): | ||
return obj.tolist() | ||
return super(NpEncoder, self).default(obj) | ||
|
||
|
||
def create_config(pos, heritability): | ||
sim_config = copy.deepcopy(CONFIG_TEMPLATE) | ||
|
||
sim_config['input'][0]['input_nodes'][0]['pos'] = list(pos) | ||
sim_config['simulation_steps'][-1]['heritability'] = float(heritability) | ||
|
||
return sim_config | ||
|
||
|
||
if __name__ == '__main__': | ||
|
||
# Options | ||
n_vars = [10, 100, 1000] | ||
herit_vals = [0.05, 0.1, 0.2, 0.4, 0.8] | ||
version_num = 1 | ||
|
||
config_save_dir = 'sim_configs' | ||
|
||
# Load variant info | ||
var_info_path = '../data/ukb/chr19_var_info.csv' | ||
var_df = pd.read_csv( | ||
var_info_path | ||
) | ||
|
||
# Add position column | ||
var_df['pos'] = var_df.locus.apply(lambda x: int(x.split(':')[1])) | ||
|
||
for n_var in n_vars: | ||
for herit in herit_vals: | ||
rng = np.random.default_rng(seed=147*version_num) | ||
|
||
# Select loci | ||
pos = rng.choice( | ||
var_df.pos, | ||
size=n_var, | ||
replace=False | ||
) | ||
|
||
sim_config = create_config(list(pos), herit) | ||
|
||
# Save config | ||
h_str = str(herit).split('.')[1] | ||
config_fname = f'lin_add_nvar{n_var}_h{h_str}_v{version_num}' | ||
|
||
with open(f'{config_save_dir}/{config_fname}.json', 'w') as f: | ||
json.dump(sim_config, f, indent=4, cls=NpEncoder) |
Oops, something went wrong.