diff --git a/test_data/cancer_breast.tsv b/test_data/cancer_breast.tsv new file mode 100644 index 0000000..8147dbe --- /dev/null +++ b/test_data/cancer_breast.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit breast_cancer_status_emerge_1 breast_cancer_status_registry_1 breast_cancer_status_survey_1 age_at_diagnosis_1 year_at_diagnosis_1 breast_cancer_type_1 cancer_behavior_1 her2_1 pr_1 er_1 T_stage_clinical_1 T_stage_pathological_1 T_stage_uknown_1 T_stage_clinical_2 T_stage_pathological_2 T_stage_unknown_2 nodal_involvement_1 distant_metastasis_1 stage_system_1 grade_clinical_1 grade_pathological_1 grade_unknown_1 screening_history_1 recurrence_1 surgery_1 radiotherapy_1 chemotherapy_1 hormone_therapy_1 NSAID_1 age_at_natural_menopause_1 post_menopausal_hormone_use_1 parity_1 age_at_first_birth_1 age_at_menarche_1 deceased_1 cause_of_death_breast_cancer_1 age_at_death_1 +subject1 59 visit_1 0 0 0 63 2017 unilateral benign negative unknown positive unstaged stage 4 stage 3 unknown unknown unstaged N2 M1 NA grade 1 grade 1 grade 3 0 recurrence_second_primary 0 0 1 pharmaceutical 1 64 1 0 26 15 0 0 64 +subject2 46 visit_1 0 1 0 46 2013 bilateral borderline unknown unknown negative unstaged stage 4 stage 3 localized distant in_situ NX MX NA grade 1 grade 3 grade 1 1 none 0 0 0 surgical 0 58 0 2 30 9 1 1 48 +subject3 55 visit_1 0 0 0 62 2011 unilateral invasive positive negative positive stage 3 unstaged stage 1 unstaged unstaged regional N2 MX NA grade 2 grade 2 grade 1 0 unknown 1 0 0 surgical 1 62 1 2 32 18 0 0 72 +subject4 55 visit_1 0 0 0 64 2017 bilateral borderline unknown negative unknown stage 3 stage 2 stage 4 localized localized regional N0 MX NA grade 2 grade 3 grade 3 0 unknown 1 1 0 pharmaceutical 1 59 0 2 30 14 0 0 78 +subject5 62 visit_1 0 0 0 63 2016 unilateral benign positive positive negative stage 3 unstaged unknown regional regional unknown NX MX NA grade 2 grade 3 grade 2 0 recurrence_second_primary 0 0 0 unknown 0 63 1 2 30 13 0 0 77 +subject6 
55 visit_1 0 0 0 59 2002 bilateral in_situ negative positive positive stage 1 stage 4 stage 1 localized localized unknown N3 MX NA grade 2 grade 3 grade 2 1 none 1 1 1 both 0 57 0 0 23 20 0 0 73 +subject7 61 visit_1 0 0 0 61 2002 bilateral invasive negative positive unknown stage 3 stage 4 stage 3 localized unstaged unknown NX MX NA grade 1 grade 1 grade 3 1 unknown 1 1 0 none 0 61 0 2 17 12 0 0 73 +subject8 65 visit_1 0 0 0 68 2021 unilateral benign unknown positive positive stage 3 stage 3 unstaged in_situ localized unknown NX M0 NA grade 1 grade 3 grade 3 0 unknown 1 1 0 none 1 81 1 0 31 13 0 0 86 +subject9 66 visit_1 0 0 0 72 2006 bilateral in_situ unknown positive unknown stage 4 stage 3 unknown regional regional regional N3 M1 NA grade 3 grade 2 grade 1 1 none 0 0 1 none 0 68 1 0 25 14 0 0 81 +subject10 51 visit_1 0 0 0 62 2017 bilateral borderline negative unknown unknown stage 1 unknown stage 2 unknown distant unknown N2 MX NA grade 1 grade 1 grade 1 1 recurrence_primary 1 0 0 surgical 0 59 1 0 27 16 0 0 74 +subject11 61 visit_1 0 0 0 63 2006 unilateral borderline negative positive negative stage 1 stage 4 unknown unknown regional unstaged NX M0 NA grade 1 grade 1 grade 3 0 recurrence_primary 0 1 1 unknown 0 77 0 1 30 17 0 0 67 +subject12 55 visit_1 0 0 0 59 2004 unilateral benign positive negative unknown unknown unknown stage 1 unknown localized unstaged N1 MX NA grade 1 grade 1 grade 2 0 none 0 1 1 pharmaceutical 1 74 1 1 26 15 0 0 59 +subject13 52 visit_1 0 0 0 55 2010 unilateral borderline negative positive unknown unknown stage 2 stage 2 unstaged localized regional N1 MX NA grade 2 grade 1 grade 1 0 unknown 0 1 0 surgical 1 53 0 1 25 12 0 0 66 +subject14 66 visit_1 1 0 0 68 2007 unilateral benign unknown negative positive stage 4 stage 2 stage 3 in_situ distant unknown N1 M0 NA grade 2 grade 3 grade 1 1 recurrence_primary 1 1 1 both 0 82 0 2 22 11 0 0 72 +subject15 57 visit_1 0 0 0 64 2008 bilateral invasive positive positive positive unstaged stage 2 
stage 4 distant in_situ regional N0 M1 NA grade 3 grade 3 grade 3 0 recurrence_second_primary 0 1 1 both 1 66 0 1 22 12 0 0 80 +subject16 57 visit_1 0 1 0 62 2003 bilateral invasive unknown positive unknown unstaged stage 4 stage 4 unknown unknown unstaged N0 M0 NA grade 2 grade 1 grade 1 1 recurrence_second_primary 0 0 0 both 0 57 1 1 26 19 1 0 70 +subject17 67 visit_1 1 0 0 69 2014 unilateral in_situ positive negative unknown stage 1 unknown stage 2 localized regional localized N1 MX NA grade 2 grade 1 grade 3 1 none 0 1 1 none 0 74 0 1 32 14 0 0 85 +subject18 59 visit_1 0 0 0 62 2009 bilateral borderline positive positive negative stage 2 stage 2 unknown unknown regional distant N3 M0 NA grade 3 grade 2 grade 2 0 recurrence_primary 0 1 0 pharmaceutical 0 59 0 1 23 12 0 0 74 +subject19 67 visit_1 1 0 0 69 2001 bilateral borderline negative negative negative stage 1 stage 2 stage 1 unknown unstaged unknown N0 MX NA grade 2 grade 3 grade 2 0 recurrence_second_primary 1 1 1 both 0 78 0 1 32 18 0 0 73 +subject20 62 visit_1 0 0 0 63 2013 unilateral benign unknown unknown positive stage 4 stage 2 unknown in_situ regional distant N0 M0 NA grade 1 grade 2 grade 3 0 recurrence_second_primary 0 0 0 surgical 0 75 0 2 28 16 1 0 64 diff --git a/test_data/cancer_prostate.tsv b/test_data/cancer_prostate.tsv new file mode 100644 index 0000000..112f50b --- /dev/null +++ b/test_data/cancer_prostate.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit prostate_cancer_status_emerge_1 prostate_cancer_status_registry_1 prostate_cancer_status_survey_1 age_at_diagnosis_1 year_at_diagnosis_1 cancer_behavior_1 T_stage_clinical_1 T_stage_pathological_1 T_stage_uknown_1 T_stage_clinical_2 T_stage_pathological_2 T_stage_unknown_2 nodal_involvement_1 distant_metastasis_1 stage_system_1 gleason_score_clinical_1 gleason_score_pathological_1 gleason_score_unknown_1 psa_1 psa_at_diagnosis_1 screening_history_1 recurrence_1 surgery_1 radiotherapy_1 chemotherapy_1 hormone_therapy_1 NSAID_1 deceased_1 
cause_of_death_prostate_cancer_1 age_at_death_1 +subject1 59 visit_1 1 1 1 63 2017 borderline stage 1 stage 4 unknown localized unstaged in_situ N1 M1 NA 9 9 7 3.3273850651366184 0.6910954617090586 0 none 0 0 0 0 1 0 0 68 +subject2 46 visit_1 0 1 0 46 2013 in_situ stage 3 unstaged unknown in_situ unstaged in_situ N0 MX NA 5 6 3 0.1117048051118088 1.114277099266359 1 recurrence_second_primary 1 1 0 0 0 0 0 70 +subject3 55 visit_1 0 1 1 62 2011 borderline unknown stage 2 stage 3 regional distant unstaged NX M1 NA 8 3 8 1.1467807022659444 1.650565194078115 0 recurrence_primary 0 1 1 0 0 0 0 67 +subject4 55 visit_1 0 0 0 64 2017 in_situ stage 2 unknown stage 3 unknown distant regional N2 MX NA 2 3 5 0.6244635030052077 1.385998555747178 1 none 0 1 1 1 0 0 1 62 +subject5 62 visit_1 1 1 1 63 2016 borderline stage 1 stage 2 stage 2 in_situ distant unstaged N3 MX NA 3 10 3 1.0444848894466576 1.9458136918517794 1 recurrence_second_primary 0 0 0 0 0 1 0 63 +subject6 55 visit_1 0 1 0 59 2002 in_situ unstaged stage 3 stage 1 regional localized in_situ NX MX NA 2 10 9 0.959323014536773 1.2909907535132685 1 recurrence_second_primary 1 1 1 1 1 0 0 59 +subject7 61 visit_1 1 0 1 61 2002 in_situ unknown stage 4 stage 1 unstaged distant in_situ N0 MX NA 8 10 2 0.08122257203704342 0.302979697472334 0 none 1 1 1 1 0 0 0 62 +subject8 65 visit_1 1 1 1 68 2021 benign stage 2 unknown stage 1 regional distant distant N2 M0 NA 2 10 2 1.6165848336901831 0.3081623612021196 1 none 0 1 1 1 0 0 0 86 +subject9 66 visit_1 1 1 1 72 2006 invasive stage 4 unstaged stage 1 unstaged in_situ distant N3 MX NA 4 4 9 3.215384566425846 3.4306225959208776 0 recurrence_primary 1 1 0 0 1 0 0 70 +subject10 51 visit_1 0 1 1 62 2017 invasive stage 3 stage 3 unstaged unknown localized unknown N0 M1 NA 5 9 8 0.21256352824871372 1.1323419133081138 0 recurrence_primary 1 0 1 0 0 0 0 66 +subject11 61 visit_1 1 1 0 63 2006 borderline stage 3 stage 3 stage 1 distant localized in_situ N3 M1 NA 4 8 2 1.6517128367933704 
0.9405233862074993 0 none 0 0 0 1 1 1 0 85 +subject12 55 visit_1 0 0 1 59 2004 borderline stage 2 stage 1 stage 4 unknown unknown unknown NX M1 NA 2 8 6 0.2398326754427973 0.7250777214867976 0 unknown 0 1 0 1 1 0 0 85 +subject13 52 visit_1 0 1 1 55 2010 borderline stage 3 stage 4 stage 2 unknown unknown regional NX M1 NA 3 3 6 1.1806309466433595 1.9446144610218248 0 recurrence_primary 0 1 0 1 0 0 0 54 +subject14 66 visit_1 1 1 1 68 2007 borderline stage 2 unknown stage 4 regional in_situ regional N0 M0 NA 6 9 6 1.6857904625831341 1.52517841165081 1 recurrence_primary 1 0 1 1 1 0 0 86 +subject15 57 visit_1 0 0 1 64 2008 invasive unstaged stage 2 stage 1 regional unstaged regional N2 M0 NA 7 4 3 2.1816372213400363 2.002552414007943 1 unknown 0 0 0 1 1 0 0 73 +subject16 57 visit_1 0 1 0 62 2003 in_situ unknown unknown stage 1 unstaged unstaged in_situ N2 M1 NA 10 8 4 1.8889410806023983 1.223594159764647 0 recurrence_primary 1 0 0 0 0 0 0 57 +subject17 67 visit_1 1 0 0 69 2014 borderline unstaged stage 2 stage 4 unknown localized unknown N0 MX NA 4 2 5 0.8706631343253838 1.8269750432423142 0 none 1 1 0 1 1 0 0 78 +subject18 59 visit_1 1 1 0 62 2009 invasive stage 2 stage 2 stage 2 distant regional regional N3 M1 NA 4 5 10 1.5219466926525085 2.223313852420131 0 unknown 0 0 0 1 0 0 0 60 +subject19 67 visit_1 1 0 1 69 2001 invasive stage 3 stage 3 stage 3 distant localized regional NX M1 NA 9 9 5 0.61399603928097 1.4170698016606862 1 unknown 0 0 1 1 1 0 0 82 +subject20 62 visit_1 1 0 1 63 2013 benign stage 1 unstaged unstaged localized in_situ regional N3 M0 NA 3 5 4 1.211037802492477 0.8747108557841603 1 none 0 0 0 0 0 0 0 81 diff --git a/test_data/cmqt_anthropometry.tsv b/test_data/cmqt_anthropometry.tsv new file mode 100644 index 0000000..e7c4800 --- /dev/null +++ b/test_data/cmqt_anthropometry.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit height_1 weight_1 bmi_1 waist_hip_ratio_1 +subject1 59 visit_1 166.51728404004504 87.70407490435488 31.630141478991852 
0.9074966900959498 +subject2 46 visit_1 161.202551994156 80.8258450985496 31.103297082655853 0.8145228307678352 +subject3 55 visit_1 171.23801251550628 86.53811180127285 29.51257188546359 0.9034009869133715 +subject4 55 visit_1 169.17186404031233 86.4412843896079 30.204033404764687 0.6649561139308164 +subject5 62 visit_1 176.4493260077908 82.96448470324013 26.647238715038643 0.7343205137895237 +subject6 55 visit_1 169.82492809343805 78.58528157838867 27.248232409006288 0.7310283084720187 +subject7 61 visit_1 156.0312735892919 86.27942012799926 35.439200098235276 0.8079074951310364 +subject8 65 visit_1 163.50798836505146 84.54919575614312 31.62507251261148 0.7699475884641312 +subject9 66 visit_1 178.2757791033649 75.35985947462825 23.711301552730852 0.8579123324227408 +subject10 51 visit_1 177.43804249577906 86.20090419001572 27.379048176451334 0.656209438514197 +subject11 61 visit_1 168.9662314872622 80.76732089777315 28.290188511041826 0.7469005486739022 +subject12 55 visit_1 165.110036178032 85.25966289494805 31.274953024607136 0.7501018809016656 +subject13 52 visit_1 167.68140136962006 76.22894393591241 27.111285687273472 0.7936294054529471 +subject14 66 visit_1 164.68404018860684 72.58905440174385 26.765053327547104 0.8348499810263527 +subject15 57 visit_1 165.24046335177894 84.30565936248858 30.876205386787987 0.9576720775779393 +subject16 57 visit_1 166.18318741952814 77.97740084621502 28.23544170233956 0.75225930619927 +subject17 67 visit_1 173.1551878731937 78.86297291363192 26.30280885590887 0.7557994230712831 +subject18 59 visit_1 164.6905720192354 84.67048085431246 31.217243374076368 0.8556773306960885 +subject19 67 visit_1 164.29742090189868 77.67052060104524 28.773634591122615 0.7875468828313612 +subject20 62 visit_1 163.01588801788486 76.81228250714892 28.90485548023756 0.9079118556158186 diff --git a/test_data/cmqt_blood_pressure.tsv b/test_data/cmqt_blood_pressure.tsv new file mode 100644 index 0000000..2afdd3b --- /dev/null +++ 
b/test_data/cmqt_blood_pressure.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit systolic_bp_1 diastolic_bp_1 hypertension_1 +subject1 59 visit_1 124.33509725727151 95.40814980870978 0 +subject2 46 visit_1 109.15014855473149 81.65169019709921 0 +subject3 55 visit_1 137.82289290144655 93.0762236025457 0 +subject4 55 visit_1 131.91961154374948 92.88256877921579 0 +subject5 62 visit_1 152.71236002225947 85.92896940648028 0 +subject6 55 visit_1 133.78550883839446 77.17056315677733 0 +subject7 61 visit_1 94.37506739797686 92.5588402559985 0 +subject8 65 visit_1 115.73710961443275 89.09839151228624 0 +subject9 66 visit_1 157.93079743818544 70.7197189492565 0 +subject10 51 visit_1 155.53726427365447 92.40180838003143 1 +subject11 61 visit_1 131.33208996360634 81.53464179554629 0 +subject12 55 visit_1 120.3143890800914 90.5193257898961 0 +subject13 52 visit_1 127.66114677034301 72.45788787182482 0 +subject14 66 visit_1 119.09725768173382 65.17810880348769 0 +subject15 57 visit_1 120.68703814793986 88.61131872497717 0 +subject16 57 visit_1 123.38053548436613 75.95480169243004 0 +subject17 67 visit_1 143.30053678055347 77.72594582726384 0 +subject18 59 visit_1 119.11592005495825 89.34096170862492 0 +subject19 67 visit_1 117.9926311482819 75.3410412020905 0 +subject20 62 visit_1 114.33110862252818 73.62456501429784 0 diff --git a/test_data/cmqt_flags.tsv b/test_data/cmqt_flags.tsv new file mode 100644 index 0000000..8d256a2 --- /dev/null +++ b/test_data/cmqt_flags.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit flag_pregnancy_1 flag_acute_illness_1 flag_bld_1 flag_anemia_1 flag_hiv_1 flag_eskd_1 flag_splenectomy_1 flag_cirrhosis_1 flag_fasting_1 flag_lipids_med_1 flag_bp_med_1 flag_cvd_1 flag_t2d_1 flag_t1d_1 flag_diabetes_other_1 +subject1 59 visit_1 unknown unknown data not collected unknown data not collected yes no unknown data not collected no data not collected unknown yes data not collected data not collected +subject2 46 visit_1 no data not collected no no unknown 
unknown yes unknown data not collected unknown data not collected unknown yes no yes +subject3 55 visit_1 yes unknown unknown data not collected unknown yes data not collected yes yes no yes data not collected no unknown data not collected +subject4 55 visit_1 yes yes no data not collected data not collected unknown yes data not collected yes data not collected yes yes unknown no no +subject5 62 visit_1 data not collected unknown data not collected no data not collected yes no no yes unknown yes data not collected data not collected no no +subject6 55 visit_1 yes data not collected no unknown no unknown unknown yes no no no unknown no no no +subject7 61 visit_1 unknown yes data not collected no no unknown data not collected unknown no unknown yes unknown yes no unknown +subject8 65 visit_1 data not collected data not collected data not collected unknown data not collected no no data not collected no yes yes yes unknown unknown no +subject9 66 visit_1 data not collected unknown unknown data not collected no data not collected unknown data not collected no data not collected unknown yes data not collected yes no +subject10 51 visit_1 no unknown unknown data not collected data not collected data not collected yes yes data not collected data not collected no data not collected yes data not collected yes +subject11 61 visit_1 data not collected unknown yes yes no yes yes yes no yes no unknown data not collected data not collected yes +subject12 55 visit_1 yes no unknown unknown no yes no no unknown data not collected data not collected data not collected no data not collected no +subject13 52 visit_1 no data not collected data not collected yes unknown yes yes unknown no data not collected data not collected no no unknown no +subject14 66 visit_1 data not collected unknown data not collected yes yes yes no data not collected unknown no unknown no yes unknown yes +subject15 57 visit_1 yes yes data not collected data not collected yes data not collected data not collected 
yes no yes data not collected yes unknown yes unknown +subject16 57 visit_1 yes data not collected yes unknown no unknown data not collected data not collected no unknown unknown unknown unknown data not collected data not collected +subject17 67 visit_1 data not collected yes no yes data not collected yes unknown no unknown data not collected no data not collected yes no yes +subject18 59 visit_1 unknown unknown no yes yes data not collected yes yes no yes no yes data not collected data not collected yes +subject19 67 visit_1 data not collected yes data not collected yes no data not collected yes yes yes yes no no no data not collected data not collected +subject20 62 visit_1 data not collected no unknown no unknown no no data not collected unknown no unknown yes data not collected unknown no diff --git a/test_data/cmqt_glycemic.tsv b/test_data/cmqt_glycemic.tsv new file mode 100644 index 0000000..42fb3cf --- /dev/null +++ b/test_data/cmqt_glycemic.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit fasting_glucose_plasma_1 fasting_glucose_serum_1 fasting_insulin_1 hba1c_1 +subject1 59 visit_1 54.335097257271514 100.40814980870978 44.062251757196236 0.8629538590264505 +subject2 46 visit_1 39.150148554731494 86.65169019709921 37.08921230758763 5.12890149360826 +subject3 55 visit_1 67.82289290144655 98.0762236025457 43.75507401850285 0.3745564709250373 +subject4 55 visit_1 61.9196115437495 97.88256877921579 25.871708544811234 7.12738940450381 +subject5 62 visit_1 82.71236002225946 90.92896940648028 31.074038534214274 3.262766021325356 +subject6 55 visit_1 63.78550883839445 82.17056315677733 30.827123135401397 2.5366231021701244 +subject7 61 visit_1 24.37506739797687 97.5588402559985 36.59306213482773 2.2052889540530853 +subject8 65 visit_1 45.73710961443275 94.09839151228624 33.746069134809844 4.778864164562747 +subject9 66 visit_1 87.93079743818544 75.7197189492565 40.343424931705556 4.052338078993457 +subject10 51 visit_1 85.53726427365447 97.40180838003143 
25.215707888564772 2.657453514074762 +subject11 61 visit_1 61.332089963606336 86.53464179554629 32.01754115054266 3.31735379488651 +subject12 55 visit_1 50.3143890800914 95.5193257898961 32.25764106762492 2.0286698676551476 +subject13 52 visit_1 57.66114677034302 77.45788787182482 35.52220540897103 1.0821878500015871 +subject14 66 visit_1 49.09725768173383 70.17810880348769 38.61374857697645 3.36103458421411 +subject15 57 visit_1 50.68703814793985 93.61131872497717 47.825405818345445 4.443468565637837 +subject16 57 visit_1 53.38053548436613 80.95480169243004 32.41944796494524 2.2609190437584035 +subject17 67 visit_1 73.30053678055346 82.72594582726384 32.68495673034623 3.475076625079314 +subject18 59 visit_1 49.11592005495825 94.34096170862492 40.17579980220664 1.6681557751701703 +subject19 67 visit_1 47.99263114828191 80.3410412020905 35.06601621235209 1.4063849803144863 +subject20 62 visit_1 44.33110862252818 78.62456501429784 44.09338917118639 2.8966061373772334 diff --git a/test_data/cmqt_hematology.tsv b/test_data/cmqt_hematology.tsv new file mode 100644 index 0000000..1eaa6e2 --- /dev/null +++ b/test_data/cmqt_hematology.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit rbc_1 hemoglobin_1 hematocrit_1 mcv_1 mch_1 mchc_1 rdw_1 wbc_1 basophil_count_1 eosinophil_count_1 lymphocyte_count_1 monocyte_count_1 neutrophil_count_1 platelet_count_1 mean_platelet_volume_1 +subject1 59 visit_1 4.331242107540918 14.699831955867218 1.0507007173563738 2.425864662534217 38.306928369408766 36.68741726479722 9.33625684885929 4.837801587922566 71.42772404350012 94.27227724731605 10.65974915778088 92.64244538466608 67.63941344781097 736.8528701918846 12.156038334456646 +subject2 46 visit_1 0.6490170494106229 21.12323036312705 0.25918453116543316 3.9934934128587307 35.066673320966075 35.10191109304475 10.330876844482647 7.20416513742026 79.39636294584326 96.77233454209555 62.15570094970758 93.96357057999614 65.78050849353485 698.0350961379306 7.553121986292599 +subject3 55 visit_1 
3.1980202589448847 13.047185612998463 0.5375344561422558 1.6808350561218874 40.10386511486943 34.36307073793644 9.3762735204598755 11.200648410483769 48.88121147558951 58.93912225932215 22.321686175271225 79.93157314915618 78.02542645121639 578.0545075181684 11.336490679094158 +subject4 55 visit_1 3.1263743668291184 12.924394473329048 0.20262728579993228 0.6481222720791566 40.72793324820852 36.76992766588175 10.081419678713079 6.134925878556956 97.00901780820928 51.03445334772283 90.0376464491842 74.33729324054906 30.01939965825568 810.6677990274707 7.639034212955718 +subject5 62 visit_1 5.340715423573847 14.149192814228957 0.961091097136232 1.7995549676621758 36.49157757418824 36.58502471622523 9.920367569109894 4.122746433104384 31.185114488057593 45.50139768568556 57.8448072929441 74.33198103732025 81.40702468371228 979.8198153207237 8.070750625543084 +subject6 55 visit_1 3.0500120361002647 15.872085292265494 0.10389887095259559 0.3406506916131398 38.65608445705722 30.867105720639945 10.18757961117527 9.79678417198308 61.22373470412549 15.56409593425434 25.968998061953926 36.07140212522006 55.974919301464524 460.46475274142426 4.829004093854326 +subject7 61 visit_1 4.898715793982734 12.864617239095438 0.9536798790737273 1.9467956892807825 35.03562180804909 30.62390278207973 10.43562475975714 14.195821163793196 72.0935297201866 15.276083235005075 74.26374632377974 80.13123679754148 63.14665804360129 700.5809197907139 6.049535145450654 +subject8 65 visit_1 5.978826777809061 16.00499616517567 0.9333799093905899 1.5611422509427955 38.07448401003646 38.5676104111825 9.200891240400765 5.514167198490328 98.56613688039826 29.772883761288625 27.885157284530123 81.65744798624922 48.47308829934582 851.8412695389802 10.276039822533358 +subject9 66 visit_1 6.456227524503385 13.103082604607906 0.691945093478521 1.0717483094459959 39.72226371777236 32.35801282228957 11.970900990481827 8.407161180831487 85.78047607122514 28.587851084480377 76.55138112768714 83.176752761290345 
46.68863779671028 906.2353440351303 8.830453411428179 +subject10 51 visit_1 1.8585079840661032 13.224158007248933 0.5368324581033729 2.888513058355949 41.815538156982306 36.93759254094799 10.875624035500651 6.827592319362273 61.459014070450024 84.94724259419142 96.03383508037382 27.33603824459749 97.0884073963125 650.5337788435525 12.809173350157668 +subject11 61 visit_1 5.038398349541582 13.507104596895823 0.385568952474845 0.7652609534335965 37.19096031542644 32.27570772185876 9.40324132625517 7.322427283778194 53.24530426480112 19.77684802955605 49.685158953081555 29.513045299988676 67.49661696302564 907.9834930821021 11.93855445597262 +subject12 55 visit_1 3.164376155023718 10.870990649246837 0.7173750797582257 2.2670347790964462 38.670933254618845 31.71671126150096 9.744384026449396 5.378905951845624 8.4349891455212 75.11231898863137 91.44127828763908 97.28269461946172 84.839729514792 520.2914742217849 13.170023303923719 +subject13 52 visit_1 2.1072189633830214 16.49509377712789 0.941302211920198 4.467035596571275 37.54518918214765 34.19768738212932 9.44749277902548 6.828305332148935 73.79284419739945 35.57231620975256 99.0685898491156 98.38209897507045 22.095863207618095 589.093879395676 11.104585125000993 +subject14 66 visit_1 6.531223987188424 14.192694888090621 0.7967450472284645 1.2199015816810916 37.206056033510244 32.78847995045633 9.982435738478829 5.330558522765378 97.8167732176243 66.63150241736679 24.389152077818153 88.02449089787314 19.080751758793213 705.5060479810853 10.393691497825055 +subject15 57 visit_1 3.6888878001464 12.867416595874264 0.8100279740111506 2.1958596137811597 39.86819231089057 33.24868973829392 10.695966638651418 10.676300620280028 42.0384670478268 25.273330428640776 29.472236180941763 93.58004215951803 28.28153386847771 817.845017374823 9.835689755023369 +subject16 57 visit_1 3.8387251164439995 18.68845527121347 0.4130665035858365 1.0760512697728157 38.99113682714732 31.24404162103528 10.026817792153352 11.55648022781224 
91.01508567928023 0.7303644154252709 71.66503914633745 92.50372265149338 59.27362056677657 805.4123743012162 12.300959536797052 +subject17 67 visit_1 6.8473317646864995 12.698924635972869 0.19861312771314057 0.2900591566739047 37.068208237656556 35.44780830112809 9.844336035800064 9.588419550006606 37.1110597034245 77.4808808162987 84.19512572069016 52.078220338043195 95.88002580707143 653.762105604393 6.646578015284696 +subject18 59 visit_1 4.324289957643888 13.932680937521425 0.1958217641468802 0.45284142845401315 37.70480218882887 33.72078973767995 10.809597015514536 6.244331249967679 44.95512367435171 40.63588105728326 48.12692674732034 57.738195580550155 26.119343846592074 999.2530795459547 12.79798374411447 +subject19 67 visit_1 6.667937982954537 12.149701457099965 0.9353034472497997 1.4026876819201766 36.724913033950294 30.405235842855383 11.348898207846023 7.50277481368734 64.62057896292924 59.381829864982564 40.572445263914005 29.447147916447022 24.611362824565276 311.2736854250777 12.425215195338607 +subject20 62 visit_1 5.07211015424058 5.5650216694648265 0.7143852596478991 1.4084576989137971 35.77725913248309 35.056223678308804 8.53216220346032 1.610828711104471 86.77092059988642 82.49153079927703 9.94479230358479 89.43651066161601 28.236995685596412 929.0797952542335 10.555195850440816 diff --git a/test_data/cmqt_kidney_function.tsv b/test_data/cmqt_kidney_function.tsv new file mode 100644 index 0000000..00309da --- /dev/null +++ b/test_data/cmqt_kidney_function.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit cystatin_c_1 serum_creatinine_1 +subject1 59 visit_1 3.2167548628635756 0.8852037452177444 +subject2 46 visit_1 2.4575074277365747 0.5412922549274802 +subject3 55 visit_1 3.8911446450723277 0.8269055900636425 +subject4 55 visit_1 3.5959805771874747 0.8220642194803949 +subject5 62 visit_1 4.635618001112973 0.6482242351620069 +subject6 55 visit_1 3.6892754419197225 0.4292640789194334 +subject7 61 visit_1 1.7187533698988435 0.8139710063999628 
+subject8 65 visit_1 2.7868554807216372 0.7274597878071561 +subject9 66 visit_1 4.896539871909272 0.2679929737314125 +subject10 51 visit_1 4.776863213682724 0.8100452095007857 +subject11 61 visit_1 3.566604498180317 0.5383660448886572 +subject12 55 visit_1 3.01571945400457 0.7629831447474025 +subject13 52 visit_1 3.3830573385171507 0.31144719679562066 +subject14 66 visit_1 2.9548628840866913 0.12945272008719222 +subject15 57 visit_1 3.0343519073969927 0.7152829681244294 +subject16 57 visit_1 3.1690267742183065 0.39887004231075096 +subject17 67 visit_1 4.165026839027673 0.44314864568159607 +subject18 59 visit_1 2.9557960027479124 0.7335240427156229 +subject19 67 visit_1 2.899631557414095 0.3835260300522624 +subject20 62 visit_1 2.716555431126409 0.34061412535744595 diff --git a/test_data/cmqt_lipids.tsv b/test_data/cmqt_lipids.tsv new file mode 100644 index 0000000..c7ae69a --- /dev/null +++ b/test_data/cmqt_lipids.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit triglycerides_1 hdl_1 total_cholesterol_1 ldl_1 ldl_emerge_1 non_hdl_1 +subject1 59 visit_1 118.94786613494463 78.11222471306466 258.0920536741743 82.46464639198933 169.61634833200802 108.39207743928065 +subject2 46 visit_1 108.62210101721742 57.47753529564881 210.4429507685155 161.38467763175282 114.07646247867957 76.39545961834311 +subject3 55 visit_1 128.11956717298366 74.61433540381856 255.99300579310284 73.42929471211319 100.73441782084811 66.74099280726072 +subject4 55 visit_1 124.10533584974965 74.32385316882369 133.79000838954343 198.35670398332047 67.58309936118381 76.7691356944378 +subject5 62 visit_1 138.24440481513642 63.893454109720416 169.33926331713087 126.86117139451909 83.78867798556752 82.7953116034168 +subject6 55 visit_1 125.37414601010823 50.75584473516601 167.65200809190955 113.4275273901473 73.65858035613226 11.953070720472283 +subject7 61 visit_1 98.57504583062428 73.83826038399776 207.05259125465614 107.29784564998208 90.9846609291917 143.23148081242402 +subject8 65 visit_1 
113.10123453781426 68.64758726842938 187.59813908786725 154.90898704441082 80.16581380208316 112.05650766930025 +subject9 66 visit_1 141.7929422579661 41.079578423884755 232.6800703666546 141.46825446137896 135.6436824854716 37.05969964986049 +subject10 51 visit_1 140.16533970608504 73.60271257004715 129.30733723852595 115.6628900103831 114.53328824775753 11.879209854312279 +subject11 61 visit_1 123.70582117525231 57.30196269331943 175.78653119537483 127.87104520540043 74.72658359739302 98.10552898227554 +subject12 55 visit_1 116.21378457446215 70.77898868484415 177.42721396210362 104.03039255162022 92.47353821526272 110.78258580975216 +subject13 52 visit_1 121.20957980383325 43.68683180773724 199.73507029463536 86.52047522502937 127.88604964900193 115.60883188057969 +subject14 66 visit_1 115.386135223579 32.76716320523153 220.86061527600572 128.67913980796104 144.74751242630774 93.21315240224303 +subject15 57 visit_1 116.4671859405991 67.91697808746576 283.8069397586939 148.70416846429998 147.4540746082973 76.43908835010222 +subject16 57 visit_1 118.29876412936896 48.93220253864506 178.5328944271258 108.32700230953047 120.25911259091913 97.94608960993273 +subject17 67 visit_1 131.84436501077636 51.588918740895764 180.34720432403256 130.78891756396732 208.22190209708123 49.09161252772545 +subject18 59 visit_1 115.39882563737162 69.01144256293738 231.53463198174538 97.36088184064815 100.63005833370325 56.832110025209516 +subject19 67 visit_1 114.6349891808317 48.01156180313574 196.6177774510726 92.518122135818 157.83372796879834 149.60042375003604 +subject20 62 visit_1 112.14515386331917 45.43684752144676 258.304826003107 120.08721354147882 111.73118178389402 52.362068888075726 diff --git a/test_data/cvd_cad.tsv b/test_data/cvd_cad.tsv new file mode 100644 index 0000000..e33e24f --- /dev/null +++ b/test_data/cvd_cad.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit cad_1 cad_emerge_1 cad_emerge_mod_1 +subject1 59 visit_1 0 0 0 +subject2 46 visit_1 0 1 0 +subject3 55 
visit_1 0 0 0 +subject4 55 visit_1 0 0 0 +subject5 62 visit_1 0 0 0 +subject6 55 visit_1 0 0 0 +subject7 61 visit_1 0 0 0 +subject8 65 visit_1 0 0 0 +subject9 66 visit_1 0 0 0 +subject10 51 visit_1 0 0 0 +subject11 61 visit_1 0 0 0 +subject12 55 visit_1 0 0 0 +subject13 52 visit_1 0 0 0 +subject14 66 visit_1 1 0 0 +subject15 57 visit_1 0 0 0 +subject16 57 visit_1 0 1 0 +subject17 67 visit_1 1 0 0 +subject18 59 visit_1 0 0 0 +subject19 67 visit_1 1 0 0 +subject20 62 visit_1 0 0 0 diff --git a/test_data/diabetes_diabetes.tsv b/test_data/diabetes_diabetes.tsv new file mode 100644 index 0000000..c5c6440 --- /dev/null +++ b/test_data/diabetes_diabetes.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit t1d_1 t2d_1 t1d_dprism_1 t2d_dprism_1 +subject1 59 visit_1 0 0 0 0 +subject2 46 visit_1 0 1 0 0 +subject3 55 visit_1 0 0 0 0 +subject4 55 visit_1 0 0 0 0 +subject5 62 visit_1 0 0 0 0 +subject6 55 visit_1 0 0 0 0 +subject7 61 visit_1 0 0 0 0 +subject8 65 visit_1 0 0 0 0 +subject9 66 visit_1 0 0 0 0 +subject10 51 visit_1 0 0 0 1 +subject11 61 visit_1 0 0 0 0 +subject12 55 visit_1 0 0 0 0 +subject13 52 visit_1 0 0 0 0 +subject14 66 visit_1 1 0 0 0 +subject15 57 visit_1 0 0 0 0 +subject16 57 visit_1 0 1 0 0 +subject17 67 visit_1 1 0 0 0 +subject18 59 visit_1 0 0 0 0 +subject19 67 visit_1 1 0 0 0 +subject20 62 visit_1 0 0 0 0 diff --git a/test_data/family_history.tsv b/test_data/family_history.tsv new file mode 100644 index 0000000..992414a --- /dev/null +++ b/test_data/family_history.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit family_hx_cancer_breast_1 family_hx_cancer_breast_relatedness_1 family_hx_cancer_prostate family_hx_cancer_prostate_relatedness_1 family_hx_cancer_pancreatic family_hx_cancer_pancreatic_relatedness_1 family_hx_cancer_colorectal family_hx_cancer_colorectal_relatedness_1 family_hx_cancer_lung family_hx_cancer_lung_relatedness_1 family_hx_cancer_any family_hx_type2_diabetes family_hx_type2_diabetes_relatedness_1 family_hx_myocardial_infarction 
family_hx_myocardial_infarction_relatedness_1 family_hx_dementia family_hx_dementia_relatedness_1 family_hx_type1_diabetes family_hx_type1_diabetes_relatedness_1 family_hx_asthma family_hx_asthma_relatedness_1 family_hx_stroke family_hx_stroke_relatedness_1 family_hx_heart_failure family_hx_heart_failure_relatedness_1 +subject1 59 1 1 unknown 1 2nd degree 1 1st degree 0 2nd degree 1 1st degree 1 1 2nd degree 1 unknown 1 2nd degree 1 1st degree 0 unknown 0 2nd degree 0 1st degree +subject2 46 1 0 unknown 0 1st degree 1 2nd degree 0 unknown 1 2nd degree 1 1 2nd degree 0 2nd degree 1 1st degree 0 1st degree 1 1st degree 1 unknown 0 2nd degree +subject3 55 1 0 2nd degree 1 unknown 1 1st degree 1 1st degree 0 1st degree 0 1 1st degree 1 unknown 0 unknown 0 2nd degree 0 1st degree 0 unknown 1 2nd degree +subject4 55 1 0 2nd degree 0 unknown 1 2nd degree 0 unknown 0 unknown 0 0 2nd degree 0 1st degree 0 2nd degree 0 2nd degree 1 unknown 0 2nd degree 1 unknown +subject5 62 1 1 2nd degree 1 1st degree 1 2nd degree 0 1st degree 0 2nd degree 0 1 unknown 0 1st degree 1 1st degree 0 2nd degree 1 2nd degree 0 2nd degree 0 1st degree +subject6 55 1 0 unknown 0 2nd degree 0 2nd degree 1 2nd degree 0 1st degree 0 1 1st degree 0 1st degree 1 unknown 0 2nd degree 1 2nd degree 1 unknown 1 2nd degree +subject7 61 1 1 2nd degree 1 1st degree 0 unknown 1 2nd degree 0 unknown 0 1 2nd degree 0 unknown 1 1st degree 0 1st degree 0 unknown 1 2nd degree 1 unknown +subject8 65 1 1 unknown 1 2nd degree 1 1st degree 0 unknown 0 1st degree 0 0 unknown 1 1st degree 1 1st degree 1 1st degree 1 unknown 0 2nd degree 1 2nd degree +subject9 66 1 1 2nd degree 1 unknown 0 unknown 1 unknown 0 unknown 1 0 unknown 0 1st degree 0 unknown 1 unknown 0 1st degree 1 unknown 0 1st degree +subject10 51 1 0 2nd degree 1 unknown 1 unknown 0 2nd degree 1 unknown 0 1 1st degree 1 2nd degree 1 unknown 0 1st degree 0 1st degree 1 1st degree 1 1st degree +subject11 61 1 1 2nd degree 0 2nd degree 0 1st degree 0 2nd degree 
0 2nd degree 0 1 unknown 1 1st degree 1 1st degree 1 1st degree 0 unknown 0 1st degree 0 unknown +subject12 55 1 0 1st degree 1 2nd degree 0 2nd degree 0 1st degree 1 unknown 1 1 1st degree 1 1st degree 1 2nd degree 0 1st degree 0 2nd degree 0 unknown 0 2nd degree +subject13 52 1 0 unknown 1 2nd degree 1 1st degree 0 2nd degree 0 unknown 1 0 1st degree 1 1st degree 0 2nd degree 0 2nd degree 0 1st degree 0 unknown 0 2nd degree +subject14 66 1 1 2nd degree 1 2nd degree 0 2nd degree 0 unknown 1 1st degree 1 0 2nd degree 1 2nd degree 1 2nd degree 1 2nd degree 1 1st degree 1 1st degree 1 unknown +subject15 57 1 0 2nd degree 1 unknown 0 unknown 1 1st degree 0 1st degree 1 0 2nd degree 0 2nd degree 0 1st degree 1 unknown 1 unknown 0 1st degree 0 2nd degree +subject16 57 1 0 unknown 0 unknown 0 2nd degree 1 unknown 0 unknown 1 1 2nd degree 1 unknown 1 1st degree 1 2nd degree 0 1st degree 1 2nd degree 0 2nd degree +subject17 67 1 1 2nd degree 0 1st degree 1 2nd degree 1 1st degree 1 unknown 0 1 1st degree 0 1st degree 0 2nd degree 0 2nd degree 0 unknown 1 unknown 0 unknown +subject18 59 1 1 2nd degree 0 2nd degree 0 unknown 0 1st degree 0 2nd degree 0 0 unknown 1 1st degree 0 unknown 0 unknown 0 2nd degree 0 1st degree 0 unknown +subject19 67 1 1 2nd degree 1 1st degree 0 unknown 0 2nd degree 0 2nd degree 0 0 1st degree 1 unknown 1 2nd degree 0 2nd degree 1 2nd degree 0 2nd degree 1 unknown +subject20 62 1 1 1st degree 1 1st degree 1 1st degree 0 unknown 1 1st degree 1 0 unknown 1 1st degree 0 1st degree 0 1st degree 1 unknown 0 2nd degree 0 1st degree diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv new file mode 100644 index 0000000..36f6253 --- /dev/null +++ b/test_data/phenotype_harmonized.tsv @@ -0,0 +1,14 @@ +domain md5sum file_path file_readme_path n_subjects n_rows +population_descriptor 98316ca7ae3b5332e28e916155b844d3 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/population_descriptor.tsv 
gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_flags 99dee9ebbef7e7a0681d4ae5b3b0063e gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_flags.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_anthropometry d26b5af92459c0961442e2dcf7ce9235 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_blood_pressure 9877023cd800d4235e1b99a650f88694 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_lipids 17ce825be3d94425e26c08987fa78cd9 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_hematology 155f8eac3c84a91fdb17eff3739e7799 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_hematology.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_glycemic 4af06300bac223b5462356532fa98729 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_glycemic.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_kidney_function 35962811d3e9c081de82e4f3f8e4bfb5 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_kidney_function.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +diabetes_diabetes bf4ff29e1312614c66a08a27b4d129c5 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cvd_cad 26439afc298880695450a008d3f92290 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cvd_cad.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_breast 322959303fc4c173f503aaea46fbccbf gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_breast.tsv 
gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_prostate af65adca42868373afc81bef2dd2cd2b gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +family_history 53fe6db8b6d0864ca9adc9e46061a6b0 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/family_history.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/population_descriptor.tsv b/test_data/population_descriptor.tsv new file mode 100644 index 0000000..92b22ae --- /dev/null +++ b/test_data/population_descriptor.tsv @@ -0,0 +1,21 @@ +subject_id population_descriptor population_label country_of_recruitment country_of_birth +subject1 population|superpopulation GBR|EUR Sierra Leone USA +subject2 population|superpopulation MSL|AFR Spain Peru +subject3 population|superpopulation MXL|AMR USA Sierra Leone +subject4 population|superpopulation MXL|AMR Peru Sierra Leone +subject5 population|superpopulation GBR|EUR Sierra Leone Spain +subject6 population|superpopulation MSL|AFR Peru UK +subject7 population|superpopulation MXL|AMR Sierra Leone Peru +subject8 population|superpopulation MSL|AFR Sierra Leone USA +subject9 population|superpopulation MXL|AMR UK Sierra Leone +subject10 population|superpopulation MXL|AMR USA Sierra Leone +subject11 population|superpopulation MXL|AMR Spain Spain +subject12 population|superpopulation IBS|EUR UK UK +subject13 population|superpopulation MSL|AFR Sierra Leone USA +subject14 population|superpopulation GBR|EUR Sierra Leone Spain +subject15 population|superpopulation MXL|AMR Sierra Leone Sierra Leone +subject16 population|superpopulation MSL|AFR USA UK +subject17 population|superpopulation MXL|AMR Peru Spain +subject18 population|superpopulation GBR|EUR Peru USA +subject19 population|superpopulation IBS|EUR Sierra Leone Spain +subject20 population|superpopulation PEL|AMR UK Peru diff --git a/test_data/readme.tsv 
b/test_data/readme.tsv new file mode 100644 index 0000000..bb2250c --- /dev/null +++ b/test_data/readme.tsv @@ -0,0 +1,2 @@ +read_me +NA diff --git a/test_data/subject.tsv b/test_data/subject.tsv new file mode 100644 index 0000000..3bab20a --- /dev/null +++ b/test_data/subject.tsv @@ -0,0 +1,21 @@ +subject_id consent_code study_nickname dbgap_submission reported_sex +subject1 DS-CVD ARIC TRUE Unknown +subject2 DS-CVD UKBB TRUE Female +subject3 HMB-IRB JHS FALSE Other +subject4 HMB-IRB UKBB FALSE Other +subject5 HMB-IRB ARIC FALSE Female +subject6 DS-CVD UKBB FALSE Unknown +subject7 HMB-IRB ARIC FALSE Female +subject8 DS-CVD ARIC FALSE Unknown +subject9 HMB-IRB ARIC FALSE Other +subject10 HMB-IRB JHS FALSE Other +subject11 HMB-IRB JHS FALSE Male +subject12 GRU ARIC FALSE Unknown +subject13 DS-CVD ARIC FALSE Male +subject14 HMB-IRB ARIC FALSE Male +subject15 HMB-IRB ARIC FALSE Other +subject16 DS-CVD JHS FALSE Unknown +subject17 HMB-IRB UKBB FALSE Male +subject18 HMB-IRB UKBB FALSE Male +subject19 HMB-IRB ARIC FALSE Male +subject20 GRU ARIC FALSE Female diff --git a/test_data/test_files.R b/test_data/test_files.R new file mode 100644 index 0000000..2c067d1 --- /dev/null +++ b/test_data/test_files.R @@ -0,0 +1,342 @@ +library(dplyr) +library(readr) +library(tools) + +# number of rows in test data +n <- 20 + +file_names <- c( + # "pilot", + "population_descriptor", + "cmqt_flags", + "cmqt_anthropometry", + "cmqt_blood_pressure", + "cmqt_lipids", + "cmqt_hematology", + "cmqt_glycemic", + "cmqt_kidney_function", + "diabetes_diabetes", + "cvd_cad", + "cancer_breast", + "cancer_prostate", + "family_history") + +# Compatibility for using set.seed between different versions of R: +# https://stackoverflow.com/questions/47199415/is-set-seed-consistent-over-different-versions-of-r-and-ubuntu +RNGkind( + kind = "Mersenne-Twister", + normal.kind = "Inversion", + sample.kind = "Rounding" +) + +# truncated normal distribution +rtnorm <- function(n, mean, sd, a = -Inf, b = Inf){ + 
qnorm(runif(n, pnorm(a, mean, sd), pnorm(b, mean, sd)), mean, sd) +} + +set.seed(4) + +subject <- tibble( + subject_id = paste0("subject", 1:n), + age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), + consent_code = sample(x = c("GRU", "HMB-IRB", "DS-CVD"), size = n, replace = TRUE), + study_nickname = sample(x = c("UKBB", "JHS", "ARIC"), size = n, replace = TRUE), + dbgap_submission = c(rep(TRUE, 2), rep(FALSE, n-2)), + reported_sex = sample(x = c("Female", "Male", "Unknown", "Other"), size = n, replace = TRUE) +) + +set.seed(4) + +population_descriptor <- tibble( + subject_id=rep(subject$subject_id), + # population_descriptor_id = sample(x = c("01bb18a183122d64", "022f19bfe0b628e1", "0224684f6cb9e980"), size = n, replace = TRUE), + population_descriptor = sample(x = c("population|superpopulation"), size = n, replace = TRUE), + population_label = sample(x = c("PEL|AMR", "IBS|EUR", "MXL|AMR", "GBR|EUR", "MSL|AFR"), size = n, replace = TRUE), + country_of_recruitment = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE), + country_of_birth = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE) +) + +set.seed(4) + +cmqt_flags <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + flag_pregnancy_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_acute_illness_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_bld_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_anemia_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_hiv_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_eskd_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_splenectomy_1 = sample(x = c("no", "yes", 
"unknown", "data not collected"), size = n, replace = TRUE), + flag_cirrhosis_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_fasting_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_lipids_med_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_bp_med_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_cvd_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_t2d_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_t1d_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_diabetes_other_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), +) + +set.seed(4) + +cmqt_anthropometry <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + height_1=rnorm(n, 165, 7), # height in cm + weight_1=rnorm(n, 80, 5), # weight in kg + bmi_1=weight_1 / (height_1 / 100)^2, # bmi in kg/m^2 + waist_hip_ratio_1=rnorm(n, 0.8, 0.08) +) + +set.seed(4) + +cmqt_blood_pressure <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + systolic_bp_1=rnorm(n, 120, 20), + diastolic_bp_1=rnorm(n, 80, 10), + hypertension_1=ifelse(systolic_bp_1 > 140 & diastolic_bp_1 > 90, 1, 0) +) + +set.seed(4) + +cmqt_lipids <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + triglycerides_1=rnorm(n, 116, 13.6), # mg/dL + hdl_1=rnorm(n, 55, 15), + total_cholesterol_1=rnorm(n, 203, 41), + ldl_1=rnorm(n, 122, 37), + ldl_emerge_1=rnorm(n, 122, 37), + non_hdl_1=rnorm(n, 81, 40), +) + +set.seed(4) + +cmqt_hematology <- tibble( + subject_id=rep(subject$subject_id), + 
age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + rbc_1=rtnorm(n, 4, 1.5, a=0, b=100), + hemoglobin_1=rtnorm(n, 13, 3, a=0, b=100), + hematocrit_1=rtnorm(n, 0.4, 0.4, a=0, b=100), + mcv_1=(hematocrit_1 * 10) / rbc_1, + mch_1=rtnorm(n, 38, 2, a=0, b=1000), + mchc_1=rtnorm(n, 34, 2, a=0, b=1000), + rdw_1=rtnorm(n, 10, 1, a=0, b=100), + wbc_1=rtnorm(n, 8, 3, a=0, b=10000), + basophil_count_1=rtnorm(n, 200, 100, a=0, b=100), + eosinophil_count_1=rtnorm(n, 200, 100, a=0, b=100), + lymphocyte_count_1=rtnorm(n, 1300, 1000, a=0, b=100), + monocyte_count_1=rtnorm(n, 450, 100, a=0, b=100), + neutrophil_count_1=rtnorm(n, 4000, 2000, a=0, b=100), + platelet_count_1=rtnorm(n, 800, 200, a=0, b=1000), + mean_platelet_volume_1=rtnorm(n, 10, 2, a=0, b=100) +) + +set.seed(4) + +cmqt_glycemic <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + fasting_glucose_plasma_1=rnorm(n, 50, 20), + fasting_glucose_serum_1=rnorm(n, 85, 10), + fasting_insulin_1=rnorm(n, 36, 6), + hba1c_1=rnorm(n, 3, 2) +) + +set.seed(4) + +cmqt_kidney_function <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + cystatin_c_1=rnorm(n, 3, 1), + serum_creatinine_1=rnorm(n, .5, 0.25) +) + +set.seed(4) + +diabetes_diabetes <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + t1d_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + t2d_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + t1d_dprism_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + t2d_dprism_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), +) + +set.seed(4) + +cvd_cad <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + cad_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + 
cad_emerge_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + cad_emerge_mod_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)) +) + +set.seed(4) + +cancer_breast <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + breast_cancer_status_emerge_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + breast_cancer_status_registry_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + breast_cancer_status_survey_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + age_at_diagnosis_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 5, x, 90))), + year_at_diagnosis_1=round(rtnorm(n, 2010, 5, 1900, 2024)), + breast_cancer_type_1=sample(x = c("unilateral", "bilateral"), size = n, replace = TRUE), + cancer_behavior_1=sample(x = c("benign", "borderline", "in_situ", "invasive"), size = n, replace = TRUE), + her2_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), + pr_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), + er_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), + T_stage_clinical_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_uknown_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_clinical_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_unknown_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, 
replace = TRUE), + nodal_involvement_1=sample(x = c("NX", "N0", "N1", "N2", "N3"), size = n, replace = TRUE), + distant_metastasis_1=sample(x = c("MX", "M0", "M1"), size = n, replace = TRUE), + stage_system_1=rep(NA, n), + grade_clinical_1=sample(x = c("grade 1", "grade 2", "grade 3"), size = n, replace = TRUE), + grade_pathological_1=sample(x = c("grade 1", "grade 2", "grade 3"), size = n, replace = TRUE), + grade_unknown_1=sample(x = c("grade 1", "grade 2", "grade 3"), size = n, replace = TRUE), + screening_history_1=sample(x = c(0, 1), size = n, replace = TRUE), + recurrence_1=sample(x = c("recurrence_primary", "recurrence_second_primary", "unknown", "none"), size = n, replace = TRUE), + surgery_1=sample(x = c(0, 1), size = n, replace = TRUE), + radiotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), + chemotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), + hormone_therapy_1=sample(x = c("pharmaceutical", "surgical", "both", "none", "unknown"), size = n, replace = TRUE), + NSAID_1=sample(x = c(0, 1), size = n, replace = TRUE), + age_at_natural_menopause_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 10, x, 90))), + post_menopausal_hormone_use_1=sample(x = c(0, 1), size = n, replace = TRUE), + parity_1=sample(x = c(0, 1, 2), size = n, replace = TRUE), + age_at_first_birth_1=round(rtnorm(n, 28, 5, 0, 90)), + age_at_menarche_1=round(rtnorm(n, 15, 3, 0, 90)), + deceased_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), + cause_of_death_breast_cancer_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), + age_at_death_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 20, x, 90))), +) + +set.seed(4) + +cancer_prostate <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + prostate_cancer_status_emerge_1=sample(x = c(0, 1), size = n, replace = TRUE), + prostate_cancer_status_registry_1=sample(x = c(0, 1), size = n, replace = TRUE), + 
prostate_cancer_status_survey_1=sample(x = c(0, 1), size = n, replace = TRUE), + age_at_diagnosis_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 5, x, 90))), + year_at_diagnosis_1=round(rtnorm(n, 2010, 5, 1900, 2024)), + cancer_behavior_1=sample(x = c("benign", "borderline", "in_situ", "invasive"), size = n, replace = TRUE), + T_stage_clinical_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_uknown_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_clinical_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_unknown_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + nodal_involvement_1=sample(x = c("NX", "N0", "N1", "N2", "N3"), size = n, replace = TRUE), + distant_metastasis_1=sample(x = c("MX", "M0", "M1"), size = n, replace = TRUE), + stage_system_1=rep(NA, n), + gleason_score_clinical_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), + gleason_score_pathological_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), + gleason_score_unknown_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), + psa_1=rtnorm(n, 1.5, 1, 0, 50), + psa_at_diagnosis_1=rtnorm(n, 1.5, 1, 0, 50), + screening_history_1=sample(x = c(0, 1), size = n, replace = TRUE), + recurrence_1=sample(x = c("recurrence_primary", "recurrence_second_primary", "unknown", "none"), size = n, replace = TRUE), + surgery_1=sample(x = c(0, 1), size = n, replace = TRUE), + radiotherapy_1=sample(x = c(0, 1), size 
= n, replace = TRUE), + chemotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), + hormone_therapy_1=sample(x = c(0, 1), size = n, replace = TRUE), + NSAID_1=sample(x = c(0, 1), size = n, replace = TRUE), + deceased_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), + cause_of_death_prostate_cancer_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), + age_at_death_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 20, x, 90))), +) + +set.seed(4) + +family_history <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep(1, n), + family_hx_cancer_breast_1=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_breast_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_prostate=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_prostate_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_pancreatic=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_pancreatic_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_colorectal=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_colorectal_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_lung=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_lung_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_any=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_type2_diabetes=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_type2_diabetes_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_myocardial_infarction=sample(x = c(0, 1), size = n, replace = TRUE), + 
family_hx_myocardial_infarction_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_dementia=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_dementia_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_type1_diabetes=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_type1_diabetes_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_asthma=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_asthma_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_stroke=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_stroke_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_heart_failure=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_heart_failure_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE) +) + +# fill in table after uploading tsv files to anvil + +bucket <- "gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/" + +readme <- tibble( + read_me = c(NA) +) + +subject <- subject %>% select(-age_at_obs) + + +# working in primed_data_models/test_data directory + +write_tsv(readme, "readme.tsv") +write_tsv(subject, "subject.tsv") +write_tsv(population_descriptor, "population_descriptor.tsv") +write_tsv(cmqt_flags, "cmqt_flags.tsv") +write_tsv(cmqt_anthropometry, "cmqt_anthropometry.tsv") +write_tsv(cmqt_blood_pressure, "cmqt_blood_pressure.tsv") +write_tsv(cmqt_lipids, "cmqt_lipids.tsv") +write_tsv(cmqt_hematology, "cmqt_hematology.tsv") +write_tsv(cmqt_glycemic, "cmqt_glycemic.tsv") +write_tsv(cmqt_kidney_function, "cmqt_kidney_function.tsv") +write_tsv(diabetes_diabetes, "diabetes_diabetes.tsv") +write_tsv(cvd_cad, "cvd_cad.tsv") +write_tsv(cancer_breast, "cancer_breast.tsv") +write_tsv(cancer_prostate, 
"cancer_prostate.tsv") +write_tsv(family_history, "family_history.tsv") + +phenotype_harmonized <- tibble( + # phenotype_harmonized_id= + domain=(file_names), + md5sum=as.vector(md5sum(paste0(file_names, ".tsv"))), + file_path=paste0(bucket, file_names, '.tsv'), + file_readme_path=paste0(bucket, 'readme.tsv'), + n_subjects=rep(n, length(file_names)), + n_rows=rep(n, length(file_names)), +) + +write_tsv(phenotype_harmonized, "phenotype_harmonized.tsv")