diff --git a/README.md b/README.md
index 9242ced..57c3efa 100644
--- a/README.md
+++ b/README.md
@@ -49,12 +49,13 @@ No compilation is required for ntRoot (only the dependencies), so simply add the
 - [ntEdit 2.0.2+](https://github.com/bcgsc/ntEdit)
 - [snakemake](https://snakemake.readthedocs.io/en/stable/)
 - [samtools](https://www.htslib.org/)
-
+- [bedtools](https://bedtools.readthedocs.io/en/latest/)
 
 
 ## Usage <a name=usage></a>
 ```
-usage: ntroot [-h] -r REFERENCE [--reads READS] [--genome GENOME [GENOME ...]] -l L -k K [--tile TILE] [--lai] [-t T] [-z Z] [-j J] [-Y Y] [-v] [-V] [-n] [-f]
+usage: ntroot [-h] [-r REFERENCE] [--reads READS] [--genome GENOME [GENOME ...]] -l L [-k K] [--tile TILE] [--lai] [-t T] [-z Z] [-j J] [-Y Y] [--custom_vcf CUSTOM_VCF]
+              [--strip_info] [-v] [-V] [-n] [-f]
 
 ntRoot: Ancestry inference from genomic data
 
@@ -73,6 +74,9 @@ optional arguments:
   -z Z                  Minimum contig length [default=100]
   -j J                  controls size of k-mer subset. When checking subset of k-mers, check every jth k-mer [default=3]
   -Y Y                  Ratio of number of k-mers in the k subset that should be present to accept an edit (higher=stringent) [default=0.55]
+  --custom_vcf CUSTOM_VCF
+                        Input VCF for computing ancestry. When specified, ntRoot will skip the ntEdit step, and predict ancestry from the provided VCF.
+  --strip_info          When using --custom_vcf, strip the existing INFO field from the input VCF.
   -v, --verbose         Verbose mode [default=False]
   -V, --version         show program's version number and exit
   -n, --dry-run         Print out the commands that will be executed
@@ -122,6 +126,10 @@ Example command:
 ntroot -k 55 --reference GRCh38.fa.gz --reads ERR3242308_ -t 48 -Y 0.55 -l 1000GP_integrated_snv_v2a_27022019.GRCh38.phased_gt1.vcf.gz
 </pre>
 
+If you would like to infer ancestry from a pre-existing VCF file:
+<pre>
+ntroot -r GRCh38.fa.gz --custom_vcf third_party.vcf -l 1000GP_integrated_snv_v2a_27022019.GRCh38.phased_gt1.vcf.gz
+</pre>
 
 Note: For more advanced users, and for ancestry predictions on organisms other than human, please contact us.
 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 0a790d5..f9a2ccf 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -19,7 +19,7 @@ jobs:
   - script: |
       source activate ntroot_CI
       conda install --yes --name ntroot_CI -c conda-forge -c bioconda python=3.9 mamba
-      mamba install --yes -c conda-forge -c bioconda meson ninja snakemake perl 'ntedit>=2.0.1' samtools
+      mamba install --yes -c conda-forge -c bioconda meson ninja snakemake perl 'ntedit>=2.0.1' samtools bedtools
     displayName: Install Conda packages
   
   - script: |
@@ -48,7 +48,7 @@ jobs:
   - script: |
       source activate ntroot_CI
       conda install --yes --name ntroot_CI -c conda-forge -c bioconda python=3.9 mamba
-      mamba install --yes -c conda-forge -c bioconda snakemake perl 'ntedit>=2.0.1' samtools
+      mamba install --yes -c conda-forge -c bioconda snakemake perl 'ntedit>=2.0.1' samtools bedtools
     displayName: Install Conda packages
   
   - script: |
diff --git a/demo/HG002.chr21.vcf.gz b/demo/HG002.chr21.vcf.gz
new file mode 100644
index 0000000..0bb94ef
Binary files /dev/null and b/demo/HG002.chr21.vcf.gz differ
diff --git a/demo/run_ntroot_demo.sh b/demo/run_ntroot_demo.sh
index fa37547..75884c1 100755
--- a/demo/run_ntroot_demo.sh
+++ b/demo/run_ntroot_demo.sh
@@ -32,4 +32,16 @@ else
 	exit 1
 fi
 
+echo "Running ntRoot input VCF demo..."
+
+ntroot --reference chr21.fa --custom_vcf HG002.chr21.vcf.gz -l pop-spec-snp_chr21.vcf.gz
+prediction=$(cat HG002.chr21.vcf.gz.cross-ref.vcf_ancestry-predictions_tile5000000.tsv | awk '{print $1}' |head -n 2 |tail -n 1)
+if [ ${prediction} == "EUR" ]; then
+	echo "ntRoot input VCF test successful!"
+else 
+	echo "ntRoot input VCF test failed.. Please check your installation."
+	exit 1
+fi
+ 
+
 echo "Done ntRoot tests!"
diff --git a/ntroot b/ntroot
index 74b5fab..2b9dbf6 100755
--- a/ntroot
+++ b/ntroot
@@ -37,7 +37,7 @@ def set_up_parser():
 
     parser.add_argument("-k",
                         help="k-mer size",
-                        required=True, type=int)
+                        required=False, type=int)
     parser.add_argument("--tile", help="Tile size for ancestry fraction inference (bp) [default=5000000]",
                         default=5000000, type=int)
     parser.add_argument("--lai", help="Output ancestry predictons per tile in a separate output file",
@@ -54,6 +54,12 @@ def set_up_parser():
                         help="Ratio of number of k-mers in the k subset that should be present to accept "
                             "an edit (higher=stringent) "
                             "[default=0.55]", default=0.55, type=float)
+    parser.add_argument("--custom_vcf", help="Input VCF for computing ancestry. "
+                                        "When specified, ntRoot will skip the ntEdit step, and "
+                                        "predict ancestry from the provided VCF.",
+                        type=str)
+    parser.add_argument("--strip_info", help="When using --custom_vcf, strip the existing INFO field from the input VCF.",
+                        action="store_true")
     parser.add_argument("-v", "--verbose",
                         help="Verbose mode [default=False]", action="store_true", default=False)
     parser.add_argument("-V", "--version", action="version", version=NTROOT_VERSION)
@@ -71,7 +77,7 @@ def main():
 
     args = parser.parse_args()
 
-    if (args.reads and args.genome) or (not args.reads and not args.genome):
+    if ((args.reads and args.genome) or (not args.reads and not args.genome)) and not args.custom_vcf:
         raise argparse.ArgumentTypeError("Please specify --reads OR --genome")
 
     if not args.draft and not args.reference:
@@ -79,6 +85,10 @@ def main():
     if args.draft:
         args.reference = args.draft
 
+    if args.reads or args.genome:
+        if not args.k:
+            raise argparse.ArgumentTypeError("Please specify the k-mer size with -k")
+
     base_dir = os.path.dirname(os.path.realpath(__file__))
 
     intro_string = ["Running ntRoot...",
@@ -94,26 +104,38 @@ def main():
             smk_rule = "ntroot_genome_lai"
         else:
             smk_rule = "ntroot_genome"
+    if args.custom_vcf:
+        if args.lai:
+            smk_rule = "ntroot_input_vcf_lai"
+        else:
+            smk_rule = "ntroot_input_vcf"
 
     command = f"snakemake -s {base_dir}/ntroot_run_pipeline.smk {smk_rule} --nolock -p --cores {args.t} " \
-            f"--config reference={args.reference} k={args.k} t={args.t} " \
-            f"z={args.z} tile_size={args.tile} " \
-            f"j={args.j} Y={args.Y} "
+            f"--config reference={args.reference} t={args.t} " \
+            f"tile_size={args.tile} "
 
     if args.genome:
         intro_string.append(f"\t--genome {args.genome}")
-        command += f"genomes={args.genome} "
+        command += f"genomes={args.genome}"
+    elif args.custom_vcf:
+        intro_string.append(f"\t--custom_vcf {args.custom_vcf}")
+        command += f" input_vcf={args.custom_vcf}"
+        if args.strip_info:
+            intro_string.append("\t--strip_info")
+            command += " strip_info=True"
     else:
         intro_string.append(f"\t--reads {args.reads}")
         command += f"reads={args.reads}"
 
-    intro_string.extend([f"\t-k {args.k}",
-                        f"\t--reference {args.reference}",
-                        f"\t-t {args.t}",
-                        f"\t-z {args.z}",
-                        f"\t-j {args.j}",
-                        f"\t-Y {args.Y}"
-                    ])
+    intro_string.extend([f"\t--reference {args.reference}",
+                        f"\t-t {args.t}"])
+
+    if args.genome or args.reads:
+        intro_string.extend([f"\t-k {args.k}",
+                             f"\t-z {args.z}",
+                             f"\t-j {args.j}",
+                             f"\t-Y {args.Y}"])
+        command += f" k={args.k} z={args.z} j={args.j} Y={args.Y}"
 
     if args.lai:
         intro_string.append("\t--lai")
diff --git a/ntroot_cross_reference_vcf.py b/ntroot_cross_reference_vcf.py
new file mode 100755
index 0000000..eff9ff0
--- /dev/null
+++ b/ntroot_cross_reference_vcf.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python3
+'''
+Given the result of LOJ between VCF files, output valid ntEdit VCF file
+'''
+import argparse
+import shlex
+import subprocess
+import gzip
+
+class Vcf:
+    "Represents a VCF entry"
+    def __init__(self, chr, position, idenfitier, ref, alt, qual, filter, info, format=None,
+                 integration=None, parse_info=True, strip_info=True):
+        self.chr = chr
+        self.position = int(position)
+        self.idenfitier = idenfitier
+        self.ref = ref
+        self.alt = alt.split(",")
+        self.qual = qual
+        self.filter = filter
+        self.info_alts = None
+        if parse_info: # We want to parse the elements in info to a dictionary
+            self.info_alts = self.parse_info(info)
+        if "VCF_L" in info: # Accounting for placeholders for INFO line in the temp VCF
+            self.info_str = info.split("VCF_L")[0].strip(";")
+        elif not parse_info and not strip_info:
+            self.info_str = info
+        else:
+            self.info_str = ""
+        self.format = format
+        self.integration = integration
+
+    def remove_info(self):
+        "Clear out the info and position attributes"
+        self.info_alts = {}
+        self.position = -1
+
+    def get_info(self, alt_base=None):
+        "Return the info for the given alt_base, if specified"
+        info_str = ""
+        if self.info_str is not None:
+            info_str += self.info_str
+        if alt_base is None:
+            return info_str
+        if alt_base in self.alt and self.info_alts is not None and alt_base in self.info_alts:
+            info_str += ";" + ";".join(self.info_alts[alt_base])
+        return info_str.strip(";")
+
+    def strip_string_from_info(self, info):
+        "Strip ^NA from the info string" 
+        return info.replace("^NA", "")
+
+    def add_info(self, alt_dict):
+        "Add INFO from the dictionary"
+        for alt_base in alt_dict:
+            if alt_base in self.info_alts:
+                continue
+            self.info_alts[alt_base] = alt_dict[alt_base]
+            self.alt.append(alt_base)
+
+    def parse_info(self, info):
+        "Parse INFO string to data structures for: alt alleles, both"
+        info_formatted = self.strip_string_from_info(info) if "^" in info else info
+        if "VCF_L" in info_formatted:
+            info_formatted = info_formatted.split("VCF_L")[1].strip(";") # Only parse after the VCF_L part
+        alt_alleles = {}
+        for item in info_formatted.split(";"):
+            if "=" in item:
+                key, value = item.split("=")
+                split_val = value.split(",")
+                if len(split_val) > 1 and len(split_val) == len(self.alt):
+                    for i, v in enumerate(split_val):
+                        if self.alt[i] not in alt_alleles:
+                            alt_alleles[self.alt[i]] = []
+                        alt_alleles[self.alt[i]].append(f"{key}={v}")
+                else:
+                    for alt_base in self.alt:
+                        if alt_base not in alt_alleles:
+                            alt_alleles[alt_base] = []
+                        alt_alleles[alt_base].append(f"{key}={value}")
+            else:
+                for alt_base in self.alt:
+                    if alt_base not in alt_alleles:
+                        alt_alleles[alt_base] = []
+                    alt_alleles[alt_base].append(item)
+
+
+        return alt_alleles
+
+    def get_info_dict(self):
+        "Return the INFO in a dictionary form"
+        return {alt_base: {info.split("=")[0]: info.split("=")[1] \
+            if len(info.split("=")) == 2 else None for info in info_list} \
+                for alt_base, info_list in self.info_alts.items()}
+
+
+    def __str__(self):
+        strs = []
+        for alt_base in self.alt:
+            if self.format is None or self.integration is None:
+                strs.append(f"{self.chr}\t{self.position}\t{self.idenfitier}\t{self.ref}\t{alt_base}\t{self.qual}"\
+                        f"\t{self.filter}\t{self.get_info(alt_base)}")
+            else:
+                info = self.get_info(alt_base)
+                strs.append(f"{self.chr}\t{self.position}\t{self.idenfitier}\t{self.ref}\t{alt_base}\t{self.qual}"\
+                            f"\t{self.filter}\t{info}\t{self.format}\t{self.integration}")
+        return "\n".join(strs)
+
+    def get_tmp_print_line(self):
+        "Return a line for printing to a temporary file"
+        strs = []
+        for alt_base in self.alt:
+            if self.get_info(alt_base) == "":
+                info = self.info_str + ";VCF_L;"
+            else:
+                info = self.get_info(alt_base) + ";VCF_L;"
+            if self.format is None or self.integration is None:
+                strs.append(f"{self.chr}\t{self.position}\t{self.idenfitier}\t{self.ref}\t{alt_base}\t{self.qual}"\
+                        f"\t{self.filter}\t{info}")
+            else:
+                strs.append(f"{self.chr}\t{self.position}\t{self.idenfitier}\t{self.ref}\t{alt_base}\t{self.qual}"\
+                            f"\t{self.filter}\t{info}\t{self.format}\t{self.integration}")
+        return "\n".join(strs)
+
+def write_header(vcffile, outfile, info_only=False):
+    "Write header from input VCF file to standard out"
+    vcf_open = open(vcffile, 'r', encoding="utf8") if not vcffile.endswith(".gz") else gzip.open(vcffile, 'rt')
+    for line in vcf_open:
+        line = line.strip()
+        if info_only and line.startswith("##INFO"):
+            outfile.write(f"{line}\n")
+        elif not info_only and line.startswith("##"):
+            outfile.write(f"{line}\n")
+        elif not info_only and line.startswith("#"):
+            vcf_open.close()
+            return line
+        elif line.startswith("#"):
+            continue
+        else:
+            vcf_open.close()
+            return
+    vcf_open.close()
+
+def get_all_vcf_info(vcf1, vcf2, alt_base):
+    "Concatenate INFO strings from both VCF files"
+    return f"{vcf1.get_info(None)};VCF_L;{vcf2.get_info(alt_base)}".strip(";")
+
+def are_compatible(vcf1, vcf2):
+    "Ensure that at least one alt in vcf1 matches vcf2, references match, positions match"
+    if vcf1.ref != vcf2.ref:
+        return False
+    if vcf1.position != vcf2.position:
+        return False
+    for alt_base in vcf1.alt:
+        if alt_base in vcf2.alt:
+            return True
+    return False
+
+def print_vcf_line(ntedit_vcf, l_vcf, outfile):
+    "Print the info into a VCF"
+    if l_vcf.position == -1:
+        outfile.write(f"{ntedit_vcf.get_tmp_print_line()}\n")
+    else:
+        if not are_compatible(ntedit_vcf, l_vcf):
+            outfile.write(f"{ntedit_vcf.get_tmp_print_line()}\n")
+            return
+        if len(ntedit_vcf.alt) > 1:
+            out_strs = []
+            for alt_base in ntedit_vcf.alt:
+                out_strs.append(f"{ntedit_vcf.chr}\t{ntedit_vcf.position}\t{ntedit_vcf.idenfitier}\t{ntedit_vcf.ref}\t"
+                        f"{alt_base}\t{ntedit_vcf.qual}\t{ntedit_vcf.filter}\t"
+                        f"{get_all_vcf_info(ntedit_vcf, l_vcf, alt_base)}\t{ntedit_vcf.format}\t"
+                        f"{ntedit_vcf.integration}")
+            out_str = "\n".join(out_strs)
+            outfile.write(f"{out_str}\n")
+        else:
+            alt_base = ntedit_vcf.alt[0]
+            out_str = (f"{ntedit_vcf.chr}\t{ntedit_vcf.position}\t{ntedit_vcf.idenfitier}"
+                    f"\t{ntedit_vcf.ref}\t{alt_base}\t{ntedit_vcf.qual}\t{ntedit_vcf.filter}\t"
+                    f"{get_all_vcf_info(ntedit_vcf, l_vcf, alt_base)}\t{ntedit_vcf.format}\t"
+                    f"{ntedit_vcf.integration}")
+            outfile.write(f"{out_str}\n")
+
+def parse_bedtools_loj(infile, outfile, strip_info=False):
+    "Parse the LOJ from bedtools to ntEdit-formatted VCF"
+    ntedit_vcf = None
+    l_vcf = None
+
+    with open(infile, 'r', encoding="utf8") as fin:
+        for line in fin:
+            line = line.strip().split("\t")
+            ntedit_vcf_new = Vcf(*line[:10], parse_info=False, strip_info=strip_info)
+            l_vcf_new = Vcf(*line[10:18])
+            if ntedit_vcf is not None and ntedit_vcf.position == ntedit_vcf_new.position \
+                and ntedit_vcf.chr == ntedit_vcf_new.chr:
+                # This is the same position as before, tally extra INFO from l_vcf.
+                # Know that this cannot be due to ntEdit VCF.
+                if l_vcf.position == -1 and are_compatible(ntedit_vcf, l_vcf_new):
+                    # Currently only storing incompatible entry, OK to overwrite
+                    l_vcf = l_vcf_new
+                elif are_compatible(ntedit_vcf, l_vcf_new):
+                    l_vcf.add_info(l_vcf_new.info_alts)
+                    l_vcf.position = l_vcf_new.position
+                else:
+                    continue
+
+            else:
+                if ntedit_vcf is not None:
+                    # This is not the same position, print previous and update
+                    print_vcf_line(ntedit_vcf, l_vcf, outfile)
+                ntedit_vcf = ntedit_vcf_new
+                l_vcf = l_vcf_new
+                if l_vcf_new.position != -1 and not are_compatible(ntedit_vcf_new, l_vcf_new):
+                    l_vcf.remove_info()
+
+        print_vcf_line(ntedit_vcf, l_vcf, outfile)
+
+def fold_attributes(vcf_list):
+    "Fold the attributes"
+    attribute_lists = []
+    alts = []
+    for vcf in vcf_list:
+        assert len(vcf.alt) == 1
+        alts.append(vcf.alt[0])
+        attribute_dict = vcf.get_info_dict()
+        list_attributes = [key for _, base_dict in attribute_dict.items() for key in base_dict]
+        attribute_lists.extend(list_attributes)
+    attribute_set = set(attribute_lists)
+
+
+    new_dict = {} # Key: [] to track lists of INFO
+    for attribute in attribute_set:
+        new_dict[attribute] = []
+        for vcf in vcf_list:
+            vcf_attributes = vcf.get_info_dict()[vcf.alt[0]]
+            if attribute in vcf_attributes and vcf_attributes[attribute] is None and attribute in new_dict:
+                continue
+            elif attribute in vcf_attributes:
+                new_dict[attribute].append(vcf_attributes[attribute])
+            else:
+                new_dict[attribute].append("NA")
+
+    new_info_line = []
+    visited_attributes = set()
+    for attribute in attribute_lists:
+        if attribute not in visited_attributes:
+            if None not in new_dict[attribute] and attribute != "":
+                new_info_line.append(f"{attribute}={','.join(new_dict[attribute])}")
+                visited_attributes.add(attribute)
+            else:
+                new_info_line.append(f"{attribute}")
+    rep_vcf = vcf_list[0]
+    if rep_vcf.info_str is not None:
+        info_line = rep_vcf.info_str + ";" + ";".join(new_info_line)
+    else:
+        info_line = ";".join(new_info_line)
+    info_line = info_line.strip(";")
+    out_str = f"{rep_vcf.chr}\t{rep_vcf.position}\t{rep_vcf.idenfitier}\t{rep_vcf.ref}\t{','.join(alts)}\t"\
+            f"{rep_vcf.qual}\t{rep_vcf.filter}\t{info_line}\t{rep_vcf.format}\t{rep_vcf.integration}"
+    return out_str
+
+
+def print_folded_vcf_line(vcf_list, outfile):
+    "Print the lines to the VCF, folding variants and INFO if applicable"
+    if len(vcf_list) == 1:
+        outline = vcf_list[0]
+        outfile.write(f"{outline}\n")
+    else:
+        new_info_line = fold_attributes(vcf_list)
+        outfile.write(f"{new_info_line}\n")
+
+def refold_variants(in_vcf, out_prefix):
+    "Re-interleave or refold the variants, so a given position is on one line"
+    prev_vcf = None
+    vcfs = []
+    with open(in_vcf, 'r', encoding="utf8") as fin:
+        with open(f"{out_prefix}.vcf", 'w', encoding="utf8") as fout:
+            for line in fin:
+                line = line.strip()
+                if line.startswith("#"):
+                    fout.write(f"{line}\n")
+                else:
+                    vcf_line = Vcf(*line.split("\t"))
+                    if prev_vcf is not None and \
+                        (prev_vcf.position != vcf_line.position or prev_vcf.chr != vcf_line.chr):
+                        # Different, ready to print
+                        print_folded_vcf_line(vcfs, fout)
+                        vcfs = []
+                    vcfs.append(vcf_line)
+                    prev_vcf = vcf_line
+            print_folded_vcf_line(vcfs, fout)
+
+def remove_tmp_vcf(filename):
+    "Remove this temporary file"
+    cmd = f"rm {filename}"
+    ret_code = subprocess.run(shlex.split(cmd), check=True)
+    assert ret_code.returncode == 0
+
+
+def main():
+    """
+    Given the result of LOJ between VCF files, output valid ntEdit VCF file
+    """
+    parser = argparse.ArgumentParser(
+        description="Given the result of LOJ between VCF files, output valid ntEdit VCF file")
+    parser.add_argument("-b", "--bedtools", help="Bedtools intersect file, or - to read from standard in",
+                        required=True, type=str)
+    parser.add_argument("--vcf", help="ntEdit VCF file (for header)", required=True, type=str)
+    parser.add_argument("--vcf_l", help="-l VCF file (for header)", required=True, type=str)
+    parser.add_argument("-p", "--prefix", help="Prefix for out files", required=False,
+                        default="ntedit_snv_vcf", type=str)
+    parser.add_argument("--strip", help="Strip INFO field from --vcf. Use when have ntEdit VCF with already "
+                        "cross-referenced ancestry",
+                        action="store_true")
+
+    args = parser.parse_args()
+
+    args.bedtools = "/dev/stdin" if args.bedtools == "-" else args.bedtools
+
+    with open(f"{args.prefix}.tmp.vcf", 'w', encoding="utf8") as fout:
+        header = write_header(args.vcf, fout, info_only=False)
+        write_header(args.vcf_l, fout, info_only=True)
+        fout.write(f"{header}\n")
+
+        parse_bedtools_loj(args.bedtools, fout, args.strip)
+
+    refold_variants(f"{args.prefix}.tmp.vcf", args.prefix)
+
+    remove_tmp_vcf(f"{args.prefix}.tmp.vcf")
+
+if __name__ == "__main__":
+    main()
diff --git a/ntroot_run_pipeline.smk b/ntroot_run_pipeline.smk
index 0594868..88da020 100644
--- a/ntroot_run_pipeline.smk
+++ b/ntroot_run_pipeline.smk
@@ -11,7 +11,7 @@ onsuccess:
 reference=config["reference"]
 draft_base = os.path.basename(os.path.realpath(reference))
 reads_prefix=config["reads"] if "reads" in config else ""
-k=config["k"]
+k=config["k"] if "k" in config else None
 
 genomes = config["genomes"] if "genomes" in config else ""
 genome_prefix = ".".join([os.path.basename(os.path.realpath(genome)).removesuffix(".fa").removesuffix(".fasta").removesuffix(".fna") for genome in genomes])
@@ -31,6 +31,11 @@ l = config["l"] if "l" in config else ""
 # Ancestry inference parameters
 tile_size = config["tile_size"] if "tile_size" in config else 5000000
 
+# Third-party VCF parameters
+input_vcf = config["input_vcf"] if "input_vcf" in config else None
+input_vcf_basename = os.path.basename(os.path.realpath(input_vcf)) if input_vcf else "None"
+strip_info = config["strip_info"] if "strip_info" in config else None
+
 # time command
 mac_time_command = "command time -l -o"
 linux_time_command = "command time -v -o"
@@ -51,6 +56,12 @@ rule ntroot_genome_lai:
 rule ntroot_reads_lai:
     input: f"{reads_prefix}_ntedit_k{k}_variants.vcf_ancestry-predictions-tile-resolution_tile{tile_size}.tsv"
 
+rule ntroot_input_vcf:
+    input: f"{input_vcf_basename}.cross-ref.vcf_ancestry-predictions_tile{tile_size}.tsv"
+
+rule ntroot_input_vcf_lai:
+    input: f"{input_vcf_basename}.cross-ref.vcf_ancestry-predictions-tile-resolution_tile{tile_size}.tsv"
+
 rule ntedit_reads:
     input: 
         reference = reference
@@ -98,7 +109,7 @@ rule ancestry_prediction:
     output: 
         predictions = "{vcf}_ancestry-predictions_tile{tile_size}.tsv"
     params:
-        benchmark = f"{time_command} ancestry_prediction_k{k}_tile{tile_size}.time",
+        benchmark = f"{time_command} ancestry_prediction_tile{tile_size}.time" if input_vcf_basename else f"{time_command} ancestry_prediction_k{k}_tile{tile_size}.time",
         tile_size = tile_size,
         verbosity = v
     shell:
@@ -112,9 +123,50 @@ rule ancestry_prediction_lai:
     output: 
         lai_output = "{vcf}_ancestry-predictions-tile-resolution_tile{tile_size}.tsv"
     params:
-        benchmark = f"{time_command} ancestry_prediction_k{k}_tile{tile_size}.time",
+        benchmark = f"{time_command} ancestry_prediction_tile{tile_size}.time" if input_vcf_basename else f"{time_command} ancestry_prediction_k{k}_tile{tile_size}.time",
         tile_size = tile_size,
         verbosity = v
     shell:
         "{params.benchmark} ntRootAncestryPredictor.pl -f {input.vcf} -t {params.tile_size} -v {params.verbosity} -r 1 -i {input.ref_fai}"
 
+rule sort_vcf_input:
+    input: vcf = f"{input_vcf}"
+    output: vcf_sorted = temp(f"{input_vcf_basename}_sorted.vcf")
+    params:
+        benchmark = f"{time_command} sort_vcf_{input_vcf_basename}.time",
+        cat_cmd = "gunzip -c" if f"{input_vcf}".endswith(".gz") else "cat"
+    shell:
+        """{params.benchmark} sh -c '(echo "##fileformat=VCFv4.2" ; {params.cat_cmd} {input.vcf} |grep -v "#" |sort -k1,1 -k2,2n) > {output.vcf_sorted}'"""
+
+rule sort_vcf_l:
+    input: vcf = l
+    output: temp(f"{os.path.basename(os.path.realpath(l))}_sorted.tmp.vcf")
+    params:
+        benchmark = f"{time_command} sort_vcf_l.time",
+        cat_cmd = "gunzip -c" if f"{l}".endswith(".gz") else "cat"
+    shell:
+        """{params.benchmark} sh -c "(echo '##fileformat=VCFv4.2' ; {params.cat_cmd} {input.vcf} | awk '\$5 !~ /^</' | grep -v '#' |sort -k1,1 -k2,2n) > {output}" """
+
+rule bedtools_intersect:
+    input:         
+        sorted_vcf = f"{input_vcf_basename}_sorted.vcf",
+        sorted_ref_vars = f"{os.path.basename(os.path.realpath(l))}_sorted.tmp.vcf"
+    output:
+        bedtools = temp(f"{input_vcf_basename}.bedtools-intersect.bed")
+    params:
+        benchmark = f"{time_command} bedtools_intersect_{input_vcf_basename}.time"
+    shell:
+        "{params.benchmark} bedtools intersect -loj -sorted -a {input.sorted_vcf} -b {input.sorted_ref_vars} > {output.bedtools}"
+
+rule cross_reference_vcf:
+    input: 
+        vcf = f"{input_vcf}",
+        ref_vars = l,
+        bedtools = f"{input_vcf_basename}.bedtools-intersect.bed"
+    output: f"{input_vcf_basename}.cross-ref.vcf"
+    params:
+        benchmark = f"{time_command} cross_reference_vcf_{input_vcf_basename}.time",
+        prefix=f"{input_vcf_basename}.cross-ref",
+        strip = "--strip" if strip_info else ""
+    shell: 
+        "{params.benchmark} ntroot_cross_reference_vcf.py -b {input.bedtools} --vcf {input.vcf} --vcf_l {input.ref_vars} -p {params.prefix} {params.strip}"
\ No newline at end of file