diff --git a/bagitutils.py b/bagitutils.py
index 1e679d4..3cdc2dc 100644
--- a/bagitutils.py
+++ b/bagitutils.py
@@ -1,25 +1,115 @@
 #!/usr/bin/env python
-
 import shutil
+import operator
 import zipfile
 import os
+from StringIO import StringIO
+
 import bagit
-import pandas as pd
-import numpy as np
-import re
 import tempfile
+import csv
+
+
+# Some column names from Boardwalk manifest. These are the ones that require
+# special handling when converting into the FireCloud scheme.
+class BoardwalkColumns:
+    """Namespace of Boardwalk manifest header names. Manifest rows are read
+    as dicts keyed by these strings, so they must match the header exactly."""
+
+    SAMPLE_UUID = 'Sample UUID'
+    DONOR_UUID = 'Donor UUID'
+    FILE_TYPE = 'File Type'
+    FILE_URLS = 'File URLs'
+    SPECIMEN_UUID = 'Specimen UUID'
+    FILE_DOS_URI = 'File DOS URI'
+    FILE_PATH = 'File Path'
+    UPLOAD_FILE_ID = 'Upload File ID'
+
+
+# Per-file Boardwalk columns, copied to the sample row with a numeric suffix.
+# FILE_URLS is excluded because its value is a comma-separated list of URLs.
+FILE_COLUMNS = [
+    BoardwalkColumns.FILE_DOS_URI,
+    BoardwalkColumns.FILE_TYPE,
+    BoardwalkColumns.FILE_PATH,
+    BoardwalkColumns.UPLOAD_FILE_ID
+]
+
+# Column names in Boardwalk that cannot simply be copied over to FireCloud;
+# they require extra logic.
+COMPLEX_COLUMNS = FILE_COLUMNS + [
+    BoardwalkColumns.SAMPLE_UUID,
+    BoardwalkColumns.FILE_URLS
+]
+
+
+class RequiredFirecloudColumns:
+    """
+    Names of the columns that FireCloud requires in the TSVs. The TSVs can
+    contain additional columns, but these minimal columns must be present
+    and spelled exactly as given here. The class only groups constants and
+    is not meant to be instantiated.
+    """
+
+    # The column in the participant.tsv
+    PARTICIPANT_ENTITY_ID = 'entity:participant_id'
+
+    # Columns in sample.tsv
+    SAMPLE_SAMPLE_ID = 'entity:sample_id'
+    SAMPLE_PARTICIPANT = 'participant'
 
 
 class BagHandler:
     """
-    Handles data in BagIt data structure.
+    From a Boardwalk manifest, generates a zip file with the contents in a bagit
+    format, where the bagit contains two TSVs that can be uploaded to FireCloud.
+
+    The Boardwalk manifest is a single TSV. Each row in the manifest corresponds
+    to a file. Several files can be part of the same sample, meaning a sample
+    can be spread across multiple rows.
+
+    For FireCloud, the data needs to be broken up into two TSVs, a participant
+    and a sample TSV.
+
+    The participant TSV is a one column TSV with the unique participant UUIDs.
+
+    The sample TSV has one row per sample, linked to the participant TSV by
+    a participant column. Because a sample may contain multiple files, each
+    file for a sample is added as an additional column.
+
+    Simplified example Boardwalk TSV
+
+    DONOR UUID   SAMPLE UUID    FILE
+    d1           s1             f1
+    d1           s1             f2
+    d2           s2             f3
+
+    This gets transformed to a participant TSV, with the two unique donors:
+
+    entity:participant_id
+    d1
+    d2
+
+    And a sample TSV, with the two samples, linked to participant.tsv by the
+    participant column:
+
+    entity:sample_id  participant  file1 file2
+    s1                d1           f1    f2
+    s2                d2           f3
+
+    In FireCloud, the name of column "entity:participant_id" in
+    participant.tsv, and the name of the columns "entity:sample_id" and
+    "participant" in sample.tsv must be exactly those. Additional columns in
+    sample can have any name, although the convention seems to be lower
+    case with underscores, so we convert the Boardwalk column names to follow
+    that convention.
+
+    In this example, the file2 column is empty for the second row. That is
+    because the different samples can have different numbers of files.
     """
+
     def __init__(self, data, bag_name, bag_info):
-        # Create Pandas dataframe from tab-separated values.
-        if isinstance(data, pd.core.frame.DataFrame):
-            self.data = data
-        else:
-            self.data = pd.read_csv(data, sep='\t')
+        self.data = data
         self.name = bag_name
         self.info = bag_info
 
@@ -33,115 +123,198 @@ def create_bag(self):
         data_path = bag_dir + '/data'
         os.makedirs(data_path)
         bag = bagit.make_bag(bag_dir, self.info)
-        self._reformat_headers()
-        participant, sample = self.transform()
-
-        participant.to_csv(path=data_path + '/participant.tsv',
-                           sep='\t',
-                           index=False,
-                           header=True)
-        sample.to_csv(path_or_buf=data_path + '/sample.tsv',
-                      sep='\t',
-                      index=False,
-                      header=True)
+
+        self.write_csv_files(data_path)
+
         # Write BagIt to disk and create checksum manifests.
         bag.save(manifests=True)
+
         # Compress bag.
         zipfile_tmp = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
         zipfile_handle = zipfile.ZipFile(zipfile_tmp,
                                          'w', zipfile.ZIP_DEFLATED)
-        self.__zipdir(tempd, zipfile_handle)
+        self._zipdir(tempd, zipfile_handle)
         zipfile_handle.close()
         shutil.rmtree(tempd, True)
         return zipfile_tmp.name
 
-    def __zipdir(self, path, zip_fh):
+    @staticmethod
+    def _zipdir(path, zip_fh):
         # zip_fh is zipfile handle
-        pathLength = len(path)
+        path_length = len(path)
         for root, dirs, files in os.walk(path):
             for file in files:
-                zip_fh.write(os.path.join(root, file), arcname=root[pathLength:] + '/' + file)
-
-    def _reformat_headers(self):
-        """Removes whitespace and dots in column names, and sets
-        all header strings to lower case."""
-        df = self.data
-        # Remove all spaces from column headers and make lower case.
-        df.rename(columns=lambda x: x.replace(" ", "_"), inplace=True)
-        df.rename(columns=lambda x: x.replace(".", "_"), inplace=True)
-        df.rename(columns=lambda x: x.lower(), inplace=True)
-
-    def transform(self):
-        """Transforms dataframe df for FireCloud upload and returns
-        two dataframes, a tuple of participant and sample, which are then
-        uploaded to FireCloud in that order.
+                zip_fh.write(os.path.join(root, file),
+                             arcname=root[path_length:] + '/' + file)
+
+    def write_csv_files(self, data_path):
         """
-        df = self.data
-        # Start normalizing the table. First, slice by file type.
-        df1 = df[df['file_type'] == 'crai']
-        # Extract three columns from df with file type 'cram':
-        df2 = df[['file_type',
-                  'file_path',
-                  'upload_file_id',
-                  'file_urls',
-                  'file_dos_url']][df['file_type'] == 'cram']
-        df2.rename(index=str,
-                   columns={'file_type': 'file_type2',
-                            'file_path': 'file_path2',
-                            'upload_file_id': 'upload_file_id2',
-                            'file_urls': 'file_urls2',
-                            'file_dos_url': 'file_dos_url2'},
-                   inplace=True)
-        frames = [df1, df2]  # merge both frames
-        for frame in frames:
-            frame.reset_index(drop=True, inplace=True)
-        # Second, by combining df1 and df2 we obtain a normalized table,
-        # using the index from df1.
-        df_new = pd.concat(frames, axis=1, join_axes=[df1.index])
-        df_new.drop_duplicates(keep='first', inplace=True)
-        # Create a table with only one column (donor will be participant
-        # in FC).
-        participant = df_new['donor_uuid']  # extract one column
-        participant.name = 'entity:participant_id'  # rename column header
-
-        # Re-order index of dataframe to be compliant with FireCloud
-        # specifications.
-        new_index = ([11, 4, 3, 7, 5, 6, 8, 9, 10, 12, 13, 14] +
-                     [0, 1, 2, 18, 19, 15, 16, 17, 20, 21, 22] +
-                     [23, 24, 25, 26])
-        L = df_new.columns.tolist()
-        new_col_order = [L[x] for x in new_index]
-        sample = df_new.reindex(columns=new_col_order)
-        sample = sample.rename(
-            index=str,
-            columns={'sample_uuid': 'entity:sample_id',
-                     'donor_uuid': 'participant_id',
-                     'file_type': 'file_type1',
-                     'file_path': 'file_path1',
-                     'upload_file_id': 'upload_file_id1',
-                     'file_urls': 'file_urls1',
-                     'file_dos_url': 'file_dos_url1',
-                     'metadata.json': 'metadata_json'})
-        return participant, sample
-
-    def __normalize(self):
+        Generates and writes participant.tsv and sample.tsv to data_path
+        directory.
+        :param data_path: Where to write the files
+        :return: None
         """
-        Normalizes dataframe to First Normal Form (1NF) such that it
-        contains only unique entries of donors IDs so it can be used
-        as primary key. Part of that is creating new columns with new
-        column names of those records that are duplicate.
-        :returns df: (Pandas dataframe) normalized
+        participants, samples = self.convert_to_participant_and_sample()
+        id_column = RequiredFirecloudColumns.PARTICIPANT_ENTITY_ID
+
+        # participant.tsv: a single column listing each unique participant.
+        with open(data_path + '/participant.tsv', 'w') as tsv:
+            writer = csv.DictWriter(tsv, fieldnames=[id_column],
+                                    delimiter='\t')
+            writer.writeheader()
+            for p in participants:
+                writer.writerow({id_column: p})
+
+        # sample.tsv: the header is derived from the first row; when there
+        # are no samples nothing is written and the file stays empty.
+        with open(data_path + '/sample.tsv', 'w') as tsv:
+            writer = None
+            for sample in samples:
+                if writer is None:
+                    sample_id = RequiredFirecloudColumns.SAMPLE_SAMPLE_ID
+                    # entity:sample_id must be first
+                    others = sorted(k for k in sample if k != sample_id)
+                    writer = csv.DictWriter(tsv, delimiter='\t',
+                                            fieldnames=[sample_id] + others)
+                    writer.writeheader()
+                writer.writerow(sample)
+
+    def convert_to_participant_and_sample(self):
+        """Return (participant id list, sample row dicts) for the manifest."""
+        stats = self.participants_and_max_files_in_sample_and_protocols()
+        return list(stats[0]), self.samples(stats[1], stats[2])
+
+    def participants_and_max_files_in_sample_and_protocols(self):
+        """
+        Does one pass through the CSV, collecting the unique participants,
+        the maximum number of files (rows) for any one specimen, and the
+        cloud native url protocols being used.
+        :return: tuple of (set of participants, max number of files in any
+        one specimen or 0 for an empty manifest, set of native protocols)
+        """
+        reader = csv.DictReader(StringIO(self.data), delimiter='\t')
+        participants = set()
+        native_protocols = set()
+        specimens = {}  # key: specimen UUID, value: number of rows (files)
+        for row in reader:
+            # Add all participants. It's a set, so no dupes
+            participants.add(row[BoardwalkColumns.DONOR_UUID])
+
+            specimen_uuid = row[BoardwalkColumns.SPECIMEN_UUID]
+            specimens[specimen_uuid] = specimens.get(specimen_uuid, 0) + 1
+
+            # Track all the different cloud native url protocols
+            for file_url in row[BoardwalkColumns.FILE_URLS].split(','):
+                protocol = self.native_url_protocol(file_url)
+                if protocol is not None:
+                    native_protocols.add(protocol)
+
+        # max() raises ValueError on an empty sequence, so an empty manifest
+        # is reported as zero files instead of crashing.
+        max_files = max(specimens.values()) if specimens else 0
+        return participants, max_files, native_protocols
+
+    def samples(self, max_files_in_sample, native_protocols):
+        """
+        Creates a list of dicts, where each dict is a row in the sample TSV
+        for FireCloud. All input rows that share a specimen UUID are merged
+        into one output row, with the file-specific data from each input row
+        appended as additional, numbered columns.
+
+        The input is self.data. Rows are grouped by
+        BoardwalkColumns.SPECIMEN_UUID, which requires the data to be sorted
+        on that column; this routine sorts it. If data could be sorted before
+        being passed to this method, then we should remove sorting in here.
+
+        :param max_files_in_sample: the maximum number of files in any sample
+        :param native_protocols: all the unique native protocols in the data
+        :return: a list of dicts
+        """
+        reader = csv.DictReader(StringIO(self.data), delimiter='\t')
+        samples = []
+
+        current_specimen_uuid = None
+        current_row = None
+
+        for row in sorted(reader, key=operator.itemgetter(
+                BoardwalkColumns.SPECIMEN_UUID)):
+            specimen_uuid = row[BoardwalkColumns.SPECIMEN_UUID]
+            if specimen_uuid != current_specimen_uuid:
+                current_specimen_uuid = specimen_uuid
+                index = 1  # first file of a new specimen
+                if current_row is not None:
+                    samples.append(current_row)  # previous specimen done
+                current_row = self.init_sample_row(row, max_files_in_sample,
+                                                   native_protocols)
+            else:
+                index = index + 1
+
+            self.add_files_to_row(current_row, row, str(index))
+
+        if current_row is not None:
+            samples.append(current_row)  # flush the last specimen
+        return samples
+
+    def add_files_to_row(self, new_row, existing_row, suffix):
+        """
+        Copy the file-specific values of one manifest row into a sample row.
+        :param new_row: sample row dict being built; updated in place
+        :param existing_row: manifest row (dict) describing a single file
+        :param suffix: file number as a string, appended to column names
+        :return: None
+        """
+        # One <protocol>_url<suffix> column per URL that carries a protocol;
+        # a later URL with the same protocol replaces an earlier one.
+        for url in existing_row[BoardwalkColumns.FILE_URLS].split(','):
+            scheme = self.native_url_protocol(url)
+            if scheme is not None:
+                new_row[self.native_column_name(scheme, suffix)] = url
+
+        for name in FILE_COLUMNS:
+            if name in existing_row:
+                target = self.firecloud_column_name(name) + suffix
+                new_row[target] = existing_row[name]
+
+    def init_sample_row(self, existing_row, max_files_in_sample,
+                        native_protocols):
         """
-        df = self.data
-        # Get list of all column names.
-        col_names = [col for col in df]
-        # Constrain that list to those column names that hold file info.
-        L = [s for s in col_names if bool(re.search('[Ff]ile', s))]
-        # file_type = "".join(str(s) for s in L)
-
-        nrecords = len(df['donor_uuid'].unique())  # number of donors
-        filetype = df['file_type'].unique()  # create list of filetypes
-        for idx, item in enumerate(L):
-            print(item,)
-            a = np.repeat((filetype[0]), nrecords) 
-        return df
+        Create and initialize a sample row
+        :param existing_row: the existing row
+        :param max_files_in_sample: the maximum number of files in a sample
+        :param native_protocols: all the unique native protocols in the data
+        :return: the initialized row
+        """
+        # Rename sample column and participant
+        row = {RequiredFirecloudColumns.SAMPLE_SAMPLE_ID: existing_row[
+            BoardwalkColumns.SAMPLE_UUID],
+               RequiredFirecloudColumns.SAMPLE_PARTICIPANT: existing_row[
+                   BoardwalkColumns.DONOR_UUID]}
+
+        # Copy columns that don't need transformation, other than FC naming
+        # conventions
+        for key, value in existing_row.iteritems():
+            if key not in COMPLEX_COLUMNS:
+                row[self.firecloud_column_name(key)] = value
+
+        # Initialize columns for files and cloud native urls
+        for suffix in [str(i) for i in range(1, max_files_in_sample + 1)]:
+            for column in FILE_COLUMNS:
+                row[self.firecloud_column_name(column) + suffix] = None
+
+            for native_protocol in native_protocols:
+                row[self.native_column_name(native_protocol, suffix)] = None
+        return row
+
+    @staticmethod
+    def native_url_protocol(url):
+        """Return the protocol preceding '://' in url, or None if absent."""
+        scheme, sep, _ = url.partition('://')
+        return scheme if sep and scheme else None
+
+    @staticmethod
+    def native_column_name(native_protocol, suffix):
+        return native_protocol + '_url' + suffix  # e.g. 'gs_url1'
+
+    @staticmethod
+    def firecloud_column_name(column):  # e.g. 'File Type' -> 'file_type'
+        return column.lower().replace(' ', '_').replace('.', '_')
diff --git a/requirements.txt b/requirements.txt
index f5e0856..87a0c6e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -37,7 +37,6 @@ lxml==3.6.4
 Mako==1.0.6
 MarkupSafe==0.23
 packaging==16.8
-pandas==0.22
 psycopg2==2.7.4
 pyasn1==0.2.2
 pycparser==2.17
@@ -57,5 +56,4 @@ SQLAlchemy==1.1.5
 texttable==0.8.7
 tornado==4.4.2
 tzlocal==1.3
-urllib3==1.20
 Werkzeug==0.11.15
diff --git a/test/manifest_with_crai_cram_bai.tsv b/test/manifest_with_crai_cram_bai.tsv
new file mode 100644
index 0000000..4e8ad37
--- /dev/null
+++ b/test/manifest_with_crai_cram_bai.tsv
@@ -0,0 +1,29 @@
+Program	Project	Center Name	Submitter Donor ID	Donor UUID	Submitter Donor Primary Site	Submitter Specimen ID	Specimen UUID	Submitter Specimen Type	Submitter Experimental Design	Submitter Sample ID	Sample UUID	Analysis Type	Workflow Name	Workflow Version	File Type	File Path	Upload File ID	Data Bundle UUID	Metadata.json	File URLs	File DOS URI
+NHLBI TOPMed: Boston Early-Onset COPD Study in the TOPMed Program	COPD	UW	EO5009365	c2ae54f0-eb16-50d6-b511-e85db0d103a5	Blood	SRS1231161	71557d46-f8eb-547a-924a-d5e9f8623fa2	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD106415	adc186d7-32b6-55f3-806d-5bb85ce94c83	alignment	topmed-spinnaker	Alpha Build 1	cram	NWD106415.b38.irc.v1.cram	fff5a29f-d184-4e3b-9c5b-6f44aea7f527	b5dc2e31-8d75-5da9-b4a2-ba5061492d8d		gs://topmed-irc-share/genomes/NWD106415.b38.irc.v1.cram,s3://nih-nhlbi-datacommons/NWD106415.b38.irc.v1.cram	dos://dos-dss.ucsc-cgp-dev.org/fff5a29f-d184-4e3b-9c5b-6f44aea7f527?version=2018-02-28T033124.129027Z
+TOPMed	1000 Genomes	UW	NWD259170	066c6bb5-7c8c-51ea-b0fb-0c07b105c9b7	B-lymphocyte	HG01110	df2b221c-e8c1-5241-9c53-327cf103034e	Normal - blood derived	WGS	HG01110_sample	9f1e5d7d-90f8-57c6-8ccb-ca1d89d34611	sequence_upload	spinnaker	1.1.2	crai	NWD259170.recab.cram.crai	46c8a5f1-15ab-48fa-8d1c-63099422e3c7	0d6371a8-fc4f-5232-9660-e655903b17ea		s3://commons-dss-commons/blobs/1c7d249c9123007d693857eab3dd4646bc8d742e76c6716c80debbbdd5d48e8b.be947abb597d1a21f2da9d97d96f58e7ca07a214.b933d8fb97268c951e610b6dfa20924d.6fbcd0b3	dos://dos-dss.ucsc-cgp-dev.org/46c8a5f1-15ab-48fa-8d1c-63099422e3c7?version=2018-01-31T081722.854147Z
+TOPMed	1000 Genomes	WashU	NWD100953	4ff60ff5-a1d6-557b-b4ed-3220f62a1b02	B-lymphocyte	HG01110	37865db0-0a24-5b4f-937f-00871886906b	Normal - blood derived	WGS	HG01110_sample	c4ffc283-b53e-5559-9d9f-3238ac281d86	sequence_upload	spinnaker	1.1.3	crai	NWD100953.recab.cram.crai	a62ee491-489d-405a-8a3b-83765f9e91fb	44a8837b-4456-5709-b56b-54e23000f13a		s3://commons-dss-commons/blobs/93e04a07d92ea6732440484ba3b3d2a4841f34c92bd2e41c724d2002e94b44b5.09fcf961a8e3abba39e9a237a104c50668329356.3b0f63a815384a3d44c61b4abd40caf9.a75a2c4b	dos://dos-dss.ucsc-cgp-dev.org/a62ee491-489d-405a-8a3b-83765f9e91fb?version=2018-01-31T152803.900629Z
+TOPMed	HapMap	Baylor	NWD875673	149fd7b7-1c11-593b-9625-c20f279f68ff	B-lymphocyte	NA12878	0e448c1f-81ac-5054-8069-5868469d5308	Normal - solid tissue	WGS	NA12878_sample	7a2e07bc-41b9-5c19-869f-9dcd92892bee	sequence_upload	spinnaker	1.1.2	cram	NWD875673.recab.cram	94694564-bdd3-43dd-9af7-d9c055fd0773	2277b3fc-5a75-5782-86a0-c29f13844e7d		s3://commons-dss-commons/blobs/170f6ea09964e5f0442ac9c2ae997659ddf3bb18ef18cb8936d8db7672efb59f.77c0ce859e1a89e5eba95eeca790257cd033eb3e.15ba273d28a75721f412012f33d9c45e-343.455e8975	dos://dos-dss.ucsc-cgp-dev.org/94694564-bdd3-43dd-9af7-d9c055fd0773?version=2018-01-31T092944.494586Z
+TOPMed	HapMap	Baylor	NWD875673	149fd7b7-1c11-593b-9625-c20f279f68ff	B-lymphocyte	NA12878	0e448c1f-81ac-5054-8069-5868469d5308	Normal - solid tissue	WGS	NA12878_sample	7a2e07bc-41b9-5c19-869f-9dcd92892bee	sequence_upload	spinnaker	1.1.2	crai	NWD875673.recab.cram.crai	b8906dd1-2117-4679-8ac7-4ea6cdc045f1	2277b3fc-5a75-5782-86a0-c29f13844e7d		s3://commons-dss-commons/blobs/df80c4043ce92ba3fb36ff6896f6830585e572358efd00e17d460ff6705824b5.3b7a821337628c2e90d8736a7b8b89f0229168d1.162e9d9e87bce7bae5d15438721fb8a0.830013b8	dos://dos-dss.ucsc-cgp-dev.org/b8906dd1-2117-4679-8ac7-4ea6cdc045f1?version=2018-01-31T093033.045825Z
+TOPMed	HapMap	Broad	NWD768309	7d678b22-314d-54c9-8dd3-7fdfdd6bec89	B-lymphocyte	NA12878	af5eceac-72b6-5e90-a822-ee2e8d12ec55	Normal - blood	WGS	NA12878_sample	250da12d-c7f1-5a2c-8573-1c1c683d6d33	sequence_upload	spinnaker	1.1.2	cram	NWD768309.recab.cram	6b1b029e-789b-47cd-9aa4-7f7c38e612fd	1ecf1c35-9e1e-55ef-8f42-71102c3abc33		s3://commons-dss-commons/blobs/e596a82a48f753c030ba1026aa8752ad149cae0d6a8c0bc46de64463ef7ef8db.975b8117f41f20157bc5418f44a2fe414605ca4e.170d40e2e02088f34943a39964ebef2f-308.9d1ea3b9	dos://dos-dss.ucsc-cgp-dev.org/6b1b029e-789b-47cd-9aa4-7f7c38e612fd?version=2018-01-31T142525.164592Z
+TOPMed	HapMap	Broad	NWD768309	7d678b22-314d-54c9-8dd3-7fdfdd6bec89	B-lymphocyte	NA12878	af5eceac-72b6-5e90-a822-ee2e8d12ec55	Normal - blood	WGS	NA12878_sample	250da12d-c7f1-5a2c-8573-1c1c683d6d33	sequence_upload	spinnaker	1.1.2	crai	NWD768309.recab.cram.crai	e1f2f1ec-eff6-42f6-92ad-51cea0c165f8	1ecf1c35-9e1e-55ef-8f42-71102c3abc33		s3://commons-dss-commons/blobs/68e41018e32ed6a8cb835d41a3d30c5cb130e05d20c377a14d9884bd2ac423d6.e9649dcc7f6174ec519988421cd4af4c63bbf5a6.3918dcc5e0bb2c93fefe3daba4f8eeef.9873b877	dos://dos-dss.ucsc-cgp-dev.org/e1f2f1ec-eff6-42f6-92ad-51cea0c165f8?version=2018-01-31T142525.877824Z
+TOPMed	HapMap	NYGC	NWD119836	bc6e1fd7-229d-5e65-a5a2-a15fee0613c0	B-lymphocyte	NA12878	8628f32f-d6fd-5419-a364-242a11abebb5	Normal - solid tissue	WGS	NA12878_sample	8ca820a2-d182-580c-9572-636f9f0eae62	sequence_upload	spinnaker	1.1.2	crai	NWD119836.recab.cram.crai	693dc20d-a6bf-4334-857c-6a496803b34a	204cf1bd-1477-57e6-880c-1b863edac627		s3://commons-dss-commons/blobs/597d139565b45176509fc7a4f3fc7066cac626ce50d80ad2a6b643eb0d9b4a5a.dda8235ca55396f9ab5cd2fb9e61d43dd796e7da.0ea01635527c738a5f3bf82acf0c3859.dcc419cd	dos://dos-dss.ucsc-cgp-dev.org/693dc20d-a6bf-4334-857c-6a496803b34a?version=2018-01-31T142528.027169Z
+NHLBI TOPMed: Boston Early-Onset COPD Study in the TOPMed Program	COPD	UW	EO1035541	dae1e053-1a58-5cea-a168-cbfc7cebe679	Blood	SRS1231232	bfcc3266-340a-5751-8db1-d661163ac8e5	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD145710	a3e869a6-2cc1-5fc5-aa4a-c6e9d071ff38	alignment	topmed-spinnaker	Alpha Build 1	cram	NWD145710.b38.irc.v1.cram	ab4c0815-a366-47b8-b94f-626458d43859	d8fe0ae3-efa2-59c3-9e70-1b164ca868b3		gs://topmed-irc-share/genomes/NWD145710.b38.irc.v1.cram,s3://nih-nhlbi-datacommons/NWD145710.b38.irc.v1.cram	dos://dos-dss.ucsc-cgp-dev.org/ab4c0815-a366-47b8-b94f-626458d43859?version=2018-02-28T051204.497736Z
+NHLBI TOPMed: Boston Early-Onset COPD Study in the TOPMed Program	COPD	UW	EO1035541	dae1e053-1a58-5cea-a168-cbfc7cebe679	Blood	SRS1231232	bfcc3266-340a-5751-8db1-d661163ac8e5	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD145710	a3e869a6-2cc1-5fc5-aa4a-c6e9d071ff38	alignment	topmed-spinnaker	Alpha Build 1	crai	NWD145710.b38.irc.v1.cram.crai	f0b142de-e19a-4771-bd6c-d7c5a11d2a43	d8fe0ae3-efa2-59c3-9e70-1b164ca868b3		s3://nih-nhlbi-datacommons/NWD145710.b38.irc.v1.cram.crai,gs://topmed-irc-share/genomes/NWD145710.b38.irc.v1.cram.crai	dos://dos-dss.ucsc-cgp-dev.org/f0b142de-e19a-4771-bd6c-d7c5a11d2a43?version=2018-02-28T051206.328525Z
+NHLBI TOPMed: Boston Early-Onset COPD Study in the TOPMed Program	COPD	UW	EO5009365	c2ae54f0-eb16-50d6-b511-e85db0d103a5	Blood	SRS1231161	71557d46-f8eb-547a-924a-d5e9f8623fa2	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD106415	adc186d7-32b6-55f3-806d-5bb85ce94c83	alignment	topmed-spinnaker	Alpha Build 1	crai	NWD106415.b38.irc.v1.cram.crai	5eec1df5-408d-413b-9d46-87a587e2b8fc	b5dc2e31-8d75-5da9-b4a2-ba5061492d8d		gs://topmed-irc-share/genomes/NWD106415.b38.irc.v1.cram.crai,s3://nih-nhlbi-datacommons/NWD106415.b38.irc.v1.cram.crai	dos://dos-dss.ucsc-cgp-dev.org/5eec1df5-408d-413b-9d46-87a587e2b8fc?version=2018-02-28T033125.424703Z
+NHLBI TOPMed: Boston Early-Onset COPD Study in the TOPMed Program	COPD	UW	EO8055779	d58e246b-5cc1-5d5d-ac8c-cdeedb54d81b	Blood	SRS1231088	47a167c5-08b9-507f-9b6e-5c252ea89683	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD321156	b669582f-2ab4-58c5-8ac7-ede4a5149c07	alignment	topmed-spinnaker	Alpha Build 1	cram	NWD321156.b38.irc.v1.cram	34171382-93f5-42e4-85d8-ebd8efdf2440	2f0a2f5f-5a47-5728-9375-51ecc7fb665d		gs://topmed-irc-share/genomes/NWD321156.b38.irc.v1.cram,s3://nih-nhlbi-datacommons/NWD321156.b38.irc.v1.cram	dos://dos-dss.ucsc-cgp-dev.org/34171382-93f5-42e4-85d8-ebd8efdf2440?version=2018-02-28T051322.937836Z
+TOPMed	HapMap	NYGC	NWD293295	b8284a5b-429d-5652-8247-0257f1e2f61d	B-lymphocyte	NA19238	58036d43-ec02-59f1-a6d8-83e7f666d90a	Normal - solid tissue	WGS	NA19238_sample	7c94077f-ea7c-5e9a-99fe-d13cac77a61d	sequence_upload	spinnaker	1.1.2	cram	NWD293295.recab.cram	87174c6a-ec98-44a4-9200-4292daa5b185	06c4bd47-c8e2-5045-8bae-bfad24633c87		s3://commons-dss-commons/blobs/19e5620579898ace0db2135e0434daba4f48edf72c5dbf82bfc1ad173161ff71.e1137f4a813e4d18387a799a5f88bb5d300c2cd6.aa81284302982b0f755d1238c3349762-336.feb986d1	dos://dos-dss.ucsc-cgp-dev.org/87174c6a-ec98-44a4-9200-4292daa5b185?version=2018-01-31T081712.534262Z
+TOPMed	HapMap	UW	NWD578417	a89a200d-e6f4-513a-b634-b1f7e10cf57a	B-lymphocyte	NA12878	dbe15848-e076-5517-a1c4-9d2cdbd6b4b5	Normal - blood derived	WGS	NA12878_sample	bb6d255e-f3fa-5bb5-8333-872ff60e491e	sequence_upload	spinnaker	1.1.2	crai	NWD578417.recab.cram.crai	8dd791a7-c6c9-4874-b110-b7b82656afe1	139f30ba-62d3-50fb-9177-ab3d370e29f8		s3://commons-dss-commons/blobs/7253c293c7112d801a7d1278ba6518ef283b2a8c3177a9b7c149f441892ec2c7.84393859a40b0277c70c8f32c2e833d66904beec.327ee0c5c1f606ce22a753d82b76285e.6539dadd	dos://dos-dss.ucsc-cgp-dev.org/8dd791a7-c6c9-4874-b110-b7b82656afe1?version=2018-01-31T142517.088013Z
+TOPMed	HapMap	NYGC	NWD119836	bc6e1fd7-229d-5e65-a5a2-a15fee0613c0	B-lymphocyte	NA12878	8628f32f-d6fd-5419-a364-242a11abebb5	Normal - solid tissue	WGS	NA12878_sample	8ca820a2-d182-580c-9572-636f9f0eae62	sequence_upload	spinnaker	1.1.2	cram	NWD119836.recab.cram	c3701573-8cc4-4da8-81ae-d3cb5dd67083	204cf1bd-1477-57e6-880c-1b863edac627		s3://commons-dss-commons/blobs/16b8bb72660b1612bfc14c6967124daf36695fa2c48a85c027b1ae56b557f4b1.4c40c1c34375fec97bea30343436e9d39c64c696.7b21891b5cfa4e8982fd55816d966191-308.2837f9a7	dos://dos-dss.ucsc-cgp-dev.org/c3701573-8cc4-4da8-81ae-d3cb5dd67083?version=2018-01-31T092110.375884Z
+NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	crai	NWD692354.b38.irc.v1.cram.crai	5a00cc38-2f8d-4d34-98e0-0a847579b988	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram.crai,s3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram.crai	dos://dos-dss.ucsc-cgp-dev.org/5a00cc38-2f8d-4d34-98e0-0a847579b988?version=2018-02-28T160411.061319Z
+NHLBI TOPMed: Boston Early-Onset COPD Study in the TOPMed Program	COPD	UW	EO8055779	d58e246b-5cc1-5d5d-ac8c-cdeedb54d81b	Blood	SRS1231088	47a167c5-08b9-507f-9b6e-5c252ea89683	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD321156	b669582f-2ab4-58c5-8ac7-ede4a5149c07	alignment	topmed-spinnaker	Alpha Build 1	crai	NWD321156.b38.irc.v1.cram.crai	1c9db311-4015-4ae2-9cc2-99a5f98d96ba	2f0a2f5f-5a47-5728-9375-51ecc7fb665d		gs://topmed-irc-share/genomes/NWD321156.b38.irc.v1.cram.crai,s3://nih-nhlbi-datacommons/NWD321156.b38.irc.v1.cram.crai	dos://dos-dss.ucsc-cgp-dev.org/1c9db311-4015-4ae2-9cc2-99a5f98d96ba?version=2018-02-28T051324.619728Z
+TOPMed	1000 Genomes	UW	NWD259170	066c6bb5-7c8c-51ea-b0fb-0c07b105c9b7	B-lymphocyte	HG01110	df2b221c-e8c1-5241-9c53-327cf103034e	Normal - blood derived	WGS	HG01110_sample	9f1e5d7d-90f8-57c6-8ccb-ca1d89d34611	sequence_upload	spinnaker	1.1.2	cram	NWD259170.recab.cram	670b2f03-8444-4e14-a13b-f804b56a4c4c	0d6371a8-fc4f-5232-9660-e655903b17ea		s3://commons-dss-commons/blobs/8342b0cee5cc14d4dcc33bbc7b29d0bd79a18f8d4c0a29d9d365596485d42649.036a40effc41791699de5ec159b3e84505bdef5a.e9ad2e5c1532f49064d3c17e16a28dd0-262.afc07a01	dos://dos-dss.ucsc-cgp-dev.org/670b2f03-8444-4e14-a13b-f804b56a4c4c?version=2018-01-31T081722.372972Z
+TOPMed	HapMap	NYGC	NWD293295	b8284a5b-429d-5652-8247-0257f1e2f61d	B-lymphocyte	NA19238	58036d43-ec02-59f1-a6d8-83e7f666d90a	Normal - solid tissue	WGS	NA19238_sample	7c94077f-ea7c-5e9a-99fe-d13cac77a61d	sequence_upload	spinnaker	1.1.2	crai	NWD293295.recab.cram.crai	10c8bed3-4395-490a-9f75-0cb5cc991c7a	06c4bd47-c8e2-5045-8bae-bfad24633c87		s3://commons-dss-commons/blobs/94e5eb2718e4bd776f2527f91794ef8511434ee6eec917c3f0b8177986896296.8b629733a8fd135a9dacd6264801184808070131.3a225f58729ac12d2d8e9142636ba6c9.eb66a4cf	dos://dos-dss.ucsc-cgp-dev.org/10c8bed3-4395-490a-9f75-0cb5cc991c7a?version=2018-01-31T081713.079689Z
+TOPMed	1000 Genomes	WashU	NWD100953	4ff60ff5-a1d6-557b-b4ed-3220f62a1b02	B-lymphocyte	HG01110	37865db0-0a24-5b4f-937f-00871886906b	Normal - blood derived	WGS	HG01110_sample	c4ffc283-b53e-5559-9d9f-3238ac281d86	sequence_upload	spinnaker	1.1.3	cram	NWD100953.recab.cram	cdf3ec65-0c1a-4107-8200-8e8d5d2b5b14	44a8837b-4456-5709-b56b-54e23000f13a		s3://commons-dss-commons/blobs/3e676baceae3698ae35d63a56a471deef12e2aff5581e93b1716d6cc9f0d0c4d.4a704dd7cb65a4465dc0f687a70783731603ef87.ac94763859e218da0be7e3ea25786bb1-559.ff76a67c	dos://dos-dss.ucsc-cgp-dev.org/cdf3ec65-0c1a-4107-8200-8e8d5d2b5b14?version=2018-01-31T152458.523248Z
+TOPMed	HapMap	NYGC	NWD119836	bc6e1fd7-229d-5e65-a5a2-a15fee0613c0	B-lymphocyte	NA12878	8628f32f-d6fd-5419-a364-242a11abebb5	Normal - solid tissue	WGS	NA12878_sample	8ca820a2-d182-580c-9572-636f9f0eae62	sequence_upload	spinnaker	1.1.2	cram	NWD119836.recab.cram	d5cd7954-3a9a-4877-830a-b0dc463d3373	204cf1bd-1477-57e6-880c-1b863edac627		s3://commons-dss-commons/blobs/16b8bb72660b1612bfc14c6967124daf36695fa2c48a85c027b1ae56b557f4b1.4c40c1c34375fec97bea30343436e9d39c64c696.7b21891b5cfa4e8982fd55816d966191-308.2837f9a7	dos://dos-dss.ucsc-cgp-dev.org/d5cd7954-3a9a-4877-830a-b0dc463d3373?version=2018-01-31T142527.540197Z
+TOPMed	HapMap	Baylor	NWD119844	ade6f774-2b64-5caa-a61f-593ab316cb66	B-lymphocyte	NA12878	0c3a10a8-1b44-5b86-80e7-a242473b5470	Normal - solid tissue	WGS	NA12878_sample	0ff51e1f-2209-58af-9e87-7e57cf41df35	sequence_upload	spinnaker	1.1.2	cram	NWD119844.recab.cram	05c8f031-958b-4671-8e77-218a0b18d26d	06dfc2ab-2d04-52c3-9723-0ac4042e4e38		s3://commons-dss-commons/blobs/bb1e5b25eb820186fc35e08acb815add88e4581cac6ff8a28a432f08fa507b74.25b77b75ac1109e0027f68bbbb5505e5b50921dc.d0f4fa6ef638c6dc7eca6c077c48ba76-348.f03272c9	dos://dos-dss.ucsc-cgp-dev.org/05c8f031-958b-4671-8e77-218a0b18d26d?version=2018-01-31T142456.422327Z
+TOPMed	HapMap	Baylor	NWD119844	ade6f774-2b64-5caa-a61f-593ab316cb66	B-lymphocyte	NA12878	0c3a10a8-1b44-5b86-80e7-a242473b5470	Normal - solid tissue	WGS	NA12878_sample	0ff51e1f-2209-58af-9e87-7e57cf41df35	sequence_upload	spinnaker	1.1.2	crai	NWD119844.recab.cram.crai	6079aaa4-71a1-41e1-9588-fded71219764	06dfc2ab-2d04-52c3-9723-0ac4042e4e38		s3://commons-dss-commons/blobs/a2c7634e5a9203219cc227fbb1583216ef934fb153e5711e1db117379f1ee8f8.bb6f1d0d5a033215e2f8ecdf78dbedf8958e081a.48c26ed3d4a8224f4dfa3515edae749b.d47f8df4	dos://dos-dss.ucsc-cgp-dev.org/6079aaa4-71a1-41e1-9588-fded71219764?version=2018-01-31T142457.109961Z
+NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	cram	NWD692354.b38.irc.v1.cram	b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		s3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram,gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram	dos://dos-dss.ucsc-cgp-dev.org/b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5?version=2018-02-28T160408.957538Z
+TOPMed	HapMap	Broad	NWD831422	bb2afc83-8980-53e3-a844-4c273b68bba4	B-lymphocyte	NA12878	58ca49bc-fa95-5bf9-b547-552e2645a4cd	Normal - blood	WGS	NA12878_sample	9d09c7c4-a078-5105-9493-26b0b4e40b79	sequence_upload	spinnaker	1.1.2	cram	NWD831422.recab.cram	91392f82-7529-4a5a-9ee6-1c33f5637332	0e727062-7fc9-5e46-b1e3-24537426ca4c		s3://commons-dss-commons/blobs/1b57d910d63451195e04dffefea05d9c358ed0ecdf2c7dec089f1adb7b87242a.6a30d9b442338922707a0da64a22538f1a6a39aa.75bab7b53cf134670dbf42a3fdc4ea64-343.1187a558	dos://dos-dss.ucsc-cgp-dev.org/91392f82-7529-4a5a-9ee6-1c33f5637332?version=2018-01-31T082344.675433Z
+TOPMed	HapMap	Broad	NWD831422	bb2afc83-8980-53e3-a844-4c273b68bba4	B-lymphocyte	NA12878	58ca49bc-fa95-5bf9-b547-552e2645a4cd	Normal - blood	WGS	NA12878_sample	9d09c7c4-a078-5105-9493-26b0b4e40b79	sequence_upload	spinnaker	1.1.2	crai	NWD831422.recab.cram.crai	db31d438-d369-48cc-aaa0-abc953c21213	0e727062-7fc9-5e46-b1e3-24537426ca4c		s3://commons-dss-commons/blobs/30da18b02902ae9e5bb51768728064886e47f57573687b282e75d874cdc79ebf.f248ba5422244cf67632ef93420591b5d57d9e38.dcbe9d985c104fd240b9c307e4acf4c0.542f74e7	dos://dos-dss.ucsc-cgp-dev.org/db31d438-d369-48cc-aaa0-abc953c21213?version=2018-01-31T082415.483029Z
+TOPMed	HapMap	UW	NWD578417	a89a200d-e6f4-513a-b634-b1f7e10cf57a	B-lymphocyte	NA12878	dbe15848-e076-5517-a1c4-9d2cdbd6b4b5	Normal - blood derived	WGS	NA12878_sample	bb6d255e-f3fa-5bb5-8333-872ff60e491e	sequence_upload	spinnaker	1.1.2	cram	NWD578417.recab.cram	8a3fb043-0324-465c-b000-60c150dd68b1	139f30ba-62d3-50fb-9177-ab3d370e29f8		s3://commons-dss-commons/blobs/fb94baec68b9b6b9e10d0f61aa03e915314d24320533f72088a60c412e247c77.b07c72e12d6857b3093cddc871a7b0cadc1831fe.5d06e7da42fa89d8f447cbbdeb337e03-300.d55d6c31	dos://dos-dss.ucsc-cgp-dev.org/8a3fb043-0324-465c-b000-60c150dd68b1?version=2018-01-31T142516.543571Z
+TOPMed	HapMap	NYGC	NWD119836	bc6e1fd7-229d-5e65-a5a2-a15fee0613c0	B-lymphocyte	NA12878	8628f32f-d6fd-5419-a364-242a11abebb5	Normal - solid tissue	WGS	NA12878_sample	8ca820a2-d182-580c-9572-636f9f0eae62	sequence_upload	spinnaker	1.1.2	crai	NWD119836.recab.cram.crai	975b0bf5-96f4-4e34-aa93-681e1d558b9c	204cf1bd-1477-57e6-880c-1b863edac627		s3://commons-dss-commons/blobs/597d139565b45176509fc7a4f3fc7066cac626ce50d80ad2a6b643eb0d9b4a5a.dda8235ca55396f9ab5cd2fb9e61d43dd796e7da.0ea01635527c738a5f3bf82acf0c3859.dcc419cd	dos://dos-dss.ucsc-cgp-dev.org/975b0bf5-96f4-4e34-aa93-681e1d558b9c?version=2018-01-31T092315.854852Z
diff --git a/test/test_bagitutils.py b/test/test_bagitutils.py
index 73faf8e..0613587 100644
--- a/test/test_bagitutils.py
+++ b/test/test_bagitutils.py
@@ -2,56 +2,16 @@
 
 import unittest
 import os
-import pandas as pd
-from StringIO import StringIO
 import zipfile
-# import numpy as np
 from bagitutils import BagHandler
-from pandas.util.testing import assert_frame_equal
 
 
 class TestBagHandlerMethods(unittest.TestCase):
 
-    def setUp(self):
-        """Load normalized test data into Pandas dataframe"""
-        fpath = 'test/test_normalize_df_mock.tsv'
-        try:
-            df = pd.read_csv(
-                fpath,
-                sep='\t')
-        except IOError:
-            print('Cannot open file')
-        self.normalized = df
-
-    def test_normalize(self):
-        """ """
-        # fpath = 'test/test_normalize_df_mock.tsv'
-        # df = pd.read_csv(fpath, sep='\t')
-        # args = dict([('data', df),
-        #              ('bag_info', 'test'),
-        #              ('bag_path', '~/dev/manifest-handover')])
-        # bag = BagHandler(**args)
-        # df_test = bag._BagHandler__normalize()
-        # assert_frame_equal(self.normalized, df)
-
-    def test_worksWithString(self):
-        s = StringIO("Program	Project	Center Name	Submitter Donor ID	Donor UUID	Submitter Donor Primary Site	Submitter Specimen ID	Specimen UUID	Submitter Specimen Type	Submitter Experimental Design	Submitter Sample ID	Sample UUID	Analysis Type	Workflow Name	Workflow Version	File Type	File Path	Upload File ID	Data Bundle UUID	Metadata.json	File URLs	File DOS URL\n\
-        NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	crai	NWD692354.b38.irc.v1.cram.crai	5a00cc38-2f8d-4d34-98e0-0a847579b988	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		[u'gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram.crai', u's3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram.crai']	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/5a00cc38-2f8d-4d34-98e0-0a847579b988?version=2018-02-28T160411.061319Z\n\
-        NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	cram	NWD692354.b38.irc.v1.cram	b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		[u's3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram', u'gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram']	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5?version=2018-02-28T160408.957538Z\n")
-        pd.read_csv(s, sep='\t')
-
-    def test_bagHandler(self):
-        s = StringIO("Program	Project	Center Name	Submitter Donor ID	Donor UUID	Submitter Donor Primary Site	Submitter Specimen ID	Specimen UUID	Submitter Specimen Type	Submitter Experimental Design	Submitter Sample ID	Sample UUID	Analysis Type	Workflow Name	Workflow Version	File Type	File Path	Upload File ID	Data Bundle UUID	Metadata.json	File URLs	File DOS URL\n\
-        NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	crai	NWD692354.b38.irc.v1.cram.crai	5a00cc38-2f8d-4d34-98e0-0a847579b988	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		[u'gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram.crai', u's3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram.crai']	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/5a00cc38-2f8d-4d34-98e0-0a847579b988?version=2018-02-28T160411.061319Z\n\
-        NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	cram	NWD692354.b38.irc.v1.cram	b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		[u's3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram', u'gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram']	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5?version=2018-02-28T160408.957538Z\n")
-        bag = BagHandler(data=s, bag_info={}, bag_name='manifest')
-        zip_name = bag.create_bag()
-        os.remove(zip_name)
-
     def test_zipRootIsManifest(self):
-        s = StringIO("Program	Project	Center Name	Submitter Donor ID	Donor UUID	Submitter Donor Primary Site	Submitter Specimen ID	Specimen UUID	Submitter Specimen Type	Submitter Experimental Design	Submitter Sample ID	Sample UUID	Analysis Type	Workflow Name	Workflow Version	File Type	File Path	Upload File ID	Data Bundle UUID	Metadata.json	File URLs	File DOS URL\n\
+        s = "Program	Project	Center Name	Submitter Donor ID	Donor UUID	Submitter Donor Primary Site	Submitter Specimen ID	Specimen UUID	Submitter Specimen Type	Submitter Experimental Design	Submitter Sample ID	Sample UUID	Analysis Type	Workflow Name	Workflow Version	File Type	File Path	Upload File ID	Data Bundle UUID	Metadata.json	File URLs	File DOS URL\n\
         NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	crai	NWD692354.b38.irc.v1.cram.crai	5a00cc38-2f8d-4d34-98e0-0a847579b988	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		[u'gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram.crai', u's3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram.crai']	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/5a00cc38-2f8d-4d34-98e0-0a847579b988?version=2018-02-28T160411.061319Z\n\
-        NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	cram	NWD692354.b38.irc.v1.cram	b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		[u's3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram', u'gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram']	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5?version=2018-02-28T160408.957538Z\n")
+        NHLBI TOPMed: Whole Genome Sequencing and Related Phenotypes in the Framingham Heart Study	Framingham	Broad	20428	09df7aef-246a-57eb-9685-e1d4d18b55ab	BLOOD	SRS1353998	2923638f-0784-5704-8d93-5b97b4ca3092	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	NWD692354	dd8337dd-f731-5c3b-9a03-bdae77ca47a9	alignment	topmed-spinnaker	Alpha Build 1	cram	NWD692354.b38.irc.v1.cram	b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5	50cfaf90-0998-5ef5-aa0b-cfaea71d5a7d		[u's3://nih-nhlbi-datacommons/NWD692354.b38.irc.v1.cram', u'gs://topmed-irc-share/genomes/NWD692354.b38.irc.v1.cram']	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/b4cf8998-34a1-4e00-aa23-bcdf8d6b23b5?version=2018-02-28T160408.957538Z\n"
         bag = BagHandler(data=s, bag_info={}, bag_name='manifest')
         zip_name = bag.create_bag()
         with zipfile.ZipFile(zip_name) as myzip:
@@ -61,6 +21,62 @@ def test_zipRootIsManifest(self):
                     self.assertIn('data/', name)
         os.remove(zip_name)
 
+    def testDemoData(self):
+        data = ("Program	Project	Center Name	Submitter Donor ID	Donor UUID	Submitter Donor Primary Site	Submitter Specimen ID	Specimen UUID	Submitter Specimen Type	Submitter Experimental Design	Submitter Sample ID	Sample UUID	Analysis Type	Workflow Name	Workflow Version	File Type	File Path	Upload File ID	Data Bundle UUID	Metadata.json	File URLs	File DOS URI\n\
+NIH Data Commons	NIH Data Commons Pilot	Broad Public Datasets	ABC123456	c2b4c298-4d80-4aaa-bddf-20c15d184af3	Blood	NA12878_2	bfcc3266-340a-5751-8db1-d661163ac8e5	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	H06JUADXX130110_1	c774934f-4100-44bf-8df9-8d4e509c088d	none	test workflow	Development	bam	H06JUADXX130110.1.ATCACGAT.20k_reads.bam	60936d97-6358-4ce3-8136-d5776186ee21	dd04fbf3-2a51-4c72-8038-da7094b8da55		gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/60936d97-6358-4ce3-8136-d5776186ee21?version=2018-03-23T123738.145535Z")
+        bag = BagHandler(data=data, bag_info={}, bag_name='manifest')
+        (participants, sample) = bag.convert_to_participant_and_sample()
+        self.assertListEqual(
+            sorted(['c2b4c298-4d80-4aaa-bddf-20c15d184af3']),
+            sorted(participants))
+        self.assertEquals(len(sample), 1)
+        row = sample[0]
+        self.assertEquals(row['participant'], 'c2b4c298-4d80-4aaa-bddf-20c15d184af3')
+        self.assertEquals(row['gs_url1'], 'gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam')
+        self.assertFalse('s3_url1' in row)
+
+    def testWriteCsv(self):
+        data = ("Program	Project	Center Name	Submitter Donor ID	Donor UUID	Submitter Donor Primary Site	Submitter Specimen ID	Specimen UUID	Submitter Specimen Type	Submitter Experimental Design	Submitter Sample ID	Sample UUID	Analysis Type	Workflow Name	Workflow Version	File Type	File Path	Upload File ID	Data Bundle UUID	Metadata.json	File URLs	File DOS URI\n\
+        NIH Data Commons	NIH Data Commons Pilot	Broad Public Datasets	ABC123456	c2b4c298-4d80-4aaa-bddf-20c15d184af3	Blood	NA12878_2	bfcc3266-340a-5751-8db1-d661163ac8e5	Normal - Blood	Seq_DNA_SNP_CNV; Seq_DNA_WholeGenome	H06JUADXX130110_1	c774934f-4100-44bf-8df9-8d4e509c088d	none	test workflow	Development	bam	H06JUADXX130110.1.ATCACGAT.20k_reads.bam	60936d97-6358-4ce3-8136-d5776186ee21	dd04fbf3-2a51-4c72-8038-da7094b8da55		gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam	dos://dos-dss.ucsc-cgp-dev.org/ga4gh/dos/v1/dataobjects/60936d97-6358-4ce3-8136-d5776186ee21?version=2018-03-23T123738.145535Z")
+        bag = BagHandler(data=data, bag_info={}, bag_name='manifest')
+        zip_name = bag.create_bag()
+        with zipfile.ZipFile(zip_name) as myzip:
+            for name in myzip.namelist():
+                if 'sample' in name:
+                    sample = myzip.open(name)
+                    row = sample.read()
+                    sampleid = 'entity:sample_id'
+                    self.assertEqual(sampleid, row[:len(sampleid)])
+
+        os.remove(zip_name)
+
+    def test_process_demo_data(self):
+        with open('test/manifest_with_crai_cram_bai.tsv', 'r') as tsv:
+            lines = tsv.readlines()
+        data = "\n".join(lines)
+        bag = BagHandler(data=data, bag_info={}, bag_name='manifest')
+        participants, max_files_in_sample, protocols = bag.participants_and_max_files_in_sample_and_protocols()
+        self.assertEqual(len(participants), 13)
+        self.assertEqual(len(protocols), 2)
+        self.assertEqual(max_files_in_sample, 4)
+        samples = bag.samples(max_files_in_sample, protocols)
+        self.assertEqual(len(samples), 13)
+
+        # Ensure every row has file_dos_uri<suffix> column
+        for suffix in [str(i) for i in range(1, max_files_in_sample + 1)]:
+            for i in range(0, len(samples)):
+                self.assertIn('file_dos_uri' + suffix, samples[i].keys())
+
+        first_row_keys = sorted(samples[0])
+        for i in range(0, len(samples)):
+            # Ensure all rows have the same keys
+            self.assertListEqual(first_row_keys, sorted(samples[i].keys()))
+        # Ensure there is no column with a 0 (zero) in its name (there was in
+        # the past before this test was written -- make sure it doesn't creep
+        # back in).
+        for key in first_row_keys:
+            self.assertNotIn('0', key)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/webservice.py b/webservice.py
index cfbfa4a..99ed805 100644
--- a/webservice.py
+++ b/webservice.py
@@ -391,7 +391,7 @@ def export_to_firecloud():
                 'data_type': 'TOPMed',
                 'date_created': datetime.datetime.now().isoformat()}
     # Instantiate bag object.
-    bag = BagHandler(data=StringIO(response_obj.get_data()),
+    bag = BagHandler(data=response_obj.get_data(),
                      bag_info=bag_info,
                      bag_name=bag_name)
     # Pathname of compressed bag.
@@ -402,11 +402,9 @@ def export_to_firecloud():
     fc_lambda_protocol = os.getenv("FC_LAMBDA_PROTOCOL", "https")
     fc_lambda_domain = os.getenv("FC_LAMBDA_DOMAIN", domain)
     fc_lambda_port = os.getenv("FC_LAMBDA_PORT", '443')
-    url = (fc_lambda_protocol +
-           '://' + fc_lambda_domain +
-           ':' + fc_lambda_port +
-           '/api/exportBag?workspace=' + workspace +
-           '&namespace=' + namespace)
+    url = '{}://{}:{}/api/exportBag?workspace={}&namespace={}'.format(
+        fc_lambda_protocol,  fc_lambda_domain, fc_lambda_port,
+        workspace, namespace)
     logger.info("going to hit {}".format(url))
     headers = {'Content-Type': 'application/octet-stream',
                'Accept': 'application/json',