Skip to content

Thoughts on Solr Indexing

Ankit Lohani edited this page Jun 21, 2018 · 12 revisions

I have stored the JSON-LD documents in MongoDB, and the next step is to push them to Solr. Solr requires a schema that makes room for the incoming data. I will discuss my approach to indexing the data in Solr here -

Before we proceed, let me give a sample of what our data looks like -

{
   "@context":"http://schema.org",
   "@type":"DataRecord",
   "identifier":"biosamples:SAMEA103996091",
   "dateModified":"2018-01-20T13:42:43.039Z",
   "dateCreated":"2016-08-30T23:00:00Z",
   "isPartOf":{
      "@type":"Dataset",
      "@id":"https://www.ebi.ac.uk/biosamples/samples"
   },
   "datasetPartOf":{
      "@type":"Dataset",
      "@id":"https://www.ebi.ac.uk/biosamples/samples"
   },
   "mainEntity":{
      "dataset":[
         "http://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-4567"
      ],
      "@context":"http://schema.org",
      "@type":[
         "BioChemEntity",
         "Sample"
      ],
      "name":"source hESC-H9-primed_3",
      "url":"https://www.ebi.ac.uk/biosamples/sample/SAMEA103996091",
      "identifiers":[
         "biosamples:SAMEA103996091"
      ],
      "additionalProperty":[
         {
            "name":"Organism",
            "value":"Homo sapiens",
            "valueReference":[
               {
                  "url":"http://purl.obolibrary.org/obo/NCBITaxon_9606",
                  "@type":"CategoryCode"
               }
            ],
            "@type":"PropertyValue"
         },
         {
            "name":"cell line",
            "value":"H9",
            "valueReference":[
               {
                  "url":"http://www.ebi.ac.uk/efo/EFO_0003045",
                  "@type":"CategoryCode"
               }
            ],
            "@type":"PropertyValue"
         },
         {
            "name":"cell type",
            "value":"embryonic stem cell",
            "valueReference":[
               {
                  "url":"http://purl.obolibrary.org/obo/CL_0002322",
                  "@type":"CategoryCode"
               }
            ],
            "@type":"PropertyValue"
         },
         {
            "name":"genetic modification",
            "value":"transfected with doxycycline inducible MCRS1, THAP11, TET1 construct",
            "@type":"PropertyValue"
         },
         {
            "name":"growth condition",
            "value":"W8 media",
            "@type":"PropertyValue"
         },
         {
            "name":"phenotype",
            "value":"primed pluripotent state",
            "@type":"PropertyValue"
         }
      ]
   }
}

Let us break this nested JSON-LD into parts -

Clone this wiki locally