diff --git a/CHANGELOG.md b/CHANGELOG.md index e7e9d7432..2dfc11594 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# v0.16.2 +- Deprecates heavy index on large tables: + - Adds a new table for tracking scope ranges. + - Converts the former `source_specimen` column on `expression_quantification` to a `SERIAL` integer. + - Makes tabular import keep track of ranges per-specimen in the new range_definitions table. + - Updates the "optimized" sparse matrix query to use the ranges rather than the former huge index. + - Deprecates the modify-constraints CLI entrypoint (only used internally now). + - Deprecates the expression indexing module, CLI entrypoint, etc. + # v0.16.0 - Separates datasets into own databases: - `DBCursor` and `DBConnection` usage streamlined, typically requires study-scoping (dataset-scoping). diff --git a/pyproject.toml.unversioned b/pyproject.toml.unversioned index d0f02fb1c..6b2cfd2e7 100644 --- a/pyproject.toml.unversioned +++ b/pyproject.toml.unversioned @@ -174,12 +174,10 @@ packages = [ ] "spatialprofilingtoolbox.db.scripts" = [ "create_schema.py", - "modify_constraints.py", "guess_channels_from_object_files.py", "status.py", "retrieve_feature_matrices.py", "drop.py", - "index_expressions_table.py", "drop_ondemand_computations.py" ] "spatialprofilingtoolbox.db.data_model" = [ diff --git a/spatialprofilingtoolbox/db/data_model/performance_tweaks.sql b/spatialprofilingtoolbox/db/data_model/performance_tweaks.sql index 51e727834..ae362d97d 100644 --- a/spatialprofilingtoolbox/db/data_model/performance_tweaks.sql +++ b/spatialprofilingtoolbox/db/data_model/performance_tweaks.sql @@ -14,6 +14,16 @@ ADD UNIQUE (feature_specification, specifier, ordinality) ; ALTER TABLE two_cohort_feature_association_test ADD UNIQUE (selection_criterion_1, selection_criterion_2, test, p_value, feature_tested) ; +ALTER TABLE expression_quantification +ADD range_identifier_integer SERIAL ; + +CREATE TABLE range_definitions ( + scope_identifier 
VARCHAR(512), + tablename VARCHAR(512), + lowest_value INT, + highest_value INT +) ; + CREATE EXTENSION IF NOT EXISTS tablefunc; CREATE TABLE sample_strata ( diff --git a/spatialprofilingtoolbox/db/expressions_table_indexer.py b/spatialprofilingtoolbox/db/expressions_table_indexer.py deleted file mode 100644 index 4d6dbe9cd..000000000 --- a/spatialprofilingtoolbox/db/expressions_table_indexer.py +++ /dev/null @@ -1,145 +0,0 @@ -"""Set up source specimen index on big sparse expression values table.""" - -from spatialprofilingtoolbox.db.database_connection import retrieve_study_names -from spatialprofilingtoolbox.db.database_connection import DBCursor -from spatialprofilingtoolbox.standalone_utilities.log_formats import colorized_logger - -logger = colorized_logger(__name__) - -class ExpressionsTableIndexer: - """Set up source specimen index on big sparse expression values table.""" - - @staticmethod - def ensure_indexed_expressions_tables(database_config_file: str | None, study: str | None = None): - if study is None: - studies = retrieve_study_names(database_config_file) - else: - studies = [study] - for _study in studies: - with DBCursor(database_config_file=database_config_file, study=_study) as cursor: - logger.info('Will create custom index for study %s.', _study) - if not ExpressionsTableIndexer.expressions_table_is_indexed(cursor): - ExpressionsTableIndexer.create_index(cursor) - else: - logger.debug('Expression table for "%s" is already indexed.', _study) - - @staticmethod - def expressions_table_is_indexed(cursor): - columns = ExpressionsTableIndexer.get_expression_quantification_columns(cursor) - return 'source_specimen' in columns - - @staticmethod - def get_expression_quantification_columns(cursor): - cursor.execute(''' - SELECT column_name FROM information_schema.columns - WHERE table_schema = 'public' - AND table_name = 'expression_quantification' ; - ''') - return [row[0] for row in cursor.fetchall()] - - @staticmethod - def create_index(cursor): - 
ETI = ExpressionsTableIndexer #pylint: disable=invalid-name - ETI.log_current_indexes(cursor) - logger.debug('Will create extra index column "source_specimen".') - ETI.create_extra_column(cursor) - ETI.copy_in_source_specimen_values(cursor) - ETI.create_index_on_new_column(cursor) - - @staticmethod - def create_extra_column(cursor): - message = ('Creating column specifically for index, "source_specimen" on ' - '"expression_quantification".') - logger.debug(message) - cursor.execute(''' - ALTER TABLE expression_quantification - ADD COLUMN IF NOT EXISTS source_specimen VARCHAR(512) ; - ''') - ExpressionsTableIndexer.log_current_columns(cursor) - - @staticmethod - def log_current_columns(cursor): - columns = ExpressionsTableIndexer.get_expression_quantification_columns(cursor) - logger.debug('"expression_quantification" columns: %s', columns) - - @staticmethod - def copy_in_source_specimen_values(cursor): - logger.debug('Copying in the source specimen values.') - cursor.execute(''' - UPDATE expression_quantification eq - SET source_specimen=subquery.source_specimen - FROM ( - SELECT - eq2.histological_structure as histological_structure, - eq2.target as target, - sdmp.specimen as source_specimen - FROM expression_quantification eq2 - JOIN histological_structure_identification hsi ON hsi.histological_structure=eq2.histological_structure - JOIN data_file df ON hsi.data_source=df.sha256_hash - JOIN specimen_data_measurement_process sdmp ON df.source_generation_process=sdmp.identifier - ) AS subquery - WHERE subquery.histological_structure=eq.histological_structure - AND subquery.target=eq.target ; - ''') - ExpressionsTableIndexer.log_summary_of_index_values(cursor) - - @staticmethod - def log_summary_of_index_values(cursor): - logger.debug('Inserted values: ') - cursor.execute(''' - SELECT eq.source_specimen FROM expression_quantification eq - LIMIT 5 ; - ''') - first_values = [row[0] for row in cursor.fetchall()] - for value in first_values: - logger.debug(' %s', value) 
- logger.debug(' ...') - cursor.execute('SELECT COUNT(*) FROM expression_quantification ;') - count = cursor.fetchall()[0][0] - logger.debug('%s total values.', count) - - @staticmethod - def create_index_on_new_column(cursor): - logger.debug('Creating index on the new "source_specimen" column.') - cursor.execute(''' - CREATE INDEX expression_source_specimen ON expression_quantification (source_specimen) ; - ''') - ExpressionsTableIndexer.log_current_indexes(cursor) - - @staticmethod - def log_current_indexes(cursor): - cursor.execute(''' - SELECT indexname, indexdef - FROM pg_indexes WHERE tablename='expression_quantification' ; - ''') - rows = cursor.fetchall() - if len(rows) == 0: - logger.debug('No indexes on "expression_quantification".') - else: - logger.debug('Indexes on "expression_quantification":') - logger.debug(' (indexname, indexdef)') - for row in rows: - logger.debug(' %s', row) - - @staticmethod - def drop_index(database_config_file: str | None, study: str | None = None): - if study is None: - studies = retrieve_study_names(database_config_file) - else: - studies = [study] - for _study in studies: - with DBCursor(database_config_file=database_config_file, study=_study) as cursor: - is_indexed = ExpressionsTableIndexer.expressions_table_is_indexed(cursor) - if not is_indexed: - logger.debug('There is no index to drop for "%s" database.', _study) - continue - logger.debug('Will drop "source_specimen" column and index in "%s" database.', _study) - cursor.execute(''' - DROP INDEX IF EXISTS expression_source_specimen ; - ''') - cursor.execute(''' - ALTER TABLE expression_quantification - DROP COLUMN IF EXISTS source_specimen ; - ''') - with DBCursor(database_config_file=database_config_file, study=_study) as cursor: - ExpressionsTableIndexer.log_current_indexes(cursor) diff --git a/spatialprofilingtoolbox/db/scripts/index_expressions_table.py b/spatialprofilingtoolbox/db/scripts/index_expressions_table.py deleted file mode 100644 index 
b588efeea..000000000 --- a/spatialprofilingtoolbox/db/scripts/index_expressions_table.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Create a convenience index column on the sparse expression data table.""" -import argparse -from os.path import abspath -from os.path import expanduser - -from spatialprofilingtoolbox.db.expressions_table_indexer import ExpressionsTableIndexer -from spatialprofilingtoolbox.workflow.common.cli_arguments import add_argument -from spatialprofilingtoolbox.standalone_utilities.log_formats import colorized_logger - -logger = colorized_logger('index-expressions-table') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - prog='spt db index-expressions-table', - description='Create an index on the big sparse expression data table to allow more ' - 'efficient access operations.' - ) - add_argument(parser, 'database config') - parser.add_argument('--drop-index', dest='drop_index', action='store_true') - parser.add_argument('--study', dest='study', default=None, type=str) - args = parser.parse_args() - - logger.info('') - logger.info('spt db index-expressions-table called.') - - database_config_file = abspath(expanduser(args.database_config_file)) - if args.drop_index: - ExpressionsTableIndexer.drop_index(database_config_file, study=args.study) - else: - ExpressionsTableIndexer.ensure_indexed_expressions_tables(database_config_file, study=args.study) diff --git a/spatialprofilingtoolbox/db/scripts/modify_constraints.py b/spatialprofilingtoolbox/db/scripts/modify_constraints.py deleted file mode 100644 index bb13690c5..000000000 --- a/spatialprofilingtoolbox/db/scripts/modify_constraints.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Utility to drop or recreate certain constraints in the single-cell ADI SQL schema. Used to boost -performance of certain operations. 
-""" -import argparse -from os.path import exists -from os.path import abspath -from os.path import expanduser - -try: - import pandas as pd -except ModuleNotFoundError as e: - from spatialprofilingtoolbox.standalone_utilities.module_load_error import \ - SuggestExtrasException - SuggestExtrasException(e, 'db') - -from spatialprofilingtoolbox.db.modify_constraints import toggle_constraints -from spatialprofilingtoolbox.db.modify_constraints import DBConstraintsToggling - -from spatialprofilingtoolbox.standalone_utilities.log_formats import colorized_logger -logger = colorized_logger('modify-constraints') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - prog='spt db modify-constraints', - description='''Drop/recreate constraints on certain tables (the largest ones). - Can be used to wrap bulk import operations. - The status of constraints is written to stdout just before dropping or just after - creation. The meaning of the "connection_type" entry is documented here under "contype": - - https://www.postgresql.org/docs/current/catalog-pg-constraint.html - ''' - ) - parser.add_argument( - '--database-config-file-elevated', - dest='database_config_file_elevated', - type=str, - required=True, - help='The file for database configuration. The user specified must have elevated privilege.' - ) - parser.add_argument( - '--study', - dest='study', - type=str, - required=True, - help='Specifier of the study (short name).' 
- ) - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument( - '--drop', - action='store_true', - default=False, - ) - group.add_argument( - '--recreate', - action='store_true', - default=False, - ) - parser.add_argument( - '--all-tables', - action='store_true', - default=False, - ) - args = parser.parse_args() - - database_config_file_elevated = abspath(expanduser(args.database_config_file_elevated)) - if not exists(database_config_file_elevated): - message = f'Need to supply valid database config filename: {database_config_file_elevated}' - raise FileNotFoundError(message) - - if args.recreate: - db_state = DBConstraintsToggling.RECREATE - elif args.drop: - db_state = DBConstraintsToggling.DROP - else: - raise ValueError('--recreate or --drop must be flagged.') - - toggle_constraints( - database_config_file_elevated, - args.study, - state=db_state, - all_tables=args.all_tables, - ) diff --git a/spatialprofilingtoolbox/ondemand/scripts/cache_expressions_data_array.py b/spatialprofilingtoolbox/ondemand/scripts/cache_expressions_data_array.py index 655668f54..aee7a04d0 100644 --- a/spatialprofilingtoolbox/ondemand/scripts/cache_expressions_data_array.py +++ b/spatialprofilingtoolbox/ondemand/scripts/cache_expressions_data_array.py @@ -8,7 +8,6 @@ from os import getcwd import sys -from spatialprofilingtoolbox.db.expressions_table_indexer import ExpressionsTableIndexer from spatialprofilingtoolbox.workflow.common.structure_centroids import StructureCentroids from spatialprofilingtoolbox.ondemand.defaults import EXPRESSIONS_INDEX_FILENAME from spatialprofilingtoolbox.workflow.common.cli_arguments import add_argument @@ -48,7 +47,6 @@ def main(): message = '%s was not found, will do feature matrix pull after all.' 
logger.info(message, EXPRESSIONS_INDEX_FILENAME) - # ExpressionsTableIndexer.ensure_indexed_expressions_tables(database_config_file) puller = SparseMatrixPuller(database_config_file) puller.pull_and_write_to_files() diff --git a/spatialprofilingtoolbox/workflow/common/sparse_matrix_puller.py b/spatialprofilingtoolbox/workflow/common/sparse_matrix_puller.py index e495ba9de..18615cccc 100644 --- a/spatialprofilingtoolbox/workflow/common/sparse_matrix_puller.py +++ b/spatialprofilingtoolbox/workflow/common/sparse_matrix_puller.py @@ -2,12 +2,10 @@ from typing import cast, Any -from psycopg2.extensions import cursor as Psycopg2Cursor from pandas import DataFrame from numpy import ndarray from numpy import arange # type: ignore -from spatialprofilingtoolbox.db.expressions_table_indexer import ExpressionsTableIndexer from spatialprofilingtoolbox.db.database_connection import retrieve_study_from_specimen from spatialprofilingtoolbox.db.database_connection import retrieve_study_names from spatialprofilingtoolbox.db.database_connection import DBCursor @@ -277,7 +275,6 @@ def _fill_data_arrays_for_study(self, for _specimen in specimens: sparse_entries = self._get_sparse_entries( study_name, - measurement_study, specimen=_specimen, histological_structures=histological_structures, ) @@ -348,24 +345,19 @@ def _get_measurement_study_name(self, study: str) -> str: def _get_sparse_entries(self, study_name: str, - measurement_study: str, specimen: str, histological_structures: set[int] | None = None, ) -> list[tuple[str, str, int, str, str]]: sparse_entries: list = [] number_log_messages = 0 - parameters: list[str | tuple[str, ...]] = [measurement_study, specimen] - if histological_structures is not None: - parameters.append(tuple(str(hs_id) for hs_id in histological_structures)) with DBCursor(database_config_file=self.database_config_file, study=study_name) as cursor: - cursor.execute( - self._get_sparse_matrix_query_specimen_specific( - study_name, - histological_structures is 
not None, - ), - parameters, + query, parameters = self._get_sparse_matrix_query_specimen_specific( + cursor, + specimen, + histological_structures, ) + cursor.execute(query, parameters) total = cursor.rowcount while cursor.rownumber < total - 1: current_number_stored = len(sparse_entries) @@ -379,34 +371,32 @@ def _get_sparse_entries(self, return sparse_entries def _get_sparse_matrix_query_specimen_specific(self, - study: str, - histological_structures_condition: bool = False, - ) -> str: - is_indexed = False - with DBCursor(database_config_file=self.database_config_file, study=study) as cursor: - is_indexed = ExpressionsTableIndexer.expressions_table_is_indexed(cursor) - if is_indexed: - return self._sparse_entries_query_optimized(histological_structures_condition) - return self._sparse_entries_query_unoptimized(histological_structures_condition) + cursor, + specimen: str, + histological_structures: set[int] | None, + ) -> tuple[str, tuple]: + structures_present = histological_structures is not None + parameters: list[str | tuple[str, ...] 
| int] = [] + + range_definition = SparseMatrixPuller._retrieve_expressions_range(cursor, specimen) + query = self._sparse_entries_query(structures_present) + parameters = [range_definition[0], range_definition[1]] + if histological_structures is not None: + parameters.append(tuple(str(hs_id) for hs_id in histological_structures)) + return (query, tuple(parameters)) @staticmethod - def _sparse_entries_query_optimized(histological_structures_condition: bool = False) -> str: - return f''' - -- absorb/ignore first string formatting argument: %s - SELECT - eq.histological_structure, - eq.target, - CASE WHEN discrete_value='positive' THEN 1 ELSE 0 END AS coded_value, - eq.quantity as quantity - FROM expression_quantification eq - WHERE eq.source_specimen=%s - {'AND eq.histological_structure IN %s' if histological_structures_condition else ''} - ORDER BY eq.histological_structure, eq.target - ; + def _retrieve_expressions_range(cursor, scope: str) -> tuple[int, int]: + query = ''' + SELECT lowest_value, highest_value + FROM range_definitions + WHERE scope_identifier=%s AND tablename='expression_quantification' ; ''' + cursor.execute(query, (scope,)) + return cursor.fetchall()[0] @staticmethod - def _sparse_entries_query_unoptimized(histological_structures_condition: bool = False) -> str: + def _sparse_entries_query(histological_structures_condition: bool = False) -> str: return f''' SELECT eq.histological_structure, @@ -414,17 +404,7 @@ def _sparse_entries_query_unoptimized(histological_structures_condition: bool = CASE WHEN discrete_value='positive' THEN 1 ELSE 0 END AS coded_value, eq.quantity as quantity FROM expression_quantification eq - JOIN histological_structure hs - ON eq.histological_structure=hs.identifier - JOIN histological_structure_identification hsi - ON hs.identifier=hsi.histological_structure - JOIN data_file df - ON hsi.data_source=df.sha256_hash - JOIN specimen_data_measurement_process sdmp - ON df.source_generation_process=sdmp.identifier - WHERE 
sdmp.study=%s - AND hs.anatomical_entity='cell' - AND sdmp.specimen=%s + WHERE eq.range_identifier_integer BETWEEN %s AND %s {'AND eq.histological_structure IN %s' if histological_structures_condition else ''} ORDER BY eq.histological_structure, eq.target ; diff --git a/spatialprofilingtoolbox/workflow/tabular_import/parsing/cell_manifests.py b/spatialprofilingtoolbox/workflow/tabular_import/parsing/cell_manifests.py index 0202d5424..956c999fa 100644 --- a/spatialprofilingtoolbox/workflow/tabular_import/parsing/cell_manifests.py +++ b/spatialprofilingtoolbox/workflow/tabular_import/parsing/cell_manifests.py @@ -3,6 +3,7 @@ from io import BytesIO as StringIO import base64 import mmap +from typing import cast import shapefile # type: ignore import pandas as pd @@ -14,6 +15,8 @@ from spatialprofilingtoolbox.workflow.common.file_identifier_schema \ import get_input_filename_by_identifier from spatialprofilingtoolbox.db.source_file_parser_interface import SourceToADIParser +from spatialprofilingtoolbox.workflow.tabular_import.parsing.range_definition import RangeDefinition +from spatialprofilingtoolbox.workflow.tabular_import.parsing.range_definition import RangeDefinitionFactory from spatialprofilingtoolbox.standalone_utilities.log_formats import colorized_logger logger = colorized_logger(__name__) @@ -21,10 +24,96 @@ class CellManifestsParser(SourceToADIParser): """Source file parsing for metadata at the level of the cell manifest set.""" + scope: RangeDefinition | None def __init__(self, fields, **kwargs): super().__init__(fields, **kwargs) self.dataset_design = TabularCellMetadataDesign(**kwargs) + self.scope = None + + def parse(self, + connection, + file_manifest_file, + chemical_species_identifiers_by_symbol, + ): + """Retrieve each cell manifest, and parse records for: + - histological structure identification + - histological structure + - shape file + - expression quantification + """ + timer = PerformanceTimer() + timer.record_timepoint('Initial') + cursor = 
connection.cursor() + timer.record_timepoint('Cursor opened') + get_next = SourceToADIParser.get_next_integer_identifier + histological_structure_identifier_index = get_next('histological_structure', cursor) + shape_file_identifier_index = get_next('shape_file', cursor) + expression_quantification_index = self.get_expression_quantification_last_index(cursor) + 1 + timer.record_timepoint('Retrieved next integer identifiers') + initial_indices: dict[str, int] = { # type: ignore + 'structure': histological_structure_identifier_index, + 'shape file': shape_file_identifier_index, + 'expression quantification': expression_quantification_index, + } + channel_symbols = self.get_channel_symbols(chemical_species_identifiers_by_symbol) + final_indices = {} + file_count = 1 + for _, cell_manifest in self.get_cell_manifests(file_manifest_file).iterrows(): + logger.debug( + 'Considering contents of file "%s".', + cell_manifest['File ID'], + ) + filename = get_input_filename_by_identifier( + input_file_identifier=cell_manifest['File ID'], + file_manifest_filename=file_manifest_file, + ) + self.open_expression_quantification_scope(cell_manifest['Sample ID'], initial_indices['expression quantification']) + final_indices: dict[str, int] = self.parse_cell_manifest( # type: ignore + cursor, + filename, + channel_symbols, + initial_indices, + timer, + chemical_species_identifiers_by_symbol, + ) + self.finalize_expression_quantification_scope(final_indices['expression quantification'] - 1, cursor) + initial_indices = final_indices + timer.record_timepoint('Completed cell manifest parsing') + message = 'Performance report %s:\n%s' + logger.debug(message, file_count, timer.report_string(organize_by='total time spent')) + file_count += 1 + connection.commit() + cursor.close() + self.wrap_up_timer(timer) + + def open_expression_quantification_scope(self, scope_identifier: str, initial_index: int) -> None: + logger.debug('Opening range scope with %s.', initial_index) + self.scope = 
RangeDefinitionFactory.create( + scope_identifier, + initial_index, + 'expression_quantification', + ) + + def finalize_expression_quantification_scope(self, last_value: int, cursor): + logger.debug('Finalizing range scope with %s.', last_value) + RangeDefinitionFactory.finalize(cast(RangeDefinition, self.scope), last_value) + scope = cast(RangeDefinition, self.scope) + cursor.execute(''' + INSERT INTO range_definitions( + scope_identifier, + tablename, + lowest_value, + highest_value + ) VALUES (%s, %s, %s, %s) ; + ''', (scope.scope_identifier, scope.tablename, scope.lowest_value, scope.highest_value)) + + def get_expression_quantification_last_index(self, cursor) -> int: + cursor.execute('SELECT MAX(range_identifier_integer) FROM expression_quantification ;') + last = cursor.fetchall()[0][0] + if last is None: + last = 0 + return last def insert_chunks(self, cursor, @@ -60,8 +149,7 @@ def insert_chunks(self, logger.debug('Starting batch of cells that begins at index %s.', start) timer.record_timepoint('Started per-cell iteration') for j, cell in batch_cells.iterrows(): - histological_structure_identifier = str( - histological_structure_identifier_index) + histological_structure_identifier = str(histological_structure_identifier_index) histological_structure_identifier_index += 1 shape_file_identifier = str(shape_file_identifier_index) shape_file_identifier_index += 1 @@ -119,11 +207,16 @@ def insert_chunks(self, memmap.write(values_file_contents) memmap.seek(0) timer.record_timepoint('Started copy from command for bulk insertion') - cursor.copy_from(memmap, tablename) + if tablename == 'expression_quantification': + cursor.copy_from(memmap, tablename, columns=['histological_structure', 'target', 'quantity', 'unit', 'quantification_method', 'discrete_value', 'discretization_method']) + else: + cursor.copy_from(memmap, tablename) timer.record_timepoint('Finished inserting one chunk') + expression_quantification_index = 
self.get_expression_quantification_last_index(cursor) + 1 return { 'structure' : histological_structure_identifier_index, 'shape file' : shape_file_identifier_index, + 'expression quantification' : expression_quantification_index, } def parse_cell_manifest(self, @@ -189,58 +282,6 @@ def get_channel_symbols(self, chemical_species_identifiers_by_symbol): logger.warning('Cannot find channel metadata for %s .', str(missing)) return symbols.difference(missing) - def parse(self, - connection, - file_manifest_file, - chemical_species_identifiers_by_symbol, - ): - """Retrieve each cell manifest, and parse records for: - - histological structure identification - - histological structure - - shape file - - expression quantification - """ - timer = PerformanceTimer() - timer.record_timepoint('Initial') - cursor = connection.cursor() - timer.record_timepoint('Cursor opened') - get_next = SourceToADIParser.get_next_integer_identifier - histological_structure_identifier_index = get_next('histological_structure', cursor) - shape_file_identifier_index = get_next('shape_file', cursor) - timer.record_timepoint('Retrieved next integer identifiers') - initial_indices = { - 'structure' : histological_structure_identifier_index, - 'shape file' : shape_file_identifier_index, - } - channel_symbols = self.get_channel_symbols(chemical_species_identifiers_by_symbol) - final_indices = {} - file_count = 1 - for _, cell_manifest in self.get_cell_manifests(file_manifest_file).iterrows(): - logger.debug( - 'Considering contents of file "%s".', - cell_manifest['File ID'], - ) - filename = get_input_filename_by_identifier( - input_file_identifier=cell_manifest['File ID'], - file_manifest_filename=file_manifest_file, - ) - final_indices = self.parse_cell_manifest( - cursor, - filename, - channel_symbols, - initial_indices, - timer, - chemical_species_identifiers_by_symbol, - ) - initial_indices = final_indices - timer.record_timepoint('Completed cell manifest parsing') - message = 'Performance 
report %s:\n%s' - logger.debug(message, file_count, timer.report_string(organize_by='total time spent')) - file_count += 1 - connection.commit() - cursor.close() - self.wrap_up_timer(timer) - def get_number_known_cells(self, sha256_hash, cursor): query = ( 'SELECT COUNT(*) ' diff --git a/spatialprofilingtoolbox/workflow/tabular_import/parsing/range_definition.py b/spatialprofilingtoolbox/workflow/tabular_import/parsing/range_definition.py new file mode 100644 index 000000000..b4c925f84 --- /dev/null +++ b/spatialprofilingtoolbox/workflow/tabular_import/parsing/range_definition.py @@ -0,0 +1,25 @@ +"""Data structure representing a convenience scope or range for database records.""" +from attr import define + +@define +class RangeDefinition: + """Data structure representing a convenience scope or range for database records.""" + scope_identifier: str + tablename: str + lowest_value: int + highest_value: int | None + +class RangeDefinitionFactory: + """Create and finalize range definitions.""" + @classmethod + def create(cls, + scope_identifier: str, + initial_index: int, + tablename: str, + ) -> RangeDefinition: + lowest_value = initial_index + return RangeDefinition(scope_identifier, tablename, lowest_value, None) + + @classmethod + def finalize(cls, range_definition: RangeDefinition, highest_value: int): + range_definition.highest_value = highest_value diff --git a/test/db/module_tests/constraint_info.txt b/test/db/module_tests/constraint_info.txt deleted file mode 100644 index 6066e275e..000000000 --- a/test/db/module_tests/constraint_info.txt +++ /dev/null @@ -1,10 +0,0 @@ -connection_type constraint_name relation_name -f expression_quantification1 expression_quantification -f expression_quantification2 expression_quantification -f expression_quantification1 histological_structure -f histological_structure_identification1 histological_structure -f histological_structure_identification1 histological_structure_identification -f 
histological_structure_identification2 histological_structure_identification -f histological_structure_identification3 histological_structure_identification -f histological_structure_identification4 histological_structure_identification -f histological_structure_identification3 shape_file diff --git a/test/db/module_tests/test_expression_table_indexing.sh b/test/db/module_tests/test_expression_table_indexing.sh deleted file mode 100644 index f6bae62c0..000000000 --- a/test/db/module_tests/test_expression_table_indexing.sh +++ /dev/null @@ -1,16 +0,0 @@ - -function consider_exit() { - if [[ "$1" != "0" ]]; - then - exit 1 - fi -} - -spt db index-expressions-table --database-config-file .spt_db.config.container -status=$?; consider_exit $status - -spt db index-expressions-table --database-config-file .spt_db.config.container -status=$?; consider_exit $status - -spt db index-expressions-table --database-config-file .spt_db.config.container --drop-index -status=$?; consider_exit $status diff --git a/test/db/unit_tests/test_drop_recreate_database_constraints.sh b/test/db/unit_tests/test_drop_recreate_database_constraints.sh deleted file mode 100755 index 1565b553c..000000000 --- a/test/db/unit_tests/test_drop_recreate_database_constraints.sh +++ /dev/null @@ -1,31 +0,0 @@ -spt db modify-constraints --database-config-file .spt_db.config.container --study "Melanoma intralesional IL2" --drop >/dev/null 2>err_log.2.txt -spt db modify-constraints --database-config-file .spt_db.config.container --study "Melanoma intralesional IL2" --recreate > constraint_info.txt.comp 2>err_log.3.txt -diff module_tests/constraint_info.txt constraint_info.txt.comp >/dev/null -status=$? -if [[ "$status" != "0" ]]; -then - echo "Drop/recreate FAILED." 
- filename=module_tests/constraint_info.txt - echo $filename ":" - cat $filename - echo '' - filename=constraint_info.txt.comp - echo $filename ":" - cat $filename - echo '' -fi -rm constraint_info.txt.comp - -if [[ "$status" == "0" ]]; -then - rm err_log.2.txt err_log.3.txt - exit 0 -else - echo "From drop..." - cat err_log.2.txt - echo '' - echo "From recreate..." - cat err_log.3.txt - rm err_log.2.txt err_log.3.txt - exit 1 -fi diff --git a/version.txt b/version.txt index 04a373efe..3b7660ac7 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.16.0 +0.16.2 \ No newline at end of file