diff --git a/.gitignore b/.gitignore index f7a830ba61..0edba55da0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,9 @@ socorro/unittest/config/*.py *.log distribute*.tar.gz analysis/build/ +breakpad/ breakpad.tar.gz +depot_tools/ nosetests.xml scripts/config/*.py socorro/unittest/config/*.py diff --git a/alembic/versions/5bafdc19756c_drop_server_status_table.py b/alembic/versions/5bafdc19756c_drop_server_status_table.py new file mode 100644 index 0000000000..8d2a6f330f --- /dev/null +++ b/alembic/versions/5bafdc19756c_drop_server_status_table.py @@ -0,0 +1,40 @@ +"""drop server_status table + +Revision ID: 5bafdc19756c +Revises: 89ef86a3d57a +Create Date: 2016-09-13 15:56:53.898014 + +""" + +# revision identifiers, used by Alembic. +revision = '5bafdc19756c' +down_revision = '89ef86a3d57a' + +from alembic import op +from socorrolib.lib import citexttype, jsontype, buildtype +from socorrolib.lib.migrations import fix_permissions, load_stored_proc + +import sqlalchemy as sa +from sqlalchemy import types +from sqlalchemy.dialects import postgresql +from sqlalchemy.sql import table, column + +from sqlalchemy.dialects import postgresql + + +def upgrade(): + op.drop_table('server_status') + + +def downgrade(): + op.create_table('server_status', + sa.Column('avg_process_sec', sa.REAL(), autoincrement=False, nullable=True), + sa.Column('avg_wait_sec', sa.REAL(), autoincrement=False, nullable=True), + sa.Column('date_created', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False), + sa.Column('date_oldest_job_queued', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=True), + sa.Column('date_recently_completed', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=True), + sa.Column('id', sa.INTEGER(), nullable=False), + sa.Column('processors_count', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('waiting_job_count', sa.INTEGER(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('id', name=u'server_status_pkey') + ) diff --git a/analysis/hbase_schema b/analysis/hbase_schema deleted file mode 100644 index 1068bc4bb1..0000000000 --- a/analysis/hbase_schema +++ /dev/null @@ -1,12 +0,0 @@ -create 'crash_report_signatures', {NAME => 'counters', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'json', COMPRESSION => 'LZO', VERSIONS => '3', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports', {NAME => 'flags', VERSIONS => '1', COMPRESSION => 'LZO', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'ids', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'meta_data', COMPRESSION => 'LZO', VERSIONS => '3', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'processed_data', VERSIONS => '3', COMPRESSION => 'LZO', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'raw_data', COMPRESSION => 'LZO', VERSIONS => '3', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'timestamps', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_hang_id', {NAME => 'ids', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', 
BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_hang_id_submitted_time', {NAME => 'ids', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_legacy_processed', {NAME => 'ids', COMPRESSION => 'NONE', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_legacy_submitted_time', {NAME => 'ids', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_legacy_unprocessed_flag', {NAME => 'ids', COMPRESSION => 'NONE', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'processor_state', VERSIONS => '5', COMPRESSION => 'NONE', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_priority_processed', {NAME => 'ids', COMPRESSION => 'NONE', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_signature_ooid', {NAME => 'ids', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_submitted_time', {NAME => 'ids', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'crash_reports_index_unprocessed_flag', {NAME => 'ids', VERSIONS => '1', COMPRESSION => 'NONE', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'processor_state', COMPRESSION => 'NONE', VERSIONS => '5', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} -create 'metrics', {NAME => 'counters', COMPRESSION => 'LZO', VERSIONS => '1', TTL => '2147483647', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} diff --git a/config/cron_submitter.ini-dist b/config/cron_submitter.ini-dist index 1da48e378b..7659d4a29a 100644 --- a/config/cron_submitter.ini-dist +++ b/config/cron_submitter.ini-dist @@ -119,41 +119,6 @@ # converter: str dump_file_suffix='.dump' - # name: forbidden_keys - # doc: a comma delimited list of keys banned from the processed crash in HBase - # converter: socorro.external.hbase.connection_context. - forbidden_keys='email, url, user_id, exploitability' - - # name: hbase_connection_pool_class - # doc: the class responsible for pooling and giving out HBaseconnections - # converter: configman.converters.class_converter - hbase_connection_pool_class='socorro.external.hbase.connection_context.HBaseConnectionContextPooled' - - # name: hbase_host - # doc: Host to HBase server - # converter: str - hbase_host='localhost' - - # name: hbase_port - # doc: Port to HBase server - # converter: int - hbase_port='9090' - - # name: hbase_timeout - # doc: timeout in milliseconds for an HBase connection - # converter: int - hbase_timeout='5000' - - # name: number_of_retries - # doc: Max. 
number of retries when fetching from hbaseClient - # converter: int - number_of_retries='2' - - # name: source_implementation - # doc: a class for a source of raw crashes - # converter: configman.converters.class_converter - source_implementation='socorro.external.hbase.crashstorage.HBaseCrashStorage' - # name: sql # doc: an sql string that selects crash_ids # converter: str @@ -195,4 +160,3 @@ # doc: the number of crashes to submit (all, forever, 1...) # converter: str number_of_submissions='all' - diff --git a/config/crontabber.ini-dist b/config/crontabber.ini-dist index a70d2ce758..1dc0cc28ca 100644 --- a/config/crontabber.ini-dist +++ b/config/crontabber.ini-dist @@ -245,7 +245,6 @@ socorro.cron.jobs.matviews.CrashAduByBuildSignatureCronApp|1d|07:30 #socorro.cron.jobs.ftpscraper.FTPScraperCronApp|1h #socorro.cron.jobs.automatic_emails.AutomaticEmailsCronApp|1h - socorro.cron.jobs.serverstatus.ServerStatusCronApp|5m socorro.cron.jobs.reprocessingjobs.ReprocessingJobsApp|5m socorro.cron.jobs.matviews.SignatureSummaryProductsCronApp|1d|05:00 socorro.cron.jobs.matviews.SignatureSummaryInstallationsCronApp|1d|05:00 diff --git a/config/middleware.ini-dist b/config/middleware.ini-dist index 7f88c47675..d5e6d46949 100644 --- a/config/middleware.ini-dist +++ b/config/middleware.ini-dist @@ -62,40 +62,6 @@ # umask to use for new files #umask=18 - [[hb]] - - #+include ./common_hb.ini - - # delays in seconds between retries - #backoff_delays=10, 30, 60, 120, 300 - - # the suffix used to identify a dump file (for use in temp files) - #dump_file_suffix=.dump - - # the class responsible for proving an hbase connection - #hbase_connection_context_class=socorro.external.hb.connection_context.HBaseConnectionContext - - # Host to HBase server - #hbase_host=localhost - - # Port to HBase server - #hbase_port=9090 - - # timeout in milliseconds for an HBase connection - #hbase_timeout=5000 - - # the maximum number of new crashes to yield at a time - #new_crash_limit=1000000 - - # a local filesystem path where dumps temporarily during processing - #temporary_file_system_storage_path=/tmp - - # a class that will execute transactions - #transaction_executor_class=socorro.database.transaction_executor.TransactionExecutorWithInfiniteBackoff - - # seconds between log during retries - #wait_log_interval=10 - [[logging]] #+include ./common_logging.ini @@ -263,59 +229,6 @@ # see "resource.fs.umask" for the default or override it here #umask=18 -[hbase] - - # delays in seconds between retries - # see "resource.hb.backoff_delays" for the default or override it here - #backoff_delays=10, 30, 60, 120, 300 - - # the suffix used to identify a dump file (for use in temp files) - # see "resource.hb.dump_file_suffix" for the default or override it here - #dump_file_suffix=.dump - - # a list of keys not allowed in a redacted processed crash - # see "resource.redactor.forbidden_keys" for the default or override it here - #forbidden_keys=url, email, user_id, exploitability,json_dump.sensitive,upload_file_minidump_flash1.json_dump.sensitive,upload_file_minidump_flash2.json_dump.sensitive,upload_file_minidump_browser.json_dump.sensitive,memory_info - - # None - #hbase_class=socorro.external.hb.crashstorage.HBaseCrashStorage - - # the class responsible for proving an hbase connection - # see "resource.hb.hbase_connection_context_class" for the default or override it here - #hbase_connection_context_class=socorro.external.hb.connection_context.HBaseConnectionContext - - # Host to HBase server - # see "resource.hb.hbase_host" for the 
default or override it here - #hbase_host=localhost - - # Port to HBase server - # see "resource.hb.hbase_port" for the default or override it here - #hbase_port=9090 - - # timeout in milliseconds for an HBase connection - # see "resource.hb.hbase_timeout" for the default or override it here - #hbase_timeout=5000 - - # the maximum number of new crashes to yield at a time - # see "resource.hb.new_crash_limit" for the default or override it here - #new_crash_limit=1000000 - - # the name of the class that implements a 'redact' method - # see "resource.redactor.redactor_class" for the default or override it here - #redactor_class=socorro.external.crashstorage_base.Redactor - - # a local filesystem path where dumps temporarily during processing - # see "resource.hb.temporary_file_system_storage_path" for the default or override it here - #temporary_file_system_storage_path=/tmp - - # a class that will execute transactions - # see "resource.hb.transaction_executor_class" for the default or override it here - #transaction_executor_class=socorro.database.transaction_executor.TransactionExecutorWithInfiniteBackoff - - # seconds between log during retries - # see "resource.hb.wait_log_interval" for the default or override it here - #wait_log_interval=10 - [http] [[correlations]] @@ -336,9 +249,9 @@ [implementations] # list of packages for service implementations - #implementation_list=psql: socorro.external.postgresql, hbase: socorro.external.hb, es: socorro.external.es, fs: socorro.external.fs, http: socorro.external.http, rabbitmq: socorro.external.rabbitmq + #implementation_list=psql: socorro.external.postgresql, boto: socorro.external.boto, es: socorro.external.es, fs: socorro.external.fs, http: socorro.external.http, rabbitmq: socorro.external.rabbitmq - # comma separated list of class overrides, e.g `Crashes: hbase` + # comma separated list of class overrides, e.g `Crashes: boto` #service_overrides=CrashData: fs, Correlations: http, CorrelationsSignatures: http, SuperSearch: es, Priorityjobs: rabbitmq, Query: es [introspection] diff --git a/docs/configuring-crash-stats.rst b/docs/configuring-crash-stats.rst index c980ae4ee3..3491ba34f7 100644 --- a/docs/configuring-crash-stats.rst +++ b/docs/configuring-crash-stats.rst @@ -142,7 +142,7 @@ underlying data stores: .. 
code-block:: bash - implementations__implementation_list='psql: socorro.external.postgresql, fs: socorro.external.filesystem, es: socorro.external.es, http: socorro.external.http, rabbitmq: socorro.external.rabbitmq, hb: socorro.external.fs' + implementations__implementation_list='psql: socorro.external.postgresql, fs: socorro.external.filesystem, es: socorro.external.es, http: socorro.external.http, rabbitmq: socorro.external.rabbitmq' implementations__service_overrides='Correlations: http, CorrelationsSignatures: http, SuperSearch: es, Priorityjobs: rabbitmq, Search: es, Query: es' # Pluggable Elasticsearch implementation elasticsearch__elasticsearch_class='socorro.external.es.connection_context.ConnectionContext' diff --git a/docs/configuring-socorro.rst b/docs/configuring-socorro.rst index 34bc10f3be..aa5fbe262a 100644 --- a/docs/configuring-socorro.rst +++ b/docs/configuring-socorro.rst @@ -188,7 +188,7 @@ in AWS using Consul at https://github.com/mozilla/socorro-infra/ Socorro has a very powerful and expressive configuration system, and can be configured to read from and write to a number of different data stores -(S3, Elasticsearch, HBase, PostgreSQL) and use queues (RabbitMQ) +(S3, Elasticsearch, PostgreSQL) and use queues (RabbitMQ) For instance, to have processor store crashes to both to the filesystem and to ElasticSearch: diff --git a/docs/development/addaservice.rst b/docs/development/addaservice.rst index f76e5c45c6..a69b9fab6d 100644 --- a/docs/development/addaservice.rst +++ b/docs/development/addaservice.rst @@ -16,7 +16,7 @@ URL with parameters. Documentation for each service is available in the Those services are not containing any code, but are only interfaces. They are using other resources from the external module. That external module is composed of one submodule for each external resource we are using. For example, -there is a PostgreSQL submodule, an elasticsearch submodule and an HBase +there is a PostgreSQL submodule, an elasticsearch submodule and a boto (AWS S3) submodule. You will also find some common code among external resources in diff --git a/docs/development/api/crashstorage.rst b/docs/development/api/crashstorage.rst index 0366a61f49..063e4cceef 100644 --- a/docs/development/api/crashstorage.rst +++ b/docs/development/api/crashstorage.rst @@ -32,16 +32,16 @@ Concrete implementation: * `NullCrashStorage`: Silently ignores everything it is told to do. Examples of other concrete implementations are: `PostgreSQLCrashStorage`, -`HBaseCrashStorage`. +`BotoCrashStorage`. CrashStorage containers for aggregating multiple crash storage implementations: * `PolyCrashStorage`: Container for other crash storage systems. * `FallbackCrashStorage`: Container for two other crash storage systems, a primary and a secondary. Attempts on the primary, if it fails it will - fallback to the secondary. In use when we had primary/secondary HBase. - Can be heterogeneous, example: Hbase + filesystem and use crashmovers to - move from filesystem into hbase when hbase comes back. + fallback to the secondary. In use when we have cutover between data stores. + Can be heterogeneous, example: S3 + filesystem and use crashmovers to + move from filesystem into S3 when S3 comes back. * `PrimaryDeferredStorage`: Container for two different storage systems and a predicate function. If predicate is false, store in primary, otherwise store in secondary. Usecase: situation where we want crashes to be put @@ -142,7 +142,7 @@ Use cases: * For Mozilla use by the collectors. 
* For other users, you can use this class as your primary storage instead of -HBase. Be sure to implement this in collectors, crashmovers, processors and +S3. Be sure to implement this in collectors, crashmovers, processors and middleware (depending on which components you use in your configuration). `Important ops note:` @@ -168,48 +168,6 @@ Classes: in-filesystem queueing techniques so that we know which crashes are new. Backwards compatible with `socorro.external.filesystem` (aka the 2009 system). -socorro.external.hb -------------------- - -socorro.external.hb.crashstorage -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -This is used by crashmovers, processors. In the future, our middleware will -also use this instead of socorro.external.hbase. Can store raw crashes and -dumps. It has no knowledge of aggregations or normalized data. - -*TODO: Needs crash_data to be implemented for middleware* - -Special functions: - -* `crash_id_to_timestamped_row_id`: HBase uses a different primary key than our - internal UUID. Taking the first character and last six, and copying them to the - front of the UUID. First character is the salt for the region, and the next - six provide the date, for ordering. Sometimes you'll see 'ooid' or 'uuid' in - the docs, but we really mean `crash_id`. - -Implementation: - -* `HBaseCrashStorage`: implements access to HBase. HBase schema is defined in - ``analysis/hbase_schema``. - -Exceptions: - -* `BadCrashIdException`: just passes - -socorro.external.hb.connection_context -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -* `HBaseConnection`: all of the code that implements the core connection. Loose - wrapper around a bare socket speaking Thrift protocol. Commit/rollback are - noops. - -* `HBaseConnectionContext`: In production use. A factory in the form of a - functor for creating the HBaseConnection instances. - -* `HBasePersistentConnectionContext`: These are "pooled" so you can use them - again without closing. We don't use it and appears to be broken. - socorro.external.postgresql --------------------------- @@ -303,11 +261,6 @@ socorro.external.filesystem * Preceded `socorro.external.fs`. -socorro.external.hbase -^^^^^^^^^^^^^^^^^^^^^^ - -* Still in use by the middleware for `crash_data`. - socorro.storage ^^^^^^^^^^^^^^^ @@ -331,7 +284,7 @@ Which classes are used with which _app using `PolyCrashStore`. In testing we use `socorro.external.fs`, `socorro.external.rabbitmq`, and `socorro.external.postgresql`. -* `socorro.middleware.middleware_app`: In production: `socorro.external.hbase`. +* `socorro.middleware.middleware_app`: In production: `socorro.external.boto`. In testing: we use `socorro.external.fs` and `socorro.external.postgresql`. * `socorro.collector.submitter_app`: Defines it's own storage classes: @@ -340,8 +293,6 @@ Which classes are used with which _app to get a list of crashstorage ids and uses any other crashstorage as a source for the raw crashes that it pulls. 
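The container classes described earlier in this file (``PolyCrashStorage``, ``FallbackCrashStorage``) are easiest to picture in code. The sketch below is illustrative only and is not Socorro's actual implementation: the class name is invented, and the ``save_raw_crash``/``get_raw_crash`` signatures are simplified assumptions about the crash storage API described above::

    class FallbackCrashStorageSketch(object):
        """Write to a primary store; fall back to a secondary on failure."""

        def __init__(self, primary, secondary):
            self.primary = primary
            self.secondary = secondary

        def save_raw_crash(self, raw_crash, dumps, crash_id):
            try:
                self.primary.save_raw_crash(raw_crash, dumps, crash_id)
            except Exception:
                # e.g. S3 is unreachable: park the crash in the secondary
                # (filesystem) store so a crashmover can replay it later
                self.secondary.save_raw_crash(raw_crash, dumps, crash_id)

        def get_raw_crash(self, crash_id):
            try:
                return self.primary.get_raw_crash(crash_id)
            except Exception:
                return self.secondary.get_raw_crash(crash_id)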
-*TODO: update submitter_app to use the new socorro.external.hb instead of hbase* - Which classes can be used together ---------------------------------- diff --git a/docs/development/api/middleware.rst b/docs/development/api/middleware.rst index c164f6f3e5..aeb0fd603c 100644 --- a/docs/development/api/middleware.rst +++ b/docs/development/api/middleware.rst @@ -44,7 +44,6 @@ Documented services * `/releases/featured/ <#releases-featured-service>`_ * `/report/list/ <#report-list-service>`_ * `/search/ <#search-service>`_ -* `/server_status/ <#server-status-service>`_ * /signaturesummary/ * `/signaturesummary/report_type/architecture/ <#architecture-signature-summary-service>`_ * `/signaturesummary/report_type/exploitability/ <#exploitability-signature-summary-service>`_ @@ -2134,65 +2133,6 @@ If an error occured, the API will return something like this:: HTTP header... We will improve that soon! :) -.. ############################################################################ - Server Status API - ############################################################################ - -Server Status service ---------------------- - -Return the current state of the server and the revisions of Socorro and -Breakpad. - -API specifications -^^^^^^^^^^^^^^^^^^ - -+----------------+-----------------+ -| HTTP method | GET | -+----------------+-----------------+ -| URL | /server_status/ | -+----------------+-----------------+ - -Mandatory parameters -^^^^^^^^^^^^^^^^^^^^ - -None - -Optional parameters -^^^^^^^^^^^^^^^^^^^ - -+----------+---------------+----------------+--------------------------------+ -| Name | Type of value | Default value | Description | -+==========+===============+================+================================+ -| duration | Integer | 12 | Number of lines of data to get.| -+----------+---------------+----------------+--------------------------------+ - -Return value -^^^^^^^^^^^^ - -Return a list of data about the server status at different recent times -(usually the status is updated every 15 minutes), and the current version of -Socorro and Breakpad:: - - { - "hits": [ - { - "id": 1, - "date_recently_completed": "2000-01-01T00:00:00+00:00", - "date_oldest_job_queued": "2000-01-01T00:00:00+00:00", - "avg_process_sec": 2, - "avg_wait_sec": 5, - "waiting_job_count": 3, - "processors_count": 2, - "date_created": "2000-01-01T00:00:00+00:00" - } - ], - "socorro_revision": 42, - "breakpad_revision": 43, - "total": 1 - } - - .. 
############################################################################ Signature Summary API (8 of them) ############################################################################ diff --git a/docs/development/breakpad-schema.graphml b/docs/development/breakpad-schema.graphml deleted file mode 100644 index d67bbafe46..0000000000 --- a/docs/development/breakpad-schema.graphml +++ /dev/null @@ -1,2594 +0,0 @@
[2,594 deleted lines of yEd/GraphML markup omitted: the PostgreSQL schema diagram for Socorro ("GraphML generated with pgschemagraph.py", arranged manually with yEd, last updated 2012-10-22), with table nodes grouped into Core Socorro, Matviews, Email, Admin, Monitoring, "To remove", Monitor/Processors/crontabber, Crontabber, and UI management sections, a color legend, and the public.server_status table that this patch drops.]
diff --git a/docs/development/breakpad-schema.pdf b/docs/development/breakpad-schema.pdf deleted file mode 100644 index 98edde86d3..0000000000 Binary files a/docs/development/breakpad-schema.pdf and /dev/null differ diff --git a/docs/development/crontabber.rst b/docs/development/crontabber.rst index ffc3947d57..0b24181289 100644 --- a/docs/development/crontabber.rst +++ b/docs/development/crontabber.rst @@ -342,8 +342,8 @@ Writing cron apps (aka. jobs) Because of the configurable nature of the ``crontabber`` the actual cron apps can be located anywhere. For example, if it's related to -``HBase`` it could for example be in -``socorro/external/hbase/mycronapp.py``. However, for the most part +``S3`` it could for example be in +``socorro/external/boto/mycronapp.py``. However, for the most part it's probably a good idea to write them in ``socorro/cron/jobs/`` and write one class per file to make it clear. There are already some "sample apps" in there that does nothing except serving as good diff --git a/docs/development/databasetabledesc.rst b/docs/development/databasetabledesc.rst index 6b16809371..a6fc4c7a65 100644 --- a/docs/development/databasetabledesc.rst +++ b/docs/development/databasetabledesc.rst @@ -304,7 +304,7 @@ Contains crash-count summaries of crashes per architecture and number of cores. *correlation_modules* -Will contain crash-counts for modules per correlation. Will be populated daily by pull from Hbase. +Will contain crash-counts for modules per correlation. Will be populated daily by pull from S3. *crashes_by_user, crashes_by_user_view* @@ -428,9 +428,6 @@ priorityjobs processors The registration list for currently active processors. -server_status - Contains summary statistics on the various processor servers. - *UI management tables* diff --git a/docs/development/databasetablesbysource.rst b/docs/development/databasetablesbysource.rst index a6fd595dd3..bd342a41b7 100644 --- a/docs/development/databasetablesbysource.rst +++ b/docs/development/databasetablesbysource.rst @@ -117,7 +117,6 @@ These tables are used by various parts of the application to do other things tha * processor management tables * processors - * server_status * transform_rules * UI management tables diff --git a/docs/development/generalarchitecture.rst b/docs/development/generalarchitecture.rst index e5e8a79a98..14582350ca 100644 --- a/docs/development/generalarchitecture.rst +++ b/docs/development/generalarchitecture.rst @@ -19,8 +19,6 @@ will find the following folders. Here is what each of them contains: +-----------------+-------------------------------------------------------------+ | Folder | Description | +=================+=============================================================+ -| analysis/ | Contains metrics jobs such as mapreduce. Will be moved. | -+-----------------+-------------------------------------------------------------+ | config/ | Contains the Apache configuration for the different parts | | | of the Socorro application.
| +-----------------+-------------------------------------------------------------+ @@ -60,20 +58,8 @@ Here are descriptions of every submodule in there: +-------------------+---------------------------------------------------------------+ | external | Here are APIs related to external resources like databases. | +-------------------+---------------------------------------------------------------+ -| integrationtest | Osolete. | -+-------------------+---------------------------------------------------------------+ -| lib | Different libraries used all over Socorro’s code. | -+-------------------+---------------------------------------------------------------+ | middleware | New-style middleware services place. | +-------------------+---------------------------------------------------------------+ -| monitor | All code related to monitors. | -+-------------------+---------------------------------------------------------------+ -| othertests | Some other tests? | -+-------------------+---------------------------------------------------------------+ -| services | Old-style middleware services place. | -+-------------------+---------------------------------------------------------------+ -| storage | HBase related code. | -+-------------------+---------------------------------------------------------------+ | unittest | All our unit tests are here. | +-------------------+---------------------------------------------------------------+ | webapi | Contains a few tools used by web-based services. | diff --git a/docs/development/generic_app.rst b/docs/development/generic_app.rst index d5483521a3..dbb8a23390 100644 --- a/docs/development/generic_app.rst +++ b/docs/development/generic_app.rst @@ -53,7 +53,7 @@ in the ``TransactionExecutor`` is you can see `here `_ -The idea is that any external module (e.g. HBase, PostgreSQL, etc) +The idea is that any external module (e.g. Boto, PostgreSQL, etc) can define a ``ConnectionContext`` class as per this model. What its job is is to create and close connections and it has to do so in a contextmanager. What that means is that you can do this:: diff --git a/docs/development/glossary/collector.rst b/docs/development/glossary/collector.rst index b52e6b71cc..6487bfe5c5 100644 --- a/docs/development/glossary/collector.rst +++ b/docs/development/glossary/collector.rst @@ -22,4 +22,4 @@ failed saves. This file system would likely be an NFS mounted file system. After a crash is saved, there is an app called :ref:`crashmover-chapter` that -will transfer the crashes to HBase. +will transfer the crashes to S3. diff --git a/docs/development/processor.rst b/docs/development/processor.rst index 337aaba10d..3fe51f3e82 100644 --- a/docs/development/processor.rst +++ b/docs/development/processor.rst @@ -10,7 +10,7 @@ Introduction Socorro Processor is a multithreaded application that applies JSON/dump pairs to the stackwalk_server application, parses the -output, and records the results in the hbase. The processor, coupled +output, and records the results in the S3. The processor, coupled with stackwalk_server, is computationally intensive. Multiple instances of the processor can be run simultaneously from different machines. 
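The ``ConnectionContext`` idea described in the ``generic_app.rst`` hunk above is easier to see with an example; the usage that follows "you can do this::" in that document falls outside the diff context. The sketch below is a hedged illustration, not Socorro's actual class: the name ``ConnectionContextSketch`` is invented and the psycopg2 DSN is an assumed stand-in for configman-provided settings::

    import contextlib

    import psycopg2


    class ConnectionContextSketch(object):
        """Create connections on demand and hand them out as a context manager."""

        def __init__(self, dsn):
            # in Socorro this kind of setting would come from configman
            self.dsn = dsn

        def connection(self):
            # a brand new connection every time one is asked for
            return psycopg2.connect(self.dsn)

        @contextlib.contextmanager
        def __call__(self):
            conn = self.connection()
            try:
                yield conn
            finally:
                # callers never have to remember to close the connection
                conn.close()

    # usage:
    #     context = ConnectionContextSketch("dbname=breakpad user=breakpad_rw")
    #     with context() as connection:
    #         cursor = connection.cursor()
    #         cursor.execute("SELECT 1")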
diff --git a/requirements.txt b/requirements.txt index 0fd9a07472..9c16e384c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,8 +36,6 @@ configman==1.2.11 \ --hash=sha256:16798e8a67467f50f8f9b080c8e70a41f0ff7d2dd061e74423e53bc7ed92fb8d configobj==4.7.2 \ --hash=sha256:515ff923462592e8321df8b48c47e3428f8d406ee22b8de77bef969d1af11171 -hbase-thrift==0.20.4 \ - --hash=sha256:a33e36759cba1a8c31c3c01e943b4ee204604d6ff13dda281f8f3893b23910e6 isodate==0.5.4 \ --hash=sha256:42105c41d037246dc1987e36d96f3752ffd5c0c24834dd12e4fdbe1e79544e31 lxml==3.5.0 \ diff --git a/scripts/build-breakpad.sh b/scripts/build-breakpad.sh index b9bb542ec3..3050d89d66 100755 --- a/scripts/build-breakpad.sh +++ b/scripts/build-breakpad.sh @@ -12,32 +12,53 @@ # any failures in this script should cause the build to fail set -v -e -x -export MAKEFLAGS=-j$(getconf _NPROCESSORS_ONLN) +export MAKEFLAGS +MAKEFLAGS=-j$(getconf _NPROCESSORS_ONLN) -git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git -export PATH=`pwd`/depot_tools:$PATH +if [ ! -d "depot_tools" ]; then + git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git +fi + +cd depot_tools || exit +git pull origin master +echo "using depot_tools version: $(git rev-parse HEAD)" +cd .. + +# Breakpad will rely on a bunch of stuff from depot_tools, like fetch +# So we just put it on the path +# see https://chromium.googlesource.com/breakpad/breakpad/+/master/#Getting-started-from-master +export PATH +PATH=$(pwd)/depot_tools:$PATH # Checkout and build Breakpad -echo "PREFIX: ${PREFIX:=`pwd`/build/breakpad}" -mkdir breakpad -cd breakpad -fetch breakpad +echo "PREFIX: ${PREFIX:=$(pwd)/build/breakpad}" +if [ ! -d "breakpad" ]; then + mkdir breakpad + cd breakpad + fetch breakpad +else + cd breakpad + gclient sync +fi + cd src -mkdir -p ${PREFIX} -rsync -a --exclude="*.git" ./src ${PREFIX}/ -./configure --prefix=${PREFIX} +echo "using breakpad version: $(git rev-parse HEAD)" + +mkdir -p "${PREFIX}" +rsync -a --exclude="*.git" ./src "${PREFIX}"/ +./configure --prefix="${PREFIX}" make install if test -z "${SKIP_CHECK}"; then #FIXME: get this working again #make check true fi -git rev-parse master > ${PREFIX}/revision.txt +git rev-parse master > "${PREFIX}"/revision.txt cd ../.. -cp breakpad/src/src/third_party/libdisasm/libdisasm.a ${PREFIX}/lib/ +cp breakpad/src/src/third_party/libdisasm/libdisasm.a "${PREFIX}"/lib/ # Optionally package everything up if test -z "${SKIP_TAR}"; then - tar -C ${PREFIX}/.. --mode 755 --owner 0 --group 0 -zcf breakpad.tar.gz `basename ${PREFIX}` + tar -C "${PREFIX}"/.. --mode 755 --owner 0 --group 0 -zcf breakpad.tar.gz "$(basename "${PREFIX}")" fi diff --git a/scripts/crons/cron_correlations.sh b/scripts/crons/cron_correlations.sh deleted file mode 100755 index 55f6a8b41d..0000000000 --- a/scripts/crons/cron_correlations.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -.
/etc/socorro/socorrorc - -NAME=`basename $0 .sh` - -# TODO this needs to stay in sync with the correlations.pig script -# FIXME move this bit to pig when we switch to 0.9 and use the new PigStorage -COLUMNS="filename,debug_file,debug_id,module_version,product,version,os_name,reason" -DATE=`date -d 'yesterday' +%y%m%d` -OUTPUT_DATE=`date -d $DATE +%Y%m%d` -OUTPUT_FILE="/mnt/crashanalysis/crash_analysis/correlations/correlations-${OUTPUT_DATE}.txt" -lock $NAME - -pig -param start_date=$DATE -param end_date=$DATE ${SOCORRO_DIR}/analysis/correlations.pig >> /var/log/socorro/cron_correlations.log 2>&1 -fatal $? "pig run failed" - -TMPFILE=`mktemp` -echo $COLUMNS > $TMPFILE -fatal $? "could not write header to tmpfile" - -hadoop fs -cat correlations-${DATE}-${DATE} >> $TMPFILE -fatal $? "hadoop cat failed writing to tmpfile" - -cat $OUTPUT_FILE | psql -U $databaseUserName -h $databaseHost $databaseName -c 'COPY correlations_raw FROM STDIN WITH CSV HEADER' -fatal $? "writing correlations to DB failed" - -mv $TMPFILE $OUTPUT_FILE -fatal $? "could not move tmpfile to output dir" - -hadoop fs -rmr correlations-${DATE}-${DATE} -fatal $? "hadoop cleanup failed" - -unlock $NAME diff --git a/scripts/crons/cron_fixbrokendumps.sh b/scripts/crons/cron_fixbrokendumps.sh deleted file mode 100755 index 6cacbb7622..0000000000 --- a/scripts/crons/cron_fixbrokendumps.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -. /etc/socorro/socorrorc -# Mozilla PHX needs this because of the particular VLAN setup there -# TODO - give cron jobs their own config overrides -. /etc/socorro/socorro-monitor.conf - -NAME=`basename $0 .sh` -lock $NAME -pyjob $NAME startFixBrokenDumps -EXIT_CODE=$? -unlock $NAME - -exit $EXIT_CODE diff --git a/scripts/install.sh b/scripts/install.sh index 40256c70d3..874a462ca3 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -56,7 +56,6 @@ rsync -a sql $BUILD_DIR/application rsync -a wsgi $BUILD_DIR/application rsync -a stackwalk $BUILD_DIR/ rsync -a scripts/stackwalk.sh $BUILD_DIR/stackwalk/bin/ -rsync -a analysis $BUILD_DIR/ rsync -a alembic $BUILD_DIR/application rsync -a webapp-django $BUILD_DIR/ # because this file is served from the parent of the `webapp-django/` directory diff --git a/scripts/integration-test.sh b/scripts/integration-test.sh index 7301982419..f740ccc322 100755 --- a/scripts/integration-test.sh +++ b/scripts/integration-test.sh @@ -131,7 +131,7 @@ function cleanup() { echo "INFO: Terminating background jobs" echo " any kill usage errors below may be ignored" - for p in collector processor middleware + for p in collector processor do # destroy any running processes started by this shell kill $(jobs -p) > /dev/null 2>&1 @@ -206,7 +206,7 @@ function retry() { } #------------------------------------------------------------------------------ -# setup and run the collector, processor and middleware +# setup and run the collector and processor # The collector should be configured using the 2015 method of having the # ability to collect multiple crash types using different end points. 
# breakpad crashes on /submit @@ -255,30 +255,13 @@ function start_2015_socorro_apps() { > processor.log 2>&1 & echo ' processor started' - sleep 1 - socorro middleware \ - --admin.conf=./config/middleware.ini \ - --database.database_hostname=$database_hostname \ - --database.database_username=$database_username \ - --database.database_password=$database_password \ - --rabbitmq.host=$rmq_host \ - --rabbitmq.rabbitmq_user=$rmq_user \ - --rabbitmq.rabbitmq_password=$rmq_password \ - --rabbitmq.virtual_host=$rmq_virtual_host \ - --rabbitmq.standard_queue_name=$rmq_normal_queue_name \ - --rabbitmq.priority_queue_name=$rmq_priority_queue_name \ - --rabbitmq.reprocessing_queue_name=$rmq_reprocessing_queue_name \ - --web_server.wsgi_server_class=socorro.webapi.servers.CherryPy \ - > middleware.log 2>&1 & - echo ' middleware started' - # tell the test routine to use the extra submission test extra_submission_test=1 echo " Done." } #------------------------------------------------------------------------------ -# setup and run the collector, processor and middleware +# setup and run the collector and processor # The collector will use the traditional wsgi function that can only receive # breakpad crashes on the endpoint /submit #------------------------------------------------------------------------------ @@ -320,20 +303,6 @@ function start_standard_socorro_apps() { --processor.processor_class=socorro.processor.mozilla_processor_2015.MozillaProcessorAlgorithm2015 \ > processor.log 2>&1 & sleep 1 - socorro middleware \ - --admin.conf=./config/middleware.ini \ - --database.database_hostname=$database_hostname \ - --database.database_username=$database_username \ - --database.database_password=$database_password \ - --rabbitmq.host=$rmq_host \ - --rabbitmq.rabbitmq_user=$rmq_user \ - --rabbitmq.rabbitmq_password=$rmq_password \ - --rabbitmq.virtual_host=$rmq_virtual_host \ - --rabbitmq.standard_queue_name=$rmq_normal_queue_name \ - --rabbitmq.priority_queue_name=$rmq_priority_queue_name \ - --rabbitmq.reprocessing_queue_name=$rmq_reprocessing_queue_name \ - --web_server.wsgi_server_class=socorro.webapi.servers.CherryPy \ - > middleware.log 2>&1 & # tell the test routine NOT to use the extra submission test extra_submission_test=0 @@ -342,7 +311,7 @@ function start_standard_socorro_apps() { } #------------------------------------------------------------------------------ -# setup and run the collector, processor and middleware WITHOUT RabbitMQ +# setup and run the collector and processor WITHOUT RabbitMQ # The collector will use the traditional wsgi function that can only receive # breakpad crashes on the endpoint /submit # The collector saves in @@ -368,21 +337,6 @@ function start_minimal_socorro_apps() { --destination.fs_root=./processedCrashStore \ > processor.log 2>&1 & sleep 1 - socorro middleware \ - --admin.conf=./config/middleware.ini \ - --database.database_hostname=$database_hostname \ - --database.database_username=$database_username \ - --database.database_password=$database_password \ - --filesystem.fs_root=./processedCrashStore \ - --rabbitmq.host=$rmq_host \ - --rabbitmq.rabbitmq_user=$rmq_user \ - --rabbitmq.rabbitmq_password=$rmq_password \ - --rabbitmq.virtual_host=$rmq_virtual_host \ - --rabbitmq.standard_queue_name=$rmq_normal_queue_name \ - --rabbitmq.priority_queue_name=$rmq_priority_queue_name \ - --rabbitmq.reprocessing_queue_name=$rmq_reprocessing_queue_name \ - --web_server.wsgi_server_class=socorro.webapi.servers.CherryPy \ - > middleware.log 2>&1 & # tell the test routine 
NOT to use the extra submission test extra_submission_test=0 @@ -423,7 +377,7 @@ echo " Done." #****************************************************************************** # Here's where we actually start testing -# Iterate through some combinations of collector/crashmover/processor/middleware/setups +# Iterate through some combinations of collector/crashmover/processor/setups # These setups are defined in functions with their names list in the for loop: for an_app_set in start_2015_socorro_apps start_standard_socorro_apps start_minimal_socorro_apps do @@ -466,28 +420,6 @@ do retry 'collector' "$CRASHID" retry 'processor' "saved - $CRASHID" - #---------------------------------------------------------------------------- - # check that mware has raw crash using curl to hit the HTTP endpoint - curl -s -D middleware_headers.log "http://localhost:8883/crash_data/?datatype=meta&uuid=$CRASHID" > /dev/null - err=$? - echo " looking for errors in hitting the middleware for $CRASHID" - check_for_logged_fatal_errors $err middleware - - echo " looking for "200 OK" in hitting the middleware for $CRASHID" - grep '200 OK' middleware_headers.log > /dev/null - fatal $? "middleware test failed, no raw data for crash ID $CRASHID" - - echo " looking for processed crash through middleware for $CRASHID" - function find_crash_in_middleware() { - curl -s "http://localhost:8883/crash_data/?datatype=processed&uuid=$CRASHID" | grep date_processed - echo "http://localhost:8883/crash_data/?datatype=processed&uuid=$CRASHID" - return $? - } - retry_command middleware find_crash_in_middleware - - # check that mware logs the request for the crash, and logs no errors - retry 'middleware' "/crash_data" - #---------------------------------------------------------------------------- # EXTRA submission test if [ $extra_submission_test = 1 ] diff --git a/scripts/staging/README.rst b/scripts/staging/README.rst deleted file mode 100644 index f39930ef31..0000000000 --- a/scripts/staging/README.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. This Source Code Form is subject to the terms of the Mozilla Public -.. License, v. 2.0. If a copy of the MPL was not distributed with this -.. file, You can obtain one at http://mozilla.org/MPL/2.0/. - -MiniDB Scripts -============== - -docs moved to docs/databasescripts.rst diff --git a/scripts/staging/afterload.sh b/scripts/staging/afterload.sh deleted file mode 100755 index 84adcadd4e..0000000000 --- a/scripts/staging/afterload.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -set -e - -cp /pgdata/9.0/data/postgresql.conf.prod /pgdata/9.0/data/postgresql.conf - -/etc/init.d/postgresql-9.0 restart - -su -l -c "psql -f ~postgres/update_staging_passwords.sql" postgres - -/etc/init.d/pgbouncer-web start -/etc/init.d/pgbouncer-processor start - -exit 0 - diff --git a/scripts/staging/backupdatadir.sh b/scripts/staging/backupdatadir.sh deleted file mode 100755 index 16c6c4a667..0000000000 --- a/scripts/staging/backupdatadir.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- - -set -e - -/etc/init.d/postgresql-9.0 stop - -rm -rf /pgdata/backupdata/* - -cp -r -p -v /pgdata/9.0/data/* /pgdata/backupdata/ - -/etc/init.d/postgresql-9.0 start - -exit 0 \ No newline at end of file diff --git a/scripts/staging/dumpschema.sh b/scripts/staging/dumpschema.sh deleted file mode 100755 index 9d21ab93db..0000000000 --- a/scripts/staging/dumpschema.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -DB=$1 -USER=$2 -PORT=$4 -: ${USER:="postgres"} -: ${DB:="breakpad"} -if [ -z $3 ] -then - HOST='' -else - HOST=" -h $2" -fi -: ${PORT:="5432"} - -TODAY=`date +%Y%m%d` - -pg_dump $HOST -p $PORT -s -U $USER \ - -T high_load_temp \ - -T locks* \ - -T activity_snapshot \ - -T product_info_changelog \ - -T '*_201*' \ - -T 'priority_jobs_*' \ - $DB > schema-$DB-$TODAY.sql - -echo 'schema dumped' - -exit 0 diff --git a/scripts/staging/extractMiniDB.py b/scripts/staging/extractMiniDB.py deleted file mode 100755 index 3b843b031a..0000000000 --- a/scripts/staging/extractMiniDB.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/python -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -import sys -import os -import psycopg2 -import psycopg2.extensions -from optparse import OptionParser - -# extracts a database from a copy of production breakpad -# consisting of only the last # weeks of data, more or less -# the resulting tgz file needs to be loaded with loadMiniDBonDev.py -# does not currently dump users - -parser = OptionParser() -parser.add_option("-w", "--weeks", dest="num_weeks", type="int", - help="number of weeks to extract", metavar="#", - default=2) -parser.add_option("-d", "--database", dest="database_name", - help="database to be extracted", metavar="DBNAME", - default="breakpad") -parser.add_option("-f", "--file", dest="tar_file", - help="extractdb tarball to be created", metavar="FILE", - default="extractdb.tgz") - -(options, args) = parser.parse_args() - -# simple shell command runner -def rundump(dump_command): - dump_result = os.system(dump_command) - if dump_result != 0: - sys.exit(dump_result) - -print "Extracting %s weeks of data" % options.num_weeks - -#connect to postgresql -conn = psycopg2.connect("dbname=%s user=postgres" % options.database_name) - -cur = conn.cursor() - -# get the list of weekly partitions to NOT dump -cur.execute(""" -SELECT array_to_string( array_agg ( ' -T ' || relname ), ' ' ) - FROM pg_stat_user_tables - WHERE relname ~* $x$_20\d+$$x$ -AND substring(relname FROM $x$_(20\d+)$$x$) < - to_char( ( now() - ( ( %s + 1 ) * interval '1 week') ), 'YYYYMMDD'); - """, (options.num_weeks,)) - -no_dump = str(cur.fetchone()[0]) - -#get the date of truncation -cur.execute(""" - SELECT to_date(substring(relname FROM $x$_(20\d+)$$x$),'YYYYMMDD') - FROM pg_stat_user_tables - WHERE relname LIKE 'reports_20%%' - AND substring(relname FROM $x$_(20\d+)$$x$) >= -to_char( ( now() - ( ( %s + 1 ) * interval '1 week') ), 'YYYYMMDD') - ORDER BY relname LIMIT 1; - """, (options.num_weeks,)) - -cutoff_date = str(cur.fetchone()[0]) - -# dump the list of matviews one at a time. 
consult dictionary -# for the queries to retrieve each set of truncated data - -# cycle through the list of matviews -# and tables with data that needs to be cleaned -# dump those with no data - -matviews = {'raw_adi' - : """SELECT * FROM raw_adi WHERE raw_adi.date >= '%s'""" % cutoff_date, - 'releases_raw' - : """SELECT releases_raw.* FROM releases_raw WHERE build_date(build_id) - >= ( DATE '%s' - 180 ) """ % cutoff_date, - 'product_adu' : """SELECT product_adu.* FROM product_adu WHERE adu_date >= '%s'""" % cutoff_date, - 'tcbs' : """SELECT tcbs.* FROM tcbs WHERE report_date >= '%s'""" % cutoff_date, - 'tcbs_build' : """SELECT * FROM tcbs_build WHERE build_date >= '%s'""" % cutoff_date, - 'sessions' : """SELECT * FROM sessions WHERE false""", - 'server_status' : """SELECT * FROM server_status WHERE false""", - 'reports_bad' : """SELECT * FROM reports_bad WHERE false""", - 'reports_duplicates' - : """SELECT * FROM reports_duplicates WHERE date_processed >= '%s'""" % cutoff_date, - 'daily_hangs' - : """SELECT * FROM daily_hangs WHERE report_date >= '%s'""" % cutoff_date, - 'build_adu' : """SELECT * FROM build_adu WHERE build_date >= '%s'""" % cutoff_date, - 'crashes_by_user' : """SELECT * FROM crashes_by_user WHERE report_date >= '%s'""" % cutoff_date, - 'crashes_by_user_build' : """SELECT * FROM crashes_by_user_build WHERE build_date >= '%s'""" % cutoff_date, - 'home_page_graph' : """SELECT * FROM home_page_graph WHERE report_date >= '%s'""" % cutoff_date, - 'home_page_graph_build' : """SELECT * FROM home_page_graph_build WHERE build_date >= '%s'""" % cutoff_date, - 'nightly_builds' : """SELECT * FROM nightly_builds WHERE report_date >= '%s'""" % cutoff_date - } - -no_dump_all = no_dump + ' -T "priority_jobs_*" -T ' + ' -T '.join(matviews) -# don't dump priority jobs queues either - -print "truncating all data before %s" % cutoff_date - -#pg_dump most of the database -print 'dumping most of the database' -rundump('pg_dump -Fc -U postgres ' + no_dump_all + ' breakpad -f minidb.dump') - -# copy truncated data for each matview - -for matview in matviews: - print 'dumping %s' % matview - dumpstring = """psql -U postgres -c "\copy ( """ + matviews[matview] + """ ) to """ + matview + """.dump" breakpad""" - rundump(dumpstring) - -# dump the schema for the matviews: -rundump('pg_dump -Fc -s -t' + ' -t '.join(matviews) + ' -f matview_schemas.dump breakpad') - -#DUMP the users and logins - -rundump('pg_dumpall -U postgres -r -f users.dump') - -#remove password sets - -rundump('sed -i "s/PASSWORD \'.*\'//" users.dump') - -rundump("tar -cvzf %s *.dump" % options.tar_file) -rundump('rm *.dump') - -print 'done extracting database' diff --git a/scripts/staging/fake_raw_adu.py b/scripts/staging/fake_raw_adu.py deleted file mode 100644 index 26500f36e6..0000000000 --- a/scripts/staging/fake_raw_adu.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/python -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -import sys -import psycopg2 -import psycopg2.extensions -import psycopg2.extras - -#connect to CSD database -csd = psycopg2.connect("dbname=breakpad user=postgres port=5432") -csd_cur = csd.cursor() - -# check if we already have ADU for the day -csd_cur.execute(""" - SELECT COUNT(*) FROM raw_adi WHERE "date" = 'yesterday'::date -""") - -if csd_cur.fetchone()[0] > 0: - sys.stderr.write('raw_adi has already been exported for yesterday\n') - sys.exit(-1) - -#dump raw_adi from previous day and reinsert faked data -csd_cur.execute(""" - INSERT into raw_adi ( - adi_count, - date, - product_name, - product_os_platform, - product_os_version, - product_version, - build, - product_guid, - update_channel - ) - ( - SELECT adi_count, - 'yesterday'::date as "date", - product_name, - product_os_platform, - product_os_version, - product_version, - build, - product_guid, - update_channel - FROM raw_adi - WHERE date in (select max(date) from raw_adi) - ) -""") -csd.commit() -csd.close() - -print 'raw_adi successfully updated' - -sys.exit(0) diff --git a/scripts/staging/loadMiniDBonDev.py b/scripts/staging/loadMiniDBonDev.py deleted file mode 100644 index 0edc52d95b..0000000000 --- a/scripts/staging/loadMiniDBonDev.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/python -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -import sys -import os -import psycopg2 -import psycopg2.extensions -from optparse import OptionParser - -# loads a file created with extractminidb.py - -# intended only for use on DevDB, as it uses an experimental -# version of PostgreSQL's pg_restore which is installed there -# if you need a minidb on another server, restore this on devdb -# and then dump from there - -# creates users without changing passwords -# takes two arguments, the archive name holding the data -# and optionally the database name to restore - -# note that this script will fail unless you first kick -# all users off the database system. 
on stagedb, try -# running beforeload.sh first - -parser = OptionParser() -parser.add_option("-f", "--file", dest="tar_file", - help="extractdb tarball to be loaded", metavar="FILE", - default="extractdb.tgz") -parser.add_option("-d", "--database", dest="database_name", - help="database to be loaded", metavar="DBNAME", - default="breakpad") -parser.add_option("-P", "--postscript", dest="postsql", - help="post-load shell script", - default="/data/socorro/application/scripts/staging/postsql/postsql.sh") -(options, args) = parser.parse_args() - -print "Loading data" - -def runload(load_command): - load_result = os.system(load_command) - if load_result != 0: - sys.exit(load_result) - -matviews = ['raw_adi', - 'releases_raw', - 'product_adu', - 'tcbs', - 'tcbs_build', - 'sessions', - 'server_status', - 'reports_bad', - 'reports_duplicates', - 'daily_hangs', - 'build_adu', - 'crashes_by_user', - 'crashes_by_user_build', - 'home_page_graph', - 'home_page_graph_build', - 'nightly_builds' - ] - -# untar the file -runload('tar -xzf %s' % options.tar_file) - -#connect to postgresql -conn = psycopg2.connect("dbname=postgres user=postgres") - -conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) - -cur = conn.cursor() - -print 'drop and recreate the database' - -# drop the database and recreate it -try: - cur.execute("""DROP DATABASE %s;""" % options.database_name) -except psycopg2.Error as exc: - code = exc.pgcode - if code == '3D000': - pass - else: - # if this failed, check why. - sys.exit('unable to drop database %s probably because connections to it are still open: %s' - % (options.database_name, code,)) - -cur.execute("""CREATE DATABASE %s""" % options.database_name) - -print 'load users. please ignore any errors you see here' - -os.system('psql -q -v verbosity=terse -U postgres -f users.dump %s' % options.database_name) - -print 'load most of the database' - -# dump a list of objects - -# load everything else but not indexes and constraints -# needs to ignore errors - -os.system('/usr/local/pgsql/bin/pg_restore -j 3 -Fc --no-post-data -U postgres minidb.dump -d %s' - % options.database_name) - -print 'load the truncated materialized views' - -# restore the matview schema -# needs to ignore errors - -os.system('/usr/local/pgsql/bin/pg_restore -Fc --no-post-data -U postgres matview_schemas.dump -d %s' - % options.database_name) - -# restore matview data, one matview at a time - -for matview in matviews: - print "loading %s" % matview - runload("""psql -c "\copy %s FROM %s.dump" -U postgres %s""" % (matview, matview, options.database_name,)) - -# restore indexes and constraints - -print 'restore indexes and constraints' - -runload('/usr/local/pgsql/bin/pg_restore -j 3 -Fc --post-data-only -U postgres minidb.dump -d %s' % options.database_name) -runload('/usr/local/pgsql/bin/pg_restore -j 3 -Fc --post-data-only -U postgres matview_schemas.dump -d %s' % options.database_name) - -# truncate soon-to-be-dropped tables -# conn.disconnect() - -conn = psycopg2.connect("dbname=%s user=postgres" % options.database_name) - -conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) - -cur = conn.cursor() - -cur.execute(""" - DO $f$ - DECLARE tab TEXT; - BEGIN - FOR tab IN SELECT relname - FROM pg_stat_user_tables - WHERE relname LIKE 'frames%' LOOP - - EXECUTE 'TRUNCATE ' || tab; - - END LOOP; - END; $f$; - """) - -# load views which break on pg_restore, such as hang_report - -runload(options.postsql) - -#delete all the dump files - -runload('rm *.dump') - -# analyze - 
-cur.execute("""SET maintenance_work_mem = '512MB'""") -cur.execute('ANALYZE') - -print 'done loading database.' diff --git a/scripts/staging/loadprep.sh b/scripts/staging/loadprep.sh deleted file mode 100755 index 7cb1c6b4e5..0000000000 --- a/scripts/staging/loadprep.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -set -e - -/etc/init.d/pgbouncer-web stop -/etc/init.d/pgbouncer-processor stop - -cp /pgdata/9.0/data/postgresql.conf.localonly /pgdata/9.0/data/postgresql.conf - -/etc/init.d/postgresql-9.0 restart - -exit 0 - diff --git a/scripts/staging/postsql/README.rst b/scripts/staging/postsql/README.rst deleted file mode 100644 index a219481c3a..0000000000 --- a/scripts/staging/postsql/README.rst +++ /dev/null @@ -1,15 +0,0 @@ -.. This Source Code Form is subject to the terms of the Mozilla Public -.. License, v. 2.0. If a copy of the MPL was not distributed with this -.. file, You can obtain one at http://mozilla.org/MPL/2.0/. - -View Restore Scripts for Staging -================================ - -This directory contains SQL scripts for views which depend on matviews, and generally fail to load during backup/restore as part of the MiniDB database-shrinking process. The new LoadMiniDB.py script will load these views, one at a time, at the end of restoring the database. - -If loadMiniDB.py does not run these scripts because it cannot find the file location, then they can be run with the one-line shell script, loadviews.sh: - -loadviews.sh {databasename} - -Default databasename is "breakpad". This script must be run as the database superuser. - diff --git a/scripts/staging/postsql/crash_by_user_views.sql b/scripts/staging/postsql/crash_by_user_views.sql deleted file mode 100644 index bd59fd749d..0000000000 --- a/scripts/staging/postsql/crash_by_user_views.sql +++ /dev/null @@ -1,47 +0,0 @@ -\set ON_ERROR_STOP 1 - -BEGIN; - -DROP VIEW IF EXISTS crashes_by_user_build_view; - -CREATE OR REPLACE VIEW crashes_by_user_build_view AS -SELECT crashes_by_user_build.product_version_id, - product_versions.product_name, version_string, - os_short_name, os_name, crash_type, crash_type_short, - crashes_by_user_build.build_date, - sum(report_count) as report_count, - sum(report_count / throttle) as adjusted_report_count, - sum(adu) as adu, throttle -FROM crashes_by_user_build - JOIN product_versions USING (product_version_id) - JOIN product_release_channels ON - product_versions.product_name = product_release_channels.product_name - AND product_versions.build_type = product_release_channels.release_channel - JOIN os_names USING (os_short_name) - JOIN crash_types USING (crash_type_id) -WHERE crash_types.include_agg -GROUP BY crashes_by_user_build.product_version_id, - product_versions.product_name, version_string, - os_short_name, os_name, crash_type, crash_type_short, - crashes_by_user_build.build_date, throttle; - -ALTER VIEW crashes_by_user_build_view OWNER TO breakpad_rw; - -DROP VIEW IF EXISTS crashes_by_user_view; - -CREATE OR REPLACE VIEW crashes_by_user_view AS -SELECT crashes_by_user.product_version_id, - product_versions.product_name, version_string, - os_short_name, os_name, crash_type, crash_type_short, report_date, - report_count, (report_count / throttle) as adjusted_report_count, - adu, throttle -FROM crashes_by_user - JOIN product_versions USING (product_version_id) - JOIN 
product_release_channels ON - product_versions.product_name = product_release_channels.product_name - AND product_versions.build_type = product_release_channels.release_channel - JOIN os_names USING (os_short_name) - JOIN crash_types USING (crash_type_id) -WHERE crash_types.include_agg; - -ALTER VIEW crashes_by_user_view OWNER TO breakpad_rw; \ No newline at end of file diff --git a/scripts/staging/postsql/current_server_status.sql b/scripts/staging/postsql/current_server_status.sql deleted file mode 100644 index 2b221169c4..0000000000 --- a/scripts/staging/postsql/current_server_status.sql +++ /dev/null @@ -1,14 +0,0 @@ - - -CREATE OR REPLACE VIEW current_server_status AS - SELECT server_status.date_recently_completed, server_status.date_oldest_job_queued, date_part('epoch'::text, (server_status.date_created - server_status.date_oldest_job_queued)) AS oldest_job_age, server_status.avg_process_sec, server_status.avg_wait_sec, server_status.waiting_job_count, server_status.processors_count, server_status.date_created FROM server_status ORDER BY server_status.date_created DESC LIMIT 1; - - -ALTER TABLE public.current_server_status OWNER TO breakpad_rw; - -REVOKE ALL ON TABLE current_server_status FROM PUBLIC; -REVOKE ALL ON TABLE current_server_status FROM breakpad_rw; -GRANT ALL ON TABLE current_server_status TO breakpad_rw; -GRANT SELECT ON TABLE current_server_status TO monitoring; - - diff --git a/scripts/staging/postsql/default_versions.sql b/scripts/staging/postsql/default_versions.sql deleted file mode 100644 index 72fd908e02..0000000000 --- a/scripts/staging/postsql/default_versions.sql +++ /dev/null @@ -1,8 +0,0 @@ - -CREATE VIEW default_versions AS - SELECT count_versions.product_name, count_versions.version_string, count_versions.product_version_id FROM (SELECT product_info.product_name, product_info.version_string, product_info.product_version_id, row_number() OVER (PARTITION BY product_info.product_name ORDER BY ((('now'::text)::date >= product_info.start_date) AND (('now'::text)::date <= product_info.end_date)) DESC, product_info.is_featured DESC, product_info.channel_sort DESC) AS sort_count FROM product_info) count_versions WHERE (count_versions.sort_count = 1); - - -ALTER TABLE public.default_versions OWNER TO breakpad_rw; - - diff --git a/scripts/staging/postsql/default_versions_builds.sql b/scripts/staging/postsql/default_versions_builds.sql deleted file mode 100644 index 6a5f20dde4..0000000000 --- a/scripts/staging/postsql/default_versions_builds.sql +++ /dev/null @@ -1,8 +0,0 @@ - - -CREATE OR REPLACE VIEW default_versions_builds AS - SELECT count_versions.product_name, count_versions.version_string, count_versions.product_version_id FROM (SELECT product_info.product_name, product_info.version_string, product_info.product_version_id, row_number() OVER (PARTITION BY product_info.product_name ORDER BY ((('now'::text)::date >= product_info.start_date) AND (('now'::text)::date <= product_info.end_date)) DESC, product_info.is_featured DESC, product_info.channel_sort DESC) AS sort_count FROM product_info WHERE product_info.has_builds) count_versions WHERE (count_versions.sort_count = 1); - - -ALTER TABLE public.default_versions_builds OWNER TO breakpad_rw; - diff --git a/scripts/staging/postsql/hang_report.sql b/scripts/staging/postsql/hang_report.sql deleted file mode 100644 index d1b0368115..0000000000 --- a/scripts/staging/postsql/hang_report.sql +++ /dev/null @@ -1,16 +0,0 @@ - - -CREATE OR REPLACE VIEW hang_report AS - SELECT product_versions.product_name AS product, 
product_versions.version_string AS version, browser_signatures.signature AS browser_signature, plugin_signatures.signature AS plugin_signature, daily_hangs.hang_id AS browser_hangid, flash_versions.flash_version, daily_hangs.url, daily_hangs.uuid, daily_hangs.duplicates, daily_hangs.report_date AS report_day FROM ((((daily_hangs JOIN product_versions USING (product_version_id)) JOIN signatures browser_signatures ON ((daily_hangs.browser_signature_id = browser_signatures.signature_id))) JOIN signatures plugin_signatures ON ((daily_hangs.plugin_signature_id = plugin_signatures.signature_id))) LEFT JOIN flash_versions USING (flash_version_id)); - - -ALTER TABLE public.hang_report OWNER TO breakpad_rw; - -REVOKE ALL ON TABLE hang_report FROM PUBLIC; -REVOKE ALL ON TABLE hang_report FROM breakpad_rw; -GRANT ALL ON TABLE hang_report TO breakpad_rw; -GRANT SELECT ON TABLE hang_report TO breakpad; -GRANT SELECT ON TABLE hang_report TO breakpad_ro; -GRANT ALL ON TABLE hang_report TO monitor; - - diff --git a/scripts/staging/postsql/home_page_graph_views.sql b/scripts/staging/postsql/home_page_graph_views.sql deleted file mode 100644 index 5557dad7a6..0000000000 --- a/scripts/staging/postsql/home_page_graph_views.sql +++ /dev/null @@ -1,33 +0,0 @@ -CREATE OR REPLACE VIEW home_page_graph_view -AS -SELECT product_version_id, - product_name, - version_string, - report_date, - report_count, - adu, - crash_hadu -FROM home_page_graph - JOIN product_versions USING (product_version_id); - -ALTER VIEW home_page_graph_view OWNER TO breakpad_rw; - - -CREATE OR REPLACE VIEW home_page_graph_build_view -AS -SELECT product_version_id, - product_versions.product_name, - version_string, - home_page_graph_build.build_date, - sum(report_count) as report_count, - sum(adu) as adu, - crash_hadu(sum(report_count), sum(adu), throttle) as crash_hadu -FROM home_page_graph_build - JOIN product_versions USING (product_version_id) - JOIN product_release_channels ON - product_versions.product_name = product_release_channels.product_name - AND product_versions.build_type = product_release_channels.release_channel -GROUP BY product_version_id, product_versions.product_name, - version_string, home_page_graph_build.build_date, throttle; - -ALTER VIEW home_page_graph_build_view OWNER TO breakpad_rw; \ No newline at end of file diff --git a/scripts/staging/postsql/performance_check_1.sql b/scripts/staging/postsql/performance_check_1.sql deleted file mode 100644 index 0c2cbaba27..0000000000 --- a/scripts/staging/postsql/performance_check_1.sql +++ /dev/null @@ -1,21 +0,0 @@ - - -CREATE VIEW performance_check_1 AS - SELECT sum(report_count) FROM tcbs - WHERE report_date BETWEEN ( current_date - 7 ) and current_date; - - -ALTER TABLE public.performance_check_1 OWNER TO ganglia; - --- --- Name: performance_check_1; Type: ACL; Schema: public; Owner: ganglia --- - -REVOKE ALL ON TABLE performance_check_1 FROM PUBLIC; -REVOKE ALL ON TABLE performance_check_1 FROM ganglia; -GRANT ALL ON TABLE performance_check_1 TO ganglia; -GRANT SELECT ON TABLE performance_check_1 TO breakpad; -GRANT SELECT ON TABLE performance_check_1 TO breakpad_ro; -GRANT ALL ON TABLE performance_check_1 TO monitor; - - diff --git a/scripts/staging/postsql/postsql.sh b/scripts/staging/postsql/postsql.sh deleted file mode 100755 index b9e2b034dc..0000000000 --- a/scripts/staging/postsql/postsql.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. 
If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -set -e - -CURDIR=$(dirname $0) - -# load all views in this directory -psql -f $CURDIR/*.sql breakpad - -#done -exit 0 diff --git a/scripts/staging/postsql/product_crash_ratio.sql b/scripts/staging/postsql/product_crash_ratio.sql deleted file mode 100644 index 7813567cdc..0000000000 --- a/scripts/staging/postsql/product_crash_ratio.sql +++ /dev/null @@ -1,57 +0,0 @@ -\set ON_ERROR_STOP 1 - -BEGIN; - -DROP VIEW IF EXISTS crashes_by_user_rollup; - -CREATE VIEW crashes_by_user_rollup AS -SELECT product_version_id, report_date, - os_short_name, - sum(report_count) as report_count, - min(adu) as adu - FROM crashes_by_user - JOIN crash_types USING (crash_type_id) - WHERE crash_types.include_agg - GROUP BY product_version_id, report_date, os_short_name; - -DROP VIEW IF EXISTS product_crash_ratio; - -CREATE OR REPLACE VIEW product_crash_ratio AS -SELECT crcounts.product_version_id, product_versions.product_name, - version_string, report_date as adu_date, - sum(report_count)::bigint as crashes, - sum(adu) as adu_count, throttle::numeric(5,2), - sum(report_count/throttle)::int as adjusted_crashes, - crash_hadu(sum(report_count)::bigint, sum(adu), throttle) as crash_ratio -FROM crashes_by_user_rollup as crcounts - JOIN product_versions ON crcounts.product_version_id = product_versions.product_version_id - JOIN product_release_channels - ON product_versions.product_name = product_release_channels.product_name - AND product_versions.build_type = product_release_channels.release_channel -GROUP BY crcounts.product_version_id, product_versions.product_name, - version_string, report_date, throttle; - -ALTER VIEW product_crash_ratio OWNER TO breakpad_rw; -GRANT SELECT ON product_crash_ratio TO analyst; - -DROP VIEW IF EXISTS product_os_crash_ratio; - -CREATE OR REPLACE VIEW product_os_crash_ratio AS -SELECT crcounts.product_version_id, product_versions.product_name, - version_string, os_names.os_short_name, os_names.os_name, report_date as adu_date, - sum(report_count)::bigint as crashes, sum(adu) as adu_count, throttle::numeric(5,2), - sum(report_count/throttle)::int as adjusted_crashes, - crash_hadu(sum(report_count)::bigint, sum(adu), throttle) as crash_ratio -FROM crashes_by_user_rollup AS crcounts - JOIN product_versions ON crcounts.product_version_id = product_versions.product_version_id - JOIN os_names ON crcounts.os_short_name::citext = os_names.os_short_name - JOIN product_release_channels ON product_versions.product_name - = product_release_channels.product_name - AND product_versions.build_type = product_release_channels.release_channel -GROUP BY crcounts.product_version_id, product_versions.product_name, - version_string, os_name, os_names.os_short_name, report_date, throttle;; - -ALTER VIEW product_os_crash_ratio OWNER TO breakpad_rw; -GRANT SELECT ON product_os_crash_ratio TO analyst; - -COMMIT; \ No newline at end of file diff --git a/scripts/staging/postsql/product_info.sql b/scripts/staging/postsql/product_info.sql deleted file mode 100644 index 12ca4c1905..0000000000 --- a/scripts/staging/postsql/product_info.sql +++ /dev/null @@ -1,11 +0,0 @@ - -CREATE OR REPLACE VIEW product_info AS - SELECT product_versions.product_version_id, product_versions.product_name, product_versions.version_string, 'new'::text AS which_table, product_versions.build_date AS start_date, product_versions.sunset_date AS end_date, product_versions.featured_version AS is_featured, 
product_versions.build_type, ((product_release_channels.throttle * (100)::numeric))::numeric(5,2) AS throttle, product_versions.version_sort, products.sort AS product_sort, release_channels.sort AS channel_sort, ((product_versions.build_type = ANY (ARRAY['Aurora'::citext, 'Nightly'::citext])) OR ((product_versions.build_type = 'Beta'::citext) AND (major_version_sort((product_versions.major_version)::text) <= major_version_sort((products.rapid_beta_version)::text)))) AS has_builds FROM (((product_versions JOIN product_release_channels ON (((product_versions.product_name = product_release_channels.product_name) AND (product_versions.build_type = product_release_channels.release_channel)))) JOIN products ON ((product_versions.product_name = products.product_name))) JOIN release_channels ON ((product_versions.build_type = release_channels.release_channel))) ORDER BY product_versions.product_name, product_versions.version_string; - - -ALTER TABLE public.product_info OWNER TO breakpad_rw; - --- --- PostgreSQL database dump complete --- - diff --git a/scripts/staging/postsql/product_selector.sql b/scripts/staging/postsql/product_selector.sql deleted file mode 100644 index 08f8e4e665..0000000000 --- a/scripts/staging/postsql/product_selector.sql +++ /dev/null @@ -1,5 +0,0 @@ - -CREATE OR REPLACE VIEW product_selector AS - SELECT product_versions.product_name, product_versions.version_string, 'new'::text AS which_table, product_versions.version_sort FROM product_versions WHERE (now() <= product_versions.sunset_date) ORDER BY product_versions.product_name, product_versions.version_string; - -ALTER TABLE product_selector OWNER TO breakpad_rw; diff --git a/scripts/staging/restoredatadir.sh b/scripts/staging/restoredatadir.sh deleted file mode 100755 index ca26910053..0000000000 --- a/scripts/staging/restoredatadir.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -set -e - -/etc/init.d/postgresql-9.0 stop - -rm -rf /pgdata/9.0/data/* - -cp -r -p -v /pgdata/backupdata/* /pgdata/9.0/data/ - -/etc/init.d/postgresql-9.0 start - -exit 0 \ No newline at end of file diff --git a/scripts/staging/sync_raw_adu.py b/scripts/staging/sync_raw_adu.py deleted file mode 100755 index 4ea046535a..0000000000 --- a/scripts/staging/sync_raw_adu.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/python -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -# In production on stage db - -import os -from os.path import join, getsize -import sys -import psycopg2 -import psycopg2.extensions -import psycopg2.extras - -#connect to CSD database -csd = psycopg2.connect("dbname=breakpad user=postgres port=5432") -csd_cur = csd.cursor() -# check if we already have ADU for the day -csd_cur.execute("""SELECT COUNT(*) FROM raw_adi WHERE "date" = 'yesterday'::date""") - -if (csd_cur.fetchone()[0]) > 0: - sys.stderr.write('raw_adi has already been exported for yesterday') - sys.exit(-1) - -#connect to replayDB -replay = psycopg2.connect("dbname=breakpad user=postgres port=5499") -rep_cur = replay.cursor() - -# check if we already have ADU for the day -rep_cur.execute("""SELECT count(*) FROM raw_adi WHERE "date" = 'yesterday'::date""") - -if (rep_cur.fetchone()[0]) == 0: - sys.stderr.write('no raw_adi in replayDB for yesterday') - sys.exit(-2) - -#dump raw_adi to file -rep_cur.execute("""COPY ( SELECT * FROM raw_adi WHERE "date" = 'yesterday'::date ) -TO '/tmp/raw_adi_update.csv' with csv;""") -replay.close() - -#import raw_adi into CSD -csd_cur.execute("""COPY raw_adi FROM '/tmp/raw_adi_update.csv' with csv;""") -csd.commit() -csd.close() - -print 'raw_adi successfully updated' - -sys.exit(0) diff --git a/scripts/sync_fs.py b/scripts/sync_fs.py deleted file mode 100644 index 64f5d8826e..0000000000 --- a/scripts/sync_fs.py +++ /dev/null @@ -1,238 +0,0 @@ -#!/usr/bin/env python - -import os, sys -import happybase -import json -import logging - -from boto.sqs import connect_to_region as sqs_connect -from boto.sqs.jsonmessage import JSONMessage -from boto.s3 import connect_to_region as s3_connect -from boto.s3.key import Key - -from multiprocessing import Process as TaskClass -from multiprocessing import JoinableQueue as Queue - -import signal -import random - -from collections import deque - - -logger = logging.getLogger(__name__) - -# Following params need to be adjusted based on payload size, bandwidth etc -MAX_ROWS_IN_FLIGHT = 4096 -TASK_QUEUE_SIZE = MAX_ROWS_IN_FLIGHT * 4 - - -class HBaseSource: - def __init__(self, addr, row_range, max_rows = 2048, batch_size = 256): - self.thrift_addr = addr - self.start_row, self.end_row = row_range - self.max_rows = max_rows - self.batch_size = batch_size - - def items(self): - prev_last_read_key = None - curr_last_read_key = self.start_row - end_row = self.end_row - - while True: - src_tbl = happybase.Connection(random.choice(self.thrift_addr)).table('crash_reports') - - nrows = 0 - - try: - logger.debug('fetch %d rows of data via thrift', self.max_rows) - - # scan fetches rows with key in the range [row_start, row_stop) - # this necessitates the check for repeating keys as stopping condition - # - logger.info("scan start") - data = deque(src_tbl.scan(row_start = curr_last_read_key, - row_stop = end_row, - columns = ['raw_data', 'processed_data', 'meta_data'], - limit = self.max_rows, - batch_size = self.batch_size)) - logger.info("scan end %d rows starting at %s", len(data), data[0][0]) - while True: - if not data: - break - - key, val = data.popleft() - if (key == prev_last_read_key): - # last record from previous batch should be ignored - continue - - yield key, val - nrows += 1 - - prev_last_read_key = curr_last_read_key - curr_last_read_key = key - - logger.debug('read %d rows of data from hbase ending at %s', nrows, curr_last_read_key) - if nrows < self.max_rows: - print >> sys.stderr, "end of range. exiting" - break - - except happybase.hbase.ttypes.IOError: - logger.exception('caught exception. 
retrying.') - - except Exception: - logger.exception('unrecoverable exception.') - raise - -class SourceWorker(TaskClass): - def __init__(self, queue, source_config): - TaskClass.__init__(self) - - self.source = HBaseSource(*source_config) - self.queue = queue - - def run(self): - num_rows_written = 0 - total_size_written = 0 - s3_path_tmpl = '{env}/v1/{ftype}/{uuid}' - env = 'stage' - - for key, cols in self.source.items(): - dump_names = [] - for j in cols.keys(): - - suffix = get_suffix(j) - if not suffix: - #logger.info('column %s ignored for key %s', j, key) - continue - - if j.startswith('raw_data'): - dump_names.append(suffix) - - # crashstats/stage/v1/ - # format {{bucket}}/{{prefix}}/{{version}}/{{crash_type}}/{{crash_id}} - skey = s3_path_tmpl.format(env = env, - uuid = key[7:], - ftype = suffix) - - self.queue.put((skey, cols[j])) - - total_size_written += len(cols[j]) - - self.queue.put((s3_path_tmpl.format(env = env, - uuid = key[7:], - ftype = 'dump_names'), - json.dumps(dump_names))) - - num_rows_written += 1 - - if ((num_rows_written % 1000) == 0): - logger.info("wrote %d rows, at %s", num_rows_written, key) - logger.warn("qsize is %d", self.queue.qsize()) - - print >> sys.stderr, "SourceWorker DONE", num_rows_written, total_size_written - -class S3Worker(TaskClass): - def __init__(self, s3_region, s3_bucket, task_queue, result_queue): - signal.signal(signal.SIGINT, signal.SIG_IGN) - - TaskClass.__init__(self) - self.task_queue = task_queue - self.result_queue = result_queue - self.s3_region = s3_region - self.s3_bucket = s3_bucket - - def setup_s3(self): - self.s3 = s3_connect(self.s3_region) - self.bucket = self.s3.get_bucket(self.s3_bucket) - - def write_to_s3(self, key, payload): - k = Key(self.bucket) - k.key = key - k.set_contents_from_string(payload) - - def run(self): - self.setup_s3() - - while True: - kv = self.task_queue.get() - - if kv is None: - print >> sys.stderr, '%s: Exiting' % self.name - self.task_queue.task_done() - break - - k, v = kv - self.write_to_s3(k, v) - self.task_queue.task_done() - return - -def get_suffix(colname): - suffix_map = { - 'processed_data:json' : 'processed_crash', - 'raw_data:dump' : 'dump', - 'meta_data:json' : 'raw_crash', - 'raw_data:upload_file_minidump_browser' : 'upload_file_minidump_browser', - 'raw_data:upload_file_minidump_flash1' : 'upload_file_minidump_flash1', - 'raw_data:upload_file_minidump_flash2' : 'upload_file_minidump_flash2' - } - - if colname in suffix_map: - return suffix_map[colname] - elif colname.startswith('raw_data'): - return colname.split(':', 1)[1] - else: - return None - - -def main(num_workers = 64): - if len(sys.argv) != 3: - show_usage_and_quit() - - queue = Queue(TASK_QUEUE_SIZE) - - # start s3 workers - workers = [S3Worker('us-west-2', 'crashstats', queue, None) - for i in xrange(num_workers)] - - for i in workers: - i.start() - - thrift_hosts = sys.argv[1].split(',') - date = sys.argv[2] - - # start hbase workers - key_ranges = [] - for i in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']: - key_ranges.append(('%s%s%s' % (i, date, i), '%s%s%sg' % (i, date, i))) - - num_hbase_workers = 1 - - for i in xrange(0, len(key_ranges), num_hbase_workers): - src_workers = [] - krng = key_ranges[i : (i + num_hbase_workers)] - - for j in range(len(krng)): - src_workers.append(SourceWorker(queue, (thrift_hosts, krng[j]))) - - for w in src_workers: - print "starting src worker", w - w.start() - - for w in src_workers: - w.join() - - for i in workers: - queue.put(None) - - 
queue.join() - -def show_usage_and_quit(): - print >> sys.stderr, "Usage: %s hosts('host1,host2,host3') date(YYMMDD)" % (sys.argv[0]) - sys.exit(2) - - -if __name__ == '__main__': - logging.basicConfig(format = '%(asctime)s %(name)s:%(levelname)s: %(message)s', - level = logging.INFO) - - main() diff --git a/socorro/cron/crontabber_app.py b/socorro/cron/crontabber_app.py index 44c885dc35..2a6900967d 100755 --- a/socorro/cron/crontabber_app.py +++ b/socorro/cron/crontabber_app.py @@ -28,7 +28,6 @@ socorro.cron.jobs.matviews.ExploitabilityCronApp|1d|05:00 socorro.cron.jobs.matviews.CrashAduByBuildSignatureCronApp|1d|07:30 socorro.cron.jobs.ftpscraper.FTPScraperCronApp|1h - socorro.cron.jobs.serverstatus.ServerStatusCronApp|5m socorro.cron.jobs.reprocessingjobs.ReprocessingJobsApp|5m socorro.cron.jobs.matviews.SignatureSummaryProductsCronApp|1d|05:00 socorro.cron.jobs.matviews.SignatureSummaryInstallationsCronApp|1d|05:00 diff --git a/socorro/cron/fixBrokenDumps.py b/socorro/cron/fixBrokenDumps.py deleted file mode 100644 index cc38f6fa8a..0000000000 --- a/socorro/cron/fixBrokenDumps.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/python -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -import time -import sys -import subprocess -import os -import cPickle - -import psycopg2 -import psycopg2.extras - -import socorrolib.lib.util -import socorro.external.hbase.hbase_client as hbaseClient - -from datetime import datetime, timedelta - -from socorrolib.lib.datetimeutil import utc_now -from socorrolib.lib.datetimeutil import date_to_string - -def fetchOoids(configContext, logger, query): - try: - databaseDSN = "host=%(databaseHost)s dbname=%(databaseName)s user=%(databaseUserName)s password=%(databasePassword)s" % configContext - conn = psycopg2.connect(databaseDSN) - cur = conn.cursor() - except: - socorrolib.lib.util.reportExceptionAndAbort(logger) - - last_date_processed = get_last_run_date(configContext) - - rows = [] - try: - before = time.time() - logger.debug('last_date_processed used for query: %s' % last_date_processed) - cur.execute(query % last_date_processed) - rows = cur.fetchall() - conn.commit() - except: - socorrolib.lib.util.reportExceptionAndAbort(logger) - - return rows, last_date_processed - -def fix(configContext, logger, query, fixer): - rows, last_date_processed = fetchOoids(configContext, logger, query) - hbc = hbaseClient.HBaseConnectionForCrashReports(configContext.hbaseHost, configContext.hbasePort, configContext.hbaseTimeout, logger=logger) - for row in rows: - try: - ooid, last_date_processed = row - logger.info('fixing ooid: %s' % ooid) - dump = hbc.get_dump(ooid) - fname = '/dev/shm/%s.dump' % ooid - with open(fname, 'wb') as orig_dump_file: - orig_dump_file.write(dump) - logger.debug('wrote dump file: %s' % fname) - logger.debug('fixed dump file: %s' % fname) - subprocess.check_call([fixer, fname]) - logger.debug('fixer: %s' % fixer) - with open(fname, 'rb') as fixed_dump_file: - fixed_dump = fixed_dump_file.read() - hbc.put_fixed_dump(ooid, fixed_dump, add_to_unprocessed_queue = True, submitted_timestamp = date_to_string(utc_now())) - logger.debug('put fixed dump file into hbase: %s' % fname) - os.unlink(fname) - logger.debug('removed dump file: %s' % fname) - except: - socorrolib.lib.util.reportExceptionAndContinue(logger) - - return last_date_processed - -def get_last_run_date(config): - try: - with 
open(config.persistentBrokenDumpPathname, 'r') as f: - return cPickle.load(f) - except IOError: - return utc_now() - timedelta(days=config.daysIntoPast) - -def save_last_run_date(config, date): - with open(config.persistentBrokenDumpPathname, 'w') as f: - return cPickle.dump(date, f) - diff --git a/socorro/cron/jobs/serverstatus.py b/socorro/cron/jobs/serverstatus.py deleted file mode 100644 index 2a0a92c08a..0000000000 --- a/socorro/cron/jobs/serverstatus.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/python -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -""" -This job populates the server_status table for RabbitMQ and processors. - -The following fields are updated in server_status table: - id - primary key - date_recently_completed - timestamp for job most recently processed in jobs - table - date_oldest_job_queued - (INACCURATE until we upgrade RabbitMQ) timestamp - for the oldest job which is incomplete - avg_process_sec - Average number of seconds (float) for jobs completed - since last run or 0.0 in edge case where no jobs have been processed - avg_wait_sec- Average number of seconds (float) for jobs completed since - last run - or 0.0 in edge case where no jobs have been processed - waiting_job_count - Number of jobs in queue, not assigned to a processor - date_created - timestamp for this record being udpated -""" - -import datetime - -from configman import Namespace - -from socorrolib.lib.datetimeutil import utc_now -from crontabber.base import BaseCronApp -from crontabber.mixins import ( - with_postgres_transactions, - with_single_postgres_transaction -) -from socorro.cron.mixins import ( - with_rabbitmq_transactions -) - -_server_stats_sql = """ - INSERT INTO server_status ( - date_recently_completed, - date_oldest_job_queued, - avg_process_sec, - avg_wait_sec, - waiting_job_count, - date_created - ) - SELECT - ( SELECT MAX(r.completed_datetime) FROM %(table)s r ) - AS date_recently_completed, - - Null - AS date_oldest_job_queued, -- Need RabbitMQ upgrade to get this info - - ( - SELECT COALESCE ( - EXTRACT ( - EPOCH FROM avg(r.completed_datetime - r.started_datetime) - ), - 0 - ) - FROM %(table)s r - WHERE r.completed_datetime > %%(start_time)s - ) - AS avg_process_sec, - - ( - SELECT COALESCE ( - EXTRACT ( - EPOCH FROM avg(r.completed_datetime - r.date_processed) - ), - 0 - ) - FROM %(table)s r - WHERE r.completed_datetime > %%(start_time)s - ) - AS avg_wait_sec, - - %(count)s - AS waiting_job_count, -- From RabbitMQ - - CURRENT_TIMESTAMP AS date_created - """ - - -@with_postgres_transactions() -@with_single_postgres_transaction() -@with_rabbitmq_transactions() -class ServerStatusCronApp(BaseCronApp): - app_name = 'server-status' - app_description = ( - "Connects to the message queue and investigates " - "the recent reports and processor activity in the database" - ) - app_version = '0.1' - - required_config = Namespace() - required_config.add_option( - 'processing_interval_seconds', - default=5 * 60, - doc='How often we process reports (in seconds)' - ) - - def _report_partition(self): - now = utc_now() - previous_monday = now - datetime.timedelta(now.weekday()) - reports_partition = 'reports_' + previous_monday.strftime('%Y%m%d') - return reports_partition - - def run(self, connection): - message_count = self.queuing_transaction_executor( - lambda conn: int(conn.queue_status_standard.method.message_count) - ) - - 
start_time = datetime.datetime.utcnow() - start_time -= datetime.timedelta( - seconds=self.config.processing_interval_seconds - ) - - query = _server_stats_sql % { - 'table': self._report_partition(), - 'count': message_count - } - cursor = connection.cursor() - cursor.execute(query, {'start_time': start_time}) diff --git a/socorro/cron/mixins.py b/socorro/cron/mixins.py index 67e060282c..031d7edf85 100644 --- a/socorro/cron/mixins.py +++ b/socorro/cron/mixins.py @@ -6,51 +6,6 @@ with_single_transaction ) -#============================================================================== -# dedicated hbase mixins -#------------------------------------------------------------------------------ -# this class decorator adds attributes to the class in the form: -# self.long_term_storage_connection -# self.long_term_storage_transaction -# when using this definition as a class decorator, it is necessary to use -# parenthesis as it is a function call: -# @with_postgres_transactions() -# class MyClass ... -with_hbase_transactions = partial( - with_transactional_resource, - 'socorro.external.hb.connection_context.ConnectionContext', - 'long_term_storage' -) -#------------------------------------------------------------------------------ -# this class decorator adds a _run_proxy method to the class that will -# acquire a database connection and then pass it to the invocation of the -# class' "run" method. Since the connection is in the form of a -# context manager, the connection will automatically be closed when "run" -# completes. -# when using this definition as a class decorator, it is necessary to use -# parenthesis as it is a function call: -# @with_postgres_transactions() -# class MyClass ... -with_hbase_connection_as_argument = partial( - with_resource_connection_as_argument, - 'long_term_storage' -) -#------------------------------------------------------------------------------ -# this class decorator adds a _run_proxy method to the class that will -# call the class' run method in the context of a database transaction. It -# passes the connection to the "run" function. When "run" completes without -# raising an exception, the transaction will be commited if the connection -# context class understands transactions. The default HBase connection does not -# do transactions -# when using this definition as a class decorator, it is necessary to use -# parenthesis as it is a function call: -# @with_postgres_transactions() -# class MyClass ... -with_single_hb_transaction = partial( - with_single_transaction, - 'long_term_storage' -) - #============================================================================== # dedicated rabbitmq mixins #------------------------------------------------------------------------------ diff --git a/socorro/cron/serverstatus.py b/socorro/cron/serverstatus.py deleted file mode 100755 index 3201b56d0e..0000000000 --- a/socorro/cron/serverstatus.py +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/python -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -""" -This script is what populates the aggregate server_status table for jobs and processors. 
- -It provides up to date reports on the status of Socorro servers - -The following fields are updated in server_status table: - id - primary key - date_recently_completed - timestamp for job most recently processed in jobs table - date_oldest_job_queued - timestamp for the oldest job which is incomplete - avg_process_sec - Average number of seconds (float) for jobs completed since last run - or 0.0 in edge case where no jobs have been processed - avg_wait_sec- Average number of seconds (float) for jobs completed since last run - or 0.0 in edge case where no jobs have been processed - waiting_job_count - Number of jobs incomplete in queue - processors_count - Number of processors running to process jobs - date_created - timestamp for this record being udpated -""" -import time -import datetime - -import psycopg2 -import psycopg2.extras - -import socorrolib.lib.util -from socorrolib.lib.datetimeutil import utc_now - -def update(configContext, logger): - now = utc_now() - previous_monday = now - datetime.timedelta(now.weekday()) - reports_partition = 'reports_%4d%02d%02d' % ( - previous_monday.year, - previous_monday.month, - previous_monday.day, - ) - serverStatsSql = """ /* serverstatus.serverStatsSql */ - INSERT INTO server_status ( - date_recently_completed, - date_oldest_job_queued, - avg_process_sec, - avg_wait_sec, - waiting_job_count, - processors_count, - date_created - ) - SELECT - - ( - SELECT - MAX(r.completed_datetime) - FROM %s r - ) - AS date_recently_completed, - - ( - SELECT - jobs.queueddatetime - FROM jobs - WHERE jobs.completeddatetime IS NULL - ORDER BY jobs.queueddatetime LIMIT 1 - ) - AS date_oldest_job_queued, - - ( - SELECT COALESCE ( - EXTRACT ( - EPOCH FROM avg(r.completed_datetime - r.started_datetime) - ), - 0 - ) - FROM %s r - WHERE r.completed_datetime > %%s - ) - AS avg_process_sec , - - ( - SELECT COALESCE ( - EXTRACT ( - EPOCH FROM avg(r.completed_datetime - r.date_processed) - ), - 0 - ) - FROM %s r - WHERE r.completed_datetime > %%s - ) - AS avg_wait_sec, - - ( - SELECT - COUNT(jobs.id) - FROM jobs WHERE jobs.completeddatetime IS NULL - ) - AS waiting_job_count, - - ( - SELECT - count(processors.id) - FROM processors - ) - AS processors_count, - - CURRENT_TIMESTAMP AS date_created; - """ % (reports_partition, reports_partition, reports_partition) - - serverStatsLastUpdSql = """ /* serverstatus.serverStatsLastUpdSql */ - SELECT - id, - date_recently_completed, - date_oldest_job_queued, - avg_process_sec, - avg_wait_sec, - waiting_job_count, - processors_count, - date_created - FROM server_status - ORDER BY date_created DESC - LIMIT 1; -""" - - try: - databaseDSN = "host=%(databaseHost)s dbname=%(databaseName)s user=%(databaseUserName)s password=%(databasePassword)s" % configContext - conn = psycopg2.connect(databaseDSN) - cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) - except: - socorrolib.lib.util.reportExceptionAndAbort(logger) - - startTime = datetime.datetime.now() - startTime -= configContext.processingInterval - timeInserting = 0 - if configContext.debug: - logger.debug("Creating stats from now back until %s" % startTime) - try: - before = time.time() - cur.execute(serverStatsSql, (startTime, startTime)) - timeInserting = time.time() - before; - cur.execute(serverStatsLastUpdSql) - row = cur.fetchone() - conn.commit() - except: - socorrolib.lib.util.reportExceptionAndAbort(logger) - - if row: - logger.info("Server Status id=%d was updated at %s -- recent=%s, oldest=%s, avg_proc=%s, avg_wait=%s, waiting=%s, procs=%s -- in %s seconds" % 
-            (row['id'], row['date_created'], row['date_recently_completed'], row['date_oldest_job_queued'], row['avg_process_sec'], row['avg_wait_sec'], row['waiting_job_count'], row['processors_count'], timeInserting))
-    else:
-        msg = "Unable to read from server_status table after attempting to insert a new record"
-        logger.warn(msg)
-        raise Exception(msg)
diff --git a/socorro/external/boto/crash_data.py b/socorro/external/boto/crash_data.py
index 3cacb4c4aa..7ee1e3ca09 100644
--- a/socorro/external/boto/crash_data.py
+++ b/socorro/external/boto/crash_data.py
@@ -2,9 +2,11 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
-# this is a temporary hack to coerse the middleware to talk to boto S3
-# instead of HBase.
-
+from socorrolib.lib import external_common, MissingArgumentError
+from socorro.external.boto.crashstorage import (
+    BotoS3CrashStorage,
+    CrashIDNotFound,
+)
 from socorro.external.crash_data_base import CrashDataBase
@@ -24,3 +26,53 @@ def get_storage(self):
         # implementation details with boto S3.
         return self.config.hbase.hbase_class(self.config.hbase)
+
+class SimplifiedCrashData(BotoS3CrashStorage):
+    """The difference between this and the base CrashData class is that
+    this one only makes the get() and if it fails it does NOT
+    try to put the crash ID back into the priority jobs queue.
+    Also, it returns a python dict instead of a DotDict which
+    makes this easier to work with from the webapp's model bridge.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(SimplifiedCrashData, self).__init__(*args, **kwargs)
+        # Forcibly set this to override the default in the base
+        # crash storage class for boto. We're confident that at this
+        # leaf point we want to NOT return a DotDict but just a plain
+        # python dict.
+        self.config.json_object_hook = dict
+
+    def get(self, **kwargs):
+        """Return JSON data of a crash report, given its uuid. """
+        filters = [
+            ('uuid', None, str),
+            ('datatype', None, str),
+            ('name', None, str)  # only applicable if datatype == 'raw'
+        ]
+        params = external_common.parse_arguments(filters, kwargs, modern=True)
+
+        if not params.uuid:
+            raise MissingArgumentError('uuid')
+
+        if not params.datatype:
+            raise MissingArgumentError('datatype')
+
+        datatype_method_mapping = {
+            'raw': 'get_raw_dump',
+            'meta': 'get_raw_crash',
+            'processed': 'get_processed',
+            'unredacted': 'get_unredacted_processed',
+        }
+        get = self.__getattribute__(datatype_method_mapping[params.datatype])
+        try:
+            if params.datatype == 'raw':
+                return get(params.uuid, name=params.name)
+            else:
+                return get(params.uuid)
+        except CrashIDNotFound:
+            # The CrashIDNotFound exception that happens inside the
+            # crashstorage is too revealing as exception message
+            # contains information about buckets and prefix keys.
+            # Re-wrap it here so the message is just the crash ID.
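# --- Editor's aside, not part of the patch: an illustrative (hypothetical) use
# of SimplifiedCrashData from the webapp side; the crash ID is made up and
# `config` would come from configman in practice.
#
#     api = SimplifiedCrashData(config)
#     processed = api.get(uuid='00000000-0000-0000-0000-000000160913',
#                         datatype='processed')   # plain dict, not a DotDict
#     dump_bytes = api.get(uuid='00000000-0000-0000-0000-000000160913',
#                          datatype='raw', name='upload_file_minidump')
# --- end of aside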
+ raise CrashIDNotFound(params.uuid) diff --git a/socorro/external/boto/crashstorage.py b/socorro/external/boto/crashstorage.py index 500d7db0a9..3d8442aaee 100644 --- a/socorro/external/boto/crashstorage.py +++ b/socorro/external/boto/crashstorage.py @@ -7,7 +7,6 @@ import json_schema_reducer from socorrolib.lib.converters import change_default -from socorrolib.lib.util import DotDict from configman import Namespace from configman.converters import class_converter, py_obj_to_str @@ -70,6 +69,11 @@ class BotoCrashStorage(CrashStorageBase): default='.dump', reference_value_from='resource.boto', ) + required_config.add_option( + 'json_object_hook', + default='configman.dotdict.DotDict', + from_string_converter=class_converter, + ) def is_operational_exception(self, x): if "not found, no value returned" in str(x): @@ -174,20 +178,27 @@ def save_raw_and_processed( self.save_processed(processed_crash) @staticmethod - def do_get_raw_crash(boto_connection, crash_id): + def do_get_raw_crash(boto_connection, crash_id, json_object_hook): try: raw_crash_as_string = boto_connection.fetch( crash_id, "raw_crash" ) - return json.loads(raw_crash_as_string, object_hook=DotDict) + return json.loads( + raw_crash_as_string, + object_hook=json_object_hook + ) except boto_connection.ResponseError, x: raise CrashIDNotFound( '%s not found: %s' % (crash_id, x) ) def get_raw_crash(self, crash_id): - return self.transaction_for_get(self.do_get_raw_crash, crash_id) + return self.transaction_for_get( + self.do_get_raw_crash, + crash_id, + self.config.json_object_hook + ) @staticmethod def do_get_raw_dump(boto_connection, crash_id, name=None): @@ -244,7 +255,11 @@ def get_raw_dumps_as_files(self, crash_id): ) @staticmethod - def _do_get_unredacted_processed(boto_connection, crash_id): + def _do_get_unredacted_processed( + boto_connection, + crash_id, + json_object_hook, + ): try: processed_crash_as_string = boto_connection.fetch( crash_id, @@ -252,7 +267,7 @@ def _do_get_unredacted_processed(boto_connection, crash_id): ) return json.loads( processed_crash_as_string, - object_hook=DotDict + object_hook=json_object_hook, ) except boto_connection.ResponseError, x: raise CrashIDNotFound( @@ -262,7 +277,8 @@ def _do_get_unredacted_processed(boto_connection, crash_id): def get_unredacted_processed(self, crash_id): return self.transaction_for_get( self._do_get_unredacted_processed, - crash_id + crash_id, + self.config.json_object_hook, ) diff --git a/socorro/external/crash_data_base.py b/socorro/external/crash_data_base.py index d9a57bf6cd..63029c1f8d 100644 --- a/socorro/external/crash_data_base.py +++ b/socorro/external/crash_data_base.py @@ -2,6 +2,9 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +# XXX this is now deprecated and can be deleted. +# See https://bugzilla.mozilla.org/show_bug.cgi?id=1299465 + from socorrolib.lib import ( MissingArgumentError, ResourceNotFound, diff --git a/socorro/external/happybase/__init__.py b/socorro/external/happybase/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/socorro/external/happybase/connection_context.py b/socorro/external/happybase/connection_context.py deleted file mode 100644 index 22169713fa..0000000000 --- a/socorro/external/happybase/connection_context.py +++ /dev/null @@ -1,154 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. 
If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import contextlib -import socket -import threading - -from configman.config_manager import RequiredConfig -from configman import Namespace - -import happybase - - -#============================================================================== -class HBaseConnection(object): - """An HBase connection class encapsulating various parts of the underlying - mechanism to connect to HBase.""" - #-------------------------------------------------------------------------- - def __init__(self, config, connection): - self.config = config - self._connection = connection - - #-------------------------------------------------------------------------- - def commit(self): - pass - - #-------------------------------------------------------------------------- - def rollback(self): - pass - - #-------------------------------------------------------------------------- - def close(self): - self._connection.close() - - #-------------------------------------------------------------------------- - def __getattr__(self, name): - return getattr(self._connection, name) - - -#============================================================================== -class HappyBaseConnectionContext(RequiredConfig): - """This class implements a connection to HBase for every transaction to be - executed. - """ - required_config = Namespace() - required_config.add_option( - 'hbase_host', - doc='Host to HBase server', - default='localhost', - reference_value_from='resource.hb', - ) - required_config.add_option( - 'hbase_port', - doc='Port to HBase server', - default=9090, - reference_value_from='resource.hb', - ) - required_config.add_option( - 'hbase_timeout', - doc='timeout in milliseconds for an HBase connection', - default=5000, - reference_value_from='resource.hb', - ) - required_config.add_option( - 'temporary_file_system_storage_path', - doc='a local filesystem path where dumps temporarily ' - 'during processing', - default='/tmp', - reference_value_from='resource.hb', - ) - required_config.add_option( - 'dump_file_suffix', - doc='the suffix used to identify a dump file (for use in temp files)', - default='.dump', - reference_value_from='resource.hb', - ) - - operational_exceptions = ( - happybase.NoConnectionsAvailable, - socket.timeout, - socket.error, - ) - - conditional_exceptions = () - - #-------------------------------------------------------------------------- - def __init__(self, config): - super(HappyBaseConnectionContext, self).__init__() - self.config = config - - #-------------------------------------------------------------------------- - def connection(self, name=None): - return HBaseConnection( - self.config, - happybase.Connection( - host=self.config.hbase_host, - port=self.config.hbase_port, - timeout=self.config.hbase_timeout - ) - ) - - #-------------------------------------------------------------------------- - @contextlib.contextmanager - def __call__(self, name=None): - conn = self.connection(name) - try: - yield conn - finally: - self.close_connection(conn) - - #-------------------------------------------------------------------------- - def force_reconnect(self): - pass - - #-------------------------------------------------------------------------- - def close(self): - pass - - #-------------------------------------------------------------------------- - def close_connection(self, connection, force=False): - connection.close() - - 
#-------------------------------------------------------------------------- - def is_operational_exception(self, msg): - return False - - -#============================================================================== -class HappyBasePooledConnectionContext(HappyBaseConnectionContext): - """This class implements a connection to HBase for every transaction to be - executed. - """ - - #-------------------------------------------------------------------------- - def __init__(self, config): - super(HappyBasePooledConnectionContext, self).__init__(config) - self._connection_pool = happybase.ConnectionPool( - 20, # TODO: how to get this number imported from the taskmanager - host=self.config.hbase_host, - port=self.config.hbase_port, - timeout=self.config.hbase_timeout - ) - - #-------------------------------------------------------------------------- - def connection(self, name=None): - raise Exception('must use context manager') - - #-------------------------------------------------------------------------- - @contextlib.contextmanager - def __call__(self, name=None): - with self._connection_pool.connection() as connection: - yield HBaseConnection(self.config, connection) - diff --git a/socorro/external/happybase/crash_data.py b/socorro/external/happybase/crash_data.py deleted file mode 100644 index 1c7ecb9208..0000000000 --- a/socorro/external/happybase/crash_data.py +++ /dev/null @@ -1,14 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -from socorro.external.crash_data_base import CrashDataBase - - -class CrashData(CrashDataBase): - """ - Implement the /crash_data service with HBase. - """ - def get_storage(self): - return self.config.hb.hbase_class(self.config.hb) - diff --git a/socorro/external/happybase/crashstorage.py b/socorro/external/happybase/crashstorage.py deleted file mode 100644 index 27be84a32d..0000000000 --- a/socorro/external/happybase/crashstorage.py +++ /dev/null @@ -1,234 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import datetime -import json -import os - -from socorro.external.happybase.connection_context import \ - HappyBaseConnectionContext -from socorro.external.crashstorage_base import ( - CrashStorageBase, - CrashIDNotFound, - MemoryDumpsMapping, - FileDumpsMapping -) -from socorrolib.lib.util import DotDict - -from configman import Namespace, class_converter - - -class BadCrashIDException(ValueError): - pass - - -def crash_id_to_row_id(crash_id): - """ - Returns a row_id suitable for the HBase crash_reports table. - The first hex character of the crash_id is used to "salt" the rowkey - so that there should always be 16 HBase RegionServers responsible - for dealing with the current stream of data. - Then, we put the last six digits of the crash_id which represent the - submission date. This lets us easily scan through the crash_reports - table by day. - Finally, we append the normal crash_id string. - """ - try: - return "%s%s%s" % (crash_id[0], crash_id[-6:], crash_id) - except Exception, x: - raise BadCrashIDException(x) - - -def row_id_to_crash_id(row_id): - """ - Returns the natural ooid given an HBase row key. - See ooid_to_row_id for structure of row_id. 
- """ - try: - return row_id[7:] - except Exception, x: - raise BadCrashIDException(x) - - -def crash_id_to_timestamped_row_id(crash_id, timestamp): - """ - Returns a row_id suitable for the HBase crash_reports index tables. - The first hex character of the ooid is used to "salt" the rowkey - so that there should always be 16 HBase RegionServers responsible - for dealing with the current stream of data. - Then, we put the crash_report submission timestamp. This lets us - easily scan through a time specific region of the index. - Finally, we append the normal ooid string for uniqueness. - """ - if timestamp[-6] in "-+": - return "%s%s%s" % (crash_id[0], timestamp[:-6], crash_id) - return "%s%s%s" % (crash_id[0], timestamp, crash_id) - - -class HBaseCrashStorage(CrashStorageBase): - required_config = Namespace() - required_config.add_option( - 'transaction_executor_class', - default="socorro.database.transaction_executor." - "TransactionExecutorWithInfiniteBackoff", - doc='a class that will execute transactions', - from_string_converter=class_converter, - reference_value_from='resource.hb', - ) - required_config.add_option( - 'hbase_connection_context_class', - default=HappyBaseConnectionContext, - doc='the class responsible for proving an hbase connection', - reference_value_from='resource.hb', - ) - - def __init__(self, config, quit_check_callback=None): - super(HBaseCrashStorage, self).__init__( - config, - quit_check_callback - ) - self.logger.info('connecting to hbase via happybase') - self.hbase = config.hbase_connection_context_class(config) - self.transaction = config.transaction_executor_class( - config, - self.hbase, - quit_check_callback=quit_check_callback - ) - - def save_raw_crash(self, raw_crash, dumps, crash_id): - row_id = crash_id_to_row_id(crash_id) - submitted_timestamp = raw_crash['submitted_timestamp'] - - columns_and_values = { - "flags:processed": "N", - "meta_data:json": json.dumps(raw_crash), - "timestamps:submitted": submitted_timestamp, - "ids:ooid": crash_id, - } - # we don't know where the dumps came from, they could be in - # in the form of names to binary blobs or names to pathnames. - # this call ensures that we've got the former. 
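# --- Editor's aside, not part of the patch: a sketch of the two dump-mapping
# shapes, assuming MemoryDumpsMapping accepts a plain name->bytes mapping (it
# is built from name/value pairs elsewhere in this file).
#
#     in_memory = MemoryDumpsMapping({'upload_file_minidump': b'MDMP...'})
#     # the reverse direction writes temporary files named from the crash_id,
#     # the configured temp path and the dump_file_suffix:
#     in_memory.as_file_dumps_mapping(crash_id, '/tmp', '.dump')
# --- end of aside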
- in_memory_dumps = dumps.as_memory_dumps_mapping() - for key, dump in in_memory_dumps.iteritems(): - if key in (None, '', 'upload_file_minidump'): - key = 'dump' - columns_and_values['raw_data:%s' % key] = dump - - def do_save(connection, raw_crash, in_memory_dumps, crash_id): - crash_report_table = connection.table('crash_reports') - crash_report_table.put( - row_id, - columns_and_values - ) - self.transaction(do_save, raw_crash, in_memory_dumps, crash_id) - - def save_processed(self, processed_crash): - crash_id = processed_crash['uuid'] - row_id = crash_id_to_row_id(crash_id) - columns_and_values = { - "timestamps:processed": processed_crash['completeddatetime'], - "processed_data:signature": processed_crash['signature'], - "processed_data:json": json.dumps( - processed_crash - ), - "flags:processed": "" - } - - def do_save(connection, processed_crash): - crash_report_table = connection.table('crash_reports') - crash_report_table.put( - row_id, - columns_and_values - ) - - self.transaction(do_save, processed_crash) - - def get_raw_crash(self, crash_id): - row_id = crash_id_to_row_id(crash_id) - - def _do_get_raw_crash(connection, row_id): - crash_report_table = connection.table('crash_reports') - try: - return crash_report_table.row( - row_id, - columns=['meta_data:json'] - )['meta_data:json'] - except KeyError: - raise CrashIDNotFound(crash_id) - raw_crash_json_str = self.transaction(_do_get_raw_crash, row_id) - raw_crash = json.loads(raw_crash_json_str, object_hook=DotDict) - return raw_crash - - def get_raw_dump(self, crash_id, name=None): - row_id = crash_id_to_row_id(crash_id) - if name in (None, '', 'upload_file_minidump'): - name = 'dump' - column_name = 'raw_data:%s' % name - def do_get(connection, row_id, name): - crash_report_table = connection.table('crash_reports') - try: - return crash_report_table.row( - row_id, - columns=[column_name] - )[column_name] - except KeyError: - raise CrashIDNotFound(crash_id) - return self.transaction(do_get, row_id, name) - - @staticmethod - def _make_dump_name(family_qualifier): - name = family_qualifier.split(':')[1] - if name == 'dump': - name = 'upload_file_minidump' - return name - - def get_raw_dumps(self, crash_id): - row_id = crash_id_to_row_id(crash_id) - - def do_get(connection, row_id): - try: - crash_report_table = connection.table('crash_reports') - dumps = crash_report_table.row( - row_id, - columns=['raw_data'] - ) - # ensure that we return a proper mapping of names to - # binary blobs. 
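# --- Editor's aside, not part of the patch: worked examples of the
# family:qualifier -> dump-name translation that _make_dump_name() above
# performs (the non-'dump' qualifier is hypothetical).
#
#     _make_dump_name('raw_data:dump')    == 'upload_file_minidump'
#     _make_dump_name('raw_data:flash1')  == 'flash1'
# --- end of aside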
- return MemoryDumpsMapping( - (self._make_dump_name(k), v) for k, v in dumps.iteritems() - ) - except KeyError: - raise CrashIDNotFound(crash_id) - - return self.transaction(do_get, row_id) - - def get_raw_dumps_as_files(self, crash_id): - in_memory_dumps = self.get_raw_dumps(crash_id) - # convert our in memory name/blob data into name/pathname data - return in_memory_dumps.as_file_dumps_mapping( - crash_id, - self.hbase.config.temporary_file_system_storage_path, - self.hbase.config.dump_file_suffix - ) - - - def get_unredacted_processed(self, crash_id): - row_id = crash_id_to_row_id(crash_id) - - def do_get(connection, row_id): - crash_report_table = connection.table('crash_reports') - try: - return crash_report_table.row( - row_id, - columns=['processed_data:json'] - )['processed_data:json'] - except KeyError: - raise CrashIDNotFound(crash_id) - processed_crash_json_str = self.transaction(do_get, row_id) - processed_crash = json.loads( - processed_crash_json_str, - object_hook=DotDict - ) - return processed_crash - diff --git a/socorro/external/happybase/hbase_client.py b/socorro/external/happybase/hbase_client.py deleted file mode 100644 index f1a9940d83..0000000000 --- a/socorro/external/happybase/hbase_client.py +++ /dev/null @@ -1,280 +0,0 @@ -from socorrolib.app import generic_app - -from configman import Namespace, RequiredConfig, ConfigurationManager -from configman.converters import class_converter - -from socorro.external.happybase.crashstorage import ( - HBaseCrashStorage, \ - crash_id_to_row_id, - row_id_to_crash_id -) - -import itertools -import pprint -import contextlib -import gzip -import sys -import json - - -_raises_exception = object() - - -class NotEnoughArguments(Exception): - def __init__(self, arg): - self.arg = arg - - -def expect_from_aggregation(required_config, name, i, - default=_raises_exception): - def _closure(g, l, a): - if len(a) < i + 1: - if default is _raises_exception: - raise NotEnoughArguments(name) - return default - return a[i] - required_config.add_aggregation(name, _closure) - - -class _Command(RequiredConfig): - required_config = Namespace() - - def __init__(self, app): - self.app = app - self.config = app.config - self.storage = app.storage - - -class _CommandRequiringCrashID(_Command): - required_config = Namespace() - expect_from_aggregation(required_config, 'crash_id', 0) - - -class _CommandRequiringTable(_Command): - required_config = Namespace() - expect_from_aggregation(required_config, 'table', 0) - - -class _CommandRequiringTableRow(_CommandRequiringTable): - required_config = Namespace() - expect_from_aggregation(required_config, 'row_id', 1) - - -class _CommandRequiringScanParameters(_CommandRequiringTable): - required_config = Namespace() - expect_from_aggregation(required_config, 'prefix', 1) - expect_from_aggregation(required_config, 'columns', 2) - expect_from_aggregation(required_config, 'limit', 3) - - -class help(_Command): - """Usage: help - Get help on commands.""" - def run(self): - self.app.config_manager.output_summary() - -class get_raw_crash(_CommandRequiringCrashID): - """Usage: get_raw_crash CRASH_ID - Get the raw crash JSON data.""" - def run(self): - pprint.pprint(self.storage.get_raw_crash(self.config.crash_id)) - - -class get_raw_dumps(_CommandRequiringCrashID): - """Usage: get_raw_dumps CRASH_ID - Get information on the raw dumps for a crash.""" - def run(self): - for name, dump in self.storage.get_raw_dumps( - self.config.crash_id - ).items(): - dump_name = "%s.%s.dump" % (self.config.crash_id, name) - with 
open(dump_name, "w") as f: - f.write(dump) - print("%s: dump length = %s" % (name, len(dump))) - - -class get_processed(_CommandRequiringCrashID): - """Usage: get_processed CRASH_ID - Get the redacted processed JSON for a crash""" - def run(self): - if self.config.json: - print json.dumps(self.storage.get_processed(self.config.crash_id)) - else: - pprint.pprint(self.storage.get_processed(self.config.crash_id)) - - -class get_unredacted_processed(_CommandRequiringCrashID): - """Usage: get_unredacted_processed CRASH_ID - Get the unredacted processed JSON for a crash""" - def run(self): - if self.config.json: - print json.dumps(self.storage.get_unredacted_processed( - self.config.crash_id - )) - else: - pprint.pprint(self.storage.get_unredacted_processed( - self.config.crash_id - )) - - -class get_report_processing_state(_CommandRequiringCrashID): - """Usage: get_report_processing_state CRASH_ID - Get the report processing state for a crash.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn): - pprint.pprint(self.storage._get_report_processing_state( - conn, - self.config.crash_id - )) - transaction() - - -class union_scan_with_prefix(_CommandRequiringScanParameters): - """Usage: union_scan_with_prefix TABLE PREFIX COLUMNS [LIMIT] - Do a union scan on a table using a given prefix.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn, limit=self.config.limit): - for row in itertools.islice( - self.storage._union_scan_with_prefix( - conn, - self.config.table, - self.config.prefix, - self.config.columns - ), - self.config.limit): - pprint.pprint(row) - transaction() - - -class merge_scan_with_prefix(_CommandRequiringScanParameters): - """Usage: merge_scan_with_prefix TABLE PREFIX COLUMNS [LIMIT] - Do a merge scan on a table using a given prefix.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn, limit=self.config.limit): - for row in itertools.islice( - self.storage._merge_scan_with_prefix( - conn, - self.config.table, - self.config.prefix, - self.config.columns - ), - self.config.limit): - pprint.pprint(row) - transaction() - - -class describe_table(_CommandRequiringTable): - """Usage: describe_table TABLE - Describe the details of a table in HBase.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn): - pprint.pprint(conn.getColumnDescriptors(self.config.table)) - transaction() - - -class get_full_row(_CommandRequiringTableRow): - """Usage: describe_table TABLE ROW_ID - Pretty-print a row in HBase.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn): - pprint.pprint(self.storage._make_row_nice(conn.getRow( - self.config.table, - self.config.row_id - )[0])) - transaction() - - -class export_processed_crashes_for_date(_Command): - """Usage: export_processed_crashes_for_date DATE PATH - Export all crashes for a given date to a path.""" - required_config = Namespace() - expect_from_aggregation(required_config, 'date', 0) - expect_from_aggregation(required_config, 'path', 1) - - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn): - for row in itertools.islice( - self.storage._union_scan_with_prefix(conn, - 'crash_reports', - self.config.date, - ['processed_data:json']), - 10 - ): - crash_id = row_id_to_crash_id(row['_rowkey']) - - if row['processed_data:json']: - file_name = os.path.join(self.config.path, - crash_id + '.jsonz') - with contextlib.closing(gzip.GzipFile(file_name, - 'w', - 9)) as f: - 
json.dump(row['processed_data:json'], f) - transaction() - - -class HBaseClientConfigurationManager(ConfigurationManager): - def output_summary(self, output_stream=sys.stdout, block_password=True): - super(HBaseClientConfigurationManager, self).output_summary( - output_stream, - block_password - ) - - print >> output_stream, "Available commands:" - - for command in (var for var in globals().values() - if isinstance(var, type) and - issubclass(var, _Command) and - var.__name__[0] != '_'): - - print >> output_stream, ' ' + command.__name__ - print >> output_stream, ' ' + (command.__doc__ or - '(undocumented)') - print >> output_stream, '' - - -class HBaseClientApp(generic_app.App): - app_name = "hbase_client.py" - app_version = "0.1" - app_description = __doc__ - - required_config = Namespace() - required_config.add_option( - 'hbase_crash_storage_class', - default=HappyBaseCrashStorage, - - doc='the class responsible for proving an hbase connection', - from_string_converter=class_converter - ) - required_config.add_option( - 'command', - default=help, - doc='command to use', - is_argument=True, - from_string_converter=lambda s: class_converter(__name__ + '.' + s) - ) - required_config.add_option( - 'json', - default=False, - short_form='j', - doc='json output instead of a pretty printed mapping', - ) - - - def main(self): - self.storage = self.config.hbase_crash_storage_class(self.config) - self.config.command(self).run() - - -if __name__ == '__main__': - try: - generic_app.main(HBaseClientApp, - config_manager_cls=HBaseClientConfigurationManager) - except NotEnoughArguments as e: - print >> sys.stderr, "ERROR: was expecting another argument: " + e.arg - print >> sys.stderr, "Use the 'help' command to get help on commands." diff --git a/socorro/external/hb/__init__.py b/socorro/external/hb/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/socorro/external/hb/connection_context.py b/socorro/external/hb/connection_context.py deleted file mode 100644 index 4b15d6237c..0000000000 --- a/socorro/external/hb/connection_context.py +++ /dev/null @@ -1,206 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -import contextlib -import socket - -from configman.config_manager import RequiredConfig -from configman import Namespace - -from thrift import Thrift -from thrift.transport import TSocket, TTransport -from thrift.protocol import TBinaryProtocol -from hbase.Hbase import Client -import hbase.ttypes - - -#============================================================================== -class HBaseConnection(object): - """An HBase connection class encapsulating various parts of the underlying - mechanism to connect to HBase.""" - #-------------------------------------------------------------------------- - def __init__(self, config): - self.config = config - self.make_connection() - - #-------------------------------------------------------------------------- - def commit(self): - pass - - #-------------------------------------------------------------------------- - def rollback(self): - pass - - #-------------------------------------------------------------------------- - def make_connection(self): - self.socket = TSocket.TSocket(self.config.hbase_host, - self.config.hbase_port) - self.socket.setTimeout(self.config.hbase_timeout) - self.transport = TTransport.TBufferedTransport(self.socket) - self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport) - self.client = Client(self.protocol) - self.transport.open() - - #-------------------------------------------------------------------------- - def close(self): - self.transport.close() - - -#============================================================================== -class HBaseConnectionContext(RequiredConfig): - """This class implements a connection to HBase for every transaction to be - executed. - """ - required_config = Namespace() - required_config.add_option( - 'hbase_host', - doc='Host to HBase server', - default='localhost', - reference_value_from='resource.hb', - ) - required_config.add_option( - 'hbase_port', - doc='Port to HBase server', - default=9090, - reference_value_from='resource.hb', - ) - required_config.add_option( - 'hbase_timeout', - doc='timeout in milliseconds for an HBase connection', - default=5000, - reference_value_from='resource.hb', - ) - required_config.add_option( - 'temporary_file_system_storage_path', - doc='a local filesystem path where dumps temporarily ' - 'during processing', - default='/tmp', - reference_value_from='resource.hb', - ) - required_config.add_option( - 'dump_file_suffix', - doc='the suffix used to identify a dump file (for use in temp files)', - default='.dump', - reference_value_from='resource.hb', - ) - - operational_exceptions = ( - hbase.ttypes.IOError, - Thrift.TException, - socket.timeout, - socket.error, - ) - - conditional_exceptions = () - - #-------------------------------------------------------------------------- - def __init__(self, config): - super(HBaseConnectionContext, self).__init__() - self.config = config - - #-------------------------------------------------------------------------- - def connection(self, name=None): - return HBaseConnection(self.config) - - #-------------------------------------------------------------------------- - @contextlib.contextmanager - def __call__(self, name=None): - conn = self.connection(name) - try: - yield conn - finally: - self.close_connection(conn) - - #-------------------------------------------------------------------------- - def force_reconnect(self): - pass - - #-------------------------------------------------------------------------- - def close(self): - pass - - 
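# --- Editor's aside, not part of the patch: how this Thrift-backed context is
# used elsewhere in this codebase (a sketch only; `config` and `row_id` are
# assumed to exist).
#
#     context = HBaseConnectionContext(config)
#     with context() as conn:   # opens a Thrift transport, closes it on exit
#         rows = conn.client.getRowWithColumns(
#             'crash_reports', row_id, ['meta_data:json']
#         )
# --- end of aside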
#-------------------------------------------------------------------------- - def close_connection(self, connection, force=False): - connection.close() - - #-------------------------------------------------------------------------- - def is_operational_exception(self, msg): - return False - - -#============================================================================== -class HBasePooledConnectionContext(HBaseConnectionContext): - """a configman compliant class that pools HBase database connections""" - #-------------------------------------------------------------------------- - def __init__(self, config): - super(HBasePooledConnectionContext, self).__init__(config) - #self.config.logger.debug("HBasePooledConnectionContext - " - # "setting up connection pool") - self.pool = {} - - #-------------------------------------------------------------------------- - def connection(self, name=None): - """return a named connection. - - This function will return a named connection by either finding one - in its pool by the name or creating a new one. If no name is given, - it will use the name of the current executing thread as the name of - the connection. - - parameters: - name - a name as a string - """ - if not name: - name = self.config.executor_identity() - if name in self.pool: - return self.pool[name] - self.pool[name] = \ - super(HBasePooledConnectionContext, self).connection(name) - return self.pool[name] - - #-------------------------------------------------------------------------- - def close_connection(self, connection, force=False): - """overriding the baseclass function, this routine will decline to - close a connection at the end of a transaction context. This allows - for reuse of connections.""" - if force: - try: - (super(HBasePooledConnectionContext, self) - .close_connection(connection, force)) - except self.operational_exceptions: - self.config.logger.error( - 'HBasePooledConnectionContext - failed closing' - ) - for name, conn in self.pool.iteritems(): - if conn is connection: - break - del self.pool[name] - - #-------------------------------------------------------------------------- - def close(self): - """close all pooled connections""" - self.config.logger.debug( - "HBasePooledConnectionContext - shutting down connection pool" - ) - # force a list, we're changing the pool as we iterate - for name, connection in list(self.pool.iteritems()): - self.close_connection(connection, force=True) - self.config.logger.debug( - "HBasePooledConnectionContext - connection %s closed", - name - ) - - #-------------------------------------------------------------------------- - def force_reconnect(self, name=None): - """tell this functor that the next time it gives out a connection - under the given name, it had better make sure it is brand new clean - connection. Use this when you discover that your connection has - gone bad and you want to report that fact to the appropriate - authority. You are responsible for actually closing the connection or - not, if it is really hosed.""" - if name is None: - name = self.config.executor_identity() - self.config.logger.debug('identity: %s', name) - if name in self.pool: - del self.pool[name] diff --git a/socorro/external/hb/crash_data.py b/socorro/external/hb/crash_data.py deleted file mode 100644 index 76e51dfc20..0000000000 --- a/socorro/external/hb/crash_data.py +++ /dev/null @@ -1,14 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. 
If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -from socorro.external.crash_data_base import CrashDataBase - - -class CrashData(CrashDataBase): - """ - Implement the /crash_data service with HBase. - """ - def get_storage(self): - return self.config.hbase.hbase_class(self.config.hbase) - diff --git a/socorro/external/hb/crashstorage.py b/socorro/external/hb/crashstorage.py deleted file mode 100644 index 2328bb95bb..0000000000 --- a/socorro/external/hb/crashstorage.py +++ /dev/null @@ -1,549 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import datetime -import heapq -import itertools -import json -import os - -from socorrolib.lib.datetimeutil import utc_now -from socorro.external.crashstorage_base import ( - CrashStorageBase, - CrashIDNotFound, - MemoryDumpsMapping, -) -from socorro.external.hb.connection_context import \ - HBaseConnectionContext -from socorrolib.lib.util import DotDict -from configman import Namespace, class_converter - -from hbase.Hbase import Mutation - - -class BadCrashIDException(ValueError): pass - - -def crash_id_to_row_id(crash_id): - """ - Returns a row_id suitable for the HBase crash_reports table. - The first hex character of the crash_id is used to "salt" the rowkey - so that there should always be 16 HBase RegionServers responsible - for dealing with the current stream of data. - Then, we put the last six digits of the crash_id which represent the - submission date. This lets us easily scan through the crash_reports - table by day. - Finally, we append the normal crash_id string. - """ - try: - return "%s%s%s" % (crash_id[0], crash_id[-6:], crash_id) - except Exception, x: - raise BadCrashIDException(x) - - -def row_id_to_crash_id(row_id): - """ - Returns the natural ooid given an HBase row key. - See ooid_to_row_id for structure of row_id. - """ - try: - return row_id[7:] - except Exception, x: - raise BadCrashIDException(x) - - -def crash_id_to_timestamped_row_id(crash_id, timestamp): - """ - Returns a row_id suitable for the HBase crash_reports index tables. - The first hex character of the ooid is used to "salt" the rowkey - so that there should always be 16 HBase RegionServers responsible - for dealing with the current stream of data. - Then, we put the crash_report submission timestamp. This lets us - easily scan through a time specific region of the index. - Finally, we append the normal ooid string for uniqueness. - """ - if timestamp[-6] in "-+": - return "%s%s%s" % (crash_id[0], timestamp[:-6], crash_id) - return "%s%s%s" % (crash_id[0], timestamp, crash_id) - - -class HBaseCrashStorage(CrashStorageBase): - required_config = Namespace() - required_config.add_option( - 'new_crash_limit', - default=10 ** 6, - doc='the maximum number of new crashes to yield at a time', - reference_value_from='resource.hb', - ) - required_config.add_option( - 'transaction_executor_class', - default="socorro.database.transaction_executor." 
- "TransactionExecutorWithInfiniteBackoff", - doc='a class that will execute transactions', - from_string_converter=class_converter, - reference_value_from='resource.hb', - ) - required_config.add_option( - 'hbase_connection_context_class', - default=HBaseConnectionContext, - doc='the class responsible for proving an hbase connection', - reference_value_from='resource.hb', - ) - - def __init__(self, config, quit_check_callback=None): - super(HBaseCrashStorage, self).__init__(config, quit_check_callback) - self.logger.info('connecting to hbase') - self.hbase = config.hbase_connection_context_class(config) - self.transaction = config.transaction_executor_class( - config, - self.hbase, - quit_check_callback=quit_check_callback - ) - - def _wrap_in_transaction(self, f): - """This decorator takes a function wraps it in a transaction context. - The function being wrapped will take the connection as an argument.""" - return lambda *args, **kwargs: self.transaction(lambda conn_ctx: f(conn_ctx.client, *args, **kwargs)) - - def close(self): - self.hbase.close() - - def _salted_scanner_iterable(self, client, salted_prefix, scanner): - """Generator based iterable that runs over an HBase scanner - yields a tuple of the un-salted rowkey and the nice format of the - row.""" - self.logger.debug('Scanner %s generated', salted_prefix) - raw_rows = client.scannerGet(scanner) - while raw_rows: - nice_row = self._make_row_nice(raw_rows[0]) - yield (nice_row['_rowkey'][1:], nice_row) - raw_rows = client.scannerGet(scanner) - self.logger.debug('Scanner %s exhausted' % salted_prefix) - client.scannerClose(scanner) - - @staticmethod - def _make_row_nice(client_row_object): - columns = dict( - ((x, y.value) for x, y in client_row_object.columns.items()) - ) - columns['_rowkey'] = client_row_object.row - return columns - - def _get_report_processing_state(self, client, crash_id): - """Return the current state of processing for this report and the - submitted_timestamp needed. For processing queue manipulation. - If the ooid doesn't exist, return an empty array""" - raw_rows = client.getRowWithColumns('crash_reports', - crash_id_to_row_id(crash_id), - ['flags:processed', - 'flags:legacy_processing', - 'timestamps:submitted', - 'timestamps:processed']) - - if raw_rows: - return self._make_row_nice(raw_rows[0]) - else: - raise CrashIDNotFound(crash_id) - - def _put_crash_report_indices(self, client, crash_id, timestamp, indices): - row_id = crash_id_to_timestamped_row_id(crash_id, timestamp) - for index_name in indices: - client.mutateRow(index_name, row_id, - [Mutation(column="ids:ooid", value=crash_id)]) - - def save_raw_crash(self, raw_crash, dumps, crash_id): - @self._wrap_in_transaction - def transaction(client): - row_id = crash_id_to_row_id(crash_id) - submitted_timestamp = raw_crash['submitted_timestamp'] - - legacy_processing = raw_crash.get('legacy_processing', False) - - columns = [("flags:processed", "N"), - ("meta_data:json", json.dumps(raw_crash)), - ("timestamps:submitted", submitted_timestamp), - ("ids:ooid", crash_id) - ] - - # we don't know where the dumps came from, they could be in - # in the form of names to binary blobs or names to pathnames. - # this call ensures that we've got the former. 
- memory_dumps_mapping = dumps.as_memory_dumps_mapping() - for key, dump in memory_dumps_mapping.iteritems(): - if key in (None, '', 'upload_file_minidump'): - key = 'dump' - columns.append(('raw_data:%s' % key, dump)) - - mutations = [Mutation(column=c, value=v) - for c, v in columns if v is not None] - - indices = [ - 'crash_reports_index_submitted_time', - 'crash_reports_index_unprocessed_flag' - ] - - if legacy_processing == 0: - mutations.append(Mutation(column="flags:legacy_processing", - value='Y')) - indices.append('crash_reports_index_legacy_unprocessed_flag') - indices.append('crash_reports_index_legacy_submitted_time') - - process_type = raw_crash.get('ProcessType', 'default') - - is_hang = 'HangID' in raw_crash - - if is_hang: - hang_id = raw_crash['HangID'] - mutations.append(Mutation(column="ids:hang", value=hang_id)) - - client.mutateRow('crash_reports', row_id, mutations) - self._put_crash_report_indices(client, crash_id, submitted_timestamp, - indices) - - if is_hang: - # Put the hang's indices. - ooid_column_name = "ids:ooid:" + process_type - client.mutateRow( - 'crash_reports_index_hang_id_submitted_time', - crash_id_to_timestamped_row_id(hang_id, submitted_timestamp), - [Mutation(column=ooid_column_name, value=crash_id)] - ) - client.mutateRow( - 'crash_reports_index_hang_id', - hang_id, - [Mutation(column=ooid_column_name, value=crash_id)] - ) - - # update the metrics - time_levels = [ - submitted_timestamp[:16], # minute yyyy-mm-ddTHH:MM - submitted_timestamp[:13], # hour yyyy-mm-ddTHH - submitted_timestamp[:10], # day yyyy-mm-dd - submitted_timestamp[: 7], # month yyyy-mm - submitted_timestamp[: 4] # year yyyy - ] - counter_increments = ['counters:submitted_crash_reports'] - counter_increments.append( - "counters:submitted_crash_reports_legacy_throttle_%d" - % legacy_processing - ) - if process_type != 'default': - if is_hang: - counter_increments.append( - "counters:submitted_crash_report_hang_pairs" - ) - else: - counter_increments.append( - "counters:submitted_oop_%s_crash_reports" % process_type - ) - - client.atomicIncrement( - 'metrics', - 'crash_report_queue', - 'counters:current_unprocessed_size', - 1 - ) - if legacy_processing == 0: - client.atomicIncrement( - 'metrics', - 'crash_report_queue', - 'counters:current_legacy_unprocessed_size', - 1 - ) - - for rowkey in time_levels: - for column in counter_increments: - client.atomicIncrement('metrics', rowkey, column, 1) - - self.logger.info('saved - %s', crash_id) - return transaction() - - def save_processed(self, processed_crash): - @self._wrap_in_transaction - def transaction(client, processed_crash=processed_crash): - processed_crash = processed_crash.copy() - self._stringify_dates_in_dict(processed_crash) - - crash_id = processed_crash['uuid'] - - row_id = crash_id_to_row_id(crash_id) - - processing_state = self._get_report_processing_state(client, crash_id) - submitted_timestamp = processing_state.get( - 'timestamps:submitted', - processed_crash.get('date_processed', 'unknown') - ) - - if processing_state.get('flags:processed', '?') == 'N': - index_row_key = crash_id_to_timestamped_row_id( - crash_id, - submitted_timestamp - ) - client.atomicIncrement('metrics', - 'crash_report_queue', - 'counters:current_unprocessed_size', - -1) - client.deleteAllRow('crash_reports_index_unprocessed_flag', - index_row_key) - - processed_timestamp = processed_crash['completeddatetime'] - - if 'signature' in processed_crash: - if len(processed_crash['signature']) > 0: - signature = processed_crash['signature'] - else: - 
signature = '##empty##' - else: - signature = '##null##' - - mutations = [] - mutations.append(Mutation(column="timestamps:processed", - value=processed_timestamp)) - mutations.append(Mutation(column="processed_data:signature", - value=signature)) - processed_crash_as_json_string = json.dumps(processed_crash) - mutations.append(Mutation(column="processed_data:json", - value=processed_crash_as_json_string)) - mutations.append(Mutation(column="flags:processed", - value="Y")) - - mutation_size = ( - len(processed_timestamp) - + len(signature) - + len(processed_crash_as_json_string) - + 1 - ) - start_timestamp = utc_now() - try: - client.mutateRow('crash_reports', row_id, mutations) - finally: - end_timestamp = utc_now() - self.config.logger.debug( - 'mutation size for row_id %s: %s, execution time: %s', - row_id, - mutation_size, - end_timestamp - start_timestamp - ) - - sig_ooid_idx_row_key = signature + crash_id - client.mutateRow( - 'crash_reports_index_signature_ooid', - sig_ooid_idx_row_key, - [Mutation(column="ids:ooid", value=crash_id)] - ) - return transaction() - - def save_raw_and_processed(self, raw_crash, dumps, processed_crash, crash_id): - """ bug 866973 - do not put raw_crash back into HBase again - We are doing this in lieu of a queuing solution that could allow - us to operate an independent crashmover. When the queuing system - is implemented, we could remove this, and have the raw crash - saved by a crashmover that's consuming crash_ids the same way - that the processor consumes them. - """ - self.save_processed(processed_crash) - - def get_raw_crash(self, crash_id): - @self._wrap_in_transaction - def transaction(client): - row_id = crash_id_to_row_id(crash_id) - raw_rows = client.getRowWithColumns('crash_reports', - row_id, - ['meta_data:json']) - try: - if raw_rows: - row_column = raw_rows[0].columns["meta_data:json"].value - else: - raise CrashIDNotFound(crash_id) - except KeyError: - self.logger.debug( - 'key error trying to get "meta_data:json" for %s', - crash_id - ) - raise - - return json.loads(row_column, object_hook=DotDict) - return transaction() - - def get_raw_dump(self, crash_id, name=None): - """Return the minidump for a given crash_id as a string of bytes - If the crash_id doesn't exist, raise not found""" - @self._wrap_in_transaction - def transaction(client, name): - if name in (None, '', 'upload_file_minidump'): - name = 'dump' - column_family_and_qualifier = 'raw_data:%s' % name - row_id = crash_id_to_row_id(crash_id) - raw_rows = client.getRowWithColumns('crash_reports', - row_id, - [column_family_and_qualifier]) - - try: - if raw_rows: - return raw_rows[0].columns[column_family_and_qualifier].value - else: - raise CrashIDNotFound(crash_id) - except KeyError: - self.logger.debug( - 'key error trying to get "%s" for %s', - (column_family_and_qualifier, crash_id) - ) - raise - return transaction(name) - - @staticmethod - def _make_dump_name(family_qualifier): - name = family_qualifier.split(':')[1] - if name == 'dump': - name = 'upload_file_minidump' - return name - - def get_raw_dumps(self, crash_id): - """Return the minidump for a given ooid as a string of bytes - If the ooid doesn't exist, raise not found""" - @self._wrap_in_transaction - def transaction(client): - row_id = crash_id_to_row_id(crash_id) - raw_rows = client.getRowWithColumns('crash_reports', - row_id, - ['raw_data']) - try: - if raw_rows: - column_mapping = raw_rows[0].columns - # ensure that we return a proper mapping of names to - # binary blobs. 
- d = MemoryDumpsMapping([ - (self._make_dump_name(k), v.value) - for k, v in column_mapping.iteritems()]) - return d - else: - raise CrashIDNotFound(crash_id) - except KeyError: - self.logger.debug( - 'key error trying to get "raw_data" from %s', - crash_id - ) - raise - return transaction() - - def get_raw_dumps_as_files(self, crash_id): - memory_dumps_mapping = self.get_raw_dumps(crash_id) - # convert our in memory name/blob data into name/pathname data - return memory_dumps_mapping.as_file_dumps_mapping( - crash_id, - self.hbase.config.temporary_file_system_storage_path, - self.hbase.config.dump_file_suffix - ) - - def get_unredacted_processed(self, crash_id): - """Return the unredacted processed json (jsonz) for a given ooid as a - Mapping. If not found, raise the NotFound exception.""" - @self._wrap_in_transaction - def transaction(client): - row_id = crash_id_to_row_id(crash_id) - raw_rows = client.getRowWithColumns('crash_reports', - row_id, - ['processed_data:json']) - - if raw_rows: - row_columns = raw_rows[0].columns["processed_data:json"].value - else: - raise CrashIDNotFound(crash_id) - - return json.loads(row_columns, object_hook=DotDict) - return transaction() - - def new_crashes(self): - try: - with self.hbase() as context: - for row in itertools.islice( - self._merge_scan_with_prefix( - context.client, - 'crash_reports_index_legacy_unprocessed_flag', - '', - ['ids:ooid'] - ), - self.config.new_crash_limit - ): - self._delete_from_legacy_processing_index(context.client, - row['_rowkey']) - yield row['ids:ooid'] - except self.hbase.operational_exceptions: - self.hbase.force_reconnect() - self.config.logger.critical( - 'hbase is in trouble, forcing reconnect', - exc_info=True - ) - - def _union_scan_with_prefix(self, client, table, prefix, columns): - # TODO: Need assertion for columns contains at least 1 element - """A lazy chain of iterators that yields unordered rows starting with - a given prefix. The implementation opens up 16 scanners (one for each - leading hex character of the salt) one at a time and returns all of - the rows matching""" - for salt in '0123456789abcdef': - salted_prefix = "%s%s" % (salt, prefix) - scanner = client.scannerOpenWithPrefix(table, - salted_prefix, - columns) - for rowkey, row in self._salted_scanner_iterable(client, - salted_prefix, - scanner): - yield row - - def _merge_scan_with_prefix(self, client, table, prefix, columns): - # TODO: Need assertion that columns is array containing at least - # one string - """A generator based iterator that yields totally ordered rows starting - with a given prefix. The implementation opens up 16 scanners (one for - each leading hex character of the salt) simultaneously and then yields - the next row in order from the pool on each iteration.""" - iterators = [] - next_items_queue = [] - for salt in '0123456789abcdef': - salted_prefix = "%s%s" % (salt, prefix) - scanner = client.scannerOpenWithPrefix(table, - salted_prefix, - columns) - iterators.append(self._salted_scanner_iterable(client, - salted_prefix, - scanner)) - # The i below is so we can advance whichever scanner delivers us the - # polled item. - for i, it in enumerate(iterators): - try: - next = it.next - next_items_queue.append([next(), i, next]) - except StopIteration: - pass - heapq.heapify(next_items_queue) - - while True: - try: - while True: - row_tuple, iter_index, next = s = next_items_queue[0] - # tuple[1] is the actual nice row. 
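# --- Editor's aside, not part of the patch: the scanner merge being built here
# is a hand-rolled k-way merge. For plain sorted iterables the standard-library
# spelling would simply be:
#
#     import heapq
#     for rowkey, nice_row in heapq.merge(*iterators):
#         yield nice_row
#
# The manual heap above additionally tracks which scanner produced each row
# (the `i` index) so that exact scanner can be advanced or dropped next.
# --- end of aside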
- yield row_tuple[1] - s[0] = next() - heapq.heapreplace(next_items_queue, s) - except StopIteration: - heapq.heappop(next_items_queue) - except IndexError: - return - - def _delete_from_legacy_processing_index(self, client, index_row_key): - client.deleteAllRow('crash_reports_index_legacy_unprocessed_flag', - index_row_key) - - client.atomicIncrement('metrics', - 'crash_report_queue', - 'counters:current_legacy_unprocessed_size', - -1) - - @staticmethod - def _stringify_dates_in_dict(items): - for k, v in items.iteritems(): - if isinstance(v, datetime.datetime): - items[k] = v.strftime("%Y-%m-%d %H:%M:%S.%f") - return items diff --git a/socorro/external/hb/hbase_client.py b/socorro/external/hb/hbase_client.py deleted file mode 100644 index e22c631417..0000000000 --- a/socorro/external/hb/hbase_client.py +++ /dev/null @@ -1,280 +0,0 @@ -import os - -from socorrolib.app import generic_app - -from configman import Namespace, RequiredConfig, ConfigurationManager -from configman.converters import class_converter - -from socorro.external.hb.crashstorage import ( - HBaseCrashStorage, - row_id_to_crash_id -) - -import itertools -import pprint -import contextlib -import gzip -import sys -import json - - -_raises_exception = object() - - -class NotEnoughArguments(Exception): - def __init__(self, arg): - self.arg = arg - - -def expect_from_aggregation(required_config, name, i, - default=_raises_exception): - def _closure(g, l, a): - if len(a) < i + 1: - if default is _raises_exception: - raise NotEnoughArguments(name) - return default - return a[i] - required_config.add_aggregation(name, _closure) - - -class _Command(RequiredConfig): - required_config = Namespace() - - def __init__(self, app): - self.app = app - self.config = app.config - self.storage = app.storage - - -class _CommandRequiringCrashID(_Command): - required_config = Namespace() - expect_from_aggregation(required_config, 'crash_id', 0) - - -class _CommandRequiringTable(_Command): - required_config = Namespace() - expect_from_aggregation(required_config, 'table', 0) - - -class _CommandRequiringTableRow(_CommandRequiringTable): - required_config = Namespace() - expect_from_aggregation(required_config, 'row_id', 1) - - -class _CommandRequiringScanParameters(_CommandRequiringTable): - required_config = Namespace() - expect_from_aggregation(required_config, 'prefix', 1) - expect_from_aggregation(required_config, 'columns', 2) - expect_from_aggregation(required_config, 'limit', 3) - - -class help(_Command): - """Usage: help - Get help on commands.""" - def run(self): - self.app.config_manager.output_summary() - -class get_raw_crash(_CommandRequiringCrashID): - """Usage: get_raw_crash CRASH_ID - Get the raw crash JSON data.""" - def run(self): - pprint.pprint(self.storage.get_raw_crash(self.config.crash_id)) - - -class get_raw_dumps(_CommandRequiringCrashID): - """Usage: get_raw_dumps CRASH_ID - Get information on the raw dumps for a crash.""" - def run(self): - for name, dump in self.storage.get_raw_dumps( - self.config.crash_id - ).items(): - dump_name = "%s.%s.dump" % (self.config.crash_id, name) - with open(dump_name, "w") as f: - f.write(dump) - print("%s: dump length = %s" % (name, len(dump))) - - -class get_processed(_CommandRequiringCrashID): - """Usage: get_processed CRASH_ID - Get the redacted processed JSON for a crash""" - def run(self): - if self.config.json: - print json.dumps(self.storage.get_processed(self.config.crash_id)) - else: - pprint.pprint(self.storage.get_processed(self.config.crash_id)) - - -class 
get_unredacted_processed(_CommandRequiringCrashID): - """Usage: get_unredacted_processed CRASH_ID - Get the unredacted processed JSON for a crash""" - def run(self): - if self.config.json: - print json.dumps(self.storage.get_unredacted_processed( - self.config.crash_id - )) - else: - pprint.pprint(self.storage.get_unredacted_processed( - self.config.crash_id - )) - - -class get_report_processing_state(_CommandRequiringCrashID): - """Usage: get_report_processing_state CRASH_ID - Get the report processing state for a crash.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn): - pprint.pprint(self.storage._get_report_processing_state( - conn, - self.config.crash_id - )) - transaction() - - -class union_scan_with_prefix(_CommandRequiringScanParameters): - """Usage: union_scan_with_prefix TABLE PREFIX COLUMNS [LIMIT] - Do a union scan on a table using a given prefix.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn, limit=self.config.limit): - for row in itertools.islice( - self.storage._union_scan_with_prefix( - conn, - self.config.table, - self.config.prefix, - self.config.columns - ), - self.config.limit): - pprint.pprint(row) - transaction() - - -class merge_scan_with_prefix(_CommandRequiringScanParameters): - """Usage: merge_scan_with_prefix TABLE PREFIX COLUMNS [LIMIT] - Do a merge scan on a table using a given prefix.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn, limit=self.config.limit): - for row in itertools.islice( - self.storage._merge_scan_with_prefix( - conn, - self.config.table, - self.config.prefix, - self.config.columns - ), - self.config.limit): - pprint.pprint(row) - transaction() - - -class describe_table(_CommandRequiringTable): - """Usage: describe_table TABLE - Describe the details of a table in HBase.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn): - pprint.pprint(conn.getColumnDescriptors(self.config.table)) - transaction() - - -class get_full_row(_CommandRequiringTableRow): - """Usage: describe_table TABLE ROW_ID - Pretty-print a row in HBase.""" - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn): - pprint.pprint(self.storage._make_row_nice(conn.getRow( - self.config.table, - self.config.row_id - )[0])) - transaction() - - -class export_processed_crashes_for_date(_Command): - """Usage: export_processed_crashes_for_date DATE PATH - Export all crashes for a given date to a path.""" - required_config = Namespace() - expect_from_aggregation(required_config, 'date', 0) - expect_from_aggregation(required_config, 'path', 1) - - def run(self): - @self.storage._wrap_in_transaction - def transaction(conn): - for row in itertools.islice( - self.storage._union_scan_with_prefix(conn, - 'crash_reports', - self.config.date, - ['processed_data:json']), - 10 - ): - crash_id = row_id_to_crash_id(row['_rowkey']) - - if row['processed_data:json']: - file_name = os.path.join(self.config.path, - crash_id + '.jsonz') - with contextlib.closing(gzip.GzipFile(file_name, - 'w', - 9)) as f: - json.dump(row['processed_data:json'], f) - transaction() - - -class HBaseClientConfigurationManager(ConfigurationManager): - def output_summary(self, output_stream=sys.stdout, block_password=True): - super(HBaseClientConfigurationManager, self).output_summary( - output_stream, - block_password - ) - - print >> output_stream, "Available commands:" - - for command in (var for var in globals().values() - if isinstance(var, type) and - issubclass(var, 
_Command) and - var.__name__[0] != '_'): - - print >> output_stream, ' ' + command.__name__ - print >> output_stream, ' ' + (command.__doc__ or - '(undocumented)') - print >> output_stream, '' - - -class HBaseClientApp(generic_app.App): - app_name = "hbase_client.py" - app_version = "0.1" - app_description = __doc__ - - required_config = Namespace() - required_config.add_option( - 'hbase_crash_storage_class', - default=HBaseCrashStorage, - - doc='the class responsible for proving an hbase connection', - from_string_converter=class_converter - ) - required_config.add_option( - 'command', - default=help, - doc='command to use', - from_string_converter=lambda s: class_converter(__name__ + '.' + s) - ) - required_config.add_option( - 'json', - default=False, - short_form='j', - doc='json output instead of a pretty printed mapping', - ) - - - def main(self): - self.storage = self.config.hbase_crash_storage_class(self.config) - self.config.command(self).run() - - -if __name__ == '__main__': - try: - generic_app.main(HBaseClientApp, - config_manager_cls=HBaseClientConfigurationManager) - except NotEnoughArguments as e: - print >> sys.stderr, "ERROR: was expecting another argument: " + e.arg - print >> sys.stderr, "Use the 'help' command to get help on commands." diff --git a/socorro/external/postgresql/models.py b/socorro/external/postgresql/models.py index 6388b90dca..c1df73bbc2 100644 --- a/socorro/external/postgresql/models.py +++ b/socorro/external/postgresql/models.py @@ -1057,24 +1057,6 @@ class ReprocessingJob(DeclarativeBase): __mapper_args__ = {"primary_key": (uuid)} -class ServerStatu(DeclarativeBase): - __tablename__ = 'server_status' - - #column definitions - avg_process_sec = Column(u'avg_process_sec', REAL()) - avg_wait_sec = Column(u'avg_wait_sec', REAL()) - date_created = Column(u'date_created', TIMESTAMP(timezone=True), nullable=False) - date_oldest_job_queued = Column(u'date_oldest_job_queued', TIMESTAMP(timezone=True)) - date_recently_completed = Column(u'date_recently_completed', TIMESTAMP(timezone=True)) - id = Column(u'id', INTEGER(), primary_key=True, nullable=False) - processors_count = Column(u'processors_count', INTEGER(), nullable=True) - waiting_job_count = Column(u'waiting_job_count', INTEGER(), nullable=False) - - __table_args__ = ( - Index('idx_server_status_date', date_created, id), - ) - - class Session(DeclarativeBase): __tablename__ = 'sessions' diff --git a/socorro/external/postgresql/raw_sql/views/current_server_status_view.sql b/socorro/external/postgresql/raw_sql/views/current_server_status_view.sql deleted file mode 100644 index 30433c5943..0000000000 --- a/socorro/external/postgresql/raw_sql/views/current_server_status_view.sql +++ /dev/null @@ -1,3 +0,0 @@ -CREATE VIEW current_server_status AS - SELECT server_status.date_recently_completed, server_status.date_oldest_job_queued, date_part('epoch'::text, (server_status.date_created - server_status.date_oldest_job_queued)) AS oldest_job_age, server_status.avg_process_sec, server_status.avg_wait_sec, server_status.waiting_job_count, server_status.processors_count, server_status.date_created FROM server_status ORDER BY server_status.date_created DESC LIMIT 1 -; diff --git a/socorro/external/rabbitmq/crashstorage.py b/socorro/external/rabbitmq/crashstorage.py index 135959247e..c54490779d 100644 --- a/socorro/external/rabbitmq/crashstorage.py +++ b/socorro/external/rabbitmq/crashstorage.py @@ -105,11 +105,13 @@ def __init__(self, config, quit_check_callback=None): if config.throttle == 100: 
self.dont_queue_this_crash = lambda: False else: - self.dont_queue_this_crash = lambda: randint(1, 100) > config.throttle + self.dont_queue_this_crash = ( + lambda: randint(1, 100) > config.throttle + ) #-------------------------------------------------------------------------- def save_raw_crash(self, raw_crash, dumps, crash_id): - if self.dont_queue_this_crash(): + if self.dont_queue_this_crash(): self.config.logger.info( 'Crash %s filtered out of RabbitMQ queue %s', crash_id, @@ -118,8 +120,7 @@ def save_raw_crash(self, raw_crash, dumps, crash_id): return try: this_crash_should_be_queued = ( - (not self.config.filter_on_legacy_processing) - or + not self.config.filter_on_legacy_processing or raw_crash.legacy_processing == 0 ) except KeyError: @@ -170,7 +171,6 @@ def new_crashes(self): # queues the crash_id. The '_consume_acknowledgement_queue' function # is run to send acknowledgments back to RabbitMQ self._consume_acknowledgement_queue() - conn = self.rabbitmq.connection() queues = [ self.rabbitmq.config.priority_queue_name, self.rabbitmq.config.standard_queue_name, @@ -326,3 +326,31 @@ def reprocess(self, crash_ids): ): success = False return success + + +#============================================================================== +class PriorityjobRabbitMQCrashStore(RabbitMQCrashStorage): + required_config = Namespace() + required_config.rabbitmq_class = change_default( + RabbitMQCrashStorage, + 'rabbitmq_class', + ConnectionContext, + ) + required_config.add_option( + 'routing_key', + default='socorro.priority', + doc='the name of the queue to receive crashes', + ) + + def process(self, crash_ids): + if not isinstance(crash_ids, (list, tuple)): + crash_ids = [crash_ids] + success = bool(crash_ids) + for crash_id in crash_ids: + if not self.save_raw_crash( + DotDict({'legacy_processing': 0}), + [], + crash_id + ): + success = False + return success diff --git a/socorro/external/rabbitmq/priorityjobs.py b/socorro/external/rabbitmq/priorityjobs.py index a2e406988c..fdd85953a6 100644 --- a/socorro/external/rabbitmq/priorityjobs.py +++ b/socorro/external/rabbitmq/priorityjobs.py @@ -2,6 +2,8 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +# XXX This file is probably not used anywhere and can be deleted. 
+ import pika from pika.exceptions import ChannelClosed diff --git a/socorro/middleware/middleware_app.py b/socorro/middleware/middleware_app.py index 5d5640e31f..2ec77c4adf 100755 --- a/socorro/middleware/middleware_app.py +++ b/socorro/middleware/middleware_app.py @@ -41,7 +41,6 @@ (r'/backfill/(.*)', 'backfill.Backfill'), (r'/correlations/signatures/(.*)', 'correlations.CorrelationsSignatures'), (r'/correlations/(.*)', 'correlations.Correlations'), - (r'/crash_data/(.*)', 'crash_data.CrashData'), ( r'/crashes/' r'(comments|count_by_day|daily|frequency|signatures|' @@ -124,7 +123,6 @@ class MiddlewareApp(App): 'implementation_list', doc='list of packages for service implementations', default='psql:socorro.external.postgresql, ' - 'hbase:socorro.external.hb, ' 'es:socorro.external.es, ' 'fs:socorro.external.fs, ' 'http:socorro.external.http, ' @@ -135,9 +133,8 @@ class MiddlewareApp(App): required_config.implementations.add_option( 'service_overrides', - doc='comma separated list of class overrides, e.g `Crashes: hbase`', - default='CrashData: fs, ' - 'Correlations: http, ' + doc='comma separated list of class overrides, e.g `Query: es`', + default='Correlations: http, ' 'CorrelationsSignatures: http, ' 'SuperSearch: es, ' 'Priorityjobs: rabbitmq, ' @@ -158,17 +155,6 @@ class MiddlewareApp(App): from_string_converter=class_converter ) - #-------------------------------------------------------------------------- - # hbase namespace - # the namespace is for external implementations of the services - #------------------------------------------------------------------------- - required_config.namespace('hbase') - required_config.hbase.add_option( - 'hbase_class', - default='socorro.external.boto.crashstorage.BotoS3CrashStorage', - from_string_converter=class_converter - ) - #-------------------------------------------------------------------------- # filesystem namespace # the namespace is for external implementations of the services diff --git a/socorro/processor/mozilla_processor_2015.py b/socorro/processor/mozilla_processor_2015.py index ae05e2e803..161bb12cfc 100644 --- a/socorro/processor/mozilla_processor_2015.py +++ b/socorro/processor/mozilla_processor_2015.py @@ -54,6 +54,7 @@ "socorro.processor.mozilla_transform_rules.OSPrettyVersionRule, " "socorro.processor.mozilla_transform_rules.TopMostFilesRule, " "socorro.processor.mozilla_transform_rules.MissingSymbolsRule, " + "socorro.processor.mozilla_transform_rules.ThemePrettyNameRule, " "socorro.processor.signature_utilities.SignatureGenerationRule," "socorro.processor.signature_utilities.StackwalkerErrorSignatureRule, " "socorro.processor.signature_utilities.OOMSignature, " diff --git a/socorro/processor/mozilla_transform_rules.py b/socorro/processor/mozilla_transform_rules.py index 1db8112c63..b7b92fe043 100644 --- a/socorro/processor/mozilla_transform_rules.py +++ b/socorro/processor/mozilla_transform_rules.py @@ -1054,10 +1054,10 @@ def _get_pretty_os_version(self, processed_crash): elif processed_crash.os_name == 'Mac OS X': if ( - major_version >= 10 - and major_version < 11 - and minor_version >= 0 - and minor_version < 20 + major_version >= 10 and + major_version < 11 and + minor_version >= 0 and + minor_version < 20 ): pretty_name = 'OS X %s.%s' % (major_version, minor_version) else: @@ -1071,3 +1071,47 @@ def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta): processed_crash ) return True + + +#============================================================================== +class ThemePrettyNameRule(Rule): + 
"""The Firefox theme shows up commonly in crash reports referenced by its + internal ID. The ID is not easy to change, and is referenced by id in other + software. + + This rule attempts to modify it to have a more identifiable name, like + other built-in extensions. + + Must be run after the Addons Rule.""" + + #-------------------------------------------------------------------------- + def __init__(self, config): + super(ThemePrettyNameRule, self).__init__(config) + self.conversions = { + "{972ce4c6-7e08-4474-a285-3208198ce6fd}": + "{972ce4c6-7e08-4474-a285-3208198ce6fd} " + "(default Firefox theme)", + } + + #-------------------------------------------------------------------------- + def version(self): + return '1.0' + + #-------------------------------------------------------------------------- + def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta): + '''addons is a list of tuples containing (extension, version)''' + addons = processed_crash.get('addons', []) + + for extension, version in addons: + if extension in self.conversions: + return True + return False + + #-------------------------------------------------------------------------- + def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta): + addons = processed_crash.addons + + for index, (extension, version) in enumerate(addons): + if extension in self.conversions: + addons[index] = (self.conversions[extension], version) + return True diff --git a/socorro/schemas/crash_report.json b/socorro/schemas/crash_report.json index 9dbc2f7f74..24683cc6b7 100644 --- a/socorro/schemas/crash_report.json +++ b/socorro/schemas/crash_report.json @@ -63,7 +63,7 @@ "description": "Notes from the application that crashed. Mostly contains graphics-related annotations." }, "build_id": { - "type": ["integer", "string", "null"], + "type": ["string", "null"], "description": "The unique build identifier of this version, which is a timestamp of the form YYYYMMDDHHMMSS. " }, "classifications": { @@ -229,7 +229,7 @@ } }, "tiny_block_size": { - "type": ["integer", "string", "null"], + "type": ["string", "null"], "description": "If present, the total size of all memory regions in the crashing process that are smaller than 1 MB." }, "thread_count": { @@ -257,7 +257,7 @@ } }, "write_combine_size": { - "type": ["integer", "string", "null"], + "type": ["string", "null"], "description": "If present, the total size of all committed memory regions in the crashing process marked with PAGE_WRITECOMBINE." } } diff --git a/socorro/unittest/cron/jobs/test_serverstatus.py b/socorro/unittest/cron/jobs/test_serverstatus.py deleted file mode 100644 index 07b6de748a..0000000000 --- a/socorro/unittest/cron/jobs/test_serverstatus.py +++ /dev/null @@ -1,105 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -from mock import Mock, MagicMock -from nose.tools import eq_ - -from crontabber.app import CronTabber - -from socorro.unittest.cron.jobs.base import IntegrationTestBase - -from socorro.unittest.cron.setup_configman import ( - get_config_manager_for_crontabber, -) - - -#============================================================================== -class IntegrationTestServerStatus(IntegrationTestBase): - - def _clear_tables(self): - self.conn.cursor().execute(""" - TRUNCATE - server_status, - report_partition_info, - server_status, - release_channels, - reports - CASCADE - """) - - def setUp(self): - super(IntegrationTestServerStatus, self).setUp() - self._clear_tables() - - def tearDown(self): - """ - The reason why this is all necessary, including the commit, is that - we're testing a multi-process tool, crontabber. - The changes made to the database happen in a transaction - that crontabber doesn't have visibility into. - - TODO drop reports partitions, not just the data - - """ - self._clear_tables() - self.conn.commit() - super(IntegrationTestServerStatus, self).tearDown() - - def _setup_config_manager(self): - queue_mock = Mock() - queue_mock.return_value.return_value = MagicMock() - queue_mock.return_value.return_value.queue_status_standard \ - .method.message_count = 1 - - return get_config_manager_for_crontabber( - jobs='socorro.cron.jobs.serverstatus.ServerStatusCronApp|5m', - overrides={ - 'crontabber.class-ServerStatusCronApp.queuing.queuing_class': - queue_mock - } - ) - - def test_server_status(self): - """ Simple test of status monitor """ - config_manager = self._setup_config_manager() - - cursor = self.conn.cursor() - - # Create partitions to support the status query - # Load report_partition_info data - cursor.execute(""" - INSERT into report_partition_info - (table_name, build_order, keys, indexes, - fkeys, partition_column, timetype) - VALUES - ('reports', '1', '{id,uuid}', - '{date_processed,hangid,"product,version",reason,signature,url}', - '{}', 'date_processed', 'TIMESTAMPTZ') - """) - cursor.execute('SELECT weekly_report_partitions()') - - # We have to do this here to accommodate separate crontabber processes - self.conn.commit() - - with config_manager.context() as config: - tab = CronTabber(config) - tab.run_all() - cursor.execute('select count(*) from server_status') - - res_expected = 1 - res, = cursor.fetchone() - eq_(res, res_expected) - - cursor.execute("""select - date_recently_completed - , date_oldest_job_queued -- is NULL until we upgrade Rabbit - , avg_process_sec - , waiting_job_count -- should be 1 - -- , date_created -- leaving timestamp verification out - from server_status - """) - - res_expected = (None, None, 0.0, 1) - res = cursor.fetchone() - eq_(res, res_expected) diff --git a/socorro/unittest/external/boto/test_crash_data.py b/socorro/unittest/external/boto/test_crash_data.py new file mode 100644 index 0000000000..cb61f6fee1 --- /dev/null +++ b/socorro/unittest/external/boto/test_crash_data.py @@ -0,0 +1,156 @@ +import json + +import mock +from nose.tools import eq_, assert_raises +from boto.exception import StorageResponseError + +from configman import ConfigurationManager + +from socorrolib.lib import MissingArgumentError +from socorro.external.boto.crash_data import SimplifiedCrashData +from socorro.external.crashstorage_base import CrashIDNotFound +from socorro.unittest.testbase import TestCase + + +class TestSimplifiedCrashData(TestCase): + + def _get_config(self, sources, extra_values=None): + self.mock_logging = mock.Mock() + + 
config_definitions = [] + for source in sources: + conf = source.get_required_config() + conf.add_option('logger', default=self.mock_logging) + config_definitions.append(conf) + + values_source = {'logger': self.mock_logging} + + config_manager = ConfigurationManager( + config_definitions, + app_name='testapp', + app_version='1.0', + app_description='', + values_source_list=[values_source], + argv_source=[], + ) + + return config_manager.get_config() + + def get_s3_store(self): + s3 = SimplifiedCrashData( + config=self._get_config([SimplifiedCrashData]) + ) + s3_conn = s3.connection_source + s3_conn._connect_to_endpoint = mock.Mock() + return s3 + + def test_get_basic_processed(self): + boto_s3_store = self.get_s3_store() + mocked_connection = ( + boto_s3_store.connection_source._connect_to_endpoint() + ) + + def mocked_get_contents_as_string(): + return json.dumps({'foo': 'bar'}) + + mocked_connection.get_bucket().get_key().get_contents_as_string = ( + mocked_get_contents_as_string + ) + result = boto_s3_store.get( + uuid='0bba929f-8721-460c-dead-a43c20071027', + datatype='processed' + ) + eq_(result, {'foo': 'bar'}) + + def test_get_not_found_processed(self): + boto_s3_store = self.get_s3_store() + mocked_connection = ( + boto_s3_store.connection_source._connect_to_endpoint() + ) + + def mocked_get_key(key): + assert '/processed_crash/' in key + assert '0bba929f-8721-460c-dead-a43c20071027' in key + raise StorageResponseError(404, 'not found') + + mocked_connection.get_bucket().get_key = ( + mocked_get_key + ) + assert_raises( + CrashIDNotFound, + boto_s3_store.get, + uuid='0bba929f-8721-460c-dead-a43c20071027', + datatype='processed' + ) + + def test_get_basic_raw_dump(self): + boto_s3_store = self.get_s3_store() + mocked_connection = ( + boto_s3_store.connection_source._connect_to_endpoint() + ) + + def mocked_get_contents_as_string(): + return '\xa0' + + mocked_connection.get_bucket().get_key().get_contents_as_string = ( + mocked_get_contents_as_string + ) + result = boto_s3_store.get( + uuid='0bba929f-8721-460c-dead-a43c20071027', + datatype='raw', + ) + eq_(result, '\xa0') + + def test_get_not_found_raw_dump(self): + boto_s3_store = self.get_s3_store() + mocked_connection = ( + boto_s3_store.connection_source._connect_to_endpoint() + ) + + def mocked_get_key(key): + assert '/dump/' in key + assert '0bba929f-8721-460c-dead-a43c20071027' in key + raise StorageResponseError(404, 'not found') + + mocked_connection.get_bucket().get_key = ( + mocked_get_key + ) + assert_raises( + CrashIDNotFound, + boto_s3_store.get, + uuid='0bba929f-8721-460c-dead-a43c20071027', + datatype='raw' + ) + + def test_get_not_found_raw_crash(self): + boto_s3_store = self.get_s3_store() + mocked_connection = ( + boto_s3_store.connection_source._connect_to_endpoint() + ) + + def mocked_get_key(key): + assert '/raw_crash/' in key + assert '0bba929f-8721-460c-dead-a43c20071027' in key + raise StorageResponseError(404, 'not found') + + mocked_connection.get_bucket().get_key = ( + mocked_get_key + ) + assert_raises( + CrashIDNotFound, + boto_s3_store.get, + uuid='0bba929f-8721-460c-dead-a43c20071027', + datatype='meta' + ) + + def test_bad_arguments(self): + boto_s3_store = self.get_s3_store() + assert_raises( + MissingArgumentError, + boto_s3_store.get + ) + assert_raises( + MissingArgumentError, + boto_s3_store.get, + uuid='0bba929f-8721-460c-dead-a43c20071027', + ) diff --git a/socorro/unittest/external/boto/test_crashstorage.py b/socorro/unittest/external/boto/test_crashstorage.py index 0f25323f44..cde76cfa15 
100644 --- a/socorro/unittest/external/boto/test_crashstorage.py +++ b/socorro/unittest/external/boto/test_crashstorage.py @@ -32,7 +32,8 @@ ) from socorro.unittest.external.es.base import ElasticsearchTestCase -import socorro.unittest.testbase +from socorro.unittest.testbase import TestCase + from socorrolib.lib.util import DotDict @@ -60,7 +61,7 @@ class ConditionallyABadDeal(Exception): S3ConnectionContext.conditional_exceptions = (ConditionallyABadDeal, ) -class BaseTestCase(socorro.unittest.testbase.TestCase): +class BaseTestCase(TestCase): @classmethod def setUpClass(cls): @@ -102,7 +103,8 @@ def setup_mocked_s3_storage( 'dump_file_suffix': '.dump', 'bucket_name': bucket_name, 'prefix': 'dev', - 'calling_format': mock.Mock() + 'calling_format': mock.Mock(), + 'json_object_hook': DotDict, }) if isinstance(storage_class, basestring): diff --git a/socorro/unittest/external/happybase/__init__.py b/socorro/unittest/external/happybase/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/socorro/unittest/external/happybase/test_connection_context.py b/socorro/unittest/external/happybase/test_connection_context.py deleted file mode 100644 index ae6da2e70f..0000000000 --- a/socorro/unittest/external/happybase/test_connection_context.py +++ /dev/null @@ -1,269 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import mock - -import happybase - -from socorro.external.happybase import connection_context -from socorro.external.happybase.connection_context import ( - HappyBaseConnectionContext, - HappyBasePooledConnectionContext, -) - -from socorrolib.lib.util import SilentFakeLogger, DotDict -from socorro.database.transaction_executor import TransactionExecutor -from socorro.unittest.testbase import TestCase -from configman import Namespace - -from socket import timeout, error - - -class FakeHB_Connection(object): - def __init__(self, config, *args, **kwargs): - self.close_counter = 0 - self.commit_counter = 0 - self.rollback_counter = 0 - - def close(self): - self.close_counter += 1 - - def commit(self): - self.commit_counter += 1 - - def rollback(self): - self.rollback_counter += 1 - - -class TestConnectionContext(TestCase): - def test_basic_hbase_usage(self): - local_config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'logger': SilentFakeLogger(), - }) - a_fake_hbase_connection = FakeHB_Connection(local_config) - with mock.patch.object(happybase, 'Connection', - mock.Mock(return_value=a_fake_hbase_connection)): - hb_context = connection_context.HappyBaseConnectionContext( - local_config - ) - # open a connection - with hb_context() as conn: - pass - self.assertEqual( - a_fake_hbase_connection.close_counter, - 1 - ) - # open another connection again - with hb_context() as conn: - pass - self.assertEqual( - a_fake_hbase_connection.close_counter, - 2 - ) - # get a named connection - with hb_context('fred') as conn: - pass - self.assertEqual( - a_fake_hbase_connection.close_counter, - 3 - ) - # close all connections - hb_context.close() - self.assertEqual( - a_fake_hbase_connection.close_counter, - 3 - ) - - def test_hbase_usage_with_transaction(self): - local_config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'number_of_retries': 2, - 'logger': SilentFakeLogger(), - }) - 
a_fake_hbase_connection = FakeHB_Connection(local_config) - with mock.patch.object(HappyBaseConnectionContext, 'connection', - mock.Mock(return_value=a_fake_hbase_connection)): - hb_context = connection_context.HappyBaseConnectionContext( - local_config - ) - def all_ok(connection, dummy): - self.assertEqual(dummy, 'hello') - return True - - transaction = TransactionExecutor(local_config, hb_context) - result = transaction(all_ok, 'hello') - self.assertTrue(result) - self.assertEqual( - a_fake_hbase_connection.close_counter, - 1 - ) - self.assertEqual( - a_fake_hbase_connection.rollback_counter, - 0 - ) - self.assertEqual( - a_fake_hbase_connection.commit_counter, - 1 - ) - - def bad_deal(connection, dummy): - raise KeyError('fred') - - self.assertRaises(KeyError, transaction, bad_deal, 'hello') - self.assertEqual( - a_fake_hbase_connection.close_counter, - 2 - ) - self.assertEqual( - a_fake_hbase_connection.commit_counter, - 1 - ) - - hb_context.close() - self.assertEqual( - a_fake_hbase_connection.close_counter, - 2 - ) - -from contextlib import contextmanager - -class FakeHB_Connection2(object): - def __init__(self, config, *args, **kwargs): - self.close_counter = 0 - self.commit_counter = 0 - self.rollback_counter = 0 - - def close(self): - self.close_counter += 1 - - def commit(self): - self.commit_counter += 1 - - def rollback(self): - self.rollback_counter += 1 - - @contextmanager - def __call__(self): - yield self - -class HappyBasePooledConnectionContextMock(HappyBasePooledConnectionContext): - @contextmanager - def __call__(self, name=None): - with self._connection_pool.connection() as connection: - yield connection - - -class TestPooledConnectionContext(TestCase): - def test_basic_hbase_usage(self): - local_config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'logger': SilentFakeLogger(), - }) - a_fake_hbase_connection = FakeHB_Connection(local_config) - a_fake_hbase_pool = mock.MagicMock() - a_fake_hbase_pool.return_value = a_fake_hbase_connection - with mock.patch.object( - happybase, - 'ConnectionPool', - mock.Mock(return_value=a_fake_hbase_pool) - ): - hb_context = connection_context.HappyBasePooledConnectionContext( - local_config - ) - # open a connection - with hb_context() as conn: - pass - self.assertEqual( - a_fake_hbase_connection.close_counter, - 0 - ) - # open another connection again - with hb_context() as conn: - pass - self.assertEqual( - a_fake_hbase_connection.close_counter, - 0 - ) - # get a named connection - with hb_context('fred') as conn: - pass - self.assertEqual( - a_fake_hbase_connection.close_counter, - 0 - ) - # close all connections - hb_context.close() - self.assertEqual( - a_fake_hbase_connection.close_counter, - 0 - ) - - def test_hbase_usage_with_transaction(self): - local_config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'number_of_retries': 2, - 'logger': SilentFakeLogger(), - }) - a_fake_hbase_connection = FakeHB_Connection2(local_config) - a_fake_hbase_pool = mock.MagicMock() - a_fake_hbase_pool.connection = a_fake_hbase_connection - with mock.patch.object( - happybase, - 'ConnectionPool', - mock.Mock(return_value=a_fake_hbase_pool) - ): - hb_context = HappyBasePooledConnectionContextMock( - local_config - ) - def all_ok(connection, dummy): - self.assertEqual(dummy, 'hello') - return True - - transaction = TransactionExecutor(local_config, hb_context) - result = transaction(all_ok, 'hello') - 
self.assertTrue(result) - self.assertEqual( - a_fake_hbase_connection.close_counter, - 0 - ) - self.assertEqual( - a_fake_hbase_connection.rollback_counter, - 0 - ) - self.assertEqual( - a_fake_hbase_connection.commit_counter, - 1 - ) - - def bad_deal(connection, dummy): - raise KeyError('fred') - - self.assertRaises(KeyError, transaction, bad_deal, 'hello') - self.assertEqual( - a_fake_hbase_connection.close_counter, - 0 - ) - self.assertEqual( - a_fake_hbase_connection.commit_counter, - 1 - ) - - hb_context.close() - self.assertEqual( - a_fake_hbase_connection.close_counter, - 0 - ) diff --git a/socorro/unittest/external/happybase/test_crash_data.py b/socorro/unittest/external/happybase/test_crash_data.py deleted file mode 100644 index fa566c7827..0000000000 --- a/socorro/unittest/external/happybase/test_crash_data.py +++ /dev/null @@ -1,231 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import os -from nose.plugins.skip import SkipTest -from nose.tools import eq_ -from configman import ConfigurationManager, Namespace -from mock import Mock, patch - -from socorrolib.lib import ( - MissingArgumentError, - ResourceNotFound, - ResourceUnavailable, -) -from socorro.external.happybase import crash_data, crashstorage -from socorro.external.happybase.connection_context import ( - HappyBaseConnectionContext -) -from socorro.unittest.testbase import TestCase - - -_run_integration_tests = os.environ.get('RUN_HBASE_INTEGRATION_TESTS', False) -if _run_integration_tests in ('false', 'False', 'no', '0'): - _run_integration_tests = False - - -class TestIntegrationHBaseCrashData(TestCase): - - def setUp(self): - if not _run_integration_tests: - raise SkipTest("Skipping HBase integration tests") - super(TestIntegrationHBaseCrashData, self).setUp() - self.config_manager = self._common_config_setup() - - with self.config_manager.context() as config: - store = crashstorage.HBaseCrashStorage(config.hbase) - - # A complete crash report (raw, dump and processed) - fake_raw_dump_1 = 'peter is a swede' - fake_raw_dump_2 = 'lars is a norseman' - fake_raw_dump_3 = 'adrian is a frenchman' - fake_dumps = {'upload_file_minidump': fake_raw_dump_1, - 'lars': fake_raw_dump_2, - 'adrian': fake_raw_dump_3} - fake_raw = { - 'name': 'Peter', - 'legacy_processing': 0, - 'submitted_timestamp': '2013-05-04' - } - fake_processed = { - 'name': 'Peter', - 'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314', - 'completeddatetime': '2012-01-01T00:00:00', - 'email': 'peter@fake.org', - } - - store.save_raw_crash( - fake_raw, - fake_dumps, - '114559a5-d8e6-428c-8b88-1c1f22120314' - ) - store.save_processed(fake_processed) - - # A non-processed crash report - fake_raw = { - 'name': 'Adrian', - 'legacy_processing': 0, - 'submitted_timestamp': '2013-05-04' - } - - store.save_raw_crash( - fake_raw, - fake_dumps, - '58727744-12f5-454a-bcf5-f688a2120821' - ) - - def tearDown(self): - super(TestIntegrationHBaseCrashData, self).tearDown() - with self.config_manager.context() as config: - connection = hbase_client.HBaseConnectionForCrashReports( - config.hbase.hbase_host, - config.hbase.hbase_port, - config.hbase.hbase_timeout - ) - for row in connection.merge_scan_with_prefix( - 'crash_reports', '', ['ids:ooid']): - index_row_key = row['_rowkey'] - connection.client.deleteAllRow( - 'crash_reports', index_row_key) - # because of HBase's async nature, deleting can take time - 
list(connection.iterator_for_all_legacy_to_be_processed()) - - def _common_config_setup(self): - mock_logging = Mock() - required_config = Namespace() - required_config.namespace('hbase') - required_config.hbase.hbase_class = \ - crashstorage.HBaseCrashStorage - required_config.hbase.add_option('logger', default=mock_logging) - config_manager = ConfigurationManager( - [required_config], - app_name='testapp', - app_version='1.0', - app_description='app description', - values_source_list=[{'hbase': { - 'logger': mock_logging - }}] - ) - return config_manager - - def test_get(self): - with self.config_manager.context() as config: - - priorityjobs_mock = Mock() - service = crash_data.CrashData( - config=config, - all_services={'Priorityjobs': priorityjobs_mock} - ) - params = { - 'datatype': 'raw', - 'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314' - } - - # Test 1: get a raw dump - res_expected = ('peter is a swede', - 'application/octet-stream') - res = service.get(**params) - - eq_(res, res_expected) - - # Test 2: get a raw crash - params['datatype'] = 'meta' - res_expected = { - 'name': 'Peter', - 'legacy_processing': 0, - 'submitted_timestamp': '2013-05-04' - } - res = service.get(**params) - - eq_(res, res_expected) - - # Test 3: get a processed crash - params['datatype'] = 'processed' - res_expected = { - 'name': 'Peter', - 'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314', - 'completeddatetime': '2012-01-01T00:00:00' - } - res = service.get(**params) - - eq_(res, res_expected) - - # Test 3a: get a unredacted processed crash - params['datatype'] = 'unredacted' - res_expected = { - 'name': 'Peter', - 'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314', - 'completeddatetime': '2012-01-01T00:00:00', - 'email': 'peter@fake.org', - } - res = service.get(**params) - - eq_(res, res_expected) - - # Test 4: missing parameters - self.assertRaises( - MissingArgumentError, - service.get - ) - self.assertRaises( - MissingArgumentError, - service.get, - **{'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314'} - ) - - # Test 5: crash cannot be found - self.assertRaises( - ResourceNotFound, - service.get, - **{ - 'uuid': 'c44245f4-c93b-49b8-86a2-c15dc2130504', - 'datatype': 'processed' - } - ) - # Test 5a: crash cannot be found - self.assertRaises( - ResourceNotFound, - service.get, - **{ - 'uuid': 'c44245f4-c93b-49b8-86a2-c15dc2130504', - 'datatype': 'unredacted' - } - ) - - # Test 6: not yet available crash - self.assertRaises( - ResourceUnavailable, - service.get, - **{ - 'uuid': '58727744-12f5-454a-bcf5-f688a2120821', - 'datatype': 'processed' - } - ) - priorityjobs_mock.cls.return_value.create.assert_called_once_with( - uuid='58727744-12f5-454a-bcf5-f688a2120821' - ) - priorityjobs_mock.cls.return_value.create.reset_mock() - - # Test 6a: not yet available crash - self.assertRaises( - ResourceUnavailable, - service.get, - **{ - 'uuid': '58727744-12f5-454a-bcf5-f688a2120821', - 'datatype': 'unredacted' - } - ) - priorityjobs_mock.cls.return_value.create.assert_called_once_with( - uuid='58727744-12f5-454a-bcf5-f688a2120821' - ) - - # Test 7: raw crash cannot be found - self.assertRaises( - ResourceNotFound, - service.get, - **{ - 'uuid': 'c44245f4-c93b-49b8-86a2-c15dc2130505', - 'datatype': 'raw' - } - ) diff --git a/socorro/unittest/external/happybase/test_crashstorage.py b/socorro/unittest/external/happybase/test_crashstorage.py deleted file mode 100644 index 88de752fa2..0000000000 --- a/socorro/unittest/external/happybase/test_crashstorage.py +++ /dev/null @@ -1,190 +0,0 @@ -# This Source Code Form is subject to 
the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -import mock -import json - -from socorrolib.lib.util import SilentFakeLogger, DotDict -from socorro.external.crashstorage_base import ( - Redactor, - MemoryDumpsMapping, -) -from socorro.external.happybase.crashstorage import HBaseCrashStorage, CrashIDNotFound -from socorro.database.transaction_executor import TransactionExecutor -from socorro.unittest.testbase import TestCase - - -class TestCrashStorage(TestCase): - def setUp(self): - super(TestCrashStorage, self).setUp() - self.context = mock.MagicMock() - self.context.__enter__.return_value = self.context - config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'number_of_retries': 2, - 'logger': SilentFakeLogger(), - 'hbase_connection_context_class': mock.Mock( - return_value=self.context - ), - 'transaction_executor_class': TransactionExecutor, - 'new_crash_limit': 10 ** 6, - 'redactor_class': Redactor, - 'forbidden_keys': Redactor.required_config.forbidden_keys.default, - }) - self.storage = HBaseCrashStorage(config) - - def _fake_processed_crash(self): - d = DotDict() - # these keys survive redaction - d.a = DotDict() - d.a.b = DotDict() - d.a.b.c = 11 - d.sensitive = DotDict() - d.sensitive.x = 2 - d.not_url = 'not a url' - - return d - - def _fake_redacted_processed_crash(self): - d = self._fake_unredacted_processed_crash() - del d.url - del d.email - del d.user_id - del d.exploitability - del d.json_dump.sensitive - del d.upload_file_minidump_flash1.json_dump.sensitive - del d.upload_file_minidump_flash2.json_dump.sensitive - del d.upload_file_minidump_browser.json_dump.sensitive - - return d - - def _fake_unredacted_processed_crash(self): - d = self._fake_processed_crash() - - # these keys do not survive redaction - d['url'] = 'http://very.embarassing.com' - d['email'] = 'lars@fake.com' - d['user_id'] = '3333' - d['exploitability'] = 'yep' - d.json_dump = DotDict() - d.json_dump.sensitive = 22 - d.upload_file_minidump_flash1 = DotDict() - d.upload_file_minidump_flash1.json_dump = DotDict() - d.upload_file_minidump_flash1.json_dump.sensitive = 33 - d.upload_file_minidump_flash2 = DotDict() - d.upload_file_minidump_flash2.json_dump = DotDict() - d.upload_file_minidump_flash2.json_dump.sensitive = 33 - d.upload_file_minidump_browser = DotDict() - d.upload_file_minidump_browser.json_dump = DotDict() - d.upload_file_minidump_browser.json_dump.sensitive = DotDict() - d.upload_file_minidump_browser.json_dump.sensitive.exploitable = 55 - d.upload_file_minidump_browser.json_dump.sensitive.secret = 66 - - return d - - def _fake_unredacted_processed_crash_as_string(self): - d = self._fake_unredacted_processed_crash() - s = json.dumps(d) - return s - - - def test_close(self): - self.storage.close() - self.assertEqual(self.storage.hbase.close.call_count, 0) - - def test_save_processed(self): - self.storage.save_processed({ - "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408", - "completeddatetime": "2012-04-08 10:56:50.902884", - "signature": 'now_this_is_a_signature' - }) - with self.storage.hbase() as conn: - self.assertEqual(conn.table.call_count, 1) - #self.assertEqual(conn.client.mutateRow.call_count, 2) - - def test_save_raw_crash(self): - self.storage.save_raw_crash({ - "submitted_timestamp": "2013-01-09T22:21:18.646733+00:00" - }, MemoryDumpsMapping(), "0bba929f-8721-460c-dead-a43c20071027") - with 
self.storage.hbase() as conn: - self.assertEqual(conn.table.call_count, 1) - self.assertEqual(conn.table.return_value.put.call_count, 1) - - def test_save_raw_crash_hang(self): - self.storage.save_raw_crash({ - "submitted_timestamp": "2013-01-09T22:21:18.646733+00:00", - "HangID": "?" - }, MemoryDumpsMapping(), "0bba929f-8721-460c-dead-a43c20071027") - with self.storage.hbase() as conn: - self.assertEqual(conn.table.call_count, 1) - self.assertEqual(conn.table.return_value.put.call_count, 1) - - def test_get_raw_dumps(self): - self.storage.get_raw_dumps("936ce666-ff3b-4c7a-9674-367fe2120408") - with self.storage.hbase() as conn: - self.assertEqual(conn.table.return_value.row.call_count, 1) - - def test_get_raw_dumps_as_files(self): - self.storage.get_raw_dumps_as_files( - "936ce666-ff3b-4c7a-9674-367fe2120408") - with self.storage.hbase() as conn: - self.assertEqual(conn.table.return_value.row.call_count, 1) - - def test_get_unredacted_processed(self): - - processed_crash = DotDict() - with self.storage.hbase() as conn: - conn.table.return_value.row.return_value = { - 'processed_data:json': - self._fake_unredacted_processed_crash_as_string() - } - - processed_crash = self.storage.get_unredacted_processed( - "936ce666-ff3b-4c7a-9674-367fe2120408" - ) - self.assertEqual( - processed_crash, - self._fake_unredacted_processed_crash() - ) - - def test_get_processed(self): - faked_hb_row_object = DotDict() - faked_hb_row_object.columns = DotDict() - faked_hb_row_object.columns['processed_data:json'] = DotDict() - faked_hb_row_object.columns['processed_data:json'].value = \ - self._fake_unredacted_processed_crash_as_string() - - processed_crash = DotDict() - with self.storage.hbase() as conn: - conn.table.return_value.row.return_value = { - 'processed_data:json': - self._fake_unredacted_processed_crash_as_string() - } - - processed_crash = self.storage.get_processed( - "936ce666-ff3b-4c7a-9674-367fe2120408" - ) - self.assertEqual( - processed_crash, - self._fake_redacted_processed_crash() - ) - - - def test_get_processed_failure(self): - with self.storage.hbase() as conn: - conn.table.return_value.row.return_value = {} - self.assertRaises( - CrashIDNotFound, - self.storage.get_processed, - "936ce666-ff3b-4c7a-9674-367fe2120408" - ) - - def test_new_crashes(self): - self.storage._salted_scanner_iterable = mock.Mock( - return_value=iter([]) - ) - self.assertEqual(list(self.storage.new_crashes()), []) diff --git a/socorro/unittest/external/hb/__init__.py b/socorro/unittest/external/hb/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/socorro/unittest/external/hb/test_connection_context.py b/socorro/unittest/external/hb/test_connection_context.py deleted file mode 100644 index 0019f08164..0000000000 --- a/socorro/unittest/external/hb/test_connection_context.py +++ /dev/null @@ -1,237 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -import mock -from nose.tools import eq_, ok_, assert_raises - -from socorro.external.hb import connection_context -from socorrolib.lib.util import SilentFakeLogger, DotDict -from socorro.database.transaction_executor import TransactionExecutor -from socorro.unittest.testbase import TestCase - -from socket import error - - -class FakeHB_Connection(object): - def __init__(self, config): - self.hbaseThriftExceptions = (error,) - self.close_counter = 0 - self.commit_counter = 0 - self.rollback_counter = 0 - - def close(self): - self.close_counter += 1 - - def commit(self): - self.commit_counter += 1 - - def rollback(self): - self.rollback_counter += 1 - - -class TestConnectionContext(TestCase): - def test_basic_hbase_usage(self): - local_config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'number_of_retries': 2, - 'logger': SilentFakeLogger(), - 'executor_identity': lambda: 'dwight' # bogus thread id - }) - a_fake_hbase_connection = FakeHB_Connection(local_config) - with mock.patch.object(connection_context, 'HBaseConnection', - mock.Mock(return_value=a_fake_hbase_connection)): - hb_context = connection_context.HBaseConnectionContext( - local_config - ) - # open a connection - with hb_context() as conn: - pass - eq_( - a_fake_hbase_connection.close_counter, - 1 - ) - # open another connection again - with hb_context() as conn: - pass - eq_( - a_fake_hbase_connection.close_counter, - 2 - ) - # get a named connection - with hb_context('fred') as conn: - pass - eq_( - a_fake_hbase_connection.close_counter, - 3 - ) - # close all connections - hb_context.close() - eq_( - a_fake_hbase_connection.close_counter, - 3 - ) - - def test_hbase_usage_with_transaction(self): - local_config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'number_of_retries': 2, - 'logger': SilentFakeLogger(), - 'executor_identity': lambda: 'dwight' # bogus thread id - }) - a_fake_hbase_connection = FakeHB_Connection(local_config) - with mock.patch.object(connection_context, 'HBaseConnection', - mock.Mock(return_value=a_fake_hbase_connection)): - hb_context = connection_context.HBaseConnectionContext( - local_config - ) - def all_ok(connection, dummy): - eq_(dummy, 'hello') - return True - - transaction = TransactionExecutor(local_config, hb_context) - result = transaction(all_ok, 'hello') - ok_(result) - eq_( - a_fake_hbase_connection.close_counter, - 1 - ) - eq_( - a_fake_hbase_connection.rollback_counter, - 0 - ) - eq_( - a_fake_hbase_connection.commit_counter, - 1 - ) - - def bad_deal(connection, dummy): - raise KeyError('fred') - - assert_raises(KeyError, transaction, bad_deal, 'hello') - eq_( - a_fake_hbase_connection.close_counter, - 2 - ) - eq_( - a_fake_hbase_connection.commit_counter, - 1 - ) - - hb_context.close() - eq_( - a_fake_hbase_connection.close_counter, - 2 - ) - - -class TestHBasePooledConnectionContext(TestCase): - - def test_basic_hbase_usage(self): - local_config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'number_of_retries': 2, - 'logger': SilentFakeLogger(), - 'executor_identity': lambda: 'dwight' # bogus thread id - }) - a_fake_hbase_connection = FakeHB_Connection(local_config) - with mock.patch.object(connection_context, 'HBaseConnection', - mock.Mock(return_value=a_fake_hbase_connection)): - hb_context = connection_context.HBasePooledConnectionContext( - local_config - ) - # open a connection - with 
hb_context() as conn: - pass - eq_( - a_fake_hbase_connection.close_counter, - 0 - ) - # open another connection again - with hb_context() as conn: - pass - eq_( - a_fake_hbase_connection.close_counter, - 0 - ) - # get a named connection - with hb_context('fred') as conn: - pass - eq_( - a_fake_hbase_connection.close_counter, - 0 - ) - # close all connections - hb_context.close() - eq_( - a_fake_hbase_connection.close_counter, - 2 - ) - - def test_hbase_usage_with_transaction(self): - local_config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'number_of_retries': 2, - 'logger': SilentFakeLogger(), - 'executor_identity': lambda: 'dwight' # bogus thread id - }) - a_fake_hbase_connection = FakeHB_Connection(local_config) - with mock.patch.object(connection_context, 'HBaseConnection', - mock.Mock(return_value=a_fake_hbase_connection)): - hb_context = connection_context.HBasePooledConnectionContext( - local_config - ) - def all_ok(connection, dummy): - eq_(dummy, 'hello') - return True - - transaction = TransactionExecutor(local_config, hb_context) - result = transaction(all_ok, 'hello') - ok_(result) - eq_( - a_fake_hbase_connection.close_counter, - 0 - ) - eq_( - a_fake_hbase_connection.rollback_counter, - 0 - ) - eq_( - a_fake_hbase_connection.commit_counter, - 1 - ) - - def bad_deal(connection, dummy): - raise KeyError('fred') - - assert_raises(KeyError, transaction, bad_deal, 'hello') - # at this point, the underlying connection has been deleted from - # the pool, because it was considered to be a bad connection. - eq_( - a_fake_hbase_connection.close_counter, - 0 - ) - eq_( - a_fake_hbase_connection.commit_counter, - 1 - ) - - hb_context.close() - # because the connection was previously deleted from the pool, - # no connection gets closed at this point. - eq_( - a_fake_hbase_connection.close_counter, - 0 - ) diff --git a/socorro/unittest/external/hb/test_crash_data.py b/socorro/unittest/external/hb/test_crash_data.py deleted file mode 100644 index 1e110b8e59..0000000000 --- a/socorro/unittest/external/hb/test_crash_data.py +++ /dev/null @@ -1,237 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -import os -from nose.plugins.skip import SkipTest -from nose.tools import eq_, assert_raises -from configman import ConfigurationManager, Namespace -from mock import Mock - -from socorrolib.lib import ( - MissingArgumentError, - ResourceNotFound, - ResourceUnavailable, -) -from socorro.external.hb import crash_data, crashstorage, hbase_client -from socorro.unittest.testbase import TestCase - - -_run_integration_tests = os.environ.get('RUN_HBASE_INTEGRATION_TESTS', False) -if _run_integration_tests in ('false', 'False', 'no', '0'): - _run_integration_tests = False - - -class TestIntegrationHBaseCrashData(TestCase): - - def setUp(self): - if not _run_integration_tests: - raise SkipTest("Skipping HBase integration tests") - super(TestIntegrationHBaseCrashData, self).setUp() - self.config_manager = self._common_config_setup() - - with self.config_manager.context() as config: - store = crashstorage.HBaseCrashStorage(config.hbase) - - # A complete crash report (raw, dump and processed) - fake_raw_dump_1 = 'peter is a swede' - fake_raw_dump_2 = 'lars is a norseman' - fake_raw_dump_3 = 'adrian is a frenchman' - fake_dumps = {'upload_file_minidump': fake_raw_dump_1, - 'lars': fake_raw_dump_2, - 'adrian': fake_raw_dump_3} - fake_raw = { - 'name': 'Peter', - 'legacy_processing': 0, - 'submitted_timestamp': '2013-05-04' - } - fake_processed = { - 'name': 'Peter', - 'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314', - 'completeddatetime': '2012-01-01T00:00:00', - 'email': 'peter@fake.org', - } - - store.save_raw_crash( - fake_raw, - fake_dumps, - '114559a5-d8e6-428c-8b88-1c1f22120314' - ) - store.save_processed(fake_processed) - - # A non-processed crash report - fake_raw = { - 'name': 'Adrian', - 'legacy_processing': 0, - 'submitted_timestamp': '2013-05-04' - } - - store.save_raw_crash( - fake_raw, - fake_dumps, - '58727744-12f5-454a-bcf5-f688a2120821' - ) - - def tearDown(self): - super(TestIntegrationHBaseCrashData, self).tearDown() - with self.config_manager.context() as config: - connection = hbase_client.HBaseConnectionForCrashReports( - config.hbase.hbase_host, - config.hbase.hbase_port, - config.hbase.hbase_timeout - ) - for row in connection.merge_scan_with_prefix( - 'crash_reports', '', ['ids:ooid']): - index_row_key = row['_rowkey'] - connection.client.deleteAllRow( - 'crash_reports', index_row_key) - # because of HBase's async nature, deleting can take time - list(connection.iterator_for_all_legacy_to_be_processed()) - - def _common_config_setup(self): - mock_logging = Mock() - required_config = Namespace() - required_config.namespace('hbase') - required_config.hbase.hbase_class = \ - crashstorage.HBaseCrashStorage - required_config.hbase.add_option('logger', default=mock_logging) - config_manager = ConfigurationManager( - [required_config], - app_name='testapp', - app_version='1.0', - app_description='app description', - values_source_list=[{'hbase': { - 'logger': mock_logging - }}] - ) - return config_manager - - def test_get(self): - with self.config_manager.context() as config: - - priorityjobs_mock = Mock() - service = crash_data.CrashData( - config=config, - all_services={'Priorityjobs': priorityjobs_mock} - ) - params = { - 'datatype': 'raw', - 'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314' - } - - # Test 1a: get a raw dump - res_expected = ('peter is a swede', - 'application/octet-stream') - res = service.get(**params) - eq_(res, res_expected) - - # Test 1b: get a raw dump with the default name - res = service.get(**dict(params, name='upload_file_minidump')) - eq_(res, res_expected) 
- - # Test 1c: get a raw dump with a different name - res_expected = ('lars is a norseman', - 'application/octet-stream') - res = service.get(**dict(params, name='lars')) - eq_(res, res_expected) - - # Test 2: get a raw crash - params['datatype'] = 'meta' - res_expected = { - 'name': 'Peter', - 'legacy_processing': 0, - 'submitted_timestamp': '2013-05-04' - } - res = service.get(**params) - - eq_(res, res_expected) - - # Test 3: get a processed crash - params['datatype'] = 'processed' - res_expected = { - 'name': 'Peter', - 'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314', - 'completeddatetime': '2012-01-01T00:00:00' - } - res = service.get(**params) - - eq_(res, res_expected) - - # Test 3a: get a unredacted processed crash - params['datatype'] = 'unredacted' - res_expected = { - 'name': 'Peter', - 'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314', - 'completeddatetime': '2012-01-01T00:00:00', - 'email': 'peter@fake.org', - } - res = service.get(**params) - - eq_(res, res_expected) - - # Test 4: missing parameters - assert_raises( - MissingArgumentError, - service.get - ) - assert_raises( - MissingArgumentError, - service.get, - **{'uuid': '114559a5-d8e6-428c-8b88-1c1f22120314'} - ) - - # Test 5: crash cannot be found - assert_raises( - ResourceNotFound, - service.get, - **{ - 'uuid': 'c44245f4-c93b-49b8-86a2-c15dc2130504', - 'datatype': 'processed' - } - ) - # Test 5a: crash cannot be found - assert_raises( - ResourceNotFound, - service.get, - **{ - 'uuid': 'c44245f4-c93b-49b8-86a2-c15dc2130504', - 'datatype': 'unredacted' - } - ) - - # Test 6: not yet available crash - assert_raises( - ResourceUnavailable, - service.get, - **{ - 'uuid': '58727744-12f5-454a-bcf5-f688a2120821', - 'datatype': 'processed' - } - ) - priorityjobs_mock.cls.return_value.create.assert_called_once_with( - uuid='58727744-12f5-454a-bcf5-f688a2120821' - ) - priorityjobs_mock.cls.return_value.create.reset_mock() - - # Test 6a: not yet available crash - assert_raises( - ResourceUnavailable, - service.get, - **{ - 'uuid': '58727744-12f5-454a-bcf5-f688a2120821', - 'datatype': 'unredacted' - } - ) - priorityjobs_mock.cls.return_value.create.assert_called_once_with( - uuid='58727744-12f5-454a-bcf5-f688a2120821' - ) - - # Test 7: raw crash cannot be found - assert_raises( - ResourceNotFound, - service.get, - **{ - 'uuid': 'c44245f4-c93b-49b8-86a2-c15dc2130505', - 'datatype': 'raw' - } - ) diff --git a/socorro/unittest/external/hb/test_crashstorage.py b/socorro/unittest/external/hb/test_crashstorage.py deleted file mode 100644 index e1a94bf341..0000000000 --- a/socorro/unittest/external/hb/test_crashstorage.py +++ /dev/null @@ -1,187 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -import json - -import mock -from nose.tools import eq_, assert_raises - -from socorrolib.lib.util import SilentFakeLogger, DotDict -from socorro.external.crashstorage_base import ( - Redactor, - MemoryDumpsMapping -) -from socorro.external.hb.crashstorage import HBaseCrashStorage, CrashIDNotFound -from socorro.database.transaction_executor import TransactionExecutor -from socorro.unittest.testbase import TestCase - - -class TestCrashStorage(TestCase): - def setUp(self): - super(TestCrashStorage, self).setUp() - self.context = mock.MagicMock() - self.context.__enter__.return_value = self.context - config = DotDict({ - 'hbase_host': 'host', - 'database_name': 'name', - 'hbase_port': 9090, - 'hbase_timeout': 9000, - 'number_of_retries': 2, - 'logger': SilentFakeLogger(), - 'hbase_connection_context_class': mock.Mock( - return_value=self.context - ), - 'transaction_executor_class': TransactionExecutor, - 'new_crash_limit': 10 ** 6, - 'redactor_class': Redactor, - 'forbidden_keys': Redactor.required_config.forbidden_keys.default, - }) - self.storage = HBaseCrashStorage(config) - - def _fake_processed_crash(self): - d = DotDict() - # these keys survive redaction - d.a = DotDict() - d.a.b = DotDict() - d.a.b.c = 11 - d.sensitive = DotDict() - d.sensitive.x = 2 - d.not_url = 'not a url' - - return d - - def _fake_redacted_processed_crash(self): - d = self._fake_unredacted_processed_crash() - del d.url - del d.email - del d.user_id - del d.exploitability - del d.json_dump.sensitive - del d.upload_file_minidump_flash1.json_dump.sensitive - del d.upload_file_minidump_flash2.json_dump.sensitive - del d.upload_file_minidump_browser.json_dump.sensitive - - return d - - def _fake_unredacted_processed_crash(self): - d = self._fake_processed_crash() - - # these keys do not survive redaction - d['url'] = 'http://very.embarassing.com' - d['email'] = 'lars@fake.com' - d['user_id'] = '3333' - d['exploitability'] = 'yep' - d.json_dump = DotDict() - d.json_dump.sensitive = 22 - d.upload_file_minidump_flash1 = DotDict() - d.upload_file_minidump_flash1.json_dump = DotDict() - d.upload_file_minidump_flash1.json_dump.sensitive = 33 - d.upload_file_minidump_flash2 = DotDict() - d.upload_file_minidump_flash2.json_dump = DotDict() - d.upload_file_minidump_flash2.json_dump.sensitive = 33 - d.upload_file_minidump_browser = DotDict() - d.upload_file_minidump_browser.json_dump = DotDict() - d.upload_file_minidump_browser.json_dump.sensitive = DotDict() - d.upload_file_minidump_browser.json_dump.sensitive.exploitable = 55 - d.upload_file_minidump_browser.json_dump.sensitive.secret = 66 - - return d - - def _fake_unredacted_processed_crash_as_string(self): - d = self._fake_unredacted_processed_crash() - s = json.dumps(d) - return s - - - def test_close(self): - self.storage.close() - eq_(self.storage.hbase.close.call_count, 1) - - def test_save_processed(self): - self.storage.save_processed({ - "uuid": "936ce666-ff3b-4c7a-9674-367fe2120408", - "completeddatetime": "2012-04-08 10:56:50.902884" - }) - with self.storage.hbase() as conn: - eq_(conn.client.mutateRow.call_count, 2) - - def test_save_raw_crash(self): - self.storage.save_raw_crash({ - "submitted_timestamp": "2013-01-09T22:21:18.646733+00:00" - }, MemoryDumpsMapping(), "0bba929f-8721-460c-dead-a43c20071027") - with self.storage.hbase() as conn: - eq_(conn.client.mutateRow.call_count, 5) - - def test_save_raw_crash_hang(self): - self.storage.save_raw_crash({ - "submitted_timestamp": "2013-01-09T22:21:18.646733+00:00", - "HangID": "?" 
- }, MemoryDumpsMapping(), "0bba929f-8721-460c-dead-a43c20071027") - with self.storage.hbase() as conn: - eq_(conn.client.mutateRow.call_count, 7) - - def test_get_raw_dumps(self): - self.storage.get_raw_dumps("936ce666-ff3b-4c7a-9674-367fe2120408") - with self.storage.hbase() as conn: - eq_(conn.client.getRowWithColumns.call_count, 1) - - def test_get_raw_dumps_as_files(self): - self.storage.get_raw_dumps_as_files( - "936ce666-ff3b-4c7a-9674-367fe2120408") - with self.storage.hbase() as conn: - eq_(conn.client.getRowWithColumns.call_count, 1) - - def test_get_unredacted_processed(self): - faked_hb_row_object = DotDict() - faked_hb_row_object.columns = DotDict() - faked_hb_row_object.columns['processed_data:json'] = DotDict() - faked_hb_row_object.columns['processed_data:json'].value = \ - self._fake_unredacted_processed_crash_as_string() - - processed_crash = DotDict() - with self.storage.hbase() as conn: - conn.client.getRowWithColumns.return_value = [faked_hb_row_object] - - processed_crash = self.storage.get_unredacted_processed( - "936ce666-ff3b-4c7a-9674-367fe2120408" - ) - eq_( - processed_crash, - self._fake_unredacted_processed_crash() - ) - - def test_get_processed(self): - faked_hb_row_object = DotDict() - faked_hb_row_object.columns = DotDict() - faked_hb_row_object.columns['processed_data:json'] = DotDict() - faked_hb_row_object.columns['processed_data:json'].value = \ - self._fake_unredacted_processed_crash_as_string() - - processed_crash = DotDict() - with self.storage.hbase() as conn: - conn.client.getRowWithColumns.return_value = [faked_hb_row_object] - - processed_crash = self.storage.get_processed( - "936ce666-ff3b-4c7a-9674-367fe2120408" - ) - eq_( - processed_crash, - self._fake_redacted_processed_crash() - ) - - - def test_get_processed_failure(self): - with self.storage.hbase() as conn: - conn.client.getRowWithColumns.return_value = [] - assert_raises( - CrashIDNotFound, - self.storage.get_processed, - "936ce666-ff3b-4c7a-9674-367fe2120408" - ) - - def test_new_crashes(self): - self.storage._salted_scanner_iterable = mock.Mock( - return_value=iter([]) - ) - eq_(list(self.storage.new_crashes()), []) diff --git a/socorro/unittest/external/postgresql/test_server_status.py b/socorro/unittest/external/postgresql/test_server_status.py index 15a4175a09..9fd62ee82c 100644 --- a/socorro/unittest/external/postgresql/test_server_status.py +++ b/socorro/unittest/external/postgresql/test_server_status.py @@ -52,7 +52,7 @@ def tearDown(self): os.remove(os.path.join(self.basedir, 'breakpad_revision.txt')) cursor = self.connection.cursor() - cursor.execute("TRUNCATE server_status, alembic_version CASCADE;") + cursor.execute("TRUNCATE alembic_version CASCADE;") self.connection.commit() super(IntegrationTestServerStatus, self).tearDown() diff --git a/socorro/unittest/middleware/test_middleware_app.py b/socorro/unittest/middleware/test_middleware_app.py index 6f282e7cce..d66f45dd8b 100644 --- a/socorro/unittest/middleware/test_middleware_app.py +++ b/socorro/unittest/middleware/test_middleware_app.py @@ -999,15 +999,6 @@ def test_missing_argument_yield_bad_request(self): app.main() server = middleware_app.application - response = self.get( - server, - '/crash_data/', - {'xx': 'yy'}, - expect_errors=True - ) - eq_(response.status, 400) - ok_('uuid' in response.body) - response = self.get( server, '/crashes/comments/', diff --git a/socorro/unittest/processor/test_mozilla_transform_rules.py b/socorro/unittest/processor/test_mozilla_transform_rules.py index b1d97138fc..0bb0a421ca 100644 
--- a/socorro/unittest/processor/test_mozilla_transform_rules.py +++ b/socorro/unittest/processor/test_mozilla_transform_rules.py @@ -36,6 +36,7 @@ MissingSymbolsRule, BetaVersionRule, OSPrettyVersionRule, + ThemePrettyNameRule, ) canonical_standard_raw_crash = DotDict({ @@ -1020,6 +1021,7 @@ def test_extract_memory_info_too_big(self): opened.read.return_value = json.dumps({ 'some': 'notveryshortpieceofjson' }) + def gzip_open(filename, mode): assert mode == 'rb' return opened @@ -2119,3 +2121,95 @@ def test_everything_we_hoped_for(self): rule.act(raw_crash, raw_dumps, processed_crash, processor_meta) ok_('os_pretty_version' in processed_crash) eq_(processed_crash['os_pretty_version'], 'Windows NT') + + +#============================================================================== +class TestThemePrettyNameRule(TestCase): + + #-------------------------------------------------------------------------- + def get_basic_config(self): + config = CDotDict() + config.logger = Mock() + config.chatty = False + return config + + #-------------------------------------------------------------------------- + def get_basic_processor_meta(self): + processor_meta = DotDict() + processor_meta.processor_notes = [] + + return processor_meta + + #-------------------------------------------------------------------------- + def test_everything_we_hoped_for(self): + config = self.get_basic_config() + + raw_crash = copy.copy(canonical_standard_raw_crash) + raw_dumps = {} + processed_crash = DotDict() + processor_meta = self.get_basic_processor_meta() + + rule = ThemePrettyNameRule(config) + + processed_crash.addons = [ + ('adblockpopups@jessehakanen.net', '0.3'), + ('dmpluginff@westbyte.com', '1,4.8'), + ('firebug@software.joehewitt.com', '1.9.1'), + ('killjasmin@pierros14.com', '2.4'), + ('support@surfanonymous-free.com', '1.0'), + ('uploader@adblockfilters.mozdev.org', '2.1'), + ('{a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7}', '20111107'), + ('{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', '2.0.3'), + ('anttoolbar@ant.com', '2.4.6.4'), + ('{972ce4c6-7e08-4474-a285-3208198ce6fd}', '12.0'), + ('elemhidehelper@adblockplus.org', '1.2.1') + ] + + # the call to be tested + rule.act(raw_crash, raw_dumps, processed_crash, processor_meta) + + # the raw crash & raw_dumps should not have changed + eq_(raw_crash, canonical_standard_raw_crash) + eq_(raw_dumps, {}) + + expected_addon_list = [ + ('adblockpopups@jessehakanen.net', '0.3'), + ('dmpluginff@westbyte.com', '1,4.8'), + ('firebug@software.joehewitt.com', '1.9.1'), + ('killjasmin@pierros14.com', '2.4'), + ('support@surfanonymous-free.com', '1.0'), + ('uploader@adblockfilters.mozdev.org', '2.1'), + ('{a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7}', '20111107'), + ('{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', '2.0.3'), + ('anttoolbar@ant.com', '2.4.6.4'), + ('{972ce4c6-7e08-4474-a285-3208198ce6fd} (default Firefox theme)', + '12.0'), + ('elemhidehelper@adblockplus.org', '1.2.1') + ] + eq_(processed_crash.addons, expected_addon_list) + + #-------------------------------------------------------------------------- + def test_missing_key(self): + config = self.get_basic_config() + + processed_crash = DotDict() + processor_meta = self.get_basic_processor_meta() + + rule = ThemePrettyNameRule(config) + + # Test with missing key. + res = rule._predicate({}, {}, processed_crash, processor_meta) + ok_(not res) + + # Test with empty list. + processed_crash.addons = [] + res = rule._predicate({}, {}, processed_crash, processor_meta) + ok_(not res) + + # Test with key missing from list. 
+ processed_crash.addons = [ + ('adblockpopups@jessehakanen.net', '0.3'), + ('dmpluginff@westbyte.com', '1,4.8'), + ] + res = rule._predicate({}, {}, processed_crash, processor_meta) + ok_(not res) diff --git a/tools/loadjsonz.py b/tools/loadjsonz.py deleted file mode 100644 index 7cb219515d..0000000000 --- a/tools/loadjsonz.py +++ /dev/null @@ -1,42 +0,0 @@ -#! /usr/bin/env python -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -import sys -import socorro.external.hbase.hbase_client as hbase -import gzip - -class JsonzLoader(object): - def __init__(self,host,port): - self.hbase_connection = hbase.HBaseConnectionForCrashReports(host,port) - - def close(self): - self.hbase_connection.close() - - def load_from_file(self, uuid, path): - jsonz_file = gzip.open(path, 'rb') - json_string = jsonz_file.read() - jsonz_file.close() - self.hbase_connection.create_ooid_from_jsonz(uuid,json_string) - -if __name__=="__main__": - if len(sys.argv) != 3: - print "Usage: loadjsonz.py \nText file should be uuid and file path seperated by a tab" - sys.exit(1) - input_file_path = sys.argv[1] - host, port = sys.argv[2].split(':') - loader = JsonzLoader(host,int(port)) - input_file = open(input_file_path,'rb') - i = 0 - for line in input_file: - uuid, path = line.strip().split('\t') - loader.load_from_file(uuid, path) - i += 1 - if i % 1000 == 0: - print i,'reports loaded' - loader.close() - input_file.close() - print "%s jsonz file(s) loaded" % i - diff --git a/tools/trigger b/tools/trigger index 91321c551d..b68753b9dd 100644 --- a/tools/trigger +++ b/tools/trigger @@ -1 +1 @@ -This will trigger a jenkins run. \ No newline at end of file +This will trigger a jenkins run. Python is as great to work with as underpants made of fire. 
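Editor's note on the TestThemePrettyNameRule tests added above: they pin down the rule's contract — the predicate only fires when the default Firefox theme GUID appears in processed_crash.addons (False when the key is missing, the list is empty, or the GUID is absent), and the action appends a readable label to that one entry while leaving the rest untouched. A minimal sketch of that contract follows, assuming addons is a list of (id, version) tuples as in the test fixtures; the class and constant names are illustrative only and this is not the project's actual implementation.

DEFAULT_THEME_GUID = '{972ce4c6-7e08-4474-a285-3208198ce6fd}'  # GUID used in the test fixtures
PRETTY_LABEL = ' (default Firefox theme)'


class ThemePrettyNameRuleSketch(object):
    """Illustrative stand-in; not the rule shipped in mozilla_transform_rules."""

    def _predicate(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        # False when 'addons' is missing, empty, or lacks the default-theme GUID,
        # matching the three cases exercised by test_missing_key above.
        addons = processed_crash.get('addons') or []
        return any(addon_id == DEFAULT_THEME_GUID for addon_id, _ in addons)

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        # Rewrite only the matching entry, leaving every other addon untouched,
        # as asserted by test_everything_we_hoped_for above.
        processed_crash['addons'] = [
            ((addon_id + PRETTY_LABEL, version)
             if addon_id == DEFAULT_THEME_GUID else (addon_id, version))
            for addon_id, version in processed_crash['addons']
        ]
        return True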
diff --git a/webapp-django/crashstats/api/tests/test_views.py b/webapp-django/crashstats/api/tests/test_views.py index 9d1399463d..e070667a29 100644 --- a/webapp-django/crashstats/api/tests/test_views.py +++ b/webapp-django/crashstats/api/tests/test_views.py @@ -28,6 +28,9 @@ Reprocessing, ProductBuildTypes, Status, + ProcessedCrash, + RawCrash, + UnredactedCrash, ) from crashstats.tokens.models import Token @@ -767,8 +770,7 @@ def mocked_get(url, params, **options): ok_(dump['hits']) ok_(dump['total']) - @mock.patch('requests.get') - def test_ProcessedCrash(self, rget): + def test_ProcessedCrash(self): url = reverse('api:model_wrapper', args=('ProcessedCrash',)) response = self.client.get(url) eq_(response.status_code, 400) @@ -776,11 +778,9 @@ def test_ProcessedCrash(self, rget): dump = json.loads(response.content) ok_(dump['errors']['crash_id']) - def mocked_get(url, params, **options): - assert '/crash_data' in url, url - + def mocked_get(**params): if 'datatype' in params and params['datatype'] == 'processed': - return Response({ + return { "client_crash_date": "2012-06-11T06:08:45", "dump": dump, "signature": "FakeSignature1", @@ -812,10 +812,10 @@ def mocked_get(url, params, **options): "upload_file_minidump_flash1": "a crash", "upload_file_minidump_flash2": "a crash", "upload_file_minidump_plugin": "a crash" - }) - raise NotImplementedError(url) + } + raise NotImplementedError - rget.side_effect = mocked_get + ProcessedCrash.implementation().get.side_effect = mocked_get response = self.client.get(url, { 'crash_id': '123', @@ -826,8 +826,7 @@ def mocked_get(url, params, **options): ok_('upload_file_minidump_flash2' in dump) ok_('url' not in dump) - @mock.patch('requests.get') - def test_UnredactedCrash(self, rget): + def test_UnredactedCrash(self): url = reverse('api:model_wrapper', args=('UnredactedCrash',)) response = self.client.get(url) # because we don't have the sufficient permissions yet to use it @@ -855,11 +854,9 @@ def test_UnredactedCrash(self, rget): dump = json.loads(response.content) ok_(dump['errors']['crash_id']) - def mocked_get(url, params, **options): - assert '/crash_data/' in url - + def mocked_get(**params): if 'datatype' in params and params['datatype'] == 'unredacted': - return Response({ + return { "client_crash_date": "2012-06-11T06:08:45", "dump": dump, "signature": "FakeSignature1", @@ -892,10 +889,10 @@ def mocked_get(url, params, **options): "upload_file_minidump_flash2": "a crash", "upload_file_minidump_plugin": "a crash", "exploitability": "Unknown Exploitability", - }) - raise NotImplementedError(url) + } + raise NotImplementedError - rget.side_effect = mocked_get + UnredactedCrash.implementation().get.side_effect = mocked_get response = self.client.get(url, { 'crash_id': '123', @@ -906,13 +903,11 @@ def mocked_get(url, params, **options): ok_('upload_file_minidump_flash2' in dump) ok_('exploitability' in dump) - @mock.patch('requests.get') - def test_RawCrash(self, rget): + def test_RawCrash(self): - def mocked_get(url, params, **options): - assert '/crash_data' in url + def mocked_get(**params): if 'uuid' in params and params['uuid'] == 'abc123': - return Response({ + return { "InstallTime": "1366691881", "AdapterVendorID": "0x8086", "Theme": "classic/1.0", @@ -946,10 +941,10 @@ def mocked_get(url, params, **options): "upload_file_minidump_flash1": "a crash", "upload_file_minidump_flash2": "a crash", "upload_file_minidump_plugin": "a crash" - }) - raise NotImplementedError(url) + } + raise NotImplementedError - rget.side_effect = mocked_get + 
RawCrash.implementation().get.side_effect = mocked_get url = reverse('api:model_wrapper', args=('RawCrash',)) response = self.client.get(url) @@ -977,16 +972,14 @@ def mocked_get(url, params, **options): ok_('http://p0rn.com' not in dump['Comments']) ok_('mail@email.com' not in dump['Comments']) - @mock.patch('requests.get') - def test_RawCrash_binary_blob(self, rget): + def test_RawCrash_binary_blob(self): - def mocked_get(url, params, **options): - assert '/crash_data' in url + def mocked_get(**params): if 'uuid' in params and params['uuid'] == 'abc': - return Response('\xe0') - raise NotImplementedError(url) + return '\xe0' + raise NotImplementedError - rget.side_effect = mocked_get + RawCrash.implementation().get.side_effect = mocked_get url = reverse('api:model_wrapper', args=('RawCrash',)) response = self.client.get(url, { @@ -996,7 +989,6 @@ def mocked_get(url, params, **options): # because we don't have permission eq_(response.status_code, 403) - url = reverse('api:model_wrapper', args=('RawCrash',)) response = self.client.get(url, { 'crash_id': 'abc', 'format': 'wrong' # note diff --git a/webapp-django/crashstats/api/views.py b/webapp-django/crashstats/api/views.py index 5918575217..898ce7dd2a 100644 --- a/webapp-django/crashstats/api/views.py +++ b/webapp-django/crashstats/api/views.py @@ -17,6 +17,7 @@ from waffle.decorators import waffle_switch from socorrolib.lib import BadArgumentError, MissingArgumentError +from socorro.external.crashstorage_base import CrashIDNotFound import crashstats from crashstats.crashstats.decorators import track_api_pageview @@ -41,6 +42,10 @@ models.RequiredParameterError, ) +NOT_FOUND_EXCEPTIONS = ( + CrashIDNotFound, +) + # See http://www.iana.org/assignments/http-status-codes REASON_PHRASES = { @@ -337,6 +342,11 @@ def model_wrapper(request, model_name): content_type='application/json; charset=UTF-8' ) raise + except NOT_FOUND_EXCEPTIONS as exception: + return http.HttpResponseNotFound( + json.dumps({'error': str(exception)}), + content_type='application/json; charset=UTF-8' + ) except BAD_REQUEST_EXCEPTIONS as exception: return http.HttpResponseBadRequest( json.dumps({'error': str(exception)}), diff --git a/webapp-django/crashstats/crashstats/jinja2/crashstats/login.html b/webapp-django/crashstats/crashstats/jinja2/crashstats/login.html index 627b61eead..58dcc6c11b 100644 --- a/webapp-django/crashstats/crashstats/jinja2/crashstats/login.html +++ b/webapp-django/crashstats/crashstats/jinja2/crashstats/login.html @@ -1,13 +1,14 @@ {% extends "crashstats_base.html" %} {% block page_title %} -{% if request.user.is_authenticated() %}Insufficient Privileges{% else %}Login Required{% endif %} +{% if request.user.is_active %}Insufficient Privileges{% else %}Login Required{% endif %} {% endblock %} {% block content %} +
- {% if request.user.is_authenticated() %} + {% if request.user.is_active %}

Insufficient Privileges

{% else %}

Login Required

@@ -16,15 +17,15 @@

Login Required

- {% if request.user.is_authenticated() %} -

- You are signed in but you do not have sufficient permissions to reach the resource you requested. -

- {% else %} -

- The page you requested requires authentication. Use the login button at the lower right to log in. -

- {% endif %} + {% if request.user.is_authenticated() %} +

+ You are signed in but you do not have sufficient permissions to reach the resource you requested. +

+ {% else %} +

+ The page you requested requires authentication. Use the login button at the lower right to log in. +

+ {% endif %}
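Editor's note on the login.html hunks above: the page-title condition changes from request.user.is_authenticated() to request.user.is_active. As a small stand-alone illustration of the behavioural difference (plain-Python stand-ins, not this repository's code; standard Django semantics assumed, where AnonymousUser and deactivated accounts both report is_active as False):

import collections

# Hypothetical stand-in for django's request.user, used only to show the two flags.
FakeUser = collections.namedtuple('FakeUser', ['is_authenticated', 'is_active'])


def page_title(user):
    # Mirrors the Jinja2 condition in the hunk above.
    return 'Insufficient Privileges' if user.is_active else 'Login Required'


anonymous = FakeUser(is_authenticated=False, is_active=False)
deactivated = FakeUser(is_authenticated=True, is_active=False)  # logged in, account disabled
active = FakeUser(is_authenticated=True, is_active=True)

assert page_title(anonymous) == 'Login Required'
assert page_title(deactivated) == 'Login Required'  # the extra case is_active covers
assert page_title(active) == 'Insufficient Privileges'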
diff --git a/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index.html b/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index.html index 5321fdade8..984526cef8 100644 --- a/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index.html +++ b/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index.html @@ -39,7 +39,10 @@

{{ report.product }} {{ report.version }} Crash Report [@ {{ report.signatur
- +
ID: {{ report.uuid }}
diff --git a/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index_pending.html b/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index_pending.html index 0e06726166..9363e2ac77 100644 --- a/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index_pending.html +++ b/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index_pending.html @@ -1,32 +1,33 @@ {% extends "crashstats_base.html" %} +{% block site_css %} +{{ super() }} +{% stylesheet 'report_pending' %} +{% endblock %} + +{% block site_js %} +{{ super() }} +{% javascript 'report_pending' %} +{% endblock %} + {% block content %}
-
-
- -
-

Please Wait...

-

Fetching this archived report will take 30 seconds to 5 minutes

- -

Next attempt in - 30 seconds...

- -
- - {% endblock %} - -{% block site_js %} -{{ super() }} - - -{% endblock %} diff --git a/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index_too_old.html b/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index_too_old.html deleted file mode 100644 index 3305844bc0..0000000000 --- a/webapp-django/crashstats/crashstats/jinja2/crashstats/report_index_too_old.html +++ /dev/null @@ -1,10 +0,0 @@ -{% extends "crashstats_base.html" %} - -{% block content %} -
-
-

Oh Noes!

-

This archived report has expired because it is greater than 3 years of age.

-
-
-{% endblock %} diff --git a/webapp-django/crashstats/crashstats/models.py b/webapp-django/crashstats/crashstats/models.py index f9d2b4f1d5..2860ce083c 100644 --- a/webapp-django/crashstats/crashstats/models.py +++ b/webapp-django/crashstats/crashstats/models.py @@ -12,10 +12,12 @@ import ujson from configman import configuration, Namespace +from socorrolib.lib import BadArgumentError from socorro.external.es.base import ElasticsearchConfig from socorro.external.postgresql.crashstorage import PostgreSQLCrashStorage from socorro.external.rabbitmq.crashstorage import ( ReprocessingOneRabbitMQCrashStore, + PriorityjobRabbitMQCrashStore, ) import socorro.external.postgresql.platforms import socorro.external.postgresql.bugs @@ -28,6 +30,7 @@ import socorro.external.postgresql.product_build_types import socorro.external.postgresql.signature_first_date import socorro.external.postgresql.server_status +import socorro.external.boto.crash_data from socorrolib.app import socorro_app @@ -67,6 +70,16 @@ def config_from_configman(): 'rabbitmq_reprocessing_class', default=ReprocessingOneRabbitMQCrashStore, ) + definition_source.namespace('priority') + definition_source.priority.add_option( + 'rabbitmq_priority_class', + default=PriorityjobRabbitMQCrashStore, + ) + definition_source.namespace('data') + definition_source.data.add_option( + 'crash_data_class', + default=socorro.external.boto.crash_data.SimplifiedCrashData, + ) config = configuration( definition_source=definition_source, values_source_list=[ @@ -78,6 +91,8 @@ def config_from_configman(): # logger set up by configman as an aggregate, we just use the # same logger as we have here in the webapp. config.queuing.logger = logger + config.priority.logger = logger + config.data.logger = logger return config @@ -395,7 +410,6 @@ def get_implementation(self): config = config_from_configman() if self.implementation_config_namespace: config = config[self.implementation_config_namespace] - _implementations[key] = self.implementation( config=config ) @@ -1010,7 +1024,9 @@ class ReportList(SocorroMiddleware): class ProcessedCrash(SocorroMiddleware): - URL_PREFIX = '/crash_data/' + + implementation = socorro.external.boto.crash_data.SimplifiedCrashData + implementation_config_namespace = 'data' required_params = ( 'crash_id', @@ -1092,7 +1108,6 @@ class ProcessedCrash(SocorroMiddleware): class UnredactedCrash(ProcessedCrash): - URL_PREFIX = '/crash_data/' defaults = { 'datatype': 'unredacted', @@ -1123,7 +1138,8 @@ class RawCrash(SocorroMiddleware): token that carries the "View Raw Dumps" permission. """ - URL_PREFIX = '/crash_data/' + implementation = socorro.external.boto.crash_data.SimplifiedCrashData + implementation_config_namespace = 'data' required_params = ( 'crash_id', @@ -1239,11 +1255,17 @@ class RawCrash(SocorroMiddleware): ) def get(self, **kwargs): - format = kwargs.get('format', 'meta') - if format == 'raw_crash': - format = kwargs['format'] = 'raw' - kwargs['expect_json'] = format != 'raw' - return super(RawCrash, self).get(**kwargs) + format_ = kwargs.get('format', 'meta') + if format_ == 'raw_crash': + # legacy + format_ = kwargs['format'] = 'raw' + expect_dict = format_ != 'raw' + result = super(RawCrash, self).get(**kwargs) + # This 'result', will either be a binary blob or a python dict. + # Unless kwargs['format']==raw, this has to be a python dict. 
+ if expect_dict and not isinstance(result, dict): + raise BadArgumentError('format') + return result class CommentsBySignature(SocorroMiddleware): @@ -1881,6 +1903,25 @@ def post(self, **data): return self.get_implementation().reprocess(**data) +class Priorityjob(SocorroMiddleware): + """Return true if all supplied crash IDs + were sucessfully submitted onto the priority queue. + """ + + implementation = PriorityjobRabbitMQCrashStore + + implementation_config_namespace = 'priority' + + required_params = ( + ('crash_ids', list), + ) + + get = None + + def post(self, **kwargs): + return self.get_implementation().process(**kwargs) + + class Healthcheck(SocorroMiddleware): """Return a sign of life from the middleware. diff --git a/webapp-django/crashstats/crashstats/static/crashstats/css/report_pending.less b/webapp-django/crashstats/crashstats/static/crashstats/css/report_pending.less new file mode 100644 index 0000000000..b24c0d0e0d --- /dev/null +++ b/webapp-django/crashstats/crashstats/static/crashstats/css/report_pending.less @@ -0,0 +1,18 @@ +@import "mixins.less"; + +div.pending { + background-color: #fff; + color: #333; + margin: 20px auto; + padding: 10px; + border: 4px solid #999; + width: 400px; + text-align: center; + .rounded-corners(25px); + p { + font-size: 12px; + } + img { + margin: 10px 0; + } +} diff --git a/webapp-django/crashstats/crashstats/static/crashstats/css/screen.less b/webapp-django/crashstats/crashstats/static/crashstats/css/screen.less index 4bfdf4f7b1..895dde7908 100644 --- a/webapp-django/crashstats/crashstats/static/crashstats/css/screen.less +++ b/webapp-django/crashstats/crashstats/static/crashstats/css/screen.less @@ -863,28 +863,7 @@ div.code { #allthreads { display: none; } -/* Report Pending Status page */ -div.pendingStatus { - background-color: #fff; - color: #333; - margin: 20px auto; - padding: 10px; - border: 4px solid #999; - width: 400px; - font: 12px "Lucida Grande", "Lucida Sans Unicode", verdana, lucida, arial, helvetica, sans-serif; - text-align: center; - .rounded-corners(25px); - p { - font-size: 12px; - } - p.pendingProcessing { - color: green; - font-weight: bold; - } - img { - margin: 10px 0; - } -} + /* Top Crashers Index Page */ #topcrashers { h1 { diff --git a/webapp-django/crashstats/crashstats/static/crashstats/js/socorro/oauth2.js b/webapp-django/crashstats/crashstats/static/crashstats/js/socorro/oauth2.js index c0277eab9d..64ce8f078d 100644 --- a/webapp-django/crashstats/crashstats/static/crashstats/js/socorro/oauth2.js +++ b/webapp-django/crashstats/crashstats/static/crashstats/js/socorro/oauth2.js @@ -110,8 +110,20 @@ var OAuth2 = (function() { $.post(url, data) .done(function(response) { // It worked! - // TODO: https://bugzilla.mozilla.org/show_bug.cgi?id=1283296 - document.location.reload(); + var next = Qs.parse( + document.location.search.slice(1) + ).next; + // only if ?next=/... exists on the current URL + if (next) { + // A specific URL exits. + // This is most likely the case when you tried + // to access a privileged URL whilst being + // anonymous and being redirected. 
+ // Make sure it's on this server + document.location.pathname = next; + } else { + document.location.reload(); + } }) .fail(function(xhr) { console.error(xhr); diff --git a/webapp-django/crashstats/crashstats/static/crashstats/js/socorro/pending.js b/webapp-django/crashstats/crashstats/static/crashstats/js/socorro/pending.js index ad56f18646..5e37e21a21 100644 --- a/webapp-django/crashstats/crashstats/static/crashstats/js/socorro/pending.js +++ b/webapp-django/crashstats/crashstats/static/crashstats/js/socorro/pending.js @@ -1,47 +1,38 @@ +$(function() { + var Checker = (function() { + var intervalTime = 5 * 1000; + var checkInterval; + var totalTime = 0; -/* Javascript for the Pending Reports page */ - -// Begin the timer and Ajax calls for reports -var original_seconds = 30; -var seconds = original_seconds; -var number_calls = 1; - -// Maintain the time in seconds, and make an ajax call every 30 seconds -function pendingReportTimer(url){ - if (seconds == 0){ - $('#next_attempt').hide(); - $('#processing').show(); - - // Upon the third attempt, state that this failed - if (number_calls == 10) { - $('#checking').hide(); - $('#fail').show(); - } else { - pendingReportCheck(url); - number_calls += 1; - seconds = original_seconds; - $('#counter').html(original_seconds); - setTimeout("pendingReportTimer(\""+url+"\")",1000); - } - } - // Decrement the seconds count - else { - $('#processing').hide(); - $('#next_attempt').show(); - seconds -= 1; - $('#counter').html(seconds); - setTimeout("pendingReportTimer(\""+url+"\")",1000); - } -} - -// Perform the ajax call to check for this report -function pendingReportCheck (url) -{ - $.get(url, {}, - function(responseJSON){ - if (responseJSON.status == 'ready') { - top.location = responseJSON.url_redirect; + return { + startChecking: function(crashID) { + checkInterval = setInterval(function() { + $.get('/api/ProcessedCrash/', {crash_id: crashID}) + .then(function() { + clearInterval(checkInterval); + // If it exists, we can reload the page we're on. + $('.pending .searching').hide(); + $('.pending .found').fadeIn(300, function() { + document.location.reload(); + }); + }) + .fail(function(err) { + // Perfectly expected. + // We kind of expect the processed crash to not + // exist for a while. Once it's been processed, + // it should exist and yield a 200 error. + if (err.status !== 404) { + // But it's not a 404 error it's something unexpected. 
+ clearInterval(checkInterval); + throw new Error(err); + } + }); + }, intervalTime); } - },"json" - ); -} + }; + })(); + + var pathname = document.location.pathname.split('/'); + var crashID = pathname[pathname.length - 1]; + Checker.startChecking(crashID); +}); diff --git a/webapp-django/crashstats/crashstats/tests/test_models.py b/webapp-django/crashstats/crashstats/tests/test_models.py index 60c1b8cad7..37dda7714f 100644 --- a/webapp-django/crashstats/crashstats/tests/test_models.py +++ b/webapp-django/crashstats/crashstats/tests/test_models.py @@ -735,17 +735,15 @@ def mocked_get(url, params, **options): ok_(r['hits']) ok_(r['total']) - @mock.patch('requests.get') - def test_processed_crash(self, rget): + def test_processed_crash(self): model = models.ProcessedCrash api = model() - def mocked_get(url, params, **options): - assert '/crash_data' in url + def mocked_get(**params): ok_('datatype' in params) eq_(params['datatype'], 'processed') - return Response({ + return { 'product': 'WaterWolf', 'uuid': '7c44ade2-fdeb-4d6c-830a-07d302120525', 'version': '13.0', @@ -765,23 +763,21 @@ def mocked_get(url, params, **options): '13.0' ] ] - }) + } - rget.side_effect = mocked_get + model.implementation().get.side_effect = mocked_get r = api.get(crash_id='7c44ade2-fdeb-4d6c-830a-07d302120525') ok_(r['product']) - @mock.patch('requests.get') - def test_unredacted_crash(self, rget): + def test_unredacted_crash(self): model = models.UnredactedCrash api = model() - def mocked_get(url, params, **options): - assert '/crash_data' in url + def mocked_get(**params): ok_('datatype' in params) eq_(params['datatype'], 'unredacted') - return Response({ + return { 'product': 'WaterWolf', 'uuid': '7c44ade2-fdeb-4d6c-830a-07d302120525', 'version': '13.0', @@ -802,9 +798,10 @@ def mocked_get(url, params, **options): '13.0', ] ], - }) + } + + model.implementation().get.side_effect = mocked_get - rget.side_effect = mocked_get r = api.get(crash_id='7c44ade2-fdeb-4d6c-830a-07d302120525') ok_(r['product']) ok_(r['exploitability']) @@ -1081,59 +1078,48 @@ def mocked_get(**options): # but this should work api.get(batch='250', page='1') - @mock.patch('requests.get') - def test_raw_crash(self, rget): + def test_raw_crash(self): model = models.RawCrash api = model() - def mocked_get(url, params, **options): - assert '/crash_data/' in url - return Response({ + def mocked_get(**params): + return { 'InstallTime': '1339289895', 'FramePoisonSize': '4096', 'Theme': 'classic/1.0', 'Version': '5.0a1', 'Email': 'socorro-123@restmail.net', 'Vendor': 'Mozilla', - }) + } - rget.side_effect = mocked_get + model.implementation().get.side_effect = mocked_get r = api.get(crash_id='some-crash-id') eq_(r['Vendor'], 'Mozilla') ok_('Email' in r) # no filtering at this level - @mock.patch('requests.get') - def test_raw_crash_raw_data(self, rget): + def test_raw_crash_raw_data(self): model = models.RawCrash api = model() mocked_calls = [] - def mocked_get(url, params, **options): - assert '/crash_data/' in url + def mocked_get(**params): mocked_calls.append(params) assert params['datatype'] == 'raw' if params.get('name') == 'other': - return Response('\xe0\xe0') - elif params.get('name') == 'unknown': - return Response('not found', 404) + return '\xe0\xe0' else: - return Response('\xe0') + return '\xe0' + + model.implementation().get.side_effect = mocked_get - rget.side_effect = mocked_get r = api.get(crash_id='some-crash-id', format='raw') eq_(r, '\xe0') r = api.get(crash_id='some-crash-id', format='raw', name='other') eq_(r, '\xe0\xe0') - 
assert_raises( - models.BadStatusCodeError, - api.get, - crash_id='some-crash-id', format='raw', name='unknown' - ) - @mock.patch('requests.put') def test_put_featured_versions(self, rput): model = models.ReleasesFeatured @@ -1630,15 +1616,31 @@ def mocked_get(url, params, **options): def test_Reprocessing(self): api = models.Reprocessing() - def mocked_reprocess(crash_id): - if crash_id == 'some-crash-id': + def mocked_reprocess(crash_ids): + if crash_ids == 'some-crash-id': return True - elif crash_id == 'bad-crash-id': + elif crash_ids == 'bad-crash-id': return - raise NotImplementedError(crash_id) + raise NotImplementedError(crash_ids) models.Reprocessing.implementation().reprocess = mocked_reprocess - ok_(api.post(crash_id='some-crash-id')) + ok_(api.post(crash_ids='some-crash-id')) # Note that it doesn't raise an error if # the ReprocessingOneRabbitMQCrashStore choses NOT to queue it. - ok_(not api.post(crash_id='bad-crash-id')) + ok_(not api.post(crash_ids='bad-crash-id')) + + def test_Priorityjob(self): + api = models.Priorityjob() + + def mocked_process(crash_ids): + if crash_ids == 'some-crash-id': + return True + elif crash_ids == 'bad-crash-id': + return + raise NotImplementedError(crash_ids) + + models.Priorityjob.implementation().process = mocked_process + ok_(api.post(crash_ids='some-crash-id')) + # Note that it doesn't raise an error if + # the PriorityjobRabbitMQCrashStore choses NOT to queue it. + ok_(not api.post(crash_ids='bad-crash-id')) diff --git a/webapp-django/crashstats/crashstats/tests/test_views.py b/webapp-django/crashstats/crashstats/tests/test_views.py index 113b569505..9b9ff6a2b0 100644 --- a/webapp-django/crashstats/crashstats/tests/test_views.py +++ b/webapp-django/crashstats/crashstats/tests/test_views.py @@ -11,7 +11,7 @@ import pyquery import mock -from nose.tools import eq_, ok_, assert_raises +from nose.tools import eq_, ok_ from nose.plugins.skip import SkipTest from django.test.client import RequestFactory @@ -28,6 +28,8 @@ from django.core.urlresolvers import reverse from django.contrib.contenttypes.models import ContentType +from socorro.external.crashstorage_base import CrashIDNotFound + from crashstats.base.tests.testbase import DjangoTestCase from crashstats.crashstats import models, views from crashstats.crashstats.management import PERMISSIONS @@ -418,7 +420,7 @@ def mocked_product_versions(**params): ) def mocked_supersearchfields(**params): - results = copy.copy(SUPERSEARCH_FIELDS_MOCKED_RESULTS) + results = copy.deepcopy(SUPERSEARCH_FIELDS_MOCKED_RESULTS) # to be realistic we want to introduce some dupes # that have a different key but its `in_database_name` # is one that is already in the hardcoded list (the @@ -2359,18 +2361,6 @@ def test_report_index(self, rget, rpost): rpost.side_effect = mocked_post_threeothersigs def mocked_get(url, params, **options): - if '/crash_data' in url: - assert 'datatype' in params - - if params['datatype'] == 'meta': - return Response(_SAMPLE_META) - if params['datatype'] == 'unredacted': - return Response(dict( - _SAMPLE_UNREDACTED, - dump=dump, - user_comments=comment0 - )) - if 'correlations/signatures' in url: return Response({ 'hits': [ @@ -2379,10 +2369,34 @@ def mocked_get(url, params, **options): ], 'total': 2 }) - raise NotImplementedError(url) + rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( 
+ mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + crash['dump'] = dump + crash['user_comments'] = comment0 + return crash + + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse('crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611']) response = self.client.get(url) @@ -2463,52 +2477,6 @@ def test_report_index_with_additional_raw_dump_links(self, rget, rpost): rpost.side_effect = mocked_post_threeothersigs def mocked_get(url, params, **options): - if '/crash_data' in url: - assert 'datatype' in params - - if params['datatype'] == 'meta': - return Response({ - 'InstallTime': '1339289895', - 'FramePoisonSize': '4096', - 'Theme': 'classic/1.0', - 'Version': '5.0a1', - 'Email': 'secret@email.com', - 'Vendor': 'Mozilla', - 'URL': 'farmville.com', - 'additional_minidumps': 'foo, bar,', - }) - if params['datatype'] == 'unredacted': - return Response({ - 'client_crash_date': '2012-06-11T06:08:45', - 'dump': dump, - 'signature': 'FakeSignature1', - 'user_comments': None, - 'uptime': 14693, - 'release_channel': 'nightly', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'flash_version': '[blank]', - 'hangid': None, - 'distributor_version': None, - 'truncated': True, - 'process_type': None, - 'id': 383569625, - 'os_version': '10.6.8 10K549', - 'version': '5.0a1', - 'build': '20120609030536', - 'ReleaseChannel': 'nightly', - 'addons_checked': None, - 'product': 'WaterWolf', - 'os_name': 'Mac OS X', - 'last_crash': 371342, - 'date_processed': '2012-06-11T06:08:44', - 'cpu_name': 'amd64', - 'reason': 'EXC_BAD_ACCESS / KERN_INVALID_ADDRESS', - 'address': '0x8', - 'completeddatetime': '2012-06-11T06:08:57', - 'success': True, - 'exploitability': 'Unknown Exploitability' - }) - if 'correlations/signatures' in url: return Response({ 'hits': [ @@ -2522,6 +2490,42 @@ def mocked_get(url, params, **options): rget.side_effect = mocked_get + def mocked_processed_crash_get(**params): + assert 'datatype' in params + + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + del crash['json_dump'] + crash['dump'] = dump + return crash + + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + + def mocked_raw_crash_get(**params): + assert 'datatype' in params + + if params['datatype'] == 'meta': + return { + 'InstallTime': '1339289895', + 'FramePoisonSize': '4096', + 'Theme': 'classic/1.0', + 'Version': '5.0a1', + 'Email': 'secret@email.com', + 'Vendor': 'Mozilla', + 'URL': 'farmville.com', + 'additional_minidumps': 'foo, bar,', + } + + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' url = reverse('crashstats:report_index', args=(crash_id,)) response = self.client.get(url) @@ -2565,7 +2569,6 @@ def mocked_get(url, params, **options): @mock.patch('requests.get') def test_report_index_with_symbol_url_in_modules(self, rget, rpost): rpost.side_effect = mocked_post_threeothersigs - dump = 'OS|Mac OS X|10.6.8 10K549\\nCPU|amd64|family 6 mod|1' json_dump = { 'status': 'OK', 'sensitive': { @@ -2598,54 +2601,6 @@ def test_report_index_with_symbol_url_in_modules(self, rget, rpost): } def mocked_get(url, params, **options): - if '/crash_data' in url: - assert 
'datatype' in params - - if params['datatype'] == 'meta': - return Response({ - 'InstallTime': '1339289895', - 'FramePoisonSize': '4096', - 'Theme': 'classic/1.0', - 'Version': '5.0a1', - 'Email': 'secret@email.com', - 'Vendor': 'Mozilla', - 'URL': 'farmville.com', - 'additional_minidumps': 'foo, bar,', - }) - if params['datatype'] == 'unredacted': - return Response({ - 'client_crash_date': '2012-06-11T06:08:45', - # 'dump': 'OS|Mac OS X|10.6.8 10K549\nCPU|amd64', - 'dump': dump, - 'signature': 'FakeSignature1', - 'user_comments': None, - 'uptime': 14693, - 'release_channel': 'nightly', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'flash_version': '[blank]', - 'hangid': None, - 'distributor_version': None, - 'truncated': True, - 'process_type': None, - 'id': 383569625, - 'os_version': '10.6.8 10K549', - 'version': '5.0a1', - 'build': '20120609030536', - 'ReleaseChannel': 'nightly', - 'addons_checked': None, - 'product': 'WaterWolf', - 'os_name': 'Mac OS X', - 'last_crash': 371342, - 'date_processed': '2012-06-11T06:08:44', - 'cpu_name': 'amd64', - 'reason': 'EXC_BAD_ACCESS / KERN_INVALID_ADDRESS', - 'address': '0x8', - 'completeddatetime': '2012-06-11T06:08:57', - 'success': True, - 'exploitability': 'Unknown Exploitability', - 'json_dump': json_dump, - }) - if 'correlations/signatures' in url: return Response({ 'hits': [ @@ -2659,6 +2614,31 @@ def mocked_get(url, params, **options): rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + crash = copy.deepcopy(_SAMPLE_META) + crash['additional_minidumps'] = 'foo, bar,' + return crash + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + crash['json_dump'] = json_dump + return crash + + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' url = reverse('crashstats:report_index', args=(crash_id,)) response = self.client.get(url) @@ -2673,7 +2653,6 @@ def mocked_get(url, params, **options): @mock.patch('crashstats.crashstats.models.Bugs.get') @mock.patch('requests.get') def test_report_index_fennecandroid_report(self, rget, rpost): - dump = 'OS|Mac OS X|10.6.8 10K549\nCPU|amd64|family 6 mod|1' comment0 = 'This is a comment\nOn multiple lines' comment0 += '\npeterbe@mozilla.com' comment0 += '\nwww.p0rn.com' @@ -2681,21 +2660,6 @@ def test_report_index_fennecandroid_report(self, rget, rpost): rpost.side_effect = mocked_post_threeothersigs def mocked_get(url, params, **options): - if '/crash_data' in url: - assert 'datatype' in params - - if params['datatype'] == 'meta': - return Response(_SAMPLE_META) - if params['datatype'] == 'unredacted': - raw_crash = dict( - _SAMPLE_UNREDACTED, - dump=dump, - user_comments=comment0, - ) - raw_crash['product'] = 'WinterSun' - - return Response(raw_crash) - if 'correlations/signatures' in url: return Response({ 'hits': [ @@ -2706,8 +2670,33 @@ def mocked_get(url, params, **options): }) raise NotImplementedError(url) + rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( + 
mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + crash['product'] = 'WinterSun' + return crash + + raise NotImplementedError + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse('crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611']) @@ -2733,7 +2722,6 @@ def test_report_index_odd_product_and_version(self, rget, rpost): """If the processed JSON references an unfamiliar product and version it should not use that to make links in the nav to reports for that unfamiliar product and version.""" - dump = 'OS|Mac OS X|10.6.8 10K549\nCPU|amd64|family 6 mod|1' comment0 = 'This is a comment\nOn multiple lines' comment0 += '\npeterbe@mozilla.com' comment0 += '\nwww.p0rn.com' @@ -2741,21 +2729,6 @@ def test_report_index_odd_product_and_version(self, rget, rpost): rpost.side_effect = mocked_post_threeothersigs def mocked_get(url, params, **options): - if '/crash_data' in url: - assert 'datatype' in params - - if params['datatype'] == 'meta': - return Response(_SAMPLE_META) - if params['datatype'] == 'unredacted': - processed = dict( - _SAMPLE_UNREDACTED, - dump=dump, - user_comments=comment0, - ) - processed['product'] = 'SummerWolf' - processed['version'] = '99.9' - return Response(processed) - if 'correlations/signatures' in url: return Response({ 'hits': [ @@ -2766,8 +2739,34 @@ def mocked_get(url, params, **options): }) raise NotImplementedError(url) + rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError(params) + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + crash['product'] = 'SummerWolf' + crash['version'] = '99.9' + return crash + + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse('crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611']) response = self.client.get(url) @@ -2792,24 +2791,39 @@ def test_report_index_correlations_failed(self, rget, rpost): rpost.side_effect = mocked_post_threeothersigs def mocked_get(url, params, **options): - if '/crash_data' in url: - assert 'datatype' in params - - if params['datatype'] == 'meta': - return Response(_SAMPLE_META) - if params['datatype'] == 'unredacted': - return Response(dict( - _SAMPLE_UNREDACTED, - dump=dump, - user_comments=comment0, - )) - if 'correlations/signatures' in url: raise models.BadStatusCodeError(500) raise NotImplementedError(url) + rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError(params) + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + return copy.deepcopy(dict( + _SAMPLE_UNREDACTED, + dump=dump, + user_comments=comment0, + )) + + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url 
= reverse('crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611']) response = self.client.get(url) @@ -2818,33 +2832,40 @@ def mocked_get(url, params, **options): @mock.patch('crashstats.crashstats.models.Bugs.get') @mock.patch('requests.get') def test_report_index_no_dump(self, rget, rpost): - dump = '' - comment0 = 'This is a comment' - rpost.side_effect = mocked_post_threesigs def mocked_get(url, params, **options): - if '/crash_data' in url: - assert 'datatype' in params - - if params['datatype'] == 'meta': - return Response(_SAMPLE_META) - if params['datatype'] == 'unredacted': - data = dict( - _SAMPLE_UNREDACTED, - dump=dump, - user_comments=comment0, - ) - del data['dump'] - del data['json_dump'] - return Response(data) - if 'correlations/signatures' in url: raise models.BadStatusCodeError(500) raise NotImplementedError(url) + rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + del crash['json_dump'] + return crash + + raise NotImplementedError(url) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse('crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611']) response = self.client.get(url) @@ -2860,53 +2881,12 @@ def test_report_index_invalid_crash_id(self): ok_('Invalid crash ID' in response.content) eq_(response['Content-Type'], 'text/html; charset=utf-8') - @mock.patch('requests.get') - def test_report_pending_today(self, rget): - def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - raise models.BadStatusCodeError(404) - - rget.side_effect = mocked_get - - today = datetime.datetime.utcnow().strftime('%y%m%d') - url = reverse('crashstats:report_index', - args=['11cb72f5-eb28-41e1-a8e4-849982%s' % today]) - response = self.client.get(url) - ok_('pendingStatus' in response.content) - eq_(response.status_code, 200) - - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) - yesterday = yesterday.strftime('%y%m%d') - url = reverse('crashstats:report_index', - args=['11cb72f5-eb28-41e1-a8e4-849982%s' % yesterday]) - response = self.client.get(url) - ok_('Crash Not Found' in response.content) - eq_(response.status_code, 200) - - url = reverse('crashstats:report_index', - args=['blablabla']) - response = self.client.get(url) - eq_(response.status_code, 400) - @mock.patch('crashstats.crashstats.models.Bugs.get') @mock.patch('requests.get') def test_report_index_with_valid_install_time(self, rget, rpost): rpost.side_effect = mocked_post_123 def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - 'InstallTime': '1461170304', - 'Version': '5.0a1', - }) if 'crashes/comments' in url: return Response({ 'hits': [], @@ -2918,26 +2898,35 @@ def mocked_get(url, params, **options): 'total': 0, }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ - 'dump': 'some dump', - 'signature': 'FakeSignature1', - 'uuid': 
'11cb72f5-eb28-41e1-a8e4-849982120611', - 'process_type': None, - 'os_name': 'Windows NT', - 'product': 'WaterWolf', - 'version': '1.0', - 'cpu_name': 'amd64', - }) - raise NotImplementedError(url) rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return { + 'InstallTime': '1461170304', + 'Version': '5.0a1', + } + + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + return copy.deepcopy(_SAMPLE_UNREDACTED) + + raise NotImplementedError + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse( 'crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611'] @@ -2954,17 +2943,6 @@ def test_report_index_with_invalid_install_time(self, rget, rpost): rpost.side_effect = mocked_post_123 def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - 'InstallTime': 'Not a number', - 'Version': '5.0a1', - 'Email': '', - 'URL': None, - }) if 'crashes/comments' in url: return Response({ 'hits': [], @@ -2976,25 +2954,34 @@ def mocked_get(url, params, **options): 'total': 0 }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ - 'dump': 'some dump', - 'signature': 'FakeSignature1', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'process_type': None, - 'os_name': 'Windows NT', - 'product': 'WaterWolf', - 'version': '1.0', - 'cpu_name': 'amd64', - }) raise NotImplementedError(url) rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + crash = copy.deepcopy(_SAMPLE_META) + crash['InstallTime'] = 'Not a number' + return crash + + raise NotImplementedError(params) + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + return copy.deepcopy(_SAMPLE_UNREDACTED) + + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse( 'crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611'] @@ -3015,17 +3002,6 @@ def test_report_index_known_total_correlations(self, rget, rpost): rpost.side_effect = mocked_post_123 def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - 'InstallTime': 'Not a number', - 'Version': '5.0a1', - 'Email': '', - 'URL': None, - }) if 'crashes/comments' in url: return Response({ 'hits': [], @@ -3037,12 +3013,25 @@ def mocked_get(url, params, **options): 'total': 0 }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ + raise NotImplementedError(url) + + rget.side_effect = mocked_get + + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError(params) + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in 
params + if params['datatype'] == 'unredacted': + return { 'dump': 'some dump', 'signature': 'FakeSignature1', 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', @@ -3051,10 +3040,13 @@ def mocked_get(url, params, **options): 'product': 'WaterWolf', 'version': '1.0', 'cpu_name': 'amd64', - }) - raise NotImplementedError(url) + } - rget.side_effect = mocked_get + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) url = reverse( 'crashstats:report_index', @@ -3087,17 +3079,6 @@ def test_report_index_empty_os_name(self, rget, rpost): rpost.side_effect = mocked_post_123 def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - 'InstallTime': 'Not a number', - 'Version': '5.0a1', - 'Email': '', - 'URL': None, - }) if 'crashes/comments' in url: return Response({ 'hits': [], @@ -3109,25 +3090,34 @@ def mocked_get(url, params, **options): 'total': 0 }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ - 'dump': 'some dump', - 'signature': 'FakeSignature1', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'process_type': None, - 'os_name': None, - 'product': 'WaterWolf', - 'version': '1.0', - 'cpu_name': 'amd64', - }) raise NotImplementedError(url) rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + crash['os_name'] = None + return crash + + raise NotImplementedError + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse( 'crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611'] @@ -3172,28 +3162,11 @@ def test_report_index_with_invalid_parsed_dump(self, rget, rpost): } comment0 = "This is a comment" - email0 = "some@emailaddress.com" - url0 = "someaddress.com" email1 = "some@otheremailaddress.com" rpost.side_effect = mocked_post_123 def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - "InstallTime": "Not a number", - "FramePoisonSize": "4096", - "Theme": "classic/1.0", - "Version": "5.0a1", - "Email": email0, - "Vendor": "Mozilla", - "URL": url0, - "HangID": "123456789" - }) if 'crashes/comments' in url: return Response({ "hits": [ @@ -3215,45 +3188,34 @@ def mocked_get(url, params, **options): "total": 2 }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ - "client_crash_date": "2012-06-11T06:08:45", - "json_dump": json_dump, - "signature": "FakeSignature1", - "user_comments": None, - "uptime": 14693, - "release_channel": "nightly", - "uuid": "11cb72f5-eb28-41e1-a8e4-849982120611", - "flash_version": "[blank]", - "hangid": None, - "distributor_version": None, - "truncated": True, - "process_type": None, - "id": 383569625, - "os_version": "10.6.8 10K549", - "version": "5.0a1", - "build": "20120609030536", - "ReleaseChannel": "nightly", - "addons_checked": None, - "product": "WaterWolf", - "os_name": "Mac OS X", - 
"last_crash": 371342, - "date_processed": "2012-06-11T06:08:44", - "cpu_name": "amd64", - "reason": "EXC_BAD_ACCESS / KERN_INVALID_ADDRESS", - "address": "0x8", - "completeddatetime": "2012-06-11T06:08:57", - "success": True, - "exploitability": "Unknown Exploitability" - }) raise NotImplementedError(url) rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + crash['json_dump'] = json_dump + return crash + + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse('crashstats:report_index', args=['11cb72f5-eb28-41e1-a8e4-849982120611']) response = self.client.get(url) @@ -3265,28 +3227,11 @@ def test_report_index_with_sparse_json_dump(self, rget, rpost): json_dump = {'status': 'ERROR_NO_MINIDUMP_HEADER', 'sensitive': {}} comment0 = 'This is a comment' - email0 = 'some@emailaddress.com' - url0 = 'someaddress.com' email1 = 'some@otheremailaddress.com' rpost.side_effect = mocked_post_123 def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - 'InstallTime': 'Not a number', - 'FramePoisonSize': '4096', - 'Theme': 'classic/1.0', - 'Version': '5.0a1', - 'Email': email0, - 'Vendor': 'Mozilla', - 'URL': url0, - 'HangID': '123456789', - }) if 'crashes/comments' in url: return Response({ 'hits': [ @@ -3308,45 +3253,34 @@ def mocked_get(url, params, **options): 'total': 2 }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ - 'client_crash_date': '2012-06-11T06:08:45', - 'json_dump': json_dump, - 'signature': 'FakeSignature1', - 'user_comments': None, - 'uptime': 14693, - 'release_channel': 'nightly', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'flash_version': '[blank]', - 'hangid': None, - 'distributor_version': None, - 'truncated': True, - 'process_type': None, - 'id': 383569625, - 'os_version': '10.6.8 10K549', - 'version': '5.0a1', - 'build': '20120609030536', - 'ReleaseChannel': 'nightly', - 'addons_checked': None, - 'product': 'WaterWolf', - 'os_name': 'Mac OS X', - 'last_crash': 371342, - 'date_processed': '2012-06-11T06:08:44', - 'cpu_name': 'amd64', - 'reason': 'EXC_BAD_ACCESS / KERN_INVALID_ADDRESS', - 'address': '0x8', - 'completeddatetime': '2012-06-11T06:08:57', - 'success': True, - 'exploitability': 'Unknown Exploitability' - }) - raise NotImplementedError(url) + rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + crash['json_dump'] = json_dump + return crash + + raise NotImplementedError + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse('crashstats:report_index', 
args=['11cb72f5-eb28-41e1-a8e4-849982120611']) response = self.client.get(url) @@ -3355,10 +3289,7 @@ def mocked_get(url, params, **options): @mock.patch('crashstats.crashstats.models.Bugs.get') @mock.patch('requests.get') def test_report_index_with_crash_exploitability(self, rget, rpost): - dump = 'OS|Mac OS X|10.6.8 10K549\\nCPU|amd64|family 6 mod|1' comment0 = 'This is a comment' - email0 = 'some@emailaddress.com' - url0 = 'someaddress.com' email1 = 'some@otheremailaddress.com' crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' @@ -3366,21 +3297,6 @@ def test_report_index_with_crash_exploitability(self, rget, rpost): rpost.side_effect = mocked_post_123 def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - 'InstallTime': 'Not a number', - 'FramePoisonSize': '4096', - 'Theme': 'classic/1.0', - 'Version': '5.0a1', - 'Email': email0, - 'Vendor': 'Mozilla', - 'URL': url0, - 'HangID': '123456789', - }) if '/crashes/comments' in url: return Response({ 'hits': [ @@ -3402,45 +3318,34 @@ def mocked_get(url, params, **options): 'total': 2 }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ - 'client_crash_date': '2012-06-11T06:08:45', - 'dump': dump, - 'signature': 'FakeSignature1', - 'user_comments': None, - 'uptime': 14693, - 'release_channel': 'nightly', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'flash_version': '[blank]', - 'hangid': None, - 'distributor_version': None, - 'truncated': True, - 'process_type': None, - 'id': 383569625, - 'os_version': '10.6.8 10K549', - 'version': '5.0a1', - 'build': '20120609030536', - 'ReleaseChannel': 'nightly', - 'addons_checked': None, - 'product': 'WaterWolf', - 'os_name': 'Mac OS X', - 'last_crash': 371342, - 'date_processed': '2012-06-11T06:08:44', - 'cpu_name': 'amd64', - 'reason': 'EXC_BAD_ACCESS / KERN_INVALID_ADDRESS', - 'address': '0x8', - 'completeddatetime': '2012-06-11T06:08:57', - 'success': True, - 'exploitability': 'Unknown Exploitability', - }) raise NotImplementedError(url) rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError(params) + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + crash['exploitability'] = 'Unknown Exploitability' + return crash + + raise NotImplementedError + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + url = reverse('crashstats:report_index', args=[crash_id]) response = self.client.get(url) @@ -3455,260 +3360,103 @@ def mocked_get(url, params, **options): ok_('Exploitability' in response.content) ok_('Unknown Exploitability' in response.content) - @mock.patch('requests.get') - def test_report_index_processed_crash_not_found(self, rget): - crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' - - def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - raise models.BadStatusCodeError(404) - - raise NotImplementedError(url) - rget.side_effect = mocked_get - - url = reverse('crashstats:report_index', - args=[crash_id]) - response = self.client.get(url) - - 
eq_(response.status_code, 200) - ok_('Crash Not Found' in response.content) - @mock.patch('crashstats.crashstats.models.Bugs.get') - @mock.patch('requests.get') - def test_report_index_raw_crash_not_found(self, rget, rpost): + def test_report_index_raw_crash_not_found(self, rpost): crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' - dump = 'OS|Mac OS X|10.6.8 10K549\\nCPU|amd64|family 6 mod|1' rpost.side_effect = mocked_post_123 - def mocked_get(url, params, **options): - assert '/crash_data/' in url + def mocked_raw_crash_get(**params): assert 'datatype' in params - if params['datatype'] == 'unredacted': - return Response({ - 'client_crash_date': '2012-06-11T06:08:45', - 'dump': dump, - 'signature': 'FakeSignature1', - 'user_comments': None, - 'uptime': 14693, - 'release_channel': 'nightly', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'flash_version': '[blank]', - 'hangid': None, - 'distributor_version': None, - 'truncated': True, - 'process_type': None, - 'id': 383569625, - 'os_version': '10.6.8 10K549', - 'version': '5.0a1', - 'build': '20120609030536', - 'ReleaseChannel': 'nightly', - 'addons_checked': None, - 'product': 'WaterWolf', - 'os_name': 'Mac OS X', - 'last_crash': 371342, - 'date_processed': '2012-06-11T06:08:44', - 'cpu_name': 'amd64', - 'reason': 'EXC_BAD_ACCESS / KERN_INVALID_ADDRESS', - 'address': '0x8', - 'completeddatetime': '2012-06-11T06:08:57', - 'success': True, - 'exploitability': 'Unknown Exploitability' - }) - elif params['datatype'] == 'meta': # raw crash json! - raise models.BadStatusCodeError(404) + if params['datatype'] == 'meta': + raise CrashIDNotFound(params['uuid']) - raise NotImplementedError(url) + raise NotImplementedError - rget.side_effect = mocked_get + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) url = reverse('crashstats:report_index', args=[crash_id]) response = self.client.get(url) - eq_(response.status_code, 200) + eq_(response.status_code, 404) ok_('Crash Not Found' in response.content) - @mock.patch('requests.get') - def test_report_index_pending(self, rget): - crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' - - def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - raise models.BadStatusCodeError(408) - - raise NotImplementedError(url) - rget.side_effect = mocked_get - - url = reverse('crashstats:report_index', - args=[crash_id]) - response = self.client.get(url) - - eq_(response.status_code, 200) - ok_('Fetching this archived report' in response.content) - - @mock.patch('requests.get') - def test_report_index_too_old(self, rget): + @mock.patch('crashstats.crashstats.models.Bugs.get') + def test_report_index_processed_crash_not_found(self, rpost): crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' - def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - raise models.BadStatusCodeError(410) - - raise NotImplementedError(url) - rget.side_effect = mocked_get + rpost.side_effect = mocked_post_123 - url = reverse('crashstats:report_index', - args=[crash_id]) - response = self.client.get(url) + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) - eq_(response.status_code, 200) - ok_('This archived report has expired' in response.content) + raise NotImplementedError - @mock.patch('requests.get') - def test_report_index_other_error(self, rget): 
- crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) - def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response('Scary Error', status_code=500) + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + raise CrashIDNotFound(params['uuid']) - raise NotImplementedError(url) - rget.side_effect = mocked_get + raise NotImplementedError - url = reverse('crashstats:report_index', - args=[crash_id]) - assert_raises( - models.BadStatusCodeError, - self.client.get, - url + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get ) - # Let's also check that we get the response in the exception - # message. - try: - self.client.get(url) - assert False # shouldn't get here - except models.BadStatusCodeError as exception: - ok_('Scary Error' in str(exception)) - # and it should include the URL it used - mware_url = models.UnredactedCrash.base_url + '/crash_data/' - ok_(mware_url in str(exception)) - @mock.patch('requests.get') - def test_report_pending_json(self, rget): - crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' - - def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - raise models.BadStatusCodeError(408) + def mocked_priority_job_process(**params): + assert params['crash_ids'] == [crash_id] + return True - raise NotImplementedError(url) - - rget.side_effect = mocked_get + models.Priorityjob.implementation().process.side_effect = ( + mocked_priority_job_process + ) - url = reverse('crashstats:report_pending', - args=[crash_id]) + url = reverse('crashstats:report_index', args=[crash_id]) response = self.client.get(url) - expected = { - 'status': 'error', - 'status_message': ('The report for %s' - ' is not available yet.' % crash_id), - 'url_redirect': '' - } - eq_(response.status_code, 200) - eq_(expected, json.loads(response.content)) - - def test_report_index_and_pending_missing_crash_id(self): - url = reverse('crashstats:report_index', args=['']) - response = self.client.get(url) - eq_(response.status_code, 404) - - url = reverse('crashstats:report_pending', args=['']) - response = self.client.get(url) - eq_(response.status_code, 404) + ok_('Please wait...' 
in response.content) + ok_( + 'Processing this crash report only takes a few seconds' in + response.content + ) @mock.patch('crashstats.crashstats.models.Bugs.get') - @mock.patch('requests.get') - def test_report_index_with_invalid_date_processed(self, rget, rpost): + def test_report_index_with_invalid_date_processed(self, rpost): crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' - def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - 'InstallTime': 'Not a number', - 'FramePoisonSize': '4096', - 'Theme': 'classic/1.0', - 'Version': '5.0a1', - 'Email': None, - 'Vendor': 'Mozilla', - 'URL': None, - 'HangID': '123456789', - }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ - 'client_crash_date': '2012-06-11T06:08:45', - 'dump': 'anything', - 'signature': 'FakeSignature1', - 'user_comments': None, - 'uptime': 14693, - 'release_channel': 'nightly', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'flash_version': '[blank]', - 'hangid': None, - 'distributor_version': None, - 'truncated': True, - 'process_type': None, - 'id': 383569625, - 'os_version': '10.6.8 10K549', - 'version': '5.0a1', - 'build': '20120609030536', - 'ReleaseChannel': 'nightly', - 'addons_checked': None, - 'product': 'WaterWolf', - 'os_name': 'Mac OS X', - 'last_crash': 371342, - # NOTE! A wanna-be valid date that is not valid - 'date_processed': '2015-10-10 15:32:07.620535', - 'cpu_name': 'amd64', - 'reason': 'EXC_BAD_ACCESS / KERN_INVALID_ADDRESS', - 'address': '0x8', - 'completeddatetime': '2012-06-11T06:08:57', - 'success': True, - 'exploitability': 'Unknown Exploitability', - }) - raise NotImplementedError(url) + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) - rget.side_effect = mocked_get + raise NotImplementedError(params) + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + crash = copy.deepcopy(_SAMPLE_UNREDACTED) + # NOTE! 
A wanna-be valid date that is not valid + crash['date_processed'] = '2015-10-10 15:32:07.620535' + return crash + raise NotImplementedError + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) url = reverse('crashstats:report_index', args=[crash_id]) @@ -4802,30 +4550,12 @@ def mocked_get(url, params, **options): @mock.patch('crashstats.crashstats.models.Bugs.get') @mock.patch('requests.get') def test_report_index_redirect_by_prefix(self, rget, rpost): - - dump = "OS|Mac OS X|10.6.8 10K549\\nCPU|amd64|family 6 mod|1" comment0 = "This is a comment" - email0 = "some@emailaddress.com" - url0 = "someaddress.com" email1 = "some@otheremailaddress.com" rpost.side_effect = mocked_post_123 def mocked_get(url, params, **options): - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'meta' - ): - return Response({ - 'InstallTime': '1339289895', - 'FramePoisonSize': '4096', - 'Theme': 'classic/1.0', - 'Version': '5.0a1', - 'Email': email0, - 'Vendor': 'Mozilla', - 'URL': url0 - }) if 'crashes/comments' in url: return Response({ 'hits': [ @@ -4839,42 +4569,6 @@ def mocked_get(url, params, **options): 'total': 1 }) - if ( - '/crash_data' in url and - 'datatype' in params and - params['datatype'] == 'unredacted' - ): - return Response({ - 'client_crash_date': '2012-06-11T06:08:45', - 'dump': dump, - 'signature': 'FakeSignature1', - 'user_comments': None, - 'uptime': 14693, - 'release_channel': 'nightly', - 'uuid': '11cb72f5-eb28-41e1-a8e4-849982120611', - 'flash_version': '[blank]', - 'hangid': None, - 'distributor_version': None, - 'truncated': True, - 'process_type': None, - 'id': 383569625, - 'os_version': '10.6.8 10K549', - 'version': '5.0a1', - 'build': '20120609030536', - 'ReleaseChannel': 'nightly', - 'addons_checked': None, - 'product': 'WaterWolf', - 'os_name': 'Mac OS X', - 'last_crash': 371342, - 'date_processed': '2012-06-11T06:08:44', - 'cpu_name': 'amd64', - 'reason': 'EXC_BAD_ACCESS / KERN_INVALID_ADDRESS', - 'address': '0x8', - 'completeddatetime': '2012-06-11T06:08:57', - 'success': True, - 'exploitability': 'Unknown Exploitability' - }) - if 'correlations/signatures' in url: return Response({ 'hits': [ @@ -4888,6 +4582,28 @@ def mocked_get(url, params, **options): rget.side_effect = mocked_get + def mocked_raw_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'meta': + return copy.deepcopy(_SAMPLE_META) + + raise NotImplementedError(params) + + models.RawCrash.implementation().get.side_effect = ( + mocked_raw_crash_get + ) + + def mocked_processed_crash_get(**params): + assert 'datatype' in params + if params['datatype'] == 'unredacted': + return copy.deepcopy(_SAMPLE_UNREDACTED) + + raise NotImplementedError(params) + + models.UnredactedCrash.implementation().get.side_effect = ( + mocked_processed_crash_get + ) + base_crash_id = '11cb72f5-eb28-41e1-a8e4-849982120611' crash_id = settings.CRASH_ID_PREFIX + base_crash_id assert len(crash_id) > 36 @@ -4917,22 +4633,19 @@ def mocked_get(url, params, **options): ok_('' not in response.content) # it's a partial ok_('no reports in the time period specified' in response.content) - @mock.patch('requests.get') - def test_raw_data(self, rget): - def mocked_get(url, params, **options): - assert '/crash_data' in url + def test_raw_data(self): + + def mocked_get(**params): if 'datatype' in params and params['datatype'] == 'raw': - return Response(""" - bla bla bla - """.strip()) + return "bla bla bla" else: # default is datatype/meta - return 
Response({ + return { 'foo': 'bar', 'stuff': 123, - }) + } - rget.side_effect = mocked_get + models.RawCrash.implementation().get.side_effect = mocked_get crash_id = '176bcd6c-c2ec-4b0c-9d5f-dadea2120531' json_url = reverse('crashstats:raw_data', args=(crash_id, 'json')) @@ -4962,14 +4675,10 @@ def mocked_get(url, params, **options): # dump files are cached. # check the mock function and expect no change - def different_mocked_get(url, **options): - if '/crash_data' in url and 'datatype=raw' in url: - return Response(""" - SOMETHING DIFFERENT - """.strip()) - raise NotImplementedError(url) + def different_mocked_get(**params): + raise AssertionError("shouldn't be used due to caching") - rget.side_effect = different_mocked_get + models.RawCrash.implementation().get.side_effect = different_mocked_get response = self.client.get(dump_url) eq_(response.status_code, 200) diff --git a/webapp-django/crashstats/crashstats/urls.py b/webapp-django/crashstats/crashstats/urls.py index 619bd863b1..9ab148bede 100644 --- a/webapp-django/crashstats/crashstats/urls.py +++ b/webapp-django/crashstats/crashstats/urls.py @@ -90,15 +90,9 @@ url(r'^exploitability/$', views.exploitability_report, name='exploitability_report'), - url(r'^report/index/(?P.*)$', + url(r'^report/index/(?P[\w-]+)$', views.report_index, name='report_index'), - # make the suffix `_ajax` optional there. - # we prefer report/pending/XXX but because of legacy we need to - # support report/pending_ajax/XXX too - url(r'^report/pending(_ajax)?/(?P.*)$', - views.report_pending, - name='report_pending'), url(r'^search/quick/$', views.quick_search, name='quick_search'), diff --git a/webapp-django/crashstats/crashstats/views.py b/webapp-django/crashstats/crashstats/views.py index cd1782479f..ea18991bd7 100644 --- a/webapp-django/crashstats/crashstats/views.py +++ b/webapp-django/crashstats/crashstats/views.py @@ -22,6 +22,7 @@ from session_csrf import anonymous_csrf +from socorro.external.crashstorage_base import CrashIDNotFound from . 
import forms, models, utils from .decorators import check_days_parameter, pass_default_context @@ -1334,8 +1335,6 @@ def exploitability_report(request, default_context=None): @pass_default_context def report_index(request, crash_id, default_context=None): - if not crash_id: - raise http.Http404('Crash id is missing') valid_crash_id = utils.find_crash_id(crash_id) if not valid_crash_id: return http.HttpResponseBadRequest('Invalid crash ID') @@ -1353,33 +1352,26 @@ def report_index(request, crash_id, default_context=None): context = default_context or {} context['crash_id'] = crash_id - api = models.UnredactedCrash() - - def handle_middleware_404(crash_id, error_code): - if error_code == 404: - # if crash was submitted today, send to pending screen - crash_date = datetime.datetime.strptime(crash_id[-6:], '%y%m%d') - crash_age = datetime.datetime.utcnow() - crash_date - if crash_age < datetime.timedelta(days=1): - tmpl = 'crashstats/report_index_pending.html' - else: - tmpl = 'crashstats/report_index_not_found.html' - return render(request, tmpl, context) - elif error_code == 408: - return render(request, - 'crashstats/report_index_pending.html', context) - elif error_code == 410: - return render(request, - 'crashstats/report_index_too_old.html', context) - - # this is OK because this function is expected to be called within - # an exception stack frame - raise + raw_api = models.RawCrash() + try: + context['raw'] = raw_api.get(crash_id=crash_id) + except CrashIDNotFound: + # If the raw crash can't be found, we can't do much. + tmpl = 'crashstats/report_index_not_found.html' + return render(request, tmpl, context, status=404) + api = models.UnredactedCrash() try: context['report'] = api.get(crash_id=crash_id) - except models.BadStatusCodeError as e: - return handle_middleware_404(crash_id, e.status) + except CrashIDNotFound: + # ...if we haven't already done so. + cache_key = 'priority_job:{}'.format(crash_id) + if not cache.get(cache_key): + priority_api = models.Priorityjob() + priority_api.post(crash_ids=[crash_id]) + cache.set(cache_key, True, 60) + tmpl = 'crashstats/report_index_pending.html' + return render(request, tmpl, context) if 'json_dump' in context['report']: json_dump = context['report']['json_dump'] @@ -1434,12 +1426,6 @@ def handle_middleware_404(crash_id, error_code): reverse=True ) - raw_api = models.RawCrash() - try: - context['raw'] = raw_api.get(crash_id=crash_id) - except models.BadStatusCodeError as e: - return handle_middleware_404(crash_id, e.status) - context['raw_keys'] = [] if request.user.has_perm('crashstats.view_pii'): # hold nothing back @@ -1449,7 +1435,8 @@ def handle_middleware_404(crash_id, error_code): x for x in context['raw'] if x in models.RawCrash.API_WHITELIST ] - context['raw_keys'].sort(key=unicode.lower) + # Sort keys case-insensitively + context['raw_keys'].sort(key=lambda s: s.lower()) if request.user.has_perm('crashstats.view_rawdump'): context['raw_dump_urls'] = [ @@ -1523,37 +1510,6 @@ def handle_middleware_404(crash_id, error_code): return render(request, 'crashstats/report_index.html', context) -@utils.json_view -def report_pending(request, crash_id): - if not crash_id: - raise http.Http404("Crash id is missing") - - data = {} - - url = reverse('crashstats:report_index', kwargs=dict(crash_id=crash_id)) - - api = models.UnredactedCrash() - - try: - data['report'] = api.get(crash_id=crash_id) - status = 'ready' - status_message = 'The report for %s is now available.' 
% crash_id - url_redirect = "%s" % url - except models.BadStatusCodeError as e: - if str(e).startswith('5'): - raise - status = 'error' - status_message = 'The report for %s is not available yet.' % crash_id - url_redirect = '' - - data = { - "status": status, - "status_message": status_message, - "url_redirect": url_redirect - } - return data - - @pass_default_context def report_list(request, partial=None, default_context=None): context = default_context or {} diff --git a/webapp-django/crashstats/manage/decorators.py b/webapp-django/crashstats/manage/decorators.py new file mode 100644 index 0000000000..7329e34a6b --- /dev/null +++ b/webapp-django/crashstats/manage/decorators.py @@ -0,0 +1,26 @@ +from django.contrib.auth.decorators import ( + REDIRECT_FIELD_NAME, + user_passes_test, +) + + +def superuser_required( + function=None, + redirect_field_name=REDIRECT_FIELD_NAME, + login_url=None +): + """Same logic as in login_required() (see doc string above) but with + the additional check that we require you to be superuser also. + """ + + def check_user(user): + return user.is_active and user.is_superuser + + actual_decorator = user_passes_test( + check_user, + login_url=login_url, + redirect_field_name=redirect_field_name + ) + if function: + return actual_decorator(function) + return actual_decorator diff --git a/webapp-django/crashstats/manage/tests/test_views.py b/webapp-django/crashstats/manage/tests/test_views.py index e1bec5a8c4..66205583d7 100644 --- a/webapp-django/crashstats/manage/tests/test_views.py +++ b/webapp-django/crashstats/manage/tests/test_views.py @@ -93,7 +93,10 @@ def test_home_page_not_signed_in(self): self._login(is_superuser=False) response = self.client.get(home_url, follow=True) assert response.status_code == 200 - ok_('You need to be a superuser to access this' in response.content) + ok_( + 'You are signed in but you do not have sufficient permissions ' + 'to reach the resource you requested.' in response.content + ) def test_home_page_signed_in(self): user = self._login() diff --git a/webapp-django/crashstats/manage/views.py b/webapp-django/crashstats/manage/views.py index 5502b0e108..3cb5e6bd67 100644 --- a/webapp-django/crashstats/manage/views.py +++ b/webapp-django/crashstats/manage/views.py @@ -1,6 +1,5 @@ import collections import copy -import functools import hashlib import math import urllib @@ -38,6 +37,7 @@ from crashstats.status.models import StatusMessage from crashstats.symbols.models import SymbolsUpload from crashstats.crashstats.utils import json_view +from crashstats.manage.decorators import superuser_required from . import forms from . import utils @@ -86,21 +86,6 @@ def notice_change(before, after): raise NotImplementedError(before.__class__.__name__) -def superuser_required(view_func): - @functools.wraps(view_func) - def inner(request, *args, **kwargs): - if not request.user.is_active: - return redirect(settings.LOGIN_URL) - elif not request.user.is_superuser: - messages.error( - request, - 'You need to be a superuser to access this.' 
- ) - return redirect('home:home', settings.DEFAULT_PRODUCT) - return view_func(request, *args, **kwargs) - return inner - - @superuser_required def home(request, default_context=None): context = default_context or {} diff --git a/webapp-django/crashstats/settings/base.py b/webapp-django/crashstats/settings/base.py index a3f7fbccf5..460663d8ff 100644 --- a/webapp-django/crashstats/settings/base.py +++ b/webapp-django/crashstats/settings/base.py @@ -687,6 +687,9 @@ def path(*dirs): 'rabbitmq_user': config('RABBITMQ_USER', ''), 'rabbitmq_password': config('RABBITMQ_PASSWORD', ''), }, + 'boto': { + 'secret_access_key': config('secrets.boto.secret_access_key', ''), + }, }, 'resource': { 'elasticsearch': { @@ -715,6 +718,16 @@ def path(*dirs): 'virtual_host': config('RABBITMQ_VIRTUAL_HOST', '/'), 'port': config('RABBITMQ_PORT', 5672), }, + 'boto': { + 'access_key': config('resource.boto.access_key', ''), + 'bucket_name': config( + 'resource.boto.bucket_name', 'crashstats'), + 'prefix': config('resource.boto.prefix', ''), + 'keybuilder_class': config( + 'resource.boto.keybuilder_class', + 'socorro.external.boto.connection_context.DatePrefixKeyBuilder' + ), + } } } diff --git a/webapp-django/crashstats/settings/bundles.py b/webapp-django/crashstats/settings/bundles.py index d267d3b22a..87f3290863 100644 --- a/webapp-django/crashstats/settings/bundles.py +++ b/webapp-django/crashstats/settings/bundles.py @@ -87,6 +87,12 @@ ), 'output_filename': 'css/report-index.min.css', }, + 'report_pending': { + 'source_filenames': ( + 'crashstats/css/report_pending.less', + ), + 'output_filename': 'css/report-pending.min.css', + }, 'report_list': { 'source_filenames': ( 'crashstats/css/report_list.less', @@ -333,6 +339,12 @@ ), 'output_filename': 'js/report-index.min.js', }, + 'report_pending': { + 'source_filenames': ( + 'crashstats/js/socorro/pending.js', + ), + 'output_filename': 'js/report-pending.min.js', + }, 'report_list': { 'source_filenames': ( 'crashstats/js/jquery/plugins/jquery.cookie.js',