Skip to content

Commit

Permalink
Merge pull request #304 from GSA/ingest-new-sources
Browse files: browse the repository at this point in the history.
Ingest Update
  • Loading branch information
akuny authored Feb 23, 2024
2 parents 35dca73 + 3296707 commit fd4c0c8
Show file tree
Hide file tree
Showing 12 changed files with 76 additions and 23 deletions.
13 changes: 7 additions & 6 deletions entities/website.entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,15 @@ export class Website {
@Expose({ name: 'target_url_agency_owner' })
agency: string;

@Column({
nullable: true,
})
@Column({ nullable: true })
@Exclude()
agencyCode?: number;

@Column()
@Expose({ name: 'target_url_bureau_owner' })
bureau: string;

@Column({
nullable: true,
})
@Column({ nullable: true })
@Exclude()
bureauCode?: number;

Expand All @@ -72,6 +68,11 @@ export class Website {
})
sourceList?: string;

@Column({ nullable: true })
@Expose({ name: 'omb_idea_public' })
@Exclude()
ombIdeaPublic?: boolean;

serialized() {
const serializedWebsite = classToPlain(this);
const serializedCoreResult = classToPlain(this.coreResult);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ describe('AnalysisService', () => {
service = new AnalysisService(websiteRepository);
});

afterEach(() => db.close());
afterEach(() => db.destroy());

it('repository should be defined', () => {
expect(websiteRepository).toBeDefined();
Expand All @@ -46,6 +46,7 @@ describe('AnalysisService', () => {
website.agencyCode = 10;
website.bureauCode = 10;
website.sourceList = 'gov';
website.ombIdeaPublic = false;

const coreResult = createCoreResult(website);
coreResult.finalUrlBaseDomain = '18f.gov';
Expand Down Expand Up @@ -81,6 +82,7 @@ describe('AnalysisService', () => {
firstWebsite.agencyCode = 10;
firstWebsite.bureauCode = 10;
firstWebsite.sourceList = 'gov';
firstWebsite.ombIdeaPublic = false;

const secondWebsite = new Website();
secondWebsite.url = 'https://fake.gov';
Expand All @@ -91,6 +93,7 @@ describe('AnalysisService', () => {
secondWebsite.agencyCode = 10;
secondWebsite.bureauCode = 10;
secondWebsite.sourceList = 'gov';
secondWebsite.ombIdeaPublic = false;

const firstCoreResult = createCoreResult(firstWebsite);
firstCoreResult.finalUrlBaseDomain = '18f.gov';
Expand Down Expand Up @@ -130,6 +133,7 @@ describe('AnalysisService', () => {
firstWebsite.agencyCode = 10;
firstWebsite.bureauCode = 10;
firstWebsite.sourceList = 'gov';
firstWebsite.ombIdeaPublic = false;

const secondWebsite = new Website();
secondWebsite.url = 'https://fake.gov';
Expand All @@ -140,6 +144,7 @@ describe('AnalysisService', () => {
secondWebsite.agencyCode = 10;
secondWebsite.bureauCode = 10;
secondWebsite.sourceList = 'gov';
secondWebsite.ombIdeaPublic = false;

const firstCoreResult = createCoreResult(firstWebsite);
firstCoreResult.finalUrlBaseDomain = '18f.gov';
Expand Down Expand Up @@ -179,6 +184,7 @@ describe('AnalysisService', () => {
firstWebsite.agencyCode = 10;
firstWebsite.bureauCode = 10;
firstWebsite.sourceList = 'gov';
firstWebsite.ombIdeaPublic = false;

const secondWebsite = new Website();
secondWebsite.url = 'https://fake.gov';
Expand All @@ -189,6 +195,7 @@ describe('AnalysisService', () => {
secondWebsite.agencyCode = 10;
secondWebsite.bureauCode = 10;
secondWebsite.sourceList = 'gov';
secondWebsite.ombIdeaPublic = false;

const firstCoreResult = createCoreResult(firstWebsite);
firstCoreResult.finalUrlBaseDomain = '18f.gov';
Expand Down
1 change: 1 addition & 0 deletions libs/database/src/websites/dto/create-website.dto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ export class CreateWebsiteDto {
bureau: string;
bureauCode?: number;
sourceList: string;
ombIdeaPublic: boolean;
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ describe('AnalysisService', () => {
service = new WebsiteService(websiteRepository);
});

afterEach(() => db.close());
afterEach(() => db.destroy());

it('repository should be defined', () => {
expect(websiteRepository).toBeDefined();
Expand All @@ -44,6 +44,7 @@ describe('AnalysisService', () => {
firstWebsite.agencyCode = 10;
firstWebsite.bureauCode = 10;
firstWebsite.sourceList = 'gov';
firstWebsite.ombIdeaPublic = false;

const secondWebsite = new Website();
secondWebsite.url = 'https://fake.gov';
Expand All @@ -54,6 +55,7 @@ describe('AnalysisService', () => {
secondWebsite.agencyCode = 10;
secondWebsite.bureauCode = 10;
secondWebsite.sourceList = 'gov';
secondWebsite.ombIdeaPublic = false;

const thirdWebsite = new Website();
thirdWebsite.url = 'https://anotherfake.gov';
Expand All @@ -64,6 +66,7 @@ describe('AnalysisService', () => {
thirdWebsite.agencyCode = 10;
thirdWebsite.bureauCode = 10;
thirdWebsite.sourceList = 'gov';
thirdWebsite.ombIdeaPublic = false;

const fourthWebsite = new Website();
fourthWebsite.url = 'https://anotherfake.mil';
Expand All @@ -74,6 +77,7 @@ describe('AnalysisService', () => {
fourthWebsite.agencyCode = 10;
fourthWebsite.bureauCode = 10;
fourthWebsite.sourceList = 'mil';
fourthWebsite.ombIdeaPublic = false;

const firstCoreResult = new CoreResult();
firstCoreResult.website = firstWebsite;
Expand Down
4 changes: 2 additions & 2 deletions libs/database/src/websites/websites.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ describe('WebsiteService', () => {
agencyCode: 10,
bureauCode: 10,
sourceList: 'gov',
ombIdeaPublic: false,
};

const website = new Website();
Expand All @@ -85,8 +86,7 @@ describe('WebsiteService', () => {
website.agencyCode = 10;
website.bureauCode = 10;
website.sourceList = 'gov';

await service.upsert(createWebsiteDto);
(website.ombIdeaPublic = false), await service.upsert(createWebsiteDto);
expect(mockRepository.insert).toHaveBeenCalledWith(website);
});

Expand Down
1 change: 1 addition & 0 deletions libs/database/src/websites/websites.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ export class WebsiteService {
website.agencyCode = createWebsiteDto.agencyCode;
website.bureauCode = createWebsiteDto.bureauCode;
website.sourceList = createWebsiteDto.sourceList;
website.ombIdeaPublic = createWebsiteDto.ombIdeaPublic;
const exists = await this.website.findOneBy({
url: website.url,
});
Expand Down
4 changes: 2 additions & 2 deletions libs/ingest/src/ingest.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { Module } from '@nestjs/common';
import { HttpModule } from '@nestjs/axios';
import { IngestService } from './ingest.service';
import { ConfigModule } from '@nestjs/config';
import { UrlList } from './url-list';
import { UrlListDataFetcher } from './url-list-data-fetcher';
import ingestConfig from './config/ingest.config';

@Module({
Expand All @@ -16,7 +16,7 @@ import ingestConfig from './config/ingest.config';
load: [ingestConfig],
}),
],
providers: [IngestService, UrlList],
providers: [IngestService, UrlListDataFetcher],
exports: [IngestService],
})
export class IngestModule {}
8 changes: 4 additions & 4 deletions libs/ingest/src/ingest.service.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { WebsiteService } from '@app/database/websites/websites.service';
import { UrlList } from './url-list';
import { UrlListDataFetcher } from './url-list-data-fetcher';
import { Test, TestingModule } from '@nestjs/testing';
import { mock, MockProxy } from 'jest-mock-extended';
import { IngestService } from './ingest.service';
Expand All @@ -8,11 +8,11 @@ import { Website } from 'entities/website.entity';
describe('IngestService', () => {
let service: IngestService;
let mockWebsiteService: MockProxy<WebsiteService>;
let mockUrlList: MockProxy<UrlList>;
let mockUrlList: MockProxy<UrlListDataFetcher>;

beforeEach(async () => {
mockWebsiteService = mock<WebsiteService>();
mockUrlList = mock<UrlList>();
mockUrlList = mock<UrlListDataFetcher>();

const module: TestingModule = await Test.createTestingModule({
providers: [
Expand All @@ -22,7 +22,7 @@ describe('IngestService', () => {
useValue: mockWebsiteService,
},
{
provide: UrlList,
provide: UrlListDataFetcher,
useValue: mockUrlList,
},
],
Expand Down
38 changes: 35 additions & 3 deletions libs/ingest/src/ingest.service.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { parse } from '@fast-csv/parse';
import { Injectable, Logger } from '@nestjs/common';
import { UrlList } from './url-list';
import { UrlListDataFetcher } from './url-list-data-fetcher';
import { CreateWebsiteDto } from '@app/database/websites/dto/create-website.dto';
import { WebsiteService } from '@app/database/websites/websites.service';

Expand All @@ -12,11 +12,11 @@ export class IngestService {

constructor(
private websiteService: WebsiteService,
private urlList: UrlList,
private urlListDataFetcher: UrlListDataFetcher,
) {}

async getUrls(url?: string): Promise<string> {
return await this.urlList.fetch(url);
return await this.urlListDataFetcher.fetch(url);
}

/**
Expand All @@ -40,7 +40,14 @@ export class IngestService {
'sourceListFederalDomains',
'sourceListDap',
'sourceListPulse',
'sourceListOmbIdea',
'sourceListEotw',
'sourceListUsagov',
'sourceListGovMan',
'sourceListUsacourts',
'sourceListOira',
'sourceListOther',
'ombIdeaPublic',
'sourceListMil',
],
renameHeaders: true, // discard the existing headers to ease parsing
Expand All @@ -53,6 +60,7 @@ export class IngestService {
agencyCode: data.agencyCode ? parseInt(data.agencyCode) : null,
bureauCode: data.bureauCode ? parseInt(data.bureauCode) : null,
sourceList: this.getSourceList(data),
ombIdeaPublic: data.ombIdeaPublic.toLowerCase() === 'true',
}),
)
.on('error', (error) => {
Expand Down Expand Up @@ -140,6 +148,30 @@ export class IngestService {
sourceList.push('pulse');
}

if (row.sourceListOmbIdea.toLowerCase() === 'true') {
sourceList.push('omb_idea');
}

if (row.sourceListEotw.toLowerCase() === 'true') {
sourceList.push('eotw');
}

if (row.sourceListUsagov.toLowerCase() === 'true') {
sourceList.push('usagov');
}

if (row.sourceListGovMan.toLowerCase() === 'true') {
sourceList.push('gov_man');
}

if (row.sourceListUsacourts.toLowerCase() === 'true') {
sourceList.push('usacourts');
}

if (row.sourceListOira.toLowerCase() === 'true') {
sourceList.push('oira');
}

if (row.sourceListOther.toLowerCase() === 'true') {
sourceList.push('other');
}
Expand Down
7 changes: 7 additions & 0 deletions libs/ingest/src/subdomain-row.interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ export interface SubdomainRow {
sourceListFederalDomains: string;
sourceListDap: string;
sourceListPulse: string;
sourceListOmbIdea: string;
sourceListEotw: string;
sourceListUsagov: string;
sourceListGovMan: string;
sourceListUsacourts: string;
sourceListOira: string;
sourceListOther: string;
ombIdeaPublic: string;
sourceListMil: string;
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { UrlList } from './url-list';
import { UrlListDataFetcher } from './url-list-data-fetcher';
import { ConfigService } from '@nestjs/config';
import { HttpService } from '@nestjs/axios';
import { mock } from 'jest-mock-extended';
import { of } from 'rxjs';

describe('UrlList', () => {
let urlList: UrlList;
let urlList: UrlListDataFetcher;

beforeEach(() => {
const mockHttpService = mock<HttpService>();
Expand All @@ -21,7 +21,7 @@ describe('UrlList', () => {
}),
);

urlList = new UrlList(mockHttpService, mockConfigService);
urlList = new UrlListDataFetcher(mockHttpService, mockConfigService);
});

it('should be defined', () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { map } from 'rxjs/operators';
import { lastValueFrom } from 'rxjs';

@Injectable()
export class UrlList {
export class UrlListDataFetcher {
constructor(
private httpService: HttpService,
private configService: ConfigService,
Expand Down

0 comments on commit fd4c0c8

Please sign in to comment.