Skip to content

Commit

Permalink
T343 schoolie (#514)
Browse files Browse the repository at this point in the history
Resolves #343 by adding google scholar compliant meta tags to HTML for ETDs only, and replacing sitemap with new sitemap that only includes ETDs (for now)
  • Loading branch information
kerchner authored Mar 7, 2024
1 parent a4839db commit 9783136
Show file tree
Hide file tree
Showing 17 changed files with 160 additions and 30 deletions.
5 changes: 2 additions & 3 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ gem 'rails', '~> 5.2.8.1'
gem "sqlite3", "~> 1.3.0"
# Use pg as the production database for Active Record
gem 'pg'
# Use sitemap
# See https://github.com/viseztrance/rails-sitemap
gem 'sitemap'
# Use Passenger as the app server
# Update this when we update the Passenger docker container base image version
gem 'passenger', '6.0.17', require: "phusion_passenger/rack_handler"
Expand Down Expand Up @@ -88,6 +85,8 @@ gem "ffi", "~> 1.15"

gem 'json-canonicalization', '0.3.1' # https://github.com/dryruby/json-canonicalization/issues/2

gem 'schoolie', '0.1.3'

gem 'prawn'

group :development, :test do
Expand Down
3 changes: 3 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,8 @@ GEM
sassc (2.4.0)
ffi (~> 1.9)
scanf (1.0.0)
schoolie (0.1.3)
actionview (~> 5)
select2-rails (3.5.11)
shacl (0.1.1)
json-ld (~> 3.1, >= 3.1.7)
Expand Down Expand Up @@ -1057,6 +1059,7 @@ DEPENDENCIES
rsolr (>= 1.0, < 3)
rspec-rails
sass-rails (~> 5.0)
schoolie (= 0.1.3)
sidekiq (~> 6)
simplecov
sitemap
Expand Down
3 changes: 1 addition & 2 deletions app/jobs/sitemap_regenerate_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

# Job that regenerates the site's sitemap by invoking Rake tasks from inside
# the application process.
class SitemapRegenerateJob < ApplicationJob
# Invokes each sitemap-related Rake task in order.
#
# NOTE(review): Rake::Task#invoke normally runs a given task only once per
# process unless it is re-enabled -- confirm that repeated runs of this job in
# a long-lived worker still regenerate the sitemap, and that the Rake tasks
# are loaded in that process.
def perform
Rake::Task['sitemap:generate'].invoke
Rake::Task['sitemap:ping'].invoke
Rake::Task['schoolie:sitemap'].invoke
end
end
2 changes: 1 addition & 1 deletion app/models/gw_etd.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class GwEtd < ActiveFedora::Base
# Change this to restrict which works can be added as a child.
# self.valid_child_concerns = []
self.indexer = GwEtdIndexer

validates :title, presence: { message: 'Your work must have a title.' }

property :gw_affiliation, predicate: ::RDF::URI.new('http://scholarspace.library.gwu.edu/ns#gwaffiliation') do |index|
Expand Down
2 changes: 1 addition & 1 deletion app/models/gw_work.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ class GwWork < ActiveFedora::Base
end

include ::Hyrax::BasicMetadata
end
end
6 changes: 6 additions & 0 deletions app/presenters/hyrax/gw_etd_presenter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,11 @@ class GwEtdPresenter < GwWorkPresenter
# Permanent URL for this ETD: the application's configured
# permanent_url_base with "etd/<id>" appended.
def permanent_url
  base_url = Scholarspace::Application.config.permanent_url_base
  base_url + "etd/#{id}"
end

# Whether the Google Scholar meta tags should be rendered for this work.
# The shared/_schoolie_citations partial only calls schoolie_tags when this
# returns true; ETDs always opt in.
def scholarly?
true
end
end
end
4 changes: 4 additions & 0 deletions app/presenters/hyrax/gw_work_presenter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,9 @@ class GwWorkPresenter < Hyrax::WorkShowPresenter
# Permanent URL for this work: the application's configured
# permanent_url_base with "work/<id>" appended.
def permanent_url
  base_url = Scholarspace::Application.config.permanent_url_base
  base_url + "work/#{id}"
end

# Whether the Google Scholar meta tags should be rendered for this work.
# Generic works do not opt in; GwEtdPresenter overrides this to return true.
def scholarly?
false
end
end
end
4 changes: 2 additions & 2 deletions app/views/layouts/_head_tag_content.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ signed in %>

<!-- Twitter card metadata -->
<%= yield :twitter_meta %>
<!-- Google Scholar metadata -->
<%= yield :gscholar_meta %>
<!-- Schoolie for Google Scholar metadata (replaces :gscholar_meta) -->
<%= yield :schoolie_meta %>

<title><%= content_for?(:page_title) ? yield(:page_title) : default_page_title %></title>

Expand Down
2 changes: 2 additions & 0 deletions app/views/shared/_citations.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<%# Renders the two metadata partials; each one fills a content_for slot (:twitter_meta, :schoolie_meta) that the layout head yields. %>
<%= render 'shared/twitter_citations' %>
<%= render 'shared/schoolie_citations' %>
5 changes: 5 additions & 0 deletions app/views/shared/_schoolie_citations.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<%# Fills the layout's :schoolie_meta slot with Google Scholar meta tags, but only for presenters that opt in via scholarly?. %>
<% content_for(:schoolie_meta) do %>
  <% if @presenter.scholarly? %>
    <%# Use an output tag (<%=): a plain <% tag evaluates the helper but discards its result. %>
    <%= schoolie_tags(@presenter).html_safe %>
  <% end %>
<% end %>
15 changes: 15 additions & 0 deletions app/views/shared/_twitter_citations.html.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<%# Fills the layout's :twitter_meta slot with Twitter card and Open Graph metadata for the work being shown. %>
<% content_for(:twitter_meta) do %>
  <meta name="twitter:card" content="product" />
  <meta name="twitter:site" content="<%= t('hyrax.product_twitter_handle') %>" />
  <meta name="twitter:creator" content="<%= @presenter.tweeter %>" />
  <meta property="og:site_name" content="<%= application_name %>" />
  <meta property="og:type" content="object" />
  <meta property="og:title" content="<%= @presenter.title.first %>" />
  <%# Fall back to the title when there is no description; done explicitly rather than with a rescue modifier so unrelated errors are not hidden. %>
  <meta property="og:description" content="<%= Array(@presenter.description).first&.truncate(200) || @presenter.title.first %>" />
  <meta property="og:image" content="<%= @presenter.download_url %>" />
  <meta property="og:url" content="<%= polymorphic_url([main_app, @presenter]) %>" />
  <meta name="twitter:data1" content="<%= @presenter.keyword.join(', ') %>" />
  <meta name="twitter:label1" content="Keywords" />
  <meta name="twitter:data2" content="<%= @presenter.rights_statement.first %>" />
  <meta name="twitter:label2" content="Rights Statement" />
<% end %>
6 changes: 4 additions & 2 deletions config/authorities/resource_types.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@ terms:
term: Conference Proceeding
- id: Dataset
term: Dataset
- id: Dissertation
term: Dissertation
- id: Image
term: Image
- id: Journal
term: Journal
- id: Map or Cartographic Material
term: Map or Cartographic Material
- id: Master's Thesis
term: Master's Thesis
- id: Meeting Minutes
term: Meeting Minutes
- id: Newsletter
Expand All @@ -39,8 +43,6 @@ terms:
term: Research Paper
- id: Software or Program Code
term: Software or Program Code
- id: Thesis or Dissertation
term: Thesis or Dissertation
- id: Video
term: Video
- id: Working Paper
Expand Down
20 changes: 20 additions & 0 deletions config/etd_degree_map.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Maps each ETD resource_type category to the degree abbreviations that belong
# to it. Loaded by the gwss rake tasks; reassign_etd_resource_types compares
# degrees upcased and with periods removed, and writes the matching category
# key into the work's resource_type, so the keys should match terms in
# config/authorities/resource_types.yml.
Master's Thesis:
  - M.A.
  - M.S.
  - M.P.H.
  - LL.M.
  - M.F.A.
  - M.A.T.
  - M.P.S.
  - M.B.A.
  - M.F.S.
  - M.Int.St.
  - M.P.P.
Dissertation:
  - Ph.D.
  - Ed.D.
  - D.Engr.
  - Dr.P.H.
  - D.Sc.
  - S.J.D.
10 changes: 10 additions & 0 deletions config/schoolie.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Settings for the schoolie gem (Google Scholar meta tags).
# NOTE(review): presumably "static" entries are emitted verbatim and each
# "attributes" entry maps a meta tag name to the presenter method that supplies
# its value -- confirm against the schoolie documentation.
static:
citation_institution: George Washington University
attributes:
citation_title: title
citation_author: creator
citation_type: resource_type
dc.type: resource_type
citation_date: publication_date
citation_keywords: keyword
citation_pdf_url: download_url
16 changes: 0 additions & 16 deletions config/sitemap.rb

This file was deleted.

57 changes: 54 additions & 3 deletions lib/tasks/gwss.rake
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ namespace :gwss do
end
end

desc "Queues a job to (re)generate the sitemap.xml"
desc "Executes (immediately) a job to (re)generate the sitemap.xml"
task "sitemap_queue_generate" => :environment do
SitemapRegenerateJob.perform_later
SitemapRegenerateJob.perform_now
end

desc "Creates the default Admin Set if it doesn't exist"
Expand Down Expand Up @@ -141,6 +141,10 @@ namespace :gwss do
# problem that would be caused by referencing GwEtd first
# See articles such as http://neethack.com/2015/04/rails-circular-dependency/
GwWork

degree_hash = YAML.load_file('config/etd_degree_map.yml')
degree_categories = degree_hash.keys # Typically ["Master's Thesis", "Dissertation"]

manifest_file = options[:mfpath]
if File.exist?(manifest_file)
mf = File.read(manifest_file)
Expand All @@ -153,7 +157,10 @@ namespace :gwss do
item_attributes['degree'] = manifest_json['degree'][0]
end
# resource_type may need more logic around it, TBD
item_attributes['resource_type'] = ['Thesis or Dissertation']
if manifest_json['etd_type']
item_attributes['resource_type'] = manifest_json['etd_type']
end
# item_attributes['resource_type'] = ['Thesis or Dissertation']

# dc:rights
# Always set this license for ETDs
Expand Down Expand Up @@ -313,4 +320,48 @@ namespace :gwss do
ContentBlock.find_or_create_by(name: "help_page").update!(value: help_page_html.read)
ContentBlock.find_or_create_by(name: "share_page").update!(value: share_page_html.read)
end

desc "Reassigns GwEtd resource_type values to Master's Thesis or Dissertation"
task "reassign_etd_resource_types" => :environment do
  # Degrees are compared upcased and with "."s removed, so "Ph.D." and "PHD" match.
  normalize = ->(degree) { degree.upcase.delete('.') }

  # config/etd_degree_map.yml maps category => [degrees]; flip it so that any
  # given degree looks up whether it is a master's thesis or a dissertation.
  etd_degree_map = YAML.load_file('config/etd_degree_map.yml')
  degree_etd_map = etd_degree_map.each_with_object({}) do |(degree_category, degree_names), lookup|
    degree_names.each { |degree_name| lookup[normalize.call(degree_name)] = degree_category }
  end

  ids = Hyrax::SolrService.new.get("has_model_ssim:GwEtd", fl: [:id], rows: 1_000_000)
  ids["response"]["docs"].each do |doc|
    work = GwEtd.find(doc["id"])
    # Treat an empty string like a missing degree
    if work.degree.blank?
      puts "GwEtd id=#{doc["id"]} degree is empty! Skipping"
      next
    end

    degree_name = normalize.call(work.degree)
    degree_category = degree_etd_map[degree_name]
    if degree_category.nil?
      puts "Degree name #{degree_name} not found! Skipping"
      next
    end

    work.resource_type = [degree_category]
    # save returns false when validation fails; only report success if it persisted
    if work.save
      puts "Reassigned #{degree_name} resource type to #{degree_category}"
    else
      puts "GwEtd id=#{doc["id"]} could not be saved: #{work.errors.full_messages.join('; ')}"
    end
  end
end

desc "Enumerates degree types present among existing GwEtd works"
task "enumerate_degree_types" => :environment do
  solr_response = Hyrax::SolrService.new.get("has_model_ssim:GwEtd", fl: [:id], rows: 1_000_000)
  # Load every GwEtd to read its degree, then count how often each value occurs
  degree_counts = solr_response["response"]["docs"]
                  .map { |doc| GwEtd.find(doc["id"]).degree }
                  .tally
  # One "degree, count" line per distinct degree value
  degree_counts.each { |degree, count| puts "#{degree}, #{count}" }
end
end
30 changes: 30 additions & 0 deletions lib/tasks/schoolie.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# frozen_string_literal: true

require 'json'
require 'nokogiri'

namespace :schoolie do
  desc "Creates Google Scholar compliant sitemap for GwEtd works"
  # Queries Solr for every GwEtd (id plus last-modified date) and writes
  # public/sitemap.xml with one <url> entry per work.
  task sitemap: :environment do
    date_field = 'system_modified_dtsi'
    result = Hyrax::SolrService.new.get("has_model_ssim:GwEtd",
                                        fl: "id,#{date_field}",
                                        rows: 1_000_000)
    # NOTE(review): the host is hard-coded here, while the presenters build
    # URLs from config.permanent_url_base -- confirm whether these should agree.
    entries = result['response']['docs'].map do |doc|
      ["https://scholarspace.library.gwu.edu/etd/#{doc['id']}", doc[date_field].to_s]
    end

    builder = Nokogiri::XML::Builder.new do |sitemap|
      sitemap.urlset("xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
                     xmlns: "http://www.sitemaps.org/schemas/sitemap/0.9",
                     "xsi:schemaLocation": "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd") do
        entries.each do |url, modified|
          sitemap.url do
            sitemap.loc url
            # <lastmod> is optional; an empty element is not a valid date, so
            # leave it out when Solr returned no modification date.
            sitemap.lastmod modified unless modified.empty?
          end
        end
      end
    end

    File.write(Rails.root.join("public", "sitemap.xml"), builder.to_xml)
  end
end

0 comments on commit 9783136

Please sign in to comment.