Skip to content

Commit

Permalink
feat: Migrate WordPress Show More...
Browse files Browse the repository at this point in the history
Migrate WordPress `Show More...` to Hugo's `Summary`
https://gohugo.io/content-management/front-matter/#summary
  • Loading branch information
ashishb committed Mar 18, 2024
1 parent 8daf051 commit e9a8704
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 63 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ Written in Go.
1. [x] YouTube embeds
1. [x] Google Map embed via a custom short code `googlemaps`
1. [x] Migrate `caption` (WordPress) to `figure` (Hugo)
1. [x] Migrate "Show more..." of WordPress -> `Summary` in Hugo
1. [ ] Migrate code blocks correctly - syntax highlighting is not working right now
1. [ ] Migrate "Show more..."
1. [ ] Featured images - I tried this [WordPress plugin](https://wordpress.org/plugins/export-media-with-selected-content/) but featured images are simply not exported


Expand Down
20 changes: 7 additions & 13 deletions src/wp2hugo/internal/hugogenerator/hugo_gen_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,25 +206,19 @@ func writePage(outputMediaDirPath string, pagePath string, page wpparser.CommonF
return fmt.Errorf("error parsing page URL: %s", err)
}

p := hugopage.Page{
AbsoluteURL: *pageURL,
Title: page.Title,
PublishDate: page.PublishDate,
Draft: page.PublishStatus == wpparser.PublishStatusDraft || page.PublishStatus == wpparser.PublishStatusPending,
Categories: page.Categories,
Tags: page.Tags,
HTMLContent: page.Content,
GUID: page.GUID,
p, err := hugopage.NewPage(
*pageURL, page.Title, page.PublishDate,
page.PublishStatus == wpparser.PublishStatusDraft || page.PublishStatus == wpparser.PublishStatusPending,
page.Categories, page.Tags, page.Content, page.GUID)
if err != nil {
return fmt.Errorf("error creating Hugo page: %s", err)
}
if err = p.Write(w); err != nil {
return err
}
log.Info().Msgf("Page written: %s", pagePath)

links, err := p.WPImageLinks()
if err != nil {
return fmt.Errorf("error getting WordPress content links: %s", err)
}
links := p.WPImageLinks()
log.Debug().
Str("page", page.Title).
Int("links", len(links)).
Expand Down
106 changes: 59 additions & 47 deletions src/wp2hugo/internal/hugogenerator/hugopage/hugo_page.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,37 @@ const (

// Page describes a single WordPress page that is being converted into a
// Hugo content file.
type Page struct {
// This is the original URL of the page from the WordPress site
AbsoluteURL url.URL
absoluteURL url.URL

Title string
PublishDate *time.Time
Draft bool
Categories []string
Tags []string
GUID *rss.GUID

// HTMLContent is the HTML content of the page that will be
// transformed to Markdown
HTMLContent string
// metadata holds the key/value pairs that are marshalled to YAML when the
// page's metadata is written
metadata map[string]any
// markdown is the Markdown produced from the page's HTML content
markdown string
}

const _WordPressMoreTag = "<!--more-->"

// Placeholder swapped in for the WordPress more tag before the HTML-to-Markdown
// conversion; it is removed from the Markdown again afterwards
const _customMoreTag = "{{< more >}}"

var _markdownImageLinks = regexp.MustCompile(`!\[.*?]\((.+?)\)`)

// Extracts "src" from Hugo figure shortcode
// {{< figure align=aligncenter width=905 src="/wp-content/uploads/2023/01/Stollemeyer-castle-1024x768.jpg" alt="" >}}
var _hugoFigureLinks = regexp.MustCompile(`{{< figure.*?src="(.+?)".*? >}}`)

func (page Page) getRelativeURL() string {
return page.AbsoluteURL.Path
// NewPage builds a Page for the given WordPress URL. The metadata is
// assembled from the title, publish date, draft flag, categories, tags and
// GUID, and htmlContent is converted to Markdown immediately, so a conversion
// failure is reported here and no Page is returned.
func NewPage(pageURL url.URL, title string, publishDate *time.Time, isDraft bool,
	categories []string, tags []string, htmlContent string, guid *rss.GUID) (*Page, error) {
	result := &Page{
		absoluteURL: pageURL,
		metadata:    getMetadata(pageURL, title, publishDate, isDraft, categories, tags, guid),
	}
	// The metadata map has to exist before the conversion runs, because
	// getMarkdown may add a "summary" entry to it.
	converted, err := result.getMarkdown(htmlContent)
	if err != nil {
		return nil, err
	}
	result.markdown = *converted
	return result, nil
}

func (page Page) Write(w io.Writer) error {
Expand All @@ -58,14 +67,10 @@ func (page Page) Write(w io.Writer) error {
return nil
}

func (page Page) WPImageLinks() ([]string, error) {
markdown, err := page.getMarkdown()
if err != nil {
return nil, err
}
arr1 := getMarkdownLinks(_markdownImageLinks, *markdown)
arr2 := getMarkdownLinks(_hugoFigureLinks, *markdown)
return append(arr1, arr2...), nil
// WPImageLinks returns the image links found in the page's Markdown: first
// the ones matched by _markdownImageLinks, followed by the ones matched by
// _hugoFigureLinks.
func (page *Page) WPImageLinks() []string {
	links := getMarkdownLinks(_markdownImageLinks, page.markdown)
	return append(links, getMarkdownLinks(_hugoFigureLinks, page.markdown)...)
}

func getMarkdownLinks(regex *regexp.Regexp, markdown string) []string {
Expand All @@ -77,29 +82,31 @@ func getMarkdownLinks(regex *regexp.Regexp, markdown string) []string {
return links
}

func (page Page) writeMetadata(w io.Writer) error {
// getMetadata assembles the metadata map for a page. "url" (the path part of
// pageURL) and "title" are always set. "date" is added only when publishDate
// is non-nil, "draft" (as the string "true") only when isDraft is set, the
// category and tag entries only when the respective slice is non-empty, and
// "guid" only when guid is non-nil.
func getMetadata(pageURL url.URL, title string, publishDate *time.Time, isDraft bool,
categories []string, tags []string, guid *rss.GUID) map[string]any {
metadata := make(map[string]any)
metadata["url"] = page.getRelativeURL()
metadata["title"] = page.Title
if page.PublishDate != nil {
metadata["date"] = page.PublishDate.Format(_hugoDateFormat)
metadata["url"] = pageURL.Path // Relative URL
metadata["title"] = title
if publishDate != nil {
metadata["date"] = publishDate.Format(_hugoDateFormat)
}
if page.Draft {
if isDraft {
metadata["draft"] = "true"
}

if len(page.Categories) > 0 {
metadata[CategoryName] = page.Categories
if len(categories) > 0 {
metadata[CategoryName] = categories
}

if len(page.Tags) > 0 {
metadata[TagName] = page.Tags
if len(tags) > 0 {
metadata[TagName] = tags
}
if page.GUID != nil {
metadata["GUID"] = page.GUID.Value
if guid != nil {
metadata["guid"] = guid.Value
}
return metadata
}

combinedMetadata, err := yaml.Marshal(metadata)
func (page *Page) writeMetadata(w io.Writer) error {
combinedMetadata, err := yaml.Marshal(page.metadata)
if err != nil {
return fmt.Errorf("error marshalling metadata: %s", err)
}
Expand All @@ -110,26 +117,36 @@ func (page Page) writeMetadata(w io.Writer) error {
return nil
}

func (page Page) getMarkdown() (*string, error) {
if page.HTMLContent == "" {
// getMarkdown converts htmlContent to Markdown and returns a pointer to the
// result. It returns an error when htmlContent is empty, when the conversion
// fails, or when the converted Markdown is blank. If the WordPress more tag
// was present, the Markdown preceding it is stored in page.metadata under
// "summary" and the marker is removed from the returned Markdown.
func (page *Page) getMarkdown(htmlContent string) (*string, error) {
if htmlContent == "" {
return nil, fmt.Errorf("empty HTML content")
}
converter := getMarkdownConverter()
htmlContent := replaceCaptionWithFigure(page.HTMLContent)
htmlContent = replaceCaptionWithFigure(htmlContent)

// Only the first WordPress more tag is swapped for the custom marker
htmlContent = strings.Replace(htmlContent, _WordPressMoreTag, _customMoreTag, 1)
markdown, err := converter.ConvertString(htmlContent)
if err != nil {
return nil, fmt.Errorf("error converting HTML to Markdown: %s", err)
}
if len(strings.TrimSpace(markdown)) == 0 {
return nil, fmt.Errorf("empty markdown")
}
markdown = ReplaceAbsoluteLinksWithRelative(page.AbsoluteURL.Host, markdown)
if strings.Contains(markdown, _customMoreTag) {
// Ref: https://gohugo.io/content-management/summaries/#manual-summary-splitting
page.metadata["summary"] = strings.Split(markdown, _customMoreTag)[0]
markdown = strings.Replace(markdown, _customMoreTag, "", 1)
log.Warn().
Msgf("Manual summary splitting is not supported: %s", page.metadata)
}

markdown = ReplaceAbsoluteLinksWithRelative(page.absoluteURL.Host, markdown)
markdown = replaceCatlistWithShortcode(markdown)
// Disabled for now, as it does not work well
if false {
markdown = highlightCode(markdown)
} else {
log.Warn().Msg("Auto-detecting languages of code blocks is disabled for now")
log.Debug().Msg("Auto-detecting languages of code blocks is disabled for now")
}
return &markdown, nil
}
Expand Down Expand Up @@ -195,12 +212,7 @@ func getLanguageCode(code string) string {
}

func (page Page) writeContent(w io.Writer) error {
markdown, err := page.getMarkdown()
if err != nil {
return err
}

if _, err := w.Write([]byte(*markdown)); err != nil {
if _, err := w.Write([]byte(page.markdown)); err != nil {
return fmt.Errorf("error writing to page file: %s", err)
}
return nil
Expand Down
4 changes: 2 additions & 2 deletions src/wp2hugo/internal/hugogenerator/wp_to_hugo_rss.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"path"
)

// setupRssFeedFormat sets up custom GUID for RSS feed that is being migrated from WordPress
// setupRssFeedFormat sets up a custom guid for the RSS feed that is being migrated from WordPress
// to Hugo
func setupRssFeedFormat(siteDir string) error {
if err := createDirIfNotExist(path.Join(siteDir, "layouts")); err != nil {
Expand All @@ -27,7 +27,7 @@ func getModifiedRSSXML(data []byte) []byte {
original := "<guid>{{ .Permalink }}</guid>"
wordPressCompatible := "" +
"{{ if .Params.GUID }} " +
"<guid isPermaLink=\"false\">{{ .Params.GUID }}</guid> " +
"<guid isPermaLink=\"false\">{{ .Params.guid }}</guid> " +
"{{ else }} " +
"<guid isPermaLink=\"false\">{{ .Permalink }}</guid> " +
"{{ end }}"
Expand Down

0 comments on commit e9a8704

Please sign in to comment.