Skip to content

Commit

Permalink
Feature/wordpress to md (#27)
Browse files Browse the repository at this point in the history
Add the ability to convert a WordPress export file into n markdown files representing each blog post (published and draft). Metadata (categories, tags, author info) is contained in YAML frontmatter. Images in the post have been added as attachments in the frontmatter and are also automatically downloaded. Image urls and internal links are re-written to be relative (but rooted).
  • Loading branch information
HowardvanRooijen authored May 3, 2020
1 parent 1d957b1 commit 813a697
Show file tree
Hide file tree
Showing 43 changed files with 1,067 additions and 46 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ A .NET Global Tool for automating marketing content across social channels. It s

A useful blog post for understanding `System.CommandLine` is [Radu Matei's](https://twitter.com/matei_radu) blog post "[Building self-contained, single executable .NET Core 3 CLI tools](https://radu-matei.com/blog/self-contained-dotnet-cli/)".

## Prerequisites

`stacker` uses Pandoc to convert WordPress content to Markdown. You will need to install [Pandoc](https://pandoc.org/installing.html) and add it to the `PATH`.

## dotnet global tools

`stacker` is a [.NET global tool](https://docs.microsoft.com/en-us/dotnet/core/tools/global-tools), which means once installed, it's available on the PATH of your machine.
Expand Down Expand Up @@ -63,6 +67,8 @@ Once you have `dotnet-suggest` installed, you can use `stacker` then TAB to expl

`stacker wordpress export universal` - Exports blog posts from WordPress into a reusable format suitable for publishing across social channels.

`stacker wordpress export markdown` - Exports blog posts from WordPress and converts them into Markdown. Various clean-up routines are also run.

### Buffer commands

`stacker twitter buffer <CONTENT-PATH> <ACCOUNT>` - Upload content items into buffer for the specified Twitter profile.
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Solutions/Stacker.Cli.Specs/Stacker.Cli.Specs.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@

<ItemGroup>
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1" />
<PackageReference Include="SpecFlow" Version="3.1.86" />
<PackageReference Include="SpecFlow.NUnit" Version="3.1.86" />
<PackageReference Include="SpecFlow.Tools.MsBuild.Generation" Version="3.1.86" />
<PackageReference Include="SpecFlow" Version="3.1.95" />
<PackageReference Include="SpecFlow.NUnit" Version="3.1.95" />
<PackageReference Include="SpecFlow.Tools.MsBuild.Generation" Version="3.1.95" />
<PackageReference Include="nunit" Version="3.12.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0" />
<PackageReference Include="coverlet.msbuild" Version="2.8.0">
<PackageReference Include="coverlet.msbuild" Version="2.8.1">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
</PackageReference>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// <copyright file="ContentItemAttachementPathCleaner.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System;
using System.Text.RegularExpressions;
using Flurl;
using Stacker.Cli.Domain.Universal;

/// <summary>
/// Rewrites the <c>Path</c> of every attachment on a content item so that it is rooted at
/// <c>/assets/images/blog</c> instead of the WordPress uploads folder.
/// </summary>
public class ContentItemAttachementPathCleaner : IPreDownloadCleaner
{
    private const string BlogImagePath = "/assets/images/blog";

    // Built once and shared: constructing a RegexOptions.Compiled regex on every call
    // repeats the compilation each time. The dots in the host names are escaped so that
    // only the literal hosts match ('.' unescaped would match any character).
    private static readonly Regex UploadsRoot = new Regex(
        @"(https?:\/\/(?:(?:blogs?\.endjin\.com)|(?:endjinblog\.azurewebsites\.net))\/wp-content\/uploads)",
        RegexOptions.Compiled,
        TimeSpan.FromSeconds(1));

    /// <summary>
    /// Replaces the WordPress uploads root in each attachment path with the blog image path,
    /// and prefixes the blog image path onto any path that does not already start with it.
    /// </summary>
    /// <param name="contentItem">The content item whose attachments are updated in place.</param>
    /// <returns>The same <paramref name="contentItem"/> instance, after modification.</returns>
    public ContentItem Clean(ContentItem contentItem)
    {
        foreach (var attachment in contentItem.Content.Attachments)
        {
            attachment.Path = UploadsRoot.Replace(attachment.Path, BlogImagePath);

            // Paths are identifiers, not linguistic text, so compare ordinally.
            if (!attachment.Path.StartsWith(BlogImagePath, StringComparison.OrdinalIgnoreCase))
            {
                attachment.Path = Url.Combine(BlogImagePath, attachment.Path);
            }
        }

        return contentItem;
    }
}
}
39 changes: 39 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/ContentItemCleaner.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// <copyright file="ContentItemCleaner.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System;
using System.Linq;
using Microsoft.Extensions.DependencyInjection;
using Stacker.Cli.Domain.Universal;

/// <summary>
/// Runs the registered cleaners for each stage of the export pipeline, feeding the
/// output of one cleaner into the next.
/// </summary>
public class ContentItemCleaner
{
    private readonly IServiceProvider serviceProvider;

    /// <summary>
    /// Initializes a new instance of the <see cref="ContentItemCleaner"/> class.
    /// </summary>
    /// <param name="serviceProvider">The provider from which the cleaners are resolved.</param>
    public ContentItemCleaner(IServiceProvider serviceProvider)
    {
        this.serviceProvider = serviceProvider;
    }

    /// <summary>
    /// Applies every registered <see cref="IPreDownloadCleaner"/> in turn.
    /// </summary>
    /// <param name="content">The content item to clean.</param>
    /// <returns>The result returned by the last cleaner, or <paramref name="content"/> if none are registered.</returns>
    public ContentItem PreDownload(ContentItem content)
    {
        ContentItem result = content;

        foreach (IPreDownloadCleaner step in this.serviceProvider.GetServices<IPreDownloadCleaner>())
        {
            result = step.Clean(result);
        }

        return result;
    }

    /// <summary>
    /// Applies every registered <see cref="IPostDownloadCleaner"/> in turn.
    /// </summary>
    /// <param name="content">The content item to clean.</param>
    /// <returns>The result returned by the last cleaner, or <paramref name="content"/> if none are registered.</returns>
    public ContentItem PostDownload(ContentItem content)
    {
        ContentItem result = content;

        foreach (IPostDownloadCleaner step in this.serviceProvider.GetServices<IPostDownloadCleaner>())
        {
            result = step.Clean(result);
        }

        return result;
    }

    /// <summary>
    /// Applies every registered <see cref="IPostConvertCleaner"/> in turn.
    /// </summary>
    /// <param name="content">The text to clean.</param>
    /// <returns>The text returned by the last cleaner, or <paramref name="content"/> if none are registered.</returns>
    internal string PostConvert(string content)
    {
        string result = content;

        foreach (IPostConvertCleaner step in this.serviceProvider.GetServices<IPostConvertCleaner>())
        {
            result = step.Clean(result);
        }

        return result;
    }
}
}
24 changes: 24 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/EnsureEndjinHttpsInBody.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// <copyright file="EnsureEndjinHttpsInBody.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System;
using System.Text.RegularExpressions;
using Stacker.Cli.Domain.Universal;

/// <summary>
/// Upgrades <c>http://endjin.com</c> links in the body of a content item to <c>https</c>.
/// </summary>
public class EnsureEndjinHttpsInBody : IPreDownloadCleaner
{
    // Built once and shared rather than recompiled on every call. The dot is escaped so
    // that only the literal host "endjin.com" matches.
    private static readonly Regex InsecureEndjinUrl = new Regex(
        @"(http:\/\/endjin\.com)",
        RegexOptions.Compiled,
        TimeSpan.FromSeconds(1));

    /// <summary>
    /// Replaces every occurrence of <c>http://endjin.com</c> in the body with <c>https://endjin.com</c>.
    /// </summary>
    /// <param name="contentItem">The content item whose body is updated in place.</param>
    /// <returns>The same <paramref name="contentItem"/> instance, after modification.</returns>
    public ContentItem Clean(ContentItem contentItem)
    {
        contentItem.Content.Body = InsecureEndjinUrl.Replace(contentItem.Content.Body, "https://endjin.com");

        return contentItem;
    }
}
}
11 changes: 11 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/IPostConvertCleaner.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// <copyright file="IPostConvertCleaner.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
/// <summary>
/// A clean-up step that operates on plain text. Implementations are resolved and applied
/// in sequence by <c>ContentItemCleaner.PostConvert</c>, each receiving the previous
/// cleaner's output.
/// </summary>
/// <remarks>
/// NOTE(review): "post convert" presumably means after the Markdown conversion has run - confirm against the caller.
/// </remarks>
public interface IPostConvertCleaner
{
/// <summary>
/// Cleans the supplied text.
/// </summary>
/// <param name="content">The text to clean.</param>
/// <returns>The cleaned text.</returns>
string Clean(string content);
}
}
13 changes: 13 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/IPostDownloadCleaner.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// <copyright file="IPostDownloadCleaner.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using Stacker.Cli.Domain.Universal;

/// <summary>
/// A clean-up step that operates on a <see cref="ContentItem"/>. Implementations are
/// resolved and applied in sequence by <c>ContentItemCleaner.PostDownload</c>, each
/// receiving the previous cleaner's output.
/// </summary>
public interface IPostDownloadCleaner
{
/// <summary>
/// Cleans the supplied content item.
/// </summary>
/// <param name="contentItem">The content item to clean.</param>
/// <returns>The cleaned content item.</returns>
ContentItem Clean(ContentItem contentItem);
}
}
13 changes: 13 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/IPreDownloadCleaner.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// <copyright file="IPreDownloadCleaner.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using Stacker.Cli.Domain.Universal;

/// <summary>
/// A clean-up step that operates on a <see cref="ContentItem"/>. Implementations are
/// resolved and applied in sequence by <c>ContentItemCleaner.PreDownload</c>, each
/// receiving the previous cleaner's output.
/// </summary>
public interface IPreDownloadCleaner
{
/// <summary>
/// Cleans the supplied content item.
/// </summary>
/// <param name="contentItem">The content item to clean.</param>
/// <returns>The cleaned content item.</returns>
ContentItem Clean(ContentItem contentItem);
}
}
19 changes: 19 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/RemoveHeaderImageFromBody.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// <copyright file="RemoveHeaderImageFromBody.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System;
using System.Text.RegularExpressions;

/// <summary>
/// Removes the first Markdown image from the supplied text.
/// </summary>
public class RemoveHeaderImageFromBody : IPostConvertCleaner
{
    // Built once and shared: constructing a RegexOptions.Compiled regex on every call
    // repeats the compilation each time.
    // The pattern matches "![...](...)" with an optional leading "[" (an image wrapped in a link).
    // Both ".*" runs are greedy and '.' does not cross a line feed, so the match extends to the
    // last ")" on the same line; this also swallows the "](href)" tail of a linked image.
    private static readonly Regex FirstImage = new Regex(
        @"(\[?!\[.*\]\(.*\))",
        RegexOptions.Compiled,
        TimeSpan.FromSeconds(1));

    /// <summary>
    /// Deletes the first image match found in <paramref name="content"/>; later images are left untouched.
    /// </summary>
    /// <param name="content">The text to clean.</param>
    /// <returns>The text with the first image match removed, or the original text when there is no match.</returns>
    public string Clean(string content)
    {
        // count: 1 => only the first match is replaced; startat: 0 => search from the beginning.
        return FirstImage.Replace(content, string.Empty, 1, 0);
    }
}
}
24 changes: 24 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/RemoveHostNamesFromBody.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// <copyright file="RemoveHostNamesFromBody.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System;
using System.Text.RegularExpressions;
using Stacker.Cli.Domain.Universal;

/// <summary>
/// Strips the blog's scheme and host name from URLs in the body of a content item,
/// leaving rooted relative URLs behind.
/// </summary>
public class RemoveHostNamesFromBody : IPostDownloadCleaner
{
    // Built once and shared rather than recompiled on every call. The dots in the host
    // names are escaped so that only the literal hosts match.
    private static readonly Regex BlogHost = new Regex(
        @"(https?:\/\/(?:(?:blogs?\.endjin\.com)|(?:endjinblog\.azurewebsites\.net)))",
        RegexOptions.Compiled,
        TimeSpan.FromSeconds(1));

    /// <summary>
    /// Removes every <c>http(s)://blog(s).endjin.com</c> and
    /// <c>http(s)://endjinblog.azurewebsites.net</c> prefix from the body.
    /// </summary>
    /// <param name="contentItem">The content item whose body is updated in place.</param>
    /// <returns>The same <paramref name="contentItem"/> instance, after modification.</returns>
    public ContentItem Clean(ContentItem contentItem)
    {
        contentItem.Content.Body = BlogHost.Replace(contentItem.Content.Body, string.Empty);

        return contentItem;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// <copyright file="RemoveThreeBlankLinesFromStartBody.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System;
using System.Text.RegularExpressions;

/// <summary>
/// Collapses each run of three consecutive CRLF line breaks into a single line break.
/// </summary>
public class RemoveThreeBlankLinesFromStartBody : IPostConvertCleaner
{
    // Built once and shared: constructing a RegexOptions.Compiled regex on every call
    // repeats the compilation each time.
    // NOTE(review): despite the class name, the pattern is not anchored to the start of the
    // text, and it only recognises "\r\n" line endings - confirm whether both are intended.
    private static readonly Regex TripleLineBreak = new Regex(
        @"(\r\n){3,3}",
        RegexOptions.Compiled,
        TimeSpan.FromSeconds(1));

    /// <summary>
    /// Replaces every occurrence of exactly three consecutive <c>\r\n</c> sequences with
    /// <see cref="Environment.NewLine"/>.
    /// </summary>
    /// <param name="content">The text to clean.</param>
    /// <returns>The text with each triple line break collapsed.</returns>
    public string Clean(string content)
    {
        return TripleLineBreak.Replace(content, Environment.NewLine);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// <copyright file="ReplaceNewLineWithParagraphTagCleaner.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using Stacker.Cli.Domain.Universal;

/// <summary>
/// Turns each line feed in the body of a content item into a <c>&lt;p/&gt;</c> tag.
/// </summary>
public class ReplaceNewLineWithParagraphTagCleaner : IPreDownloadCleaner
{
    /// <summary>
    /// Replaces every <c>\n</c> character in the body with the literal text <c>&lt;p/&gt;</c>.
    /// </summary>
    /// <param name="contentItem">The content item whose body is updated in place.</param>
    /// <returns>The same <paramref name="contentItem"/> instance, after modification.</returns>
    public ContentItem Clean(ContentItem contentItem)
    {
        string withParagraphTags = contentItem.Content.Body.Replace("\n", "<p/>");
        contentItem.Content.Body = withParagraphTags;

        return contentItem;
    }
}
}
14 changes: 14 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/ReplaceSmartQuotes.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// <copyright file="ReplaceSmartQuotes.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
/// <summary>
/// Replaces typographic ("smart") quotation marks with their plain ASCII equivalents.
/// </summary>
public class ReplaceSmartQuotes : IPostConvertCleaner
{
    /// <summary>
    /// Converts curly double quotes to <c>"</c> and curly single quotes to <c>'</c>.
    /// </summary>
    /// <param name="content">The text to clean.</param>
    /// <returns>The text with smart quotes replaced by straight quotes.</returns>
    public string Clean(string content)
    {
        // The quote characters are written as Unicode escapes so the source file survives any
        // re-encoding; a literal that degrades to an empty string makes string.Replace throw
        // an ArgumentException.
        // U+0091..U+0094 are the C1 code points produced when Windows-1252 smart quotes are
        // mis-decoded as Latin-1; they are normalised as well.
        return content
            .Replace('\u201C', '"')
            .Replace('\u201D', '"')
            .Replace('\u0093', '"')
            .Replace('\u0094', '"')
            .Replace('\u2018', '\'')
            .Replace('\u2019', '\'')
            .Replace('\u0091', '\'')
            .Replace('\u0092', '\'');
    }
}
}
19 changes: 19 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/ReplaceWpUploadPath.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// <copyright file="ReplaceWpUploadPath.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System;
using System.Text.RegularExpressions;

/// <summary>
/// Re-points Markdown link and image targets from the WordPress uploads folder to the
/// blog image folder.
/// </summary>
public class ReplaceWpUploadPath : IPostConvertCleaner
{
    // Built once and shared: constructing a RegexOptions.Compiled regex on every call
    // repeats the compilation each time.
    // The leading "(" restricts the match to the start of a parenthesised target.
    private static readonly Regex UploadsTarget = new Regex(
        @"\((\/wp-content\/uploads\/)",
        RegexOptions.Compiled,
        TimeSpan.FromSeconds(1));

    /// <summary>
    /// Replaces every <c>(/wp-content/uploads/</c> with <c>(/assets/images/blog/</c>.
    /// </summary>
    /// <param name="content">The text to clean.</param>
    /// <returns>The text with upload paths rewritten.</returns>
    public string Clean(string content)
    {
        return UploadsTarget.Replace(content, "(/assets/images/blog/");
    }
}
}
25 changes: 25 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/UpdateInternalPostUrls.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// <copyright file="UpdateInternalPostUrls.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System;
using System.Text.RegularExpressions;

/// <summary>
/// Rewrites rooted WordPress post links of the form <c>(/yyyy/MM/slug/</c> to
/// <c>(/blog/yyyy/MM/slug.html</c>.
/// </summary>
public class UpdateInternalPostUrls : IPostConvertCleaner
{
    // Built once and shared: constructing a RegexOptions.Compiled regex on every call
    // repeats the compilation each time.
    // The slug is restricted to characters other than "/", ")" and whitespace. A lazy ".*?"
    // here would, for a link without a trailing slash, run past the closing ")" to the next
    // "/" on the line and corrupt the text in between.
    private static readonly Regex InternalPostLink = new Regex(
        @"\((\/\d{4}\/\d{2}\/[^\/\)\s]*)(\/)",
        RegexOptions.Compiled,
        TimeSpan.FromSeconds(1));

    /// <summary>
    /// Prefixes matching link targets with <c>/blog</c> and replaces the slash that follows
    /// the slug with <c>.html</c>.
    /// </summary>
    /// <param name="content">The text to clean.</param>
    /// <returns>The text with internal post links rewritten; non-matching links are left unchanged.</returns>
    public string Clean(string content)
    {
        // Group 1 is "/yyyy/MM/slug" without the trailing slash.
        return InternalPostLink.Replace(content, match => $"(/blog{match.Groups[1].Value}.html");
    }
}
}
27 changes: 27 additions & 0 deletions Solutions/Stacker.Cli/Cleaners/WordPressImageResizerCleaner.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// <copyright file="WordPressImageResizerCleaner.cs" company="Endjin Limited">
// Copyright (c) Endjin Limited. All rights reserved.
// </copyright>

namespace Stacker.Cli.Cleaners
{
using System.Text.RegularExpressions;
using Stacker.Cli.Domain.Universal;

/// <summary>
/// Removes the size and thumbnail suffixes that WordPress appends to image file names
/// (for example <c>-300x200</c> or <c>_thumb1</c>), so that references point at the original image.
/// </summary>
public class WordPressImageResizerCleaner : IPreDownloadCleaner
{
    // Matches "-<digits>x<digits>" or "_thumb" with optional digits, only when immediately
    // followed by ".png" or ".jpg". The dots in the lookahead are escaped so that they match
    // a literal '.' rather than any character. One shared instance serves all three replacements.
    private static readonly Regex ResizeSuffix = new Regex(
        @"(-\d+?x\d+?|(_thumb(\d+?)?))(?=\.png|\.jpg)",
        RegexOptions.Compiled);

    /// <summary>
    /// Strips resize suffixes from the body and from the path and URL of every attachment.
    /// </summary>
    /// <param name="contentItem">The content item to update in place.</param>
    /// <returns>The same <paramref name="contentItem"/> instance, after modification.</returns>
    public ContentItem Clean(ContentItem contentItem)
    {
        contentItem.Content.Body = ResizeSuffix.Replace(contentItem.Content.Body, string.Empty);

        foreach (var attachment in contentItem.Content.Attachments)
        {
            attachment.Path = ResizeSuffix.Replace(attachment.Path, string.Empty);
            attachment.Url = ResizeSuffix.Replace(attachment.Url, string.Empty);
        }

        return contentItem;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ public Command Create()
var cmd = new Command("buffer", "Uploads content to Buffer to be published via Facebook")
{
Handler = CommandHandler.Create(async (string contentFilePath, string profileName, int itemCount, DateTime fromDate, DateTime toDate, PublicationPeriod publicationPeriod) =>
{
await this.contentTasks.BufferContentItemsAsync<FacebookFormatter>(contentFilePath, $"facebook|", profileName, publicationPeriod, fromDate, toDate, itemCount).ConfigureAwait(false);
}),
await this.contentTasks.BufferContentItemsAsync<FacebookFormatter>(contentFilePath, $"facebook|", profileName, publicationPeriod, fromDate, toDate, itemCount).ConfigureAwait(false)),
};

cmd.Add(new Argument<string>("content-file-path") { Description = "Content file path." });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,21 @@ namespace Stacker.Cli.Commands
public class WordPressExportCommandFactory : ICommandFactory<WordPressExportCommandFactory>
{
private readonly ICommandFactory<WordPressExportUniversalCommandFactory> universalExportCommandFactory;
private readonly ICommandFactory<WordPressExportMarkDownCommandFactory> markdownExportCommandFactory;

public WordPressExportCommandFactory(ICommandFactory<WordPressExportUniversalCommandFactory> universalExportCommandFactory)
public WordPressExportCommandFactory(
ICommandFactory<WordPressExportUniversalCommandFactory> universalExportCommandFactory,
ICommandFactory<WordPressExportMarkDownCommandFactory> markdownExportCommandFactory)
{
this.universalExportCommandFactory = universalExportCommandFactory;
this.markdownExportCommandFactory = markdownExportCommandFactory;
}

public Command Create()
{
var cmd = new Command("export", "Perform operations on WordPress export files.");
cmd.AddCommand(this.universalExportCommandFactory.Create());
cmd.AddCommand(this.markdownExportCommandFactory.Create());

return cmd;
}
Expand Down
Loading

0 comments on commit 813a697

Please sign in to comment.