Adds DOCX and Markdown export functionality

Introduces a modular exporter pattern supporting DOCX and Markdown formats
by implementing Exporter interfaces and restructuring application logic.

Enhances CI to install UPX for binary compression, excluding recent macOS
binaries due to compatibility issues.

Enables CGO when building binaries for all platforms, addressing potential
cross-platform compatibility concerns.

Bumps version to 0.1.1.
This commit is contained in:
2025-05-25 13:01:58 +02:00
parent 48cad7144f
commit 9de7222ec3
15 changed files with 1096 additions and 600 deletions

196
internal/exporters/docx.go Normal file
View File

@ -0,0 +1,196 @@
// Package exporters provides implementations of the Exporter interface
// for converting Articulate Rise courses into various file formats.
package exporters
import (
"fmt"
"strings"
"github.com/kjanat/articulate-parser/internal/interfaces"
"github.com/kjanat/articulate-parser/internal/models"
"github.com/kjanat/articulate-parser/internal/services"
"github.com/unidoc/unioffice/document"
)
// DocxExporter writes Articulate Rise course data out as a Microsoft Word
// (DOCX) document built with the unioffice/document package. It is the
// DOCX implementation of the Exporter interface.
type DocxExporter struct {
	// htmlCleaner turns HTML fragments found in the course data into
	// plain text before they are written into the document.
	htmlCleaner *services.HTMLCleaner
}
// NewDocxExporter builds a DOCX exporter around the given HTML cleaner.
//
// Parameters:
//   - htmlCleaner: Service used to strip HTML from course content
//
// Returns:
//   - The new DocxExporter, exposed through the Exporter interface
func NewDocxExporter(htmlCleaner *services.HTMLCleaner) interfaces.Exporter {
	exporter := new(DocxExporter)
	exporter.htmlCleaner = htmlCleaner
	return exporter
}
// Export exports the course to a DOCX file.
// It builds a Word document containing the course title, the cleaned course
// description (when present) and every lesson, then saves it to outputPath.
// A ".docx" extension is appended when outputPath does not already end with
// one (case-insensitive). This method does not create the parent directory
// of outputPath itself.
//
// Parameters:
//   - course: The course data model to export; must not be nil
//   - outputPath: The file path where the DOCX content will be written
//
// Returns:
//   - An error if course is nil or if saving the document fails
func (e *DocxExporter) Export(course *models.Course, outputPath string) error {
	// Fail with an error instead of a nil-pointer panic on a missing course.
	if course == nil {
		return fmt.Errorf("exporting docx: course is nil")
	}

	doc := document.New()

	// Add title
	titlePara := doc.AddParagraph()
	titleRun := titlePara.AddRun()
	titleRun.AddText(course.Course.Title)
	titleRun.Properties().SetBold(true)
	titleRun.Properties().SetSize(16)

	// Add description if available
	if course.Course.Description != "" {
		descPara := doc.AddParagraph()
		descRun := descPara.AddRun()
		descRun.AddText(e.htmlCleaner.CleanHTML(course.Course.Description))
	}

	// Add each lesson. Indexing avoids copying every lesson just to take
	// the address of the loop variable.
	lessons := course.Course.Lessons
	for i := range lessons {
		e.exportLesson(doc, &lessons[i])
	}

	// Add the .docx extension when the caller did not supply it.
	if !strings.HasSuffix(strings.ToLower(outputPath), ".docx") {
		outputPath += ".docx"
	}

	if err := doc.SaveToFile(outputPath); err != nil {
		return fmt.Errorf("saving docx to %q: %w", outputPath, err)
	}
	return nil
}
// exportLesson appends one lesson to the document: a bold, size-14 heading
// of the form "Lesson: <title>", the cleaned description when one is set,
// and then every item the lesson contains.
//
// Parameters:
//   - doc: The Word document being built
//   - lesson: The lesson to append
func (e *DocxExporter) exportLesson(doc *document.Document, lesson *models.Lesson) {
	// Heading paragraph for the lesson.
	heading := doc.AddParagraph().AddRun()
	heading.AddText(fmt.Sprintf("Lesson: %s", lesson.Title))
	heading.Properties().SetBold(true)
	heading.Properties().SetSize(14)

	// Optional description, stripped of HTML.
	if desc := lesson.Description; desc != "" {
		body := doc.AddParagraph().AddRun()
		body.AddText(e.htmlCleaner.CleanHTML(desc))
	}

	// Items follow in their original order.
	for idx := range lesson.Items {
		e.exportItem(doc, &lesson.Items[idx])
	}
}
// exportItem appends one item to the document. When the item has a type,
// the title-cased type name is written as a bold, size-12 heading; all of
// the item's sub-items are then appended in order.
//
// Parameters:
//   - doc: The Word document being built
//   - item: The item to append
func (e *DocxExporter) exportItem(doc *document.Document, item *models.Item) {
	if kind := item.Type; kind != "" {
		heading := doc.AddParagraph().AddRun()
		// NOTE(review): strings.Title is deprecated since Go 1.18; it is
		// kept here because the standard library offers no drop-in
		// replacement.
		heading.AddText(strings.Title(kind))
		heading.Properties().SetBold(true)
		heading.Properties().SetSize(12)
	}

	for idx := range item.Items {
		e.exportSubItem(doc, &item.Items[idx])
	}
}
// exportSubItem adds a sub-item to the document.
// Each non-empty component gets its own paragraph, in this order: title
// (bold), heading (bold), paragraph content, the numbered answer list
// (correct answers marked with "✓"), and feedback (italic). All text is
// passed through the HTML cleaner before it is written, including the
// title, so that markup does not leak into the document.
//
// Parameters:
//   - doc: The Word document being created
//   - subItem: The sub-item data model to export
func (e *DocxExporter) exportSubItem(doc *document.Document, subItem *models.SubItem) {
	// addLine appends a new paragraph holding a single run with the given
	// text and returns that run so the caller can apply formatting.
	addLine := func(text string) document.Run {
		para := doc.AddParagraph()
		run := para.AddRun()
		run.AddText(text)
		return run
	}

	// Add title if available. The title is cleaned like every other field;
	// the Markdown exporter treats sub-item titles as HTML as well.
	if subItem.Title != "" {
		titleRun := addLine(" " + e.htmlCleaner.CleanHTML(subItem.Title)) // Indented
		titleRun.Properties().SetBold(true)
	}

	// Add heading if available
	if subItem.Heading != "" {
		headingRun := addLine(" " + e.htmlCleaner.CleanHTML(subItem.Heading)) // Indented
		headingRun.Properties().SetBold(true)
	}

	// Add paragraph content if available
	if subItem.Paragraph != "" {
		addLine(" " + e.htmlCleaner.CleanHTML(subItem.Paragraph)) // Indented
	}

	// Add answers if this is a question
	if len(subItem.Answers) > 0 {
		answersRun := addLine(" Answers:")
		answersRun.Properties().SetBold(true)

		for i, answer := range subItem.Answers {
			prefix := fmt.Sprintf(" %d. ", i+1)
			if answer.Correct {
				prefix += "✓ "
			}
			addLine(prefix + e.htmlCleaner.CleanHTML(answer.Title))
		}
	}

	// Add feedback if available
	if subItem.Feedback != "" {
		feedbackRun := addLine(" Feedback: " + e.htmlCleaner.CleanHTML(subItem.Feedback))
		feedbackRun.Properties().SetItalic(true)
	}
}
// GetSupportedFormat reports the export format handled by DocxExporter.
//
// Returns:
//   - The format name "docx"
func (e *DocxExporter) GetSupportedFormat() string {
	const format = "docx"
	return format
}

View File

@ -0,0 +1,63 @@
// Package exporters provides implementations of the Exporter interface
// for converting Articulate Rise courses into various file formats.
package exporters
import (
"fmt"
"strings"
"github.com/kjanat/articulate-parser/internal/interfaces"
"github.com/kjanat/articulate-parser/internal/services"
)
// Factory is the ExporterFactory implementation of this package. It hands
// out an exporter matching a requested format name.
type Factory struct {
	// htmlCleaner is passed on to every exporter the factory creates so
	// they can convert HTML content to plain text.
	htmlCleaner *services.HTMLCleaner
}
// NewFactory builds an exporter factory. The supplied HTMLCleaner is stored
// and later given to each exporter the factory creates.
//
// Parameters:
//   - htmlCleaner: Service used to strip HTML from course content
//
// Returns:
//   - The new Factory, exposed through the ExporterFactory interface
func NewFactory(htmlCleaner *services.HTMLCleaner) interfaces.ExporterFactory {
	factory := new(Factory)
	factory.htmlCleaner = htmlCleaner
	return factory
}
// CreateExporter picks the exporter for a format name. Matching ignores
// case: "markdown" and "md" select the Markdown exporter, "docx" and "word"
// select the DOCX exporter.
//
// Parameters:
//   - format: The requested export format (e.g., "markdown", "docx")
//
// Returns:
//   - The matching Exporter when the format is known
//   - An error naming the format when it is not supported
func (f *Factory) CreateExporter(format string) (interfaces.Exporter, error) {
	normalized := strings.ToLower(format)

	if normalized == "markdown" || normalized == "md" {
		return NewMarkdownExporter(f.htmlCleaner), nil
	}
	if normalized == "docx" || normalized == "word" {
		return NewDocxExporter(f.htmlCleaner), nil
	}

	// The error reports the format exactly as the caller wrote it.
	return nil, fmt.Errorf("unsupported export format: %s", format)
}
// GetSupportedFormats lists every format name CreateExporter accepts,
// primary names and aliases alike.
//
// Returns:
//   - A new string slice: "markdown", "md", "docx", "word"
func (f *Factory) GetSupportedFormats() []string {
	formats := make([]string, 0, 4)
	formats = append(formats, "markdown", "md")
	formats = append(formats, "docx", "word")
	return formats
}

View File

@ -0,0 +1,225 @@
// Package exporters provides implementations of the Exporter interface
// for converting Articulate Rise courses into various file formats.
package exporters
import (
"bytes"
"fmt"
"os"
"strings"
"github.com/kjanat/articulate-parser/internal/interfaces"
"github.com/kjanat/articulate-parser/internal/models"
"github.com/kjanat/articulate-parser/internal/services"
)
// MarkdownExporter renders Articulate Rise course data as a structured
// Markdown document. It is the Markdown implementation of the Exporter
// interface.
type MarkdownExporter struct {
	// htmlCleaner turns HTML fragments found in the course data into
	// plain text before they are written out.
	htmlCleaner *services.HTMLCleaner
}
// NewMarkdownExporter builds a Markdown exporter around the given HTML
// cleaner.
//
// Parameters:
//   - htmlCleaner: Service used to strip HTML from course content
//
// Returns:
//   - The new MarkdownExporter, exposed through the Exporter interface
func NewMarkdownExporter(htmlCleaner *services.HTMLCleaner) interfaces.Exporter {
	exporter := new(MarkdownExporter)
	exporter.htmlCleaner = htmlCleaner
	return exporter
}
// Export exports a course to Markdown format.
// The document consists of the course title and cleaned description, a
// "Course Information" metadata list, and then the lessons in order.
// Lessons of type "section" become top-level headings and are not numbered;
// every other lesson is rendered as "## Lesson N: <title>" followed by its
// cleaned description and items. The result is written to outputPath with
// permissions 0644.
//
// Parameters:
//   - course: The course data model to export; must not be nil
//   - outputPath: The file path where the Markdown content will be written
//
// Returns:
//   - An error if course is nil or if writing the output file fails
func (e *MarkdownExporter) Export(course *models.Course, outputPath string) error {
	// Fail with an error instead of a nil-pointer panic on a missing course.
	if course == nil {
		return fmt.Errorf("exporting markdown: course is nil")
	}

	var buf bytes.Buffer

	// Write course header
	fmt.Fprintf(&buf, "# %s\n\n", course.Course.Title)
	if course.Course.Description != "" {
		fmt.Fprintf(&buf, "%s\n\n", e.htmlCleaner.CleanHTML(course.Course.Description))
	}

	// Add metadata
	buf.WriteString("## Course Information\n\n")
	fmt.Fprintf(&buf, "- **Course ID**: %s\n", course.Course.ID)
	fmt.Fprintf(&buf, "- **Share ID**: %s\n", course.ShareID)
	fmt.Fprintf(&buf, "- **Navigation Mode**: %s\n", course.Course.NavigationMode)
	if course.Course.ExportSettings != nil {
		fmt.Fprintf(&buf, "- **Export Format**: %s\n", course.Course.ExportSettings.Format)
	}
	buf.WriteString("\n---\n\n")

	// Process lessons. Numbering uses its own counter so that section
	// entries, which are not lessons, do not create gaps in the sequence.
	lessonNumber := 0
	for _, lesson := range course.Course.Lessons {
		if lesson.Type == "section" {
			fmt.Fprintf(&buf, "# %s\n\n", lesson.Title)
			continue
		}

		lessonNumber++
		fmt.Fprintf(&buf, "## Lesson %d: %s\n\n", lessonNumber, lesson.Title)
		if lesson.Description != "" {
			fmt.Fprintf(&buf, "%s\n\n", e.htmlCleaner.CleanHTML(lesson.Description))
		}

		// Process lesson items; item headings sit one level below the
		// lesson heading.
		for _, item := range lesson.Items {
			e.processItemToMarkdown(&buf, item, 3)
		}
		buf.WriteString("\n---\n\n")
	}

	if err := os.WriteFile(outputPath, buf.Bytes(), 0644); err != nil {
		return fmt.Errorf("writing markdown to %q: %w", outputPath, err)
	}
	return nil
}
// GetSupportedFormat reports the export format handled by MarkdownExporter.
//
// Returns:
//   - The format name "markdown"
func (e *MarkdownExporter) GetSupportedFormat() string {
	const format = "markdown"
	return format
}
// processItemToMarkdown converts a course item into Markdown format
// and appends it to the provided buffer. The output depends on item.Type:
//
//   - "text": each sub-item's heading (at the given level) and paragraph
//   - "list": one bullet per sub-item paragraph
//   - "multimedia": video/image URLs, video duration, and captions
//   - "image": image URLs and captions
//   - "knowledgeCheck": question, numbered answers (correct ones marked
//     with "✓"), and feedback
//   - "interactive": sub-item titles in bold
//   - "divider": a horizontal rule
//   - anything else: a generic "<Type> Content" section with titles and
//     paragraphs, emitted only when the item has sub-items
//
// All text taken from the course data is passed through the HTML cleaner,
// including knowledge-check answer titles.
//
// Parameters:
//   - buf: The buffer to write the Markdown content to
//   - item: The course item to process
//   - level: The heading level for the item (number of # characters);
//     values below 1 are treated as 1
func (e *MarkdownExporter) processItemToMarkdown(buf *bytes.Buffer, item models.Item, level int) {
	// strings.Repeat panics on a negative count, and a zero count would
	// produce text that is not a heading at all.
	if level < 1 {
		level = 1
	}
	headingPrefix := strings.Repeat("#", level)

	switch item.Type {
	case "text":
		for _, subItem := range item.Items {
			if subItem.Heading != "" {
				// Skip headings that are empty once the markup is removed.
				if heading := e.htmlCleaner.CleanHTML(subItem.Heading); heading != "" {
					fmt.Fprintf(buf, "%s %s\n\n", headingPrefix, heading)
				}
			}
			if subItem.Paragraph != "" {
				if paragraph := e.htmlCleaner.CleanHTML(subItem.Paragraph); paragraph != "" {
					fmt.Fprintf(buf, "%s\n\n", paragraph)
				}
			}
		}

	case "list":
		for _, subItem := range item.Items {
			if subItem.Paragraph != "" {
				if paragraph := e.htmlCleaner.CleanHTML(subItem.Paragraph); paragraph != "" {
					fmt.Fprintf(buf, "- %s\n", paragraph)
				}
			}
		}
		buf.WriteString("\n")

	case "multimedia":
		fmt.Fprintf(buf, "%s Media Content\n\n", headingPrefix)
		for _, subItem := range item.Items {
			if subItem.Media != nil {
				if subItem.Media.Video != nil {
					fmt.Fprintf(buf, "**Video**: %s\n", subItem.Media.Video.OriginalUrl)
					if subItem.Media.Video.Duration > 0 {
						fmt.Fprintf(buf, "**Duration**: %d seconds\n", subItem.Media.Video.Duration)
					}
				}
				if subItem.Media.Image != nil {
					fmt.Fprintf(buf, "**Image**: %s\n", subItem.Media.Image.OriginalUrl)
				}
			}
			if subItem.Caption != "" {
				fmt.Fprintf(buf, "*%s*\n", e.htmlCleaner.CleanHTML(subItem.Caption))
			}
		}
		buf.WriteString("\n")

	case "image":
		fmt.Fprintf(buf, "%s Image\n\n", headingPrefix)
		for _, subItem := range item.Items {
			if subItem.Media != nil && subItem.Media.Image != nil {
				fmt.Fprintf(buf, "**Image**: %s\n", subItem.Media.Image.OriginalUrl)
			}
			if subItem.Caption != "" {
				fmt.Fprintf(buf, "*%s*\n", e.htmlCleaner.CleanHTML(subItem.Caption))
			}
		}
		buf.WriteString("\n")

	case "knowledgeCheck":
		fmt.Fprintf(buf, "%s Knowledge Check\n\n", headingPrefix)
		for _, subItem := range item.Items {
			if subItem.Title != "" {
				fmt.Fprintf(buf, "**Question**: %s\n\n", e.htmlCleaner.CleanHTML(subItem.Title))
			}

			// Only announce answers when there are some to list.
			if len(subItem.Answers) > 0 {
				buf.WriteString("**Answers**:\n")
				for i, answer := range subItem.Answers {
					correctMark := ""
					if answer.Correct {
						correctMark = " ✓"
					}
					// Answer titles are cleaned just like question titles so
					// raw HTML does not end up in the Markdown output.
					fmt.Fprintf(buf, "%d. %s%s\n", i+1, e.htmlCleaner.CleanHTML(answer.Title), correctMark)
				}
			}

			if subItem.Feedback != "" {
				fmt.Fprintf(buf, "\n**Feedback**: %s\n", e.htmlCleaner.CleanHTML(subItem.Feedback))
			}
		}
		buf.WriteString("\n")

	case "interactive":
		fmt.Fprintf(buf, "%s Interactive Content\n\n", headingPrefix)
		for _, subItem := range item.Items {
			if subItem.Title != "" {
				fmt.Fprintf(buf, "**%s**\n\n", e.htmlCleaner.CleanHTML(subItem.Title))
			}
		}

	case "divider":
		buf.WriteString("---\n\n")

	default:
		// Unknown types get a generic section, but only when there is
		// something to show.
		if len(item.Items) > 0 {
			// NOTE(review): strings.Title is deprecated since Go 1.18; it is
			// kept because the standard library offers no drop-in replacement.
			fmt.Fprintf(buf, "%s %s Content\n\n", headingPrefix, strings.Title(item.Type))
			for _, subItem := range item.Items {
				if subItem.Title != "" {
					fmt.Fprintf(buf, "**%s**\n\n", e.htmlCleaner.CleanHTML(subItem.Title))
				}
				if subItem.Paragraph != "" {
					fmt.Fprintf(buf, "%s\n\n", e.htmlCleaner.CleanHTML(subItem.Paragraph))
				}
			}
		}
	}
}