Adds DOCX and Markdown export functionality

Introduces a modular exporter pattern supporting DOCX and Markdown formats by implementing Exporter interfaces and restructuring application logic. Enhances CI to install UPX for binary compression, excluding recent macOS binaries due to compatibility issues. Enables CGO when building binaries for all platforms, addressing potential cross-platform compatibility concerns. Bumps version to 0.1.1.
2026-03-03 04:21:26 +01:00 · 2025-05-25 13:01:58 +02:00
parent 48cad7144f
commit 9de7222ec3
15 changed files with 1096 additions and 600 deletions
--- a/internal/exporters/docx.go
+++ b/internal/exporters/docx.go
@@ -0,0 +1,196 @@
+// Package exporters provides implementations of the Exporter interface
+// for converting Articulate Rise courses into various file formats.
+package exporters
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/kjanat/articulate-parser/internal/interfaces"
+	"github.com/kjanat/articulate-parser/internal/models"
+	"github.com/kjanat/articulate-parser/internal/services"
+	"github.com/unidoc/unioffice/document"
+)
+
+// DocxExporter implements the Exporter interface for the DOCX format.
+// It turns Articulate Rise course data into a Microsoft Word document
+// that is assembled with the unioffice/document package.
+type DocxExporter struct {
+	// htmlCleaner converts HTML fragments from the course data to plain text.
+	htmlCleaner *services.HTMLCleaner
+}
+
+// NewDocxExporter builds a DOCX exporter around the given HTML cleaner.
+//
+// Parameters:
+//   - htmlCleaner: Service used to clean HTML found in course content
+//
+// Returns:
+//   - A DocxExporter exposed through the Exporter interface
+func NewDocxExporter(htmlCleaner *services.HTMLCleaner) interfaces.Exporter {
+	exporter := new(DocxExporter)
+	exporter.htmlCleaner = htmlCleaner
+	return exporter
+}
+
+// Export writes the course to a DOCX file.
+// It builds a Word document containing the course title, the optional
+// description and every lesson, then saves it to outputPath. A ".docx"
+// extension is appended when outputPath does not already end with one
+// (compared case-insensitively).
+//
+// Parameters:
+//   - course: The course data model to export; must not be nil
+//   - outputPath: The file path where the DOCX content will be written
+//
+// Returns:
+//   - An error if course is nil or if saving the document fails
+func (e *DocxExporter) Export(course *models.Course, outputPath string) error {
+	if course == nil {
+		return fmt.Errorf("exporting docx: course is nil")
+	}
+
+	doc := document.New()
+
+	// Course title: bold, size 16.
+	titlePara := doc.AddParagraph()
+	titleRun := titlePara.AddRun()
+	titleRun.AddText(course.Course.Title)
+	titleRun.Properties().SetBold(true)
+	titleRun.Properties().SetSize(16)
+
+	// Course description, with HTML stripped, when present.
+	if course.Course.Description != "" {
+		descPara := doc.AddParagraph()
+		descRun := descPara.AddRun()
+		descRun.AddText(e.htmlCleaner.CleanHTML(course.Course.Description))
+	}
+
+	// Index into the slice so each lesson is passed by reference to the
+	// stored element instead of to a per-iteration copy.
+	lessons := course.Course.Lessons
+	for i := range lessons {
+		e.exportLesson(doc, &lessons[i])
+	}
+
+	// Append the .docx extension when it is missing. The parent directory
+	// is not created here; it must already exist for the save to succeed.
+	if !strings.HasSuffix(strings.ToLower(outputPath), ".docx") {
+		outputPath += ".docx"
+	}
+
+	if err := doc.SaveToFile(outputPath); err != nil {
+		return fmt.Errorf("saving docx to %q: %w", outputPath, err)
+	}
+	return nil
+}
+
+// exportLesson appends one lesson to the document: a bold size-14
+// "Lesson: <title>" heading, the cleaned description when there is one,
+// and then every item of the lesson in order.
+//
+// Parameters:
+//   - doc: The Word document being built
+//   - lesson: The lesson to render
+func (e *DocxExporter) exportLesson(doc *document.Document, lesson *models.Lesson) {
+	// Heading line.
+	headingPara := doc.AddParagraph()
+	headingRun := headingPara.AddRun()
+	headingRun.AddText(fmt.Sprintf("Lesson: %s", lesson.Title))
+	headingProps := headingRun.Properties()
+	headingProps.SetBold(true)
+	headingProps.SetSize(14)
+
+	// Optional description.
+	if desc := lesson.Description; desc != "" {
+		para := doc.AddParagraph()
+		run := para.AddRun()
+		run.AddText(e.htmlCleaner.CleanHTML(desc))
+	}
+
+	// Lesson items, in source order.
+	for i := range lesson.Items {
+		e.exportItem(doc, &lesson.Items[i])
+	}
+}
+
+// exportItem appends one item to the document. A non-empty item type is
+// written as a bold size-12 heading (title-cased), followed by each of the
+// item's sub-items.
+//
+// Parameters:
+//   - doc: The Word document being built
+//   - item: The item to render
+func (e *DocxExporter) exportItem(doc *document.Document, item *models.Item) {
+	if kind := item.Type; kind != "" {
+		para := doc.AddParagraph()
+		run := para.AddRun()
+		run.AddText(strings.Title(kind))
+		props := run.Properties()
+		props.SetBold(true)
+		props.SetSize(12)
+	}
+
+	for i := range item.Items {
+		e.exportSubItem(doc, &item.Items[i])
+	}
+}
+
+// exportSubItem appends one sub-item to the document. Each populated part
+// (title, heading, paragraph, answers, feedback) becomes its own paragraph,
+// indented with leading spaces. Correct answers are flagged with a check mark.
+//
+// Parameters:
+//   - doc: The Word document being built
+//   - subItem: The sub-item to render
+func (e *DocxExporter) exportSubItem(doc *document.Document, subItem *models.SubItem) {
+	// addLine appends a paragraph with a single run holding text and applies
+	// the requested emphasis. Run properties are only touched when needed.
+	addLine := func(text string, bold, italic bool) {
+		para := doc.AddParagraph()
+		run := para.AddRun()
+		run.AddText(text)
+		if bold {
+			run.Properties().SetBold(true)
+		}
+		if italic {
+			run.Properties().SetItalic(true)
+		}
+	}
+
+	// Title is written as-is (no HTML cleaning), bold.
+	if subItem.Title != "" {
+		addLine("  "+subItem.Title, true, false)
+	}
+
+	// Heading, cleaned and bold.
+	if subItem.Heading != "" {
+		addLine("  "+e.htmlCleaner.CleanHTML(subItem.Heading), true, false)
+	}
+
+	// Body paragraph, cleaned.
+	if subItem.Paragraph != "" {
+		addLine("  "+e.htmlCleaner.CleanHTML(subItem.Paragraph), false, false)
+	}
+
+	// Numbered answer list under a bold "Answers:" label.
+	if len(subItem.Answers) > 0 {
+		addLine("  Answers:", true, false)
+
+		for idx, answer := range subItem.Answers {
+			marker := ""
+			if answer.Correct {
+				marker = "✓ "
+			}
+			label := fmt.Sprintf("    %d. %s", idx+1, marker)
+			addLine(label+e.htmlCleaner.CleanHTML(answer.Title), false, false)
+		}
+	}
+
+	// Feedback, cleaned and italic.
+	if subItem.Feedback != "" {
+		addLine("  Feedback: "+e.htmlCleaner.CleanHTML(subItem.Feedback), false, true)
+	}
+}
+
+// GetSupportedFormat returns the format name this exporter supports.
+//
+// Returns:
+//   - A string representing the supported format ("docx")
+func (e *DocxExporter) GetSupportedFormat() string {
+	return "docx"
+}
--- a/internal/exporters/factory.go
+++ b/internal/exporters/factory.go
@@ -0,0 +1,63 @@
+// Package exporters provides implementations of the Exporter interface
+// for converting Articulate Rise courses into various file formats.
+package exporters
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/kjanat/articulate-parser/internal/interfaces"
+	"github.com/kjanat/articulate-parser/internal/services"
+)
+
+// Factory implements the ExporterFactory interface.
+// It maps a requested format name to a matching exporter instance.
+type Factory struct {
+	// htmlCleaner is handed to every exporter this factory creates.
+	htmlCleaner *services.HTMLCleaner
+}
+
+// NewFactory builds an exporter factory. The supplied HTMLCleaner is stored
+// and passed on to each exporter the factory creates.
+//
+// Parameters:
+//   - htmlCleaner: Service used to clean HTML found in course content
+//
+// Returns:
+//   - A Factory exposed through the ExporterFactory interface
+func NewFactory(htmlCleaner *services.HTMLCleaner) interfaces.ExporterFactory {
+	factory := new(Factory)
+	factory.htmlCleaner = htmlCleaner
+	return factory
+}
+
+// CreateExporter returns the exporter registered for format.
+// The lookup is case-insensitive; "markdown"/"md" select the Markdown
+// exporter and "docx"/"word" select the DOCX exporter.
+//
+// Parameters:
+//   - format: The desired export format (e.g., "markdown", "docx")
+//
+// Returns:
+//   - The matching Exporter when the format is supported
+//   - An error naming the format when it is not supported
+func (f *Factory) CreateExporter(format string) (interfaces.Exporter, error) {
+	switch name := strings.ToLower(format); name {
+	case "docx", "word":
+		return NewDocxExporter(f.htmlCleaner), nil
+	case "markdown", "md":
+		return NewMarkdownExporter(f.htmlCleaner), nil
+	}
+
+	// The error reports the format exactly as the caller supplied it.
+	return nil, fmt.Errorf("unsupported export format: %s", format)
+}
+
+// GetSupportedFormats lists every format name accepted by CreateExporter,
+// primary names and aliases alike.
+//
+// Returns:
+//   - A newly allocated slice: "markdown", "md", "docx", "word"
+func (f *Factory) GetSupportedFormats() []string {
+	formats := make([]string, 0, 4)
+	formats = append(formats, "markdown", "md")
+	formats = append(formats, "docx", "word")
+	return formats
+}
--- a/internal/exporters/markdown.go
+++ b/internal/exporters/markdown.go
@@ -0,0 +1,225 @@
+// Package exporters provides implementations of the Exporter interface
+// for converting Articulate Rise courses into various file formats.
+package exporters
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/kjanat/articulate-parser/internal/interfaces"
+	"github.com/kjanat/articulate-parser/internal/models"
+	"github.com/kjanat/articulate-parser/internal/services"
+)
+
+// MarkdownExporter implements the Exporter interface for the Markdown format.
+// It renders Articulate Rise course data as a structured Markdown document.
+type MarkdownExporter struct {
+	// htmlCleaner converts HTML fragments from the course data to plain text.
+	htmlCleaner *services.HTMLCleaner
+}
+
+// NewMarkdownExporter builds a Markdown exporter around the given HTML cleaner.
+//
+// Parameters:
+//   - htmlCleaner: Service used to clean HTML found in course content
+//
+// Returns:
+//   - A MarkdownExporter exposed through the Exporter interface
+func NewMarkdownExporter(htmlCleaner *services.HTMLCleaner) interfaces.Exporter {
+	exporter := new(MarkdownExporter)
+	exporter.htmlCleaner = htmlCleaner
+	return exporter
+}
+
+// Export writes the course to a Markdown file.
+// The document consists of the course title and description, a
+// "Course Information" metadata list, and one block per lesson. Lessons of
+// type "section" are rendered as a top-level heading only.
+//
+// Parameters:
+//   - course: The course data model to export; must not be nil
+//   - outputPath: The file path where the Markdown content will be written
+//
+// Returns:
+//   - An error if course is nil or if writing the output file fails
+func (e *MarkdownExporter) Export(course *models.Course, outputPath string) error {
+	if course == nil {
+		return fmt.Errorf("exporting markdown: course is nil")
+	}
+
+	var buf bytes.Buffer
+
+	// Course header.
+	fmt.Fprintf(&buf, "# %s\n\n", course.Course.Title)
+
+	if course.Course.Description != "" {
+		fmt.Fprintf(&buf, "%s\n\n", e.htmlCleaner.CleanHTML(course.Course.Description))
+	}
+
+	// Metadata list.
+	buf.WriteString("## Course Information\n\n")
+	fmt.Fprintf(&buf, "- **Course ID**: %s\n", course.Course.ID)
+	fmt.Fprintf(&buf, "- **Share ID**: %s\n", course.ShareID)
+	fmt.Fprintf(&buf, "- **Navigation Mode**: %s\n", course.Course.NavigationMode)
+	if course.Course.ExportSettings != nil {
+		fmt.Fprintf(&buf, "- **Export Format**: %s\n", course.Course.ExportSettings.Format)
+	}
+	buf.WriteString("\n---\n\n")
+
+	// Lessons.
+	lessons := course.Course.Lessons
+	for i := range lessons {
+		lesson := &lessons[i]
+
+		// Sections only contribute a heading; they carry no items here.
+		if lesson.Type == "section" {
+			fmt.Fprintf(&buf, "# %s\n\n", lesson.Title)
+			continue
+		}
+
+		// The number is the entry's position in the Lessons list, so
+		// section entries are counted as well.
+		fmt.Fprintf(&buf, "## Lesson %d: %s\n\n", i+1, lesson.Title)
+
+		if lesson.Description != "" {
+			fmt.Fprintf(&buf, "%s\n\n", e.htmlCleaner.CleanHTML(lesson.Description))
+		}
+
+		// Items start at heading level 3, below the lesson heading.
+		for _, item := range lesson.Items {
+			e.processItemToMarkdown(&buf, item, 3)
+		}
+
+		buf.WriteString("\n---\n\n")
+	}
+
+	if err := os.WriteFile(outputPath, buf.Bytes(), 0644); err != nil {
+		return fmt.Errorf("writing markdown to %q: %w", outputPath, err)
+	}
+	return nil
+}
+
+// GetSupportedFormat returns the format name this exporter supports
+// It indicates the file format that the MarkdownExporter can generate.
+//
+// Returns:
+//   - A string representing the supported format ("markdown")
+func (e *MarkdownExporter) GetSupportedFormat() string {
+	return "markdown"
+}
+
+// processItemToMarkdown renders one course item as Markdown and appends it
+// to buf. The output depends on item.Type:
+//
+//   - "text": sub-item headings (at the given level) and paragraphs
+//   - "list": one bullet per sub-item paragraph
+//   - "multimedia" / "image": media URLs and captions under a heading
+//   - "knowledgeCheck": question, numbered answers (correct ones marked), feedback
+//   - "interactive": sub-item titles in bold under a heading
+//   - "divider": a horizontal rule
+//   - anything else: a generic "<Type> Content" section, only when the item
+//     has sub-items
+//
+// All free-text fields are passed through the HTML cleaner before writing.
+//
+// Parameters:
+//   - buf: The buffer to write the Markdown content to
+//   - item: The course item to process
+//   - level: The heading level for the item (number of # characters)
+func (e *MarkdownExporter) processItemToMarkdown(buf *bytes.Buffer, item models.Item, level int) {
+	headingPrefix := strings.Repeat("#", level)
+
+	switch item.Type {
+	case "text":
+		for _, subItem := range item.Items {
+			if subItem.Heading != "" {
+				// The cleaned text may be empty even when the raw field is not.
+				if heading := e.htmlCleaner.CleanHTML(subItem.Heading); heading != "" {
+					fmt.Fprintf(buf, "%s %s\n\n", headingPrefix, heading)
+				}
+			}
+			if subItem.Paragraph != "" {
+				if paragraph := e.htmlCleaner.CleanHTML(subItem.Paragraph); paragraph != "" {
+					fmt.Fprintf(buf, "%s\n\n", paragraph)
+				}
+			}
+		}
+
+	case "list":
+		for _, subItem := range item.Items {
+			if subItem.Paragraph != "" {
+				if paragraph := e.htmlCleaner.CleanHTML(subItem.Paragraph); paragraph != "" {
+					fmt.Fprintf(buf, "- %s\n", paragraph)
+				}
+			}
+		}
+		buf.WriteString("\n")
+
+	case "multimedia":
+		fmt.Fprintf(buf, "%s Media Content\n\n", headingPrefix)
+		for _, subItem := range item.Items {
+			if subItem.Media != nil {
+				if subItem.Media.Video != nil {
+					fmt.Fprintf(buf, "**Video**: %s\n", subItem.Media.Video.OriginalUrl)
+					if subItem.Media.Video.Duration > 0 {
+						fmt.Fprintf(buf, "**Duration**: %d seconds\n", subItem.Media.Video.Duration)
+					}
+				}
+				if subItem.Media.Image != nil {
+					fmt.Fprintf(buf, "**Image**: %s\n", subItem.Media.Image.OriginalUrl)
+				}
+			}
+			if subItem.Caption != "" {
+				fmt.Fprintf(buf, "*%s*\n", e.htmlCleaner.CleanHTML(subItem.Caption))
+			}
+		}
+		buf.WriteString("\n")
+
+	case "image":
+		fmt.Fprintf(buf, "%s Image\n\n", headingPrefix)
+		for _, subItem := range item.Items {
+			if subItem.Media != nil && subItem.Media.Image != nil {
+				fmt.Fprintf(buf, "**Image**: %s\n", subItem.Media.Image.OriginalUrl)
+			}
+			if subItem.Caption != "" {
+				fmt.Fprintf(buf, "*%s*\n", e.htmlCleaner.CleanHTML(subItem.Caption))
+			}
+		}
+		buf.WriteString("\n")
+
+	case "knowledgeCheck":
+		fmt.Fprintf(buf, "%s Knowledge Check\n\n", headingPrefix)
+		for _, subItem := range item.Items {
+			if subItem.Title != "" {
+				fmt.Fprintf(buf, "**Question**: %s\n\n", e.htmlCleaner.CleanHTML(subItem.Title))
+			}
+
+			// Only emit the answers label when there is something to list.
+			if len(subItem.Answers) > 0 {
+				buf.WriteString("**Answers**:\n")
+				for i, answer := range subItem.Answers {
+					correctMark := ""
+					if answer.Correct {
+						correctMark = " ✓"
+					}
+					// Answer titles are cleaned like every other text field.
+					title := e.htmlCleaner.CleanHTML(answer.Title)
+					fmt.Fprintf(buf, "%d. %s%s\n", i+1, title, correctMark)
+				}
+			}
+
+			if subItem.Feedback != "" {
+				fmt.Fprintf(buf, "\n**Feedback**: %s\n", e.htmlCleaner.CleanHTML(subItem.Feedback))
+			}
+		}
+		buf.WriteString("\n")
+
+	case "interactive":
+		fmt.Fprintf(buf, "%s Interactive Content\n\n", headingPrefix)
+		for _, subItem := range item.Items {
+			if subItem.Title != "" {
+				fmt.Fprintf(buf, "**%s**\n\n", e.htmlCleaner.CleanHTML(subItem.Title))
+			}
+		}
+
+	case "divider":
+		buf.WriteString("---\n\n")
+
+	default:
+		// Unknown types: emit a generic section, but only when there is
+		// sub-item content to show under it.
+		if len(item.Items) > 0 {
+			fmt.Fprintf(buf, "%s %s Content\n\n", headingPrefix, strings.Title(item.Type))
+			for _, subItem := range item.Items {
+				if subItem.Title != "" {
+					fmt.Fprintf(buf, "**%s**\n\n", e.htmlCleaner.CleanHTML(subItem.Title))
+				}
+				if subItem.Paragraph != "" {
+					fmt.Fprintf(buf, "%s\n\n", e.htmlCleaner.CleanHTML(subItem.Paragraph))
+				}
+			}
+		}
+	}
+}