Adds DOCX and Markdown export functionality

Introduces a modular exporter pattern supporting DOCX and Markdown formats
by implementing Exporter interfaces and restructuring application logic.

Enhances CI to install UPX for binary compression, excluding recent macOS
binaries due to compatibility issues.

Enables CGO when building binaries for all platforms, addressing potential
cross-platform compatibility concerns.

Bumps version to 0.1.1.
This commit is contained in:
2025-05-25 13:01:58 +02:00
parent 48cad7144f
commit 9de7222ec3
15 changed files with 1096 additions and 600 deletions

663
main.go
View File

@ -1,622 +1,89 @@
// Package main provides the entry point for the articulate-parser application.
// This application fetches Articulate Rise courses from URLs or local files and
// exports them to different formats such as Markdown or DOCX.
package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/unidoc/unioffice/document"
"github.com/kjanat/articulate-parser/internal/version"
"github.com/kjanat/articulate-parser/internal/exporters"
"github.com/kjanat/articulate-parser/internal/services"
)
// Core data structures based on the Articulate Rise JSON format

// Course is the top-level value that FetchCourse and LoadCourseFromFile
// decode a JSON payload into.
type Course struct {
ShareID string `json:"shareId"` // written to the exported metadata as "Share ID"
Author string `json:"author"` // not read by the exporters in this file
Course CourseInfo `json:"course"` // title, description and lessons
LabelSet LabelSet `json:"labelSet"` // not read by the exporters in this file
}
// CourseInfo holds the fields nested under the "course" JSON key of a Course.
// The exporters in this file read ID, Title, Description, NavigationMode,
// Lessons and (Markdown only) ExportSettings.Format.
type CourseInfo struct {
ID string `json:"id"`
Title string `json:"title"`
Description string `json:"description"` // passed through cleanHTML before export
Color string `json:"color"`
NavigationMode string `json:"navigationMode"`
Lessons []Lesson `json:"lessons"`
CoverImage *Media `json:"coverImage,omitempty"` // optional; nil when absent from the JSON
ExportSettings *ExportSettings `json:"exportSettings,omitempty"` // optional; nil-checked before use
}
// Lesson is one entry of CourseInfo.Lessons.
// The exporters treat an entry whose Type is "section" as a heading only and
// skip its Description and Items; every other entry is rendered as a lesson.
type Lesson struct {
ID string `json:"id"`
Title string `json:"title"`
Description string `json:"description"` // passed through cleanHTML before export
Type string `json:"type"` // "section" is the only value the exporters special-case
Icon string `json:"icon"`
Items []Item `json:"items"`
Position interface{} `json:"position"` // left untyped; not read by code in this file
Ready bool `json:"ready"`
CreatedAt string `json:"createdAt"` // kept as the raw JSON string, never parsed here
UpdatedAt string `json:"updatedAt"` // kept as the raw JSON string, never parsed here
}
// Item is one content block inside a Lesson.
// Its Type selects the rendering branch in processItemToMarkdown and
// processItemToDocx; only Type and Items are read by those functions.
type Item struct {
ID string `json:"id"`
Type string `json:"type"` // e.g. "text", "list", "multimedia", "image", "knowledgeCheck"
Family string `json:"family"`
Variant string `json:"variant"`
Items []SubItem `json:"items"`
Settings interface{} `json:"settings"` // left untyped; not read by code in this file
Data interface{} `json:"data"` // left untyped; not read by code in this file
Media *Media `json:"media,omitempty"` // optional; the exporters read SubItem.Media instead
}
// SubItem is one element of Item.Items. It carries the union of fields used
// by the different item types; which ones the exporters read depends on the
// parent Item.Type.
type SubItem struct {
ID string `json:"id"`
Type string `json:"type,omitempty"`
Title string `json:"title,omitempty"` // question text for "knowledgeCheck"; bullet text for unknown types
Heading string `json:"heading,omitempty"` // read for "text" items
Paragraph string `json:"paragraph,omitempty"` // read for "text" and "list" items
Caption string `json:"caption,omitempty"` // read for "multimedia" and "image" items
Media *Media `json:"media,omitempty"` // read for "multimedia" and "image" items
Answers []Answer `json:"answers,omitempty"` // read for "knowledgeCheck" items
Feedback string `json:"feedback,omitempty"` // read for "knowledgeCheck" items
Front *CardSide `json:"front,omitempty"` // read for "interactive" items (Markdown export only)
Back *CardSide `json:"back,omitempty"` // read for "interactive" items (Markdown export only)
}
// Answer is one choice listed under a knowledge-check SubItem.
type Answer struct {
ID string `json:"id"`
Title string `json:"title"` // answer text; passed through cleanHTML before export
Correct bool `json:"correct"` // when true the exporters append " ✓" (Markdown) or " [CORRECT]" (DOCX)
MatchTitle string `json:"matchTitle,omitempty"` // not read by the exporters in this file
}
// CardSide is the type of SubItem.Front and SubItem.Back.
// Only Description is read, by the "interactive" branch of the Markdown export.
type CardSide struct {
Media *Media `json:"media,omitempty"`
Description string `json:"description,omitempty"`
}
// Media groups an optional image and an optional video.
// The exporters nil-check each pointer independently before reading it.
type Media struct {
Image *ImageMedia `json:"image,omitempty"`
Video *VideoMedia `json:"video,omitempty"`
}
// ImageMedia describes an image attachment.
// Of these fields only OriginalUrl is read by the exporters in this file.
type ImageMedia struct {
Key string `json:"key"`
Type string `json:"type"`
Width int `json:"width,omitempty"`
Height int `json:"height,omitempty"`
CrushedKey string `json:"crushedKey,omitempty"`
OriginalUrl string `json:"originalUrl"` // written verbatim after "Image:" in the exports
UseCrushedKey bool `json:"useCrushedKey,omitempty"`
}
// VideoMedia describes a video attachment.
// Of these fields only OriginalUrl and Duration are read by the exporters in
// this file.
type VideoMedia struct {
Key string `json:"key"`
URL string `json:"url"`
Type string `json:"type"`
Poster string `json:"poster,omitempty"`
Duration int `json:"duration,omitempty"` // printed by the Markdown export as "<n> seconds" when > 0
InputKey string `json:"inputKey,omitempty"`
Thumbnail string `json:"thumbnail,omitempty"`
OriginalUrl string `json:"originalUrl"` // written verbatim after "Video:" in the exports
}
// ExportSettings is the optional "exportSettings" object of a CourseInfo.
type ExportSettings struct {
Title string `json:"title"` // not read by the exporters in this file
Format string `json:"format"` // printed as "Export Format" in the Markdown metadata
}
// LabelSet is decoded from the "labelSet" key of a Course.
// None of its fields are read by the exporters in this file.
type LabelSet struct {
ID string `json:"id"`
Name string `json:"name"`
Labels map[string]string `json:"labels"`
}
// Parser main struct

// ArticulateParser fetches or loads a Course and exports it to Markdown or
// DOCX through its methods.
type ArticulateParser struct {
BaseURL string // prefix used by BuildAPIURL, e.g. "https://rise.articulate.com"
Client *http.Client // HTTP client used by FetchCourse
}
// NewArticulateParser returns a parser that points at the public Rise host
// and owns an HTTP client with a 30-second timeout.
func NewArticulateParser() *ArticulateParser {
	httpClient := &http.Client{Timeout: 30 * time.Second}

	parser := new(ArticulateParser)
	parser.BaseURL = "https://rise.articulate.com"
	parser.Client = httpClient
	return parser
}
// shareIDPattern captures the identifier that follows "/share/" in a URI such
// as https://rise.articulate.com/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO#/.
// It is compiled once at package initialisation rather than on every call.
var shareIDPattern = regexp.MustCompile(`/share/([a-zA-Z0-9_-]+)`)

// ExtractShareID returns the share identifier embedded in uri.
//
// The identifier is the run of letters, digits, '_' and '-' that directly
// follows the first "/share/" segment. An error is returned when uri contains
// no such segment.
func (p *ArticulateParser) ExtractShareID(uri string) (string, error) {
	matches := shareIDPattern.FindStringSubmatch(uri)
	// matches[0] is the whole match and matches[1] the captured ID; anything
	// shorter means the pattern did not match.
	if len(matches) < 2 {
		return "", fmt.Errorf("could not extract share ID from URI: %s", uri)
	}
	return matches[1], nil
}
// BuildAPIURL returns the boot endpoint for shareID, rooted at p.BaseURL.
func (p *ArticulateParser) BuildAPIURL(shareID string) string {
	const endpointFormat = "%s/api/rise-runtime/boot/share/%s"

	endpoint := fmt.Sprintf(endpointFormat, p.BaseURL, shareID)
	return endpoint
}
// FetchCourse downloads and decodes the course referenced by a share URI.
//
// It extracts the share ID from uri, issues a GET against the URL produced by
// BuildAPIURL using p.Client, requires a 200 response, reads the whole body
// and unmarshals it into a Course. Every failure is returned as an error with
// a nil course.
func (p *ArticulateParser) FetchCourse(uri string) (*Course, error) {
	shareID, err := p.ExtractShareID(uri)
	if err != nil {
		return nil, err
	}

	resp, err := p.Client.Get(p.BuildAPIURL(shareID))
	if err != nil {
		return nil, fmt.Errorf("failed to fetch course data: %w", err)
	}
	defer resp.Body.Close()

	if status := resp.StatusCode; status != http.StatusOK {
		return nil, fmt.Errorf("API returned status %d", status)
	}

	payload, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read response body: %w", readErr)
	}

	decoded := new(Course)
	if decodeErr := json.Unmarshal(payload, decoded); decodeErr != nil {
		return nil, fmt.Errorf("failed to unmarshal JSON: %w", decodeErr)
	}
	return decoded, nil
}
// LoadCourseFromFile reads the file at filePath and unmarshals its JSON
// content into a Course. Read and decode failures are returned as wrapped
// errors with a nil course.
func (p *ArticulateParser) LoadCourseFromFile(filePath string) (*Course, error) {
	raw, readErr := os.ReadFile(filePath)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read file: %w", readErr)
	}

	decoded := new(Course)
	if decodeErr := json.Unmarshal(raw, decoded); decodeErr != nil {
		return nil, fmt.Errorf("failed to unmarshal JSON: %w", decodeErr)
	}
	return decoded, nil
}
// HTML cleaner utility

// Patterns and the entity table used by cleanHTML. They are built once at
// package initialisation because cleanHTML is called for every text fragment
// of a course.
var (
	// htmlTagPattern matches anything between '<' and the next '>'.
	htmlTagPattern = regexp.MustCompile(`<[^>]*>`)
	// whitespaceRunPattern matches one or more consecutive whitespace characters.
	whitespaceRunPattern = regexp.MustCompile(`\s+`)
	// htmlEntityReplacer decodes the supported entities in a single pass, so
	// the output of one replacement is never decoded a second time.
	htmlEntityReplacer = strings.NewReplacer(
		"&nbsp;", " ",
		"&amp;", "&",
		"&lt;", "<",
		"&gt;", ">",
		"&quot;", "\"",
		"&#39;", "'",
		"&iuml;", "ï",
		"&euml;", "ë",
		"&eacute;", "é",
	)
)

// cleanHTML converts an HTML fragment to plain text.
//
// It removes tags while keeping their inner text, decodes a fixed set of
// entities (&nbsp; &amp; &lt; &gt; &quot; &#39; &iuml; &euml; &eacute;),
// collapses every run of whitespace into a single space and trims the result.
//
// Entities are decoded exactly once: "&amp;lt;" yields the literal text
// "&lt;", not "<". Tags are stripped before decoding, so markup that only
// appears after decoding (e.g. from "&lt;b&gt;") is kept as text.
func cleanHTML(html string) string {
	text := htmlTagPattern.ReplaceAllString(html, "")
	text = htmlEntityReplacer.Replace(text)
	text = whitespaceRunPattern.ReplaceAllString(text, " ")
	return strings.TrimSpace(text)
}
// Markdown export functions

// ExportToMarkdown renders course as Markdown and writes it to outputPath
// with mode 0644.
//
// The document starts with the course title, the cleaned description (when
// present) and a "Course Information" list. Each lesson follows: an entry of
// type "section" becomes a top-level heading only, any other entry becomes a
// "Lesson N" heading with its description and items. N is the entry's
// position in the lessons slice plus one, so section entries also consume a
// number. The error from os.WriteFile is returned unchanged.
func (p *ArticulateParser) ExportToMarkdown(course *Course, outputPath string) error {
	var md bytes.Buffer
	info := &course.Course

	// Header: title and optional description.
	fmt.Fprintf(&md, "# %s\n\n", info.Title)
	if info.Description != "" {
		fmt.Fprintf(&md, "%s\n\n", cleanHTML(info.Description))
	}

	// Metadata list.
	md.WriteString("## Course Information\n\n")
	fmt.Fprintf(&md, "- **Course ID**: %s\n", info.ID)
	fmt.Fprintf(&md, "- **Share ID**: %s\n", course.ShareID)
	fmt.Fprintf(&md, "- **Navigation Mode**: %s\n", info.NavigationMode)
	if settings := info.ExportSettings; settings != nil {
		fmt.Fprintf(&md, "- **Export Format**: %s\n", settings.Format)
	}
	md.WriteString("\n---\n\n")

	// Lessons and section headings.
	for idx := range info.Lessons {
		lesson := &info.Lessons[idx]

		if lesson.Type == "section" {
			fmt.Fprintf(&md, "# %s\n\n", lesson.Title)
			continue
		}

		fmt.Fprintf(&md, "## Lesson %d: %s\n\n", idx+1, lesson.Title)
		if lesson.Description != "" {
			fmt.Fprintf(&md, "%s\n\n", cleanHTML(lesson.Description))
		}

		// Item headings sit one level below the lesson heading.
		for _, item := range lesson.Items {
			p.processItemToMarkdown(&md, item, 3)
		}
		md.WriteString("\n---\n\n")
	}

	return os.WriteFile(outputPath, md.Bytes(), 0644)
}
// processItemToMarkdown appends the Markdown rendering of item to buf.
//
// level is the number of '#' characters used for any heading the item emits.
// The rendering depends on item.Type: "text", "list", "multimedia", "image",
// "knowledgeCheck", "interactive" and "divider" have dedicated layouts; any
// other type with at least one sub-item is rendered as a titled bullet list
// of sub-item titles, and produces no output when it has no sub-items.
func (p *ArticulateParser) processItemToMarkdown(buf *bytes.Buffer, item Item, level int) {
	hashes := strings.Repeat("#", level)

	switch item.Type {
	case "text":
		for _, sub := range item.Items {
			if sub.Heading != "" {
				if heading := cleanHTML(sub.Heading); heading != "" {
					fmt.Fprintf(buf, "%s %s\n\n", hashes, heading)
				}
			}
			if sub.Paragraph != "" {
				if body := cleanHTML(sub.Paragraph); body != "" {
					fmt.Fprintf(buf, "%s\n\n", body)
				}
			}
		}

	case "list":
		for _, sub := range item.Items {
			if sub.Paragraph == "" {
				continue
			}
			if entry := cleanHTML(sub.Paragraph); entry != "" {
				fmt.Fprintf(buf, "- %s\n", entry)
			}
		}
		buf.WriteString("\n")

	case "multimedia":
		fmt.Fprintf(buf, "%s Media Content\n\n", hashes)
		for _, sub := range item.Items {
			if media := sub.Media; media != nil {
				if video := media.Video; video != nil {
					fmt.Fprintf(buf, "**Video**: %s\n", video.OriginalUrl)
					if video.Duration > 0 {
						fmt.Fprintf(buf, "- Duration: %d seconds\n", video.Duration)
					}
				}
				if image := media.Image; image != nil {
					fmt.Fprintf(buf, "**Image**: %s\n", image.OriginalUrl)
				}
			}
			if sub.Caption != "" {
				fmt.Fprintf(buf, "*%s*\n", cleanHTML(sub.Caption))
			}
		}
		buf.WriteString("\n")

	case "image":
		fmt.Fprintf(buf, "%s Image\n\n", hashes)
		for _, sub := range item.Items {
			if sub.Media != nil && sub.Media.Image != nil {
				fmt.Fprintf(buf, "**Image**: %s\n", sub.Media.Image.OriginalUrl)
			}
			if sub.Caption != "" {
				fmt.Fprintf(buf, "*%s*\n", cleanHTML(sub.Caption))
			}
		}
		buf.WriteString("\n")

	case "knowledgeCheck":
		fmt.Fprintf(buf, "%s Knowledge Check\n\n", hashes)
		for _, sub := range item.Items {
			if sub.Title != "" {
				fmt.Fprintf(buf, "**Question**: %s\n\n", cleanHTML(sub.Title))
			}
			// The answers label is written even when the list is empty.
			buf.WriteString("**Answers**:\n")
			for n, answer := range sub.Answers {
				mark := ""
				if answer.Correct {
					mark = " ✓"
				}
				fmt.Fprintf(buf, "%d. %s%s\n", n+1, cleanHTML(answer.Title), mark)
			}
			if sub.Feedback != "" {
				fmt.Fprintf(buf, "\n**Feedback**: %s\n", cleanHTML(sub.Feedback))
			}
		}
		buf.WriteString("\n")

	case "interactive":
		fmt.Fprintf(buf, "%s Interactive Content\n\n", hashes)
		for _, sub := range item.Items {
			if front := sub.Front; front != nil && front.Description != "" {
				fmt.Fprintf(buf, "**Front**: %s\n", cleanHTML(front.Description))
			}
			if back := sub.Back; back != nil && back.Description != "" {
				fmt.Fprintf(buf, "**Back**: %s\n", cleanHTML(back.Description))
			}
		}
		buf.WriteString("\n")

	case "divider":
		buf.WriteString("---\n\n")

	default:
		// Unknown type: nothing to show without sub-items.
		if len(item.Items) == 0 {
			break
		}
		fmt.Fprintf(buf, "%s %s Content\n\n", hashes, strings.Title(item.Type))
		for _, sub := range item.Items {
			if sub.Title == "" {
				continue
			}
			fmt.Fprintf(buf, "- %s\n", cleanHTML(sub.Title))
		}
		buf.WriteString("\n")
	}
}
// DOCX export functions

// ExportToDocx renders course into a new document and saves it to outputPath.
//
// The document contains, in order: the course title (bold, size 20), the
// cleaned description when present, a bold "Course Information" line, one
// paragraph holding the course ID, share ID and navigation mode, and then the
// lessons. An entry of type "section" becomes a bold size-18 paragraph only;
// any other entry becomes a bold size-16 "Lesson N" paragraph followed by its
// description and items. N is the entry's position in the lessons slice plus
// one. The error from SaveToFile is returned unchanged.
func (p *ArticulateParser) ExportToDocx(course *Course, outputPath string) error {
	doc := document.New()
	info := &course.Course

	// Course title.
	titleRun := doc.AddParagraph().AddRun()
	titleRun.AddText(info.Title)
	titleRun.Properties().SetSize(20)
	titleRun.Properties().SetBold(true)

	// Optional course description.
	if info.Description != "" {
		doc.AddParagraph().AddRun().AddText(cleanHTML(info.Description))
	}

	// Metadata heading followed by the metadata text itself.
	metaHeading := doc.AddParagraph().AddRun()
	metaHeading.Properties().SetBold(true)
	metaHeading.AddText("Course Information")

	// The newline characters are handed to AddText as part of a single run.
	metaText := fmt.Sprintf("Course ID: %s\nShare ID: %s\nNavigation Mode: %s",
		info.ID, course.ShareID, info.NavigationMode)
	doc.AddParagraph().AddRun().AddText(metaText)

	for idx := range info.Lessons {
		lesson := &info.Lessons[idx]

		if lesson.Type == "section" {
			sectionRun := doc.AddParagraph().AddRun()
			sectionRun.AddText(lesson.Title)
			sectionRun.Properties().SetSize(18)
			sectionRun.Properties().SetBold(true)
			continue
		}

		headingRun := doc.AddParagraph().AddRun()
		headingRun.AddText(fmt.Sprintf("Lesson %d: %s", idx+1, lesson.Title))
		headingRun.Properties().SetSize(16)
		headingRun.Properties().SetBold(true)

		if lesson.Description != "" {
			doc.AddParagraph().AddRun().AddText(cleanHTML(lesson.Description))
		}

		for _, item := range lesson.Items {
			p.processItemToDocx(doc, item)
		}
	}

	return doc.SaveToFile(outputPath)
}
// processItemToDocx appends the rendering of item to doc as new paragraphs.
//
// Handled item types are "text", "list", "multimedia"/"image" (shared layout)
// and "knowledgeCheck". Items of any other type add nothing to the document.
func (p *ArticulateParser) processItemToDocx(doc *document.Document, item Item) {
	switch item.Type {
	case "text":
		for _, sub := range item.Items {
			if sub.Heading != "" {
				if heading := cleanHTML(sub.Heading); heading != "" {
					headingRun := doc.AddParagraph().AddRun()
					headingRun.AddText(heading)
					headingRun.Properties().SetBold(true)
				}
			}
			if sub.Paragraph != "" {
				if body := cleanHTML(sub.Paragraph); body != "" {
					doc.AddParagraph().AddRun().AddText(body)
				}
			}
		}

	case "list":
		for _, sub := range item.Items {
			if sub.Paragraph == "" {
				continue
			}
			if entry := cleanHTML(sub.Paragraph); entry != "" {
				doc.AddParagraph().AddRun().AddText("• " + entry)
			}
		}

	case "multimedia", "image":
		// A single italic marker precedes all media entries of the item.
		markerRun := doc.AddParagraph().AddRun()
		markerRun.AddText("[Media Content]")
		markerRun.Properties().SetItalic(true)

		for _, sub := range item.Items {
			if media := sub.Media; media != nil {
				if video := media.Video; video != nil {
					doc.AddParagraph().AddRun().AddText(fmt.Sprintf("Video: %s", video.OriginalUrl))
				}
				if image := media.Image; image != nil {
					doc.AddParagraph().AddRun().AddText(fmt.Sprintf("Image: %s", image.OriginalUrl))
				}
			}
			if sub.Caption != "" {
				captionRun := doc.AddParagraph().AddRun()
				captionRun.AddText(cleanHTML(sub.Caption))
				captionRun.Properties().SetItalic(true)
			}
		}

	case "knowledgeCheck":
		for _, sub := range item.Items {
			if sub.Title != "" {
				questionRun := doc.AddParagraph().AddRun()
				questionRun.AddText("Question: " + cleanHTML(sub.Title))
				questionRun.Properties().SetBold(true)
			}

			for n, answer := range sub.Answers {
				text := cleanHTML(answer.Title)
				mark := ""
				if answer.Correct {
					mark = " [CORRECT]"
				}
				doc.AddParagraph().AddRun().AddText(fmt.Sprintf("%d. %s%s", n+1, text, mark))
			}

			if sub.Feedback != "" {
				feedbackRun := doc.AddParagraph().AddRun()
				feedbackRun.AddText("Feedback: " + cleanHTML(sub.Feedback))
				feedbackRun.Properties().SetItalic(true)
			}
		}
	}
}
// main is the entry point of the application.
// It handles command-line arguments, sets up dependencies,
// and coordinates the parsing and exporting of courses.
//
// NOTE(review): this body appears to interleave two different argument
// handling flows and does not look compilable as written. Confirm which flow
// is intended before changing any statement:
//   - `if len(os.Args) < 3 {` is opened and never closed before the
//     `len(os.Args) < 4` usage check, so the braces of the function do not
//     balance;
//   - `format` is declared twice with `:=` in the same scope, and `parser`
//     is declared a second time inside the nested scope;
//   - one flow loads a *Course through NewArticulateParser and exports it
//     with ExportToMarkdown / ExportToDocx, deriving a default output path
//     from the course title; the other delegates everything to
//     app.ProcessCourseFromURI / app.ProcessCourseFromFile and requires an
//     explicit output path.
func main() {
// Handle version flag
if len(os.Args) > 1 && (os.Args[1] == "-v" || os.Args[1] == "--version") {
fmt.Printf("articulate-parser %s\n", version.Version)
fmt.Printf("Build time: %s\n", version.BuildTime)
fmt.Printf("Commit: %s\n", version.GitCommit)
os.Exit(0)
}
// Dependency injection setup
htmlCleaner := services.NewHTMLCleaner()
parser := services.NewArticulateParser()
exporterFactory := exporters.NewFactory(htmlCleaner)
app := services.NewApp(parser, exporterFactory)
if len(os.Args) < 3 {
fmt.Println("Usage: articulate-parser <input_uri_or_file> <output_format> [output_path]")
fmt.Println(" articulate-parser -v|--version")
fmt.Println(" input_uri_or_file: Articulate Rise URI or local JSON file path")
fmt.Println(" output_format: md (Markdown) or docx (Word Document)")
fmt.Println(" output_path: Optional output file path")
// Check for required command-line arguments
if len(os.Args) < 4 {
fmt.Printf("Usage: %s <source> <format> <output>\n", os.Args[0])
fmt.Printf(" source: URI or file path to the course\n")
fmt.Printf(" format: export format (%s)\n", joinStrings(app.GetSupportedFormats(), ", "))
fmt.Printf(" output: output file path\n")
fmt.Println("\nExample:")
fmt.Printf(" %s articulate-sample.json markdown output.md\n", os.Args[0])
fmt.Printf(" %s https://rise.articulate.com/share/xyz docx output.docx\n", os.Args[0])
os.Exit(1)
}
input := os.Args[1]
format := strings.ToLower(os.Args[2])
source := os.Args[1]
format := os.Args[2]
output := os.Args[3]
if format != "md" && format != "docx" {
log.Fatal("Output format must be 'md' or 'docx'")
}
parser := NewArticulateParser()
var course *Course
var err error
// Determine if input is a URI or file path
if strings.HasPrefix(input, "http") {
course, err = parser.FetchCourse(input)
// Determine if source is a URI or file path
if isURI(source) {
err = app.ProcessCourseFromURI(source, format, output)
} else {
course, err = parser.LoadCourseFromFile(input)
err = app.ProcessCourseFromFile(source, format, output)
}
if err != nil {
log.Fatalf("Failed to load course: %v", err)
log.Fatalf("Error processing course: %v", err)
}
// Determine output path
var outputPath string
if len(os.Args) > 3 {
outputPath = os.Args[3]
} else {
baseDir := "output"
os.MkdirAll(baseDir, 0755)
// Create safe filename from course title
safeTitle := regexp.MustCompile(`[^a-zA-Z0-9\-_]`).ReplaceAllString(course.Course.Title, "_")
if safeTitle == "" {
safeTitle = "articulate_course"
}
outputPath = filepath.Join(baseDir, fmt.Sprintf("%s.%s", safeTitle, format))
}
// Export based on format
switch format {
case "md":
err = parser.ExportToMarkdown(course, outputPath)
case "docx":
err = parser.ExportToDocx(course, outputPath)
}
if err != nil {
log.Fatalf("Failed to export course: %v", err)
}
fmt.Printf("Course successfully exported to: %s\n", outputPath)
fmt.Printf("Course: %s (%d lessons)\n", course.Course.Title, len(course.Course.Lessons))
fmt.Printf("Successfully exported course to %s\n", output)
}
// isURI checks if a string is a URI by looking for http:// or https:// prefixes.
//
// The check is a plain, case-sensitive prefix test and is applied the same
// way to both schemes: a string consisting of only the scheme (for example
// "http://") is reported as a URI, exactly like "https://".
//
// Parameters:
//   - str: The string to check
//
// Returns:
//   - true if the string appears to be a URI, false otherwise
func isURI(str string) bool {
	return strings.HasPrefix(str, "http://") || strings.HasPrefix(str, "https://")
}
// joinStrings concatenates a slice of strings using the specified separator.
//
// It delegates to strings.Join, which sizes the result once instead of
// reallocating on every element.
//
// Parameters:
//   - strs: The slice of strings to join
//   - sep: The separator to insert between each string
//
// Returns:
//   - A single string with all elements joined by the separator; the empty
//     string when strs is empty
func joinStrings(strs []string, sep string) string {
	return strings.Join(strs, sep)
}