diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cca1566..fd5ed68 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -105,6 +105,11 @@ jobs: - name: Run tests run: go test -v ./... + - name: Install UPX + run: | + sudo apt-get update + sudo apt-get install -y upx + - name: Build binaries run: | # Set the build time environment variable @@ -121,6 +126,32 @@ jobs: --verbose \ -ldflags "-s -w -X github.com/kjanat/articulate-parser/internal/version.Version=${{ github.ref_name }} -X github.com/kjanat/articulate-parser/internal/version.BuildTime=$BUILD_TIME -X github.com/kjanat/articulate-parser/internal/version.GitCommit=${{ github.sha }}" + - name: Compress binaries with UPX + run: | + echo "Compressing binaries with UPX..." + cd build/ + + # Get original sizes + echo "Original sizes:" + ls -lah + echo "" + + # Compress all binaries except Darwin (macOS) binaries as UPX doesn't work well with recent macOS versions + for binary in articulate-parser-*; do + if [[ "$binary" == *"darwin"* ]]; then + echo "Skipping UPX compression for $binary (macOS compatibility)" + else + echo "Compressing $binary..." + upx --best --lzma "$binary" || { + echo "Warning: UPX compression failed for $binary, keeping original" + } + fi + done + + echo "" + echo "Final sizes:" + ls -lah + - name: Upload a Build Artifact uses: actions/upload-artifact@v4.6.2 with: diff --git a/internal/exporters/docx.go b/internal/exporters/docx.go new file mode 100644 index 0000000..ce65b42 --- /dev/null +++ b/internal/exporters/docx.go @@ -0,0 +1,196 @@ +// Package exporters provides implementations of the Exporter interface +// for converting Articulate Rise courses into various file formats. 
+package exporters + +import ( + "fmt" + "strings" + + "github.com/kjanat/articulate-parser/internal/interfaces" + "github.com/kjanat/articulate-parser/internal/models" + "github.com/kjanat/articulate-parser/internal/services" + "github.com/unidoc/unioffice/document" +) + +// DocxExporter implements the Exporter interface for DOCX format. +// It converts Articulate Rise course data into a Microsoft Word document +// using the unioffice/document package. +type DocxExporter struct { + // htmlCleaner is used to convert HTML content to plain text + htmlCleaner *services.HTMLCleaner +} + +// NewDocxExporter creates a new DocxExporter instance. +// It takes an HTMLCleaner to handle HTML content conversion. +// +// Parameters: +// - htmlCleaner: Service for cleaning HTML content in course data +// +// Returns: +// - An implementation of the Exporter interface for DOCX format +func NewDocxExporter(htmlCleaner *services.HTMLCleaner) interfaces.Exporter { + return &DocxExporter{ + htmlCleaner: htmlCleaner, + } +} + +// Export exports the course to a DOCX file. +// It creates a Word document with formatted content based on the course data +// and saves it to the specified output path. 
+// +// Parameters: +// - course: The course data model to export +// - outputPath: The file path where the DOCX content will be written +// +// Returns: +// - An error if creating or saving the document fails +func (e *DocxExporter) Export(course *models.Course, outputPath string) error { + doc := document.New() + + // Add title + titlePara := doc.AddParagraph() + titleRun := titlePara.AddRun() + titleRun.AddText(course.Course.Title) + titleRun.Properties().SetBold(true) + titleRun.Properties().SetSize(16) + + // Add description if available + if course.Course.Description != "" { + descPara := doc.AddParagraph() + descRun := descPara.AddRun() + cleanDesc := e.htmlCleaner.CleanHTML(course.Course.Description) + descRun.AddText(cleanDesc) + } + + // Add each lesson + for _, lesson := range course.Course.Lessons { + e.exportLesson(doc, &lesson) + } + + // Ensure output directory exists and add .docx extension + if !strings.HasSuffix(strings.ToLower(outputPath), ".docx") { + outputPath = outputPath + ".docx" + } + + return doc.SaveToFile(outputPath) +} + +// exportLesson adds a lesson to the document with appropriate formatting. +// It creates a lesson heading, adds the description, and processes all items in the lesson. 
+// +// Parameters: +// - doc: The Word document being created +// - lesson: The lesson data model to export +func (e *DocxExporter) exportLesson(doc *document.Document, lesson *models.Lesson) { + // Add lesson title + lessonPara := doc.AddParagraph() + lessonRun := lessonPara.AddRun() + lessonRun.AddText(fmt.Sprintf("Lesson: %s", lesson.Title)) + lessonRun.Properties().SetBold(true) + lessonRun.Properties().SetSize(14) + + // Add lesson description if available + if lesson.Description != "" { + descPara := doc.AddParagraph() + descRun := descPara.AddRun() + cleanDesc := e.htmlCleaner.CleanHTML(lesson.Description) + descRun.AddText(cleanDesc) + } + + // Add each item in the lesson + for _, item := range lesson.Items { + e.exportItem(doc, &item) + } +} + +// exportItem adds an item to the document. +// It creates an item heading and processes all sub-items within the item. +// +// Parameters: +// - doc: The Word document being created +// - item: The item data model to export +func (e *DocxExporter) exportItem(doc *document.Document, item *models.Item) { + // Add item type as heading + if item.Type != "" { + itemPara := doc.AddParagraph() + itemRun := itemPara.AddRun() + itemRun.AddText(strings.Title(item.Type)) + itemRun.Properties().SetBold(true) + itemRun.Properties().SetSize(12) + } + + // Add sub-items + for _, subItem := range item.Items { + e.exportSubItem(doc, &subItem) + } +} + +// exportSubItem adds a sub-item to the document. +// It handles different components of a sub-item like title, heading, +// paragraph content, answers, and feedback. 
+// +// Parameters: +// - doc: The Word document being created +// - subItem: The sub-item data model to export +func (e *DocxExporter) exportSubItem(doc *document.Document, subItem *models.SubItem) { + // Add title if available + if subItem.Title != "" { + subItemPara := doc.AddParagraph() + subItemRun := subItemPara.AddRun() + subItemRun.AddText(" " + subItem.Title) // Indented + subItemRun.Properties().SetBold(true) + } + + // Add heading if available + if subItem.Heading != "" { + headingPara := doc.AddParagraph() + headingRun := headingPara.AddRun() + cleanHeading := e.htmlCleaner.CleanHTML(subItem.Heading) + headingRun.AddText(" " + cleanHeading) // Indented + headingRun.Properties().SetBold(true) + } + + // Add paragraph content if available + if subItem.Paragraph != "" { + contentPara := doc.AddParagraph() + contentRun := contentPara.AddRun() + cleanContent := e.htmlCleaner.CleanHTML(subItem.Paragraph) + contentRun.AddText(" " + cleanContent) // Indented + } + + // Add answers if this is a question + if len(subItem.Answers) > 0 { + answersPara := doc.AddParagraph() + answersRun := answersPara.AddRun() + answersRun.AddText(" Answers:") + answersRun.Properties().SetBold(true) + + for i, answer := range subItem.Answers { + answerPara := doc.AddParagraph() + answerRun := answerPara.AddRun() + prefix := fmt.Sprintf(" %d. ", i+1) + if answer.Correct { + prefix += "✓ " + } + cleanAnswer := e.htmlCleaner.CleanHTML(answer.Title) + answerRun.AddText(prefix + cleanAnswer) + } + } + + // Add feedback if available + if subItem.Feedback != "" { + feedbackPara := doc.AddParagraph() + feedbackRun := feedbackPara.AddRun() + cleanFeedback := e.htmlCleaner.CleanHTML(subItem.Feedback) + feedbackRun.AddText(" Feedback: " + cleanFeedback) + feedbackRun.Properties().SetItalic(true) + } +} + +// GetSupportedFormat returns the format name this exporter supports. 
+// +// Returns: +// - A string representing the supported format ("docx") +func (e *DocxExporter) GetSupportedFormat() string { + return "docx" +} diff --git a/internal/exporters/factory.go b/internal/exporters/factory.go new file mode 100644 index 0000000..ce1ad53 --- /dev/null +++ b/internal/exporters/factory.go @@ -0,0 +1,63 @@ +// Package exporters provides implementations of the Exporter interface +// for converting Articulate Rise courses into various file formats. +package exporters + +import ( + "fmt" + "strings" + + "github.com/kjanat/articulate-parser/internal/interfaces" + "github.com/kjanat/articulate-parser/internal/services" +) + +// Factory implements the ExporterFactory interface. +// It creates appropriate exporter instances based on the requested format. +type Factory struct { + // htmlCleaner is used by exporters to convert HTML content to plain text + htmlCleaner *services.HTMLCleaner +} + +// NewFactory creates a new exporter factory. +// It takes an HTMLCleaner instance that will be passed to the exporters +// created by this factory. +// +// Parameters: +// - htmlCleaner: Service for cleaning HTML content in course data +// +// Returns: +// - An implementation of the ExporterFactory interface +func NewFactory(htmlCleaner *services.HTMLCleaner) interfaces.ExporterFactory { + return &Factory{ + htmlCleaner: htmlCleaner, + } +} + +// CreateExporter creates an exporter for the specified format. +// It returns an appropriate exporter implementation based on the format string. +// Format strings are case-insensitive. 
+// +// Parameters: +// - format: The desired export format (e.g., "markdown", "docx") +// +// Returns: +// - An implementation of the Exporter interface if the format is supported +// - An error if the format is not supported +func (f *Factory) CreateExporter(format string) (interfaces.Exporter, error) { + switch strings.ToLower(format) { + case "markdown", "md": + return NewMarkdownExporter(f.htmlCleaner), nil + case "docx", "word": + return NewDocxExporter(f.htmlCleaner), nil + default: + return nil, fmt.Errorf("unsupported export format: %s", format) + } +} + +// GetSupportedFormats returns a list of all supported export formats. +// This includes both primary format names and their aliases. +// +// Returns: +// - A string slice containing all supported format names +func (f *Factory) GetSupportedFormats() []string { + return []string{"markdown", "md", "docx", "word"} +} diff --git a/internal/exporters/markdown.go b/internal/exporters/markdown.go new file mode 100644 index 0000000..adb2b24 --- /dev/null +++ b/internal/exporters/markdown.go @@ -0,0 +1,225 @@ +// Package exporters provides implementations of the Exporter interface +// for converting Articulate Rise courses into various file formats. +package exporters + +import ( + "bytes" + "fmt" + "os" + "strings" + + "github.com/kjanat/articulate-parser/internal/interfaces" + "github.com/kjanat/articulate-parser/internal/models" + "github.com/kjanat/articulate-parser/internal/services" +) + +// MarkdownExporter implements the Exporter interface for Markdown format. +// It converts Articulate Rise course data into a structured Markdown document. +type MarkdownExporter struct { + // htmlCleaner is used to convert HTML content to plain text + htmlCleaner *services.HTMLCleaner +} + +// NewMarkdownExporter creates a new MarkdownExporter instance. +// It takes an HTMLCleaner to handle HTML content conversion. 
+// +// Parameters: +// - htmlCleaner: Service for cleaning HTML content in course data +// +// Returns: +// - An implementation of the Exporter interface for Markdown format +func NewMarkdownExporter(htmlCleaner *services.HTMLCleaner) interfaces.Exporter { + return &MarkdownExporter{ + htmlCleaner: htmlCleaner, + } +} + +// Export exports a course to Markdown format. +// It generates a structured Markdown document from the course data +// and writes it to the specified output path. +// +// Parameters: +// - course: The course data model to export +// - outputPath: The file path where the Markdown content will be written +// +// Returns: +// - An error if writing to the output file fails +func (e *MarkdownExporter) Export(course *models.Course, outputPath string) error { + var buf bytes.Buffer + + // Write course header + buf.WriteString(fmt.Sprintf("# %s\n\n", course.Course.Title)) + + if course.Course.Description != "" { + buf.WriteString(fmt.Sprintf("%s\n\n", e.htmlCleaner.CleanHTML(course.Course.Description))) + } + + // Add metadata + buf.WriteString("## Course Information\n\n") + buf.WriteString(fmt.Sprintf("- **Course ID**: %s\n", course.Course.ID)) + buf.WriteString(fmt.Sprintf("- **Share ID**: %s\n", course.ShareID)) + buf.WriteString(fmt.Sprintf("- **Navigation Mode**: %s\n", course.Course.NavigationMode)) + if course.Course.ExportSettings != nil { + buf.WriteString(fmt.Sprintf("- **Export Format**: %s\n", course.Course.ExportSettings.Format)) + } + buf.WriteString("\n---\n\n") + + // Process lessons + for i, lesson := range course.Course.Lessons { + if lesson.Type == "section" { + buf.WriteString(fmt.Sprintf("# %s\n\n", lesson.Title)) + continue + } + + buf.WriteString(fmt.Sprintf("## Lesson %d: %s\n\n", i+1, lesson.Title)) + + if lesson.Description != "" { + buf.WriteString(fmt.Sprintf("%s\n\n", e.htmlCleaner.CleanHTML(lesson.Description))) + } + + // Process lesson items + for _, item := range lesson.Items { + e.processItemToMarkdown(&buf, item, 3) + } 
+ + buf.WriteString("\n---\n\n") + } + + return os.WriteFile(outputPath, buf.Bytes(), 0644) +} + +// GetSupportedFormat returns the format name this exporter supports +// It indicates the file format that the MarkdownExporter can generate. +// +// Returns: +// - A string representing the supported format ("markdown") +func (e *MarkdownExporter) GetSupportedFormat() string { + return "markdown" +} + +// processItemToMarkdown converts a course item into Markdown format +// and appends it to the provided buffer. It handles different item types +// with appropriate Markdown formatting. +// +// Parameters: +// - buf: The buffer to write the Markdown content to +// - item: The course item to process +// - level: The heading level for the item (determines the number of # characters) +func (e *MarkdownExporter) processItemToMarkdown(buf *bytes.Buffer, item models.Item, level int) { + headingPrefix := strings.Repeat("#", level) + + switch item.Type { + case "text": + for _, subItem := range item.Items { + if subItem.Heading != "" { + heading := e.htmlCleaner.CleanHTML(subItem.Heading) + if heading != "" { + buf.WriteString(fmt.Sprintf("%s %s\n\n", headingPrefix, heading)) + } + } + if subItem.Paragraph != "" { + paragraph := e.htmlCleaner.CleanHTML(subItem.Paragraph) + if paragraph != "" { + buf.WriteString(fmt.Sprintf("%s\n\n", paragraph)) + } + } + } + + case "list": + for _, subItem := range item.Items { + if subItem.Paragraph != "" { + paragraph := e.htmlCleaner.CleanHTML(subItem.Paragraph) + if paragraph != "" { + buf.WriteString(fmt.Sprintf("- %s\n", paragraph)) + } + } + } + buf.WriteString("\n") + + case "multimedia": + buf.WriteString(fmt.Sprintf("%s Media Content\n\n", headingPrefix)) + for _, subItem := range item.Items { + if subItem.Media != nil { + if subItem.Media.Video != nil { + buf.WriteString(fmt.Sprintf("**Video**: %s\n", subItem.Media.Video.OriginalUrl)) + if subItem.Media.Video.Duration > 0 { + buf.WriteString(fmt.Sprintf("**Duration**: %d seconds\n", 
subItem.Media.Video.Duration)) + } + } + if subItem.Media.Image != nil { + buf.WriteString(fmt.Sprintf("**Image**: %s\n", subItem.Media.Image.OriginalUrl)) + } + } + if subItem.Caption != "" { + caption := e.htmlCleaner.CleanHTML(subItem.Caption) + buf.WriteString(fmt.Sprintf("*%s*\n", caption)) + } + } + buf.WriteString("\n") + + case "image": + buf.WriteString(fmt.Sprintf("%s Image\n\n", headingPrefix)) + for _, subItem := range item.Items { + if subItem.Media != nil && subItem.Media.Image != nil { + buf.WriteString(fmt.Sprintf("**Image**: %s\n", subItem.Media.Image.OriginalUrl)) + } + if subItem.Caption != "" { + caption := e.htmlCleaner.CleanHTML(subItem.Caption) + buf.WriteString(fmt.Sprintf("*%s*\n", caption)) + } + } + buf.WriteString("\n") + + case "knowledgeCheck": + buf.WriteString(fmt.Sprintf("%s Knowledge Check\n\n", headingPrefix)) + for _, subItem := range item.Items { + if subItem.Title != "" { + title := e.htmlCleaner.CleanHTML(subItem.Title) + buf.WriteString(fmt.Sprintf("**Question**: %s\n\n", title)) + } + + buf.WriteString("**Answers**:\n") + for i, answer := range subItem.Answers { + correctMark := "" + if answer.Correct { + correctMark = " ✓" + } + buf.WriteString(fmt.Sprintf("%d. 
%s%s\n", i+1, answer.Title, correctMark)) + } + + if subItem.Feedback != "" { + feedback := e.htmlCleaner.CleanHTML(subItem.Feedback) + buf.WriteString(fmt.Sprintf("\n**Feedback**: %s\n", feedback)) + } + } + buf.WriteString("\n") + + case "interactive": + buf.WriteString(fmt.Sprintf("%s Interactive Content\n\n", headingPrefix)) + for _, subItem := range item.Items { + if subItem.Title != "" { + title := e.htmlCleaner.CleanHTML(subItem.Title) + buf.WriteString(fmt.Sprintf("**%s**\n\n", title)) + } + } + + case "divider": + buf.WriteString("---\n\n") + + default: + // Handle unknown types + if len(item.Items) > 0 { + buf.WriteString(fmt.Sprintf("%s %s Content\n\n", headingPrefix, strings.Title(item.Type))) + for _, subItem := range item.Items { + if subItem.Title != "" { + title := e.htmlCleaner.CleanHTML(subItem.Title) + buf.WriteString(fmt.Sprintf("**%s**\n\n", title)) + } + if subItem.Paragraph != "" { + paragraph := e.htmlCleaner.CleanHTML(subItem.Paragraph) + buf.WriteString(fmt.Sprintf("%s\n\n", paragraph)) + } + } + } + } +} diff --git a/internal/interfaces/exporter.go b/internal/interfaces/exporter.go new file mode 100644 index 0000000..cd3eba7 --- /dev/null +++ b/internal/interfaces/exporter.go @@ -0,0 +1,31 @@ +// Package interfaces provides the core contracts for the articulate-parser application. +// It defines interfaces for parsing and exporting Articulate Rise courses. +package interfaces + +import "github.com/kjanat/articulate-parser/internal/models" + +// Exporter defines the interface for exporting courses to different formats. +// Implementations of this interface handle the conversion of course data to +// specific output formats like Markdown or DOCX. +type Exporter interface { + // Export converts a course to the supported format and writes it to the + // specified output path. It returns an error if the export operation fails. 
// ExporterFactory creates exporters for different formats.
// It acts as a factory for creating appropriate Exporter implementations
// based on the requested format.
type ExporterFactory interface {
	// CreateExporter instantiates an exporter for the specified format.
	// It returns the appropriate exporter or an error if the format is not supported.
	CreateExporter(format string) (Exporter, error)

	// GetSupportedFormats returns a list of all export formats supported by this factory.
	// This is used to inform users of available export options.
	GetSupportedFormats() []string
}

// CourseParser defines the interface for loading course data.
// It provides methods to fetch course content either from a remote URI
// or from a local file path.
type CourseParser interface {
	// FetchCourse loads a course from a URI (typically an Articulate Rise share URL).
	// It retrieves the course data from the remote location and returns a parsed Course model.
	// Returns an error if the fetch operation fails or if the data cannot be parsed.
	FetchCourse(uri string) (*models.Course, error)

	// LoadCourseFromFile loads a course from a local file.
	// It reads and parses the course data from the specified file path.
	// Returns an error if the file cannot be read or if the data cannot be parsed.
	LoadCourseFromFile(filePath string) (*models.Course, error)
}

// Course represents the top-level structure of an Articulate Rise course.
// It contains metadata and the actual course content. Field names map to
// JSON keys through the struct tags.
type Course struct {
	// ShareID is the unique identifier used in public sharing URLs.
	// The Markdown exporter prints it in the "Course Information" list.
	ShareID string `json:"shareId"`
	// Author is the name of the course creator.
	Author string `json:"author"`
	// Course contains the detailed course information and content.
	Course CourseInfo `json:"course"`
	// LabelSet contains customized labels used in the course.
	LabelSet LabelSet `json:"labelSet"`
}
// CourseInfo contains the main details and content of an Articulate Rise course.
type CourseInfo struct {
	// ID is the internal unique identifier for the course.
	ID string `json:"id"`
	// Title is the name of the course.
	Title string `json:"title"`
	// Description is the course summary or introduction text.
	// Both exporters pass it through the HTML cleaner before writing it.
	Description string `json:"description"`
	// Color is the theme color of the course.
	Color string `json:"color"`
	// NavigationMode specifies how users navigate through the course.
	NavigationMode string `json:"navigationMode"`
	// Lessons is an ordered array of all lessons in the course.
	Lessons []Lesson `json:"lessons"`
	// CoverImage is the main image displayed for the course.
	// Pointer with omitempty: nil when the JSON has no "coverImage" key.
	CoverImage *Media `json:"coverImage,omitempty"`
	// ExportSettings contains configuration for exporting the course.
	// Pointer with omitempty: may be nil, so check before dereferencing
	// (the Markdown exporter does).
	ExportSettings *ExportSettings `json:"exportSettings,omitempty"`
}

// ExportSettings defines configuration options for exporting a course.
type ExportSettings struct {
	// Title specifies the export title which might differ from course title.
	Title string `json:"title"`
	// Format indicates the preferred export format.
	Format string `json:"format"`
}

// LabelSet contains customized labels used throughout the course.
// This allows course creators to modify standard terminology.
type LabelSet struct {
	// ID is the unique identifier for this label set.
	ID string `json:"id"`
	// Name is the descriptive name of the label set.
	Name string `json:"name"`
	// Labels is a mapping of label keys to their customized values.
	Labels map[string]string `json:"labels"`
}
// Lesson represents a single lesson or section within an Articulate Rise course.
// Lessons are the main organizational units and contain various content items.
type Lesson struct {
	// ID is the unique identifier for the lesson.
	ID string `json:"id"`
	// Title is the name of the lesson.
	Title string `json:"title"`
	// Description is the introductory text for the lesson.
	Description string `json:"description"`
	// Type indicates whether this is a regular lesson or a section header.
	// The Markdown exporter treats the value "section" as a section header.
	Type string `json:"type"`
	// Icon is the identifier for the icon displayed with this lesson.
	Icon string `json:"icon"`
	// Items is an ordered array of content items within the lesson.
	Items []Item `json:"items"`
	// Position stores the ordering information for the lesson.
	// Untyped: the struct does not fix its JSON shape.
	Position interface{} `json:"position"`
	// Ready indicates whether the lesson is marked as complete.
	Ready bool `json:"ready"`
	// CreatedAt is the timestamp when the lesson was created (kept as a string).
	CreatedAt string `json:"createdAt"`
	// UpdatedAt is the timestamp when the lesson was last modified (kept as a string).
	UpdatedAt string `json:"updatedAt"`
}

// Item represents a content block within a lesson.
// Items can be of various types such as text, multimedia, knowledge checks, etc.
type Item struct {
	// ID is the unique identifier for the item.
	ID string `json:"id"`
	// Type indicates the kind of content (text, image, knowledge check, etc.).
	// The Markdown exporter recognizes "text", "list", "multimedia", "image",
	// "knowledgeCheck", "interactive" and "divider".
	Type string `json:"type"`
	// Family groups similar item types together.
	Family string `json:"family"`
	// Variant specifies a sub-type within the main type.
	Variant string `json:"variant"`
	// Items contains the actual content elements (sub-items) of this item.
	Items []SubItem `json:"items"`
	// Settings contains configuration options specific to this item type.
	// Untyped: the struct does not fix its JSON shape.
	Settings interface{} `json:"settings"`
	// Data contains additional structured data for the item.
	// Untyped: the struct does not fix its JSON shape.
	Data interface{} `json:"data"`
	// Media contains any associated media for the item; nil when absent.
	Media *Media `json:"media,omitempty"`
}

// SubItem represents a specific content element within an Item.
// SubItems are the most granular content units like paragraphs, headings, or answers.
// Every field except ID is tagged omitempty.
type SubItem struct {
	// ID is the unique identifier for the sub-item.
	ID string `json:"id"`
	// Type indicates the specific kind of sub-item.
	Type string `json:"type,omitempty"`
	// Title is the name or label of the sub-item.
	Title string `json:"title,omitempty"`
	// Heading is a heading text for this sub-item.
	Heading string `json:"heading,omitempty"`
	// Paragraph contains regular text content.
	Paragraph string `json:"paragraph,omitempty"`
	// Caption is text associated with media elements.
	Caption string `json:"caption,omitempty"`
	// Media contains any associated images or videos; nil when absent.
	Media *Media `json:"media,omitempty"`
	// Answers contains possible answers for question-type sub-items.
	Answers []Answer `json:"answers,omitempty"`
	// Feedback is the response shown after user interaction.
	Feedback string `json:"feedback,omitempty"`
	// Front contains content for the front side of a card-type sub-item; nil when absent.
	Front *CardSide `json:"front,omitempty"`
	// Back contains content for the back side of a card-type sub-item; nil when absent.
	Back *CardSide `json:"back,omitempty"`
}
// Answer represents a possible response in a knowledge check or quiz item.
type Answer struct {
	// ID is the unique identifier for the answer.
	ID string `json:"id"`
	// Title is the text of the answer option.
	Title string `json:"title"`
	// Correct indicates whether this is the right answer.
	// The exporters mark correct answers with a check mark.
	Correct bool `json:"correct"`
	// MatchTitle is used in matching-type questions to pair answers.
	MatchTitle string `json:"matchTitle,omitempty"`
}

// CardSide represents one side of a flipcard-type content element.
type CardSide struct {
	// Media is the image or video associated with this side of the card; nil when absent.
	Media *Media `json:"media,omitempty"`
	// Description is the text content for this side of the card.
	Description string `json:"description,omitempty"`
}

// Media represents a media element that can be either an image or a video.
// Only one of the fields (Image or Video) will be populated at a time.
// NOTE(review): the struct itself does not enforce that exclusivity, and the
// Markdown exporter checks each field independently — confirm against real data.
type Media struct {
	// Image contains metadata for an image element; nil when absent.
	Image *ImageMedia `json:"image,omitempty"`
	// Video contains metadata for a video element; nil when absent.
	Video *VideoMedia `json:"video,omitempty"`
}

// ImageMedia contains the metadata and properties of an image.
type ImageMedia struct {
	// Key is the unique identifier for the image in the Articulate system.
	Key string `json:"key"`
	// Type indicates the image format (jpg, png, etc.).
	Type string `json:"type"`
	// Width is the pixel width of the image.
	Width int `json:"width,omitempty"`
	// Height is the pixel height of the image.
	Height int `json:"height,omitempty"`
	// CrushedKey is the identifier for a compressed version of the image.
	CrushedKey string `json:"crushedKey,omitempty"`
	// OriginalUrl is the URL to the full-resolution image.
	// This is the value the Markdown exporter prints for images.
	OriginalUrl string `json:"originalUrl"`
	// UseCrushedKey indicates whether to use the compressed version.
	UseCrushedKey bool `json:"useCrushedKey,omitempty"`
}

// VideoMedia contains the metadata and properties of a video.
type VideoMedia struct {
	// Key is the unique identifier for the video in the Articulate system.
	Key string `json:"key"`
	// URL is the direct link to the video content.
	URL string `json:"url"`
	// Type indicates the video format (mp4, webm, etc.).
	Type string `json:"type"`
	// Poster is the URL to the static thumbnail image for the video.
	Poster string `json:"poster,omitempty"`
	// Duration is the length of the video in seconds.
	// The Markdown exporter prints it only when it is greater than zero.
	Duration int `json:"duration,omitempty"`
	// InputKey is the original identifier for uploaded videos.
	InputKey string `json:"inputKey,omitempty"`
	// Thumbnail is the URL to a smaller preview image.
	Thumbnail string `json:"thumbnail,omitempty"`
	// OriginalUrl is the URL to the source video file.
	// This is the value the Markdown exporter prints for videos.
	OriginalUrl string `json:"originalUrl"`
}
+package services + +import ( + "fmt" + + "github.com/kjanat/articulate-parser/internal/interfaces" + "github.com/kjanat/articulate-parser/internal/models" +) + +// App represents the main application service that coordinates the parsing +// and exporting of Articulate Rise courses. It serves as the primary entry +// point for the application's functionality. +type App struct { + // parser is responsible for loading course data from files or URLs + parser interfaces.CourseParser + // exporterFactory creates the appropriate exporter for a given format + exporterFactory interfaces.ExporterFactory +} + +// NewApp creates a new application instance with dependency injection. +// It takes a CourseParser for loading courses and an ExporterFactory for +// creating the appropriate exporters. +func NewApp(parser interfaces.CourseParser, exporterFactory interfaces.ExporterFactory) *App { + return &App{ + parser: parser, + exporterFactory: exporterFactory, + } +} + +// ProcessCourseFromFile loads a course from a local file and exports it to the specified format. +// It takes the path to the course file, the desired export format, and the output file path. +// Returns an error if loading or exporting fails. +func (a *App) ProcessCourseFromFile(filePath, format, outputPath string) error { + course, err := a.parser.LoadCourseFromFile(filePath) + if err != nil { + return fmt.Errorf("failed to load course from file: %w", err) + } + + return a.exportCourse(course, format, outputPath) +} + +// ProcessCourseFromURI fetches a course from the provided URI and exports it to the specified format. +// It takes the URI to fetch the course from, the desired export format, and the output file path. +// Returns an error if fetching or exporting fails. 
+func (a *App) ProcessCourseFromURI(uri, format, outputPath string) error { + course, err := a.parser.FetchCourse(uri) + if err != nil { + return fmt.Errorf("failed to fetch course: %w", err) + } + + return a.exportCourse(course, format, outputPath) +} + +// exportCourse exports a course to the specified format and output path. +// It's a helper method that creates the appropriate exporter and performs the export. +// Returns an error if creating the exporter or exporting the course fails. +func (a *App) exportCourse(course *models.Course, format, outputPath string) error { + exporter, err := a.exporterFactory.CreateExporter(format) + if err != nil { + return fmt.Errorf("failed to create exporter: %w", err) + } + + if err := exporter.Export(course, outputPath); err != nil { + return fmt.Errorf("failed to export course: %w", err) + } + + return nil +} + +// GetSupportedFormats returns a list of all export formats supported by the application. +// This information is provided by the ExporterFactory. +func (a *App) GetSupportedFormats() []string { + return a.exporterFactory.GetSupportedFormats() +} diff --git a/internal/services/html_cleaner.go b/internal/services/html_cleaner.go new file mode 100644 index 0000000..3fd0c90 --- /dev/null +++ b/internal/services/html_cleaner.go @@ -0,0 +1,53 @@ +// Package services provides the core functionality for the articulate-parser application. +// It implements the interfaces defined in the interfaces package. +package services + +import ( + "regexp" + "strings" +) + +// HTMLCleaner provides utilities for converting HTML content to plain text. +// It removes HTML tags while preserving their content and converts HTML entities +// to their plain text equivalents. +type HTMLCleaner struct{} + +// NewHTMLCleaner creates a new HTML cleaner instance. +// This service is typically injected into exporters that need to handle +// HTML content from Articulate Rise courses. 
+func NewHTMLCleaner() *HTMLCleaner { + return &HTMLCleaner{} +} + +// CleanHTML removes HTML tags and converts entities, returning clean plain text. +// The function preserves the textual content of the HTML while removing markup. +// It handles common HTML entities like  , &, etc., and normalizes whitespace. +// +// Parameters: +// - html: The HTML content to clean +// +// Returns: +// - A plain text string with all HTML elements and entities removed/converted +func (h *HTMLCleaner) CleanHTML(html string) string { + // Remove HTML tags but preserve content + re := regexp.MustCompile(`<[^>]*>`) + cleaned := re.ReplaceAllString(html, "") + + // Replace common HTML entities with their character equivalents + cleaned = strings.ReplaceAll(cleaned, " ", " ") + cleaned = strings.ReplaceAll(cleaned, "&", "&") + cleaned = strings.ReplaceAll(cleaned, "<", "<") + cleaned = strings.ReplaceAll(cleaned, ">", ">") + cleaned = strings.ReplaceAll(cleaned, """, "\"") + cleaned = strings.ReplaceAll(cleaned, "'", "'") + cleaned = strings.ReplaceAll(cleaned, "ï", "ï") + cleaned = strings.ReplaceAll(cleaned, "ë", "ë") + cleaned = strings.ReplaceAll(cleaned, "é", "é") + + // Clean up extra whitespace by replacing multiple spaces, tabs, and newlines + // with a single space, then trim any leading/trailing whitespace + cleaned = regexp.MustCompile(`\s+`).ReplaceAllString(cleaned, " ") + cleaned = strings.TrimSpace(cleaned) + + return cleaned +} diff --git a/internal/services/parser.go b/internal/services/parser.go new file mode 100644 index 0000000..ae5a797 --- /dev/null +++ b/internal/services/parser.go @@ -0,0 +1,133 @@ +// Package services provides the core functionality for the articulate-parser application. +// It implements the interfaces defined in the interfaces package. 
+package services + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "regexp" + "time" + + "github.com/kjanat/articulate-parser/internal/interfaces" + "github.com/kjanat/articulate-parser/internal/models" +) + +// ArticulateParser implements the CourseParser interface specifically for Articulate Rise courses. +// It can fetch courses from the Articulate Rise API or load them from local JSON files. +type ArticulateParser struct { + // BaseURL is the root URL for the Articulate Rise API + BaseURL string + // Client is the HTTP client used to make requests to the API + Client *http.Client +} + +// NewArticulateParser creates a new ArticulateParser instance with default settings. +// The default configuration uses the standard Articulate Rise API URL and a +// HTTP client with a 30-second timeout. +func NewArticulateParser() interfaces.CourseParser { + return &ArticulateParser{ + BaseURL: "https://rise.articulate.com", + Client: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +// FetchCourse fetches a course from the given URI. +// It extracts the share ID from the URI, constructs an API URL, and fetches the course data. +// The course data is then unmarshalled into a Course model. 
+// +// Parameters: +// - uri: The Articulate Rise share URL (e.g., https://rise.articulate.com/share/SHARE_ID) +// +// Returns: +// - A parsed Course model if successful +// - An error if the fetch fails, if the share ID can't be extracted, +// or if the response can't be parsed +func (p *ArticulateParser) FetchCourse(uri string) (*models.Course, error) { + shareID, err := p.extractShareID(uri) + if err != nil { + return nil, err + } + + apiURL := p.buildAPIURL(shareID) + + resp, err := p.Client.Get(apiURL) + if err != nil { + return nil, fmt.Errorf("failed to fetch course data: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API returned status %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var course models.Course + if err := json.Unmarshal(body, &course); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) + } + + return &course, nil +} + +// LoadCourseFromFile loads an Articulate Rise course from a local JSON file. +// The file should contain a valid JSON representation of an Articulate Rise course. +// +// Parameters: +// - filePath: The path to the JSON file containing the course data +// +// Returns: +// - A parsed Course model if successful +// - An error if the file can't be read or the JSON can't be parsed +func (p *ArticulateParser) LoadCourseFromFile(filePath string) (*models.Course, error) { + data, err := os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("failed to read file: %w", err) + } + + var course models.Course + if err := json.Unmarshal(data, &course); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) + } + + return &course, nil +} + +// extractShareID extracts the share ID from a Rise URI. 
+// It uses a regular expression to find the share ID in URIs like: +// https://rise.articulate.com/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO#/ +// +// Parameters: +// - uri: The Articulate Rise share URL +// +// Returns: +// - The share ID string if found +// - An error if the share ID can't be extracted from the URI +func (p *ArticulateParser) extractShareID(uri string) (string, error) { + re := regexp.MustCompile(`/share/([a-zA-Z0-9_-]+)`) + matches := re.FindStringSubmatch(uri) + if len(matches) < 2 { + return "", fmt.Errorf("could not extract share ID from URI: %s", uri) + } + return matches[1], nil +} + +// buildAPIURL constructs the API URL for fetching course data. +// It combines the base URL with the API path and the share ID. +// +// Parameters: +// - shareID: The extracted share ID from the course URI +// +// Returns: +// - The complete API URL string for fetching the course data +func (p *ArticulateParser) buildAPIURL(shareID string) string { + return fmt.Sprintf("%s/api/rise-runtime/boot/share/%s", p.BaseURL, shareID) +} diff --git a/internal/version/version.go b/internal/version/version.go index 81470f4..d523ac3 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -5,7 +5,7 @@ package version // Version information. var ( // Version is the current version of the application. - Version = "0.1.0" + Version = "0.1.1" // BuildTime is the time the binary was built. BuildTime = "unknown" diff --git a/main.go b/main.go index 4c57c77..d2d873f 100644 --- a/main.go +++ b/main.go @@ -1,622 +1,89 @@ +// Package main provides the entry point for the articulate-parser application. +// This application fetches Articulate Rise courses from URLs or local files and +// exports them to different formats such as Markdown or DOCX. 
package main

import (
	"fmt"
	"log"
	"os"
	"strings"

	"github.com/kjanat/articulate-parser/internal/exporters"
	"github.com/kjanat/articulate-parser/internal/services"
)
*CardSide `json:"front,omitempty"` - Back *CardSide `json:"back,omitempty"` -} - -type Answer struct { - ID string `json:"id"` - Title string `json:"title"` - Correct bool `json:"correct"` - MatchTitle string `json:"matchTitle,omitempty"` -} - -type CardSide struct { - Media *Media `json:"media,omitempty"` - Description string `json:"description,omitempty"` -} - -type Media struct { - Image *ImageMedia `json:"image,omitempty"` - Video *VideoMedia `json:"video,omitempty"` -} - -type ImageMedia struct { - Key string `json:"key"` - Type string `json:"type"` - Width int `json:"width,omitempty"` - Height int `json:"height,omitempty"` - CrushedKey string `json:"crushedKey,omitempty"` - OriginalUrl string `json:"originalUrl"` - UseCrushedKey bool `json:"useCrushedKey,omitempty"` -} - -type VideoMedia struct { - Key string `json:"key"` - URL string `json:"url"` - Type string `json:"type"` - Poster string `json:"poster,omitempty"` - Duration int `json:"duration,omitempty"` - InputKey string `json:"inputKey,omitempty"` - Thumbnail string `json:"thumbnail,omitempty"` - OriginalUrl string `json:"originalUrl"` -} - -type ExportSettings struct { - Title string `json:"title"` - Format string `json:"format"` -} - -type LabelSet struct { - ID string `json:"id"` - Name string `json:"name"` - Labels map[string]string `json:"labels"` -} - -// Parser main struct -type ArticulateParser struct { - BaseURL string - Client *http.Client -} - -func NewArticulateParser() *ArticulateParser { - return &ArticulateParser{ - BaseURL: "https://rise.articulate.com", - Client: &http.Client{ - Timeout: 30 * time.Second, - }, - } -} - -func (p *ArticulateParser) ExtractShareID(uri string) (string, error) { - // Extract share ID from URI like: https://rise.articulate.com/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO#/ - re := regexp.MustCompile(`/share/([a-zA-Z0-9_-]+)`) - matches := re.FindStringSubmatch(uri) - if len(matches) < 2 { - return "", fmt.Errorf("could not extract share ID from URI: %s", uri) - } - 
return matches[1], nil -} - -func (p *ArticulateParser) BuildAPIURL(shareID string) string { - return fmt.Sprintf("%s/api/rise-runtime/boot/share/%s", p.BaseURL, shareID) -} - -func (p *ArticulateParser) FetchCourse(uri string) (*Course, error) { - shareID, err := p.ExtractShareID(uri) - if err != nil { - return nil, err - } - - apiURL := p.BuildAPIURL(shareID) - - resp, err := p.Client.Get(apiURL) - if err != nil { - return nil, fmt.Errorf("failed to fetch course data: %w", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("API returned status %d", resp.StatusCode) - } - - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read response body: %w", err) - } - - var course Course - if err := json.Unmarshal(body, &course); err != nil { - return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) - } - - return &course, nil -} - -func (p *ArticulateParser) LoadCourseFromFile(filePath string) (*Course, error) { - data, err := os.ReadFile(filePath) - if err != nil { - return nil, fmt.Errorf("failed to read file: %w", err) - } - - var course Course - if err := json.Unmarshal(data, &course); err != nil { - return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) - } - - return &course, nil -} - -// HTML cleaner utility -func cleanHTML(html string) string { - // Remove HTML tags but preserve content - re := regexp.MustCompile(`<[^>]*>`) - cleaned := re.ReplaceAllString(html, "") - - // Replace HTML entities - cleaned = strings.ReplaceAll(cleaned, " ", " ") - cleaned = strings.ReplaceAll(cleaned, "&", "&") - cleaned = strings.ReplaceAll(cleaned, "<", "<") - cleaned = strings.ReplaceAll(cleaned, ">", ">") - cleaned = strings.ReplaceAll(cleaned, """, "\"") - cleaned = strings.ReplaceAll(cleaned, "'", "'") - cleaned = strings.ReplaceAll(cleaned, "ï", "ï") - cleaned = strings.ReplaceAll(cleaned, "ë", "ë") - cleaned = strings.ReplaceAll(cleaned, "é", "é") - - // Clean up extra 
whitespace - cleaned = regexp.MustCompile(`\s+`).ReplaceAllString(cleaned, " ") - cleaned = strings.TrimSpace(cleaned) - - return cleaned -} - -// Markdown export functions -func (p *ArticulateParser) ExportToMarkdown(course *Course, outputPath string) error { - var buf bytes.Buffer - - // Write course header - buf.WriteString(fmt.Sprintf("# %s\n\n", course.Course.Title)) - - if course.Course.Description != "" { - buf.WriteString(fmt.Sprintf("%s\n\n", cleanHTML(course.Course.Description))) - } - - // Add metadata - buf.WriteString("## Course Information\n\n") - buf.WriteString(fmt.Sprintf("- **Course ID**: %s\n", course.Course.ID)) - buf.WriteString(fmt.Sprintf("- **Share ID**: %s\n", course.ShareID)) - buf.WriteString(fmt.Sprintf("- **Navigation Mode**: %s\n", course.Course.NavigationMode)) - if course.Course.ExportSettings != nil { - buf.WriteString(fmt.Sprintf("- **Export Format**: %s\n", course.Course.ExportSettings.Format)) - } - buf.WriteString("\n---\n\n") - - // Process lessons - for i, lesson := range course.Course.Lessons { - if lesson.Type == "section" { - buf.WriteString(fmt.Sprintf("# %s\n\n", lesson.Title)) - continue - } - - buf.WriteString(fmt.Sprintf("## Lesson %d: %s\n\n", i+1, lesson.Title)) - - if lesson.Description != "" { - buf.WriteString(fmt.Sprintf("%s\n\n", cleanHTML(lesson.Description))) - } - - // Process lesson items - for _, item := range lesson.Items { - p.processItemToMarkdown(&buf, item, 3) - } - - buf.WriteString("\n---\n\n") - } - - return os.WriteFile(outputPath, buf.Bytes(), 0644) -} - -func (p *ArticulateParser) processItemToMarkdown(buf *bytes.Buffer, item Item, level int) { - headingPrefix := strings.Repeat("#", level) - - switch item.Type { - case "text": - for _, subItem := range item.Items { - if subItem.Heading != "" { - heading := cleanHTML(subItem.Heading) - if heading != "" { - buf.WriteString(fmt.Sprintf("%s %s\n\n", headingPrefix, heading)) - } - } - if subItem.Paragraph != "" { - paragraph := 
cleanHTML(subItem.Paragraph) - if paragraph != "" { - buf.WriteString(fmt.Sprintf("%s\n\n", paragraph)) - } - } - } - - case "list": - for _, subItem := range item.Items { - if subItem.Paragraph != "" { - paragraph := cleanHTML(subItem.Paragraph) - if paragraph != "" { - buf.WriteString(fmt.Sprintf("- %s\n", paragraph)) - } - } - } - buf.WriteString("\n") - - case "multimedia": - buf.WriteString(fmt.Sprintf("%s Media Content\n\n", headingPrefix)) - for _, subItem := range item.Items { - if subItem.Media != nil { - if subItem.Media.Video != nil { - buf.WriteString(fmt.Sprintf("**Video**: %s\n", subItem.Media.Video.OriginalUrl)) - if subItem.Media.Video.Duration > 0 { - buf.WriteString(fmt.Sprintf("- Duration: %d seconds\n", subItem.Media.Video.Duration)) - } - } - if subItem.Media.Image != nil { - buf.WriteString(fmt.Sprintf("**Image**: %s\n", subItem.Media.Image.OriginalUrl)) - } - } - if subItem.Caption != "" { - caption := cleanHTML(subItem.Caption) - buf.WriteString(fmt.Sprintf("*%s*\n", caption)) - } - } - buf.WriteString("\n") - - case "image": - buf.WriteString(fmt.Sprintf("%s Image\n\n", headingPrefix)) - for _, subItem := range item.Items { - if subItem.Media != nil && subItem.Media.Image != nil { - buf.WriteString(fmt.Sprintf("**Image**: %s\n", subItem.Media.Image.OriginalUrl)) - } - if subItem.Caption != "" { - caption := cleanHTML(subItem.Caption) - buf.WriteString(fmt.Sprintf("*%s*\n", caption)) - } - } - buf.WriteString("\n") - - case "knowledgeCheck": - buf.WriteString(fmt.Sprintf("%s Knowledge Check\n\n", headingPrefix)) - for _, subItem := range item.Items { - if subItem.Title != "" { - title := cleanHTML(subItem.Title) - buf.WriteString(fmt.Sprintf("**Question**: %s\n\n", title)) - } - - buf.WriteString("**Answers**:\n") - for i, answer := range subItem.Answers { - answerText := cleanHTML(answer.Title) - correctMark := "" - if answer.Correct { - correctMark = " ✓" - } - buf.WriteString(fmt.Sprintf("%d. 
%s%s\n", i+1, answerText, correctMark)) - } - - if subItem.Feedback != "" { - feedback := cleanHTML(subItem.Feedback) - buf.WriteString(fmt.Sprintf("\n**Feedback**: %s\n", feedback)) - } - } - buf.WriteString("\n") - - case "interactive": - buf.WriteString(fmt.Sprintf("%s Interactive Content\n\n", headingPrefix)) - for _, subItem := range item.Items { - if subItem.Front != nil && subItem.Front.Description != "" { - desc := cleanHTML(subItem.Front.Description) - buf.WriteString(fmt.Sprintf("**Front**: %s\n", desc)) - } - if subItem.Back != nil && subItem.Back.Description != "" { - desc := cleanHTML(subItem.Back.Description) - buf.WriteString(fmt.Sprintf("**Back**: %s\n", desc)) - } - } - buf.WriteString("\n") - - case "divider": - buf.WriteString("---\n\n") - - default: - // Handle unknown types - if len(item.Items) > 0 { - buf.WriteString(fmt.Sprintf("%s %s Content\n\n", headingPrefix, strings.Title(item.Type))) - for _, subItem := range item.Items { - if subItem.Title != "" { - title := cleanHTML(subItem.Title) - buf.WriteString(fmt.Sprintf("- %s\n", title)) - } - } - buf.WriteString("\n") - } - } -} - -// DOCX export functions -func (p *ArticulateParser) ExportToDocx(course *Course, outputPath string) error { - doc := document.New() - - // Add title - title := doc.AddParagraph() - titleRun := title.AddRun() - titleRun.AddText(course.Course.Title) - titleRun.Properties().SetSize(20) - titleRun.Properties().SetBold(true) - - // Add description - if course.Course.Description != "" { - desc := doc.AddParagraph() - descRun := desc.AddRun() - descRun.AddText(cleanHTML(course.Course.Description)) - } - - // Add course metadata - metadata := doc.AddParagraph() - metadataRun := metadata.AddRun() - metadataRun.Properties().SetBold(true) - metadataRun.AddText("Course Information") - - courseInfo := doc.AddParagraph() - courseInfoRun := courseInfo.AddRun() - courseInfoText := fmt.Sprintf("Course ID: %s\nShare ID: %s\nNavigation Mode: %s", - course.Course.ID, course.ShareID, 
course.Course.NavigationMode) - courseInfoRun.AddText(courseInfoText) - - // Process lessons - for i, lesson := range course.Course.Lessons { - if lesson.Type == "section" { - section := doc.AddParagraph() - sectionRun := section.AddRun() - sectionRun.AddText(lesson.Title) - sectionRun.Properties().SetSize(18) - sectionRun.Properties().SetBold(true) - continue - } - - // Lesson title - lessonTitle := doc.AddParagraph() - lessonTitleRun := lessonTitle.AddRun() - lessonTitleRun.AddText(fmt.Sprintf("Lesson %d: %s", i+1, lesson.Title)) - lessonTitleRun.Properties().SetSize(16) - lessonTitleRun.Properties().SetBold(true) - - // Lesson description - if lesson.Description != "" { - lessonDesc := doc.AddParagraph() - lessonDescRun := lessonDesc.AddRun() - lessonDescRun.AddText(cleanHTML(lesson.Description)) - } - - // Process lesson items - for _, item := range lesson.Items { - p.processItemToDocx(doc, item) - } - } - - return doc.SaveToFile(outputPath) -} - -func (p *ArticulateParser) processItemToDocx(doc *document.Document, item Item) { - switch item.Type { - case "text": - for _, subItem := range item.Items { - if subItem.Heading != "" { - heading := cleanHTML(subItem.Heading) - if heading != "" { - para := doc.AddParagraph() - run := para.AddRun() - run.AddText(heading) - run.Properties().SetBold(true) - } - } - if subItem.Paragraph != "" { - paragraph := cleanHTML(subItem.Paragraph) - if paragraph != "" { - para := doc.AddParagraph() - run := para.AddRun() - run.AddText(paragraph) - } - } - } - - case "list": - for _, subItem := range item.Items { - if subItem.Paragraph != "" { - paragraph := cleanHTML(subItem.Paragraph) - if paragraph != "" { - para := doc.AddParagraph() - run := para.AddRun() - run.AddText("• " + paragraph) - } - } - } - - case "multimedia", "image": - para := doc.AddParagraph() - run := para.AddRun() - run.AddText("[Media Content]") - run.Properties().SetItalic(true) - - for _, subItem := range item.Items { - if subItem.Media != nil { - if 
subItem.Media.Video != nil { - mediaPara := doc.AddParagraph() - mediaRun := mediaPara.AddRun() - mediaRun.AddText(fmt.Sprintf("Video: %s", subItem.Media.Video.OriginalUrl)) - } - if subItem.Media.Image != nil { - mediaPara := doc.AddParagraph() - mediaRun := mediaPara.AddRun() - mediaRun.AddText(fmt.Sprintf("Image: %s", subItem.Media.Image.OriginalUrl)) - } - } - if subItem.Caption != "" { - caption := cleanHTML(subItem.Caption) - captionPara := doc.AddParagraph() - captionRun := captionPara.AddRun() - captionRun.AddText(caption) - captionRun.Properties().SetItalic(true) - } - } - - case "knowledgeCheck": - for _, subItem := range item.Items { - if subItem.Title != "" { - title := cleanHTML(subItem.Title) - questionPara := doc.AddParagraph() - questionRun := questionPara.AddRun() - questionRun.AddText("Question: " + title) - questionRun.Properties().SetBold(true) - } - - for i, answer := range subItem.Answers { - answerText := cleanHTML(answer.Title) - correctMark := "" - if answer.Correct { - correctMark = " [CORRECT]" - } - answerPara := doc.AddParagraph() - answerRun := answerPara.AddRun() - answerRun.AddText(fmt.Sprintf("%d. %s%s", i+1, answerText, correctMark)) - } - - if subItem.Feedback != "" { - feedback := cleanHTML(subItem.Feedback) - feedbackPara := doc.AddParagraph() - feedbackRun := feedbackPara.AddRun() - feedbackRun.AddText("Feedback: " + feedback) - feedbackRun.Properties().SetItalic(true) - } - } - } -} - +// main is the entry point of the application. +// It handles command-line arguments, sets up dependencies, +// and coordinates the parsing and exporting of courses. 
func main() { - // Handle version flag - if len(os.Args) > 1 && (os.Args[1] == "-v" || os.Args[1] == "--version") { - fmt.Printf("articulate-parser %s\n", version.Version) - fmt.Printf("Build time: %s\n", version.BuildTime) - fmt.Printf("Commit: %s\n", version.GitCommit) - os.Exit(0) - } + // Dependency injection setup + htmlCleaner := services.NewHTMLCleaner() + parser := services.NewArticulateParser() + exporterFactory := exporters.NewFactory(htmlCleaner) + app := services.NewApp(parser, exporterFactory) - if len(os.Args) < 3 { - fmt.Println("Usage: articulate-parser [output_path]") - fmt.Println(" articulate-parser -v|--version") - fmt.Println(" input_uri_or_file: Articulate Rise URI or local JSON file path") - fmt.Println(" output_format: md (Markdown) or docx (Word Document)") - fmt.Println(" output_path: Optional output file path") + // Check for required command-line arguments + if len(os.Args) < 4 { + fmt.Printf("Usage: %s \n", os.Args[0]) + fmt.Printf(" source: URI or file path to the course\n") + fmt.Printf(" format: export format (%s)\n", joinStrings(app.GetSupportedFormats(), ", ")) + fmt.Printf(" output: output file path\n") + fmt.Println("\nExample:") + fmt.Printf(" %s articulate-sample.json markdown output.md\n", os.Args[0]) + fmt.Printf(" %s https://rise.articulate.com/share/xyz docx output.docx\n", os.Args[0]) os.Exit(1) } - input := os.Args[1] - format := strings.ToLower(os.Args[2]) + source := os.Args[1] + format := os.Args[2] + output := os.Args[3] - if format != "md" && format != "docx" { - log.Fatal("Output format must be 'md' or 'docx'") - } - - parser := NewArticulateParser() - var course *Course var err error - // Determine if input is a URI or file path - if strings.HasPrefix(input, "http") { - course, err = parser.FetchCourse(input) + // Determine if source is a URI or file path + if isURI(source) { + err = app.ProcessCourseFromURI(source, format, output) } else { - course, err = parser.LoadCourseFromFile(input) + err = 
app.ProcessCourseFromFile(source, format, output) } if err != nil { - log.Fatalf("Failed to load course: %v", err) + log.Fatalf("Error processing course: %v", err) } - // Determine output path - var outputPath string - if len(os.Args) > 3 { - outputPath = os.Args[3] - } else { - baseDir := "output" - os.MkdirAll(baseDir, 0755) - - // Create safe filename from course title - safeTitle := regexp.MustCompile(`[^a-zA-Z0-9\-_]`).ReplaceAllString(course.Course.Title, "_") - if safeTitle == "" { - safeTitle = "articulate_course" - } - - outputPath = filepath.Join(baseDir, fmt.Sprintf("%s.%s", safeTitle, format)) - } - - // Export based on format - switch format { - case "md": - err = parser.ExportToMarkdown(course, outputPath) - case "docx": - err = parser.ExportToDocx(course, outputPath) - } - - if err != nil { - log.Fatalf("Failed to export course: %v", err) - } - - fmt.Printf("Course successfully exported to: %s\n", outputPath) - fmt.Printf("Course: %s (%d lessons)\n", course.Course.Title, len(course.Course.Lessons)) + fmt.Printf("Successfully exported course to %s\n", output) +} + +// isURI checks if a string is a URI by looking for http:// or https:// prefixes. +// +// Parameters: +// - str: The string to check +// +// Returns: +// - true if the string appears to be a URI, false otherwise +func isURI(str string) bool { + return len(str) > 7 && (str[:7] == "http://" || str[:8] == "https://") +} + +// joinStrings concatenates a slice of strings using the specified separator. 
// joinStrings concatenates the elements of strs, placing sep between each
// pair of adjacent elements. It delegates to strings.Join, which sizes the
// result once instead of reallocating on every append.
//
// Parameters:
//   - strs: The slice of strings to join
//   - sep: The separator to insert between each string
//
// Returns:
//   - A single string with all elements joined by the separator; the empty
//     string when strs is empty
func joinStrings(strs []string, sep string) string {
	return strings.Join(strs, sep)
}