Adds DOCX and Markdown export functionality

Introduces a modular exporter pattern supporting DOCX and Markdown formats
by implementing Exporter interfaces and restructuring application logic.

Enhances CI to install UPX for binary compression, excluding recent macOS
binaries due to compatibility issues.

Enables CGO when building binaries for all platforms, addressing potential
cross-platform compatibility concerns.

Bumps version to 0.1.1.
This commit is contained in:
2025-05-25 13:01:58 +02:00
parent 48cad7144f
commit 9de7222ec3
15 changed files with 1096 additions and 600 deletions

76
internal/services/app.go Normal file
View File

@ -0,0 +1,76 @@
// Package services provides the core functionality for the articulate-parser application.
// It implements the interfaces defined in the interfaces package.
package services
import (
"fmt"
"github.com/kjanat/articulate-parser/internal/interfaces"
"github.com/kjanat/articulate-parser/internal/models"
)
// App represents the main application service that coordinates the parsing
// and exporting of Articulate Rise courses. It serves as the primary entry
// point for the application's functionality.
type App struct {
// parser is responsible for loading course data from files or URLs
parser interfaces.CourseParser
// exporterFactory creates the appropriate exporter for a given format
exporterFactory interfaces.ExporterFactory
}
// NewApp creates a new application instance with dependency injection.
// It takes a CourseParser for loading courses and an ExporterFactory for
// creating the appropriate exporters.
func NewApp(parser interfaces.CourseParser, exporterFactory interfaces.ExporterFactory) *App {
return &App{
parser: parser,
exporterFactory: exporterFactory,
}
}
// ProcessCourseFromFile loads a course from a local file and exports it to the specified format.
// It takes the path to the course file, the desired export format, and the output file path.
// Returns an error if loading or exporting fails.
func (a *App) ProcessCourseFromFile(filePath, format, outputPath string) error {
course, err := a.parser.LoadCourseFromFile(filePath)
if err != nil {
return fmt.Errorf("failed to load course from file: %w", err)
}
return a.exportCourse(course, format, outputPath)
}
// ProcessCourseFromURI fetches a course from the provided URI and exports it to the specified format.
// It takes the URI to fetch the course from, the desired export format, and the output file path.
// Returns an error if fetching or exporting fails.
func (a *App) ProcessCourseFromURI(uri, format, outputPath string) error {
course, err := a.parser.FetchCourse(uri)
if err != nil {
return fmt.Errorf("failed to fetch course: %w", err)
}
return a.exportCourse(course, format, outputPath)
}
// exportCourse exports a course to the specified format and output path.
// It's a helper method that creates the appropriate exporter and performs the export.
// Returns an error if creating the exporter or exporting the course fails.
func (a *App) exportCourse(course *models.Course, format, outputPath string) error {
exporter, err := a.exporterFactory.CreateExporter(format)
if err != nil {
return fmt.Errorf("failed to create exporter: %w", err)
}
if err := exporter.Export(course, outputPath); err != nil {
return fmt.Errorf("failed to export course: %w", err)
}
return nil
}
// GetSupportedFormats returns a list of all export formats supported by the application.
// This information is provided by the ExporterFactory.
func (a *App) GetSupportedFormats() []string {
return a.exporterFactory.GetSupportedFormats()
}

View File

@ -0,0 +1,53 @@
// Package services provides the core functionality for the articulate-parser application.
// It implements the interfaces defined in the interfaces package.
package services
import (
"regexp"
"strings"
)
// HTMLCleaner provides utilities for converting HTML content to plain text.
// It removes HTML tags while preserving their content and converts HTML entities
// to their plain text equivalents.
type HTMLCleaner struct{}
// NewHTMLCleaner creates a new HTML cleaner instance.
// This service is typically injected into exporters that need to handle
// HTML content from Articulate Rise courses.
func NewHTMLCleaner() *HTMLCleaner {
return &HTMLCleaner{}
}
// CleanHTML removes HTML tags and converts entities, returning clean plain text.
// The function preserves the textual content of the HTML while removing markup.
// It handles common HTML entities like  , &, etc., and normalizes whitespace.
//
// Parameters:
// - html: The HTML content to clean
//
// Returns:
// - A plain text string with all HTML elements and entities removed/converted
func (h *HTMLCleaner) CleanHTML(html string) string {
// Remove HTML tags but preserve content
re := regexp.MustCompile(`<[^>]*>`)
cleaned := re.ReplaceAllString(html, "")
// Replace common HTML entities with their character equivalents
cleaned = strings.ReplaceAll(cleaned, "&nbsp;", " ")
cleaned = strings.ReplaceAll(cleaned, "&amp;", "&")
cleaned = strings.ReplaceAll(cleaned, "&lt;", "<")
cleaned = strings.ReplaceAll(cleaned, "&gt;", ">")
cleaned = strings.ReplaceAll(cleaned, "&quot;", "\"")
cleaned = strings.ReplaceAll(cleaned, "&#39;", "'")
cleaned = strings.ReplaceAll(cleaned, "&iuml;", "ï")
cleaned = strings.ReplaceAll(cleaned, "&euml;", "ë")
cleaned = strings.ReplaceAll(cleaned, "&eacute;", "é")
// Clean up extra whitespace by replacing multiple spaces, tabs, and newlines
// with a single space, then trim any leading/trailing whitespace
cleaned = regexp.MustCompile(`\s+`).ReplaceAllString(cleaned, " ")
cleaned = strings.TrimSpace(cleaned)
return cleaned
}

133
internal/services/parser.go Normal file
View File

@ -0,0 +1,133 @@
// Package services provides the core functionality for the articulate-parser application.
// It implements the interfaces defined in the interfaces package.
package services
import (
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"regexp"
"time"
"github.com/kjanat/articulate-parser/internal/interfaces"
"github.com/kjanat/articulate-parser/internal/models"
)
// ArticulateParser implements the CourseParser interface specifically for Articulate Rise courses.
// It can fetch courses from the Articulate Rise API or load them from local JSON files.
type ArticulateParser struct {
// BaseURL is the root URL for the Articulate Rise API
BaseURL string
// Client is the HTTP client used to make requests to the API
Client *http.Client
}
// NewArticulateParser creates a new ArticulateParser instance with default settings.
// The default configuration uses the standard Articulate Rise API URL and a
// HTTP client with a 30-second timeout.
func NewArticulateParser() interfaces.CourseParser {
return &ArticulateParser{
BaseURL: "https://rise.articulate.com",
Client: &http.Client{
Timeout: 30 * time.Second,
},
}
}
// FetchCourse fetches a course from the given URI.
// It extracts the share ID from the URI, constructs an API URL, and fetches the course data.
// The course data is then unmarshalled into a Course model.
//
// Parameters:
// - uri: The Articulate Rise share URL (e.g., https://rise.articulate.com/share/SHARE_ID)
//
// Returns:
// - A parsed Course model if successful
// - An error if the fetch fails, if the share ID can't be extracted,
// or if the response can't be parsed
func (p *ArticulateParser) FetchCourse(uri string) (*models.Course, error) {
shareID, err := p.extractShareID(uri)
if err != nil {
return nil, err
}
apiURL := p.buildAPIURL(shareID)
resp, err := p.Client.Get(apiURL)
if err != nil {
return nil, fmt.Errorf("failed to fetch course data: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("API returned status %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to read response body: %w", err)
}
var course models.Course
if err := json.Unmarshal(body, &course); err != nil {
return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
}
return &course, nil
}
// LoadCourseFromFile loads an Articulate Rise course from a local JSON file.
// The file should contain a valid JSON representation of an Articulate Rise course.
//
// Parameters:
// - filePath: The path to the JSON file containing the course data
//
// Returns:
// - A parsed Course model if successful
// - An error if the file can't be read or the JSON can't be parsed
func (p *ArticulateParser) LoadCourseFromFile(filePath string) (*models.Course, error) {
data, err := os.ReadFile(filePath)
if err != nil {
return nil, fmt.Errorf("failed to read file: %w", err)
}
var course models.Course
if err := json.Unmarshal(data, &course); err != nil {
return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
}
return &course, nil
}
// extractShareID extracts the share ID from a Rise URI.
// It uses a regular expression to find the share ID in URIs like:
// https://rise.articulate.com/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO#/
//
// Parameters:
// - uri: The Articulate Rise share URL
//
// Returns:
// - The share ID string if found
// - An error if the share ID can't be extracted from the URI
func (p *ArticulateParser) extractShareID(uri string) (string, error) {
re := regexp.MustCompile(`/share/([a-zA-Z0-9_-]+)`)
matches := re.FindStringSubmatch(uri)
if len(matches) < 2 {
return "", fmt.Errorf("could not extract share ID from URI: %s", uri)
}
return matches[1], nil
}
// buildAPIURL constructs the API URL for fetching course data.
// It combines the base URL with the API path and the share ID.
//
// Parameters:
// - shareID: The extracted share ID from the course URI
//
// Returns:
// - The complete API URL string for fetching the course data
func (p *ArticulateParser) buildAPIURL(shareID string) string {
return fmt.Sprintf("%s/api/rise-runtime/boot/share/%s", p.BaseURL, shareID)
}