mirror of
https://github.com/kjanat/articulate-parser.git
synced 2026-01-16 17:02:11 +01:00
Add comprehensive unit tests for services and main package
- Implement tests for the app service, including course processing from file and URI. - Create mock implementations for CourseParser and Exporter to facilitate testing. - Add tests for HTML cleaner service to validate HTML content cleaning functionality. - Develop tests for the parser service, covering course fetching and loading from files. - Introduce tests for utility functions in the main package, ensuring URI validation and string joining. - Include benchmarks for performance evaluation of key functions.
This commit is contained in:
353
internal/services/app_test.go
Normal file
353
internal/services/app_test.go
Normal file
@ -0,0 +1,353 @@
|
||||
// Package services: in-package tests for the App service (declared in package services so unexported fields are reachable).
|
||||
package services
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/kjanat/articulate-parser/internal/interfaces"
|
||||
"github.com/kjanat/articulate-parser/internal/models"
|
||||
)
|
||||
|
||||
// MockCourseParser is a mock implementation of interfaces.CourseParser for testing.
|
||||
type MockCourseParser struct {
|
||||
mockFetchCourse func(uri string) (*models.Course, error)
|
||||
mockLoadCourseFromFile func(filePath string) (*models.Course, error)
|
||||
}
|
||||
|
||||
func (m *MockCourseParser) FetchCourse(uri string) (*models.Course, error) {
|
||||
if m.mockFetchCourse != nil {
|
||||
return m.mockFetchCourse(uri)
|
||||
}
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (m *MockCourseParser) LoadCourseFromFile(filePath string) (*models.Course, error) {
|
||||
if m.mockLoadCourseFromFile != nil {
|
||||
return m.mockLoadCourseFromFile(filePath)
|
||||
}
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
// MockExporter is a mock implementation of interfaces.Exporter for testing.
|
||||
type MockExporter struct {
|
||||
mockExport func(course *models.Course, outputPath string) error
|
||||
mockGetSupportedFormat func() string
|
||||
}
|
||||
|
||||
func (m *MockExporter) Export(course *models.Course, outputPath string) error {
|
||||
if m.mockExport != nil {
|
||||
return m.mockExport(course, outputPath)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockExporter) GetSupportedFormat() string {
|
||||
if m.mockGetSupportedFormat != nil {
|
||||
return m.mockGetSupportedFormat()
|
||||
}
|
||||
return "mock"
|
||||
}
|
||||
|
||||
// MockExporterFactory is a mock implementation of interfaces.ExporterFactory for testing.
|
||||
type MockExporterFactory struct {
|
||||
mockCreateExporter func(format string) (*MockExporter, error)
|
||||
mockGetSupportedFormats func() []string
|
||||
}
|
||||
|
||||
func (m *MockExporterFactory) CreateExporter(format string) (interfaces.Exporter, error) {
|
||||
if m.mockCreateExporter != nil {
|
||||
exporter, err := m.mockCreateExporter(format)
|
||||
return exporter, err
|
||||
}
|
||||
return &MockExporter{}, nil
|
||||
}
|
||||
|
||||
func (m *MockExporterFactory) GetSupportedFormats() []string {
|
||||
if m.mockGetSupportedFormats != nil {
|
||||
return m.mockGetSupportedFormats()
|
||||
}
|
||||
return []string{"mock"}
|
||||
}
|
||||
|
||||
// createTestCourse creates a sample course for testing purposes.
|
||||
func createTestCourse() *models.Course {
|
||||
return &models.Course{
|
||||
ShareID: "test-share-id",
|
||||
Author: "Test Author",
|
||||
Course: models.CourseInfo{
|
||||
ID: "test-course-id",
|
||||
Title: "Test Course",
|
||||
Description: "This is a test course",
|
||||
Lessons: []models.Lesson{
|
||||
{
|
||||
ID: "lesson-1",
|
||||
Title: "Test Lesson",
|
||||
Type: "lesson",
|
||||
Items: []models.Item{
|
||||
{
|
||||
ID: "item-1",
|
||||
Type: "text",
|
||||
Items: []models.SubItem{
|
||||
{
|
||||
ID: "subitem-1",
|
||||
Title: "Test Title",
|
||||
Paragraph: "Test paragraph content",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewApp tests the NewApp constructor.
|
||||
func TestNewApp(t *testing.T) {
|
||||
parser := &MockCourseParser{}
|
||||
factory := &MockExporterFactory{}
|
||||
|
||||
app := NewApp(parser, factory)
|
||||
|
||||
if app == nil {
|
||||
t.Fatal("NewApp() returned nil")
|
||||
}
|
||||
|
||||
if app.parser != parser {
|
||||
t.Error("App parser was not set correctly")
|
||||
}
|
||||
|
||||
// Test that the factory is set (we can't directly compare interface values)
|
||||
formats := app.GetSupportedFormats()
|
||||
if len(formats) == 0 {
|
||||
t.Error("App exporterFactory was not set correctly - no supported formats")
|
||||
}
|
||||
}
|
||||
|
||||
// TestApp_ProcessCourseFromFile tests the ProcessCourseFromFile method.
|
||||
func TestApp_ProcessCourseFromFile(t *testing.T) {
|
||||
testCourse := createTestCourse()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
filePath string
|
||||
format string
|
||||
outputPath string
|
||||
setupMocks func(*MockCourseParser, *MockExporterFactory, *MockExporter)
|
||||
expectedError string
|
||||
}{
|
||||
{
|
||||
name: "successful processing",
|
||||
filePath: "test.json",
|
||||
format: "markdown",
|
||||
outputPath: "output.md",
|
||||
setupMocks: func(parser *MockCourseParser, factory *MockExporterFactory, exporter *MockExporter) {
|
||||
parser.mockLoadCourseFromFile = func(filePath string) (*models.Course, error) {
|
||||
if filePath != "test.json" {
|
||||
t.Errorf("Expected filePath 'test.json', got '%s'", filePath)
|
||||
}
|
||||
return testCourse, nil
|
||||
}
|
||||
|
||||
factory.mockCreateExporter = func(format string) (*MockExporter, error) {
|
||||
if format != "markdown" {
|
||||
t.Errorf("Expected format 'markdown', got '%s'", format)
|
||||
}
|
||||
return exporter, nil
|
||||
}
|
||||
|
||||
exporter.mockExport = func(course *models.Course, outputPath string) error {
|
||||
if outputPath != "output.md" {
|
||||
t.Errorf("Expected outputPath 'output.md', got '%s'", outputPath)
|
||||
}
|
||||
if course != testCourse {
|
||||
t.Error("Expected course to match testCourse")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "file loading error",
|
||||
filePath: "nonexistent.json",
|
||||
format: "markdown",
|
||||
outputPath: "output.md",
|
||||
setupMocks: func(parser *MockCourseParser, factory *MockExporterFactory, exporter *MockExporter) {
|
||||
parser.mockLoadCourseFromFile = func(filePath string) (*models.Course, error) {
|
||||
return nil, errors.New("file not found")
|
||||
}
|
||||
},
|
||||
expectedError: "failed to load course from file",
|
||||
},
|
||||
{
|
||||
name: "exporter creation error",
|
||||
filePath: "test.json",
|
||||
format: "unsupported",
|
||||
outputPath: "output.txt",
|
||||
setupMocks: func(parser *MockCourseParser, factory *MockExporterFactory, exporter *MockExporter) {
|
||||
parser.mockLoadCourseFromFile = func(filePath string) (*models.Course, error) {
|
||||
return testCourse, nil
|
||||
}
|
||||
|
||||
factory.mockCreateExporter = func(format string) (*MockExporter, error) {
|
||||
return nil, errors.New("unsupported format")
|
||||
}
|
||||
},
|
||||
expectedError: "failed to create exporter",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
parser := &MockCourseParser{}
|
||||
exporter := &MockExporter{}
|
||||
factory := &MockExporterFactory{}
|
||||
|
||||
tt.setupMocks(parser, factory, exporter)
|
||||
|
||||
app := NewApp(parser, factory)
|
||||
err := app.ProcessCourseFromFile(tt.filePath, tt.format, tt.outputPath)
|
||||
|
||||
if tt.expectedError != "" {
|
||||
if err == nil {
|
||||
t.Fatalf("Expected error containing '%s', got nil", tt.expectedError)
|
||||
}
|
||||
if !contains(err.Error(), tt.expectedError) {
|
||||
t.Errorf("Expected error containing '%s', got '%s'", tt.expectedError, err.Error())
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Errorf("Expected no error, got: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApp_ProcessCourseFromURI tests the ProcessCourseFromURI method.
|
||||
func TestApp_ProcessCourseFromURI(t *testing.T) {
|
||||
testCourse := createTestCourse()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
uri string
|
||||
format string
|
||||
outputPath string
|
||||
setupMocks func(*MockCourseParser, *MockExporterFactory, *MockExporter)
|
||||
expectedError string
|
||||
}{
|
||||
{
|
||||
name: "successful processing",
|
||||
uri: "https://rise.articulate.com/share/test123",
|
||||
format: "docx",
|
||||
outputPath: "output.docx",
|
||||
setupMocks: func(parser *MockCourseParser, factory *MockExporterFactory, exporter *MockExporter) {
|
||||
parser.mockFetchCourse = func(uri string) (*models.Course, error) {
|
||||
if uri != "https://rise.articulate.com/share/test123" {
|
||||
t.Errorf("Expected uri 'https://rise.articulate.com/share/test123', got '%s'", uri)
|
||||
}
|
||||
return testCourse, nil
|
||||
}
|
||||
|
||||
factory.mockCreateExporter = func(format string) (*MockExporter, error) {
|
||||
if format != "docx" {
|
||||
t.Errorf("Expected format 'docx', got '%s'", format)
|
||||
}
|
||||
return exporter, nil
|
||||
}
|
||||
|
||||
exporter.mockExport = func(course *models.Course, outputPath string) error {
|
||||
if outputPath != "output.docx" {
|
||||
t.Errorf("Expected outputPath 'output.docx', got '%s'", outputPath)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "fetch error",
|
||||
uri: "invalid-uri",
|
||||
format: "docx",
|
||||
outputPath: "output.docx",
|
||||
setupMocks: func(parser *MockCourseParser, factory *MockExporterFactory, exporter *MockExporter) {
|
||||
parser.mockFetchCourse = func(uri string) (*models.Course, error) {
|
||||
return nil, errors.New("network error")
|
||||
}
|
||||
},
|
||||
expectedError: "failed to fetch course",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
parser := &MockCourseParser{}
|
||||
exporter := &MockExporter{}
|
||||
factory := &MockExporterFactory{}
|
||||
|
||||
tt.setupMocks(parser, factory, exporter)
|
||||
|
||||
app := NewApp(parser, factory)
|
||||
err := app.ProcessCourseFromURI(tt.uri, tt.format, tt.outputPath)
|
||||
|
||||
if tt.expectedError != "" {
|
||||
if err == nil {
|
||||
t.Fatalf("Expected error containing '%s', got nil", tt.expectedError)
|
||||
}
|
||||
if !contains(err.Error(), tt.expectedError) {
|
||||
t.Errorf("Expected error containing '%s', got '%s'", tt.expectedError, err.Error())
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Errorf("Expected no error, got: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApp_GetSupportedFormats tests the GetSupportedFormats method.
|
||||
func TestApp_GetSupportedFormats(t *testing.T) {
|
||||
expectedFormats := []string{"markdown", "docx", "pdf"}
|
||||
|
||||
parser := &MockCourseParser{}
|
||||
factory := &MockExporterFactory{
|
||||
mockGetSupportedFormats: func() []string {
|
||||
return expectedFormats
|
||||
},
|
||||
}
|
||||
|
||||
app := NewApp(parser, factory)
|
||||
formats := app.GetSupportedFormats()
|
||||
|
||||
if len(formats) != len(expectedFormats) {
|
||||
t.Errorf("Expected %d formats, got %d", len(expectedFormats), len(formats))
|
||||
}
|
||||
|
||||
for i, format := range formats {
|
||||
if format != expectedFormats[i] {
|
||||
t.Errorf("Expected format '%s' at index %d, got '%s'", expectedFormats[i], i, format)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// contains reports whether substr occurs anywhere within s. An empty substr
// is contained in every string.
func contains(s, substr string) bool {
	switch {
	case len(substr) > len(s):
		return false
	case len(substr) == 0, s == substr:
		return true
	case len(s) == len(substr):
		// Same length but not equal: no match is possible.
		return false
	}

	// From here on s is strictly longer than the non-empty substr. Try the
	// two ends first, then fall back to a full scan.
	if s[:len(substr)] == substr {
		return true
	}
	if s[len(s)-len(substr):] == substr {
		return true
	}
	return containsSubstring(s, substr)
}

// containsSubstring scans s from left to right and reports whether any window
// of len(substr) bytes equals substr.
func containsSubstring(s, substr string) bool {
	width := len(substr)
	for start := 0; start+width <= len(s); start++ {
		if s[start:start+width] == substr {
			return true
		}
	}
	return false
}
|
||||
325
internal/services/html_cleaner_test.go
Normal file
325
internal/services/html_cleaner_test.go
Normal file
@ -0,0 +1,325 @@
|
||||
// Package services: in-package tests for the HTML cleaner service.
|
||||
package services
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestNewHTMLCleaner tests the NewHTMLCleaner constructor.
|
||||
func TestNewHTMLCleaner(t *testing.T) {
|
||||
cleaner := NewHTMLCleaner()
|
||||
|
||||
if cleaner == nil {
|
||||
t.Fatal("NewHTMLCleaner() returned nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHTMLCleaner_CleanHTML tests the CleanHTML method with various HTML inputs.
|
||||
func TestHTMLCleaner_CleanHTML(t *testing.T) {
|
||||
cleaner := NewHTMLCleaner()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "plain text (no HTML)",
|
||||
input: "This is plain text",
|
||||
expected: "This is plain text",
|
||||
},
|
||||
{
|
||||
name: "empty string",
|
||||
input: "",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "simple HTML tag",
|
||||
input: "<p>Hello world</p>",
|
||||
expected: "Hello world",
|
||||
},
|
||||
{
|
||||
name: "multiple HTML tags",
|
||||
input: "<h1>Title</h1><p>Paragraph text</p>",
|
||||
expected: "TitleParagraph text",
|
||||
},
|
||||
{
|
||||
name: "nested HTML tags",
|
||||
input: "<div><h1>Title</h1><p>Paragraph with <strong>bold</strong> text</p></div>",
|
||||
expected: "TitleParagraph with bold text",
|
||||
},
|
||||
{
|
||||
name: "HTML with attributes",
|
||||
input: "<p class=\"test\" id=\"para1\">Text with attributes</p>",
|
||||
expected: "Text with attributes",
|
||||
},
|
||||
{
|
||||
name: "self-closing tags",
|
||||
input: "Line 1<br/>Line 2<hr/>End",
|
||||
expected: "Line 1Line 2End",
|
||||
},
|
||||
{
|
||||
name: "HTML entities - basic",
|
||||
input: "AT&T <company> "quoted" text",
|
||||
expected: "AT&T <company> \"quoted\" text",
|
||||
},
|
||||
{
|
||||
name: "HTML entities - apostrophe",
|
||||
input: "It's a test",
|
||||
expected: "It's a test",
|
||||
},
|
||||
{
|
||||
name: "HTML entities - special characters",
|
||||
input: "ïber ëlite écarté",
|
||||
expected: "ïber ëlite écarté",
|
||||
},
|
||||
{
|
||||
name: "HTML entities - nbsp",
|
||||
input: "Word1 Word2",
|
||||
expected: "Word1 Word2",
|
||||
},
|
||||
{
|
||||
name: "mixed HTML and entities",
|
||||
input: "<p>Hello & welcome to <strong>our</strong> site!</p>",
|
||||
expected: "Hello & welcome to our site!",
|
||||
},
|
||||
{
|
||||
name: "multiple whitespace",
|
||||
input: "Text with\t\tmultiple\n\nspaces",
|
||||
expected: "Text with multiple spaces",
|
||||
},
|
||||
{
|
||||
name: "whitespace with HTML",
|
||||
input: "<p> Text with </p> <div> spaces </div> ",
|
||||
expected: "Text with spaces",
|
||||
},
|
||||
{
|
||||
name: "complex content",
|
||||
input: "<div class=\"content\"><h1>Course Title</h1><p>This is a <em>great</em> course about & HTML entities like and "quotes".</p></div>",
|
||||
expected: "Course TitleThis is a great course about & HTML entities like and \"quotes\".",
|
||||
},
|
||||
{
|
||||
name: "malformed HTML",
|
||||
input: "<p>Unclosed paragraph<div>Another <span>tag</p></div>",
|
||||
expected: "Unclosed paragraphAnother tag",
|
||||
},
|
||||
{
|
||||
name: "HTML comments (should be removed)",
|
||||
input: "Text before<!-- This is a comment -->Text after",
|
||||
expected: "Text beforeText after",
|
||||
},
|
||||
{
|
||||
name: "script and style tags content",
|
||||
input: "<script>alert('test');</script>Content<style>body{color:red;}</style>",
|
||||
expected: "alert('test');Contentbody{color:red;}",
|
||||
},
|
||||
{
|
||||
name: "line breaks and formatting",
|
||||
input: "<p>Line 1</p>\n<p>Line 2</p>\n<p>Line 3</p>",
|
||||
expected: "Line 1 Line 2 Line 3",
|
||||
},
|
||||
{
|
||||
name: "only whitespace",
|
||||
input: " \t\n ",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "only HTML tags",
|
||||
input: "<div><p></p></div>",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "HTML with newlines",
|
||||
input: "<p>\n Paragraph with\n line breaks\n</p>",
|
||||
expected: "Paragraph with line breaks",
|
||||
},
|
||||
{
|
||||
name: "complex nested structure",
|
||||
input: "<article><header><h1>Title</h1></header><section><p>First paragraph with <a href=\"#\">link</a>.</p><ul><li>Item 1</li><li>Item 2</li></ul></section></article>",
|
||||
expected: "TitleFirst paragraph with link.Item 1Item 2",
|
||||
},
|
||||
{
|
||||
name: "entities in attributes (should still be processed)",
|
||||
input: "<p title=\"AT&T\">Content</p>",
|
||||
expected: "Content",
|
||||
},
|
||||
{
|
||||
name: "special HTML5 entities",
|
||||
input: "Left arrow ← Right arrow →",
|
||||
expected: "Left arrow ← Right arrow →", // These are not handled by the cleaner
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := cleaner.CleanHTML(tt.input)
|
||||
if result != tt.expected {
|
||||
t.Errorf("CleanHTML(%q) = %q, want %q", tt.input, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestHTMLCleaner_CleanHTML_LargeContent tests the CleanHTML method with large content.
|
||||
func TestHTMLCleaner_CleanHTML_LargeContent(t *testing.T) {
|
||||
cleaner := NewHTMLCleaner()
|
||||
|
||||
// Create a large HTML string
|
||||
var builder strings.Builder
|
||||
builder.WriteString("<html><body>")
|
||||
for i := 0; i < 1000; i++ {
|
||||
builder.WriteString("<p>Paragraph ")
|
||||
builder.WriteString(string(rune('0' + i%10)))
|
||||
builder.WriteString(" with some content & entities.</p>")
|
||||
}
|
||||
builder.WriteString("</body></html>")
|
||||
|
||||
input := builder.String()
|
||||
result := cleaner.CleanHTML(input)
|
||||
|
||||
// Check that HTML tags are removed
|
||||
if strings.Contains(result, "<") || strings.Contains(result, ">") {
|
||||
t.Error("Result should not contain HTML tags")
|
||||
}
|
||||
|
||||
// Check that content is preserved
|
||||
if !strings.Contains(result, "Paragraph") {
|
||||
t.Error("Result should contain paragraph content")
|
||||
}
|
||||
|
||||
// Check that entities are converted
|
||||
if strings.Contains(result, "&") {
|
||||
t.Error("Result should not contain unconverted HTML entities")
|
||||
}
|
||||
if !strings.Contains(result, "&") {
|
||||
t.Error("Result should contain converted ampersand")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHTMLCleaner_CleanHTML_EdgeCases tests edge cases for the CleanHTML method.
|
||||
func TestHTMLCleaner_CleanHTML_EdgeCases(t *testing.T) {
|
||||
cleaner := NewHTMLCleaner()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "only entities",
|
||||
input: "&<>"' ",
|
||||
expected: "&<>\"'",
|
||||
},
|
||||
{
|
||||
name: "repeated entities",
|
||||
input: "&&&",
|
||||
expected: "&&&",
|
||||
},
|
||||
{
|
||||
name: "entities without semicolon (should not be converted)",
|
||||
input: "& test < test",
|
||||
expected: "& test < test",
|
||||
},
|
||||
{
|
||||
name: "mixed valid and invalid entities",
|
||||
input: "& &invalid; < &fake;",
|
||||
expected: "& &invalid; < &fake;",
|
||||
},
|
||||
{
|
||||
name: "unclosed tag at end",
|
||||
input: "Content <p>with unclosed",
|
||||
expected: "Content with unclosed",
|
||||
},
|
||||
{
|
||||
name: "tag with no closing bracket",
|
||||
input: "Content <p class='test' with no closing bracket",
|
||||
expected: "Content <p class='test' with no closing bracket",
|
||||
},
|
||||
{
|
||||
name: "extremely nested tags",
|
||||
input: "<div><div><div><div><div>Deep content</div></div></div></div></div>",
|
||||
expected: "Deep content",
|
||||
},
|
||||
{
|
||||
name: "empty tags with whitespace",
|
||||
input: "<p> </p><div>\t\n</div>",
|
||||
expected: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := cleaner.CleanHTML(tt.input)
|
||||
if result != tt.expected {
|
||||
t.Errorf("CleanHTML(%q) = %q, want %q", tt.input, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestHTMLCleaner_CleanHTML_Unicode tests Unicode content handling.
|
||||
func TestHTMLCleaner_CleanHTML_Unicode(t *testing.T) {
|
||||
cleaner := NewHTMLCleaner()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "unicode characters",
|
||||
input: "<p>Hello 世界! Café naïve résumé</p>",
|
||||
expected: "Hello 世界! Café naïve résumé",
|
||||
},
|
||||
{
|
||||
name: "unicode with entities",
|
||||
input: "<p>Unicode: 你好 & emoji: 🌍</p>",
|
||||
expected: "Unicode: 你好 & emoji: 🌍",
|
||||
},
|
||||
{
|
||||
name: "mixed scripts",
|
||||
input: "<div>English العربية русский 日本語</div>",
|
||||
expected: "English العربية русский 日本語",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := cleaner.CleanHTML(tt.input)
|
||||
if result != tt.expected {
|
||||
t.Errorf("CleanHTML(%q) = %q, want %q", tt.input, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkHTMLCleaner_CleanHTML benchmarks the CleanHTML method.
|
||||
func BenchmarkHTMLCleaner_CleanHTML(b *testing.B) {
|
||||
cleaner := NewHTMLCleaner()
|
||||
input := "<div class=\"content\"><h1>Course Title</h1><p>This is a <em>great</em> course about & HTML entities like and "quotes".</p><ul><li>Item 1</li><li>Item 2</li></ul></div>"
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
cleaner.CleanHTML(input)
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkHTMLCleaner_CleanHTML_Large benchmarks the CleanHTML method with large content.
|
||||
func BenchmarkHTMLCleaner_CleanHTML_Large(b *testing.B) {
|
||||
cleaner := NewHTMLCleaner()
|
||||
|
||||
// Create a large HTML string
|
||||
var builder strings.Builder
|
||||
for i := 0; i < 100; i++ {
|
||||
builder.WriteString("<p>Paragraph ")
|
||||
builder.WriteString(string(rune('0' + i%10)))
|
||||
builder.WriteString(" with some content & entities <test>.</p>")
|
||||
}
|
||||
input := builder.String()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
cleaner.CleanHTML(input)
|
||||
}
|
||||
}
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"time"
|
||||
@ -112,6 +113,17 @@ func (p *ArticulateParser) LoadCourseFromFile(filePath string) (*models.Course,
|
||||
// - The share ID string if found
|
||||
// - An error if the share ID can't be extracted from the URI
|
||||
func (p *ArticulateParser) extractShareID(uri string) (string, error) {
|
||||
// Parse the URL to validate the domain
|
||||
parsedURL, err := url.Parse(uri)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid URI: %s", uri)
|
||||
}
|
||||
|
||||
// Validate that it's an Articulate Rise domain
|
||||
if parsedURL.Host != "rise.articulate.com" {
|
||||
return "", fmt.Errorf("invalid domain for Articulate Rise URI: %s", parsedURL.Host)
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(`/share/([a-zA-Z0-9_-]+)`)
|
||||
matches := re.FindStringSubmatch(uri)
|
||||
if len(matches) < 2 {
|
||||
|
||||
440
internal/services/parser_test.go
Normal file
440
internal/services/parser_test.go
Normal file
@ -0,0 +1,440 @@
|
||||
// Package services: in-package tests for the Articulate parser service.
|
||||
package services
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/kjanat/articulate-parser/internal/models"
|
||||
)
|
||||
|
||||
// TestNewArticulateParser tests the NewArticulateParser constructor.
|
||||
func TestNewArticulateParser(t *testing.T) {
|
||||
parser := NewArticulateParser()
|
||||
|
||||
if parser == nil {
|
||||
t.Fatal("NewArticulateParser() returned nil")
|
||||
}
|
||||
|
||||
// Type assertion to check internal structure
|
||||
articulateParser, ok := parser.(*ArticulateParser)
|
||||
if !ok {
|
||||
t.Fatal("NewArticulateParser() returned wrong type")
|
||||
}
|
||||
|
||||
expectedBaseURL := "https://rise.articulate.com"
|
||||
if articulateParser.BaseURL != expectedBaseURL {
|
||||
t.Errorf("Expected BaseURL '%s', got '%s'", expectedBaseURL, articulateParser.BaseURL)
|
||||
}
|
||||
|
||||
if articulateParser.Client == nil {
|
||||
t.Error("Client should not be nil")
|
||||
}
|
||||
|
||||
expectedTimeout := 30 * time.Second
|
||||
if articulateParser.Client.Timeout != expectedTimeout {
|
||||
t.Errorf("Expected timeout %v, got %v", expectedTimeout, articulateParser.Client.Timeout)
|
||||
}
|
||||
}
|
||||
|
||||
// TestArticulateParser_FetchCourse tests the FetchCourse method.
|
||||
func TestArticulateParser_FetchCourse(t *testing.T) {
|
||||
// Create a test course object
|
||||
testCourse := &models.Course{
|
||||
ShareID: "test-share-id",
|
||||
Author: "Test Author",
|
||||
Course: models.CourseInfo{
|
||||
ID: "test-course-id",
|
||||
Title: "Test Course",
|
||||
Description: "Test Description",
|
||||
},
|
||||
}
|
||||
|
||||
// Create test server
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Check request path
|
||||
expectedPath := "/api/rise-runtime/boot/share/test-share-id"
|
||||
if r.URL.Path != expectedPath {
|
||||
t.Errorf("Expected path '%s', got '%s'", expectedPath, r.URL.Path)
|
||||
}
|
||||
|
||||
// Check request method
|
||||
if r.Method != http.MethodGet {
|
||||
t.Errorf("Expected method GET, got %s", r.Method)
|
||||
}
|
||||
|
||||
// Return mock response
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(w).Encode(testCourse); err != nil {
|
||||
t.Fatalf("Failed to encode test course: %v", err)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
// Create parser with test server URL
|
||||
parser := &ArticulateParser{
|
||||
BaseURL: server.URL,
|
||||
Client: &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
uri string
|
||||
expectedError string
|
||||
}{
|
||||
{
|
||||
name: "valid articulate rise URI",
|
||||
uri: "https://rise.articulate.com/share/test-share-id#/",
|
||||
},
|
||||
{
|
||||
name: "valid articulate rise URI without fragment",
|
||||
uri: "https://rise.articulate.com/share/test-share-id",
|
||||
},
|
||||
{
|
||||
name: "invalid URI format",
|
||||
uri: "invalid-uri",
|
||||
expectedError: "invalid domain for Articulate Rise URI:",
|
||||
},
|
||||
{
|
||||
name: "empty URI",
|
||||
uri: "",
|
||||
expectedError: "invalid domain for Articulate Rise URI:",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
course, err := parser.FetchCourse(tt.uri)
|
||||
|
||||
if tt.expectedError != "" {
|
||||
if err == nil {
|
||||
t.Fatalf("Expected error containing '%s', got nil", tt.expectedError)
|
||||
}
|
||||
if !strings.Contains(err.Error(), tt.expectedError) {
|
||||
t.Errorf("Expected error containing '%s', got '%s'", tt.expectedError, err.Error())
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error, got: %v", err)
|
||||
}
|
||||
if course == nil {
|
||||
t.Fatal("Expected course, got nil")
|
||||
}
|
||||
if course.ShareID != testCourse.ShareID {
|
||||
t.Errorf("Expected ShareID '%s', got '%s'", testCourse.ShareID, course.ShareID)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestArticulateParser_FetchCourse_NetworkError tests network error handling.
|
||||
func TestArticulateParser_FetchCourse_NetworkError(t *testing.T) {
|
||||
// Create parser with invalid URL to simulate network error
|
||||
parser := &ArticulateParser{
|
||||
BaseURL: "http://localhost:99999", // Invalid port
|
||||
Client: &http.Client{
|
||||
Timeout: 1 * time.Millisecond, // Very short timeout
|
||||
},
|
||||
}
|
||||
|
||||
_, err := parser.FetchCourse("https://rise.articulate.com/share/test-share-id")
|
||||
if err == nil {
|
||||
t.Fatal("Expected network error, got nil")
|
||||
}
|
||||
|
||||
if !strings.Contains(err.Error(), "failed to fetch course data") {
|
||||
t.Errorf("Expected error to contain 'failed to fetch course data', got '%s'", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// TestArticulateParser_FetchCourse_InvalidJSON tests invalid JSON response handling.
|
||||
func TestArticulateParser_FetchCourse_InvalidJSON(t *testing.T) {
|
||||
// Create test server that returns invalid JSON
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write([]byte("invalid json"))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
parser := &ArticulateParser{
|
||||
BaseURL: server.URL,
|
||||
Client: &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
},
|
||||
}
|
||||
|
||||
_, err := parser.FetchCourse("https://rise.articulate.com/share/test-share-id")
|
||||
if err == nil {
|
||||
t.Fatal("Expected JSON parsing error, got nil")
|
||||
}
|
||||
|
||||
if !strings.Contains(err.Error(), "failed to unmarshal JSON") {
|
||||
t.Errorf("Expected error to contain 'failed to unmarshal JSON', got '%s'", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// TestArticulateParser_LoadCourseFromFile tests the LoadCourseFromFile method.
|
||||
func TestArticulateParser_LoadCourseFromFile(t *testing.T) {
|
||||
// Create a temporary test file
|
||||
testCourse := &models.Course{
|
||||
ShareID: "file-test-share-id",
|
||||
Author: "File Test Author",
|
||||
Course: models.CourseInfo{
|
||||
ID: "file-test-course-id",
|
||||
Title: "File Test Course",
|
||||
Description: "File Test Description",
|
||||
},
|
||||
}
|
||||
|
||||
// Create temporary directory and file
|
||||
tempDir := t.TempDir()
|
||||
tempFile := filepath.Join(tempDir, "test-course.json")
|
||||
|
||||
// Write test data to file
|
||||
data, err := json.Marshal(testCourse)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal test course: %v", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(tempFile, data, 0644); err != nil {
|
||||
t.Fatalf("Failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
parser := NewArticulateParser()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
filePath string
|
||||
expectedError string
|
||||
}{
|
||||
{
|
||||
name: "valid file",
|
||||
filePath: tempFile,
|
||||
},
|
||||
{
|
||||
name: "nonexistent file",
|
||||
filePath: filepath.Join(tempDir, "nonexistent.json"),
|
||||
expectedError: "failed to read file",
|
||||
},
|
||||
{
|
||||
name: "empty path",
|
||||
filePath: "",
|
||||
expectedError: "failed to read file",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
course, err := parser.LoadCourseFromFile(tt.filePath)
|
||||
|
||||
if tt.expectedError != "" {
|
||||
if err == nil {
|
||||
t.Fatalf("Expected error containing '%s', got nil", tt.expectedError)
|
||||
}
|
||||
if !strings.Contains(err.Error(), tt.expectedError) {
|
||||
t.Errorf("Expected error containing '%s', got '%s'", tt.expectedError, err.Error())
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error, got: %v", err)
|
||||
}
|
||||
if course == nil {
|
||||
t.Fatal("Expected course, got nil")
|
||||
}
|
||||
if course.ShareID != testCourse.ShareID {
|
||||
t.Errorf("Expected ShareID '%s', got '%s'", testCourse.ShareID, course.ShareID)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestArticulateParser_LoadCourseFromFile_InvalidJSON tests invalid JSON file handling.
|
||||
func TestArticulateParser_LoadCourseFromFile_InvalidJSON(t *testing.T) {
|
||||
// Create temporary file with invalid JSON
|
||||
tempDir := t.TempDir()
|
||||
tempFile := filepath.Join(tempDir, "invalid.json")
|
||||
|
||||
if err := os.WriteFile(tempFile, []byte("invalid json content"), 0644); err != nil {
|
||||
t.Fatalf("Failed to write test file: %v", err)
|
||||
}
|
||||
|
||||
parser := NewArticulateParser()
|
||||
_, err := parser.LoadCourseFromFile(tempFile)
|
||||
|
||||
if err == nil {
|
||||
t.Fatal("Expected JSON parsing error, got nil")
|
||||
}
|
||||
|
||||
if !strings.Contains(err.Error(), "failed to unmarshal JSON") {
|
||||
t.Errorf("Expected error to contain 'failed to unmarshal JSON', got '%s'", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractShareID tests the extractShareID method.
|
||||
func TestExtractShareID(t *testing.T) {
|
||||
parser := &ArticulateParser{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
uri string
|
||||
expected string
|
||||
hasError bool
|
||||
}{
|
||||
{
|
||||
name: "standard articulate rise URI with fragment",
|
||||
uri: "https://rise.articulate.com/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO#/",
|
||||
expected: "N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO",
|
||||
},
|
||||
{
|
||||
name: "standard articulate rise URI without fragment",
|
||||
uri: "https://rise.articulate.com/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO",
|
||||
expected: "N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO",
|
||||
},
|
||||
{
|
||||
name: "URI with trailing slash",
|
||||
uri: "https://rise.articulate.com/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO/",
|
||||
expected: "N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO",
|
||||
},
|
||||
{
|
||||
name: "short share ID",
|
||||
uri: "https://rise.articulate.com/share/abc123",
|
||||
expected: "abc123",
|
||||
},
|
||||
{
|
||||
name: "share ID with hyphens and underscores",
|
||||
uri: "https://rise.articulate.com/share/test_ID-123_abc",
|
||||
expected: "test_ID-123_abc",
|
||||
},
|
||||
{
|
||||
name: "invalid URI - no share path",
|
||||
uri: "https://rise.articulate.com/",
|
||||
hasError: true,
|
||||
},
|
||||
{
|
||||
name: "invalid URI - wrong domain",
|
||||
uri: "https://example.com/share/test123",
|
||||
hasError: true,
|
||||
},
|
||||
{
|
||||
name: "invalid URI - no share ID",
|
||||
uri: "https://rise.articulate.com/share/",
|
||||
hasError: true,
|
||||
},
|
||||
{
|
||||
name: "empty URI",
|
||||
uri: "",
|
||||
hasError: true,
|
||||
},
|
||||
{
|
||||
name: "malformed URI",
|
||||
uri: "not-a-uri",
|
||||
hasError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := parser.extractShareID(tt.uri)
|
||||
|
||||
if tt.hasError {
|
||||
if err == nil {
|
||||
t.Fatalf("Expected error for URI '%s', got nil", tt.uri)
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Fatalf("Expected no error for URI '%s', got: %v", tt.uri, err)
|
||||
}
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected share ID '%s', got '%s'", tt.expected, result)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildAPIURL tests the buildAPIURL method.
|
||||
func TestBuildAPIURL(t *testing.T) {
|
||||
parser := &ArticulateParser{
|
||||
BaseURL: "https://rise.articulate.com",
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
shareID string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "standard share ID",
|
||||
shareID: "N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO",
|
||||
expected: "https://rise.articulate.com/api/rise-runtime/boot/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO",
|
||||
},
|
||||
{
|
||||
name: "short share ID",
|
||||
shareID: "abc123",
|
||||
expected: "https://rise.articulate.com/api/rise-runtime/boot/share/abc123",
|
||||
},
|
||||
{
|
||||
name: "share ID with special characters",
|
||||
shareID: "test_ID-123_abc",
|
||||
expected: "https://rise.articulate.com/api/rise-runtime/boot/share/test_ID-123_abc",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := parser.buildAPIURL(tt.shareID)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected URL '%s', got '%s'", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildAPIURL_DifferentBaseURL tests buildAPIURL with different base URLs.
|
||||
func TestBuildAPIURL_DifferentBaseURL(t *testing.T) {
|
||||
parser := &ArticulateParser{
|
||||
BaseURL: "https://custom.domain.com",
|
||||
}
|
||||
|
||||
shareID := "test123"
|
||||
expected := "https://custom.domain.com/api/rise-runtime/boot/share/test123"
|
||||
result := parser.buildAPIURL(shareID)
|
||||
|
||||
if result != expected {
|
||||
t.Errorf("Expected URL '%s', got '%s'", expected, result)
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkExtractShareID benchmarks the extractShareID method.
|
||||
func BenchmarkExtractShareID(b *testing.B) {
|
||||
parser := &ArticulateParser{}
|
||||
uri := "https://rise.articulate.com/share/N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO#/"
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_, _ = parser.extractShareID(uri)
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkBuildAPIURL benchmarks the buildAPIURL method.
|
||||
func BenchmarkBuildAPIURL(b *testing.B) {
|
||||
parser := &ArticulateParser{
|
||||
BaseURL: "https://rise.articulate.com",
|
||||
}
|
||||
shareID := "N_APNg40Vr2CSH2xNz-ZLATM5kNviDIO"
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = parser.buildAPIURL(shareID)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user