commit 9474ea3c12747cc372b04c4058a3dc7c49c12d52 Author: Kaj Kowalski Date: Sat May 24 18:13:05 2025 +0200 first commit diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..52f5664 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,18 @@ +# These owners will be the default owners for everything in +# the repo. Unless a later match takes precedence, they will +# be requested for review when someone opens a pull request. +* @kjanat + +# Specific file/directory ownership examples: +# /parser/ @parsing-expert +# /exporters/ @export-specialist +# *.go @go-reviewer + +# Documentation files +/README.md @kjanat +/docs/ @kjanat + +# Configuration files +/.github/ @kjanat +/go.mod @kjanat +/go.sum @kjanat diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..65cf5bb --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,123 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Project maintainers are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Project maintainers have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the project maintainers responsible for enforcement. +All complaints will be reviewed and investigated promptly and fairly. + +All project maintainers are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Project maintainers will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from project maintainers, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +<https://www.contributor-covenant.org/version/2/0/code_of_conduct.html>. + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +<https://www.contributor-covenant.org/faq>. diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..3b4e6ae --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,177 @@ +# Contributing to Articulate Rise Parser + +Thank you for your interest in contributing to the Articulate Rise Parser! We welcome contributions from the community. + +## Code of Conduct + +This project and everyone participating in it is governed by our Code of Conduct. By participating, you are expected to uphold this code. + +## How Can I Contribute? + +### Reporting Bugs + +Before creating bug reports, please check existing issues as you might find that the issue has already been reported. When creating a bug report, include as many details as possible: + +- Use the bug report template +- Include sample Articulate Rise content that reproduces the issue +- Provide your environment details (OS, Go version, etc.) +- Include error messages and stack traces + +### Suggesting Enhancements + +Enhancement suggestions are welcome! Please use the feature request template and include: + +- A clear description of the enhancement +- Your use case and why this would be valuable +- Any implementation ideas you might have + +### Pull Requests + +1. **Fork the repository** and create your branch from `master` +2. **Make your changes** following our coding standards +3. **Add tests** for any new functionality +4. 
**Ensure all tests pass** by running `go test ./...` +5. **Run `go fmt`** to format your code +6. **Run `go vet`** to check for common issues +7. **Update documentation** if needed +8. **Create a pull request** with a clear title and description + +## Development Setup + +1. **Prerequisites:** + +- Go 1.21 or later +- Git + +2. **Clone and setup:** + + ```bash + git clone https://github.com/your-username/articulate-parser.git + cd articulate-parser + go mod download + ``` + +3. **Run tests:** + + ```bash + go test -v ./... + ``` + +4. **Build:** + + ```bash + go build main.go + ``` + +## Coding Standards + +### Go Style Guide + +- Follow the [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments) +- Use `gofmt` to format your code +- Use meaningful variable and function names +- Add comments for exported functions and types +- Keep functions focused and small + +### Testing + +- Write tests for new functionality +- Use table-driven tests where appropriate +- Aim for good test coverage +- Test error cases and edge conditions + +### Commit Messages + +Use clear and meaningful commit messages: + +```txt +Add support for new content type: interactive timeline + +- Parse timeline content blocks +- Export timeline data to markdown +- Add tests for timeline parsing +- Update documentation + +Fixes #123 +``` + +## Project Structure + +```txt +articulate-parser/ +├── main.go # Entry point and CLI handling +├── parser/ # Core parsing logic +├── exporters/ # Output format handlers +├── types/ # Data structures +├── utils/ # Utility functions +├── tests/ # Test files and data +└── docs/ # Documentation +``` + +## Adding New Features + +### New Content Types + +1. Add the content type definition to `types/` +2. Implement parsing logic in `parser/` +3. Add export handling in `exporters/` +4. Write comprehensive tests +5. Update documentation + +### New Export Formats + +1. Create a new exporter in `exporters/` +2. Implement the `Exporter` interface +3. 
Add CLI support in `main.go` +4. Add tests with sample output +5. Update README with usage examples + +## Testing + +### Running Tests + +```bash +# Run all tests +go test ./... + +# Run tests with coverage +go test -cover ./... + +# Run tests with race detection +go test -race ./... + +# Run specific test +go test -run TestSpecificFunction ./... +``` + +### Test Data + +- Add sample Articulate Rise JSON files to `tests/data/` +- Include both simple and complex content examples +- Test edge cases and error conditions + +## Documentation + +- Update the README for user-facing changes +- Add inline code comments for complex logic +- Update examples when adding new features +- Keep the feature list current + +## Release Process + +Releases are handled by maintainers: + +1. Version bumping follows semantic versioning +2. Releases are created from the `master` branch +3. GitHub Actions automatically builds and publishes releases +4. Release notes are auto-generated from commits + +## Questions? + +- Open a discussion for general questions +- Use the question issue template for specific help +- Check existing issues and documentation first + +## Recognition + +Contributors will be recognized in release notes and the project README. Thank you for helping make this project better! 
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..e69de29 diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..40f9a80 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: 📖 Documentation + url: https://github.com/kjanat/articulate-parser/blob/master/README.md + about: Check the README for usage instructions and examples + - name: 💬 Discussions + url: https://github.com/kjanat/articulate-parser/discussions + about: Ask questions and discuss ideas with the community diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..61b6c8b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,82 @@ +name: Feature Request +description: Suggest an idea for this project +title: "[FEATURE] " +labels: ["enhancement", "triage"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to suggest a new feature! + + - type: textarea + id: problem + attributes: + label: Is your feature request related to a problem? + description: A clear and concise description of what the problem is. + placeholder: I'm always frustrated when... + validations: + required: false + + - type: textarea + id: solution + attributes: + label: Describe the solution you'd like + description: A clear and concise description of what you want to happen. + placeholder: I would like... + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Describe alternatives you've considered + description: A clear and concise description of any alternative solutions or features you've considered. + placeholder: Alternative approaches... 
+ validations: + required: false + + - type: dropdown + id: category + attributes: + label: Feature Category + description: What category does this feature fall into? + options: + - Export Formats (PDF, HTML, etc.) + - Content Type Support + - Performance Improvements + - CLI/UX Improvements + - Media Handling + - Batch Processing + - Documentation + - Testing + - Other + validations: + required: true + + - type: dropdown + id: priority + attributes: + label: Priority + description: How important is this feature to you? + options: + - Low - Nice to have + - Medium - Would be helpful + - High - Really need this + - Critical - Blocking my use case + validations: + required: true + + - type: textarea + id: use-case + attributes: + label: Use Case + description: Describe your specific use case and how this feature would help you. + placeholder: In my workflow, I need to... + validations: + required: true + + - type: textarea + id: additional-context + attributes: + label: Additional Context + description: Add any other context, screenshots, or examples about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml new file mode 100644 index 0000000..145ec52 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -0,0 +1,57 @@ +name: Question +description: Ask a question about the project +title: "[QUESTION] " +labels: ["question", "triage"] +body: + - type: markdown + attributes: + value: | + Thanks for your question! Please check the README and existing issues first. + + - type: textarea + id: question + attributes: + label: Question + description: What would you like to know? + placeholder: How do I...? + validations: + required: true + + - type: dropdown + id: category + attributes: + label: Question Category + description: What is your question about? 
+ options: + - Installation/Setup + - Usage/How-to + - Supported Content Types + - Output Formats + - Troubleshooting + - Contributing + - Other + validations: + required: true + + - type: textarea + id: context + attributes: + label: Additional Context + description: | + Provide any additional context that might help us answer your question: + - What you're trying to achieve + - What you've already tried + - Any error messages + - Your environment details + placeholder: I'm trying to... + + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: Please confirm you have done the following + options: + - label: I have read the README + required: true + - label: I have searched existing issues + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..b6da683 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,51 @@ +## Description + + + +## Related Issue + + +Fixes # + +## Type of Change + + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Documentation update +- [ ] Performance improvement +- [ ] Code refactoring (no functional changes) +- [ ] Test updates + +## Checklist + + +- [ ] My code follows the style guidelines of this project +- [ ] I have performed a self-review of my code +- [ ] I have added comments to complex logic +- [ ] I have updated the documentation +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes +- [ ] I have checked for potential breaking changes +- [ ] No new warnings are generated +- [ ] The commit message follows our guidelines + +## Screenshots (if appropriate) + + + +## Additional Context + + + +## Testing Instructions + + +1. +2. +3. 
+ +## Implementation Details (optional) + + diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 0000000..5b7d306 --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,44 @@ +# Security Policy + +## Supported Versions + +Currently, the following versions of Articulate Rise Parser are supported with security updates: + +| Version | Supported | +| ------- | ------------------ | +| 1.0.x | :white_check_mark: | +| < 1.0 | :x: | + +## Reporting a Vulnerability + +We take the security of Articulate Rise Parser seriously. If you believe you have found a security vulnerability, please follow these steps: + +1. **Do not disclose the vulnerability publicly** - Please do not create a public GitHub issue for security vulnerabilities. +2. **Email the details to [security+articulate-parser@kjanat.com]** - Include as much information as possible about the vulnerability. +3. **Wait for a response** - We will acknowledge your email within 48 hours and provide an estimated timeline for a fix. +4. **Work with us** - We may ask for additional information to help us understand and address the issue. + +## What to Include in a Report + +When reporting a vulnerability, please include: + +- A clear description of the issue +- Steps to reproduce the vulnerability +- The potential impact of the vulnerability +- Any possible mitigations you've identified + +## What to Expect + +- We will acknowledge receipt of your vulnerability report within 48 hours. +- We will provide regular updates about our progress. +- We will notify you when the vulnerability is fixed. +- With your permission, we will include your name in the acknowledgments. 
+ +## Security Measures + +This project follows these security practices: + +- Regular dependency updates via Dependabot +- CodeQL security scanning +- Automated testing for each pull request +- Code review requirements for all changes diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..55ee2ac --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,34 @@ +version: 2 +updates: + # Check for updates to GitHub Actions + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: 'weekly' + open-pull-requests-limit: 10 + labels: + - 'dependencies' + - 'github-actions' + commit-message: + prefix: 'ci' + include: 'scope' + + # Check for updates to Go modules + - package-ecosystem: 'gomod' + directory: '/' + schedule: + interval: 'weekly' + open-pull-requests-limit: 10 + labels: + - 'dependencies' + - 'go' + commit-message: + prefix: 'deps' + include: 'scope' + groups: + go-modules: + patterns: + - '*' + update-types: + - 'minor' + - 'patch' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f1b72e9 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,62 @@ +name: CI + +on: + push: + branches: [master, develop] + pull_request: + branches: [master, develop] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + go-version: [1.21.x, 1.22.x, 1.23.x] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + + - name: Cache Go modules + uses: actions/cache@v4 + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + + - name: Download dependencies + run: go mod download + + - name: Verify dependencies + run: go mod verify + + - name: Build + run: go build -v ./... + + - name: Run tests + run: go test -v -race -coverprofile=coverage.out ./... + + - name: Run go vet + run: go vet ./... 
+ + - name: Run go fmt + run: | + if [ "$(gofmt -s -l . | wc -l)" -gt 0 ]; then + echo "The following files are not formatted:" + gofmt -s -l . + exit 1 + fi + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4 + with: + file: ./coverage.out + flags: unittests + name: codecov-umbrella + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..e3358d7 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,40 @@ +name: CodeQL + +on: + push: + branches: [master, develop] + pull_request: + branches: [master] + schedule: + - cron: '30 1 * * 0' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: ['go'] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: '/language:${{matrix.language}}' diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml new file mode 100644 index 0000000..889faf9 --- /dev/null +++ b/.github/workflows/dependency-review.yml @@ -0,0 +1,22 @@ +name: Dependency Review + +on: [pull_request] + +permissions: + contents: read + +jobs: + dependency-review: + runs-on: ubuntu-latest + steps: + - name: 'Checkout Repository' + uses: actions/checkout@v4 + + - name: 'Dependency Review' + uses: actions/dependency-review-action@v4 + with: + fail-on-severity: moderate + comment-summary-in-pr: always + + # # Use comma-separated names to pass list arguments: + # deny-licenses: LGPL-2.0, BSD-2-Clause diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file 
mode 100644 index 0000000..9fe5098 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,47 @@ +name: Release + +on: + push: + tags: + - 'v*' + +permissions: + contents: write + +jobs: + release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: 1.21.x + + - name: Run tests + run: go test -v ./... + + - name: Build binaries + run: | + # Build for different platforms + GOOS=windows GOARCH=amd64 go build -o articulate-parser-windows-amd64.exe main.go + GOOS=linux GOARCH=amd64 go build -o articulate-parser-linux-amd64 main.go + GOOS=darwin GOARCH=amd64 go build -o articulate-parser-darwin-amd64 main.go + GOOS=darwin GOARCH=arm64 go build -o articulate-parser-darwin-arm64 main.go + + - name: Create Release + uses: softprops/action-gh-release@v1 + with: + files: | + articulate-parser-windows-amd64.exe + articulate-parser-linux-amd64 + articulate-parser-darwin-amd64 + articulate-parser-darwin-arm64 + generate_release_notes: true + draft: false + prerelease: false + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5cd41c --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Created by https://www.toptal.com/developers/gitignore/api/go +# Edit at https://www.toptal.com/developers/gitignore?templates=go + +### Go ### +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work + +# End of https://www.toptal.com/developers/gitignore/api/go + +# Local test 
files +output/ +articulate-sample.json diff --git a/README.md b/README.md new file mode 100644 index 0000000..f10b786 --- /dev/null +++ b/README.md @@ -0,0 +1,163 @@ +# Articulate Rise Parser + +A Go-based parser that converts Articulate Rise e-learning content to various formats including Markdown and Word documents. + +## Features + +- Parse Articulate Rise JSON data from URLs or local files +- Export to Markdown (.md) format +- Export to Word Document (.docx) format +- Support for various content types: + - Text content with headings and paragraphs + - Lists and bullet points + - Multimedia content (videos and images) + - Knowledge checks and quizzes + - Interactive content (flashcards) + - Course structure and metadata + +## Installation + +1. Ensure you have Go 1.21 or later installed +2. Clone or download the parser code +3. Initialize the Go module: + +```bash +go mod init articulate-parser +go mod tidy +``` + +## Dependencies + +The parser uses the following external library: + +- `github.com/unidoc/unioffice` - For creating Word documents + +## Usage + +### Command Line Interface + +```bash +go run main.go <input_uri_or_file> <output_format> [output_path] +``` + +#### Parameters + +- `input_uri_or_file`: Either an Articulate Rise share URL or path to a local JSON file +- `output_format`: `md` for Markdown or `docx` for Word Document +- `output_path`: Optional. If not provided, files are saved to `./output/` directory + +#### Examples + +1. **Parse from URL and export to Markdown:** + +```bash +go run main.go "https://rise.articulate.com/share/rcIndCUPTdBfKAShckA5XSz3YSHpi5al#/" md +``` + +2. **Parse from local file and export to Word:** + +```bash +go run main.go "articulate-sample.json" docx "my-course.docx" +``` + +3. 
**Parse from local file and export to Markdown:** + +```bash +go run main.go "C:\Users\kjana\Projects\articulate-parser\articulate-sample.json" md +``` + +### Building the Executable + +To build a standalone executable: + +```bash +go build -o articulate-parser main.go +``` + +Then run: + +```bash +./articulate-parser input.json md output.md +``` + +## Output Formats + +### Markdown (.md) + +- Hierarchical structure with proper heading levels +- Clean text content with HTML tags removed +- Lists and bullet points preserved +- Quiz questions with correct answers marked +- Media references included +- Course metadata at the top + +### Word Document (.docx) + +- Professional document formatting +- Bold headings and proper typography +- Bulleted lists +- Quiz questions with answers +- Media content references +- Maintains course structure + +## Supported Content Types + +The parser handles the following Articulate Rise content types: + +- **Text blocks**: Headings and paragraphs +- **Lists**: Bullet points and numbered lists +- **Multimedia**: Videos and images (references only) +- **Knowledge Checks**: Multiple choice, multiple response, fill-in-the-blank, matching +- **Interactive Content**: Flashcards and interactive scenarios +- **Dividers**: Section breaks +- **Sections**: Course organization + +## Data Structure + +The parser works with the standard Articulate Rise JSON format which includes: + +- Course metadata (title, description, settings) +- Lesson structure +- Content items with various types +- Media references +- Quiz/assessment data +- Styling and layout information + +## URL Pattern Recognition + +The parser automatically extracts share IDs from Articulate Rise URLs: + +- Input: `https://rise.articulate.com/share/rcIndCUPTdBfKAShckA5XSz3YSHpi5al#/` +- API URL: `https://rise.articulate.com/api/rise-runtime/boot/share/rcIndCUPTdBfKAShckA5XSz3YSHpi5al` + +## Error Handling + +The parser includes error handling for: + +- Invalid URLs or share IDs +- Network 
connection issues +- Malformed JSON data +- File I/O errors +- Unsupported content types + +## Limitations + +- Media files (videos, images) are referenced but not downloaded +- Complex interactive elements may be simplified in export +- Styling and visual formatting is not preserved +- Assessment logic and interactivity is lost in static exports + +## Future Enhancements + +Potential improvements could include: + +- PDF export support +- Media file downloading +- HTML export with preserved styling +- SCORM package support +- Batch processing capabilities +- Custom template support + +## License + +This is a utility tool for educational content conversion. Please ensure you have appropriate rights to the Articulate Rise content you're parsing. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..11f8d80 --- /dev/null +++ b/go.mod @@ -0,0 +1,7 @@ +module articulate-parser + +go 1.21 + +require github.com/unidoc/unioffice v1.39.0 + +require github.com/richardlehane/msoleps v1.0.4 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..13e48e6 --- /dev/null +++ b/go.sum @@ -0,0 +1,6 @@ +github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN0AQoVM= +github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/richardlehane/msoleps v1.0.4 h1:WuESlvhX3gH2IHcd8UqyCuFY5yiq/GR/yqaSM/9/g00= +github.com/richardlehane/msoleps v1.0.4/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/unidoc/unioffice v1.39.0 h1:Wo5zvrzCqhyK/1Zi5dg8a5F5+NRftIMZPnFPYwruLto= +github.com/unidoc/unioffice v1.39.0/go.mod h1:Axz6ltIZZTUUyHoEnPe4Mb3VmsN4TRHT5iZCGZ1rgnU= diff --git a/main.go b/main.go new file mode 100644 index 0000000..e09ad38 --- /dev/null +++ b/main.go @@ -0,0 +1,611 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "os" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/unidoc/unioffice/document" +) + +// 
Core data structures based on the Articulate Rise JSON format +type Course struct { + ShareID string `json:"shareId"` + Author string `json:"author"` + Course CourseInfo `json:"course"` + LabelSet LabelSet `json:"labelSet"` +} + +type CourseInfo struct { + ID string `json:"id"` + Title string `json:"title"` + Description string `json:"description"` + Color string `json:"color"` + NavigationMode string `json:"navigationMode"` + Lessons []Lesson `json:"lessons"` + CoverImage *Media `json:"coverImage,omitempty"` + ExportSettings *ExportSettings `json:"exportSettings,omitempty"` +} + +type Lesson struct { + ID string `json:"id"` + Title string `json:"title"` + Description string `json:"description"` + Type string `json:"type"` + Icon string `json:"icon"` + Items []Item `json:"items"` + Position interface{} `json:"position"` + Ready bool `json:"ready"` + CreatedAt string `json:"createdAt"` + UpdatedAt string `json:"updatedAt"` +} + +type Item struct { + ID string `json:"id"` + Type string `json:"type"` + Family string `json:"family"` + Variant string `json:"variant"` + Items []SubItem `json:"items"` + Settings interface{} `json:"settings"` + Data interface{} `json:"data"` + Media *Media `json:"media,omitempty"` +} + +type SubItem struct { + ID string `json:"id"` + Type string `json:"type,omitempty"` + Title string `json:"title,omitempty"` + Heading string `json:"heading,omitempty"` + Paragraph string `json:"paragraph,omitempty"` + Caption string `json:"caption,omitempty"` + Media *Media `json:"media,omitempty"` + Answers []Answer `json:"answers,omitempty"` + Feedback string `json:"feedback,omitempty"` + Front *CardSide `json:"front,omitempty"` + Back *CardSide `json:"back,omitempty"` +} + +type Answer struct { + ID string `json:"id"` + Title string `json:"title"` + Correct bool `json:"correct"` + MatchTitle string `json:"matchTitle,omitempty"` +} + +type CardSide struct { + Media *Media `json:"media,omitempty"` + Description string `json:"description,omitempty"` +} + 
+type Media struct { + Image *ImageMedia `json:"image,omitempty"` + Video *VideoMedia `json:"video,omitempty"` +} + +type ImageMedia struct { + Key string `json:"key"` + Type string `json:"type"` + Width int `json:"width,omitempty"` + Height int `json:"height,omitempty"` + CrushedKey string `json:"crushedKey,omitempty"` + OriginalUrl string `json:"originalUrl"` + UseCrushedKey bool `json:"useCrushedKey,omitempty"` +} + +type VideoMedia struct { + Key string `json:"key"` + URL string `json:"url"` + Type string `json:"type"` + Poster string `json:"poster,omitempty"` + Duration int `json:"duration,omitempty"` + InputKey string `json:"inputKey,omitempty"` + Thumbnail string `json:"thumbnail,omitempty"` + OriginalUrl string `json:"originalUrl"` +} + +type ExportSettings struct { + Title string `json:"title"` + Format string `json:"format"` +} + +type LabelSet struct { + ID string `json:"id"` + Name string `json:"name"` + Labels map[string]string `json:"labels"` +} + +// Parser main struct +type ArticulateParser struct { + BaseURL string + Client *http.Client +} + +func NewArticulateParser() *ArticulateParser { + return &ArticulateParser{ + BaseURL: "https://rise.articulate.com", + Client: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +func (p *ArticulateParser) ExtractShareID(uri string) (string, error) { + // Extract share ID from URI like: https://rise.articulate.com/share/rcIndCUPTdBfKAShckA5XSz3YSHpi5al#/ + re := regexp.MustCompile(`/share/([a-zA-Z0-9_-]+)`) + matches := re.FindStringSubmatch(uri) + if len(matches) < 2 { + return "", fmt.Errorf("could not extract share ID from URI: %s", uri) + } + return matches[1], nil +} + +func (p *ArticulateParser) BuildAPIURL(shareID string) string { + return fmt.Sprintf("%s/api/rise-runtime/boot/share/%s", p.BaseURL, shareID) +} + +func (p *ArticulateParser) FetchCourse(uri string) (*Course, error) { + shareID, err := p.ExtractShareID(uri) + if err != nil { + return nil, err + } + + apiURL := p.BuildAPIURL(shareID) 
+ + resp, err := p.Client.Get(apiURL) + if err != nil { + return nil, fmt.Errorf("failed to fetch course data: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("API returned status %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + var course Course + if err := json.Unmarshal(body, &course); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) + } + + return &course, nil +} + +func (p *ArticulateParser) LoadCourseFromFile(filePath string) (*Course, error) { + data, err := os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("failed to read file: %w", err) + } + + var course Course + if err := json.Unmarshal(data, &course); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) + } + + return &course, nil +} + +// HTML cleaner utility +func cleanHTML(html string) string { + // Remove HTML tags but preserve content + re := regexp.MustCompile(`<[^>]*>`) + cleaned := re.ReplaceAllString(html, "") + + // Replace HTML entities + cleaned = strings.ReplaceAll(cleaned, " ", " ") + cleaned = strings.ReplaceAll(cleaned, "&", "&") + cleaned = strings.ReplaceAll(cleaned, "<", "<") + cleaned = strings.ReplaceAll(cleaned, ">", ">") + cleaned = strings.ReplaceAll(cleaned, """, "\"") + cleaned = strings.ReplaceAll(cleaned, "'", "'") + cleaned = strings.ReplaceAll(cleaned, "ï", "ï") + cleaned = strings.ReplaceAll(cleaned, "ë", "ë") + cleaned = strings.ReplaceAll(cleaned, "é", "é") + + // Clean up extra whitespace + cleaned = regexp.MustCompile(`\s+`).ReplaceAllString(cleaned, " ") + cleaned = strings.TrimSpace(cleaned) + + return cleaned +} + +// Markdown export functions +func (p *ArticulateParser) ExportToMarkdown(course *Course, outputPath string) error { + var buf bytes.Buffer + + // Write course header + buf.WriteString(fmt.Sprintf("# %s\n\n", 
course.Course.Title)) + + if course.Course.Description != "" { + buf.WriteString(fmt.Sprintf("%s\n\n", cleanHTML(course.Course.Description))) + } + + // Add metadata + buf.WriteString("## Course Information\n\n") + buf.WriteString(fmt.Sprintf("- **Course ID**: %s\n", course.Course.ID)) + buf.WriteString(fmt.Sprintf("- **Share ID**: %s\n", course.ShareID)) + buf.WriteString(fmt.Sprintf("- **Navigation Mode**: %s\n", course.Course.NavigationMode)) + if course.Course.ExportSettings != nil { + buf.WriteString(fmt.Sprintf("- **Export Format**: %s\n", course.Course.ExportSettings.Format)) + } + buf.WriteString("\n---\n\n") + + // Process lessons + for i, lesson := range course.Course.Lessons { + if lesson.Type == "section" { + buf.WriteString(fmt.Sprintf("# %s\n\n", lesson.Title)) + continue + } + + buf.WriteString(fmt.Sprintf("## Lesson %d: %s\n\n", i+1, lesson.Title)) + + if lesson.Description != "" { + buf.WriteString(fmt.Sprintf("%s\n\n", cleanHTML(lesson.Description))) + } + + // Process lesson items + for _, item := range lesson.Items { + p.processItemToMarkdown(&buf, item, 3) + } + + buf.WriteString("\n---\n\n") + } + + return os.WriteFile(outputPath, buf.Bytes(), 0644) +} + +func (p *ArticulateParser) processItemToMarkdown(buf *bytes.Buffer, item Item, level int) { + headingPrefix := strings.Repeat("#", level) + + switch item.Type { + case "text": + for _, subItem := range item.Items { + if subItem.Heading != "" { + heading := cleanHTML(subItem.Heading) + if heading != "" { + buf.WriteString(fmt.Sprintf("%s %s\n\n", headingPrefix, heading)) + } + } + if subItem.Paragraph != "" { + paragraph := cleanHTML(subItem.Paragraph) + if paragraph != "" { + buf.WriteString(fmt.Sprintf("%s\n\n", paragraph)) + } + } + } + + case "list": + for _, subItem := range item.Items { + if subItem.Paragraph != "" { + paragraph := cleanHTML(subItem.Paragraph) + if paragraph != "" { + buf.WriteString(fmt.Sprintf("- %s\n", paragraph)) + } + } + } + buf.WriteString("\n") + + case 
"multimedia": + buf.WriteString(fmt.Sprintf("%s Media Content\n\n", headingPrefix)) + for _, subItem := range item.Items { + if subItem.Media != nil { + if subItem.Media.Video != nil { + buf.WriteString(fmt.Sprintf("**Video**: %s\n", subItem.Media.Video.OriginalUrl)) + if subItem.Media.Video.Duration > 0 { + buf.WriteString(fmt.Sprintf("- Duration: %d seconds\n", subItem.Media.Video.Duration)) + } + } + if subItem.Media.Image != nil { + buf.WriteString(fmt.Sprintf("**Image**: %s\n", subItem.Media.Image.OriginalUrl)) + } + } + if subItem.Caption != "" { + caption := cleanHTML(subItem.Caption) + buf.WriteString(fmt.Sprintf("*%s*\n", caption)) + } + } + buf.WriteString("\n") + + case "image": + buf.WriteString(fmt.Sprintf("%s Image\n\n", headingPrefix)) + for _, subItem := range item.Items { + if subItem.Media != nil && subItem.Media.Image != nil { + buf.WriteString(fmt.Sprintf("**Image**: %s\n", subItem.Media.Image.OriginalUrl)) + } + if subItem.Caption != "" { + caption := cleanHTML(subItem.Caption) + buf.WriteString(fmt.Sprintf("*%s*\n", caption)) + } + } + buf.WriteString("\n") + + case "knowledgeCheck": + buf.WriteString(fmt.Sprintf("%s Knowledge Check\n\n", headingPrefix)) + for _, subItem := range item.Items { + if subItem.Title != "" { + title := cleanHTML(subItem.Title) + buf.WriteString(fmt.Sprintf("**Question**: %s\n\n", title)) + } + + buf.WriteString("**Answers**:\n") + for i, answer := range subItem.Answers { + answerText := cleanHTML(answer.Title) + correctMark := "" + if answer.Correct { + correctMark = " ✓" + } + buf.WriteString(fmt.Sprintf("%d. 
%s%s\n", i+1, answerText, correctMark)) + } + + if subItem.Feedback != "" { + feedback := cleanHTML(subItem.Feedback) + buf.WriteString(fmt.Sprintf("\n**Feedback**: %s\n", feedback)) + } + } + buf.WriteString("\n") + + case "interactive": + buf.WriteString(fmt.Sprintf("%s Interactive Content\n\n", headingPrefix)) + for _, subItem := range item.Items { + if subItem.Front != nil && subItem.Front.Description != "" { + desc := cleanHTML(subItem.Front.Description) + buf.WriteString(fmt.Sprintf("**Front**: %s\n", desc)) + } + if subItem.Back != nil && subItem.Back.Description != "" { + desc := cleanHTML(subItem.Back.Description) + buf.WriteString(fmt.Sprintf("**Back**: %s\n", desc)) + } + } + buf.WriteString("\n") + + case "divider": + buf.WriteString("---\n\n") + + default: + // Handle unknown types + if len(item.Items) > 0 { + buf.WriteString(fmt.Sprintf("%s %s Content\n\n", headingPrefix, strings.Title(item.Type))) + for _, subItem := range item.Items { + if subItem.Title != "" { + title := cleanHTML(subItem.Title) + buf.WriteString(fmt.Sprintf("- %s\n", title)) + } + } + buf.WriteString("\n") + } + } +} + +// DOCX export functions +func (p *ArticulateParser) ExportToDocx(course *Course, outputPath string) error { + doc := document.New() + + // Add title + title := doc.AddParagraph() + titleRun := title.AddRun() + titleRun.AddText(course.Course.Title) + titleRun.Properties().SetSize(20) + titleRun.Properties().SetBold(true) + + // Add description + if course.Course.Description != "" { + desc := doc.AddParagraph() + descRun := desc.AddRun() + descRun.AddText(cleanHTML(course.Course.Description)) + } + + // Add course metadata + metadata := doc.AddParagraph() + metadataRun := metadata.AddRun() + metadataRun.Properties().SetBold(true) + metadataRun.AddText("Course Information") + + courseInfo := doc.AddParagraph() + courseInfoRun := courseInfo.AddRun() + courseInfoText := fmt.Sprintf("Course ID: %s\nShare ID: %s\nNavigation Mode: %s", + course.Course.ID, course.ShareID, 
course.Course.NavigationMode) + courseInfoRun.AddText(courseInfoText) + + // Process lessons + for i, lesson := range course.Course.Lessons { + if lesson.Type == "section" { + section := doc.AddParagraph() + sectionRun := section.AddRun() + sectionRun.AddText(lesson.Title) + sectionRun.Properties().SetSize(18) + sectionRun.Properties().SetBold(true) + continue + } + + // Lesson title + lessonTitle := doc.AddParagraph() + lessonTitleRun := lessonTitle.AddRun() + lessonTitleRun.AddText(fmt.Sprintf("Lesson %d: %s", i+1, lesson.Title)) + lessonTitleRun.Properties().SetSize(16) + lessonTitleRun.Properties().SetBold(true) + + // Lesson description + if lesson.Description != "" { + lessonDesc := doc.AddParagraph() + lessonDescRun := lessonDesc.AddRun() + lessonDescRun.AddText(cleanHTML(lesson.Description)) + } + + // Process lesson items + for _, item := range lesson.Items { + p.processItemToDocx(doc, item) + } + } + + return doc.SaveToFile(outputPath) +} + +func (p *ArticulateParser) processItemToDocx(doc *document.Document, item Item) { + switch item.Type { + case "text": + for _, subItem := range item.Items { + if subItem.Heading != "" { + heading := cleanHTML(subItem.Heading) + if heading != "" { + para := doc.AddParagraph() + run := para.AddRun() + run.AddText(heading) + run.Properties().SetBold(true) + } + } + if subItem.Paragraph != "" { + paragraph := cleanHTML(subItem.Paragraph) + if paragraph != "" { + para := doc.AddParagraph() + run := para.AddRun() + run.AddText(paragraph) + } + } + } + + case "list": + for _, subItem := range item.Items { + if subItem.Paragraph != "" { + paragraph := cleanHTML(subItem.Paragraph) + if paragraph != "" { + para := doc.AddParagraph() + run := para.AddRun() + run.AddText("• " + paragraph) + } + } + } + + case "multimedia", "image": + para := doc.AddParagraph() + run := para.AddRun() + run.AddText("[Media Content]") + run.Properties().SetItalic(true) + + for _, subItem := range item.Items { + if subItem.Media != nil { + if 
subItem.Media.Video != nil { + mediaPara := doc.AddParagraph() + mediaRun := mediaPara.AddRun() + mediaRun.AddText(fmt.Sprintf("Video: %s", subItem.Media.Video.OriginalUrl)) + } + if subItem.Media.Image != nil { + mediaPara := doc.AddParagraph() + mediaRun := mediaPara.AddRun() + mediaRun.AddText(fmt.Sprintf("Image: %s", subItem.Media.Image.OriginalUrl)) + } + } + if subItem.Caption != "" { + caption := cleanHTML(subItem.Caption) + captionPara := doc.AddParagraph() + captionRun := captionPara.AddRun() + captionRun.AddText(caption) + captionRun.Properties().SetItalic(true) + } + } + + case "knowledgeCheck": + for _, subItem := range item.Items { + if subItem.Title != "" { + title := cleanHTML(subItem.Title) + questionPara := doc.AddParagraph() + questionRun := questionPara.AddRun() + questionRun.AddText("Question: " + title) + questionRun.Properties().SetBold(true) + } + + for i, answer := range subItem.Answers { + answerText := cleanHTML(answer.Title) + correctMark := "" + if answer.Correct { + correctMark = " [CORRECT]" + } + answerPara := doc.AddParagraph() + answerRun := answerPara.AddRun() + answerRun.AddText(fmt.Sprintf("%d. 
%s%s", i+1, answerText, correctMark)) + } + + if subItem.Feedback != "" { + feedback := cleanHTML(subItem.Feedback) + feedbackPara := doc.AddParagraph() + feedbackRun := feedbackPara.AddRun() + feedbackRun.AddText("Feedback: " + feedback) + feedbackRun.Properties().SetItalic(true) + } + } + } +} + +func main() { + if len(os.Args) < 3 { + fmt.Println("Usage: articulate-parser [output_path]") + fmt.Println(" input_uri_or_file: Articulate Rise URI or local JSON file path") + fmt.Println(" output_format: md (Markdown) or docx (Word Document)") + fmt.Println(" output_path: Optional output file path") + os.Exit(1) + } + + input := os.Args[1] + format := strings.ToLower(os.Args[2]) + + if format != "md" && format != "docx" { + log.Fatal("Output format must be 'md' or 'docx'") + } + + parser := NewArticulateParser() + var course *Course + var err error + + // Determine if input is a URI or file path + if strings.HasPrefix(input, "http") { + course, err = parser.FetchCourse(input) + } else { + course, err = parser.LoadCourseFromFile(input) + } + + if err != nil { + log.Fatalf("Failed to load course: %v", err) + } + + // Determine output path + var outputPath string + if len(os.Args) > 3 { + outputPath = os.Args[3] + } else { + baseDir := "output" + os.MkdirAll(baseDir, 0755) + + // Create safe filename from course title + safeTitle := regexp.MustCompile(`[^a-zA-Z0-9\-_]`).ReplaceAllString(course.Course.Title, "_") + if safeTitle == "" { + safeTitle = "articulate_course" + } + + outputPath = filepath.Join(baseDir, fmt.Sprintf("%s.%s", safeTitle, format)) + } + + // Export based on format + switch format { + case "md": + err = parser.ExportToMarkdown(course, outputPath) + case "docx": + err = parser.ExportToDocx(course, outputPath) + } + + if err != nil { + log.Fatalf("Failed to export course: %v", err) + } + + fmt.Printf("Course successfully exported to: %s\n", outputPath) + fmt.Printf("Course: %s (%d lessons)\n", course.Course.Title, len(course.Course.Lessons)) +}