Skip to content

Parse markdown files #56

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/CommandLine/CaptureSlim.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright (c) Josef Pihrt. All rights reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

namespace Orang.CommandLine;

/// <summary>
/// Lightweight, immutable value that pairs a piece of text with a position and a length.
/// </summary>
/// <param name="Value">The captured text.</param>
/// <param name="Index">Start position of the text. NOTE(review): presumably relative to the text the capture was extracted from - confirm against callers.</param>
/// <param name="Length">Length associated with the capture. NOTE(review): presumably the number of characters the capture occupies - confirm.</param>
internal readonly record struct CaptureSlim(string Value, int Index, int Length);
1 change: 1 addition & 0 deletions src/CommandLine/CommandLine.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<ItemGroup>
<PackageReference Include="CommandLineParser" Version="2.8.0" />
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="4.7.0" />
<PackageReference Include="Markdig.Signed" Version="0.31.0" />
</ItemGroup>

<ItemGroup>
Expand Down
5 changes: 3 additions & 2 deletions src/CommandLine/Commands/CommonReplaceCommand`1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ private void ExecuteInput(SearchContext context, string input)
predicate: contentFilter.Predicate,
captures: groups);

IEnumerable<ICapture> captures = GetCaptures(groups, context.CancellationToken)
IEnumerable<ICapture> captures = GetCaptures(groups, null, context.CancellationToken)
?? groups.Select(f => (ICapture)new RegexCapture(f));

using (IEnumerator<ICapture> en = captures.GetEnumerator())
Expand Down Expand Up @@ -151,7 +151,7 @@ protected override void ExecuteMatchWithContentCore(
predicate: Options.ContentFilter!.Predicate,
captures: groups);

List<ICapture>? captures = GetCaptures(groups, context.CancellationToken);
List<ICapture>? captures = GetCaptures(groups, fileMatch, context.CancellationToken);

using (IEnumerator<ICapture> en = (captures ?? groups.Select(f => (ICapture)new RegexCapture(f))).GetEnumerator())
{
Expand Down Expand Up @@ -447,6 +447,7 @@ private static void WriteMatches(ContentWriter writer, IEnumerator<ICapture> en,

protected virtual List<ICapture>? GetCaptures(
List<Capture> groups,
FileMatch? fileMatch,
CancellationToken cancellationToken)
{
return null;
Expand Down
51 changes: 35 additions & 16 deletions src/CommandLine/Commands/SpellcheckCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Threading;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Text;
using Orang.FileSystem;
using Orang.Spelling;

namespace Orang.CommandLine;
Expand Down Expand Up @@ -37,33 +38,51 @@ protected override void ExecuteCore(SearchContext context)
return;
}

protected override List<ICapture>? GetCaptures(List<Capture> groups, CancellationToken cancellationToken)
protected override List<ICapture>? GetCaptures(
List<Capture> groups,
FileMatch? fileMatch,
CancellationToken cancellationToken)
{
var captures = new List<ICapture>();
List<TextSpan>? filteredSpans = null;

foreach (Capture capture in groups)
{
foreach (SpellingMatch spellingMatch in SpellcheckState.Spellchecker.AnalyzeText(capture.Value))
IEnumerable<CaptureSlim>? subcaptures = null;

if (fileMatch is not null
&& FileSystemUtilities.HasExtension(fileMatch.Path, "md"))
{
subcaptures = MarkdownProcessor.ProcessText(capture.Value);
}
else
{
var captureInfo = new SpellingCapture(
spellingMatch.Value,
capture.Index + spellingMatch.Index,
containingValue: spellingMatch.Parent,
containingValueIndex: spellingMatch.ParentIndex);
subcaptures = new[] { new CaptureSlim(capture.Value, capture.Index, capture.Length) };
}

if (filteredSpans is null)
filteredSpans = GetFilteredSpans(groups, cancellationToken);
foreach (CaptureSlim subcapture in subcaptures)
{
foreach (SpellingMatch spellingMatch in SpellcheckState.Spellchecker.AnalyzeText(subcapture.Value))
{
var captureInfo = new SpellingCapture(
spellingMatch.Value,
capture.Index + subcapture.Index + spellingMatch.Index,
containingValue: spellingMatch.Parent,
containingValueIndex: spellingMatch.ParentIndex);

var captureSpan = new TextSpan(captureInfo.Index, captureInfo.Length);
if (filteredSpans is null)
filteredSpans = GetFilteredSpans(groups, cancellationToken);

foreach (TextSpan filteredSpan in filteredSpans)
{
if (filteredSpan.IntersectsWith(captureSpan))
continue;
}
var captureSpan = new TextSpan(captureInfo.Index, captureInfo.Length);

foreach (TextSpan filteredSpan in filteredSpans)
{
if (filteredSpan.IntersectsWith(captureSpan))
continue;
}

captures.Add(captureInfo);
captures.Add(captureInfo);
}
}
}

Expand Down
104 changes: 104 additions & 0 deletions src/CommandLine/Markdown/MarkdownProcessor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright (c) Josef Pihrt. All rights reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System.Collections.Generic;
using System.Diagnostics;
using Markdig;
using Markdig.Extensions.CustomContainers;
using Markdig.Extensions.Tables;
using Markdig.Helpers;
using Markdig.Syntax;
using Markdig.Syntax.Inlines;

namespace Orang.CommandLine;

/// <summary>
/// Extracts the textual parts of a Markdown document (literals, inline code, link labels and titles,
/// code block lines) together with their position so that they can be processed separately
/// from the Markdown syntax that surrounds them.
/// </summary>
internal static class MarkdownProcessor
{
    // Precise source locations must be enabled, otherwise Markdig does not track accurate spans
    // for inline elements (Span, LabelSpan, TitleSpan) and the reported indexes would be unusable.
    private static readonly MarkdownPipeline _pipeline = new MarkdownPipelineBuilder()
        .UseAdvancedExtensions()
        .UsePreciseSourceLocation()
        .Build();

    /// <summary>
    /// Parses <paramref name="text"/> as Markdown and lazily yields one <see cref="CaptureSlim"/>
    /// for each textual fragment found in the document.
    /// </summary>
    /// <param name="text">Markdown text to parse.</param>
    /// <returns>
    /// A sequence of fragments; each fragment carries its text, its start position as reported
    /// by the parser and the length of the text.
    /// </returns>
    public static IEnumerable<CaptureSlim> ProcessText(string text)
    {
        MarkdownDocument document = Markdown.Parse(text, _pipeline);

        foreach (MarkdownObject item in document.Descendants())
        {
            switch (item)
            {
                case CodeInline code:
                    {
                        // The span covers the delimiters as well, so skip the opening ones
                        // and report the length of the content only.
                        string content = code.Content;

                        yield return new CaptureSlim(content, code.Span.Start + code.DelimiterCount, content.Length);
                        break;
                    }
                case LiteralInline literal:
                    {
                        string value = literal.Content.ToString();

                        // An escaped first character is preceded by a backslash that is not part of the content.
                        int offset = (literal.IsFirstCharacterEscaped) ? 1 : 0;

                        yield return new CaptureSlim(value, literal.Span.Start + offset, value.Length);
                        break;
                    }
                case LinkInline link:
                    {
                        string? label = link.Label;
                        string? title = link.Title;

                        if (!string.IsNullOrEmpty(label))
                            yield return new CaptureSlim(label, link.LabelSpan.Start, link.LabelSpan.Length);

                        if (!string.IsNullOrEmpty(title))
                            yield return new CaptureSlim(title, link.TitleSpan.Start, link.TitleSpan.Length);

                        break;
                    }
                case LinkReferenceDefinition linkReferenceDef:
                    {
                        string? label = linkReferenceDef.Label;
                        string? title = linkReferenceDef.Title;

                        if (!string.IsNullOrEmpty(label))
                            yield return new CaptureSlim(label, linkReferenceDef.LabelSpan.Start, linkReferenceDef.LabelSpan.Length);

                        if (!string.IsNullOrEmpty(title))
                            yield return new CaptureSlim(title, linkReferenceDef.TitleSpan.Start, linkReferenceDef.TitleSpan.Length);

                        break;
                    }
                case CodeBlock codeBlock:
                    {
                        // Only the first Count entries of the backing array are valid lines;
                        // the array may be null (no lines) or contain unused trailing slots.
                        StringLineGroup lines = codeBlock.Lines;

                        for (int i = 0; i < lines.Count; i++)
                        {
                            StringSlice slice = lines.Lines[i].Slice;
                            yield return new CaptureSlim(slice.ToString(), slice.Start, slice.Length);
                        }

                        break;
                    }
                case ContainerInline: // EmphasisInline, DelimiterInline, EmphasisDelimiterInline, LinkDelimiterInline
                case AutolinkInline:
                case HtmlEntityInline:
                case LineBreakInline:
                case HtmlInline:
                case HeadingBlock:
                case ListBlock:
                case ListItemBlock:
                case ParagraphBlock:
                case ThematicBreakBlock:
                case LinkReferenceDefinitionGroup:
                case Table:
                case TableRow:
                case TableCell:
                case QuoteBlock:
                case CustomContainer:
                case HtmlBlock:
                    {
                        // Structural nodes or nodes without text of interest; their text (if any)
                        // is reached through their descendants.
                        break;
                    }
                default:
                    {
                        // Surface node types that are not handled yet in debug builds.
                        Debug.Fail(item.GetType().FullName);
                        break;
                    }
            }
        }
    }
}
9 changes: 9 additions & 0 deletions src/FileSystem/FileSystem/FileSystemUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,15 @@ public static int GetExtensionIndex(string path)
return path.Length;
}

/// <summary>
/// Determines whether the extension of <paramref name="path"/> is equal to <paramref name="extension"/>.
/// </summary>
/// <param name="path">Path whose extension is examined.</param>
/// <param name="extension">Expected extension without the leading separator, for example <c>"md"</c>.</param>
/// <returns>
/// <c>true</c> if the text that follows the position returned by <see cref="GetExtensionIndex(string)"/>
/// is exactly <paramref name="extension"/> (ordinal, case-sensitive comparison); otherwise, <c>false</c>.
/// </returns>
public static bool HasExtension(string path, string extension)
{
    int index = GetExtensionIndex(path);

    // The remaining text must have the same length as the expected extension;
    // comparing only extension.Length characters would accept "a.mdx" for "md".
    return index >= 0
        && index < path.Length - 1
        && path.Length - index - 1 == extension.Length
        && string.CompareOrdinal(path, index + 1, extension, 0, extension.Length) == 0;
}

public static bool IsDirectorySeparator(char ch)
{
return ch == Path.DirectorySeparatorChar
Expand Down
2 changes: 2 additions & 0 deletions src/spellcheck
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@ Josef
Orang
Pihrt
singleline
subcapture
subcaptures
txt
Utc