PDF preview WIP

This commit is contained in:
2023-09-06 00:10:29 +02:00
parent ba0b55c733
commit 0e6c0c5cd5
2 changed files with 81 additions and 14 deletions

View File

@@ -18,6 +18,7 @@
<PackageReference Include="morelinq" Version="3.4.2" /> <PackageReference Include="morelinq" Version="3.4.2" />
<PackageReference Include="MvvmGen" Version="1.2.1" /> <PackageReference Include="MvvmGen" Version="1.2.1" />
<PackageReference Include="ObservableComputations" Version="2.3.0" /> <PackageReference Include="ObservableComputations" Version="2.3.0" />
<PackageReference Include="PdfPig" Version="0.1.8" />
<PackageReference Include="PropertyChanged.SourceGenerator" Version="1.0.8"> <PackageReference Include="PropertyChanged.SourceGenerator" Version="1.0.8">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>

View File

@@ -1,15 +1,20 @@
using System.Text; using System.Text;
using FileTime.App.Core.Models; using FileTime.App.Core.Models;
using FileTime.Core.ContentAccess;
using FileTime.Core.Models; using FileTime.Core.Models;
using InitableService; using InitableService;
using MvvmGen; using PropertyChanged.SourceGenerator;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
namespace FileTime.App.Core.ViewModels.ItemPreview; namespace FileTime.App.Core.ViewModels.ItemPreview;
[ViewModel]
public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncInitable<IElement> public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncInitable<IElement>
{ {
private readonly IContentAccessorFactory _contentAccessorFactory;
public const string PreviewName = "ElementPreview"; public const string PreviewName = "ElementPreview";
private record EncodingResult(char BinaryChar, string PartialResult); private record EncodingResult(char BinaryChar, string PartialResult);
private const int MaxTextPreviewSize = 1024 * 1024; private const int MaxTextPreviewSize = 1024 * 1024;
@@ -25,18 +30,78 @@ public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncI
public ItemPreviewMode Mode { get; private set; } public ItemPreviewMode Mode { get; private set; }
[Property] private string? _textContent; [Notify] private string _textContent = string.Empty;
[Property] private byte[]? _binaryContent; [Notify] private byte[] _binaryContent = Array.Empty<byte>();
[Property] private string? _textEncoding; [Notify] private string _textEncoding = string.Empty;
public string Name => PreviewName; public string Name => PreviewName;
/// <summary>
/// Creates the element preview view model.
/// </summary>
/// <param name="contentAccessorFactory">
/// Factory stored in a field; <see cref="InitAsync"/> uses it to obtain a content reader
/// when the element path ends with ".pdf".
/// </param>
public ElementPreviewViewModel(IContentAccessorFactory contentAccessorFactory)
    => _contentAccessorFactory = contentAccessorFactory;
public async Task InitAsync(IElement element) public async Task InitAsync(IElement element)
{ {
try try
{
if (element.FullName?.Path.EndsWith(".pdf") ?? false)
{
var readerFactory = _contentAccessorFactory.GetContentReaderFactory(element.Provider);
var reader = await readerFactory.CreateContentReaderAsync(element);
await using var inputStream = reader.AsStream();
using var pdfDocument = PdfDocument.Open(inputStream);
var contentBuilder = new StringBuilder();
contentBuilder.AppendLine(element.Name + ", " + pdfDocument.NumberOfPages + " pages");
foreach (var page in pdfDocument.GetPages())
{
contentBuilder.AppendLine("=== Page " + page.Number + "===");
var words = page.GetWords();
var lines = words.GroupBy(x => (int)Math.Round((x.Letters[0].StartBaseLine.Y / 7.0) * 7));
foreach (var line in lines)
{
Word? previousWord = null;
foreach (var word in line.OrderBy(x => x.BoundingBox.Left))
{
if (previousWord != null)
{
var gap = word.BoundingBox.Left - previousWord.BoundingBox.Right;
var spaceSize = word.Letters[0].Width * 2;
if (gap > spaceSize)
{
contentBuilder.Append(' ', (int)(gap / spaceSize));
}
contentBuilder.Append(word).Append(" ");
}
else
{
contentBuilder.Append(word).Append(" ");
}
previousWord = word;
}
contentBuilder.AppendLine();
}
contentBuilder.AppendLine();
if (contentBuilder.Length > MaxTextPreviewSize)
break;
}
TextContent = contentBuilder.ToString();
TextEncoding = "UTF-8";
}
else
{ {
var content = await element.Provider.GetContentAsync(element, MaxTextPreviewSize); var content = await element.Provider.GetContentAsync(element, MaxTextPreviewSize);
BinaryContent = content; BinaryContent = content ?? Array.Empty<byte>();
if (content is null) if (content is null)
{ {
@@ -46,10 +111,11 @@ public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncI
{ {
(TextContent, var encoding) = GetNormalizedText(content); (TextContent, var encoding) = GetNormalizedText(content);
TextEncoding = encoding is null TextEncoding = encoding is null
? null ? string.Empty
: $"{encoding.EncodingName} ({encoding.WebName})"; : $"{encoding.EncodingName} ({encoding.WebName})";
} }
} }
}
catch (Exception ex) catch (Exception ex)
{ {
TextContent = $"Error while getting content of {element.FullName}. " + ex.ToString(); TextContent = $"Error while getting content of {element.FullName}. " + ex.ToString();