PDF preview (WIP)

This commit is contained in:
2023-09-06 00:10:29 +02:00
parent ba0b55c733
commit 0e6c0c5cd5
2 changed files with 81 additions and 14 deletions

View File

@@ -18,6 +18,7 @@
<PackageReference Include="morelinq" Version="3.4.2" />
<PackageReference Include="MvvmGen" Version="1.2.1" />
<PackageReference Include="ObservableComputations" Version="2.3.0" />
<PackageReference Include="PdfPig" Version="0.1.8" />
<PackageReference Include="PropertyChanged.SourceGenerator" Version="1.0.8">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>

View File

@@ -1,15 +1,20 @@
using System.Text;
using FileTime.App.Core.Models;
using FileTime.Core.ContentAccess;
using FileTime.Core.Models;
using InitableService;
using MvvmGen;
using PropertyChanged.SourceGenerator;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Content;
using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
namespace FileTime.App.Core.ViewModels.ItemPreview;
[ViewModel]
public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncInitable<IElement>
{
private readonly IContentAccessorFactory _contentAccessorFactory;
public const string PreviewName = "ElementPreview";
private record EncodingResult(char BinaryChar, string PartialResult);
private const int MaxTextPreviewSize = 1024 * 1024;
@@ -25,18 +30,78 @@ public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncI
public ItemPreviewMode Mode { get; private set; }
[Property] private string? _textContent;
[Property] private byte[]? _binaryContent;
[Property] private string? _textEncoding;
[Notify] private string _textContent = string.Empty;
[Notify] private byte[] _binaryContent = Array.Empty<byte>();
[Notify] private string _textEncoding = string.Empty;
public string Name => PreviewName;
/// <summary>
/// Creates the element preview view model and keeps the supplied content accessor factory
/// so that content readers can be requested from it later.
/// </summary>
/// <param name="contentAccessorFactory">Factory stored in the private <c>_contentAccessorFactory</c> field.</param>
public ElementPreviewViewModel(IContentAccessorFactory contentAccessorFactory)
    => _contentAccessorFactory = contentAccessorFactory;
public async Task InitAsync(IElement element)
{
try
{
if (element.FullName?.Path.EndsWith(".pdf") ?? false)
{
var readerFactory = _contentAccessorFactory.GetContentReaderFactory(element.Provider);
var reader = await readerFactory.CreateContentReaderAsync(element);
await using var inputStream = reader.AsStream();
using var pdfDocument = PdfDocument.Open(inputStream);
var contentBuilder = new StringBuilder();
contentBuilder.AppendLine(element.Name + ", " + pdfDocument.NumberOfPages + " pages");
foreach (var page in pdfDocument.GetPages())
{
contentBuilder.AppendLine("=== Page " + page.Number + "===");
var words = page.GetWords();
var lines = words.GroupBy(x => (int)Math.Round((x.Letters[0].StartBaseLine.Y / 7.0) * 7));
foreach (var line in lines)
{
Word? previousWord = null;
foreach (var word in line.OrderBy(x => x.BoundingBox.Left))
{
if (previousWord != null)
{
var gap = word.BoundingBox.Left - previousWord.BoundingBox.Right;
var spaceSize = word.Letters[0].Width * 2;
if (gap > spaceSize)
{
contentBuilder.Append(' ', (int)(gap / spaceSize));
}
contentBuilder.Append(word).Append(" ");
}
else
{
contentBuilder.Append(word).Append(" ");
}
previousWord = word;
}
contentBuilder.AppendLine();
}
contentBuilder.AppendLine();
if (contentBuilder.Length > MaxTextPreviewSize)
break;
}
TextContent = contentBuilder.ToString();
TextEncoding = "UTF-8";
}
else
{
var content = await element.Provider.GetContentAsync(element, MaxTextPreviewSize);
BinaryContent = content;
BinaryContent = content ?? Array.Empty<byte>();
if (content is null)
{
@@ -46,10 +111,11 @@ public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncI
{
(TextContent, var encoding) = GetNormalizedText(content);
TextEncoding = encoding is null
? null
? string.Empty
: $"{encoding.EncodingName} ({encoding.WebName})";
}
}
}
catch (Exception ex)
{
TextContent = $"Error while getting content of {element.FullName}. " + ex.ToString();