From 0e6c0c5cd5c2cddfadf784c237174cb1f7376ded Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81d=C3=A1m=20Kov=C3=A1cs?=
Date: Wed, 6 Sep 2023 00:10:29 +0200
Subject: [PATCH] Pdf preview WIP

Add a plain-text preview for PDF elements to ElementPreviewViewModel.

When the element's path ends with ".pdf", InitAsync obtains a content
reader through IContentAccessorFactory, opens the stream with PdfPig and
renders every page as text: words are grouped into lines by the baseline
Y of their first letter, ordered by their left edge, and wide horizontal
gaps are padded with extra spaces. Rendering stops after the page that
pushes the text past MaxTextPreviewSize characters. Every other element
keeps the existing byte/encoding based preview.

The view model also moves from MvvmGen ([ViewModel]/[Property]) to
PropertyChanged.SourceGenerator ([Notify]); the backing fields become
non-nullable and default to empty values.
---
Review notes (this section is ignored by git am):
 - The ".pdf" check is now ordinal and case-insensitive, so ".PDF" files
   take the PDF branch as well.
 - Baseline bucketing: Math.Round((y / 7.0) * 7) reduced to Math.Round(y);
   the multiplication now happens after rounding, so words are grouped in
   7-unit bands.
 - A zero-width first letter no longer leads to a division by zero when
   padding gaps between words.
 - The page header gets the missing space before the closing "===".
 - Both Array.Empty calls carry their <byte> type argument.
 - All fixes are in-place edits of added lines; hunk sizes and the
   diffstat are unchanged.
 - The csproj hunk is reproduced as received; its XML lines are not
   present in this copy of the patch.

 .../FileTime.App.Core.csproj                  |  1 +
 .../ItemPreview/ElementPreviewViewModel.cs    | 94 ++++++++++++++++---
 2 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/src/AppCommon/FileTime.App.Core/FileTime.App.Core.csproj b/src/AppCommon/FileTime.App.Core/FileTime.App.Core.csproj
index cb16d26..7053464 100644
--- a/src/AppCommon/FileTime.App.Core/FileTime.App.Core.csproj
+++ b/src/AppCommon/FileTime.App.Core/FileTime.App.Core.csproj
@@ -18,6 +18,7 @@ + all runtime; build; native; contentfiles; analyzers; buildtransitive
diff --git a/src/AppCommon/FileTime.App.Core/ViewModels/ItemPreview/ElementPreviewViewModel.cs b/src/AppCommon/FileTime.App.Core/ViewModels/ItemPreview/ElementPreviewViewModel.cs
index 585bde5..73251b3 100644
--- a/src/AppCommon/FileTime.App.Core/ViewModels/ItemPreview/ElementPreviewViewModel.cs
+++ b/src/AppCommon/FileTime.App.Core/ViewModels/ItemPreview/ElementPreviewViewModel.cs
@@ -1,15 +1,20 @@
 using System.Text;
 using FileTime.App.Core.Models;
+using FileTime.Core.ContentAccess;
 using FileTime.Core.Models;
 using InitableService;
-using MvvmGen;
+using PropertyChanged.SourceGenerator;
+using UglyToad.PdfPig;
+using UglyToad.PdfPig.Content;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
 
 namespace FileTime.App.Core.ViewModels.ItemPreview;
 
-[ViewModel]
 public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncInitable
 {
+    private readonly IContentAccessorFactory _contentAccessorFactory;
     public const string PreviewName = "ElementPreview";
+
     private record EncodingResult(char BinaryChar, string PartialResult);
 
     private const int MaxTextPreviewSize = 1024 * 1024;
@@ -25,29 +30,90 @@ public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncI
 
     public ItemPreviewMode Mode { get; private set; }
 
-    [Property] private string? _textContent;
-    [Property] private byte[]? _binaryContent;
-    [Property] private string? _textEncoding;
+    [Notify] private string _textContent = string.Empty;
+    [Notify] private byte[] _binaryContent = Array.Empty<byte>();
+    [Notify] private string _textEncoding = string.Empty;
 
     public string Name => PreviewName;
 
+    public ElementPreviewViewModel(IContentAccessorFactory contentAccessorFactory)
+    {
+        _contentAccessorFactory = contentAccessorFactory;
+    }
+
     public async Task InitAsync(IElement element)
     {
         try
         {
-            var content = await element.Provider.GetContentAsync(element, MaxTextPreviewSize);
-            BinaryContent = content;
-
-            if (content is null)
+            if (element.FullName?.Path.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ?? false)
             {
-                TextContent = "Could not read any data from file " + element.Name;
+                var readerFactory = _contentAccessorFactory.GetContentReaderFactory(element.Provider);
+                var reader = await readerFactory.CreateContentReaderAsync(element);
+                await using var inputStream = reader.AsStream();
+                using var pdfDocument = PdfDocument.Open(inputStream);
+
+                var contentBuilder = new StringBuilder();
+                contentBuilder.AppendLine(element.Name + ", " + pdfDocument.NumberOfPages + " pages");
+                foreach (var page in pdfDocument.GetPages())
+                {
+                    contentBuilder.AppendLine("=== Page " + page.Number + " ===");
+
+                    var words = page.GetWords();
+
+                    var lines = words.GroupBy(x => (int)Math.Round(x.Letters[0].StartBaseLine.Y / 7.0) * 7);
+
+                    foreach (var line in lines)
+                    {
+                        Word? previousWord = null;
+                        foreach (var word in line.OrderBy(x => x.BoundingBox.Left))
+                        {
+                            if (previousWord != null)
+                            {
+                                var gap = word.BoundingBox.Left - previousWord.BoundingBox.Right;
+
+                                var spaceSize = word.Letters[0].Width * 2;
+                                if (spaceSize > 0 && gap > spaceSize)
+                                {
+                                    contentBuilder.Append(' ', (int)(gap / spaceSize));
+                                }
+
+                                contentBuilder.Append(word).Append(" ");
+                            }
+                            else
+                            {
+                                contentBuilder.Append(word).Append(" ");
+                            }
+
+                            previousWord = word;
+                        }
+
+                        contentBuilder.AppendLine();
+                    }
+                    contentBuilder.AppendLine();
+
+                    if (contentBuilder.Length > MaxTextPreviewSize)
+                        break;
+                }
+
+                TextContent = contentBuilder.ToString();
+                TextEncoding = "UTF-8";
             }
             else
             {
-                (TextContent, var encoding) = GetNormalizedText(content);
-                TextEncoding = encoding is null
-                    ? null
-                    : $"{encoding.EncodingName} ({encoding.WebName})";
+                var content = await element.Provider.GetContentAsync(element, MaxTextPreviewSize);
+                BinaryContent = content ?? Array.Empty<byte>();
+
+                if (content is null)
+                {
+                    TextContent = "Could not read any data from file " + element.Name;
+                }
+                else
+                {
+                    (TextContent, var encoding) = GetNormalizedText(content);
+                    TextEncoding = encoding is null
+                        ? string.Empty
+                        : $"{encoding.EncodingName} ({encoding.WebName})";
+                }
             }
         }
         catch (Exception ex)