Pdf preview WIP
This commit is contained in:
@@ -18,6 +18,7 @@
|
|||||||
<PackageReference Include="morelinq" Version="3.4.2" />
|
<PackageReference Include="morelinq" Version="3.4.2" />
|
||||||
<PackageReference Include="MvvmGen" Version="1.2.1" />
|
<PackageReference Include="MvvmGen" Version="1.2.1" />
|
||||||
<PackageReference Include="ObservableComputations" Version="2.3.0" />
|
<PackageReference Include="ObservableComputations" Version="2.3.0" />
|
||||||
|
<PackageReference Include="PdfPig" Version="0.1.8" />
|
||||||
<PackageReference Include="PropertyChanged.SourceGenerator" Version="1.0.8">
|
<PackageReference Include="PropertyChanged.SourceGenerator" Version="1.0.8">
|
||||||
<PrivateAssets>all</PrivateAssets>
|
<PrivateAssets>all</PrivateAssets>
|
||||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
|||||||
@@ -1,15 +1,20 @@
|
|||||||
using System.Text;
|
using System.Text;
|
||||||
using FileTime.App.Core.Models;
|
using FileTime.App.Core.Models;
|
||||||
|
using FileTime.Core.ContentAccess;
|
||||||
using FileTime.Core.Models;
|
using FileTime.Core.Models;
|
||||||
using InitableService;
|
using InitableService;
|
||||||
using MvvmGen;
|
using PropertyChanged.SourceGenerator;
|
||||||
|
using UglyToad.PdfPig;
|
||||||
|
using UglyToad.PdfPig.Content;
|
||||||
|
using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
|
||||||
|
|
||||||
namespace FileTime.App.Core.ViewModels.ItemPreview;
|
namespace FileTime.App.Core.ViewModels.ItemPreview;
|
||||||
|
|
||||||
[ViewModel]
|
|
||||||
public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncInitable<IElement>
|
public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncInitable<IElement>
|
||||||
{
|
{
|
||||||
|
private readonly IContentAccessorFactory _contentAccessorFactory;
|
||||||
public const string PreviewName = "ElementPreview";
|
public const string PreviewName = "ElementPreview";
|
||||||
|
|
||||||
private record EncodingResult(char BinaryChar, string PartialResult);
|
private record EncodingResult(char BinaryChar, string PartialResult);
|
||||||
|
|
||||||
private const int MaxTextPreviewSize = 1024 * 1024;
|
private const int MaxTextPreviewSize = 1024 * 1024;
|
||||||
@@ -25,29 +30,90 @@ public partial class ElementPreviewViewModel : IElementPreviewViewModel, IAsyncI
|
|||||||
|
|
||||||
public ItemPreviewMode Mode { get; private set; }
|
public ItemPreviewMode Mode { get; private set; }
|
||||||
|
|
||||||
[Property] private string? _textContent;
|
[Notify] private string _textContent = string.Empty;
|
||||||
[Property] private byte[]? _binaryContent;
|
[Notify] private byte[] _binaryContent = Array.Empty<byte>();
|
||||||
[Property] private string? _textEncoding;
|
[Notify] private string _textEncoding = string.Empty;
|
||||||
|
|
||||||
public string Name => PreviewName;
|
public string Name => PreviewName;
|
||||||
|
|
||||||
|
public ElementPreviewViewModel(IContentAccessorFactory contentAccessorFactory)
|
||||||
|
{
|
||||||
|
_contentAccessorFactory = contentAccessorFactory;
|
||||||
|
}
|
||||||
|
|
||||||
public async Task InitAsync(IElement element)
|
public async Task InitAsync(IElement element)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
var content = await element.Provider.GetContentAsync(element, MaxTextPreviewSize);
|
if (element.FullName?.Path.EndsWith(".pdf") ?? false)
|
||||||
BinaryContent = content;
|
|
||||||
|
|
||||||
if (content is null)
|
|
||||||
{
|
{
|
||||||
TextContent = "Could not read any data from file " + element.Name;
|
var readerFactory = _contentAccessorFactory.GetContentReaderFactory(element.Provider);
|
||||||
|
var reader = await readerFactory.CreateContentReaderAsync(element);
|
||||||
|
await using var inputStream = reader.AsStream();
|
||||||
|
using var pdfDocument = PdfDocument.Open(inputStream);
|
||||||
|
|
||||||
|
var contentBuilder = new StringBuilder();
|
||||||
|
contentBuilder.AppendLine(element.Name + ", " + pdfDocument.NumberOfPages + " pages");
|
||||||
|
foreach (var page in pdfDocument.GetPages())
|
||||||
|
{
|
||||||
|
contentBuilder.AppendLine("=== Page " + page.Number + "===");
|
||||||
|
|
||||||
|
var words = page.GetWords();
|
||||||
|
|
||||||
|
var lines = words.GroupBy(x => (int)Math.Round((x.Letters[0].StartBaseLine.Y / 7.0) * 7));
|
||||||
|
|
||||||
|
foreach (var line in lines)
|
||||||
|
{
|
||||||
|
Word? previousWord = null;
|
||||||
|
foreach (var word in line.OrderBy(x => x.BoundingBox.Left))
|
||||||
|
{
|
||||||
|
if (previousWord != null)
|
||||||
|
{
|
||||||
|
var gap = word.BoundingBox.Left - previousWord.BoundingBox.Right;
|
||||||
|
|
||||||
|
var spaceSize = word.Letters[0].Width * 2;
|
||||||
|
if (gap > spaceSize)
|
||||||
|
{
|
||||||
|
contentBuilder.Append(' ', (int)(gap / spaceSize));
|
||||||
|
}
|
||||||
|
|
||||||
|
contentBuilder.Append(word).Append(" ");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
contentBuilder.Append(word).Append(" ");
|
||||||
|
}
|
||||||
|
|
||||||
|
previousWord = word;
|
||||||
|
}
|
||||||
|
|
||||||
|
contentBuilder.AppendLine();
|
||||||
|
}
|
||||||
|
contentBuilder.AppendLine();
|
||||||
|
|
||||||
|
if (contentBuilder.Length > MaxTextPreviewSize)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
TextContent = contentBuilder.ToString();
|
||||||
|
TextEncoding = "UTF-8";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
(TextContent, var encoding) = GetNormalizedText(content);
|
var content = await element.Provider.GetContentAsync(element, MaxTextPreviewSize);
|
||||||
TextEncoding = encoding is null
|
BinaryContent = content ?? Array.Empty<byte>();
|
||||||
? null
|
|
||||||
: $"{encoding.EncodingName} ({encoding.WebName})";
|
if (content is null)
|
||||||
|
{
|
||||||
|
TextContent = "Could not read any data from file " + element.Name;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
(TextContent, var encoding) = GetNormalizedText(content);
|
||||||
|
TextEncoding = encoding is null
|
||||||
|
? string.Empty
|
||||||
|
: $"{encoding.EncodingName} ({encoding.WebName})";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
|
|||||||
Reference in New Issue
Block a user