Quote:
Originally Posted by Cactus Chef
...
I do wish they were CBZ rather than PDF. Oh well.
|
I had some PDFs that were just collections of images, and I wanted them in a different format, so I whipped up a small C# .NET application to extract the images. It assumes that the PDF consists only of embedded JPG images (plus some small miscellaneous streams, which it ignores) and saves each image out to its own file. You can then add those files to a new CBZ file.
Code:
using iText.Kernel.Exceptions;
using iText.Kernel.Pdf;
// Walks every indirect object in the PDF, and for each stream object larger than
// MinImageBytes writes its bytes to a sequentially numbered .jpg file.
//
// NOTE(review): nothing here checks that a stream really is a JPEG (e.g. via its
// /Subtype or /Filter entries); any large stream is saved with a .jpg extension.
// That matches the stated assumption that the PDF is just a collection of
// embedded JPG images plus small miscellaneous objects.
const string InputPath = "../../../pdfs/test.pdf";
const string OutputDir = "../../../images";

// Streams at or below this size are treated as non-image odds and ends and skipped.
const int MinImageBytes = 2048;

// No-op when the directory already exists; avoids DirectoryNotFoundException on first run.
Directory.CreateDirectory(OutputDir);

PdfDocument pdfDoc = new(new PdfReader(InputPath));
try
{
    int imgNum = 0;
    for (int i = 1; i <= pdfDoc.GetNumberOfPdfObjects(); i++)
    {
        // Free or missing object numbers yield null; non-stream objects are not of interest.
        if (pdfDoc.GetPdfObject(i) is not PdfStream stream)
        {
            continue;
        }

        byte[] b;
        try
        {
            // Default overload asks iText to apply the stream's filters (decode).
            b = stream.GetBytes();
        }
        catch (PdfException)
        {
            // Decoding failed (presumably an unsupported filter); fall back to the raw bytes.
            b = stream.GetBytes(false);
        }

        if (b is null || b.Length <= MinImageBytes)
        {
            continue;
        }

        // "D4" zero-pads to at least four digits but, unlike slicing the last four
        // characters, never truncates: image 10000 no longer overwrites 0000.jpg.
        string pageNum = imgNum.ToString("D4", System.Globalization.CultureInfo.InvariantCulture);
        imgNum++;

        // Creates or overwrites the file and always releases the handle, even on failure.
        File.WriteAllBytes($"{OutputDir}/{pageNum}.jpg", b);
    }
}
finally
{
    // Release the underlying PDF file even if extraction throws part-way through.
    pdfDoc.Close();
}