当前位置:网站首页>C#/VB.NET:从 PDF 文档中提取所有表格
C#/VB.NET:从 PDF 文档中提取所有表格
2022-08-01 18:00:00 【InfoQ】
安装 Spire.PDF for .NET
从PDF文档中提取表格
using Spire.Pdf;
using Spire.Pdf.Utilities;
using System.IO;
using System.Text;
namespace ExtractTable
{
    /// <summary>
    /// Console sample that collects the text of every table found in a PDF
    /// document and saves it to a plain-text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "sample.pdf", walks every page, extracts each table's cell text
        /// and writes the result to "ExtractedTable.txt".
        /// Each cell is followed by a single space; each table row ends with CRLF.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Accumulates the text of all extracted tables.
            StringBuilder builder = new StringBuilder();

            PdfDocument pdf = new PdfDocument();
            try
            {
                // Load the PDF document.
                pdf.LoadFromFile("sample.pdf");

                // The extractor is bound to the loaded document.
                PdfTableExtractor extractor = new PdfTableExtractor(pdf);

                // Visit every page of the document.
                for (int pageIndex = 0; pageIndex < pdf.Pages.Count; pageIndex++)
                {
                    // Extract the tables located on the current page.
                    PdfTable[] tables = extractor.ExtractTable(pageIndex);

                    // Skip pages for which no tables were returned.
                    if (tables == null || tables.Length == 0)
                    {
                        continue;
                    }

                    foreach (PdfTable table in tables)
                    {
                        // Table dimensions.
                        int rowCount = table.GetRowCount();
                        int columnCount = table.GetColumnCount();

                        for (int i = 0; i < rowCount; i++)
                        {
                            for (int j = 0; j < columnCount; j++)
                            {
                                // Append the cell text followed by a separator space;
                                // chaining avoids a temporary concatenated string per cell.
                                builder.Append(table.GetText(i, j)).Append(' ');
                            }

                            // Terminate the row.
                            builder.Append("\r\n");
                        }
                    }
                }
            }
            finally
            {
                // Release the document's resources even if loading or extraction fails.
                pdf.Close();
            }

            // Save the extracted table content as a .txt file.
            File.WriteAllText("ExtractedTable.txt", builder.ToString());
        }
    }
}
Imports Spire.Pdf
Imports Spire.Pdf.Utilities
Imports System.IO
Imports System.Text
Namespace ExtractTable
    ''' <summary>
    ''' Console sample that collects the text of every table found in a PDF
    ''' document and saves it to a plain-text file.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Loads "sample.pdf", walks every page, extracts each table's cell text
        ''' and writes the result to "ExtractedTable.txt".
        ''' Each cell is followed by a single space; each table row ends with CRLF.
        ''' </summary>
        ''' <param name="args">Command-line arguments; not used.</param>
        Private Shared Sub Main(args As String())
            ' Accumulates the text of all extracted tables.
            Dim builder As New StringBuilder()

            Dim pdf As New PdfDocument()
            Try
                ' Load the PDF document.
                pdf.LoadFromFile("sample.pdf")

                ' The extractor is bound to the loaded document.
                Dim extractor As New PdfTableExtractor(pdf)

                ' Visit every page of the document.
                For pageIndex As Integer = 0 To pdf.Pages.Count - 1
                    ' Extract the tables located on the current page.
                    Dim tables As PdfTable() = extractor.ExtractTable(pageIndex)

                    ' Skip pages for which no tables were returned.
                    If tables Is Nothing OrElse tables.Length = 0 Then
                        Continue For
                    End If

                    For Each table As PdfTable In tables
                        ' Table dimensions.
                        Dim rowCount As Integer = table.GetRowCount()
                        Dim columnCount As Integer = table.GetColumnCount()

                        For i As Integer = 0 To rowCount - 1
                            For j As Integer = 0 To columnCount - 1
                                ' Append the cell text followed by a separator space.
                                builder.Append(table.GetText(i, j)).Append(" ")
                            Next

                            ' Terminate the row.
                            builder.Append(vbCrLf)
                        Next
                    Next
                Next
            Finally
                ' Release the document's resources even if loading or extraction fails.
                pdf.Close()
            End Try

            ' Save the extracted table content as a .txt file.
            File.WriteAllText("ExtractedTable.txt", builder.ToString())
        End Sub
    End Class
End Namespace


边栏推荐
猜你喜欢

【Day_12 0507】查找组成一个偶数最接近的两个素数

B011 - 51-based multifunctional fingerprint smart lock

Leetcode75. Color Classification

创造建材数字转型新视界,中建材如何多边赋能集团业务快速发展

2022年SQL大厂高频实战面试题(详细解析)

XAML WPF item groupBox control

How to use the Golang coroutine scheduler

Leetcode72. 编辑距离

研发团队数字化转型实践

Leetcode73. Matrix Zeroing
随机推荐
后台管理系统的权限思路
【Day_10 0428】密码强度等级
我在启牛开户安全吗?谁能告诉我靠不靠谱?
公用函数----mfc
Basic image processing in opencv
Leetcode71. 简化路径
DBPack SQL Tracing 功能及数据加密功能详解
SRM供应商管理系统如何助力口腔护理企业实现采购战略的转型升级
SQL函数 TO_CHAR(一)
浅谈大数据背景下数据库安全保障体系
极化微波成像概述3
md5sum源码 可多平台编译
成为优秀架构师必备技能:怎样才能画出让所有人赞不绝口的系统架构图?秘诀是什么?快来打开这篇文章看看吧!...
B001 - Intelligent ecological fish tank based on STM32
An essential skill for becoming a good architect: how do you draw a system architecture diagram that everyone praises? What is the secret? Open this article and have a look!
MySQL关系型数据库事务的ACID特性与实现方法
粒子滤波 particle filter —从贝叶斯滤波到粒子滤波——Part-I(贝叶斯滤波)
el-form-item prop属性动态绑定不生效如何解决
Are online account opening commissions reliable? Is online account opening safe?
RecSys'22|CARCA:交叉注意力感知上下文和属性进行推荐