使用NPOI读取Excel的例子很多,读取Word的例子不多。
Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。
Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。
Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以先录制宏,再把生成的宏代码改写为目标语言的代码)
也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。
1 using NPOI.POIFS.FileSystem;
2 using NPOI.SS.UserModel;
3 using NPOI.XSSF.UserModel;
4 using NPOI.XWPF.UserModel;
5 using System;
6 using System.Collections.Generic;
7 using System.Configuration;
8 using System.IO;
9 using System.Text;
10
namespace eyuan
{
    /// <summary>
    /// Static helpers that use NPOI to pull text out of .xlsx workbooks
    /// (<see cref="XSSFWorkbook"/>) and .docx documents (<see cref="XWPFDocument"/>).
    /// Separators and capture switches are read from the application's
    /// <c>appSettings</c> configuration section.
    /// </summary>
    public static class NOPIHandler
    {
        /// <summary>
        /// Placeholder emitted for a column position that has no cell in a row.
        /// </summary>
        private const string MissingCellText = "NIL";

        /// <summary>
        /// Reads every sheet of an .xlsx workbook into nested lists:
        /// workbook -> sheets -> rows -> cell texts.
        /// </summary>
        /// <param name="fileName">Path of the workbook to read.</param>
        /// <returns>
        /// One list per sheet, each holding one list per existing row, each holding
        /// the text of every column up to the row's last cell. Missing cells are
        /// reported as "NIL". If the file cannot be opened the failure is logged and
        /// an empty list is returned.
        /// </returns>
        public static List<List<List<string>>> ReadExcel(string fileName)
        {
            List<List<List<string>>> workBookContent = new List<List<List<string>>>();

            XSSFWorkbook workbook = OpenWorkbook(fileName);
            if (workbook == null)
            {
                // The open failure has already been logged; there is nothing to read.
                return workBookContent;
            }

            // Sheet indexes start at 0.
            int sheetsCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetsCount; i++)
            {
                ISheet sheet = workbook.GetSheetAt(i);
                List<List<string>> sheetContent = new List<List<string>>();

                // Walk the logical row indexes instead of counting physical rows:
                // a sheet may have gaps, in which case GetRow returns null for the
                // missing index and later rows sit beyond the physical row count.
                int lastRowNum = sheet.LastRowNum;
                for (int j = 0; j <= lastRowNum; j++)
                {
                    IRow row = sheet.GetRow(j);
                    if (row == null)
                    {
                        continue;
                    }

                    sheetContent.Add(ReadRow(row));
                }

                workBookContent.Add(sheetContent);
            }

            return workBookContent;
        }

        /// <summary>
        /// Flattens an .xlsx workbook into a single string, joining cells, rows and
        /// sheets with the separators configured under the <c>ExcelCellSeparator</c>,
        /// <c>ExcelRowSeparator</c> and <c>ExcelSheetSeparator</c> app settings.
        /// </summary>
        /// <param name="fileName">Path of the workbook to read.</param>
        /// <returns>
        /// The concatenated workbook text. A row separator follows every row and a
        /// sheet separator follows every sheet. Empty when the workbook has no
        /// content or could not be opened.
        /// </returns>
        public static string ReadExcelText(string fileName)
        {
            // Missing settings yield null, which both string.Join and
            // StringBuilder.Append treat as an empty string.
            string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
            string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
            string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

            StringBuilder text = new StringBuilder();
            foreach (List<List<string>> sheet in ReadExcel(fileName))
            {
                foreach (List<string> row in sheet)
                {
                    text.Append(string.Join(cellSeparator, row.ToArray()));
                    text.Append(rowSeparator);
                }

                text.Append(sheetSeparator);
            }

            return text.ToString();
        }

        /// <summary>
        /// Extracts the text of a .docx document. Headers, footers, tables and
        /// embedded images are each included only when the matching app setting
        /// (<c>CaptureWordHeader</c>, <c>CaptureWordFooter</c>, <c>CaptureWordTable</c>,
        /// <c>CaptureWordImage</c>) parses as boolean true; body paragraphs are
        /// always included. Every section is wrapped in "Capture ... Begin/End"
        /// marker lines.
        /// </summary>
        /// <param name="fileName">Path of the document to read.</param>
        /// <returns>
        /// The captured text. When image capture is enabled, each embedded picture is
        /// written to disk and the path of the written file is added to the text.
        /// If the file cannot be opened the failure is logged and an empty string is
        /// returned.
        /// </returns>
        public static string ReadWordText(string fileName)
        {
            XWPFDocument document = OpenDocument(fileName);
            if (document == null)
            {
                // The open failure has already been logged; there is nothing to read.
                return string.Empty;
            }

            StringBuilder text = new StringBuilder();

            // Headers
            if (IsSettingEnabled("CaptureWordHeader"))
            {
                text.AppendLine("Capture Header Begin");
                foreach (XWPFHeader header in document.HeaderList)
                {
                    text.AppendLine(header.Text);
                }
                text.AppendLine("Capture Header End");
            }

            // Footers
            if (IsSettingEnabled("CaptureWordFooter"))
            {
                text.AppendLine("Capture Footer Begin");
                foreach (XWPFFooter footer in document.FooterList)
                {
                    text.AppendLine(footer.Text);
                }
                text.AppendLine("Capture Footer End");
            }

            // Tables
            if (IsSettingEnabled("CaptureWordTable"))
            {
                AppendTables(document, text);
            }

            // Embedded images
            if (IsSettingEnabled("CaptureWordImage"))
            {
                string fileNameTemplate = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

                text.AppendLine("Capture Image Begin");
                foreach (XWPFPictureData pictureData in document.AllPictures)
                {
                    text.AppendLine(SavePicture(pictureData, fileNameTemplate));
                }
                text.AppendLine("Capture Image End");
            }

            // Body paragraphs
            text.AppendLine("Capture Paragraph Begin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                text.AppendLine(paragraph.ParagraphText);
            }
            text.AppendLine("Capture Paragraph End");

            return text.ToString();
        }

        /// <summary>
        /// Opens an .xlsx workbook; logs the failure and returns null when it cannot be opened.
        /// </summary>
        private static XSSFWorkbook OpenWorkbook(string fileName)
        {
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    return new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                LogOpenFailure(fileName, e);
                return null;
            }
        }

        /// <summary>
        /// Opens a .docx document; logs the failure and returns null when it cannot be opened.
        /// </summary>
        private static XWPFDocument OpenDocument(string fileName)
        {
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    return new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                LogOpenFailure(fileName, e);
                return null;
            }
        }

        /// <summary>
        /// Writes the "file could not be opened" message to the application log.
        /// </summary>
        private static void LogOpenFailure(string fileName, Exception e)
        {
            LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
        }

        /// <summary>
        /// Returns the text of every column position of a row, using "NIL" where no cell exists.
        /// </summary>
        private static List<string> ReadRow(IRow row)
        {
            List<string> rowContent = new List<string>();

            // Rows may differ in width. LastCellNum is expected to be the index of
            // the last cell plus one (negative for a row without cells, in which
            // case the loop body never runs) - per the POI/NPOI row contract.
            int cellEnd = row.LastCellNum;
            for (int k = 0; k < cellEnd; k++)
            {
                // Look cells up by column index so that gaps keep their position.
                ICell cell = row.GetCell(k);
                rowContent.Add(cell == null ? MissingCellText : cell.ToString());
            }

            return rowContent;
        }

        /// <summary>
        /// Appends the text of every table of the document, wrapped in the table marker lines.
        /// </summary>
        private static void AppendTables(XWPFDocument document, StringBuilder text)
        {
            string cellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
            string rowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
            string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

            text.AppendLine("Capture Table Begin");
            foreach (XWPFTable table in document.Tables)
            {
                foreach (XWPFTableRow row in table.Rows)
                {
                    foreach (XWPFTableCell cell in row.GetTableCells())
                    {
                        // The separator follows every cell, including the last one of a row.
                        text.Append(cell.GetText());
                        text.Append(cellSeparator);
                    }

                    text.Append(rowSeparator);
                }

                text.Append(tableSeparator);
            }
            text.AppendLine("Capture Table End");
        }

        /// <summary>
        /// Writes one embedded picture to disk under a unique name and returns the path written.
        /// </summary>
        /// <param name="pictureData">The embedded picture to save.</param>
        /// <param name="fileNameTemplate">
        /// Format string whose <c>{0}</c> placeholder receives the unique file name.
        /// When null or empty, the picture is written to the system temp directory.
        /// </param>
        private static string SavePicture(XWPFPictureData pictureData, string fileNameTemplate)
        {
            string extension = pictureData.suggestFileExtension();
            string originalName = pictureData.GetFileName();
            byte[] content = pictureData.GetData();

            // The GUID prefix keeps pictures of different documents from overwriting each other.
            string uniqueName = Guid.NewGuid().ToString() + "_" + originalName;

            // The embedded file name may already carry its extension; only append
            // the suggested extension when it is not there yet.
            if (!string.IsNullOrEmpty(extension)
                && !uniqueName.EndsWith("." + extension, StringComparison.OrdinalIgnoreCase))
            {
                uniqueName = uniqueName + "." + extension;
            }

            string targetPath = string.IsNullOrEmpty(fileNameTemplate)
                ? Path.Combine(Path.GetTempPath(), uniqueName)
                : string.Format(fileNameTemplate, uniqueName);

            File.WriteAllBytes(targetPath, content);
            return targetPath;
        }

        /// <summary>
        /// Returns true when the named app setting parses as boolean true (case-insensitive);
        /// a missing or unparsable value counts as false.
        /// </summary>
        private static bool IsSettingEnabled(string key)
        {
            bool enabled;
            return bool.TryParse(ConfigurationManager.AppSettings[key], out enabled) && enabled;
        }
    }
}
扫码关注腾讯云开发者
领取腾讯云代金券
Copyright © 2013 - 2025 Tencent Cloud. All Rights Reserved. 腾讯云 版权所有
深圳市腾讯计算机系统有限公司 ICP备案/许可证号:粤B2-20090059 深公网安备号 44030502008569
腾讯云计算(北京)有限责任公司 京ICP证150476号 | 京ICP备11018762号 | 京公网安备号11010802020287
Copyright © 2013 - 2025 Tencent Cloud.
All Rights Reserved. 腾讯云 版权所有