Open XML操作Excel导入数据
项目中发现使用OleDb(using System.Data.OleDb)相关对象处理Excel导入功能,不是很稳定经常出问题,需要把这个问题解决掉。项目组提出使用OpenXML来处理Excel的导入、导出问题,出于兴趣对OpenXML了解一下,做了简单Demo。
3.简单功能介绍
6.示例Demo代码:

using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Xml;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;

namespace OpenXMLTest
{
    /// <summary>
    /// Reads Excel workbooks through the Open XML SDK and converts worksheets into
    /// <see cref="DataTable"/> / <see cref="DataSet"/> objects.
    /// Open design questions noted by the author:
    /// 1. Converting all Excel content straight into a table with no validation (with and without a header row).
    /// 2. Matching Excel data against the column headers and data of one specified table (with a header row).
    /// 3. Excel data that is not handled in a single table (with and without a header row).
    /// 4. Multi-sheet versus single-sheet processing.
    /// 5. A single sheet whose data comes from several database tables.
    /// </summary>
    public class ExcelOper
    {
        /// <summary>
        /// Writes a DataTable as XML (without schema) to a file under the application base
        /// directory and then opens that file with <see cref="Process.Start(string)"/>.
        /// </summary>
        /// <param name="dataTable">Table to serialize.</param>
        /// <param name="fileName">File name, combined with the application base directory.</param>
        public void DataTableToXML(DataTable dataTable, string fileName)
        {
            // Target the application's base directory.
            string filePath = Path.Combine(AppDomain.CurrentDomain.SetupInformation.ApplicationBase, fileName);

            // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
            // trailing bytes behind whenever the new XML is shorter than the old content.
            using (FileStream fs = new FileStream(filePath, FileMode.Create, FileAccess.Write))
            {
                using (XmlWriter xmlWriter = XmlWriter.Create(fs))
                {
                    dataTable.WriteXml(xmlWriter, XmlWriteMode.IgnoreSchema);
                }
            }

            Process.Start(filePath);
        }

        /// <summary>
        /// Converts every worksheet of an Excel file into a DataTable and returns them as a DataSet.
        /// </summary>
        /// <param name="filePath">Path of the Excel file.</param>
        /// <returns>Data set with one table per worksheet that contains rows.</returns>
        /// <exception cref="Exception">Any failure while opening or reading the file; the cause is the inner exception.</exception>
        public DataSet ExcelToDataSet(string filePath)
        {
            DataSet dataSet = new DataSet();
            try
            {
                using (SpreadsheetDocument spreadDocument = SpreadsheetDocument.Open(filePath, false))
                {
                    WorkbookPart workBookPart = spreadDocument.WorkbookPart;

                    foreach (string sheetName in GetSheetNames(workBookPart))
                    {
                        DataTable dataTable = WorkSheetToTable(workBookPart, sheetName);
                        if (dataTable != null)
                        {
                            dataSet.Tables.Add(dataTable);
                        }
                    }
                }
            }
            catch (Exception exp)
            {
                // Keep the original message but preserve the real cause for diagnosis.
                throw new Exception("可能Excel正在打开中,请关闭重新操作!", exp);
            }

            return dataSet;
        }

        /// <summary>
        /// Converts a single named worksheet of an Excel file into a DataTable.
        /// </summary>
        /// <param name="sheetName">Name of the worksheet to read.</param>
        /// <param name="filePath">Path of the Excel file.</param>
        /// <returns>
        /// The converted table; an empty unnamed table when the sheet name does not exist;
        /// null when the sheet exists but contains no rows.
        /// </returns>
        /// <exception cref="Exception">Any failure while opening or reading the file; the cause is the inner exception.</exception>
        public DataTable ExcelToDataTable(string sheetName, string filePath)
        {
            DataTable dataTable = new DataTable();
            try
            {
                using (SpreadsheetDocument spreadDocument = SpreadsheetDocument.Open(filePath, false))
                {
                    WorkbookPart workBookPart = spreadDocument.WorkbookPart;

                    if (GetSheetNames(workBookPart).Contains(sheetName))
                    {
                        dataTable = WorkSheetToTable(workBookPart, sheetName);
                    }
                }
            }
            catch (Exception exp)
            {
                throw new Exception("可能Excel正在打开中,请关闭重新操作!", exp);
            }

            return dataTable;
        }

        /// <summary>
        /// Collects the non-empty sheet names declared in the workbook.
        /// </summary>
        /// <param name="workBookPart">Workbook part to inspect.</param>
        /// <returns>Sheet names in workbook order; empty when the workbook declares no sheets.</returns>
        private List<string> GetSheetNames(WorkbookPart workBookPart)
        {
            List<string> sheetNames = new List<string>();
            Sheets sheets = workBookPart.Workbook.Sheets;
            if (sheets == null)
            {
                return sheetNames;
            }

            // Elements<Sheet>() skips any non-Sheet child instead of failing on a cast.
            foreach (Sheet sheet in sheets.Elements<Sheet>())
            {
                string sheetName = sheet.Name;
                if (!string.IsNullOrEmpty(sheetName))
                {
                    sheetNames.Add(sheetName);
                }
            }

            return sheetNames;
        }

        /// <summary>
        /// Returns all rows of the worksheet with the given name.
        /// </summary>
        /// <param name="workBookPart">Workbook part that owns the sheet.</param>
        /// <param name="sheetName">Name of the sheet.</param>
        /// <returns>
        /// The sheet's rows (lazily enumerated from the open document), or null when no
        /// worksheet with that name exists.
        /// </returns>
        public IEnumerable<Row> GetWorkBookPartRows(WorkbookPart workBookPart, string sheetName)
        {
            Sheet sheet = workBookPart.Workbook.Descendants<Sheet>().FirstOrDefault(s => s.Name == sheetName);
            if (sheet == null)
            {
                return null; // no such sheet
            }

            // The relationship may point at a part that is not a WorksheetPart (e.g. a chart sheet).
            WorksheetPart workSheetPart = workBookPart.GetPartById(sheet.Id) as WorksheetPart;
            if (workSheetPart == null)
            {
                return null;
            }

            return workSheetPart.Worksheet.Descendants<Row>();
        }

        /// <summary>
        /// Builds a DataTable from a worksheet. Row 1 supplies the column names; every
        /// later row becomes a data row unless all of its cells are empty.
        /// </summary>
        /// <param name="workBookPart">Workbook part that owns the sheet.</param>
        /// <param name="sheetName">Sheet name, also used as the table name.</param>
        /// <returns>The converted table, or null when the sheet is missing or has no rows.</returns>
        private DataTable WorkSheetToTable(WorkbookPart workBookPart, string sheetName)
        {
            DataTable dataTable = new DataTable(sheetName);

            IEnumerable<Row> sheetRows = GetWorkBookPartRows(workBookPart, sheetName);
            if (sheetRows == null || !sheetRows.Any())
            {
                return null;
            }

            int rowPosition = 0;
            foreach (Row row in sheetRows)
            {
                // The row index attribute is optional; fall back to the physical position
                // rather than converting a null RowIndex.
                bool isHeader = row.RowIndex != null ? row.RowIndex.Value == 1 : rowPosition == 0;
                rowPosition++;

                if (isHeader)
                {
                    List<DataColumn> listCols = GetDataColumn(row, workBookPart);
                    dataTable.Columns.AddRange(listCols.ToArray());
                }
                else
                {
                    DataRow dataRow = GetDataRow(row, dataTable, workBookPart);
                    if (dataRow != null)
                    {
                        dataTable.Rows.Add(dataRow);
                    }
                }
            }

            return dataTable;
        }

        /// <summary>
        /// Reads the workbook's custom number formats.
        /// </summary>
        /// <param name="workBookPart">Workbook part to inspect.</param>
        /// <returns>
        /// Map from number-format id to format code; empty when the workbook has no styles
        /// part or no custom number formats.
        /// </returns>
        private Dictionary<uint, string> GetNumberFormatsStyle(WorkbookPart workBookPart)
        {
            Dictionary<uint, string> formatCodes = new Dictionary<uint, string>();

            WorkbookStylesPart stylesPart = workBookPart.WorkbookStylesPart;
            if (stylesPart == null || stylesPart.Stylesheet == null || stylesPart.Stylesheet.NumberingFormats == null)
            {
                return formatCodes;
            }

            foreach (NumberingFormat format in stylesPart.Stylesheet.NumberingFormats.Elements<NumberingFormat>())
            {
                if (format.NumberFormatId != null && format.FormatCode != null)
                {
                    formatCodes[format.NumberFormatId.Value] = format.FormatCode.Value;
                }
            }

            return formatCodes;
        }

        /// <summary>
        /// Resolves a cell's style index to its custom number-format code.
        /// The style index selects an entry of the stylesheet's cell formats; that entry's
        /// number-format id is then looked up among the custom number formats.
        /// </summary>
        /// <param name="workBookPart">Workbook part that owns the stylesheet.</param>
        /// <param name="styleIndex">Value of the cell's style index.</param>
        /// <returns>
        /// The format code, or null when the style cannot be resolved or refers to a format
        /// that is not declared as a custom number format (built-in ids are not translated here).
        /// </returns>
        private string GetCellFormatCode(WorkbookPart workBookPart, uint styleIndex)
        {
            WorkbookStylesPart stylesPart = workBookPart.WorkbookStylesPart;
            if (stylesPart == null || stylesPart.Stylesheet == null || stylesPart.Stylesheet.CellFormats == null)
            {
                return null;
            }

            CellFormat cellFormat = stylesPart.Stylesheet.CellFormats
                .Elements<CellFormat>()
                .ElementAtOrDefault((int)styleIndex);
            if (cellFormat == null || cellFormat.NumberFormatId == null)
            {
                return null;
            }

            string formatCode;
            if (GetNumberFormatsStyle(workBookPart).TryGetValue(cellFormat.NumberFormatId.Value, out formatCode))
            {
                return formatCode;
            }

            return null;
        }

        /// <summary>
        /// Converts the column letters of a cell reference (e.g. "C" in "C7") into a
        /// zero-based column index.
        /// </summary>
        /// <param name="cell">Cell whose reference is read.</param>
        /// <param name="fallbackIndex">Index to use when the cell carries no usable reference.</param>
        /// <returns>Zero-based column index.</returns>
        private static int GetColumnIndex(Cell cell, int fallbackIndex)
        {
            if (cell.CellReference == null || string.IsNullOrEmpty(cell.CellReference.Value))
            {
                return fallbackIndex;
            }

            int index = 0;
            foreach (char ch in cell.CellReference.Value)
            {
                char upper = char.ToUpperInvariant(ch);
                if (upper < 'A' || upper > 'Z')
                {
                    break; // reached the row-number part
                }

                index = (index * 26) + (upper - 'A' + 1);
            }

            return index > 0 ? index - 1 : fallbackIndex;
        }

        /// <summary>
        /// Builds the column definitions from the header row, one column per header cell.
        /// </summary>
        /// <param name="row">Header row.</param>
        /// <param name="workBookPart">Workbook part used to resolve cell values.</param>
        /// <returns>Columns ordered by worksheet column.</returns>
        private List<DataColumn> GetDataColumn(Row row, WorkbookPart workBookPart)
        {
            List<DataColumn> listCols = new List<DataColumn>();
            int position = 0;

            foreach (Cell cell in row.Elements<Cell>())
            {
                int columnIndex = GetColumnIndex(cell, position);

                // Empty header cells are omitted from the file; pad with unnamed columns so
                // that list positions keep matching worksheet columns.
                while (listCols.Count < columnIndex)
                {
                    listCols.Add(new DataColumn());
                }

                listCols.Add(new DataColumn(GetCellValue(cell, workBookPart)));
                position = columnIndex + 1;
            }

            return listCols;
        }

        /// <summary>
        /// Converts one worksheet row into a DataRow of the given table.
        /// </summary>
        /// <param name="row">Worksheet row.</param>
        /// <param name="dateTable">Table that supplies the row schema.</param>
        /// <param name="workBookPart">Workbook part used to resolve cell values.</param>
        /// <returns>The populated row, or null when every cell of the row is empty.</returns>
        private DataRow GetDataRow(Row row, DataTable dateTable, WorkbookPart workBookPart)
        {
            DataRow dataRow = dateTable.NewRow();

            // Cells missing from the file read as empty strings, like cells that are present but blank.
            for (int i = 0; i < dateTable.Columns.Count; i++)
            {
                dataRow[i] = string.Empty;
            }

            bool hasValue = false;
            int position = 0;

            foreach (Cell cell in row.Elements<Cell>())
            {
                // Place each value by its column reference: empty cells are not stored, so
                // the n-th cell element is not necessarily the n-th column.
                int columnIndex = GetColumnIndex(cell, position);
                position = columnIndex + 1;

                if (columnIndex >= dateTable.Columns.Count)
                {
                    continue; // cell lies outside the header's columns; nowhere to store it
                }

                string cellVlue = GetCellValue(cell, workBookPart);
                if (!string.IsNullOrEmpty(cellVlue))
                {
                    hasValue = true;
                }

                dataRow[columnIndex] = cellVlue;
            }

            return hasValue ? dataRow : null; // drop completely empty rows
        }

        /// <summary>
        /// Returns the display text of a cell.
        /// Typed cells (shared string, boolean, date, number) are converted by type; untyped
        /// cells with a style that resolves to a custom number format are formatted with it;
        /// everything else yields the stored text unchanged.
        /// </summary>
        /// <param name="cell">Cell to read.</param>
        /// <param name="workBookPart">Workbook part that owns shared strings and styles.</param>
        /// <returns>Cell text; empty string for a cell without content.</returns>
        /// <exception cref="Exception">The stored value cannot be converted; the cause is the inner exception.</exception>
        private string GetCellValue(Cell cell, WorkbookPart workBookPart)
        {
            string cellValue = string.Empty;
            if (cell.ChildElements.Count == 0)
            {
                return cellValue; // cell element has no children
            }

            if (cell.CellValue == null)
            {
                // No <v> child: either an inline string or a cell without a stored value.
                return cell.InlineString != null ? cell.InlineString.InnerText : cellValue;
            }

            string cellRefId = cell.CellReference != null ? cell.CellReference.InnerText : string.Empty;
            string cellInnerText = cell.CellValue.InnerText;
            cellValue = cellInnerText; // default: raw stored text (covers plain numbers)

            try
            {
                EnumValue<CellValues> cellType = cell.DataType;
                if (cellType != null)
                {
                    switch (cellType.Value)
                    {
                        case CellValues.SharedString:
                            // The stored text is an index into the shared string table.
                            int sharedIndex = int.Parse(cellInnerText, CultureInfo.InvariantCulture);
                            cellValue = GetSharedString(workBookPart, sharedIndex);
                            break;
                        case CellValues.Boolean:
                            cellValue = (cellInnerText == "1") ? "TRUE" : "FALSE";
                            break;
                        case CellValues.Date:
                            cellValue = Convert.ToDateTime(cellInnerText, CultureInfo.InvariantCulture).ToString();
                            break;
                        case CellValues.Number:
                            cellValue = decimal.Parse(cellInnerText, NumberStyles.Float, CultureInfo.InvariantCulture).ToString();
                            break;
                        default:
                            cellValue = cellInnerText;
                            break;
                    }
                }
                else if (cell.StyleIndex != null)
                {
                    // Untyped (numeric) cell: apply its custom number format when it has one.
                    string formatCode = GetCellFormatCode(workBookPart, cell.StyleIndex.Value);
                    if (!string.IsNullOrEmpty(formatCode))
                    {
                        cellValue = FormatNumericCell(cellInnerText, formatCode);
                    }
                }
            }
            catch (Exception exp)
            {
                string expMessage = string.Format("Excel中{0}位置数据有误,请确认填写正确!", cellRefId);
                throw new Exception(expMessage, exp);
            }

            return cellValue;
        }

        /// <summary>
        /// Reads one entry of the workbook's shared string table.
        /// </summary>
        /// <param name="workBookPart">Workbook part that owns the table.</param>
        /// <param name="index">Zero-based entry index.</param>
        /// <returns>Inner text of the entry.</returns>
        /// <exception cref="InvalidOperationException">The workbook has no shared string table.</exception>
        private static string GetSharedString(WorkbookPart workBookPart, int index)
        {
            SharedStringTablePart sharedPart = workBookPart.SharedStringTablePart;
            if (sharedPart == null || sharedPart.SharedStringTable == null)
            {
                throw new InvalidOperationException("The workbook has no shared string table.");
            }

            return sharedPart.SharedStringTable.ChildElements[index].InnerText;
        }

        /// <summary>
        /// Formats the stored text of an untyped cell with an Excel number-format code.
        /// Codes containing "yyyy", "h", "dd" or "ss" are treated as date/time formats and
        /// the value is read as an OLE Automation date; every other code is treated as a
        /// currency/number format.
        /// </summary>
        /// <param name="cellInnerText">Stored cell text (invariant-culture number).</param>
        /// <param name="cellStyle">Excel number-format code.</param>
        /// <returns>Formatted text.</returns>
        private static string FormatNumericCell(string cellInnerText, string cellStyle)
        {
            if (cellStyle.Contains("yyyy") || cellStyle.Contains("h") || cellStyle.Contains("dd") || cellStyle.Contains("ss"))
            {
                // Date or time format: drop the ";@" section.
                cellStyle = cellStyle.Replace(";@", "");

                // Remove bracketed sections such as "[$-409]".
                while (cellStyle.Contains("[") && cellStyle.Contains("]"))
                {
                    int otherStart = cellStyle.IndexOf('[');
                    int otherEnd = cellStyle.IndexOf(']', otherStart);
                    if (otherEnd < 0)
                    {
                        break; // a ']' exists only before the '['; nothing left to strip
                    }

                    cellStyle = cellStyle.Remove(otherStart, otherEnd - otherStart + 1);
                }

                double doubleDateTime = double.Parse(cellInnerText, CultureInfo.InvariantCulture);
                DateTime dateTime = DateTime.FromOADate(doubleDateTime);

                // Excel writes months as lowercase "m" (e.g. yyyy/m/d); .NET expects "M".
                // NOTE(review): this also rewrites minute tokens (h:mm -> h:MM) - confirm
                // whether time formats need separate handling.
                if (cellStyle.Contains("m"))
                {
                    cellStyle = cellStyle.Replace("m", "M");
                }

                if (cellStyle.Contains("AM/PM"))
                {
                    cellStyle = cellStyle.Replace("AM/PM", "");
                }

                return dateTime.ToString(cellStyle);
            }

            // Other currency / numeric formats.
            decimal decimalNum = decimal.Parse(cellInnerText, NumberStyles.Float, CultureInfo.InvariantCulture);

            int dotIndex = cellStyle.LastIndexOf('.');
            if (dotIndex < 1)
            {
                // No "<digit>.<digits>" tail to extract (e.g. "0%"); return the plain number.
                return decimalNum.ToString();
            }

            // Keep the part of the code starting one character before the last decimal point.
            // NOTE(review): the replaced literal was lost in the damaged listing; a backslash
            // (Excel's escape character) is assumed - confirm against the original article.
            string numberFormat = cellStyle.Substring(dotIndex - 1).Replace("\\", "");

            decimal formatted;
            if (decimal.TryParse(decimalNum.ToString(numberFormat), out formatted))
            {
                return formatted.ToString();
            }

            // The format left non-numeric text behind; fall back to the plain number.
            return decimalNum.ToString();
        }

        /// <summary>
        /// Reads the sheet names of an Excel file directly from the workbook part's XML.
        /// </summary>
        /// <param name="filePath">Path of the Excel file.</param>
        /// <returns>Sheet names in document order.</returns>
        public List<string> GetExcelSheetNames(string filePath)
        {
            List<string> sheetNames = new List<string>();

            using (SpreadsheetDocument spreadDocument = SpreadsheetDocument.Open(filePath, false))
            {
                WorkbookPart workBook = spreadDocument.WorkbookPart;
                XmlDocument xmlDocument = new XmlDocument();

                // Dispose the part stream once the XML has been loaded.
                using (Stream stream = workBook.GetStream(FileMode.Open, FileAccess.Read))
                {
                    xmlDocument.Load(stream);
                }

                XmlNamespaceManager xmlNSManager = new XmlNamespaceManager(xmlDocument.NameTable);
                xmlNSManager.AddNamespace("default", xmlDocument.DocumentElement.NamespaceURI);

                XmlNodeList nodeList = xmlDocument.SelectNodes("//default:sheets/default:sheet", xmlNSManager);
                foreach (XmlNode node in nodeList)
                {
                    string sheetName = node.Attributes["name"].Value;
                    sheetNames.Add(sheetName);
                }
            }

            return sheetNames;
        }

        #region SaveCell

        /// <summary>
        /// Stores a text value in an existing cell and saves the worksheet.
        /// </summary>
        /// <param name="worksheet">Worksheet that contains the cell.</param>
        /// <param name="column">Column letters, e.g. "B".</param>
        /// <param name="row">Row index.</param>
        /// <param name="value">Text to store.</param>
        /// <exception cref="InvalidOperationException">The addressed cell does not exist.</exception>
        private void InsertTextCellValue(Worksheet worksheet, string column, uint row, string value)
        {
            SetCellValue(worksheet, column, row, value, CellValues.String);
        }

        /// <summary>
        /// Stores a numeric value (given as text) in an existing cell and saves the worksheet.
        /// </summary>
        /// <param name="worksheet">Worksheet that contains the cell.</param>
        /// <param name="column">Column letters, e.g. "B".</param>
        /// <param name="row">Row index.</param>
        /// <param name="value">Number to store, as text.</param>
        /// <exception cref="InvalidOperationException">The addressed cell does not exist.</exception>
        private void InsertNumberCellValue(Worksheet worksheet, string column, uint row, string value)
        {
            SetCellValue(worksheet, column, row, value, CellValues.Number);
        }

        /// <summary>
        /// Shared implementation of the Insert*CellValue methods.
        /// </summary>
        private static void SetCellValue(Worksheet worksheet, string column, uint row, string value, CellValues dataType)
        {
            Cell cell = ReturnCell(worksheet, column, row);
            if (cell == null)
            {
                throw new InvalidOperationException(
                    string.Format("Cell {0}{1} does not exist in the worksheet.", column, row));
            }

            CellValue v = new CellValue();
            v.Text = value;

            // Assigning the property replaces an existing value element; appending a child
            // would leave the cell with two values.
            cell.CellValue = v;
            cell.DataType = new EnumValue<CellValues>(dataType);
            worksheet.Save();
        }

        /// <summary>
        /// Finds the cell at the given column and row.
        /// </summary>
        /// <param name="worksheet">Worksheet to search.</param>
        /// <param name="columnName">Column letters, e.g. "B".</param>
        /// <param name="row">Row index.</param>
        /// <returns>The cell, or null when the row or the cell does not exist.</returns>
        private static Cell ReturnCell(Worksheet worksheet, string columnName, uint row)
        {
            Row targetRow = ReturnRow(worksheet, row);
            if (targetRow == null)
            {
                return null;
            }

            string reference = columnName + row;
            return targetRow.Elements<Cell>().FirstOrDefault(
                c => c.CellReference != null
                     && string.Equals(c.CellReference.Value, reference, StringComparison.OrdinalIgnoreCase));
        }

        /// <summary>
        /// Finds the row with the given row index.
        /// </summary>
        /// <param name="worksheet">Worksheet to search.</param>
        /// <param name="row">Row index.</param>
        /// <returns>The row, or null when the worksheet has no sheet data or no such row.</returns>
        private static Row ReturnRow(Worksheet worksheet, uint row)
        {
            SheetData sheetData = worksheet.GetFirstChild<SheetData>();
            if (sheetData == null)
            {
                return null;
            }

            return sheetData.Elements<Row>().FirstOrDefault(r => r.RowIndex != null && r.RowIndex.Value == row);
        }

        #endregion
    }
}

2).ExcelOperMatch.cs代码 |