Excel Regular Expression Parsing
Sample piece of code to parse Excel files with regular expressions using Excel Query (The first piece does not loop through all the rows to make it more efficient and fetches the resultant set based on the first row. The second piece goes through all the columns and finds a match):
//This piece uses ExcelQuery -->
using System;
using
System.Collections.Generic;
using
System.Linq;
using
System.Web;
using
System.IO;
using
System.Text;
using
System.Web.UI;
using
System.Text.RegularExpressions;
using
System.Web.UI.WebControls;
using
Scanning;
using
System.Data;
using
Innovative.Data;
//Install ExcelQuery
namespace
WebApplication1
{
public partial class WebForm1 :
System.Web.UI.Page
{
protected void Page_Load(object
sender, EventArgs e)
{
ExcelQuery excelQuery = new ExcelQuery("C:\\Test\\Test.xlsx");
excelQuery.HeaderHasFieldNames = true;
excelQuery.Provider = ExcelQuery.ConnectionStringItems.Providers.Jet12;
excelQuery.ExcelVersion = ExcelQuery.ConnectionStringItems.ExcelVersions.Excel2007to2013;
excelQuery.ConnectionProperties = "IMEX=0";
bool test = searchResults(excelQuery);
Response.Write(test.ToString());
}
protected bool searchResults(ExcelQuery
excelQuery)
{
var x = excelQuery.GetSheets();
foreach (string y in x)
{
int i = 1,a=1;
string sql = "SELECT * FROM
[" + y+"]";
Regex numberRegex = new Regex(@"\d{3}-\d{2}-\d{4}", RegexOptions.IgnoreCase);
DataSet data = excelQuery.ExecuteDataSet(sql,"Table");
while (i <= 1)
{
foreach (DataTable dt in
data.Tables)
{
do
{
a++;
var rowAsString = string.Join(",
", dt.Rows[0].ItemArray);
Match
match = numberRegex.Match(rowAsString);
if (match.Success)
{
return true;
}
} while (a <= 1);
i++;
}
}
}
return false;
}
}
} //This piece uses ClosedXML and loops through columns
Regex numberRegex = new Regex(@"\d{3}-\d{2}-\d{4}", RegexOptions.IgnoreCase);
XLWorkbook wbk = new XLWorkbook("C:\\Test.xlsx");
//List
foreach (IXLWorksheet worksheet in wbk.Worksheets)
{
//var test = worksheet.ToString();
var companyRange = worksheet.RangeUsed();
var companyTable = companyRange.AsTable();
if (companyTable != null)
{
var results = companyTable.DataRange.Rows()
.Where(companyRow => numberRegex.IsMatch(companyRow.Field(x).GetString()) == true)
.ToList();
int cnt = results.Count;
if (cnt > 0)
{
Response.Write("Testing");
}
}
}
//This Piece just loops through Cells Used
byte[] array = File.ReadAllBytes("C:\\Projects\\Sheet1test.xlsx");
Stream stream
= new MemoryStream(array);
Regex
numberRegex = new Regex(@"\d{3}-\d{2}-\d{4}", RegexOptions.IgnoreCase);
XLWorkbook
wbk = new XLWorkbook(stream);
foreach (IXLWorksheet
worksheet in
wbk.Worksheets)
{
IXLCells
cells = worksheet.Columns().CellsUsed();
var results =
cells.Where(cellval => numberRegex.IsMatch(cellval.GetString()) == true).ToList();
int cnt =
results.Count;
if (cnt > 0)
{
Response.Write("Testing");
return;
}
}
Comments