Excel Regular Expression Parsing

Sample piece of code to parse Excel files with regular expressions using Excel Query (The first piece does not loop through all the rows to make it more efficient and fetches the resultant set based on the first row. The second piece goes through all the columns and finds a match):

//This piece uses ExcelQuery -->

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.IO;
using System.Text;
using System.Web.UI;
using System.Text.RegularExpressions;
using System.Web.UI.WebControls;
using Scanning;
using System.Data;
using Innovative.Data;

//Install ExcelQuery
namespace WebApplication1
{
    public partial class WebForm1 : System.Web.UI.Page
    {
         protected void Page_Load(object sender, EventArgs e)
        {

            ExcelQuery excelQuery = new ExcelQuery("C:\\Test\\Test.xlsx");
            excelQuery.HeaderHasFieldNames = true;
                     excelQuery.Provider = ExcelQuery.ConnectionStringItems.Providers.Jet12;
                     excelQuery.ExcelVersion = ExcelQuery.ConnectionStringItems.ExcelVersions.Excel2007to2013;
            excelQuery.ConnectionProperties = "IMEX=0";
            bool test = searchResults(excelQuery);
            Response.Write(test.ToString());
                  
        }


        protected bool searchResults(ExcelQuery  excelQuery)
        {
            var x = excelQuery.GetSheets();
            foreach (string y in x)
            {
                int i = 1,a=1;
                string sql = "SELECT * FROM [" + y+"]";
                Regex numberRegex = new Regex(@"\d{3}-\d{2}-\d{4}", RegexOptions.IgnoreCase);
                DataSet data = excelQuery.ExecuteDataSet(sql,"Table");
                while (i <= 1)
                {
                    foreach (DataTable dt in data.Tables)
                    {
                        do
                        {
                            a++;

                            var rowAsString = string.Join(", ", dt.Rows[0].ItemArray);

                           Match match = numberRegex.Match(rowAsString);
                            if (match.Success)
                            {
                                return true;
                            }


                       } while (a <= 1);
                      
                        i++;
                    }
                }

           }
           
            return false;
         }

    }
}   


//This piece uses ClosedXML and loops through columns


            Regex numberRegex = new Regex(@"\d{3}-\d{2}-\d{4}", RegexOptions.IgnoreCase);
            XLWorkbook wbk = new XLWorkbook("C:\\Test.xlsx");
            //List results;

            foreach (IXLWorksheet worksheet in wbk.Worksheets)
            {
                //var test  = worksheet.ToString();
                var companyRange = worksheet.RangeUsed();
                var companyTable = companyRange.AsTable();

                if (companyTable != null)
                {
                 var results = companyTable.DataRange.Rows()
                       .Where(companyRow => numberRegex.IsMatch(companyRow.Field(x).GetString()) == true)
                       .ToList();

                        int cnt = results.Count;

                        if (cnt > 0)
                        {
                            Response.Write("Testing");
                        }

                    }
                }

//This Piece just loops through Cells Used

byte[] array = File.ReadAllBytes("C:\\Projects\\Sheet1test.xlsx");
            Stream stream = new MemoryStream(array);
            Regex numberRegex = new Regex(@"\d{3}-\d{2}-\d{4}", RegexOptions.IgnoreCase);
            XLWorkbook wbk = new XLWorkbook(stream);
            foreach (IXLWorksheet worksheet in wbk.Worksheets)
            {
      
                    IXLCells cells = worksheet.Columns().CellsUsed();
                    var results = cells.Where(cellval => numberRegex.IsMatch(cellval.GetString()) == true).ToList();

                    int cnt = results.Count;

                    if (cnt > 0)
                    {
                        Response.Write("Testing");
                        return;
                    }
           }
          


Comments

Popular posts from this blog

Rhino - ETL

Microsoft acquires LinkedIn

Redshift Experience