VBA处理数据与Python Pandas处理数据案例对比分析

103次阅读

共计 4336 个字符，预计需要花费 11 分钟才能阅读完成。

导读	这篇文章主要介绍了 VBA 处理数据与 Python Pandas 处理数据案例比较, 本文通过实例代码给大家介绍的非常详细，具有一定的参考借鉴价值，需要的朋友可以参考下

需求：

现有一个 csv 文件，包含 ’CNUM’ 和 ’COMPANY’ 两列，数据里包含空行，且有内容重复的行数据。

要求：

1）去掉空行；

2）重复行数据只保留一行有效数据；

3）修改 ’COMPANY’ 列的名称为 ’Company_New‘；

4）并在其后增加六列，分别为 ’C_col’,‘D_col’,‘E_col’,‘F_col’,‘G_col’,‘H_col’。

VBA 处理数据与 Python Pandas 处理数据案例对比分析

一，使用 Python Pandas 来处理：

 import pandas as pd
import numpy as np
from pandas import DataFrame,Series
 
def deal_with_data(filepath,newpath):
  file_obj=open(filepath)
  df=pd.read_csv(file_obj)  # 读取 csv 文件，创建 DataFrame
  df=df.reindex(columns=['CNUM','COMPANY','C_col','D_col','E_col','F_col','G_col','H_col'],fill_value=None)  # 重新指定列索引
  df.rename(columns={'COMPANY':'Company_New'}, inplace = True) # 修改列名
  df=df.dropna(axis=0,how='all')         # 去除 NAN 即文件中的空行
  df['CNUM'] = df['CNUM'].astype('int32')    # 将 CNUM 列的数据类型指定为 int32
  df = df.drop_duplicates(subset=['CNUM', 'Company_New'], keep='first') # 去除重复行
  df.to_csv(newpath,index=False,encoding='GBK')
  file_obj.close()
   
if __name__=='__main__':
  file_path=r'C:\Users\12078\Desktop\python\CNUM_COMPANY.csv'
  file_save_path=r'C:\Users\12078\Desktop\python\CNUM_COMPANY_OUTPUT.csv'
  deal_with_data(file_path,file_save_path)

二，使用 VBA 来处理：

 Option Base 1
Option Explicit
 
Sub main()
 On Error GoTo error_handling
 Dim wb         As Workbook
 Dim wb_out       As Workbook
 Dim sht         As Worksheet
 Dim sht_out       As Worksheet
 Dim rng         As Range
 Dim usedrows      As Byte
 Dim usedrows_out    As Byte
 Dim dict_cnum_company  As Object
 Dim str_file_path    As String
    Dim str_new_file_path  As String
    'assign values to variables:
    str_file_path = "C:\Users\12078\Desktop\Python\CNUM_COMPANY.csv"
    str_new_file_path = "C:\Users\12078\Desktop\Python\CNUM_COMPANY_OUTPUT.csv"
  
 Set wb = checkAndAttachWorkbook(str_file_path)
 Set sht = wb.Worksheets("CNUM_COMPANY")
 Set wb_out = Workbooks.Add
 wb_out.SaveAs str_new_file_path, xlCSV 'create a csv file
 Set sht_out = wb_out.Worksheets("CNUM_COMPANY_OUTPUT")
 
 Set dict_cnum_company = CreateObject("Scripting.Dictionary")
 usedrows = WorksheetFunction.Max(getLastValidRow(sht, "A"), getLastValidRow(sht, "B"))
 
 'rename the header'COMPANY'to'Company_New',remove blank & duplicate lines/rows.
 Dim cnum_company As String
 cnum_company = ""For Each rng In sht.Range("A1","A" & usedrows)
   If VBA.Trim(rng.Offset(0, 1).Value) = "COMPANY" Then
     rng.Offset(0, 1).Value = "Company_New"
   End If
   cnum_company = rng.Value & "-" & rng.Offset(0, 1).Value
   If VBA.Trim(cnum_company)  "-" And Not dict_cnum_company.Exists(rng.Value & "-" & rng.Offset(0, 1).Value) Then
     dict_cnum_company.Add rng.Value & "-" & rng.Offset(0, 1).Value, ""
   End If
 Next rng
  
 'loop the keys of dict split the keyes by'-' into cnum array and company array.
 Dim index_dict As Byte
 Dim arr_cnum()
 Dim arr_Company()
 For index_dict = 0 To UBound(dict_cnum_company.keys)
   ReDim Preserve arr_cnum(1 To UBound(dict_cnum_company.keys) + 1)
   ReDim Preserve arr_Company(1 To UBound(dict_cnum_company.keys) + 1)
   arr_cnum(index_dict + 1) = Split(dict_cnum_company.keys()(index_dict), "-")(0)
   arr_Company(index_dict + 1) = Split(dict_cnum_company.keys()(index_dict), "-")(1)
   Debug.Print index_dict
 Next
 
 'assigns the value of the arrays to the celles.
 sht_out.Range("A1", "A" & UBound(arr_cnum)) = Application.WorksheetFunction.Transpose(arr_cnum)
 sht_out.Range("B1", "B" & UBound(arr_Company)) = Application.WorksheetFunction.Transpose(arr_Company)
 
 'add 6 columns to output csv file:
 Dim arr_columns() As Variant
 arr_columns = Array("C_col", "D_col", "E_col", "F_col", "G_col", "H_col")  'sht_out.Range("C1:H1") = arr_columns
 Call checkAndCloseWorkbook(str_file_path, False)
 Call checkAndCloseWorkbook(str_new_file_path, True)
 
Exit Sub
error_handling:
  Call checkAndCloseWorkbook(str_file_path, False)
  Call checkAndCloseWorkbook(str_new_file_path, False)
End Sub
 
'辅助函数：'Get last row of Column N in a Worksheet
Function getLastValidRow(in_ws As Worksheet, in_col As String)
  getLastValidRow = in_ws.Cells(in_ws.Rows.count, in_col).End(xlUp).Row
End Function
 
Function checkAndAttachWorkbook(in_wb_path As String) As Workbook
  Dim wb As Workbook
  Dim mywb As String
  mywb = in_wb_path
   
  For Each wb In Workbooks
    If LCase(wb.FullName) = LCase(mywb) Then
      Set checkAndAttachWorkbook = wb
      Exit Function
    End If
  Next
   
  Set wb = Workbooks.Open(in_wb_path, UpdateLinks:=0)
  Set checkAndAttachWorkbook = wb
 
End Function
  
Function checkAndCloseWorkbook(in_wb_path As String, in_saved As Boolean)
  Dim wb As Workbook
  Dim mywb As String
  mywb = in_wb_path
  For Each wb In Workbooks
    If LCase(wb.FullName) = LCase(mywb) Then
      wb.Close savechanges:=in_saved
      Exit Function
    End If
  Next
End Function

三，输出结果：

VBA 处理数据与 Python Pandas 处理数据案例对比分析

两种方法输出结果相同：

四，比较总结：

Python pandas 内置了大量处理数据的方法，我们不需要重复造轮子，用起来很方便，代码简洁的多。

Excel VBA 处理这个需求，使用了数组，字典等数据结构（实际需求中，数据量往往很大，所以一些地方没有直接使用遍历单元格的方法），以及处理字符串，数组和字典的很多方法，对文件的操作也很复杂，一旦出错，调试起来比 python 也较困难，代码已经尽量优化，但还是远比 Python 要多。

到此这篇关于 VBA 处理数据与 Python Pandas 处理数据案例比较分析的文章就介绍到这了

阿里云 2 核 2G 服务器 3M 带宽 61 元 1 年，有高配

腾讯云新客低至 82 元 / 年，老客户 99 元 / 年

代金券：在阿里云专用满减优惠券

正文完

星哥玩云-微信公众号

发表至： linux教程

2024-07-25

0

转载说明：除特殊说明外本站文章皆由CC-4.0协议发布，转载请注明出处。

大神教你在 Linux 中查看你的时区

LNMP 自动安装包 V1.4 版发布

使用Docker部署ccaa，实现离线下载，在线播放

Git分支的前世今生

【Linux面试真题】- 怎么清屏？怎么退出当前命令？怎么执行睡眠？怎么查看当前用户 id？查看指定帮助用什么命令？

程序员为什么一定要用Linux

作为计算机从业人员，应该怎样学习Linux？

Systemd 三部曲之 PHP7

简述HP iLO中的几种开关机选项

VBA处理数据与Python Pandas处理数据案例对比分析

申请腾讯混元的API Key并且使用LobeChat调用混元AI

Docker部署搭建一个开源强大的图书管理系统

基于Docker快速搭建一个开源的IT人员在线工具箱-it-tools

让每个人都可以轻松使用Git-腾讯自研Git客户端

使用Docker部署开源的WPS-Office

查看linux中所有用户的三种方式

网络渗透测试的工具SPARTA的安装与使用

腾讯云服务器99元一年还是买3年划算？三年划算

如何在Linux下进行SFTP用户权限设置

fold 命令入门学习

	import pandas as pd
	import numpy as np
	from pandas import DataFrame,Series

	def deal_with_data(filepath,newpath):
	file_obj=open(filepath)
	df=pd.read_csv(file_obj) # 读取 csv 文件，创建 DataFrame
	df=df.reindex(columns=['CNUM','COMPANY','C_col','D_col','E_col','F_col','G_col','H_col'],fill_value=None) # 重新指定列索引
	df.rename(columns={'COMPANY':'Company_New'}, inplace = True) # 修改列名
	df=df.dropna(axis=0,how='all') # 去除 NAN 即文件中的空行
	df['CNUM'] = df['CNUM'].astype('int32') # 将 CNUM 列的数据类型指定为 int32
	df = df.drop_duplicates(subset=['CNUM', 'Company_New'], keep='first') # 去除重复行
	df.to_csv(newpath,index=False,encoding='GBK')
	file_obj.close()

	if __name__=='__main__':
	file_path=r'C:\Users\12078\Desktop\python\CNUM_COMPANY.csv'
	file_save_path=r'C:\Users\12078\Desktop\python\CNUM_COMPANY_OUTPUT.csv'
	deal_with_data(file_path,file_save_path)

	Option Base 1
	Option Explicit

	Sub main()
	On Error GoTo error_handling
	Dim wb As Workbook
	Dim wb_out As Workbook
	Dim sht As Worksheet
	Dim sht_out As Worksheet
	Dim rng As Range
	Dim usedrows As Byte
	Dim usedrows_out As Byte
	Dim dict_cnum_company As Object
	Dim str_file_path As String
	Dim str_new_file_path As String
	'assign values to variables:
	str_file_path = "C:\Users\12078\Desktop\Python\CNUM_COMPANY.csv"
	str_new_file_path = "C:\Users\12078\Desktop\Python\CNUM_COMPANY_OUTPUT.csv"

	Set wb = checkAndAttachWorkbook(str_file_path)
	Set sht = wb.Worksheets("CNUM_COMPANY")
	Set wb_out = Workbooks.Add
	wb_out.SaveAs str_new_file_path, xlCSV 'create a csv file
	Set sht_out = wb_out.Worksheets("CNUM_COMPANY_OUTPUT")

	Set dict_cnum_company = CreateObject("Scripting.Dictionary")
	usedrows = WorksheetFunction.Max(getLastValidRow(sht, "A"), getLastValidRow(sht, "B"))

	'rename the header'COMPANY'to'Company_New',remove blank & duplicate lines/rows.
	Dim cnum_company As String
	cnum_company = ""For Each rng In sht.Range("A1","A" & usedrows)
	If VBA.Trim(rng.Offset(0, 1).Value) = "COMPANY" Then
	rng.Offset(0, 1).Value = "Company_New"
	End If
	cnum_company = rng.Value & "-" & rng.Offset(0, 1).Value
	If VBA.Trim(cnum_company) "-" And Not dict_cnum_company.Exists(rng.Value & "-" & rng.Offset(0, 1).Value) Then
	dict_cnum_company.Add rng.Value & "-" & rng.Offset(0, 1).Value, ""
	End If
	Next rng

	'loop the keys of dict split the keyes by'-' into cnum array and company array.
	Dim index_dict As Byte
	Dim arr_cnum()
	Dim arr_Company()
	For index_dict = 0 To UBound(dict_cnum_company.keys)
	ReDim Preserve arr_cnum(1 To UBound(dict_cnum_company.keys) + 1)
	ReDim Preserve arr_Company(1 To UBound(dict_cnum_company.keys) + 1)
	arr_cnum(index_dict + 1) = Split(dict_cnum_company.keys()(index_dict), "-")(0)
	arr_Company(index_dict + 1) = Split(dict_cnum_company.keys()(index_dict), "-")(1)
	Debug.Print index_dict
	Next

	'assigns the value of the arrays to the celles.
	sht_out.Range("A1", "A" & UBound(arr_cnum)) = Application.WorksheetFunction.Transpose(arr_cnum)
	sht_out.Range("B1", "B" & UBound(arr_Company)) = Application.WorksheetFunction.Transpose(arr_Company)

	'add 6 columns to output csv file:
	Dim arr_columns() As Variant
	arr_columns = Array("C_col", "D_col", "E_col", "F_col", "G_col", "H_col") 'sht_out.Range("C1:H1") = arr_columns
	Call checkAndCloseWorkbook(str_file_path, False)
	Call checkAndCloseWorkbook(str_new_file_path, True)

	Exit Sub
	error_handling:
	Call checkAndCloseWorkbook(str_file_path, False)
	Call checkAndCloseWorkbook(str_new_file_path, False)
	End Sub

	'辅助函数：'Get last row of Column N in a Worksheet
	Function getLastValidRow(in_ws As Worksheet, in_col As String)
	getLastValidRow = in_ws.Cells(in_ws.Rows.count, in_col).End(xlUp).Row
	End Function

	Function checkAndAttachWorkbook(in_wb_path As String) As Workbook
	Dim wb As Workbook
	Dim mywb As String
	mywb = in_wb_path

	For Each wb In Workbooks
	If LCase(wb.FullName) = LCase(mywb) Then
	Set checkAndAttachWorkbook = wb
	Exit Function
	End If
	Next

	Set wb = Workbooks.Open(in_wb_path, UpdateLinks:=0)
	Set checkAndAttachWorkbook = wb

	End Function

	Function checkAndCloseWorkbook(in_wb_path As String, in_saved As Boolean)
	Dim wb As Workbook
	Dim mywb As String
	mywb = in_wb_path
	For Each wb In Workbooks
	If LCase(wb.FullName) = LCase(mywb) Then
	wb.Close savechanges:=in_saved
	Exit Function
	End If
	Next
	End Function