openpyxl操控Excel文件

这个word与excel结合的实例是我跟网友Garson共同完成的,多谢Garson提供这么好的教学实例!

我的水平十分有限,只能写非常业余的程序,但是只要能解决问题就行啊!有好的咱当然用好的,但没有好的就用手边滴将就下啦。

我们遇到的问题是这样的:
我们有个excel表格数据,不知道是什么玩意儿自动产生到(忘记问Garson啦),

主要想强调不是手工输入的,所以格式比较工整,出现误输入的可能性比较小。

这表格有成千上万行,里面的数据一组一组的,虽然也有噪音数据,但每组数据的界限非常明显。我们需要分析采集分析每一组数据,生成三个表格和一段分析结果的文字描述到word文档中。

我们采用分而治之各个击破的战术

先随意选出一组数据来研究,观察规律。而后用四个函数分别实现输出表格一、二、三和分析结果的功能,然后才一个一个研究每个函数要怎么写。

承认个错误,调试程序时数次出现死循环,我还是用print()把函数里的变量打印出来找症结的,而没有用logging.debug()。

在这里我就不给自己找理由了,下次争取不再犯。

没想到啰嗦了这么长,必须切成两个,否则太大,怕超出上传视频限定
 

 

 

# readCensusExcel.py - Tabulates population and number of census tracts
# for each county.

import openpyxl
import docx

# Read the spreadsheet data
print('Opening workbook...')
wb = openpyxl.load_workbook('censuspopdata.xlsx')
sheet = wb.get_sheet_by_name('Population by Census Tract')

countryData = {}
stateData = {}

# Fill in countyData with each county's population and tracts.
print('Reading rows...')
for row in range(2, sheet.max_row + 1):
    # Each row in the spreadsheet has data for one census tract.
    state = sheet['B' + str(row)].value
    county = sheet['C' + str(row)].value
    pop = sheet['D' + str(row)].value

    # Make sure the key for this state exists.
    countryData.setdefault(state, {})
    # Make sure the key for this county in this state exists.
    countryData[state].setdefault(county, {'tracts': 0, 'pop': 0})

    # Each row represents one census tract, so increment by one.
    countryData[state][county]['tracts'] += 1
    # Increase the county pop by the pop in this census tract.
    countryData[state][county]['pop'] += int(pop)


    stateData.setdefault(state, 0)
    stateData[state] += int(pop)


# Open a new text file and write the contents of countyData to it.
print('Writing results...')
doc = docx.Document()

table = doc.add_table(rows=1, cols=2)
table.style = 'Light Grid Accent 1'

# Table head
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'State'
hdr_cells[1].text = 'Population'

for stateName, statistic in stateData.items():
        row_cells = table.add_row().cells
        row_cells[0].text = stateName
        row_cells[1].text = str(statistic)

doc.save('PopulationStatistic.docx')
print('Done.')



import openpyxl
import docx

def rowDataSet(ws, row):

	rowSet = set()	# 空集合

	for cell in ws[row]:
		if cell.value not in [None, '']: # 单元格不为空,把内容加入集合
			rowSet.add(cell.value)

	return rowSet	# 返回集合

def remove_row(table, row):
	tbl = table._tbl
	tr =row._tr
	tbl.remove(tr)

# 输出第一个表格
def exportDataToWordTable1(ws, excelRow, doc, analysis):

	# 画word里面的表格(两行七列)
	docRows = 2
	docCols = 7
	table = doc.add_table(rows = docRows, cols = docCols, style = 'Table Grid')

	# excel数据填入word表格1里
	for row in range(docRows):
		for col in range(docCols):
			table.rows[row].cells[col].text = str(ws.cell(row=excelRow, column= col+1).value)

		excelRow += 1

	table.rows[0].cells[1].text = '票房'
	table.rows[0].cells[2].text = '天数'
	table.rows[0].cells[3].text = '票房/天数'
	table.rows[1].cells[3].text = '{:.3f}'.format(float(table.rows[1].cells[1].text)/float(table.rows[1].cells[2].text))
	table.rows[0].cells[0].text = ''

	# 数据分析,保存结果
	if float(table.rows[1].cells[4].text) > 0.85 and float(table.rows[1].cells[5].text) > 0.85:
		analysis['direction'] = '南'
	else:
		analysis['direction'] = '北'

	if float(table.rows[1].cells[6].text) < 0.2:
		analysis['feeling'] = '凉'
	else:
		analysis['feeling'] = '暖'

	excelRow += 1 # 跳过factor_loading那行
	vSet = set()

	# facotr_loading到alpha这一列的数据放到vSet集合里
	while str(ws.cell(row=excelRow, column=2).value) != 'alpha':
		v = ws.cell(row=excelRow, column=2).value
		vSet.add(abs(v))
		excelRow += 1

	if min(vSet) > 0.3:
		analysis['season'] = '夏'
		analysis['duration'] = '长'
	else:
		analysis['season'] = '冬'
		analysis['duration'] = '短'

	# 一二表格间加一空行
	doc.add_paragraph('\n')

	return excelRow, analysis

# 输出第二个表格
def exportDataToWordTable2(ws, excelRow, doc, analysis):

	docCols = 2
	# 一行一行往word里面添加表格2的数据
	table = doc.add_table(rows=1, cols=docCols, style='Table Grid')
	wordRow = 0  # word表格里的行指针

	# excelRow从alpha这一行开始,到X这一行结束。从excel一行一行去数据,填入word表格
	while ws.cell(row=excelRow, column=1).value != 'X':
		for docCol in range(docCols):
			table.rows[wordRow].cells[docCol].text = str(ws.cell(row=excelRow, column=docCol+1).value)
		wordRow += 1
		excelRow += 1
		rowObj = table.add_row()

	# 删除最后一个空行
	remove_row(table, rowObj)

	# 记录alpha的值,并进行分析
	analysis['alpha'] = float(table.rows[1].cells[1].text)

	if analysis['alpha'] > 0.8:
		analysis['climate'] = '炎热'
	else:
		analysis['climate'] = '严寒'

	doc.add_paragraph('\n')

	return excelRow, analysis

# 输出第三个表格
def exportDataToWordTable3(ws, excelRow, doc):

	# 首先确定行列
	docCols = len(rowDataSet(ws, excelRow))

	# word里画出表格
	table = doc.add_table(rows=docCols, cols=docCols, style='Table Grid')

	# 提取excel数据填入word表格3
	for docRow in range(docCols):
		for docCol in range(docCols):
			table.rows[docRow].cells[docCol].text = str(ws.cell(row=excelRow, column=docCol+1).value)
		excelRow += 1

	doc.add_paragraph('\n')

	return excelRow

# 文字输出(对表格数据的分析)
def textOutput(doc, analysis):

	outputStr = '''人皆苦{},我爱{}日{}。
\n熏风自{}来,殿阁生微{}。
\n
		alpha = {}'''.format(analysis['climate'], analysis['season'],
							 analysis['duration'], analysis['direction'],
							 analysis['feeling'], analysis['alpha'])

	p = doc.add_paragraph(outputStr)
	p.add_run('\n')

	doc.add_paragraph('-*-'*33)
	doc.add_page_break()

wb = openpyxl.load_workbook('data.xlsx')
ws = wb.get_sheet_by_name('Sheet 1')

doc = docx.Document()

dataAnalysis = {'direction':'',
				'feeling':'',
				'season':'',
				'duration':'',
				'climate':'',
				'alpha':0}
# 初始化excel行指针
row = 1

while row < ws.max_row:

	if {'TLI', 'CFI', 'RMSEA'} < rowDataSet(ws, row):

		# 输出第一张表,记录数据分析结果
		row, dataAnalysis = exportDataToWordTable1(ws, row, doc, dataAnalysis)

		# 输出第二张表,记录数据分析结果
		row, dataAnalysis = exportDataToWordTable2(ws, row, doc, dataAnalysis)

		# 输出第三张表,记录数据分析结果
		row = exportDataToWordTable3(ws, row, doc)

		# 输出数据分析结果
		textOutput(doc, dataAnalysis)

	row += 1

doc.save('analysisResult.docx')

1 1 1 1 1 1 1 1 1 1 Rating 3.61 (9 Votes)