#!/usr/bin/python3
"""Convert roll-call vote XML files into plain-text bill and vote summaries.

Running the script walks ``votes/<subdir>/*.xml`` and, for every XML file,
writes ``parsedVotes/<subdir>/<name>.bill.txt`` and
``parsedVotes/<subdir>/<name>.votes.txt``. Each output subdirectory also gets
a ``legisinfo.txt`` that is written once, when it does not exist yet.
"""
import os
import os.path

try:
    import xml.etree.ElementTree as ET
except ImportError:  # fall back to the legacy standalone package
    import elementtree.ElementTree as ET

SrcEnc = 'utf_8'
TargetEnc = 'latin1'

# Vote spellings that are rewritten on output; any other value passes through.
_VOTE_ALIASES = {'Aye': 'Yea', 'No': 'Nay'}


def _parseXmlFile(srcFile):
    """Read srcFile (encoded as SrcEnc) and return its parsed root element."""
    with open(srcFile, 'rb') as srcF:
        raw = srcF.read()
    # Decode explicitly with SrcEnc, then hand the parser UTF-8 bytes.
    return ET.XML(raw.decode(SrcEnc).encode('utf-8'))


def _openTarget(path):
    """Open path for writing in TargetEnc; unencodable characters become '?'."""
    return open(path, 'w', encoding=TargetEnc, errors='replace')


def convertBillFile(srcFile, dstFile):
    """Write the vote metadata of one roll-call XML file to dstFile.

    The output has five lines (no trailing newline): roll-call number,
    bill name, description, question, and "date time".

    Args:
        srcFile: path of the roll-call XML file to read.
        dstFile: path of the text file to create or overwrite.

    Raises:
        ValueError: if the roll-call number is missing or not an integer.
    """
    et = _parseXmlFile(srcFile)

    num = int(getDataFromXPath(et, 'vote-metadata/rollcall-num'))
    billName = getDataFromXPath(et, 'vote-metadata/legis-num')
    title = getDataFromXPath(et, 'vote-metadata/vote-desc')
    voteType = getDataFromXPath(et, 'vote-metadata/vote-question')
    dateTime = (getDataFromXPath(et, 'vote-metadata/action-date') + ' ' +
                getDataFromXPath(et, 'vote-metadata/action-time'))

    with _openTarget(dstFile) as dstF:
        dstF.write("%d\n%s\n%s\n%s\n%s"
                   % (num, billName, title, voteType, dateTime))


def convertVotesFile(srcFile, dstFile, legisFile):
    """Write one "name:vote" line per recorded vote to dstFile.

    If legisFile does not exist when the function is called, it is also
    created with one "name:id:party:state" line per recorded vote; an
    existing legisFile is left untouched.

    Args:
        srcFile: path of the roll-call XML file to read.
        dstFile: path of the votes text file to create or overwrite.
        legisFile: path of the legislator info file to create if missing.
    """
    et = _parseXmlFile(srcFile)
    writeLegis = not os.path.exists(legisFile)

    # Build all output in memory first so that a malformed record cannot
    # leave a half-written legisFile behind (which would never be redone).
    voteLines = []
    legisLines = []
    for vote in et.findall('vote-data/recorded-vote'):
        legislator = vote.find('legislator')
        name = legislator.get('unaccented-name')
        v = vote.find('vote').text
        # Normalize 'Aye'/'No' to 'Yea'/'Nay' so only one spelling is emitted.
        voteLines.append("%s:%s\n" % (name, _VOTE_ALIASES.get(v, v)))
        if writeLegis:
            legisLines.append("%s:%s:%s:%s\n" % (
                name,
                legislator.get('name-id'),
                legislator.get('party'),
                legislator.get('state'),
            ))

    with _openTarget(dstFile) as dstF:
        dstF.writelines(voteLines)
    if writeLegis:
        with _openTarget(legisFile) as legisF:
            legisF.writelines(legisLines)


def getDataFromXPath(et, xpath):
    """Return the text of the first element matching xpath under et.

    Returns "" when no element matches or the matched element has no text.
    """
    elem = et.find(xpath)
    if elem is not None and elem.text is not None:
        return elem.text
    return ""


def main():
    """Convert every ``votes/<subdir>/*.xml`` file into ``parsedVotes/``."""
    srcDir = 'votes'
    dstDir = 'parsedVotes'

    # Sorted so the roll call that seeds legisinfo.txt is deterministic.
    for subDir in sorted(os.listdir(srcDir)):
        srcSub = os.path.join(srcDir, subDir)
        if not os.path.isdir(srcSub):
            continue  # stray file next to the per-directory folders
        dstSub = os.path.join(dstDir, subDir)

        for fileName in sorted(os.listdir(srcSub)):
            if not fileName.endswith('.xml'):
                continue
            srcPath = os.path.join(srcSub, fileName)
            print(srcPath)

            os.makedirs(dstSub, exist_ok=True)
            base = fileName[:-4]
            convertBillFile(srcPath, os.path.join(dstSub, base + '.bill.txt'))
            convertVotesFile(srcPath,
                             os.path.join(dstSub, base + '.votes.txt'),
                             os.path.join(dstSub, "legisinfo.txt"))


if __name__ == '__main__':
    main()