In [1]:
#Python3
from xml.etree import ElementTree
import os
import urllib.request
import time
import csv
In [2]:
# Namespace prefix -> URI map for the ARES XML schemas
# (datatypes v_1.0.4, answer v_1.0.1, datatypes v_1.0.3).
nmsp = dict(
    dtt='http://wwwinfo.mfcr.cz/ares/xml_doc/schemas/ares/ares_datatypes/v_1.0.4',
    are='http://wwwinfo.mfcr.cz/ares/xml_doc/schemas/ares/ares_answer/v_1.0.1',
    D='http://wwwinfo.mfcr.cz/ares/xml_doc/schemas/ares/ares_datatypes/v_1.0.3',
)
In [3]:
# Processed batches: work out which ARES change batches still need processing.
# For each source id in `names`, read the batch numbers already recorded in
# <name>_zprac.csv, download the current batch list, and keep the batch numbers
# that are not recorded yet. Results land in `davky_firmy` (source '2') and
# `davky_osvc` (source '4'). Relies on `nmsp` from the earlier cell.
names = {'2': 'firmy', '4': 'osvc'}
davky_osvc = []
davky_firmy = []
for key in names.keys():
    zprac_path = '/Users/jancibulka/DEVEL/DATA/ares-zmeny/data/' + names[key] + '_zprac.csv'
    davky_zprac = []
    # The *_zprac.csv file is only created (append mode) by the processing cells,
    # so on a first run it does not exist yet: treat that as "nothing processed".
    if os.path.exists(zprac_path):
        with open(zprac_path, 'r', newline='') as dvk:
            reader = csv.reader(dvk, delimiter=',', quotechar='"')
            # Skip blank rows instead of failing on row[0].
            davky_zprac = [row[0] for row in reader if row]
    # Close the HTTP response as soon as the XML has been parsed.
    with urllib.request.urlopen('http://wwwinfo.mfcr.cz/cgi-bin/ares/darv_zm.cgi?cislo_zdroje=' + key + '&cislo_davky_od=2&cislo_davky_do=3') as url:
        doc = ElementTree.parse(url)
    davky_akt = [node.text for node in doc.findall('.//dtt:C_davky', nmsp)]
    # Keep only batches that have not been recorded as processed.
    davky = [davka for davka in davky_akt if davka not in davky_zprac]
    if key == '2':
        davky_firmy = davky
    else:
        davky_osvc = davky
In [5]:
# FIRMY (companies): for every batch in `davky_firmy`, look up each IC whose
# `p` attribute is 'N' via darv_or.cgi and append one row per IC to out_firmy.csv.
# Relies on `davky_firmy` and `nmsp` from the earlier cells.
# NOTE(review): a batch is recorded in firmy_zprac.csv only after it completes, so
# a failure mid-batch followed by a re-run appends that batch's rows again.
with open('/Users/jancibulka/DEVEL/DATA/ares-zmeny/data/out_firmy.csv', 'a', newline='') as out:
    outwriter = csv.writer(out, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for davka in davky_firmy:
        # Download the change batch; the response is closed once the XML is parsed.
        with urllib.request.urlopen('http://wwwinfo.mfcr.cz/cgi-bin/ares/darv_zm.cgi?cislo_zdroje=2&cislo_davky_od={0}&cislo_davky_do={0}'.format(davka)) as url:
            doc = ElementTree.parse(url)
        for node in doc.findall('.//dtt:ic', nmsp):
            if node.attrib['p'] != 'N':
                continue
            ic = node.text.strip()
            with urllib.request.urlopen('http://wwwinfo.mfcr.cz/cgi-bin/ares/darv_or.cgi?ico=' + ic) as addr:
                bulk = ElementTree.parse(addr)
            # findtext() yields '' both for a missing element (default) and for an
            # element without text, so .strip() is always safe here.
            firma, ulice, cis_domu, obec, psc, zapsano = (
                bulk.findtext('.//D:' + tag, default='', namespaces=nmsp).strip()
                for tag in ('OF', 'NU', 'CD', 'N', 'PSC', 'DZOR')
            )
            # One "F - J P DN" string per CSO element found in the response.
            statut = []
            for statutar in bulk.findall('.//D:CSO', nmsp):
                fce, jmeno, prijmeni, nar = (
                    statutar.findtext('.//D:' + tag, default='', namespaces=nmsp).strip()
                    for tag in ('F', 'J', 'P', 'DN')
                )
                statut.append(fce + ' - ' + jmeno + ' ' + prijmeni + ' ' + nar)
            # `statut` is a list; csv.writer stringifies it with str(), so the last
            # column holds the Python list repr (kept to match rows already in the file).
            outwriter.writerow([ic, firma, ulice, cis_domu, obec, psc, zapsano, statut])
            # Pause after each detail request (presumably to go easy on the ARES server).
            time.sleep(0.75)
        # Make sure the batch's rows are on disk before recording it as processed.
        out.flush()
        with open('/Users/jancibulka/DEVEL/DATA/ares-zmeny/data/firmy_zprac.csv', 'a', newline='') as dvk:
            writer = csv.writer(dvk, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerow([davka])
In [12]:
# Display the OSVC batch numbers that are not yet recorded as processed.
davky_osvc
Out[12]:
In [19]:
# OSVC: for every batch in `davky_osvc`, look up each IC whose `p` attribute is
# 'N' via darv_std.cgi and append one row per IC to out_osvc.csv.
# Relies on `davky_osvc` and `nmsp` from the earlier cells.
# NOTE(review): a batch is recorded in osvc_zprac.csv only after it completes, so
# a failure mid-batch followed by a re-run appends that batch's rows again.
with open('/Users/jancibulka/DEVEL/DATA/ares-zmeny/data/out_osvc.csv', 'a', newline='') as out:
    outwriter = csv.writer(out, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for davka in davky_osvc:
        # Download the change batch; the response is closed once the XML is parsed.
        with urllib.request.urlopen('http://wwwinfo.mfcr.cz/cgi-bin/ares/darv_zm.cgi?cislo_zdroje=4&cislo_davky_od={0}&cislo_davky_do={0}'.format(davka)) as url:
            doc = ElementTree.parse(url)
        for node in doc.findall('.//dtt:ic', nmsp):
            if node.attrib['p'] != 'N':
                continue
            ic = node.text.strip()
            with urllib.request.urlopen('http://wwwinfo.mfcr.cz/cgi-bin/ares/darv_std.cgi?ico=' + ic) as addr:
                bulk = ElementTree.parse(addr)
            # findtext() yields '' both for a missing element (default) and for an
            # element without text, so .strip() is always safe here.
            firma, ulice, cis_domu, obec, psc, adresa_text, zapsano = (
                bulk.findtext('.//' + tag, default='', namespaces=nmsp).strip()
                for tag in (
                    'are:Obchodni_firma',
                    'dtt:Nazev_ulice',
                    'dtt:Cislo_domovni',
                    'dtt:Nazev_obce',
                    'dtt:PSC',
                    'dtt:Adresa_textem',
                    'are:Datum_vzniku',
                )
            )
            outwriter.writerow([ic, firma, ulice, cis_domu, obec, psc, adresa_text, zapsano])
            # Pause after each detail request (presumably to go easy on the ARES server).
            time.sleep(0.75)
        # Make sure the batch's rows are on disk before recording it as processed.
        out.flush()
        with open('/Users/jancibulka/DEVEL/DATA/ares-zmeny/data/osvc_zprac.csv', 'a', newline='') as dvk:
            writer = csv.writer(dvk, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            writer.writerow([davka])