In [48]:
xml = ""

with open ("../demos/dataset/nvdcve-2.0-2016_mini.xml") as f:
    xml = f.read()
    
xml;

In [52]:
import xmldataset

# xmldataset declaration
profile = """
nvd
    entry.id
"""

# Print the output
print(xmldataset.parse_using_profile(xml, profile))


{}

In [18]:
import pandas as pd

header = pd.read_csv("../demos/dataset/cve_allitems.csv.gz", skiprows=2, nrows=0)
header


Out[18]:
Name Status Description References Phase Votes Comments

In [36]:
cve = pd.read_csv(
    "../demos/dataset/cve_allitems.csv.gz",
    skiprows=10,
    encoding='latin-1',
    names=header
    )
cve.head()


C:\dev\apps\Anaconda3\lib\site-packages\pandas\io\parsers.py:709: UserWarning: Duplicate names specified. This will raise an error in the future.
  return _read(filepath_or_buffer, kwds)
C:\dev\apps\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2728: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[36]:
Name Status Description References Phase Votes Comments
0 CVE-1999-0001 Candidate ip_input.c in BSD-derived TCP/IP implementatio... CERT:CA-98-13-tcp-denial-of-service | BUGT... Modified (20051217) MODIFY(1) Frech | NOOP(2) Northcutt, W... Christey> A Bugtraq posting indicates that th...
1 CVE-1999-0002 Entry Buffer overflow in NFS mountd gives root acces... SGI:19981006-01-I | URL:ftp://patches.sgi.... NaN NaN NaN
2 CVE-1999-0003 Entry Execute commands as root via buffer overflow i... NAI:NAI-29 | CERT:CA-98.11.tooltalk | ... NaN NaN NaN
3 CVE-1999-0004 Candidate MIME buffer overflow in email clients, e.g. So... CERT:CA-98.10.mime_buffer_overflows | XF:o... Modified (19990621-01) ACCEPT(8) Baker, Cole, Collins, Dik, Landfi... Frech> Extremely minor, but I believe e-mail ...
4 CVE-1999-0005 Entry Arbitrary command execution via IMAP buffer ov... CERT:CA-98.09.imapd | SUN:00177 | URL:... NaN NaN NaN

In [39]:
cve.describe(include='all')


Out[39]:
Name Status Description References Phase Votes Comments
count 135983 135983 135983 109073 132930 132930 1855
unique 135983 2 108157 97379 4452 1289 1774
top CVE-2016-7580 Candidate ** RESERVED ** This candidate has been reserve... CONFIRM:https://source.android.com/security/bu... Assigned (20160909) None (candidate not yet proposed) Cole> See comments for CVE-2000-0101 | Fre...
freq 1 132930 20918 262 1322 130004 11

In [41]:
cve.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135983 entries, 0 to 135982
Data columns (total 7 columns):
Name           135983 non-null object
Status         135983 non-null object
Description    135983 non-null object
References     109073 non-null object
Phase          132930 non-null object
Votes          132930 non-null object
Comments       1855 non-null object
dtypes: object(7)
memory usage: 7.3+ MB

In [ ]:


In [42]:
import xmldataset

# xmldataset declaration
profile = """
colleagues
    colleague
        title = dataset:colleagues
        phone = dataset:colleagues
        email = dataset:colleagues"""

# Print the output
print(xmldataset.parse_using_profile(xml, profile))


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-42-d1071ce1b97a> in <module>()
     10 
     11 # Print the output
---> 12 print(xmldataset.parse_using_profile(xml, profile))

NameError: name 'xml' is not defined