In [1]:
from __future__ import print_function
import os
from pyspark import SQLContext

spark_home = os.environ.get('SPARK_HOME', None)

In [2]:
# Quick test of Spark
#text_file = sc.textFile(spark_home + "/README.md")
#word_counts = text_file \
#    .flatMap(lambda line: line.split()) \
#    .map(lambda word: (word, 1)) \
#    .reduceByKey(lambda a, b: a + b)
#word_counts.collect()

In [3]:
# Load iDigBio Parquet
sqlContext = SQLContext(sc)
idbdf = sqlContext.read.parquet("../data/idigbio-100k/occurrence.txt.parquet")

In [4]:
print(idbdf.count())
#print(idbdf.head())


99999

In [5]:
print(idbdf.head()["http://rs.tdwg.org/dwc/terms/county"])


Oxford

In [6]:
idbdf.printSchema()


root
 |-- id: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/county: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/locality: string (nullable = true)
 |-- http://portal.idigbio.org/terms/tribe: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/infraspecificEpithet: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/rightsHolder: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/lithostratigraphicTerms: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associatedFamily: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/location_Basin: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/earliestAgeOrLowestStage: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/superfamily: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/ownerInstitutionCode: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/taxonRank: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/bed: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Live: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/country: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimDepth: string (nullable = true)
 |-- http://purl.org/dc/terms/source: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/maximumElevationInMeters: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/waterBody: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/family: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associateCondition: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Dead: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associateAuthor: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Instars_Male: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/identificationReferences: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/rights: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/island: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/geodeticDatum: string (nullable = true)
 |-- http://purl.org/dc/terms/bibliographicCitation: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/nomenclaturalCode: string (nullable = true)
 |-- http://purl.org/dc/terms/type: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/%20identificationQualifier: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associateCommonName: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/earliestEpochOrLowestSeries: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/namePublishedIn: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimCoordinateSystem: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Juv_females: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/day: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/lifeStage: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/identificationRemarks: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimTaxonRank: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/latestPeriodOrHighestSystem: string (nullable = true)
 |-- http://portal.idigbio.org/terms/determinationHistory: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/recordedBy: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/order: string (nullable = true)
 |-- http://purl.org/dc/terms/references: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/islandGroup: string (nullable = true)
 |-- http://purl.org/dc/terms/accessRights: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/group: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/dateIdentified: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/informationWithheld: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/scientificNameID: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimElevation: string (nullable = true)
 |-- http://purl.org/dc/terms/rightsHolder: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/establishmentMeans: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Total_females: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/maximumDepthInMeters: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/typeStatus: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/FormII_females: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimLatitude: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/occurrenceStatus: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/locationID: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/basisOfRecord: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/taxonRemarks: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Vouchered: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/latestEpochOrHighestSeries: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/dynamicProperties: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/municipality: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/FormII_males: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/previousIdentifications: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/latestAgeOrHighestStage: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/vernacularName: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/fieldNotes: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/institutionCode: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/class: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/member: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimLongitude: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/minimumDepthInMeters: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimLocality: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Juv_males: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associateNotes: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/phylum: string (nullable = true)
 |-- http://purl.org/dc/terms/rights: string (nullable = true)
 |-- symbiotaverbatimScientificName: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/relatedResourceID: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/minimumElevationInMeters: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/associatedTaxa: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/locationTrs: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/samplingProtocol: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/startDayOfYear: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimCoordinates: string (nullable = true)
 |-- http://portal.idigbio.org/terms/etag: string (nullable = true)
 |-- http://portal.idigbio.org/terms/uuid: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/georeferencedDate: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/habitat: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Instars_Female: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/scientificNameAuthorship: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/occurrenceID: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/associatedMedia: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/dataGeneralizations: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/location_RiverMile: string (nullable = true)
 |-- http://portal.idigbio.org/terms/createdBy: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/location_Stream: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associateDeterminedBy: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/earliestPeriodOrLowestSystem: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/subgenus: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/decimalLatitude: string (nullable = true)
 |-- http://portal.idigbio.org/terms/hostFamily: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/georeferenceRemarks: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/occurrenceRemarks: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/preparations: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/identificationVerificationStatus: string (nullable = true)
 |-- http://portal.idigbio.org/terms/subfamily: string (nullable = true)
 |-- http://portal.idigbio.org/terms/preparationCount: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Total_Males: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/eventRemarks: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/associatedReferences: string (nullable = true)
 |-- http://portal.idigbio.org/terms/endangeredStatus: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/georeferenceSources: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/associatedSequences: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/formation: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/higherClassification: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/catalogNumber: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/higherGeography: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/individualCount: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/decimalLongitude: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/datasetName: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/month: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/georeferencedBy: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/eventTime: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/identificationQualifier: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associateRelationship: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/state: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/specificEpithet: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/countryCode: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associateIdentifier: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/kingdom: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/fieldNumber: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/coordinatePrecision: string (nullable = true)
 |-- http://portal.idigbio.org/terms/recordIds: string (nullable = true)
 |-- http://purl.org/dc/terms/language: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/stateProvince: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Relic: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/eventDate: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/collectionID: string (nullable = true)
 |-- http://portal.idigbio.org/terms/recordId: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/collectionCode: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/nameAccordingTo: string (nullable = true)
 |-- http://portal.idigbio.org/terms/barcodeValue: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/georeferenceVerificationStatus: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/associatedOccurrences: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/Juv_undetermined: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/recordNumber: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/genus: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/continent: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/sex: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/identifiedBy: string (nullable = true)
 |-- http://portal.idigbio.org/terms/version: string (nullable = true)
 |-- http://symbiota.org/terms/verbatimScientificName: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/disposition: string (nullable = true)
 |-- http://portal.idigbio.org/terms/inhs/FormI_males: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/preparation: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/latestEraOrHighestErathem: string (nullable = true)
 |-- http://portal.idigbio.org/terms/associatedRelationship: string (nullable = true)
 |-- http://portal.idigbio.org/terms/dateModified: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/locationAccordingTo: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/institutionID: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/locationRemarks: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/reproductiveCondition: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/eventID: string (nullable = true)
 |-- http://portal.idigbio.org/terms/subgenus: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/endDayOfYear: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/scientificName: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/nomenclaturalStatus: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/otherCatalogNumbers: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/verbatimEventDate: string (nullable = true)
 |-- http://purl.org/dc/terms/reference: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/accessRights: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/earliestEraOrLowestErathem: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/georeferenceProtocol: string (nullable = true)
 |-- http://purl.org/dc/terms/modified: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/footprintWKT: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/datasetID: string (nullable = true)
 |-- http://rs.tdwg.org/dwc/terms/year: string (nullable = true)


In [7]:
col_name = "symbiotaverbatimScientificName"
idbdf.filter(idbdf[col_name] != "").count()


Out[7]:
34438

In [8]:
idbdf.registerTempTable("idbtable")
scinames = sqlContext.sql("select `symbiotaverbatimScientificName` from idbtable")
print(scinames.head())


Row(symbiotaverbatimScientificName=u'Hypogymnia physodes')

In [9]:
notes = sqlContext.sql("select `http://rs.tdwg.org/dwc/terms/fieldNotes` from idbtable")
print(notes.head())


Row(http://rs.tdwg.org/dwc/terms/fieldNotes=u'')

In [10]:
col_name = "http://rs.tdwg.org/dwc/terms/fieldNotes"
idbdf.filter(idbdf[col_name] != "").count()


---------------------------------------------------------------------------
AnalysisException                         Traceback (most recent call last)
<ipython-input-10-9d4fdff5d12e> in <module>()
      1 col_name = "http://rs.tdwg.org/dwc/terms/fieldNotes"
----> 2 idbdf.filter(idbdf[col_name] != "").count()

/opt/spark/python/pyspark/sql/dataframe.py in __getitem__(self, item)
    819         """
    820         if isinstance(item, basestring):
--> 821             jc = self._jdf.apply(item)
    822             return Column(jc)
    823         elif isinstance(item, Column):

/opt/spark/python/lib/py4j-0.9.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
    833         answer = self.gateway_client.send_command(command)
    834         return_value = get_return_value(
--> 835             answer, self.gateway_client, self.target_id, self.name)
    836 
    837         for temp_arg in temp_args:

/opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
     49                                              e.java_exception.getStackTrace()))
     50             if s.startswith('org.apache.spark.sql.AnalysisException: '):
---> 51                 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
     52             if s.startswith('java.lang.IllegalArgumentException: '):
     53                 raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)

AnalysisException: u'Cannot resolve column name "http://rs.tdwg.org/dwc/terms/fieldNotes" among (id, http://rs.tdwg.org/dwc/terms/county, http://rs.tdwg.org/dwc/terms/locality, http://portal.idigbio.org/terms/tribe, http://rs.tdwg.org/dwc/terms/infraspecificEpithet, http://rs.tdwg.org/dwc/terms/rightsHolder, http://rs.tdwg.org/dwc/terms/lithostratigraphicTerms, http://portal.idigbio.org/terms/associatedFamily, http://portal.idigbio.org/terms/inhs/location_Basin, http://rs.tdwg.org/dwc/terms/earliestAgeOrLowestStage, http://portal.idigbio.org/terms/inhs/superfamily, http://rs.tdwg.org/dwc/terms/ownerInstitutionCode, http://rs.tdwg.org/dwc/terms/taxonRank, http://rs.tdwg.org/dwc/terms/bed, http://portal.idigbio.org/terms/inhs/Live, http://rs.tdwg.org/dwc/terms/country, http://rs.tdwg.org/dwc/terms/verbatimDepth, http://purl.org/dc/terms/source, http://rs.tdwg.org/dwc/terms/maximumElevationInMeters, http://rs.tdwg.org/dwc/terms/waterBody, http://rs.tdwg.org/dwc/terms/family, http://portal.idigbio.org/terms/associateCondition, http://portal.idigbio.org/terms/inhs/Dead, http://portal.idigbio.org/terms/associateAuthor, http://portal.idigbio.org/terms/inhs/Instars_Male, http://rs.tdwg.org/dwc/terms/identificationReferences, http://rs.tdwg.org/dwc/terms/rights, http://rs.tdwg.org/dwc/terms/island, http://rs.tdwg.org/dwc/terms/geodeticDatum, http://purl.org/dc/terms/bibliographicCitation, http://rs.tdwg.org/dwc/terms/nomenclaturalCode, http://purl.org/dc/terms/type, http://rs.tdwg.org/dwc/terms/%20identificationQualifier, http://portal.idigbio.org/terms/associateCommonName, http://rs.tdwg.org/dwc/terms/earliestEpochOrLowestSeries, http://rs.tdwg.org/dwc/terms/namePublishedIn, http://rs.tdwg.org/dwc/terms/verbatimCoordinateSystem, http://portal.idigbio.org/terms/inhs/Juv_females, http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters, http://rs.tdwg.org/dwc/terms/day, http://rs.tdwg.org/dwc/terms/lifeStage, http://rs.tdwg.org/dwc/terms/identificationRemarks, http://rs.tdwg.org/dwc/terms/verbatimTaxonRank, http://rs.tdwg.org/dwc/terms/latestPeriodOrHighestSystem, http://portal.idigbio.org/terms/determinationHistory, http://rs.tdwg.org/dwc/terms/recordedBy, http://rs.tdwg.org/dwc/terms/order, http://purl.org/dc/terms/references, http://rs.tdwg.org/dwc/terms/islandGroup, http://purl.org/dc/terms/accessRights, http://rs.tdwg.org/dwc/terms/group, http://rs.tdwg.org/dwc/terms/dateIdentified, http://rs.tdwg.org/dwc/terms/informationWithheld, http://rs.tdwg.org/dwc/terms/scientificNameID, http://rs.tdwg.org/dwc/terms/verbatimElevation, http://purl.org/dc/terms/rightsHolder, http://rs.tdwg.org/dwc/terms/establishmentMeans, http://portal.idigbio.org/terms/inhs/Total_females, http://rs.tdwg.org/dwc/terms/maximumDepthInMeters, http://rs.tdwg.org/dwc/terms/typeStatus, http://portal.idigbio.org/terms/inhs/FormII_females, http://rs.tdwg.org/dwc/terms/verbatimLatitude, http://rs.tdwg.org/dwc/terms/occurrenceStatus, http://rs.tdwg.org/dwc/terms/locationID, http://rs.tdwg.org/dwc/terms/basisOfRecord, http://rs.tdwg.org/dwc/terms/taxonRemarks, http://portal.idigbio.org/terms/inhs/Vouchered, http://rs.tdwg.org/dwc/terms/latestEpochOrHighestSeries, http://rs.tdwg.org/dwc/terms/dynamicProperties, http://rs.tdwg.org/dwc/terms/municipality, http://portal.idigbio.org/terms/inhs/FormII_males, http://rs.tdwg.org/dwc/terms/previousIdentifications, http://rs.tdwg.org/dwc/terms/latestAgeOrHighestStage, http://rs.tdwg.org/dwc/terms/vernacularName, http://rs.tdwg.org/dwc/terms/fieldNotes, http://rs.tdwg.org/dwc/terms/institutionCode, http://rs.tdwg.org/dwc/terms/class, http://rs.tdwg.org/dwc/terms/member, http://rs.tdwg.org/dwc/terms/verbatimLongitude, http://rs.tdwg.org/dwc/terms/minimumDepthInMeters, http://rs.tdwg.org/dwc/terms/verbatimLocality, http://portal.idigbio.org/terms/inhs/Juv_males, http://portal.idigbio.org/terms/associateNotes, http://rs.tdwg.org/dwc/terms/phylum, http://purl.org/dc/terms/rights, symbiotaverbatimScientificName, http://rs.tdwg.org/dwc/terms/relatedResourceID, http://rs.tdwg.org/dwc/terms/minimumElevationInMeters, http://rs.tdwg.org/dwc/terms/associatedTaxa, http://portal.idigbio.org/terms/inhs/locationTrs, http://rs.tdwg.org/dwc/terms/samplingProtocol, http://rs.tdwg.org/dwc/terms/startDayOfYear, http://rs.tdwg.org/dwc/terms/verbatimCoordinates, http://portal.idigbio.org/terms/etag, http://portal.idigbio.org/terms/uuid, http://rs.tdwg.org/dwc/terms/georeferencedDate, http://rs.tdwg.org/dwc/terms/habitat, http://portal.idigbio.org/terms/inhs/Instars_Female, http://rs.tdwg.org/dwc/terms/scientificNameAuthorship, http://rs.tdwg.org/dwc/terms/occurrenceID, http://rs.tdwg.org/dwc/terms/associatedMedia, http://rs.tdwg.org/dwc/terms/dataGeneralizations, http://portal.idigbio.org/terms/inhs/location_RiverMile, http://portal.idigbio.org/terms/createdBy, http://portal.idigbio.org/terms/inhs/location_Stream, http://portal.idigbio.org/terms/associateDeterminedBy, http://rs.tdwg.org/dwc/terms/earliestPeriodOrLowestSystem, http://rs.tdwg.org/dwc/terms/subgenus, http://rs.tdwg.org/dwc/terms/decimalLatitude, http://portal.idigbio.org/terms/hostFamily, http://rs.tdwg.org/dwc/terms/georeferenceRemarks, http://rs.tdwg.org/dwc/terms/occurrenceRemarks, http://rs.tdwg.org/dwc/terms/preparations, http://rs.tdwg.org/dwc/terms/identificationVerificationStatus, http://portal.idigbio.org/terms/subfamily, http://portal.idigbio.org/terms/preparationCount, http://portal.idigbio.org/terms/inhs/Total_Males, http://rs.tdwg.org/dwc/terms/eventRemarks, http://rs.tdwg.org/dwc/terms/associatedReferences, http://portal.idigbio.org/terms/endangeredStatus, http://rs.tdwg.org/dwc/terms/georeferenceSources, http://rs.tdwg.org/dwc/terms/associatedSequences, http://rs.tdwg.org/dwc/terms/formation, http://rs.tdwg.org/dwc/terms/higherClassification, http://rs.tdwg.org/dwc/terms/catalogNumber, http://rs.tdwg.org/dwc/terms/higherGeography, http://rs.tdwg.org/dwc/terms/individualCount, http://rs.tdwg.org/dwc/terms/decimalLongitude, http://rs.tdwg.org/dwc/terms/datasetName, http://rs.tdwg.org/dwc/terms/month, http://rs.tdwg.org/dwc/terms/georeferencedBy, http://rs.tdwg.org/dwc/terms/eventTime, http://rs.tdwg.org/dwc/terms/identificationQualifier, http://portal.idigbio.org/terms/associateRelationship, http://rs.tdwg.org/dwc/terms/state, http://rs.tdwg.org/dwc/terms/specificEpithet, http://rs.tdwg.org/dwc/terms/countryCode, http://portal.idigbio.org/terms/associateIdentifier, http://rs.tdwg.org/dwc/terms/kingdom, http://rs.tdwg.org/dwc/terms/fieldNumber, http://rs.tdwg.org/dwc/terms/coordinatePrecision, http://portal.idigbio.org/terms/recordIds, http://purl.org/dc/terms/language, http://rs.tdwg.org/dwc/terms/stateProvince, http://portal.idigbio.org/terms/inhs/Relic, http://rs.tdwg.org/dwc/terms/eventDate, http://rs.tdwg.org/dwc/terms/collectionID, http://portal.idigbio.org/terms/recordId, http://rs.tdwg.org/dwc/terms/collectionCode, http://rs.tdwg.org/dwc/terms/nameAccordingTo, http://portal.idigbio.org/terms/barcodeValue, http://rs.tdwg.org/dwc/terms/georeferenceVerificationStatus, http://rs.tdwg.org/dwc/terms/associatedOccurrences, http://portal.idigbio.org/terms/inhs/Juv_undetermined, http://rs.tdwg.org/dwc/terms/recordNumber, http://rs.tdwg.org/dwc/terms/genus, http://rs.tdwg.org/dwc/terms/continent, http://rs.tdwg.org/dwc/terms/sex, http://rs.tdwg.org/dwc/terms/identifiedBy, http://portal.idigbio.org/terms/version, http://symbiota.org/terms/verbatimScientificName, http://rs.tdwg.org/dwc/terms/disposition, http://portal.idigbio.org/terms/inhs/FormI_males, http://rs.tdwg.org/dwc/terms/preparation, http://rs.tdwg.org/dwc/terms/latestEraOrHighestErathem, http://portal.idigbio.org/terms/associatedRelationship, http://portal.idigbio.org/terms/dateModified, http://rs.tdwg.org/dwc/terms/locationAccordingTo, http://rs.tdwg.org/dwc/terms/institutionID, http://rs.tdwg.org/dwc/terms/locationRemarks, http://rs.tdwg.org/dwc/terms/reproductiveCondition, http://rs.tdwg.org/dwc/terms/eventID, http://portal.idigbio.org/terms/subgenus, http://rs.tdwg.org/dwc/terms/endDayOfYear, http://rs.tdwg.org/dwc/terms/scientificName, http://rs.tdwg.org/dwc/terms/nomenclaturalStatus, http://rs.tdwg.org/dwc/terms/otherCatalogNumbers, http://rs.tdwg.org/dwc/terms/verbatimEventDate, http://purl.org/dc/terms/reference, http://rs.tdwg.org/dwc/terms/accessRights, http://rs.tdwg.org/dwc/terms/earliestEraOrLowestErathem, http://rs.tdwg.org/dwc/terms/georeferenceProtocol, http://purl.org/dc/terms/modified, http://rs.tdwg.org/dwc/terms/footprintWKT, http://rs.tdwg.org/dwc/terms/datasetID, http://rs.tdwg.org/dwc/terms/year);'

In [11]:
w_notes = idbdf.filter(idbdf['`http://rs.tdwg.org/dwc/terms/fieldNotes`'] != "")
w_notes.head()["http://rs.tdwg.org/dwc/terms/fieldNotes"]
#w_notes.count()


Out[11]:
u'[ Gamasiphis | F ][ AL 1048 ]'

In [ ]: