In [1]:
import sys

In [2]:
sys.path.append("../src/")

In [8]:
#!/usr/bin/python

"""
Script to query the ALMA archive with filter to select the list of Calibrators to be analyzed


HISTORY:
    2015.09.15:
        - first shot
        - parse the list of calibrators and eliminate the duplication 
        - query the region around the calibrators for public data
        
    2015.09.16:
        - add a selectDeepfield to constrain the selection of the target
        - method to write the final report
        
    2015.09.23:
        - add maxFreqRes in selectDeepField (mainly for absorption)
        
        
    2017.03.28:
        - fixing a bug
        - adapted to async by RWW
        - add an option
        - still a bug with verbose = True ...

    2017.03.29
        - fixing verbose = True
        - add safety if query is fail (just skip) | if fail, try to change the radius in query
        - remove the temporary xml-file

    2017.03.30
        - add option selectPol
        - add flux and acceptedProject in report
    
        
        
"""

__author__="S. Leon @ ALMA, RWW @ ITB"
__version__="0.2.0@2017.03.28"




from astroquery.alma import Alma
from astropy import coordinates
from astropy import units as u
import time
import os
import numpy as np
import pandas as pd
from xml.etree import ElementTree as ET
from bs4 import BeautifulSoup


class queryCal:
    
    def __init__(self,fileCal, fluxrange, readFile = True):
        
        if readFile:
            self.listCal = self.readCal(fileCal, fluxrange)
        else:
            self.listCal = []



    def readCal(self, ifile, fluxrange = [0.5, 1.0]):
        "Read a list of calibrators in CSV format from the Source Catalogue web interface"

        listcal = []

        with open(ifile, 'r') as fcal:
            for line in fcal:
                if line[0] != "#":
                    tok       = line.split(",")
                    band      = tok[0].split(" ")[1]
                    flux      = float(tok[7])
                    name      = tok[13].strip().split("|")[0]
                    alpha2000 = tok[3]
                    delta2000 = tok[5]

                    if (flux >= fluxrange[0] and flux <= fluxrange[1]):
                        found = False
                        for nameYet in listcal:
                            if nameYet[0] == name:
                                found = True

                        if not found:
                            coord = coordinates.SkyCoord(alpha2000 + delta2000, unit=(u.hourangle, u.deg), equinox='J2000') # converted using astropy
                            listcal.append([name, coord.ra.value, coord.dec.value, flux])

        return(listcal)



    def readCalold(self,file, fluxrange = [0.5, 1.0]):
        "Read a list of calibrators in CSV format from the Source Catalogue web interface"
        
        listcal = []
        
        fcal = open(file)
        for line in fcal:
            if line[0] != "#":
                tok       = line.split(",")
                band      = tok[0].split(" ")[1]
                flux      = float(tok[7])
                name      = tok[13].split("|")[0]
                alpha2000 = float(tok[3])
                delta2000 = float(tok[5])
                
                if (flux >= fluxrange[0] and flux <= fluxrange[1]):
                    found = False
                    for nameYet in listcal:
                        if nameYet[0] == name:
                            found = True
                    
                    if not found:
                        listcal.append([name, alpha2000, delta2000])
                        
            
        return(listcal)
    


    def queryAlma(self, listname, public = True ):
        "Query the data for the list of ALMA name"
        
        result = []
        
        for name in listname:
            
            region = coordinates.SkyCoord(name[1], name[2], unit='deg')
            alma_data = Alma.query_region_async(region, 0.005*u.deg, science = False, public =  public)
            
            # alma_data is in unicode
            xmldata = BeautifulSoup(alma_data.text, "lxml")

            if len(xmldata.contents) < 2:
                print name, " query FAIL!"
            else:
                with open('xmldata.xml', 'w') as ofile: # write in a file, 
                    ofile.write(str(xmldata.contents[1]))

                tree = ET.parse("xmldata.xml") # read the file again. Find a better way.
                root = tree.getroot()

                projects = []
                for i, proj in enumerate(root[0][0][0][2][36][0]): # many child
                    projects.append([])
                    for data in proj:
                        if data.text is None:
                            projects[i].append('None')
                        else:
                            projects[i].append(data.text)

                columns=['Project code', 'Source name', 'RA', 'Dec', 'Galactic longitude', 'Galactic latitude', \
                         'Band', 'Spatial resolution', 'Frequency resolution', 'Array', 'Mosaic', 'Integration', \
                         'Release date', 'Frequency support', 'Velocity resolution', 'Pol products', \
                         'Observation date', 'PI name', 'SB name', 'Proposal authors', 'Line sensitivity (10 km/s)', \
                         'Continuum sensitivity', 'PWV', 'Group ous id', 'Member ous id', 'Asdm uid', 'Project title', \
                         'Project type', 'Scan intent', 'Field of view', 'Largest angular scale', 'QA2 Status',\
                         'Pub', 'Science keyword', 'Scientific category', 'ASA_PROJECT_CODE']

                #convert python list to Pandas DataFrame
                df = pd.DataFrame(projects, columns=columns)

                result.append([name, df])

        if os.path.exists('xmldata.xml'): # remove the temporary xml file
            os.remove('xmldata.xml')
            
        return(result)
            
            
            
    def selectDeepField(self, data, minTimeBand ={3:1e9,6:1e9,7:1e9}, maxFreqRes = 1000.0, selectPol=False, verbose = True):
        """
        From the queryAlma result we filter to select the deep field
        minTimeBand  : dictionary to select the minimum time  per band 
        maxFreqRes: maximum of the frequency resolution (in kHz)
        
        Return the names and projects in a report with the date !!
        """
        
        
        finalReport = []
        # for every object/source
        for idx, item in enumerate(data):
            name      = item[0][0]
            alpha2000 = item[0][1]
            delta2000 = item[0][2]
            flux      = item[0][3]
            
            projects = []
            
            totalTime = {3:0., 4:0., 5:0., 6:0., 7:0., 8:0., 9:0., 10:0.}
            
            reportSource  = "\n\n#### Source : %s #####\n"%(name)
            reportSource += "#### No : %d \n"%(idx)
            reportSource += "#### Coord. 2000: %f  %f \n"%(alpha2000, delta2000)
            reportSource += "#### Flux : %f\n"%(flux)
            reportSource += "\n"
            
            uids = item[1]
            nuids = len(item[1])

            code        = uids['Project code']
            source      = uids['Source name']
            band        = uids['Band']
            integration = uids['Integration']
            frequency   = uids['Frequency support']
            obsdate     = uids['Observation date']
            res         = uids['Spatial resolution']
            asdm        = uids['Asdm uid']
            freqRes     = uids['Frequency resolution']
            pol         = uids['Pol products']
            

            # selection process
            # if selectPol == True: check the Polarization AND freqRes else: only check freqRes
            # if accepted: sum up the integration time
            for i in range(nuids):
                selectSG = False

                try: # for an error in ALMA data Band ("6 7")
                    bandi = int(band[i])
                except:
                    print "Ew, double band in ", code[i]
                    bandi = [int(x) for x in band[i].split(" ")]
                    

                if float(freqRes[i]) < maxFreqRes:
                        if selectPol:
                            if pol[i] =='XX XY YX YY':
                                selectSG = True
                        else:
                            selectSG = True

                if selectSG:
                    if isinstance(bandi, int):
                        totalTime[bandi] += float(integration[i])
                    else:
                        for band_i in bandi:
                            totalTime[band_i] += float(integration[i])


                if verbose and selectSG:
                    reportSource += "## %s  %20s   Band:%s   obsdate:%s   FreqRes:%s   Res:%s   Pol:%s    asdm:%s  integration:%s\n"%(code[i], source[i], band[i], obsdate[i], freqRes[i], res[i], pol[i], asdm[i], integration[i])
                
                foundProject = False
                for p in projects:
                    if p == code[i]:
                        foundProject = True
                    
                if not foundProject and selectSG:
                    projects.append(code[i])

            
            acceptedProject = len(projects) # number of accepted projects

            reportSource += "\n"
            for key in totalTime:
                if totalTime[key] > 0.:
                    reportSource += "Time Band %d : %6.0fs (%3.1fh) \n"%(key, totalTime[key], totalTime[key] / 3600.)
            
            # select source if number of uid > 0 AND totalTime > minTimeBand
            # add report to the finalReport
            selectSource = False
            if acceptedProject > 0:
                selectSource = True
                reportSource += "\n Project codes: \n"
                for p in projects:
                    reportSource += " %s "%(p)
                
                reportSource += "\n\n"
                reportSource += "Total uids: %d \n"%(nuids)
                reportSource += "Total accepted projects: %d"%(acceptedProject)
                reportSource += "\n"

                for k in minTimeBand:
                    if totalTime[k] < minTimeBand[k]:
                        selectSource = False
            
                    
            if selectSource:
                finalReport.append([nuids, item[0], reportSource])
                
        # sorting according to the number of uids
        finalReportSorted = sorted(finalReport, key=lambda data: data[0])
        
        return(finalReportSorted)

        
        
    def writeReport(self, report, file = "deepfieldRG.txt"):
        "output the final report from selectDeepField"
        
        fout = open(file,"w")
        
        nsource = 0
        for rep in report:
            nsource += 1
            fout.write(rep[2])
            print(rep[2])
            
        endText  = "#################################\n"
        endText += "### Total Number of Sources : %d \n"%(nsource)
        
        fout.write(endText)
        print(endText)
        
        fout.close()

In [5]:
from astropy import coordinates

In [ ]:
ra ='10h01m48s'
dec = '+02 23 04'

In [6]:
c = coordinates.SkyCoord('10 01 48 +02 23 04', unit=(u.hourangle, u.deg))

In [7]:
c


Out[7]:
<SkyCoord (ICRS): (ra, dec) in deg
    ( 150.45,  2.38444444)>

In [13]:
def queryalma(region, radius=1.0, public=True):
    alma_data = Alma.query_region_async(region, radius*u.deg, science = False, public =  public)

    # alma_data is in unicode
    xmldata = BeautifulSoup(alma_data.text, "lxml")

    if len(xmldata.contents) < 2:
        print name, " query FAIL!"
    else:
        with open('xmldata.xml', 'w') as ofile: # write in a file, 
            ofile.write(str(xmldata.contents[1]))

        tree = ET.parse("xmldata.xml") # read the file again. Find a better way.
        root = tree.getroot()

        projects = []
        for i, proj in enumerate(root[0][0][0][2][36][0]): # many child
            projects.append([])
            for data in proj:
                if data.text is None:
                    projects[i].append('None')
                else:
                    projects[i].append(data.text)

        columns=['Project code', 'Source name', 'RA', 'Dec', 'Galactic longitude', 'Galactic latitude', \
                 'Band', 'Spatial resolution', 'Frequency resolution', 'Array', 'Mosaic', 'Integration', \
                 'Release date', 'Frequency support', 'Velocity resolution', 'Pol products', \
                 'Observation date', 'PI name', 'SB name', 'Proposal authors', 'Line sensitivity (10 km/s)', \
                 'Continuum sensitivity', 'PWV', 'Group ous id', 'Member ous id', 'Asdm uid', 'Project title', \
                 'Project type', 'Scan intent', 'Field of view', 'Largest angular scale', 'QA2 Status',\
                 'Pub', 'Science keyword', 'Scientific category', 'ASA_PROJECT_CODE']

        #convert python list to Pandas DataFrame
        df = pd.DataFrame(projects, columns=columns)
        
    return df

In [14]:
queryalma(c, radius=0.1)


Out[14]:
Project code Source name RA Dec Galactic longitude Galactic latitude Band Spatial resolution Frequency resolution Array ... Project title Project type Scan intent Field of view Largest angular scale QA2 Status Pub Science keyword Scientific category ASA_PROJECT_CODE
0 2012.1.00076.S ID163 150.41145833333334 2.3574580555555555 236.88463847308412 42.447434406449695 6 0.80670451685201761 31250.0 12m ... The evolution in the molecular gas content of ... S TARGET WVR 25.920048209946096 5.54697662283447 Y 3 Luminous and Ultra-Luminous Infra-Red Galaxies... Galaxy evolution 2012.1.00076.S
1 2013.1.00034.S highz_cell10_221555 150.47522916666668 2.3961111111111113 236.8921565907467 42.52175159414084 6 0.44485739008607977 31250.0 12m ... Evolution of ISM in Star-Forming Galaxies at z... S TARGET 25.714285714285715 2.0225941724554826 Y 4 Starburst galaxies Active galaxies 2013.1.00034.S
2 2013.1.00208.S zC412490 150.532333 2.396851 236.93633609749293 42.56861582980612 7 0.81002471560619238 31250.0 12m ... A systematic study of gas in z > 2 Main Sequen... S TARGET 18.340901017141245 3.8928604376940967 Y 1 Galaxy structure & evolution Galaxy evolution 2013.1.00208.S
3 2013.1.00208.S zC412369 150.4458 2.390278 236.87543266952116 42.494427663037364 7 0.80624332965896361 31250.0 12m ... A systematic study of gas in z > 2 Main Sequen... S TARGET 18.340901017141245 3.874687649296395 Y 1 Galaxy structure & evolution Galaxy evolution 2013.1.00208.S
4 2013.1.00118.S AzTECC23 150.42770833333333 2.3091944444444445 236.95064658137403 42.43260030843427 6 0.93620298155252435 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.417870458875557 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
5 2013.1.00118.S AzTECC15 150.38195833333333 2.4191666666666665 236.79334566324155 42.459247452818474 6 0.93691732571320729 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.419984912006149 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
6 2013.1.00118.S AzTECC31 150.44762499999996 2.4155277777777777 236.84898330580882 42.51057189806045 6 0.93710697644966179 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.422317155879552 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
7 2013.1.00118.S AzTECC1 150.42375 2.45325 236.78852839310434 42.51302887446676 6 0.93732569349811956 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.424547466520511 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
8 2013.1.00118.S AzTECC91 150.36920833333335 2.3965 236.8083546509306 42.435723081319246 6 0.93669619402604509 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.4230861748696535 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
9 2013.1.00118.S AzTECC41 150.45033333333333 2.358138888888889 236.9144757924778 42.47944498042983 6 0.93666330366748496 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.419633314093242 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
10 2013.1.00118.S AzTECC64 150.41516666666666 2.395888888888889 236.84514282095688 42.47276510566142 6 0.93684383010045169 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.4220950081209836 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
11 2013.1.00118.S AzTECC85 150.4170416666667 2.428138888888889 236.8110021724949 42.493004658110706 6 0.93710477082137933 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.423534487149806 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
12 2013.1.00118.S AzTECC126 150.49783333333335 2.377611111111111 236.93039277390653 42.5293836421407 6 0.936975506678836 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.403057818579217 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
13 2013.1.00118.S AzTECC52 150.48429166666668 2.35175 236.94827117626573 42.503341276141725 6 0.93672636961917211 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.418634697637112 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
14 2013.1.00118.S AzTECC60 150.367875 2.3582500000000004 236.84950701093032 42.41244124454453 6 0.93639034437965751 31250.0 12m ... Unveiling the population of high-redshift subm... S TARGET 26.25 5.421362939282071 Y 3 Sub-mm Galaxies (SMG) Galaxy evolution 2013.1.00118.S
15 2015.1.00540.S UVISTA-238225 150.46791666666667 2.4284166666666667 236.85071010930983 42.534561899822485 6 1.0745748059288278 31250.0 12m ... How dusty are the brightest z ~ 7 galaxies? S TARGET 27.03862660944206 5.230051346998502 Y 0 Lyman Break Galaxies (LBG) Galaxy evolution 2015.1.00540.S

16 rows × 36 columns


In [ ]: