In [1]:
from scrapy import signals
from scrapy.utils.project import get_project_settings
from scrapy.crawler import CrawlerProcess
from scrapy.xlib.pydispatch import dispatcher
from multiprocessing.queues import Queue
import scrapy
import multiprocessing
import datetime
import collections
import urllib
import collections
from scrapy import Request as Request


/home/jun_gentoo/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:4: ScrapyDeprecationWarning: Importing from scrapy.xlib.pydispatch is deprecated and will no longer be supported in future Scrapy versions. If you just want to connect signals use the from_crawler class method, otherwise import pydispatch directly if needed. See: https://github.com/scrapy/scrapy/issues/1762

In [2]:
# Lightweight immutable record with three positional fields: link, title, status.
business_data = collections.namedtuple(
    'business_data',
    ['link', 'title', 'status'],
)

In [3]:
class CrawlerWorker(multiprocessing.Process):
    """Run a Scrapy crawl in a child process and send the scraped items back.

    The crawl is executed inside ``run()`` (i.e. in the child process once
    ``start()`` is called). Every item reported through the ``item_scraped``
    signal is appended to ``self.items`` and the whole list is put on
    ``result_queue`` when the crawl ends.

    Args:
        spider: whatever ``CrawlerProcess.crawl`` should be given (it is
            passed through unchanged).
        result_queue: queue-like object with a ``put`` method; receives
            exactly one list of items per ``run()``.
    """

    def __init__(self, spider, result_queue):
        super(CrawlerWorker, self).__init__()
        self.result_queue = result_queue

        self.crawler = CrawlerProcess(get_project_settings())

        self.items = []
        self.spider = spider
        # ``item_scraped`` is the documented signal name; ``item_passed`` is
        # only a legacy alias for it in Scrapy 1.x.
        dispatcher.connect(self._item_passed, signals.item_scraped)

    def _item_passed(self, item):
        """Signal handler: remember one scraped item."""
        self.items.append(item)

    def run(self):
        """Crawl, then always publish the collected items on the queue.

        The ``finally`` guarantees that the parent, which blocks on
        ``result_queue.get()``, is released even when the crawl raises; the
        exception itself still propagates so the failure is not hidden.
        """
        try:
            self.crawler.crawl(self.spider)
            self.crawler.start()
            self.crawler.stop()
        finally:
            self.result_queue.put(self.items)

In [4]:
class CanberraBusinessSpider(scrapy.Spider):
    """Print title, absolute link and summary text of each listing on a page.

    Listings are the ``div`` elements whose class contains ``sr-l`` and that
    carry an ``onclick`` attribute. By default only the start URL is
    processed; set ``follow_pagination`` to True to also request the page
    behind the "next" link.
    """
    name = "CanberraBusinessSpider"
    allowed_domains = ["www.seekbusiness.com.au"]
    start_urls = ['https://www.seekbusiness.com.au/businesses-for-sale/in-canberra-act-2601?rad=50']

    # Opt-in: when True, parse() also schedules the "next" results page.
    # Defaults to False so the crawl stays limited to the start URL.
    follow_pagination = False

    def parse(self, response):
        """Print every complete listing found in ``response``.

        Returns:
            A list of follow-up requests: empty unless ``follow_pagination``
            is enabled and the page has a "next" link.
        """
        business_tags = response.xpath('//div[contains(@class,"sr-l") and @onclick]')
        for business in business_tags:
            # extract_first() yields None instead of raising IndexError when
            # the node is missing, so one malformed listing cannot abort the
            # rest of the page.
            title = business.xpath('div[@class="t"]/a/text()').extract_first()
            link = business.xpath('div[@class="t"]/a/@href').extract_first()
            if title is None or link is None:
                continue
            full_link = response.urljoin(link)
            contents = business.xpath('div[@class="m-c"]/div[@class="det-c"]/div[@class="smry"]/text()').extract()
            full_content = '\n'.join(contents)
            # Single-argument print(): same space-separated output under the
            # Python 2 print statement and the Python 3 print function.
            print(u' '.join((title, full_link, full_content)))

        follow_ups = []
        # The last results page has no "next" link; that must not be an error.
        next_page = response.xpath('//a[contains(@class,"next")]/@href').extract_first()
        if self.follow_pagination and next_page:
            follow_ups.append(Request(response.urljoin(next_page), callback=self.parse))
        return follow_ups

In [5]:
def main():
    """Run the Canberra spider in a worker process and print what it returns.

    The worker puts a single list of scraped items on the queue; each entry
    of that list is printed on its own line.
    """
    # Public factory; multiprocessing.queues.Queue cannot be instantiated
    # without an explicit context on Python 3.
    result_queue = multiprocessing.Queue()
    # CrawlerProcess.crawl is documented to take a spider class (not an
    # instance), so the class itself is handed to the worker.
    crawler = CrawlerWorker(CanberraBusinessSpider, result_queue)
    crawler.start()
    # Read the result before joining: a child that still has queued data to
    # flush would otherwise never exit.
    for item in result_queue.get():
        print(item)
    crawler.join()


if __name__ == "__main__":
    main()


2017-01-20 16:07:37 [scrapy] INFO: Scrapy 1.1.1 started (bot: scrapybot)
2017-01-20 16:07:37 [scrapy] INFO: Overridden settings: {}
2017-01-20 16:07:37 [scrapy] INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
2017-01-20 16:07:37 [scrapy] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.chunked.ChunkedTransferMiddleware',
 'scrapy.downloadermiddlewares.stats.DownloaderStats']
2017-01-20 16:07:37 [scrapy] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
 'scrapy.spidermiddlewares.referer.RefererMiddleware',
 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
 'scrapy.spidermiddlewares.depth.DepthMiddleware']
2017-01-20 16:07:37 [scrapy] INFO: Enabled item pipelines:
[]
2017-01-20 16:07:37 [scrapy] INFO: Spider opened
2017-01-20 16:07:37 [scrapy] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2017-01-20 16:07:37 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023
2017-01-20 16:07:39 [scrapy] DEBUG: Crawled (200) <GET https://www.seekbusiness.com.au/businesses-for-sale/in-canberra-act-2601?rad=50> (referer: None)
CafeCoffeeShop - Canberra https://www.seekbusiness.com.au/business-listing/cafecoffeeshop-canberra/265625?s=0 Cafe - Franchise - Canberra
                

            
Supercheap Storage l Mobile self-storage l Simple management, highly profitable! https://www.seekbusiness.com.au/business-listing/supercheap-storage-l-mobile-self-storage-l-simple-management-highly-profitable/223127?s=0 Supercheap Storage, mobile self-storage provider with a difference. Simplified, convenient service. Stress-free management. Supercheap Storage - the business in Canberra you have been searching for
                

            
Supercheap Storage l Mobile self-storage l Simple management, highly profitable! https://www.seekbusiness.com.au/business-listing/supercheap-storage-l-mobile-self-storage-l-simple-management-highly-profitable/223108?s=0 Supercheap Storage, mobile self-storage provider with a difference. Simplified, convenient service. Stress-free management. Supercheap Storage - the business in Canberra you have been searching for
                

            
CafeCoffeeShop - Canberra Area https://www.seekbusiness.com.au/business-listing/cafecoffeeshop-canberra-area/203549?s=0 Cafe - Takeaway - Canberra Area
                

            
Rent Roll Sale Canberra/Queanbeyan Region (75-100 properties) https://www.seekbusiness.com.au/business-listing/rent-roll-sale-canberra-queanbeyan-region-75-100-properties/291164?s=0 Rent Roll Sale Canberra/Queanbeyan Region (75-100 properties)
                

            
CafeCoffeeShop - Canberra https://www.seekbusiness.com.au/business-listing/cafecoffeeshop-canberra/265305?s=0 Cafe -    Takeaway  -  Franchise - Canberra
                

            
Mo's Mobiles is looking for Licensed Dealers to operate existing Vodafone stores https://www.seekbusiness.com.au/business-listing/mos-mobiles-is-looking-for-licensed-dealers-to-operate-existing-vodafone-stores/281849?s=0 Mo’s Mobiles is Vodafone’s largest exclusive dealer. We are looking for Licensees to operate a number of our existing stores.

                

            
Gozleme King Franchise : Turkish Street Food & Cafe https://www.seekbusiness.com.au/business-listing/gozleme-king-franchise-turkish-street-food-cafe/270622?s=0 Gözleme King is  offering premium franchise opportunities located in major shopping centers , busy high street's and major CBD's.  Contact us now to find out more.
                

            
Healthy Start Civic https://www.seekbusiness.com.au/business-listing/healthy-start-civic/258794?s=0 A healthy bargain buy to start your next business.
                

            
The Fat Goanna Cafe https://www.seekbusiness.com.au/business-listing/the-fat-goanna-cafe/290623?s=0 5-day week cafe surrounded by government offices low rent and secure lease.

                

            
Community sector RTO for sale. Delegate Authority. Funding- 2 states. Excellent! https://www.seekbusiness.com.au/business-listing/community-sector-rto-for-sale-delegate-authority-funding-2-states-excellent/286398?s=0 Predominantly servicing the Community services & hospitality sectors - This RTO is showing a first quarter profit of $90k. 

RTO is reluctantly on the market amidst significant within the business.
                

            
Rent Roll Sale 80-100 Managements https://www.seekbusiness.com.au/business-listing/rent-roll-sale-80-100-managements/264886?s=0 -----SOLD-----
                

            
Lollipop's Playland - Children's Indoor Play centre and Café franchise https://www.seekbusiness.com.au/business-listing/lollipops-playland-childrens-indoor-play-centre-and-cafe-franchise/107888?s=0 Lollipop's Playlands are Australia's largest Indoor play centre business with 20 centres nationally and 31 globally with a proud 20 year history.
                

            
Sumo Salad Tuggeranong: a successful absentee owner food franchise https://www.seekbusiness.com.au/business-listing/sumo-salad-tuggeranong-a-successful-absentee-owner-food-franchise/286193?s=0 Secure Food Franchise Opportunity
                

            
Exciting opportunity to own and manage your own Skin & Laser clinic!!! https://www.seekbusiness.com.au/business-listing/exciting-opportunity-to-own-and-manage-your-own-skin-laser-clinic/289893?s=0 Clearskincare Clinics Managing Owners can start living the life you've imagined with a 40% profit share and a $85,000 package per year with a proven business model, and ongoing support & training.
                

            
Own your own RTO today! https://www.seekbusiness.com.au/business-listing/own-your-own-rto-today/239855?s=0 ----SOLD----
                

            
Southside Supermarket https://www.seekbusiness.com.au/business-listing/southside-supermarket/205055?s=0 ---------------SOLD------------
                

            
Ace Sushi Braddon https://www.seekbusiness.com.au/business-listing/ace-sushi-braddon/289743?s=0 Under Instructions From The Administrator – ACE Sushi
                

            
Supercheap Storage l Mobile self-storage l Simple management, highly profitable! https://www.seekbusiness.com.au/business-listing/supercheap-storage-l-mobile-self-storage-l-simple-management-highly-profitable/222538?s=0 Supercheap Storage, mobile self-storage provider with a difference. Simplified, convenient service. Stress-free management. Supercheap Storage - the business in Canberra you have been searching for
                

            
Japanese Tapas Bar https://www.seekbusiness.com.au/business-listing/japanese-tapas-bar/289567?s=0 Under Instructions From The Administrator - Japanese Tapas Bar
                

            
2017-01-20 16:07:39 [scrapy] INFO: Closing spider (finished)
2017-01-20 16:07:39 [scrapy] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 268,
 'downloader/request_count': 1,
 'downloader/request_method_count/GET': 1,
 'downloader/response_bytes': 30312,
 'downloader/response_count': 1,
 'downloader/response_status_count/200': 1,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2017, 1, 20, 5, 7, 39, 402623),
 'log_count/DEBUG': 2,
 'log_count/INFO': 7,
 'response_received_count': 1,
 'scheduler/dequeued': 1,
 'scheduler/dequeued/memory': 1,
 'scheduler/enqueued': 1,
 'scheduler/enqueued/memory': 1,
 'start_time': datetime.datetime(2017, 1, 20, 5, 7, 37, 540959)}
2017-01-20 16:07:39 [scrapy] INFO: Spider closed (finished)

In [ ]: