Friday 18 October 2013

Python code to crawl web pages

 import thread
 import threading

 import requests
 from lxml import html
 # URLs to crawl. The "link" entries look like placeholders -- TODO replace with real URLs.
 tocrawl = ["link","link"]  
 def print_def(threadName):
     """Fetch one page, print its ``//item`` nodes and append them to ``myfile``.

     threadName -- despite its name, this is the URL to fetch: the caller
                   passes one ``tocrawl`` entry as the only argument.
     """
     # Fetch the URL received as an argument instead of the module-level loop
     # variable ``crawling``, which may already point at a different URL by
     # the time this thread actually runs.
     page = requests.get(threadName)
     tree = html.fromstring(page.text)
     text = tree.xpath('//item')
     # Single-argument parenthesised form prints the same under Python 2's
     # print statement and Python 3's print function.
     print(text)
     # ``with`` guarantees the file is closed; the previous ``f.close`` lacked
     # parentheses and therefore never closed it.
     with open('myfile', 'a+') as f:
         # Terminate each record with a real newline (was the literal "/n").
         f.write(str(text) + "\n")
 # Start one worker thread per URL in ``tocrawl``.
 workers = []
 try:
     for crawling in tocrawl:
         worker = threading.Thread(target=print_def, args=(crawling,))
         worker.start()
         workers.append(worker)
 except thread.error:
     # Raised when the interpreter cannot start another thread. Catching only
     # this (instead of a bare ``except``) lets KeyboardInterrupt and genuine
     # bugs surface.
     print("Error: unable to start thread")
 # Wait for the started workers to finish. Joining replaces the former
 # ``while 1: pass`` loop, which burned a full CPU core and never let the
 # script exit.
 for worker in workers:
     worker.join()

No comments:

Post a Comment