#!/usr/bin/python
#
ver=0.07
# This version is close to ready for prime time
# Scrapes craigslist job categories for telecommuting work
# pflint Sat 19 Apr 2014 09:42:23 AM EDT 
# Guidance on soup https://www.crummy.com/software/BeautifulSoup/bs4/doc/
# Local guidance: http://docbox.flint.com:8081/boj#BeautifulsoupInstall
# Additional Guidance:  http://docbox.flint.com:8081/boj#PythonBased
# Past Programs: http://docbox.flint.com:8081/boj#programs
# 2014-10-06 08:03:39 need error checking on open!!!
# 2017-03-17 14:16:18 rewrite
#
# Set PYTHONIOENCODING=utf-8 so printing non-ASCII job titles does not die on Unicode errors
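# Typical invocation (the script filename here is illustrative; output is an HTML fragment):
#   PYTHONIOENCODING=utf-8 python scrape_cl_jobs.py > jobs.html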
#
# from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
import urllib2
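# Dependencies: pip install beautifulsoup4 lxml
# (the "lxml" parser name passed to BeautifulSoup below requires the lxml package)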
# base="https://montreal.fr.craigslist.ca/search/"
cityw = {
	'Burlington VT':'https://burlington.craigslist.org',
	'Boston MA':'https://boston.craigslist.org',
	'Washington DC':'https://washingtondc.craigslist.org',
	'New York NY':'https://newyork.craigslist.org',
	'Albany NY':'https://albany.craigslist.org',
	'Eastern CT':'https://newlondon.craigslist.org',
	'Hartford CT':'https://hartford.craigslist.org',
	'New Haven CT':'https://newhaven.craigslist.org',
	'Northwest CT':'https://nwct.craigslist.org',	
	'Montreal CA':'https://montreal.craigslist.ca' 
	}
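# Each value is a site root: search URLs are built as <root>/search/<spec>, and
# site-relative hrefs in the results are absolutized against it below.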
#	Above is the real data set.  Below is the test one.
test_cityw = {
	'Burlington VT':'https://burlington.craigslist.org',
	'Washington DC':'https://washingtondc.craigslist.org',
	'New York NY':'https://newyork.craigslist.org',
	# Keys must be distinct: repeated 'Montreal CA' keys in a dict
	# literal silently collapse to the last entry.
	'Montreal CA (fr)':'https://montreal.fr.craigslist.ca',
	'Montreal CA (en)':'https://montreal.en.craigslist.ca',
	'Montreal CA':'https://montreal.craigslist.ca'
	}
# 
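# Craigslist job-category codes used as /search/<spec> path segments
# (e.g. 'sof' is the software category; the others follow the same scheme).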
specs =['eng','sad','sof','tch']
test_specs =['sof','tch']
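# For a quick dry run, point the loops below at test_cityw / test_specs
# instead of cityw / specs (the test sets are otherwise unused).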
for city, base in cityw.items():
	print '<p>  <B>'+city+'</B> </p>'
	for spec in specs:
		# is_telecommuting=1 limits results to telecommute-friendly postings
		url=base+"/search/"+spec+"?is_telecommuting=1"
		# Create the soup; skip this category if the fetch fails
		try:
			page=urllib2.urlopen(url)
		except urllib2.URLError as e:
			print '<!-- skipped '+url+': '+str(e)+' -->'
			continue
		soup = BeautifulSoup(page.read(),"lxml")
		# print(soup.find_all('a'))
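		# Each posting's title <a> carries the 'hdrlnk' class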
		sline = soup.find_all('a','hdrlnk')
		# print len(sline)
		for a in sline:
			href = a.get('href','')
			if href.startswith('//'):	# protocol-relative link
				href = 'https:'+href
			elif href.startswith('/'):	# site-relative link
				href = base+href
			line = u'<p>  <a href="%s">%s</a> </p>' % (href, a.get_text().strip())
			print line.encode('utf-8')
		#
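# Example of the emitted HTML fragment (URL path and title illustrative):
#   <p>  <B>Burlington VT</B> </p>
#   <p>  <a href="https://burlington.craigslist.org/...">(posting title)</a> </p>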

