1. Making a wordcloud from the job conditions
from selenium import webdriver
from bs4 import BeautifulSoup
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
import re
import time

browser = webdriver.Chrome('C:/chromedriver.exe')
browser.get("http://www.saramin.co.kr/zf_user/search/recruit?search_area=main&search_done=y&search_optional_item=n&searchType=search&searchword=%EB%B9%85%EB%8D%B0%EC%9D%B4%ED%84%B0&recruitPage=1")
time.sleep(3)

job = []

def collect_conditions(soup):
    # Grab the job-condition text from every posting on the current page
    for item in soup.findAll('div', class_='item_recruit'):
        job.append(item.find('div', class_='job_condition').text.strip())

collect_conditions(BeautifulSoup(browser.page_source, 'html.parser'))

# Click through the pager; the three ranges match the link positions
# of the successive pagination blocks on Saramin
for block in (range(2, 11), range(2, 12), range(3, 6)):
    for i in block:
        browser.find_element_by_xpath('//*[@id="recruit_info_list"]/div[2]/div/a[{}]'.format(i)).click()
        time.sleep(3)
        # Re-parse the page source after every click; otherwise page 1 is scraped repeatedly
        collect_conditions(BeautifulSoup(browser.page_source, 'html.parser'))

# Join the list into one string, replacing the '·' and '↑' separators with spaces
txt = ''
for s in job:
    txt = re.sub('[·|↑]', ' ', s) + ' ' + txt

# Build the wordcloud
wordcloud = WordCloud(font_path='C:/Windows/Fonts/malgun.ttf',
                      stopwords=STOPWORDS,
                      background_color='white',
                      width=1500, height=1200).generate(txt)
plt.figure(figsize=(15, 12))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
browser.quit()
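The fixed time.sleep(3) pauses work, but an explicit wait is sturdier when a page loads slowly. A minimal sketch with Selenium's WebDriverWait, not part of the original code:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait (up to 10 s) until the posting list is present, then parse
WebDriverWait(browser, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, 'div.item_recruit')))
soup = BeautifulSoup(browser.page_source, 'html.parser')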
2. Building a DataFrame
from pandas import DataFrame
from selenium import webdriver
from bs4 import BeautifulSoup
import time

browser = webdriver.Chrome('C:/chromedriver.exe')
browser.get("http://www.saramin.co.kr/zf_user/search/recruit?search_area=main&search_done=y&search_optional_item=n&searchType=search&searchword=%EB%B9%85%EB%8D%B0%EC%9D%B4%ED%84%B0&recruitPage=1")
time.sleep(3)

rows = []

def scrape_page(soup):
    # Title, company, region, and the remaining conditions for each posting
    for item in soup.findAll('div', class_='item_recruit'):
        job_name = item.find('h2', class_='job_tit').text.strip()
        company = item.find('strong', class_='corp_name').text
        condition = item.find('div', class_='job_condition')
        working_area = condition.find('a').text.strip()
        [s.extract() for s in condition('a')]   # drop the region links, keep the rest
        job_info = condition.text.strip()
        rows.append({'공고명': job_name, '회사명': company,
                     '근무지역': working_area, '채용조건': job_info})

scrape_page(BeautifulSoup(browser.page_source, 'html.parser'))

# Walk the pagination blocks, re-parsing the page after every click
for block in (range(2, 11), range(2, 12), range(3, 6)):
    for i in block:
        browser.find_element_by_xpath('//*[@id="recruit_info_list"]/div[2]/div/a[{}]'.format(i)).click()
        time.sleep(3)
        scrape_page(BeautifulSoup(browser.page_source, 'html.parser'))

browser.quit()
df = DataFrame(rows, columns=['공고명', '회사명', '근무지역', '채용조건'])
df
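Once the frame is built it can be inspected and saved right away; a small usage example (the CSV path is an assumption, mirroring the C:/data/ folder used in section 3):

print(df.shape)   # number of postings collected
print(df.head())
# utf-8-sig keeps Hangul readable when the CSV is opened in Excel
df.to_csv('C:/data/job_bigdata.csv', index=False, encoding='utf-8-sig')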
3. Writing an Excel file (.xlsx)
import xlsxwriter
from pandas import DataFrame
from selenium import webdriver
from bs4 import BeautifulSoup
import time

# Create the workbook object; nothing is written to disk at this point
workbook = xlsxwriter.Workbook("C:/data/job_bigdata.xlsx")
# Create a worksheet
worksheet = workbook.add_worksheet()

browser = webdriver.Chrome('C:/chromedriver.exe')
browser.get("http://www.saramin.co.kr/zf_user/search/recruit?search_area=main&search_done=y&search_optional_item=n&searchType=search&searchword=%EB%B9%85%EB%8D%B0%EC%9D%B4%ED%84%B0&recruitPage=1")
time.sleep(3)

rows = []
row_cnt = 1   # current Excel row

def scrape_page(soup, row_cnt):
    for item in soup.findAll('div', class_='item_recruit'):
        job_name = item.find('h2', class_='job_tit').text.strip()
        company = item.find('strong', class_='corp_name').text
        condition = item.find('div', class_='job_condition')
        working_area = condition.find('a').text.strip()
        [s.extract() for s in condition('a')]   # drop the region links, keep the rest
        job_info = condition.text.strip()
        rows.append({'공고명': job_name, '회사명': company,
                     '근무지역': working_area, '채용조건': job_info})
        # Write the same record into columns A-D of the worksheet
        worksheet.write('A{}'.format(row_cnt), job_name)
        worksheet.write('B{}'.format(row_cnt), company)
        worksheet.write('C{}'.format(row_cnt), working_area)
        worksheet.write('D{}'.format(row_cnt), job_info)
        row_cnt += 1
    return row_cnt

row_cnt = scrape_page(BeautifulSoup(browser.page_source, 'html.parser'), row_cnt)

for block in (range(2, 11), range(2, 12), range(3, 6)):
    for i in block:
        browser.find_element_by_xpath('//*[@id="recruit_info_list"]/div[2]/div/a[{}]'.format(i)).click()
        time.sleep(3)
        row_cnt = scrape_page(BeautifulSoup(browser.page_source, 'html.parser'), row_cnt)

browser.quit()
df = DataFrame(rows, columns=['공고명', '회사명', '근무지역', '채용조건'])

# close() flushes the workbook; this is when the .xlsx file is actually created
workbook.close()
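Since the postings already sit in a DataFrame, pandas can write the same file in one call; a sketch, assuming an Excel engine such as xlsxwriter or openpyxl is installed:

# Equivalent one-liner: pandas delegates the writing to the installed Excel engine
df.to_excel('C:/data/job_bigdata_pandas.xlsx', index=False)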
4. Making a wordcloud from the posting titles (+ stopword handling)
The flow mirrors section 1, with a morphological-analysis pass added in the middle: open the URL and load the data (browser = webdriver.Chrome('C:/chromedriver.exe')), clean the sentences into a single string (txt = ''), count per-word frequencies and filter stopwords (okt = Okt()), and build the wordcloud (WordCloud(font_path='C:/Windows/Fonts/malgun.ttf', ...)). A sketch of the whole pipeline follows below.
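A minimal sketch of how these four steps fit together, assuming the posting titles were collected into a list job the same way the conditions were in section 1; the stopword list is illustrative, not the original one:

from konlpy.tag import Okt
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

okt = Okt()
txt = ' '.join(job)                  # job: list of posting titles from the scrape
nouns = okt.nouns(txt)               # keep nouns only
stopwords = ['채용', '모집', '담당']  # illustrative stopwords (assumption)
count = Counter(n for n in nouns if n not in stopwords and len(n) > 1)

wordcloud = WordCloud(font_path='C:/Windows/Fonts/malgun.ttf',
                      background_color='white',
                      width=1500, height=1200).generate_from_frequencies(count)
plt.figure(figsize=(15, 12))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()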
5. Making a wordcloud from the job information (+ stopword handling)
Same pipeline as section 4, but the word-frequency step uses KoNLPy's Hannanum instead of Okt: open the URL and load the data (browser = webdriver.Chrome('C:/chromedriver.exe')), clean the sentences into a single string (txt = ''), count per-word frequencies and filter stopwords (from konlpy.tag import Hannanum), and build the wordcloud (WordCloud(font_path='C:/Windows/Fonts/malgun.ttf', ...)). A sketch follows below.
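The same pattern with Hannanum, assuming the job-condition strings sit in the list job built in section 1; the stopwords are again illustrative:

from konlpy.tag import Hannanum
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

hannanum = Hannanum()
txt = ' '.join(job)                  # job: list of job-condition strings
nouns = hannanum.nouns(txt)          # Hannanum's noun extractor
stopwords = ['경력', '신입']          # illustrative stopwords (assumption)
count = Counter(n for n in nouns if n not in stopwords and len(n) > 1)

wordcloud = WordCloud(font_path='C:/Windows/Fonts/malgun.ttf',
                      background_color='white',
                      width=1500, height=1200).generate_from_frequencies(count)
plt.figure(figsize=(15, 12))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()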