1.  select_one() 

๐Ÿฅ‘  find๊ฐ€ ์›ํ•˜๋Š” ํƒœ๊ทธ๋ฅผ ์ฐพ๋Š”๊ฒŒ ๋ชฉ์ ์ด๋ผ๋ฉด select๋Š” CSS selector๋กœ tag ๊ฐ์ฒด๋ฅผ ์ฐพ์•„ ๋ฐ˜ํ™˜
๐Ÿฅ‘  select_one()์€ ์›ํ•˜๋Š” ํƒœ๊ทธ ํ•˜๋‚˜๋งŒ ๊ฐ€์ ธ์˜ค๊ณ , ํƒœ๊ทธ๊ฐ€ ๋งŽ์€ ๊ฒฝ์šฐ์—๋Š” ๋งจ ์•ž์˜ ๊ฒƒ๋งŒ ๊ฐ€์ ธ์˜ด

 

    ๐Ÿฐ select ๊ณ„์—ด์˜ ๋ฉ”์†Œ๋“œ๋Š” css selector ์ด์šฉ ๊ฐ€๋Šฅ
    ๐Ÿฐ  '.' -> class ์†์„ฑ /  '#' -> id ์†์„ฑ
    ๐Ÿฐ class : ํ•˜๋‚˜์˜ html ์—์„œ ์—ฌ๋Ÿฌ ํƒœ๊ทธ์— ์ค‘๋ณต ์‚ฌ์šฉ ๊ฐ€๋Šฅ
    ๐Ÿฐ id : ํ•˜๋‚˜์˜ html์—์„œ ํ•œ๋ฒˆ๋งŒ ์‚ฌ์šฉ. ๊ถŒ์žฅ์‚ฌํ•ญ

# Read the text inside an element
title = soup.select_one('title')

# .string is shown first (only the selected element's own text), followed by
# .text and .get_text(), which also include the text of nested elements.
for title_text in (title.string, title.text, title.get_text()):
    print(title_text)

 

 

1)  select_one('태그명') 사용 예제

# Daum > News > IT: fetch the page so that the title and press name of the
# first post in the "Today's series" section can be read from it.
url = 'https://news.daum.net/digital#1'
# requests applies no timeout by default, so set one to avoid waiting forever.
resp = requests.get(url, timeout=10)
# Stop on a 4xx/5xx response instead of parsing an error page.
resp.raise_for_status()
soup = bs(resp.text, 'html.parser')

html 소스

 

# First <li> found inside the element with class "list_todayseries".
# select_one() returns None when nothing matches, so a changed page layout
# shows up below as an AttributeError on None.
tag_series = soup.select_one('.list_todayseries li')
pprint.pprint(tag_series)

# Output labels are Korean: 제목 = "title", 신문사 = "press".
# Title: text of the first ".link_txt" element inside that <li>.
tag_series_title = tag_series.select_one('.link_txt').text
print(f'제목: {tag_series_title}')
# Example output:
# 제목: 전자사전으로 인기 끌던 '샤프'...최근엔 AI 아바타와 함께

# Press name: text of the first ".txt_info" element inside that <li>.
tag_series_press = tag_series.select_one('.txt_info').text
print(f'신문사: {tag_series_press}')
# Example output:
# 신문사: 전자신문

tag_series 실행결과

 


 

2)  select_one('CSS์„ ํƒ์ž') ์˜ˆ์ œ

 

import requests
from bs4 import BeautifulSoup as bs
import pprint

# Hollys Coffee: store search page
url = 'https://www.hollys.co.kr/store/korea/korStore2.do'
# requests applies no timeout by default, so set one to avoid waiting forever.
resp = requests.get(url, timeout=10)
# Stop on a 4xx/5xx response instead of parsing an error page.
resp.raise_for_status()
soup = bs(resp.text, 'html.parser')

매장 테이블의 html 소스

 

# Get the store table.
# The selector is a chain of direct-child ('>') steps, listed one per line
# and joined into a single selector string.
table_path = [
    '#contents',
    'div.content',
    'fieldset',
    'fieldset',
    'div.tableType01',
    'table',
]
stores = soup.select_one(' > '.join(table_path))
pprint.pprint(stores)

  

 

 

 

 

๐Ÿ“Œ  ์™ผ์ชฝ์€ css selector๋กœ ๊ฐ€์ ธ์˜จ ๊ฒฐ๊ณผ์ด๋‹ค

      ➡️ selector 소스를 가져오는 방법은 html 소스코드 중 해당 태그 위에 커서를 가져다 놓고 우클릭 ▶️ 복사 ▶️ selector 복사 버튼을 클릭하면 된다.

 

 

 

 

 

 

 

 

 

 

 

 

 

# First store row.
# `stores` is already the store <table>, and select_one() on it only searches
# inside that table, so the selector just needs the path from the table down
# to the row. Repeating the page-level '#contents > ... > table' prefix was
# redundant and tied this lookup to the page layout a second time.
first_store = stores.select_one('tbody > tr:nth-child(1)')
pprint.pprint(first_store)

 

first_store 결과

 

# td:nth-child(2) -> the <td> that is the 2nd child of its row
# NOTE(review): the variables are named second_store_* but are read from
# first_store; names kept as-is in case later code refers to them.
second_store_name = first_store.select_one('td:nth-child(2)')
print(second_store_name.text)
# Example output: 부산사상광장점

# td:nth-child(4) -> the <td> that is the 4th child of its row
second_store_addr = first_store.select_one('td:nth-child(4)')
print(second_store_addr.text)
# Example output: 부산광역시 사상구 광장로 22 (괘법동) 2층 사상구 괘법동 565-2

 


 

2. select()

๐Ÿฅ‘  CSS selector๋กœ ์ง€์ •ํ•œ ํƒœ๊ทธ๋“ค์„ ๋ชจ๋‘ ๊ฐ€์ ธ์˜ค๋Š” ๋ฉ”์†Œ๋“œ๋กœ ๊ฐ€์ ธ์˜จ ํƒœ๊ทธ๋“ค์€ ๋ชจ๋‘ ๋ฆฌ์ŠคํŠธ์— ๋ณด๊ด€

# Crawl the Naver exchange-rate page.
# (On Naver: search for '환율' (exchange rate), then open '환율 더보기'.)

url = 'https://finance.naver.com/marketindex'
# requests applies no timeout by default, so set one to avoid waiting forever.
resp = requests.get(url, timeout=10)
# Stop on a 4xx/5xx response instead of parsing an error page.
resp.raise_for_status()
soup = bs(resp.text, 'html.parser')
pprint.pprint(soup)

실행결과

 

# Get the country names from the exchange-rate notice list
nations = soup.select('#exchangeList > li > a.head > h3 > span')
print(nations)  # returned as a list

실행결과

 

# ๋‚˜๋ผ๋ณ„ ํ™˜์œจ ๊ฐ€์ ธ์˜ค๊ธฐ
exchange_rates = soup.select('#exchangeList > li > a.head > div > span.value')
print(exchange_rates)

 

# ๋‚˜๋ผ๋ณ„ ํ™”ํ ๋‹จ์œ„์™€ ํ™˜์œจ ๊ฐ™์ด ์ถœ๋ ฅํ•˜๊ธฐ
for idx, item in enumerate(nations):
    print(f'{item.text} : {exchange_rates[idx].text}')
  
'''
์‹คํ–‰๊ฒฐ๊ณผ)
๋ฏธ๊ตญ USD : 1,317.50
์ผ๋ณธ JPY(100์—”) : 895.98
์œ ๋Ÿฝ์—ฐํ•ฉ EUR : 1,440.55
์ค‘๊ตญ CNY : 182.99
'''

 


 

3. CSS selector 

 

1) 태그명 선택    ex. li, a

# Select by tag name: every <a> element
test = soup.select('a')

 

 

2) 하위 태그 선택   ex. ul a / ul > a

# 'ancestor descendant' (space): <a> at any depth below an <li>
test = soup.select('li a')
# 'parent > child' ('>'): <a> that is a direct child of an <li>
test = soup.select('li > a')

 

3) 클래스 이름으로 선택   ex. li.course / .course / li.course.paid

# tag.classname
test = soup.select('li.value')
# .classname
test = soup.select('.value')
# tag.classname.classname (when the element has several classes)
test = soup.select('li.value.fieldset')

 

 

4) id 이름으로 선택   ex. #start

# '#id-name'
test = soup.select('#list50')
# 'tagname#id-name'
test = soup.select('tr#list50')

 

 

 

 

[ 내용 참고 : IT 학원 강의 ]

+ Recent posts