
โ Selenium์ด๋?
์น ๋ธ๋ผ์ฐ์ ๋ฅผ ์ฌ๋์ด ์ง์ ์กฐ์ํ๋ ๊ฒ์ฒ๋ผ ์๋์ผ๋ก ์ ์ดํ ์ ์๊ฒ ํด์ฃผ๋ ๋๊ตฌ
(BeautifulSoup๋ html์์๋ง ์ฌ์ฉ ๊ฐ๋ฅํ๋ค.)
๊ธฐ๋ณธ ์ฌ์ฉ๋ฒ
from pathlib import Path

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Search Google for KEYWORD and save one screenshot per visible result block.

KEYWORD = "buy domain"
SCREENSHOT_DIR = Path("screenshots")

# Selenium reports a failed screenshot write by returning False instead of
# raising, so a missing output directory would otherwise go unnoticed.
SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True)

options = Options()
# Per the accompanying notes, this flag is used to keep Google's
# human-verification page from appearing.
options.add_argument("--disable-blink-features=AutomationControlled")

browser: WebDriver = webdriver.Chrome(options=options)
try:
    browser.get("https://google.com")

    # Explicit wait: returns as soon as the element exists, and raises
    # TimeoutException if it has not appeared after 10 seconds.
    search_bar = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "gLFyf"))
    )
    search_bar.send_keys(KEYWORD)
    search_bar.send_keys(Keys.ENTER)

    search_results = WebDriverWait(browser, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "MjjYud"))
    )
    for index, search_result in enumerate(search_results):
        # Skip hidden or zero-height elements; there is nothing to capture.
        if search_result.is_displayed() and search_result.size["height"] > 0:
            search_result.screenshot(str(SCREENSHOT_DIR / f"{KEYWORD}x{index}.png"))
finally:
    # Always end the browser and its session, even when a wait times out.
    browser.quit()
๊ตฌ๊ธ ์ฌ์ดํธ ์ด๊ณ ๊ฒ์์ฐฝ์ ์ฐพ์ ๊ฒ์์ด๋ฅผ ์ ์ ํ ์ํฐ๋ฅผ ๋๋ฅด๊ณ , ์ด๋ํ ํ์ด์ง์์ class๋ก element ์ฐพ๋ ๋์์ ์คํํ๋๋ฐ, ์ฐพ์ ๋๊น์ง 10์ด๊ฐ ๊ธฐ๋ค๋ฆฌ๊ณ ๋ง์ฝ ์ผ์ฐ ์ฐพ์๋ค๋ฉด 10์ด๋ฅผ ์ ๋ถ ๊ธฐ๋ค๋ฆฌ๋ ๊ฒ์ด ์๋ ๋ฐ๋ก ๋ค์์ผ๋ก ๋์ด๊ฐ์ ํ๋ฉด์ ๋ณด์ด๊ณ height๊ฐ 0์ด ์๋ ๋ชจ๋ element๋ค์ ์ฐพ์ ์คํฌ๋ฆฐ์ท์ ์ฐ๋๋ค.
https://selenium-python.readthedocs.io/waits.html#explicit-waits
์์ options๋ ์ฌ๋ ์ธ์ฆ ํ์ด์ง๊ฐ ๋จ์ง ์๊ฒ ํ๋ ๋ฐฉ๋ฒ.
browser.quit() vs browser.close()
quit()์ ๋ธ๋ผ์ฐ์ ์ ์ฒด + ์ธ์ ์ข ๋ฃ
close()๋ ํ์ฌ ํญ๋ง ๋ซ๋๋ค.
โ Selenium์ผ๋ก ํ์ด์ฌ์์ ์๋ฐ์คํฌ๋ฆฝํธ๋ก ์ธ์๋ฅผ ๋ณด๋ด ์๋ฐ์คํฌ๋ฆฝํธ๋ฅผ ์คํ ์ํฌ ์ ์๋ค.
execute_script()
# JavaScript to run inside the page. Values passed after the script are
# available to it through the `arguments` array.
alert_script = """
alert(arguments[0])
"""
browser.execute_script(alert_script, "hello!!")
execute_script(script, *args)
Scraping 할 때 필요없는 element 지우기
# Remove an unwanted element from the page before scraping.
# Element.remove() is used instead of parentElement.removeChild(): it does
# nothing when the node has no parent, whereas the removeChild form throws
# a TypeError because parentElement is null in that case.
browser.execute_script(
    """
    arguments[0].remove();
    """,
    shitty_element,
)
โ ์คํฌ๋กค ํ ์ ์๋ ์ ์ฒด ๋์ด ๊ตฌํ๋ ๋ฐฉ๋ฒ
document.body.scrollHeight
์คํฌ๋กค ํ ์ ์๋ ๋์ด๊ฐ ๋ช ํฝ์ ์ธ์ง ์๋ ค์ค๋ค.
์ด ๋์ด๋ฅผ ๋ธ๋ผ์ฐ์ ์ ์ฌ์ด์ฆ๋ก ๋๋๋ฉด ์คํฌ๋กค์ ๋ช ๋ฒ ํด์ผ ํ ์ง ๋์จ๋ค.
JavaScript์์ Python์ผ๋ก ๊ฐ์ ๋ณด๋ด๋ ๋ฐฉ๋ฒ
# For each window width, read the page's scrollable height back from JavaScript.
for size in sizes:
    browser.set_window_size(size, 949)
    # `return` in the script is what hands the value back to Python; keep the
    # result instead of discarding it.
    scroll_height = browser.execute_script("return document.body.scrollHeight")
    print(size, scroll_height)
    time.sleep(5)
script๋ด์ return์ ์ ์ด์ฃผ๋ฉด ๋๋ค.
์คํฌ๋กคํ๊ธฐ
import time
from math import ceil
from pathlib import Path

from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

# Capture the whole page, one viewport at a time, at several window widths.

BROWSER_HEIGHT = 949
SCREENSHOT_DIR = Path("screenshots")

# save_screenshot() returns False instead of raising when the file cannot be
# written, so make sure the output directory exists up front.
SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True)

browser: WebDriver = webdriver.Chrome()
try:
    browser.get("https://nomadcoders.co")
    browser.maximize_window()
    time.sleep(6)
    # NOTE(review): presumably dismisses a popup shown after load - confirm.
    ActionChains(browser).send_keys(Keys.ESCAPE).perform()

    sizes = [480, 960, 1366, 1920]
    for size in sizes:
        browser.set_window_size(size, BROWSER_HEIGHT)
        browser.execute_script("window.scrollTo(0, 0)")
        time.sleep(3)

        scroll_size = browser.execute_script("return document.body.scrollHeight")
        # set_window_size() sizes the outer window; the visible page area is
        # shorter, so step by the real viewport height to avoid skipping content.
        viewport_height = (
            browser.execute_script("return window.innerHeight") or BROWSER_HEIGHT
        )
        total_sections = ceil(scroll_size / viewport_height)

        for section in range(total_sections):
            # Start at offset 0 so the top of the page is captured as well.
            browser.execute_script(
                f"window.scrollTo(0, {section * viewport_height})"
            )
            # Let the scroll and any lazily loaded content settle first.
            time.sleep(2)
            browser.save_screenshot(
                str(SCREENSHOT_DIR / f"{size}x{section + 1}.png")
            )

    # Keep the browser open for inspection until the user presses Enter.
    input("pause")
finally:
    browser.quit()
✅ Action Chains
ActionChains = "사용자의 복합 행동(키보드 + 마우스)을 순서대로 실행하는 도구"
(사람처럼 행동을 흉내 내기 위한 Selenium 도구)
Usage
import sys

# Modifier-click every hashtag link (presumably to open each one in a new tab).
# The modifier is Command on macOS and Control elsewhere.
NEW_TAB_MODIFIER = Keys.COMMAND if sys.platform == "darwin" else Keys.CONTROL

for hashtag in hashtags:
    (
        ActionChains(browser)
        .key_down(NEW_TAB_MODIFIER)
        .click(hashtag)
        # Release the modifier so it does not stay pressed for later actions.
        .key_up(NEW_TAB_MODIFIER)
        .perform()
    )
✅ Window handles
열려 있는 모든 창을 관리할 수 있게 해준다.
from urllib.parse import unquote

# Text that precedes the hashtag in a hashtag-search URL ("%23" is "#").
HASHTAG_URL_MARKER = "explore/search/keyword/?q=%23"

# Visit every open window/tab and print the hashtag encoded in its URL.
for window in browser.window_handles:
    browser.switch_to.window(window)
    _, marker, encoded_hashtag = browser.current_url.rpartition(HASHTAG_URL_MARKER)
    if not marker:
        # Not a hashtag-search tab; skip it rather than print the whole URL
        # as if it were a hashtag.
        continue
    # Decode percent-escapes so non-ASCII hashtags print readably.
    hashtag_name = unquote(encoded_hashtag.strip("/"))
    print(hashtag_name)
    time.sleep(1)
โ Xpath
class๋ name๊ฐ์ ๋ฐฉ๋ฒ์ผ๋ก๋ element๋ฅผ ํน์ ํ๊ธฐ ํ๋ค ๋ ์ฌ์ฉํ๋ค.

XPath๋ฅผ ์ฌ์ฉํ๋ฉด ๋ธ๊น์ผ๋ก ํด๋น element๋ฅผ ์ฐธ์กฐํ ์ ์๋ค.
'🐍 Python' 카테고리의 다른 글
| Python - venv 가상 환경 세팅 (macOS), pyenv (0) | 2026.04.06 |
|---|