"""Fetch the page at URL and print the text of every link matched by '.ttl a'."""
from pprint import pprint
from urllib.request import urlopen

from bs4 import BeautifulSoup

URL = 'http://news.yahoo.co.jp/'

with urlopen(URL) as res:
    # Honor the charset declared in the Content-Type header instead of
    # assuming UTF-8; fall back to UTF-8 only when the server names none.
    charset = res.headers.get_content_charset() or "utf-8"
    html = res.read().decode(charset)

soup = BeautifulSoup(html, 'html.parser')

# Select the <a> elements nested under elements with class "ttl".
titles = soup.select('.ttl a')

# Keep the first child node of each anchor (its leading text). Anchors with
# no children are skipped so an empty <a> cannot raise IndexError.
titles = [t.contents[0] for t in titles if t.contents]

pprint(titles)

# Sample output from the original write-up (truncated in the source):
# >>> ['トランプ氏「司法行き過ぎ」', 'PKO日報 特別防衛監察を指示', '病院で投与ミス 一時心肺停止', '特攻服の中学生ら