[Python] 纯文本查看 复制代码
import csv

import requests
from lxml import etree

# Base URL of the dictionary site; the looked-up word is appended to it.
url = "https://www.youdict.com/w/"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
}

WORDLIST_PATH = "wordlist.csv"   # input: CSV with a "word" column
OUTPUT_PATH = "sentence.csv"     # output: one tab-delimited field per row

# XPath positions are 1-based, so dl[0] never matches anything. The original
# loop started at 0 and therefore wrote one empty row after every word.
SENTENCE_POSITIONS = range(1, 6)

# Seconds to wait for the server before giving up on a single word.
REQUEST_TIMEOUT = 10


def _extract_sentence(html, position):
    """Return the example sentence at 1-based *position*, or '' if absent.

    The sentence text is split across three nodes inside the <dt>: the text
    before the <b> element, the <b> element's own text, and the text after it.
    The three pieces are concatenated in that order.
    """
    base = '//div[@class="row"]/dl[{}]/dt'.format(position)
    parts = (
        html.xpath(base + '/text()[1]')
        + html.xpath(base + '/b/text()')
        + html.xpath(base + '/text()[2]')
    )
    return ''.join(parts)


def sentence_download(overwrite=False):
    """Fetch example sentences for every word in the word list.

    Reads the "word" column of ``wordlist.csv``, requests the word's page,
    and writes to ``sentence.csv`` one row holding the word followed by one
    row per example sentence found (up to five). Progress is echoed to the
    console.

    Args:
        overwrite: When true, existing content of ``sentence.csv`` is
            discarded first; otherwise new rows are appended (the default,
            matching the original behaviour of a bare call).

    Raises:
        FileNotFoundError: If ``wordlist.csv`` does not exist. The word list
            is opened before the output file, so the output is not truncated
            in that case.
        KeyError: If the word list has no "word" column.
    """
    mode = "w" if overwrite else "a"
    # newline="" is what the csv module expects for both reading and writing;
    # without it rows end in "\r\r\n" on Windows.
    # "utf-8-sig" transparently drops a BOM if the word list has one.
    # The output is written as UTF-8 so scraped text never fails to encode
    # under a narrower locale default. NOTE: appending to a file that was
    # created earlier with a different encoding will mix encodings.
    with open(WORDLIST_PATH, "r", newline="", encoding="utf-8-sig") as wordlist, \
            open(OUTPUT_PATH, mode, newline="", encoding="utf-8") as out:
        # delimiter defaults to a comma; a tab is used here instead.
        writer = csv.writer(out, delimiter='\t')
        for row in csv.DictReader(wordlist):
            word = row["word"]
            print("{}例句".format(word))
            try:
                res = requests.get(url + word, headers=headers,
                                   timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # One unreachable page should not abort the whole run.
                print("request failed, skipping {}: {}".format(word, exc))
                continue
            # requests falls back to ISO-8859-1 here; apparent_encoding
            # detects the real charset so the Chinese text decodes correctly.
            res.encoding = res.apparent_encoding
            html = etree.HTML(res.text)

            # Each value is wrapped in a list: writerow() on a bare string
            # would split it into one field per character.
            writer.writerow([word])
            if html is None:
                # Empty response body: nothing to parse.
                continue
            for position in SENTENCE_POSITIONS:
                sentence = _extract_sentence(html, position)
                if not sentence:
                    # Fewer than five examples on the page.
                    continue
                print(sentence)
                writer.writerow([sentence])


def main():
    """Ask whether to overwrite existing data, then run the download."""
    # Only the first non-blank character counts; empty input means "no"
    # instead of raising IndexError.
    answer = input('是否覆盖原有数据(N/Y)').strip()[:1].upper()
    sentence_download(overwrite=(answer == "Y"))


if __name__ == "__main__":
    main()
结果将以 csv 格式保存。新手作品,会逐步改进。以下以四个单词为例:
[Plain Text] 纯文本查看 复制代码
amid""1. They announced, amid much ballyhoo, that they had made a breakthrough. 2. Dr Amid was assisted by a young Asian nurse. 3. Amid the trees the sea mist was dripping. 4. Children were changing classrooms amid laughter and shouts. 5. Dr Amid probed around the sensitive area. amidst""1. Whatever troubles arise, we'll have peace of mind amidst seeming chaos. 2. Amidst the current bunch of nonentities, he is a towering figure. 3. The bridge was completed in 1811 amidst much rejoicing. 4. He did his best to retrieve the situation, amidst some laughter. 5. The hooligans broke up amidst loud shouts and screams. amigo""1. C : Hola Paula . Pem í teme presentarte a mi amigo Juan. 2. Paseando por la calle, he encontrado a un viejo amigo. 3. Le da un abrazo a su amigo. 4. Amigo, you need a better ventilation back there. 5. Your time will be come, amigo. amine""1. Nonionics include polyoxyethylene condensates, sucrose esters and alkyl amine oxides. 2. Nonionic surface - active agents include polyoxyethylene condensates, sucrose esters and alkyl amine oxides. 3. Adding other groups at the amine stage may be advisable. 4. These amine substituted acid may polymenze in an analogous manner to unsubst ituted oxo - anions. 5. Demulsifier TKQ comprises of phenol - amine resin block polyethers and quaternized polyamidoamine.