Python/Basic/紀老師的教材/webClone.py
2024-06-27 15:41:10 +08:00

85 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'''
Program: webClone.py (Report comments/bugs to chikh@yuntech.edu.tw)
Function: Download the file at a given URL using curl
'''
import os
import subprocess
import webbrowser

from PyQt5 import QtGui
from PyQt5.QtWidgets import *
class DownloadWebData(QWidget):
    """Window that downloads a web page and the files it links to with curl.

    The user enters a URL. The page is saved into the current working
    directory, every quoted value following ``href`` or ``src=`` in the page
    is examined and downloaded when it lies under the page's base URL, and
    the saved page is finally rewritten with the base URL removed so its
    links point at the local copies.
    """

    def __init__(self):
        """Build the window: a URL line edit above a download button."""
        super().__init__()
        self.setWindowTitle("下載網頁所用檔案程式")
        self.resize(500, 50)
        self.lineEdit = QLineEdit(self)
        self.pushButton = QPushButton(self)
        self.pushButton.setText("開始下載")
        font = QtGui.QFont()
        font.setFamily("微軟正黑體")
        font.setPointSize(11)
        self.lineEdit.setFont(font)
        self.pushButton.setFont(font)
        layout = QVBoxLayout()
        layout.addWidget(self.lineEdit)
        layout.addWidget(self.pushButton)
        self.setLayout(layout)
        # Pressing Enter in the line edit triggers the same handler as the
        # button (see https://bit.ly/3BxzOTy).
        self.lineEdit.returnPressed.connect(self.btnClicked)
        self.pushButton.clicked.connect(self.btnClicked)

    def btnClicked(self):
        """Download the URL typed by the user, then offer to open the result.

        The button is disabled while the download runs and is always
        re-enabled afterwards, even when a step fails. curl is run
        synchronously, so this method blocks until all downloads finish.
        """
        subjectURL = self.lineEdit.text().strip()
        if not subjectURL:
            QMessageBox.warning(self, "運作結果", "<font size = 5>網址空白,請輸入有效網址</font>", QMessageBox.Yes)
            return
        self.pushButton.setEnabled(False)
        try:
            # Equivalent to subjectURL[subjectURL.rfind("/") + 1:].
            # NOTE(review): a query string after "?" stays part of fileName;
            # confirm it matches the name curl -O actually writes.
            fileName = subjectURL.split("/")[-1]
            downloaded = self._runCurl([subjectURL, "-O", "-J", "-s"])
            if not (downloaded and fileName and os.path.isfile(fileName)):
                # Without this check a failed download surfaced as an
                # unhandled exception when the missing file was opened.
                QMessageBox.warning(self, "運作結果", "<font size = 5>下載失敗,請確認網址是否正確</font>", QMessageBox.Yes)
                return
            self.parseHTMLfile(subjectURL[:subjectURL.rfind("/") + 1], fileName)
            answer = QMessageBox.question(
                self,
                "運作結果",
                "<font size = 5>複製完成,檢視%s</font>" % fileName,
                QMessageBox.Yes | QMessageBox.No,
            )
            if answer == QMessageBox.Yes and not self._openLocalFile(fileName):
                QMessageBox.warning(self, "運作結果", "<font size = 5>無法開啟%s</font>" % fileName, QMessageBox.Yes)
            self.lineEdit.clear()
        finally:
            self.pushButton.setEnabled(True)

    def parseHTMLfile(self, mainURL, fileName):
        """Download the files referenced by a saved page and localise its links.

        Args:
            mainURL: The page URL up to and including its last "/".
            fileName: Path of the already-downloaded page on disk.

        The file is read as UTF-8 with undecodable bytes dropped
        (see https://stackoverflow.com/questions/30700166/python-open-file-error),
        scanned for "href" and "src=" targets, and written back with every
        occurrence of ``mainURL`` removed.
        """
        with open(fileName, "r", encoding="utf-8", errors="ignore") as inputFile:
            fileContents = inputFile.read()
        fileSize = len(fileContents)
        # Each pass locates the keyword and downloads (creating directories
        # as needed) the quoted URL that follows it.
        self.searchTarget(mainURL, fileContents, fileSize, "href")
        self.searchTarget(mainURL, fileContents, fileSize, "src=")
        with open(fileName, "w", encoding="utf-8") as outputFile:
            outputFile.write(fileContents.replace(mainURL, ""))

    def searchTarget(self, mainURL, fileContents, fileSize, keyword):
        """Download every double-quoted target that follows ``keyword``.

        Args:
            mainURL: Base URL that relative targets are appended to.
            fileContents: Text of the page being scanned.
            fileSize: Exclusive upper bound for the scan (callers in this
                class pass ``len(fileContents)``).
            keyword: Marker to look for, e.g. "href" or "src=".
        """
        # ">= 0" so that a keyword at offset 0 is not mistaken for "not found".
        i = fileContents.find(keyword)
        while i >= 0:
            # First and second double quote after the keyword delimit the value.
            opening = fileContents.find('"', i, fileSize)
            if opening < 0:
                break
            closing = fileContents.find('"', opening + 1, fileSize)
            if closing < 0:
                # Stop instead of restarting from the top of the text, which
                # previously could loop forever on an unterminated value.
                break
            self._fetchLinkedFile(mainURL, fileContents[opening + 1:closing])
            i = fileContents.find(keyword, closing + 1, fileSize)

    def _fetchLinkedFile(self, mainURL, filePath):
        """Download one link target into the matching local relative path."""
        if "/" not in filePath or "mailto" in filePath:
            return
        if "http" not in filePath:
            # Relative link: resolve against the page's base URL.
            url = mainURL + filePath
            localPath = filePath
        elif mainURL and filePath.startswith(mainURL):
            # Absolute link below the base URL: store it relative to the base
            # so it matches the page after mainURL has been stripped from it.
            url = filePath
            localPath = filePath[len(mainURL):]
        else:
            return
        if not localPath or not self._isInsideWorkingDir(localPath):
            # Link text comes from the downloaded page; never let it create
            # files outside the current working directory.
            return
        directory = localPath.rpartition("/")[0]
        if directory:
            try:
                os.makedirs(directory, exist_ok=True)
            except OSError:
                return
        print("下載檔案 %s" % url)
        self._runCurl([url, "-o", localPath, "-J", "-s"])

    @staticmethod
    def _isInsideWorkingDir(localPath):
        """Return True when ``localPath`` is relative and has no way to climb out."""
        if localPath.startswith(("/", "\\")):
            return False
        normalized = os.path.normpath(localPath)
        if os.path.isabs(normalized) or os.path.splitdrive(normalized)[0]:
            return False
        return normalized != os.pardir and not normalized.startswith(os.pardir + os.sep)

    @staticmethod
    def _runCurl(arguments):
        """Run curl with ``arguments`` (no shell); return True on exit code 0."""
        try:
            return subprocess.run(["curl", *arguments], check=False).returncode == 0
        except (OSError, ValueError):
            # curl is not installed/executable, or an argument is malformed.
            return False

    @staticmethod
    def _openLocalFile(fileName):
        """Open ``fileName`` with the platform's default handler; return success."""
        try:
            if hasattr(os, "startfile"):
                os.startfile(fileName)
                return True
            return bool(webbrowser.open(os.path.abspath(fileName)))
        except OSError:
            return False
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the downloader
    # window, and hand control to the Qt event loop.
    application = QApplication([])
    window = DownloadWebData()
    window.show()
    application.exec_()