"""
Program: webClone.py (Report comments/bugs to chikh@yuntech.edu.tw)
Function: download the file at a given URL with curl, then fetch the
          resources that file references through href/src attributes.
"""
import os
import subprocess

from PyQt5 import QtCore
from PyQt5 import QtGui
from PyQt5.QtWidgets import *


def _run_curl(url, *options):
    """Run curl silently for ``url``; return True when it exits with status 0.

    The command is an argument list executed without a shell, so characters
    such as ``&``, ``|`` or ``;`` inside a URL (typed by the user or read from
    downloaded HTML) can never be interpreted as shell commands.

    Args:
        url: The URL to fetch.
        *options: Extra curl arguments, e.g. ``"-O"`` or ``"-o", path``.
    """
    command = ["curl", "--url", url, *options, "-J", "-s"]
    try:
        return subprocess.run(command, check=False).returncode == 0
    except (OSError, ValueError):
        # curl is missing / cannot be started, or an argument is not
        # representable (e.g. it contains a NUL character).
        return False


def _is_safe_local_path(localPath):
    """Return True when ``localPath`` is relative and has no ``..`` segment.

    Link targets come from downloaded HTML; without this check a page could
    make the program write files outside the current working directory.
    """
    normalized = localPath.replace("\\", "/")
    if not normalized or normalized.startswith("/"):
        return False
    if os.path.isabs(localPath) or os.path.splitdrive(localPath)[0]:
        return False
    return ".." not in normalized.split("/")


class DownloadWebData(QWidget):
    """Window with one URL field and one button that starts the download."""

    def __init__(self):
        """Build the window: a line edit for the URL above a start button."""
        super().__init__()
        self.setWindowTitle("下載網頁所用檔案程式")
        self.resize(500, 50)

        self.lineEdit = QLineEdit(self)
        self.pushButton = QPushButton(self)
        self.pushButton.setText("開始下載")

        font = QtGui.QFont()
        font.setFamily("微軟正黑體")
        font.setPointSize(11)
        self.lineEdit.setFont(font)
        self.pushButton.setFont(font)

        layout = QVBoxLayout()
        layout.addWidget(self.lineEdit)
        layout.addWidget(self.pushButton)
        self.setLayout(layout)

        # Pressing Enter in the line edit behaves like clicking the button
        # (see https://bit.ly/3BxzOTy).
        self.lineEdit.returnPressed.connect(self.btnClicked)
        self.pushButton.clicked.connect(self.btnClicked)

    def btnClicked(self):
        """Download the URL in the line edit, then the resources it links to.

        The page is saved in the current working directory under the last
        path segment of the URL. A warning box is shown, and nothing is
        parsed, when the URL is blank, has no file name, or the download
        does not produce that file. The button is disabled while working and
        always re-enabled afterwards.
        """
        subjectURL = self.lineEdit.text().strip()
        if not subjectURL:
            QMessageBox.warning(self, "運作結果", "網址空白,請輸入有效網址", QMessageBox.Yes)
            return

        # Equivalent to subjectURL[subjectURL.rfind("/") + 1:].
        # NOTE: for URLs with a query string (e.g. ASP pages) the "?..." part
        # stays in fileName; stripping it was left disabled by the author.
        fileName = subjectURL.split("/")[-1]
        if not fileName:
            # A URL ending in "/" gives curl -O no file name to save under.
            QMessageBox.warning(self, "運作結果", "網址缺少檔名,請輸入完整的檔案網址", QMessageBox.Yes)
            return

        self.pushButton.setEnabled(False)
        try:
            downloaded = _run_curl(subjectURL, "-O")
            # With -J the server may choose another file name, so also check
            # that the file we are about to parse really exists.
            if not downloaded or not os.path.isfile(fileName):
                QMessageBox.warning(self, "運作結果", "下載失敗,請確認網址是否正確", QMessageBox.Yes)
                return

            self.parseHTMLfile(subjectURL[:subjectURL.rfind("/") + 1], fileName)

            answer = QMessageBox.question(
                self, "運作結果", "複製完成,檢視%s?" % fileName,
                QMessageBox.Yes | QMessageBox.No)
            if answer == QMessageBox.Yes:
                # Open with the desktop's default application; no shell is
                # involved and it is not tied to the Windows "start" command.
                QtGui.QDesktopServices.openUrl(
                    QtCore.QUrl.fromLocalFile(os.path.abspath(fileName)))
            self.lineEdit.clear()
        finally:
            # Re-enable the button even if something above raised.
            self.pushButton.setEnabled(True)

    def parseHTMLfile(self, mainURL, fileName):
        """Download the resources referenced by ``fileName`` and localise it.

        Args:
            mainURL: URL prefix of the page, up to and including the last "/".
            fileName: Path of the already downloaded page.

        Raises:
            OSError: If ``fileName`` cannot be read or written.
        """
        # errors="ignore" tolerates pages that are not valid UTF-8, see
        # https://stackoverflow.com/questions/30700166/python-open-file-error
        with open(fileName, "r", encoding="utf-8", errors="ignore") as inputFile:
            fileContents = inputFile.read()
        fileSize = len(fileContents)

        # Act on every URL that follows an occurrence of "href" / "src=".
        self.searchTarget(mainURL, fileContents, fileSize, "href")
        self.searchTarget(mainURL, fileContents, fileSize, "src=")

        # Turn absolute links below mainURL into relative ones. The file is
        # rewritten only when that changes something, because the read above
        # silently drops undecodable bytes and a needless rewrite would
        # damage files that are not UTF-8 text.
        localised = fileContents.replace(mainURL, "")
        if localised != fileContents:
            with open(fileName, "w", encoding="utf-8") as outputFile:
                outputFile.write(localised)

    def searchTarget(self, mainURL, fileContents, fileSize, keyword):
        """Download every double-quoted target that follows ``keyword``.

        Args:
            mainURL: URL prefix of the page, up to and including the last "/".
            fileContents: Text of the page being scanned.
            fileSize: Index in ``fileContents`` at which scanning stops.
            keyword: Attribute marker to look for ("href" or "src=").
        """
        position = fileContents.find(keyword, 0, fileSize)
        while position >= 0:  # >= 0 so a keyword at index 0 is not missed
            opening = fileContents.find('"', position, fileSize)
            if opening < 0:
                break  # no quoted value after this keyword, nor after later ones
            closing = fileContents.find('"', opening + 1, fileSize)
            if closing < 0:
                break  # unterminated value: stop instead of rescanning forever
            self._downloadTarget(mainURL, fileContents[opening + 1:closing])
            position = fileContents.find(keyword, closing + 1, fileSize)

    def _downloadTarget(self, mainURL, filePath):
        """Fetch one link target into the matching local relative path."""
        # Targets without "/" (same-directory files, anchors) and mail links
        # are not downloaded.
        if filePath.rfind("/") < 0 or "mailto" in filePath:
            return

        if "http" not in filePath:
            # Relative link: resolve against the page's base URL.
            url = mainURL + filePath
            localPath = filePath
        elif mainURL and mainURL in filePath:
            # Absolute link below mainURL: save it where the localised page
            # will look for it, i.e. with mainURL removed exactly as
            # parseHTMLfile removes it from the HTML.
            url = filePath
            localPath = filePath.replace(mainURL, "")
        else:
            return  # link to another site

        # Skip targets that would land outside the working directory and
        # targets that name a directory rather than a file.
        if not _is_safe_local_path(localPath) or not os.path.basename(localPath):
            return

        directory = os.path.dirname(localPath)
        if directory:
            try:
                os.makedirs(directory, exist_ok=True)  # create the folder
            except OSError:
                return  # best effort: skip targets whose folder cannot be made

        print("下載檔案 %s" % url)
        _run_curl(url, "-o", localPath)


if __name__ == "__main__":
    app = QApplication([])
    win = DownloadWebData()
    win.show()
    app.exec_()