85 lines
3.6 KiB
Python
85 lines
3.6 KiB
Python
|
'''
Program: webClone.py (Report comments/bugs to chikh@yuntech.edu.tw)
Function: Use curl to download the file at a given URL, together with the
          files referenced by its href/src attributes.
'''
|
|||
|
|
|||
|
import os
import subprocess

from PyQt5 import QtGui
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import *
|
|||
|
|
|||
|
class DownloadWebData(QWidget):
    """Window that downloads a web page and the files it references.

    The user types a URL; the page is fetched with curl into the current
    working directory, every quoted ``href``/``src=`` target that lives under
    the same base URL is fetched as well, and the saved page is rewritten so
    that those links become relative.
    """

    def __init__(self):
        """Build the window: a URL line edit stacked above a start button."""
        super().__init__()
        self.setWindowTitle("下載網頁所用檔案程式")
        self.resize(500, 50)

        self.lineEdit = QLineEdit(self)
        self.pushButton = QPushButton(self)
        self.pushButton.setText("開始下載")

        font = QtGui.QFont()
        font.setFamily("微軟正黑體")
        font.setPointSize(11)
        self.lineEdit.setFont(font)
        self.pushButton.setFont(font)

        layout = QVBoxLayout()
        layout.addWidget(self.lineEdit)
        layout.addWidget(self.pushButton)
        self.setLayout(layout)

        # Pressing Enter in the line edit starts the download exactly like
        # clicking the button does (see https://bit.ly/3BxzOTy).
        self.lineEdit.returnPressed.connect(self.btnClicked)
        self.pushButton.clicked.connect(self.btnClicked)

    def btnClicked(self):
        """Download the URL in the line edit, clone its resources, offer to view it.

        The button is disabled while the download runs and is always
        re-enabled afterwards, even when the download or the parsing fails.
        """
        # Surrounding whitespace is never part of a URL; stripping it also
        # makes a whitespace-only entry count as empty.
        subjectURL = self.lineEdit.text().strip()
        if not subjectURL:
            QMessageBox.warning(self,"運作結果","<font size = 5>網址空白,請輸入有效網址</font>",QMessageBox.Yes)
            return

        # Split "http://host/dir/page.html" into "http://host/dir/" and
        # "page.html"; without any "/" the base is empty.
        head, slash, fileName = subjectURL.rpartition("/")
        mainURL = head + slash

        self.pushButton.setEnabled(False)
        try:
            # NOTE: "-J" lets the server choose the saved name, and a URL with
            # a "?query" part keeps it in fileName; in both cases the file may
            # not be found below and the failure is reported to the user.
            if fileName:
                self._runCurl("--url", subjectURL, "-O", "-J", "-s")
            if not fileName or not os.path.isfile(fileName):
                # Nothing was saved under the expected name (bad URL, URL
                # ending in "/", curl missing, ...): report it instead of
                # letting open() raise inside the Qt slot.
                QMessageBox.warning(self,"運作結果","<font size = 5>下載失敗,請確認網址是否正確</font>",QMessageBox.Yes)
                return

            self.parseHTMLfile(mainURL, fileName)

            answer = QMessageBox.question(self,"運作結果","<font size = 5>複製完成,檢視%s?</font>"%fileName,QMessageBox.Yes|QMessageBox.No)
            if answer == QMessageBox.Yes:
                # Open with the desktop's default application without going
                # through a shell command.
                QtGui.QDesktopServices.openUrl(QUrl.fromLocalFile(os.path.abspath(fileName)))
            self.lineEdit.clear()
        finally:
            self.pushButton.setEnabled(True)

    def parseHTMLfile(self, mainURL, fileName):
        """Fetch the resources referenced by a saved page and localize its links.

        Args:
            mainURL: Base URL of the page, up to and including the last "/".
            fileName: Path of the already-downloaded page.

        Raises:
            OSError: If the page cannot be read or rewritten.
        """
        # errors="ignore": pages that are not valid UTF-8 must still be readable
        # (see https://stackoverflow.com/questions/30700166/python-open-file-error).
        with open(fileName, "r", encoding="utf-8", errors="ignore") as inputFile:
            fileContents = inputFile.read()
        fileSize = len(fileContents)

        # Download (and create directories for) whatever follows each
        # "href" and each "src=" occurrence.
        self.searchTarget(mainURL, fileContents, fileSize, "href")
        self.searchTarget(mainURL, fileContents, fileSize, "src=")

        # Drop the base URL so absolute same-site links become relative ones.
        with open(fileName, "w", encoding="utf-8") as outputFile:
            outputFile.write(fileContents.replace(mainURL, ""))

    def searchTarget(self, mainURL, fileContents, fileSize, keyword):
        """Download the double-quoted target following every *keyword* occurrence.

        Args:
            mainURL: Base URL of the page, up to and including the last "/".
            fileContents: Text of the page being scanned.
            fileSize: Upper bound (exclusive) of the region of *fileContents*
                that is scanned.
            keyword: Marker to look for, e.g. "href" or "src=".
        """
        position = fileContents.find(keyword, 0, fileSize)
        while position != -1:  # -1 means "not found"; index 0 is a valid match
            opening = fileContents.find('"', position, fileSize)
            if opening == -1:
                break  # no quoted value left anywhere after this keyword
            closing = fileContents.find('"', opening + 1, fileSize)
            if closing == -1:
                break  # unterminated quote: stop instead of rescanning from 0
            self._downloadResource(mainURL, fileContents[opening + 1:closing])
            position = fileContents.find(keyword, closing + 1, fileSize)

    def _downloadResource(self, mainURL, filePath):
        """Fetch one referenced resource into a matching local directory."""
        # Targets without a directory part and mail links are not cloned.
        if "/" not in filePath or "mailto" in filePath:
            return

        if "http" not in filePath:
            # Relative reference: resolve it against the base URL.
            remoteURL = mainURL + filePath
            localPath = filePath
        elif mainURL in filePath:
            # Absolute reference below the base URL. Store it under the same
            # relative path that parseHTMLfile leaves in the rewritten page.
            remoteURL = filePath
            localPath = filePath.replace(mainURL, "")
        else:
            return  # resource hosted elsewhere: left untouched

        if not localPath:
            return  # the link points at the base URL itself

        # NOTE(review): localPath comes from downloaded HTML and may be
        # root-relative or contain ".." and so land outside the working
        # directory; consider rejecting such paths.
        directory = localPath.rpartition("/")[0]
        if directory:
            try:
                os.makedirs(directory, exist_ok=True)
            except OSError as error:
                print("無法建立目錄 %s: %s" % (directory, error))
                return

        print("下載檔案 %s" % remoteURL)
        self._runCurl("--url", remoteURL, "-o", localPath, "-J", "-s")

    @staticmethod
    def _runCurl(*arguments):
        """Run curl with *arguments* (no shell involved); return True on exit code 0."""
        try:
            # A list of arguments keeps "&", spaces or quotes in URLs and
            # paths from being interpreted by a shell.
            return subprocess.run(["curl", *arguments]).returncode == 0
        except OSError:
            # curl is not installed or could not be started.
            return False
|
|||
|
|
|||
|
|
|||
|
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the downloader
    # window, then block in the event loop until the window is closed.
    application = QApplication(list())
    window = DownloadWebData()
    window.show()
    application.exec_()
|