# Python/Basic/紀老師的教材/webClone.py

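'''
Program: webClone.py (Report comments/bugs to chikh@yuntech.edu.tw)
Function: Download the file at a given URL with curl, then download the
          files referenced by its href/src attributes and rewrite the saved
          page so that links under the page's own URL prefix become relative.
'''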

import os
import subprocess
import webbrowser
from pathlib import Path

from PyQt5 import QtGui
from PyQt5.QtWidgets import *

class DownloadWebData(QWidget):
	"""Window that clones one web page together with the files it references.

	The user types a URL; the page is fetched with the external ``curl``
	program into the current working directory, every double-quoted value
	following ``href`` or ``src=`` is examined and, when it qualifies,
	downloaded as well, and finally the saved page is rewritten so that
	occurrences of the page's URL prefix are removed (making links relative).
	"""

	def __init__(self):
		"""Build the window: a URL input line stacked above a download button."""
		super().__init__()
		self.setWindowTitle("下載網頁所用檔案程式")
		self.resize(500,50)
		self.lineEdit = QLineEdit(self)
		self.pushButton = QPushButton(self)
		self.pushButton.setText("開始下載")
		font = QtGui.QFont()
		font.setFamily("微軟正黑體")
		font.setPointSize(11)
		self.lineEdit.setFont(font)
		self.pushButton.setFont(font)
		layout = QVBoxLayout()
		layout.addWidget(self.lineEdit)
		layout.addWidget(self.pushButton)
		self.setLayout(layout)
		# Pressing Enter in the input line triggers the same handler as the
		# button (see https://bit.ly/3BxzOTy).
		self.lineEdit.returnPressed.connect(self.btnClicked)
		self.pushButton.clicked.connect(self.btnClicked)

	def btnClicked(self):
		"""Download the URL in the input line, mirror its assets, offer to open it.

		The button is disabled while the work runs and is always re-enabled,
		even when the download fails or an exception propagates.
		"""
		subjectURL = self.lineEdit.text().strip()
		if not subjectURL:
			QMessageBox.warning(self,"運作結果","<font size = 5>網址空白，請輸入有效網址</font>",QMessageBox.Yes)
			return
		self.pushButton.setEnabled(False)
		try:
			# Local file name = text after the last "/"; URL prefix = everything
			# up to and including that "/".
			fileName = subjectURL.split("/")[-1]
			mainURL = subjectURL[:subjectURL.rfind("/")+1]
			# An empty file name (URL ending in "/") cannot be saved with -O, and
			# a failed curl run leaves nothing to parse; report instead of crashing.
			downloaded = (
				bool(fileName)
				and self._runCurl([subjectURL, "-O", "-J", "-s"])
				and os.path.isfile(fileName)
			)
			if not downloaded:
				QMessageBox.warning(self,"運作結果","<font size = 5>下載失敗，請確認網址是否正確</font>",QMessageBox.Yes)
				return
			self.parseHTMLfile(mainURL,fileName)
			if QMessageBox.question(self,"運作結果","<font size = 5>複製完成，檢視%s？</font>"%fileName,QMessageBox.Yes|QMessageBox.No) == QMessageBox.Yes:
				self._openLocalFile(fileName)
			self.lineEdit.clear()
		finally:
			self.pushButton.setEnabled(True)

	def parseHTMLfile(self,mainURL,fileName):
		"""Download the files referenced by a saved page and relativize its links.

		Args:
			mainURL: URL prefix of the page (up to and including the last "/").
			fileName: Path of the already-downloaded page on disk.

		Raises:
			OSError: If ``fileName`` cannot be read or written.
		"""
		# Undecodable bytes are dropped rather than aborting the whole run
		# (see https://stackoverflow.com/questions/30700166/python-open-file-error).
		with open(fileName,"r",encoding="utf-8",errors="ignore") as inputFile:
			fileContents = inputFile.read()
		fileSize = len(fileContents)
		self.searchTarget(mainURL,fileContents,fileSize,"href")
		self.searchTarget(mainURL,fileContents,fileSize,"src=")
		# Strip the URL prefix so same-site absolute links point at the local copies.
		with open(fileName,"w",encoding="utf-8") as outputFile:
			outputFile.write(fileContents.replace(mainURL,""))

	def searchTarget(self,mainURL,fileContents,fileSize,keyword):
		"""Find every ``keyword`` occurrence and download the quoted value after it.

		Only values enclosed in double quotes are recognised. Scanning stops
		when a keyword is not followed by a complete pair of double quotes;
		continuing from a failed ``find`` (-1) would restart the scan from the
		beginning of the text and never terminate.

		Args:
			mainURL: URL prefix of the page being cloned.
			fileContents: Text of the page.
			fileSize: Upper bound (exclusive) of the region of ``fileContents`` to scan.
			keyword: Marker to look for, e.g. ``"href"`` or ``"src="``.
		"""
		position = fileContents.find(keyword,0,fileSize)
		while position >= 0:	# find() reports "not found" as -1, so index 0 is a valid hit
			opening = fileContents.find('"',position,fileSize)
			if opening < 0:
				break
			closing = fileContents.find('"',opening+1,fileSize)
			if closing < 0:
				break
			self._downloadTarget(mainURL,fileContents[opening+1:closing])
			position = fileContents.find(keyword,closing+1,fileSize)

	def _downloadTarget(self,mainURL,filePath):
		"""Download one referenced file if it is relative or lies under ``mainURL``.

		References without any "/" and ``mailto`` links are skipped. A value
		that does not contain "http" is treated as relative to ``mainURL`` and
		used unchanged as the local path. A value that starts with ``mainURL``
		is saved at the path relative to ``mainURL``, which is where the
		rewritten page (with ``mainURL`` stripped) will look for it. Anything
		else (other sites) is ignored.
		"""
		if "/" not in filePath or "mailto" in filePath:
			return
		if "http" not in filePath:
			remoteURL = mainURL + filePath
			localPath = filePath
		elif mainURL and filePath.startswith(mainURL):
			remoteURL = filePath
			localPath = filePath[len(mainURL):]
		else:
			return
		if not localPath or localPath.endswith("/"):
			return	# no file name to write to
		directory = localPath.rpartition("/")[0]
		if directory:
			try:
				os.makedirs(directory,exist_ok=True)
			except OSError as error:
				print("無法建立目錄 %s：%s" % (directory,error))
				return
		print("下載檔案 %s" % remoteURL)
		self._runCurl([remoteURL, "-o", localPath, "-J", "-s"])

	def _runCurl(self,arguments):
		"""Run ``curl`` with the given argument list; return True on exit status 0.

		The arguments are passed as a list (no shell), so characters such as
		``&``, spaces or quotes inside a URL cannot alter the command line.
		"""
		try:
			return subprocess.run(["curl", *arguments]).returncode == 0
		except OSError as error:
			print("無法執行curl：%s" % error)
			return False

	def _openLocalFile(self,fileName):
		"""Open ``fileName`` with the platform's default application."""
		try:
			if hasattr(os,"startfile"):
				# Windows: equivalent to passing the file to the "start" command.
				os.startfile(fileName)
			else:
				webbrowser.open(Path(fileName).resolve().as_uri())
		except OSError as error:
			QMessageBox.warning(self,"運作結果","<font size = 5>無法開啟檔案 %s</font>"%fileName,QMessageBox.Yes)
			print(error)
			
			
if __name__ == "__main__":
	# Script entry point: create the Qt application, show the download
	# window, and hand control to the Qt event loop until the window closes.
	application = QApplication([])
	mainWindow = DownloadWebData()
	mainWindow.show()
	application.exec_()