python爬虫使用笔记

8-09 1,705 views

所需模块包
import requests  # HTTP 请求库
import threading  # 多线程
from bs4 import BeautifulSoup  # HTML 解析与元素提取库,用法类似 jQuery
import datetime  # 时间库
import hashlib  # hash 库,用来计算文件 hash,防止重复下载文件
import os, sys, os.path  # 系统 IO 处理库
import random  # 随机数库
import re  # 正则库
import urllib.request  # URL 请求库

"爬虫所需相关重要技术"

mysql类
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pymysql
import time

class MySQL:
    """Small convenience wrapper around one pymysql connection and one cursor.

    ``query`` only executes a statement; ``update`` and ``insert`` execute
    and then commit. Results of a ``query`` are read back through
    ``fetchAllRows`` / ``fetchOneRow`` / ``getRowCount``.
    """

    # MySQL error number. update() stores the first argument of the exception
    # it caught here (for pymysql errors that is the numeric error code).
    error_code = ''

    _instance = None  # set to the pymysql module by __init__
    _conn = None  # database connection
    _cur = None  # cursor

    _TIMEOUT = 30  # default timeout of 30 seconds (not read by this class)
    _timecount = 0

    def __init__(self, dbconfig):
        """Open the connection and create a cursor.

        Args:
            dbconfig: mapping with the keys ``host``, ``port``, ``user``,
                ``passwd``, ``db`` and ``charset``.
        """
        self._conn = pymysql.connect(host=dbconfig['host'],
                                     port=dbconfig['port'],
                                     user=dbconfig['user'],
                                     passwd=dbconfig['passwd'],
                                     db=dbconfig['db'],
                                     charset=dbconfig['charset'])

        self._cur = self._conn.cursor()
        self._instance = pymysql

    def _execute(self, sql, params=''):
        """Run ``SET NAMES utf8`` and then *sql*; return the cursor's result.

        *params* is forwarded to the cursor only when it is not the empty
        string, which is the "no parameters" marker used by the public API.
        """
        self._cur.execute("SET NAMES utf8")
        if params != '':
            return self._cur.execute(sql, params)
        return self._cur.execute(sql)

    # NOTE: the parameter name ``str`` shadows the builtin; it is kept so that
    # existing keyword calls (``str=...``) continue to work.
    def query(self, sql, str=''):
        """Execute *sql* without committing and return the cursor's result."""
        return self._execute(sql, str)

    def update(self, sql, str=''):
        """Execute *sql* and commit.

        Returns:
            The value returned by the cursor's ``execute`` on success, or
            ``None`` when executing or committing raised. In the failure case
            a message is printed and ``error_code`` is updated.
        """
        try:
            result = self._execute(sql, str)
            # Commit only after the statement succeeded; the original returned
            # before reaching this line, so changes were never committed.
            self._conn.commit()
        except Exception as exc:
            self.error_code = exc.args[0] if exc.args else ''
            print("发生异常")
            return None
        return result

    def insert(self, sql, str=''):
        """Execute *sql*, commit, and return the cursor's ``lastrowid``."""
        self._execute(sql, str)
        self._conn.commit()
        return self._cur.lastrowid

    def fetchAllRows(self):
        """Return all remaining rows of the last executed statement."""
        return self._cur.fetchall()

    def fetchOneRow(self):
        """Return the next row of the last executed statement."""
        return self._cur.fetchone()

    def getRowCount(self):
        """Return the cursor's ``rowcount``."""
        return self._cur.rowcount

    def commit(self):
        """Commit the current transaction."""
        self._conn.commit()

    def rollback(self):
        """Roll back the current transaction."""
        self._conn.rollback()

    def close(self):
        """Close the cursor and the connection; safe to call more than once.

        Closing is best-effort: an error while closing one resource is ignored
        so that the other one is still closed.
        """
        cursor, connection = self._cur, self._conn
        # Drop the references first so a repeated close() is a no-op.
        self._cur = None
        self._conn = None
        for resource in (cursor, connection):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception:
                # Deliberately ignored: cleanup must not raise, in particular
                # when it runs from __del__.
                pass

    def __del__(self):
        """Release the cursor and connection when the object is collected."""
        self.close()

# if __name__ == '__main__':
# # 数据库连接参数
# dbconfig = {'host':'',
# 'port': 3306,
# 'user':'',
# 'passwd':'',
# 'db':'',
# 'charset':'utf8'}
#
# # # 连接数据库,创建这个类的实例
# # db = MySQL(dbconfig)
# #
# # # 操作数据库
# # sql = "insert into cmf_gifs(name) values('123')"
# # # sql2 = "UPDATE lbs_member SET username='123' WHERE userid=123"
# # # sql3 = "SELECT * FROM lbs_member LIMIT 5"
# # # db.query(sql)
# # # result = db.fetchAllRows()
# # # result = db.update(sql2)
# # result = db.insert(sql)
# # print(result)
# # db.close()

python清空文件夹下log文件

#!/usr/bin/python import os import sys files_list = [] def print_files(path): lsdir = os.listdir(path) dirs = [i for i in lsdir if os.path...

阅读全文

python paramiko(SFTP) 相关文档

python paramiko SFTP文档地址: http://docs.paramiko.org/en/2.4/api/sftp.html import paramiko host = "THEHOST.com" #...

阅读全文

python 执行cmd命令并获取pid

import subprocess

# Start the command without waiting for it to finish, then print the
# process id of the child that was spawned.
process = subprocess.Popen('cmd命令')
pid = process.pid
print(pid)

阅读全文

欢迎留言