import os
import socket
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
from obspy import UTCDateTime, Stream
from obspy.clients.fdsn import Client
from requests.exceptions import ConnectionError, Timeout

# --- Configuration ---
client = Client("IRIS")
output_dir = "global_data"
os.makedirs(output_dir, exist_ok=True)
sta_file = "station.lst"
start_date = UTCDateTime("2013-01-01")
end_date = UTCDateTime("2024-01-01")
thread_workers = 20
exception_log = "exceptions.txt"
timing_log = "download_time.txt"
max_retries = 5

print(f"📁 Current working directory: {os.getcwd()}")
print(f"📁 Waveform output path: {os.path.abspath(output_dir)}")

# Read the station list: whitespace-separated "NETWORK STATION" pairs, "#" lines are comments.
sta_list = []
with open(sta_file, "r") as sf:
    for line in sf:
        if line.strip() and not line.strip().startswith("#"):
            parts = line.strip().split()
            if len(parts) >= 2:
                net, sta = parts[0], parts[1]
                sta_list.append((net, sta))
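
# Example station.lst layout (hypothetical entries, not from the original script):
# the parser above only needs the first two whitespace-separated tokens per line,
# so a file such as the following would work:
#
#   # network  station
#   IU         ANMO
#   II         BFO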


def download_station(net, sta, day):
    """Download one day of LH? waveforms for a single station, remove the
    instrument response, and return the resulting Stream."""
    start = UTCDateTime(day)
    end = start + 86400
    st = client.get_waveforms(net, sta, "*", "LH?", start, end,
                              attach_response=True)
    # Deconvolve to velocity with a band-pass pre-filter (corner frequencies in Hz).
    st.remove_response(output="VEL", pre_filt=(0.008, 0.01, 0.3, 0.4),
                       taper=True, zero_mean=True, taper_fraction=0.05)
    return st


def download_day(day):
    """Download all stations for one day; return the merged Stream, a
    per-station success list, and exception log lines."""
    stream_day = Stream()
    daily_log = []
    log_lines = []
    with ThreadPoolExecutor(max_workers=thread_workers) as executor:
        futures = {executor.submit(download_station, net, sta, day): (net, sta)
                   for net, sta in sta_list}

        for future in as_completed(futures):
            net, sta = futures[future]
            try:
                st = future.result()
                stream_day += st
                print(f"✅ {net}.{sta} downloaded ({len(st)} traces)")
                daily_log.append((net, sta, 1))
            except Exception as e:
                print(f"❌ {net}.{sta} download failed: {e}")
                daily_log.append((net, sta, 0))
                log_lines.append(f"{day.date} {net}.{sta} ❌ {e}")

    return stream_day, daily_log, log_lines


def is_network_error(e):
    """Return True if the exception looks network-related."""
    network_error_types = (ConnectionError, Timeout, socket.timeout, socket.error)
    return (isinstance(e, network_error_types)
            or "timed out" in str(e).lower()
            or "connection" in str(e).lower())


# --- Main loop: one MiniSEED file per day ---
current_day = start_date
while current_day <= end_date:
    filename = f"{current_day.strftime('%Y%m%d')}.mseed"
    filepath = os.path.join(output_dir, filename)

    # Skip days that already have a non-empty output file.
    if os.path.exists(filepath) and os.path.getsize(filepath) > 0:
        print(f"\n📆 {current_day.date} file already exists and is non-empty, skipping download.")
        current_day += 86400
        continue

    print(f"\n📆 Processing date: {current_day.date}")

    attempt = 0
    success = False
    day_start_time = time.time()

    while attempt < max_retries and not success:
        attempt += 1
        print(f"🔄 Attempt {attempt} to download {current_day.date} ...")

        try:
            stream_day, daily_log, log_lines = download_day(current_day)

            if len(stream_day) == 0:
                print(f"⚠️ {current_day.date} returned no data, will retry...")
                raise ValueError("Downloaded data is empty")

            stream_day.write(filepath, format="MSEED")
            print(f"💾 {filename} saved successfully ({len(stream_day)} traces)")
            success = True

            if log_lines:
                with open(exception_log, "a") as elog:
                    elog.write("\n".join(log_lines) + "\n")

        except Exception as e:
            print(f"❌ Download error: {e}")

            if is_network_error(e):
                print("🌐 Network error, waiting 5 seconds before retrying...")
                time.sleep(5)
            else:
                print("⚠️ Non-network error, will still retry...")
                time.sleep(3)

    # Record how long this day took, whether or not it succeeded.
    day_duration = time.time() - day_start_time
    with open(timing_log, "a") as tlog:
        tlog.write(f"{current_day.date}: {day_duration:.2f} seconds\n")

    if not success:
        print(f"❌ {current_day.date} download failed after the maximum number of retries. Please check the network or the logs.")

    current_day += 86400
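
# Optional sanity check (a sketch, not part of the original script): read one
# of the daily files back with obspy and confirm it contains usable traces.
# The "20130101.mseed" filename below is only an assumed example.
#
#   from obspy import read
#   st = read(os.path.join(output_dir, "20130101.mseed"))
#   print(st)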