import os
import time
import socket
import numpy as np
from obspy import UTCDateTime, Stream
from obspy.clients.fdsn import Client
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.exceptions import ConnectionError, Timeout
 
 
# --- Configuration ---
client = Client("IRIS")
output_dir = "global_data"
os.makedirs(output_dir, exist_ok=True)
sta_file = "station.lst"
start_date = UTCDateTime("2013-01-01")
end_date = UTCDateTime("2024-01-01")
thread_workers = 20
exception_log = "exceptions.txt"
timing_log = "download_time.txt"
max_retries = 5
print(f"📁 Current working directory: {os.getcwd()}")
print(f"📁 Waveform output path: {os.path.abspath(output_dir)}")
 
 
# Read the (network, station) pairs from the station list file.
sta_list = []
with open(sta_file, "r") as sf:
    for line in sf:
        if line.strip() and not line.strip().startswith("#"):
            parts = line.strip().split()
            if len(parts) >= 2:
                net, sta = parts[0], parts[1]
                sta_list.append((net, sta))
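# Assumed station.lst layout (the example entries below are hypothetical):
# one whitespace-separated "NET STA" pair per line, '#' starts a comment, e.g.
#   IU ANMO
#   II BFO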
 
def download_station(net, sta, day):
    """
    Download one day of waveform data for a single station, remove the
    instrument response, and return the resulting Stream.
    """
    start = UTCDateTime(day)
    end = start + 86400
    st = client.get_waveforms(net, sta, "*", "LH?", start, end, attach_response=True)
    # pre_filt corners are in Hz; for 1-sps LH channels the Nyquist frequency is 0.5 Hz.
    st.remove_response(output="VEL", pre_filt=(0.008, 0.01, 0.3, 0.4),
                       taper=True, zero_mean=True, taper_fraction=0.05)
    return st
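
# Example call (hypothetical station): download_station("IU", "ANMO", UTCDateTime("2013-01-01"))
# returns an obspy Stream of response-corrected velocity traces for that day.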

def download_day(day):
    """
    Download data for all stations on a given day; return the merged Stream,
    per-station results, and error log lines.
    """
    stream_day = Stream()
    daily_log = []
    log_lines = []

    with ThreadPoolExecutor(max_workers=thread_workers) as executor:
        futures = {executor.submit(download_station, net, sta, day): (net, sta) for net, sta in sta_list}

        for future in as_completed(futures):
            net, sta = futures[future]
            try:
                st = future.result()
                stream_day += st
                print(f"✅ {net}.{sta} downloaded successfully ({len(st)} traces)")
                daily_log.append((net, sta, 1))
            except Exception as e:
                print(f"❌ {net}.{sta} download failed: {e}")
                daily_log.append((net, sta, 0))
                log_lines.append(f"{day.date} {net}.{sta} ❌ {e}")

    return stream_day, daily_log, log_lines
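# Example (hypothetical date): stream, results, errors = download_day(UTCDateTime("2013-01-01"))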
 
def is_network_error(e):
    """
    Return True if the exception looks like a network-related error.
    """
    network_error_types = (ConnectionError, Timeout, socket.timeout, socket.error)
    return isinstance(e, network_error_types) or "timed out" in str(e).lower() or "connection" in str(e).lower()
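# Sanity check (illustrative only): is_network_error(socket.timeout()) is True,
# while is_network_error(ValueError("empty download")) is False.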
 
 
# --- Main download loop: one miniSEED file per day ---
current_day = start_date
while current_day <= end_date:
    filename = f"{current_day.strftime('%Y%m%d')}.mseed"
    filepath = os.path.join(output_dir, filename)

    # Skip days that already have a non-empty file on disk.
    if os.path.exists(filepath) and os.path.getsize(filepath) > 0:
        print(f"\n📆 {current_day.date} file already exists and is non-empty; skipping download.")
        current_day += 86400
        continue

    print(f"\n📆 Processing date: {current_day.date}")

    attempt = 0
    success = False
    day_start_time = time.time()

    while attempt < max_retries and not success:
        attempt += 1
        print(f"🔄 Attempt {attempt}: downloading {current_day.date} ...")

        try:
            stream_day, daily_log, log_lines = download_day(current_day)

            if len(stream_day) == 0:
                print(f"⚠️ {current_day.date} no data downloaded; will retry...")
                raise ValueError("Downloaded data is empty")

            stream_day.write(filepath, format="MSEED")
            print(f"💾 {filename} saved successfully ({len(stream_day)} traces)")
            success = True

            # Record per-station failures for this day.
            if log_lines:
                with open(exception_log, "a") as elog:
                    elog.write("\n".join(log_lines) + "\n")

        except Exception as e:
            print(f"❌ Download error: {e}")

            if is_network_error(e):
                print("🌐 Network error; waiting 5 seconds before retrying...")
                time.sleep(5)
            else:
                print("⚠️ Non-network error; retrying anyway...")
                time.sleep(3)

    # Log how long this day took, whether or not it succeeded.
    day_duration = time.time() - day_start_time
    with open(timing_log, "a") as tlog:
        tlog.write(f"{current_day.date}: {day_duration:.2f} seconds\n")

    if not success:
        print(f"❌ {current_day.date} download failed after the maximum number of retries. Check the network or the logs.")

    current_day += 86400