SEISAMUSE

Jun Xie's Blog

  After installing seisman's HinetPy, I hit this problem when running the data download script:

from HinetPy import Client, win32
File "/home/junxie/work/Snet/HinetPy-main/HinetPy/__init__.py", line 23, in <module>
from .client import Client
File "/home/junxie/work/Snet/HinetPy-main/HinetPy/client.py", line 22, in <module>
from urllib3.util import create_urllib3_context
ImportError: cannot import name 'create_urllib3_context' from 'urllib3.util' (/home/junxie/.conda/envs/py3/lib/python3.9/site-packages/urllib3/util/__init__.py)

How to fix it?
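
  A likely cause: HinetPy's client.py imports create_urllib3_context from urllib3.util, which only newer urllib3 releases export, so the urllib3 in the conda environment is too old. Upgrading urllib3 in that environment (e.g. pip install -U urllib3) should fix it. Alternatively, a guarded import in client.py works with both old and new urllib3; this is my workaround sketch, not HinetPy's official fix:

# Workaround sketch for HinetPy/client.py (an assumption, not the official fix):
# try the public export first, then fall back to where older urllib3 defines it.
try:
    from urllib3.util import create_urllib3_context  # urllib3 >= 2.x
except ImportError:
    from urllib3.util.ssl_ import create_urllib3_context  # urllib3 1.26.x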


  Once the data are downloaded, we need to tally what we actually got. Here is the Python script.

import os
from obspy import read
from datetime import datetime as dt
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.dates as mdates
from collections import defaultdict
from matplotlib.transforms import blended_transform_factory

# ===== Configuration =====
data_dir = "daily_waveforms"   # directory holding YYYYMMDD.mseed files
station_file = "station.list"  # station list, two columns: NET STA
components = ["LHZ", "LHN", "LHE"]
start_year = 2020
end_year = 2020

# ===== Read the station list =====
stations = []
with open(station_file, "r") as f:
    for line in f:
        if line.strip():
            net, sta = line.strip().split()[:2]
            stations.append((net, sta))

station_set = set(f"{net}.{sta}" for net, sta in stations)

# ===== Initialization =====
station_dates = {comp: defaultdict(set) for comp in components}  # comp -> {net.sta -> set(date)}
station_file_sizes = defaultdict(int)  # total file size per station
total_files = 0
total_file_size = 0

# ===== Loop over all mseed files =====
all_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".mseed"))
for filename in all_files:
    filepath = os.path.join(data_dir, filename)

    size = os.path.getsize(filepath)
    total_files += 1
    total_file_size += size

    try:
        st = read(filepath)
    except Exception as e:
        print(f"Failed to read {filename}: {e}")
        continue

    # Parse the file date (YYYYMMDD)
    try:
        file_date = dt.strptime(filename[:8], "%Y%m%d").date()
    except Exception as e:
        print(f"Cannot parse date from {filename}: {e}")
        continue

    for tr in st:
        net = tr.stats.network
        sta = tr.stats.station
        comp = tr.stats.channel[-3:]

        key = f"{net}.{sta}"
        if key not in station_set:
            continue
        if comp not in components:
            continue

        station_dates[comp][key].add(file_date)
        station_file_sizes[key] += size

print(f"Files read: {total_files}")
print(f"Total size of all files: {total_file_size / (1024**2):.2f} MB")

# ===== Drop stations with no valid data =====
for comp in components:
    keys_to_remove = [k for k, dates in station_dates[comp].items() if len(dates) == 0]
    for k in keys_to_remove:
        del station_dates[comp][k]

# ===== Write the per-station day counts =====
for comp in components:
    with open(f"station_day_count_{comp}.txt", "w") as f:
        for key in sorted(station_dates[comp].keys()):
            f.write(f"{key} {len(station_dates[comp][key])}\n")

# ===== Network color map =====
all_nets = sorted(set(net for net, sta in stations))
net_color_map = {net: cm.get_cmap("tab20")(i / max(len(all_nets) - 1, 1)) for i, net in enumerate(all_nets)}

# ===== Plotting function =====
def plot_component_availability(comp, station_dates_comp):
    valid_stations = sorted(station_dates_comp.keys(), key=lambda x: (x.split(".")[0], x.split(".")[1]))
    fig, ax = plt.subplots(figsize=(14, max(6, 0.3 * len(valid_stations))))

    def to_datetime(d):
        return dt(d.year, d.month, d.day)

    for idx, key in enumerate(valid_stations):
        net = key.split(".")[0]
        dates = sorted(station_dates_comp[key])
        x_vals = [to_datetime(d) for d in dates]
        ax.plot(x_vals, [idx] * len(dates), ".", color=net_color_map[net], markersize=2)

    ax.set_xlabel("Time")
    ax.set_ylabel("Station")
    ax.set_yticks(range(len(valid_stations)))
    ax.set_yticklabels(valid_stations, fontsize=5)

    ax.grid(True, linestyle=":", alpha=0.5)

    x_start = dt(start_year, 1, 1)
    x_end = dt(end_year, 12, 31)
    ax.set_xlim(x_start, x_end)

    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))

    plt.tight_layout(rect=[0, 0, 0.85, 1])

    # Label network names outside the right edge of the axes
    trans = blended_transform_factory(ax.transAxes, ax.transData)
    net_indices = defaultdict(list)
    for idx, key in enumerate(valid_stations):
        net = key.split(".")[0]
        net_indices[net].append(idx)

    for net, indices in net_indices.items():
        mid_idx = indices[len(indices) // 2]
        color = net_color_map[net]
        ax.text(1.02, mid_idx, net, color=color, fontsize=8, fontweight="bold",
                verticalalignment="center", horizontalalignment="left",
                transform=trans)

    outfile = f"{comp}_availability_{start_year}_{end_year}.png"
    plt.savefig(outfile, dpi=300, bbox_inches="tight")
    plt.close()
    print(f"Saved figure: {outfile}")

# ===== Plot the three components =====
for comp in components:
    plot_component_availability(comp, station_dates[comp])

# ===== Network statistics =====
network_stats = defaultdict(lambda: {"total": 0, "valid": 0, "day_counts": []})

for net, sta in stations:
    network_stats[net]["total"] += 1

all_valid_keys = set()
for comp in components:
    all_valid_keys.update(station_dates[comp].keys())

for key in all_valid_keys:
    net = key.split(".")[0]
    days_list = [len(station_dates[comp].get(key, [])) for comp in components]
    valid_days = max(days_list)
    network_stats[net]["valid"] += 1
    network_stats[net]["day_counts"].append(valid_days)

print("\nNetwork summary:")
print(f"{'Net':<6} {'Total':>6} {'Valid':>6} {'AvgDays':>10}")
print("-" * 32)
with open("network_summary_all_components.txt", "w") as f:
    f.write(f"{'Net':<6} {'Total':>6} {'Valid':>6} {'AvgDays':>10}\n")
    for net in sorted(network_stats.keys()):
        total = network_stats[net]["total"]
        valid = network_stats[net]["valid"]
        avg_days = np.mean(network_stats[net]["day_counts"]) if network_stats[net]["day_counts"] else 0
        line = f"{net:<6} {total:>6} {valid:>6} {avg_days:>10.1f}"
        print(line)
        f.write(line + "\n")

# ===== Summary =====
print(f"\nTotal number of files: {total_files}")
print(f"Total size: {total_file_size/(1024**2):.2f} MB ({total_file_size/(1024**3):.2f} GB)")

  This script does the following (a sample station.list follows the list):

  • Tallies the download coverage from start_year to end_year.
  • Computes the statistics separately for LHZ, LHN, and LHE.
  • Visualizes the results.
  • The x axis is time and the y axis is station name; each network is drawn in its own color, with the network names labeled on the right side of the figure.
  • Writes network_summary_all_components.txt, listing each network's station count, valid station count, and average number of days.
  • Writes station_day_count_{comp}.txt, listing each station's name and number of valid days.
  • Prints the total number of files and their total size.
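
  For reference, station.list is plain text with two columns, NET STA, as the script's configuration expects; a hypothetical example:

IU ANMO
IU COLA
II BFO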

  Below is an updated version of the seismic data download script.

import os
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
from obspy.taup import TauPyModel
from obspy.geodetics import locations2degrees
from concurrent.futures import ThreadPoolExecutor, as_completed

client = Client("IRIS")
model = TauPyModel("iasp91")

output_dir = "processed_sac"
os.makedirs(output_dir, exist_ok=True)

event_file = "event.lst"
sta_file = "sta.lst"
exception_log = "exceptions.txt"
thread_workers = 4

# Read station info: net, sta, lat, lon, elev
with open(sta_file, "r") as sf:
    sta_lines = [line.strip() for line in sf if line.strip()]
sta_list = []
for line in sta_lines:
    parts = line.split("|")
    if len(parts) >= 5:
        net = parts[0].strip()
        sta = parts[1].strip()
        lat = float(parts[2])
        lon = float(parts[3])
        elev = float(parts[4])
        sta_list.append((net, sta, lat, lon, elev))

# Truncate the exception log
with open(exception_log, "w") as elog:
    elog.write("")

def process_station(event_id, origin_time, magnitude, ev_lat, ev_lon, ev_dep,
                    net, sta, sta_lat, sta_lon, sta_elev):
    try:
        # Epicentral distance and theoretical P arrival
        dist_deg = locations2degrees(ev_lat, ev_lon, sta_lat, sta_lon)
        arrivals = model.get_travel_times(source_depth_in_km=ev_dep,
                                          distance_in_degree=dist_deg,
                                          phase_list=["P"])
        if not arrivals:
            raise Exception("No P arrival")

        p_arrival = origin_time + arrivals[0].time
        start_dl = p_arrival - 50
        end_dl = p_arrival + 150

        # Download three-component data
        st = client.get_waveforms(network=net, station=sta, location="*", channel="BH?",
                                  starttime=start_dl, endtime=end_dl,
                                  attach_response=True)

        # Remove the instrument response
        st.remove_response(output="VEL", pre_filt=(0.01, 0.02, 8, 10),
                           taper=True, zero_mean=True, taper_fraction=0.05)

        # Trim to the analysis window
        st.trim(starttime=p_arrival - 10, endtime=p_arrival + 60)

        # Require all three components
        comps = [tr.stats.channel[-1] for tr in st]
        if not all(comp in comps for comp in ["N", "E", "Z"]):
            raise Exception("Incomplete 3-component data")

        # Fill SAC headers and write one file per trace
        for tr in st:
            tr.stats.sac = tr.stats.get("sac", {})
            tr.stats.sac.stla = sta_lat
            tr.stats.sac.stlo = sta_lon
            tr.stats.sac.stel = sta_elev
            tr.stats.sac.evla = ev_lat
            tr.stats.sac.evlo = ev_lon
            tr.stats.sac.evdp = ev_dep
            tr.stats.sac.mag = float(magnitude) if magnitude != "NA" else 0.0

            time_tag = origin_time.strftime("%Y%m%dT%H%M%S")
            chan = tr.stats.channel
            filename = f"{time_tag}_M{magnitude}_{net}.{sta}.{chan}.sac"
            filepath = os.path.join(output_dir, filename)
            tr.write(filepath, format="SAC")

        return f"{net}.{sta} ✅"

    except Exception as e:
        with open(exception_log, "a") as elog:
            elog.write(f"{event_id} {net}.{sta} ❌ {str(e)}\n")
        return f"{net}.{sta} ❌ {str(e)}"

def process_event(line):
    results = []
    parts = line.split("|")
    if len(parts) < 10:
        return ["Skipping malformed line"]

    evid = parts[0].strip()
    time_str = parts[1].strip()
    ev_lat = float(parts[2].strip())
    ev_lon = float(parts[3].strip())
    ev_dep = float(parts[4].strip())
    mag_info = parts[8].strip()
    magnitude = mag_info.split(",")[1] if "," in mag_info else "NA"

    origin_time = UTCDateTime(time_str)
    print(f"\n🟡 Processing event {evid} M{magnitude} @ {origin_time} ({ev_lat}, {ev_lon}, {ev_dep}km)")

    try:
        task_list = []
        with ThreadPoolExecutor(max_workers=thread_workers) as executor:
            for net, sta, sta_lat, sta_lon, sta_elev in sta_list:
                task = executor.submit(process_station, evid, origin_time, magnitude,
                                       ev_lat, ev_lon, ev_dep, net, sta, sta_lat, sta_lon, sta_elev)
                task_list.append(task)

            for task in as_completed(task_list):
                results.append(task.result())

    except Exception as e:
        with open(exception_log, "a") as elog:
            elog.write(f"{evid} ERROR: {str(e)}\n")
        results.append(f"⚠️ Failed to process event {evid}: {e}")
    return results

# Read the event list and run
with open(event_file, "r") as f:
    event_lines = [line.strip() for line in f if line.strip()]

for line in event_lines:
    results = process_event(line)
    for r in results:
        print(r)

  The difference from the earlier script (see the post 下载地震数据练习, "seismic data download practice") is the extra file sta.lst, whose format is:
TA|140A|32.6408|-93.573997|56.0|
network|station|latitude|longitude|elevation
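
  A sta.lst like this can be generated with an FDSN station query; a minimal sketch, where the TA network and the time span are assumptions to adjust to your own case:

# Sketch: build sta.lst from an FDSN station query (network and dates are assumptions).
from obspy import UTCDateTime
from obspy.clients.fdsn import Client

client = Client("IRIS")
inv = client.get_stations(network="TA", channel="BH?", level="station",
                          starttime=UTCDateTime("2006-01-01"),
                          endtime=UTCDateTime("2007-05-01"))
with open("sta.lst", "w") as f:
    for net in inv:
        for sta in net:
            f.write(f"{net.code}|{sta.code}|{sta.latitude}|{sta.longitude}|{sta.elevation}|\n")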

  Sometimes you need to grab the data points from a figure in someone else's paper. How? Ask the author, of course! But sometimes that is not an option (you cannot reach the author, the author has lost the data, the author will not share, or the author tells you to extract it yourself), or you do not actually need the exact values, in which case you can pick the points off the figure yourself. I used to have a Perl script for this but cannot find it anymore (more proof that organizing files and taking notes matters). These days Python makes it easy: just use pynput's Listener. The script to record mouse clicks:

from pynput.mouse import Button, Listener
import logging

# Configure logging
logging.basicConfig(
    filename="mouse_clicks.log",
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S"
)

def on_click(x, y, button, pressed):
    if pressed:
        message = f"{x} {y}"
        print(message)         # echo to the screen
        logging.info(message)  # append to the log file
        if button == Button.right:
            # Stop the listener
            return False

# Start the mouse listener
with Listener(on_click=on_click) as listener:
    try:
        listener.join()
    except KeyboardInterrupt:
        print("Listener interrupted by user.")

  This script records the position of each left click and writes it to mouse_clicks.log, which has three columns:

2025-05-07T17:01:33 1977 1576

the click time and the x and y coordinates. Note that y increases downward (larger at the bottom of the screen, smaller at the top). Right-click or press Ctrl+C to stop recording. My usual procedure: click the lower-left corner of the figure to get (x0, y0), the lower-right corner to get (x1, y0), the upper-left corner to get (x0, y1), and then the points I actually want. With mouse_clicks.log in hand, plotting goes like this (GMT):

dat=mouse_clicks.log
# Reference points x0,y0,x1,y1 from the first three clicks
x0=`awk 'NR==1{print $2}' ${dat}`
y0=`awk 'NR==1{print $3}' ${dat}`
x1=`awk 'NR==2{print $2}' ${dat}`
y1=`awk 'NR==3{print $3}' ${dat}`
# Axis lengths in screen (pixel) units
xs=`echo "$x1-$x0" |bc`
ys=`echo "$y0-$y1" |bc`

awk -v x0=$x0 -v y0=$y0 -v xs=$xs -v ys=$ys 'NR>3{print ($2-x0)/xs*2000-1000,(y0-$3)/ys*1500}' ${dat} | gmt psxy -R -J -O -K -W3p,black >>$ps
# Here (2000,1500) is the actual axis extent, and -1000 shifts x to its actual starting value.

  Note that this method only works for linear Cartesian axes; it fails for every other kind of projection.
  From now on I should append this line: scripts/programs come with no guarantee of correctness (no warranty, use at your own risk).
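
  The same pixel-to-data mapping is easy to do directly in Python. A minimal sketch, assuming clicks 1-3 are the lower-left, lower-right, and upper-left corners; the data ranges (here x in [-1000, 1000] and y in [0, 1500], matching the awk line above) must be adjusted to the figure being digitized:

# Sketch: convert logged pixel clicks to data coordinates (linear axes only).
# Assumes clicks 1-3 are the lower-left, lower-right, and upper-left corners.
XMIN, XMAX = -1000.0, 1000.0  # assumed x-axis range of the original figure
YMIN, YMAX = 0.0, 1500.0      # assumed y-axis range

with open("mouse_clicks.log") as f:
    rows = [line.split() for line in f if line.strip()]

pix = [(float(r[1]), float(r[2])) for r in rows]     # (x_pixel, y_pixel)
(x0, y0), (x1, _), (_, y1) = pix[0], pix[1], pix[2]  # LL, LR, UL corners
xs, ys = x1 - x0, y0 - y1  # axis lengths in pixels (screen y grows downward)

for px, py in pix[3:]:
    x = XMIN + (px - x0) / xs * (XMAX - XMIN)
    y = YMIN + (y0 - py) / ys * (YMAX - YMIN)
    print(f"{x:.3f} {y:.3f}")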

  Drawing arrows with GMT comes up fairly often. The GMT documentation and the GMT中文手册 are great, but they are no help when the network is down, so I am noting it here where I can find it. The arrow symbol is Sv (or SV, which reads an azimuth measured from north instead of a Cartesian angle); tweak the modifiers after Sv and watch the effect. The input columns are x, y, angle, length: x, y is the arrow's start point; angle is the direction it points, counterclockwise from the positive x axis; and length is just the length.

echo 0 0 60 0.5i | gmt psxy -R -J -O -K -Sv0.25c+e0.2c -W0.5p -Gblack >>$ps

  Receiver functions and other methods need seismic waveform data. Here is an example that combines FetchEvent and ObsPy for downloading.
  FetchEvent, written in Perl, can be downloaded from https://github.com/EarthScope/fetch-scripts. An example run:

./FetchEvent -s 2006-01-01 -e 2007-05-01 --radius 5:12:95:28 --mag 5:10 -o event.lst

  This requests events occurring between 2006-01-01 and 2007-05-01, located 28-95 degrees from the point (5, 12) (lat, lon), with magnitude 5-10, and saves the event information to event.lst.
  The downloaded event.lst looks like:

8037834 |2006/01/23 20:50:46.19Z |  6.9168| -77.7951|  24.0355|ISC|||MS,5.86,ISC|mb,6.16,ISC|Near west coast of Colombia

  The fields are ID|time|latitude|longitude|depth|catalog|||magtype,magnitude,catalog|magtype,magnitude,catalog|location.
  The event information evidently comes from the ISC catalog; searching ISC directly is also quite convenient.
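
  If you would rather stay in Python, roughly the same query can be done with ObsPy's FDSN client; a sketch (note it returns an ObsPy Catalog object rather than a FetchEvent-style text file):

# Sketch: the equivalent event query via ObsPy's FDSN client.
from obspy import UTCDateTime
from obspy.clients.fdsn import Client

client = Client("IRIS")
catalog = client.get_events(starttime=UTCDateTime("2006-01-01"),
                            endtime=UTCDateTime("2007-05-01"),
                            latitude=5, longitude=12,
                            minradius=28, maxradius=95,
                            minmagnitude=5, maxmagnitude=10)
print(catalog)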

  Next, for every station in the XB network, download the three-component data from 50 s before to 150 s after the theoretical P arrival, computed with taup. Then remove the instrument response and finally cut from 10 s before to 60 s after the P arrival. Save as SAC, one file per event, station, and channel, with the event time, magnitude, and station name in the filename. ThreadPoolExecutor provides multithreaded speedup. The script:

import os
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
from obspy.taup import TauPyModel
from obspy.geodetics import locations2degrees
from concurrent.futures import ThreadPoolExecutor, as_completed

client = Client("IRIS")
model = TauPyModel("iasp91")

output_dir = "processed_sac"
os.makedirs(output_dir, exist_ok=True)

event_file = "event.lst"
exception_log = "exceptions.txt"
thread_workers = 4  # number of worker threads, tune as needed

with open(event_file, "r") as f:
    event_lines = [line.strip() for line in f if line.strip()]

# Truncate the exception log
with open(exception_log, "w") as elog:
    elog.write("")

def process_station(event_id, origin_time, magnitude, ev_lat, ev_lon, ev_dep, net, sta, sta_lat, sta_lon, sta_elev):
    try:
        # Epicentral distance via locations2degrees
        dist_deg = locations2degrees(ev_lat, ev_lon, sta_lat, sta_lon)
        print(dist_deg)  # debug output
        arrivals = model.get_travel_times(source_depth_in_km=ev_dep,
                                          distance_in_degree=dist_deg,
                                          phase_list=["P"])
        if not arrivals:
            raise Exception("No P arrival")

        p_arrival = origin_time + arrivals[0].time
        start_dl = p_arrival - 50
        end_dl = p_arrival + 150

        # Download three-component data
        st = client.get_waveforms(network=net, station=sta, location="*", channel="BH?",
                                  starttime=start_dl, endtime=end_dl,
                                  attach_response=True)

        # Remove the instrument response
        st.remove_response(output="VEL", pre_filt=(0.01, 0.02, 8, 10), taper=True,
                           zero_mean=True, taper_fraction=0.05)

        # Trim to the analysis window
        st.trim(starttime=p_arrival - 10, endtime=p_arrival + 60)

        # Check that all three components are present
        if not all(comp in [tr.stats.channel[-1] for tr in st] for comp in ["N", "E", "Z"]):
            raise Exception("Incomplete 3-component data")

        # Write SAC files with header fields filled in
        for tr in st:
            tr.stats.sac = tr.stats.get("sac", {})
            tr.stats.sac.stla = sta_lat
            tr.stats.sac.stlo = sta_lon
            tr.stats.sac.stel = sta_elev
            tr.stats.sac.evla = ev_lat
            tr.stats.sac.evlo = ev_lon
            tr.stats.sac.evdp = ev_dep
            tr.stats.sac.mag = float(magnitude) if magnitude != "NA" else 0.0

            time_tag = origin_time.strftime("%Y%m%dT%H%M%S")
            chan = tr.stats.channel
            filename = f"{time_tag}_M{magnitude}_{net}.{sta}.{chan}.sac"
            filepath = os.path.join(output_dir, filename)
            tr.write(filepath, format="SAC")
        return f"{net}.{sta} ✅"
    except Exception as e:
        with open(exception_log, "a") as elog:
            elog.write(f"{event_id} {net}.{sta} ❌ {str(e)}\n")
        return f"{net}.{sta} ❌ {str(e)}"

def process_event(line):
    results = []
    parts = line.split("|")
    if len(parts) < 10:
        return ["Skipping malformed line"]

    evid = parts[0].strip()
    time_str = parts[1].strip()
    ev_lat = float(parts[2].strip())
    ev_lon = float(parts[3].strip())
    ev_dep = float(parts[4].strip())
    mag_info = parts[8].strip()
    magnitude = mag_info.split(",")[1] if "," in mag_info else "NA"

    origin_time = UTCDateTime(time_str)
    print(f"\n🟡 Processing event {evid} M{magnitude} @ {origin_time} ({ev_lat}, {ev_lon}, {ev_dep}km)")

    try:
        # All XB stations with BH? channels operating at the event time
        inventory = client.get_stations(network="XB", starttime=origin_time,
                                        endtime=origin_time + 3600,
                                        level="station", channel="BH?")
        task_list = []
        with ThreadPoolExecutor(max_workers=thread_workers) as executor:
            for network in inventory:
                for station in network:
                    sta_lat = station.latitude
                    sta_lon = station.longitude
                    sta_elev = station.elevation
                    sta = station.code
                    net = network.code
                    task = executor.submit(process_station, evid, origin_time, magnitude,
                                           ev_lat, ev_lon, ev_dep, net, sta, sta_lat, sta_lon, sta_elev)
                    task_list.append(task)

            for task in as_completed(task_list):
                results.append(task.result())

    except Exception as e:
        with open(exception_log, "a") as elog:
            elog.write(f"{evid} XB ERROR: {str(e)}\n")
        results.append(f"⚠️ Failed to process event {evid}: {e}")
    return results

# Process all events
for line in event_lines:
    results = process_event(line)
    for r in results:
        print(r)