import pandas as pd
import re
import pandas as pd
# Printing configuration: no limit on the number of displayed columns or rows,
# and a maximum column width of 100.
for _option_name, _option_value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('max_colwidth', 100),
):
    pd.set_option(_option_name, _option_value)
# Sample input table. Each 'answers' cell is a multi-line text blob that
# contains host names followed by "<ip> <location>" lines.
# NOTE(review): the line breaks inside the 'answers' literals are written as
# explicit "\n" escapes because a single-quoted string cannot span physical
# lines; confirm the leading "\n" matches the intended data.
df1 = pd.DataFrame({
    'id': [1, 2],
    'regions': ['广东', '上海'],
    'isp': ['电信', '电信'],
    'answers': [
        '\nxxx.xxx.com.\nxxx.xxx.xxx.com.\n1.1.1.1 中国深圳电信\n2.2.2.2 中国深圳电信\n',
        '\nxxx.xxx.com.\nxxx.xxx.xxx.com.\n3.3.3.3 中国上海电信\n4.4.4.4 中国上海电信\n',
    ],
})
# Sample per-address table, keyed by the 'ip' column.
df2 = pd.DataFrame({
    'Age': [13, 0, 20, 25],
    'ip': ['1.1.1.1', '2.2.2.2', '3.3.3.3', '4.4.4.4'],
    'status_code': [200, 403, 200, 200],
})

# Turn every value into a string so that the columns can later be combined
# with str.join, which only accepts strings.
for column in df2.columns:
    df2[column] = df2[column].apply(str)
def ip_extract(input_string):
    """Return every dotted-quad token found in ``input_string``.

    Args:
        input_string: Text to scan. A non-string value (for example ``None``
            or the NaN of a missing cell) is treated as containing no
            addresses instead of raising ``TypeError``.

    Returns:
        The matches, in order of appearance, joined with newline characters.
        An empty string when nothing matches.
    """
    if not isinstance(input_string, str):
        return ''
    # Four dot-separated groups of 1-3 digits; octet ranges are not validated.
    ip_pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
    ip_addresses = re.findall(ip_pattern, input_string)
    return '\n'.join(ip_addresses)
# Work on a copy so that df1 itself does not gain the helper 'ip' column.
df1_new = df1.copy()
df1_new['ip'] = df1_new['answers'].map(ip_extract)

# One row per extracted address; each row keeps the index label of the df1
# row it came from, which is what the join below aligns on.
_ip_per_row = df1_new['ip'].str.split('\n').explode()
df_ip = _ip_per_row.to_frame(name='ip')

# Replace the newline-joined 'ip' column with the one-address-per-row version,
# then inner-merge with df2 on 'ip'.
_without_joined_ip = df1_new.drop(columns=['ip'])
df_merge = _without_joined_ip.join(df_ip).merge(df2, on=['ip'])
def concat_func(x):
    """Collapse the rows of one group into a single Series.

    Args:
        x: DataFrame with the rows of one group. It must contain every column
            of the module-level ``df2``, and those columns must hold strings
            because they are passed to ``str.join``.

    Returns:
        A Series indexed by the ``df2`` column names; each entry is that
        column's values in ``x`` joined with newline characters.
    """
    return pd.Series({column: '\n'.join(x[column]) for column in df2.columns})
# Fold the per-address rows back into one row per distinct 'answers' text,
# with the df2 columns newline-joined by concat_func.
_grouped_by_answers = df_merge.groupby(['answers'])
df_group = _grouped_by_answers.apply(concat_func).reset_index()

# Attach the aggregated columns to the original df1 rows and print the result.
df = df1.merge(df_group, on=['answers'])
print(df)