当前位置:网站首页>Project GFS data download

Project GFS data download

2022-07-06 07:06:00 Operation and maintenance dumplings

The data-download program this project used kept misbehaving: downloads were repeated, or download processes piled up. It was also old code — a Python 2 / shell hybrid that I found cumbersome to read. So I rewrote it myself: it is easy to maintain, took an afternoon, and passed testing without problems.

1. It can be downloaded automatically
2. Write to the log after downloading , No more downloads after completion
3. Writes and reads a status file to track download progress; on completion the counter is reset to 0, ready for the next day's run.
4. Changeable status File customization starts from the number
5. Document consistency inspection , If with company data md5 Inconsistent values , It will be downloaded again .
6. Automatically detect processes pid, If the program has been up , Will not start again , If an exception occurs or the download is completed, it will be deleted automatically pid file
7. After the download is completed, the next one will run automatically wps Mission

# -*- coding:utf-8 -*-
#!/public/home/model/lg/python3/bin/python3
# 20220212,lg
#  Download from the company gfs


from  ftplib  import FTP
import datetime
import hashlib
import os

def read_status():
    """Read the persisted download counter and either resume or finalize.

    The status file holds the number of files already downloaded.  The
    remaining entries of the module-level ``list`` are handed to
    ``down_ftp``; when nothing is left, the counter is reset to 0 and a
    'gfs end' marker log is written.  In both cases the pid file is
    removed and the follow-up job is launched.
    """
    with open(status_name, 'r', encoding='utf-8') as status_file:
        done_count = int(status_file.read())
        global list
        list = list[done_count:]
        if list:
            down_ftp(done_count)
        else:
            # Everything is downloaded: reset the counter for the next day
            with open(status_name, 'w', encoding='utf-8') as reset_file:
                reset_file.write('0')
            with open(log_name, 'w', encoding='utf-8') as done_log:
                done_log.write('gfs end')
        os.remove(pid)
        os.system('cd /public/city/exe; /bin/csh load.gfs_alt &')

def str_date():
    """Return yesterday's date formatted as YYYYMMDD (e.g. '20220211')."""
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    return yesterday.strftime('%Y%m%d')


def md5_gfs(file):
    """Return the hex MD5 digest of *file*, hashed in 1 KiB binary chunks."""
    digest = hashlib.md5()
    with open(file, 'rb') as fh:
        for chunk in iter(lambda: fh.read(1024), b''):
            digest.update(chunk)
    return digest.hexdigest()


def get_md5(gfs_name):
    """Look up the server-advertised MD5 for *gfs_name*.

    The md5 listing file (module-level ``md5_file``) has one
    '<hexdigest> <filename>' pair per line.  Returns the digest string,
    or None when the file is not listed.
    """
    with open(md5_file, 'r', encoding='utf-8') as listing:
        for line in listing:
            fields = line.split()
            if fields[1] == gfs_name:
                return fields[0]  # the hex digest column



def down_ftp(sum):
    """Download the remaining GFS files over FTP, verifying each with MD5.

    *sum* is the number of files already completed.  After each verified
    download it is incremented and persisted to the status file, so an
    interrupted run resumes where it stopped.  Verified files are renamed
    to the local ``av2_...`` naming scheme.
    """
    ftp = FTP()
    ftp.connect('1.2.3.4', 1111)
    ftp.login('111', '111')
    ftp.cwd('gfs_12')

    def _fetch(remote_name, local_name):
        # BUG FIX: the original used open(...,'wb').write without closing,
        # so buffered bytes could still be unflushed when md5_gfs hashed
        # the file.  A with-block guarantees the handle is closed first.
        with open(local_name, 'wb') as out:
            ftp.retrbinary('RETR %s' % remote_name, out.write)

    print(list, sum)
    for i in list:
        down_file = file_name + i
        new_file_name = gfs_file_name + i
        while 1:
            _fetch(down_file, down_file)
            _fetch(md5_file, md5_file)
            # BUG FIX: the original computed both digests once, before the
            # retry loop, and never refreshed them after a re-download --
            # any real mismatch therefore looped forever.  Recompute on
            # every attempt.
            down_md5_num = md5_gfs(down_file)   # digest of the downloaded file
            file_get_md5 = get_md5(down_file)   # digest the server advertises
            print(down_md5_num, file_get_md5)
            if file_get_md5 == down_md5_num:
                # Also fixes the original's garbled f-string print statement.
                print(f'{i}ok')
                sum = sum + 1
                print(sum)
                with open(status_name, 'w', encoding='utf-8') as f1:
                    # Persist progress so the next run skips finished files
                    f1.write(str(sum))
                try:
                    os.rename(down_file, new_file_name)
                except FileExistsError:
                    pass  # already renamed by an earlier run; not fatal
                break
            # mismatch: loop around and download both files again
    ftp.quit()  # BUG FIX: the control connection was never closed

if __name__ == '__main__':

    # Local naming scheme for downloaded files: av2_20220211_t12z.pgrb2.0p50.f192
    str_time = str_date()
    dir = '/public/city/data/input/gfs'      # download / working directory
    my_dir = '/public/home/model/lg'
    log_dir = os.path.join(my_dir, 'data')
    gfs_file_name = 'av2_' + str_time + '_t12z.pgrb2.0p50.f'
    md5_file = 'md5_' + str_time + '12.txt'  # server-side md5 listing, 12z cycle
    os.chdir(dir)

    file_name = 'gfs.t12z.pgrb2.0p50.f'
    # Forecast hours to fetch; read_status() trims the already-finished prefix.
    list = ['000', '024', '048', '072', '096', '120', '144', '168', '192']
    log_name = os.path.join(log_dir, str_time + '.log')
    status_name = os.path.join(log_dir, 'status.txt')

    # Per-day pid file makes the script single-instance.
    pid = '/tmp/ftp_' + str_time + '.pid'
    if os.path.exists(pid):
        exit()  # another instance is (or was) running today
    with open(pid, 'w', encoding='utf-8') as f2:
        f2.write(str(os.getpid()))

    if os.path.exists(log_name):
        exit()  # today's log exists: download already completed
    try:
        read_status()
    except Exception:
        # BUG FIX: the original bare `except:` silently swallowed every
        # failure.  Still clean up the pid file so the next run can start,
        # but let the error surface instead of hiding it.
        os.remove(pid)
        raise

原网站

版权声明
本文为[Operation and maintenance dumplings]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/02/202202131946340021.html