Skip to content

Commit 8b17b4e

Browse files
committed
More robust conversion of chromosome numbers to int
1 parent 9a99a8a commit 8b17b4e

File tree

4 files changed

+14
-5
lines changed

4 files changed

+14
-5
lines changed

ldsc_polyfun/jackknife.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,7 @@ def __init__(self, x, y, n_blocks=None, separators=None, chr_num=None, verbose=T
574574
num_lambdas=100, approx_ridge=False,
575575
ridge_lambda=None, use_1se=False, has_intercept=False, standardize=True,
576576
skip_ridge_jackknife=True, num_chr_sets=2, num_chr=22):
577-
577+
578578
#sanity checks
579579
assert chr_num is not None
580580
# # # chr_num[:100000]=1

ldsc_polyfun/parse.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,20 @@ def read_csv(fh, **kwargs):
3232
return df
3333

3434
def set_snpid_index(df):
35+
36+
def float_to_int(c):
    """Best-effort conversion of a chromosome label to ``int``.

    Values that ``int()`` accepts directly (ints, floats such as ``1.0``,
    integer strings such as ``'22'``) are converted exactly as before.
    Float-formatted strings with an integral value (``'1.0'``) are also
    converted, so they produce the same label as ``1`` or ``'1'``.
    Anything else (``'X'``, ``'MT'``, NaN, infinity, ``'1.5'``) is returned
    unchanged.

    Args:
        c: A single chromosome value.

    Returns:
        An ``int`` when the value is integral, otherwise ``c`` itself.
    """
    try:
        return int(c)
    except (ValueError, OverflowError):
        # ValueError: non-integer string or NaN; OverflowError: +/-infinity.
        pass

    # int() rejects float-formatted strings like '1.0'; go through float()
    # and only accept the result when no fractional part would be dropped.
    try:
        as_float = float(c)
    except (TypeError, ValueError):
        return c
    if as_float.is_integer():
        return int(as_float)
    return c
42+
3543
df['A1_first'] = (df['A1'] < df['A2']) | (df['A1'].str.len()>1) | (df['A2'].str.len()>1)
3644
df['A1s'] = df['A2'].copy()
3745
df.loc[df['A1_first'], 'A1s'] = df.loc[df['A1_first'], 'A1'].copy()
3846
df['A2s'] = df['A1'].copy()
3947
df.loc[df['A1_first'], 'A2s'] = df.loc[df['A1_first'], 'A2'].copy()
40-
s_chr = df['CHR'].map(lambda c: int(c) if str(c)[0] in ['0','1','2','3','4','5,','6','7','8','9'] else c).astype(str)
48+
s_chr = df['CHR'].map(float_to_int).astype(str)
4149
s_bp = df['BP'].astype(int).astype(str)
4250
df.index = s_chr + '.' + s_bp + '.' + df['A1s'] + '.' + df['A2s']
4351
df.index.name = 'snpid'
@@ -116,7 +124,7 @@ def sumstats(fh, alleles=True, dropna=True):
116124
if dropna:
117125
x = x.dropna(how='any')
118126

119-
x = set_snpid_index(x)
127+
x = set_snpid_index(x)
120128
x.drop(columns=['CHR', 'BP'], inplace=True)
121129

122130

ldsc_polyfun/sumstats.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def _print_part_delete_values(ldscore_reg, ofh, log):
236236

237237
def _merge_and_log(ld, sumstats, noun, log):
238238
'''Wrap smart merge with log messages about # of SNPs.'''
239-
sumstats = smart_merge(ld, sumstats)
239+
sumstats = smart_merge(ld, sumstats)
240240
msg = 'After merging with {F}, {N} SNPs remain.'
241241
if len(sumstats) == 0:
242242
msg += ' Please make sure that your annotation files include the SNPs in your sumstats files (please see the PolyFun wiki for details on downloading functional annotations)'
@@ -275,6 +275,7 @@ def _read_ld_sumstats(args, log, fh, alleles=True, dropna=True):
275275

276276
M_annot, ref_ld, novar_cols = _check_variance(log, M_annot, ref_ld)
277277
w_ld = _read_w_ld(args, log)
278+
278279
sumstats = _merge_and_log(ref_ld, sumstats, 'reference panel LD', log)
279280
sumstats = _merge_and_log(sumstats, w_ld, 'regression SNP LD', log)
280281
w_ld_cname = sumstats.columns[-1]

polyfun.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def run_ldsc(self, args, use_ridge, nn, keep_large, evenodd_split, n_blocks=2):
188188
df_sumstats = pd.read_table(args.sumstats, sep='\s+')
189189
###merge everything together...
190190

191-
#prepare LD-scores for S-LDSC run
191+
#prepare LD-scores for S-LDSC run
192192
ref_ld = np.array(df_sumstats[ref_ld_cnames], dtype=np.float32)
193193
sumstats._check_ld_condnum(args, log, ref_ld_cnames)
194194
if df_sumstats.shape[0] < 200000:

0 commit comments

Comments
 (0)