How to find the frequency of each character in a string, including the index positions, with Python

2 Answers

0 votes
import re
 
# Sample text to analyse. Only ASCII letters and digits are matched below,
# so the spaces between the words are not counted.
s = "python java php c"

# Each match is exactly one alphanumeric character.
regex = re.compile(r'[a-zA-Z0-9]')
regex_find = regex.finditer(s)
idx_positions = {}  # character -> list of indexes where it occurs in s
frequency = {}      # character -> number of occurrences in s

for match in regex_find:
    char = match.group()
    frequency[char] = frequency.get(char, 0) + 1
    # Append in place rather than rebuilding the list on every occurrence.
    idx_positions.setdefault(char, []).append(match.start())

# Both dicts are filled in the same pass, so every key of `frequency`
# is also present in `idx_positions`.
for key, value in frequency.items():
    print(key, " frequency : ", value, ' indexes : ', idx_positions[key])
 
 
 
 
'''
run:
 
p  frequency :  3  indexes :  [0, 12, 14]
y  frequency :  1  indexes :  [1]
t  frequency :  1  indexes :  [2]
h  frequency :  2  indexes :  [3, 13]
o  frequency :  1  indexes :  [4]
n  frequency :  1  indexes :  [5]
j  frequency :  1  indexes :  [7]
a  frequency :  2  indexes :  [8, 10]
v  frequency :  1  indexes :  [9]
c  frequency :  1  indexes :  [16]
 
'''

 



answered Jan 26, 2020 by avibootz
edited Jan 26, 2020 by avibootz
0 votes
import re
 
# Sample text to analyse. Only ASCII letters and digits are matched below,
# so the spaces between the words are not counted.
s = "python java php c"

# Each match is exactly one alphanumeric character.
regex = re.compile(r'[a-zA-Z0-9]')
regex_find = regex.finditer(s)
idx_positions = {}  # character -> list of indexes where it occurs in s
frequency = {}      # character -> number of occurrences in s

for match in regex_find:
    char = match.group()
    print('*', match)
    frequency[char] = frequency.get(char, 0) + 1
    # Trace the value actually stored; re-evaluating the update expression
    # here would report one more than the real count.
    print('**', frequency[char])
    # Append in place rather than rebuilding the list on every occurrence.
    idx_positions.setdefault(char, []).append(match.start())
    # Likewise, show the stored list instead of appending the index again.
    print('***', idx_positions[char])
    print('****', idx_positions)

# Both dicts are filled in the same pass, so every key of `frequency`
# is also present in `idx_positions`.
for key, value in frequency.items():
    print(key, " frequency : ", value, ' indexes : ', idx_positions[key])




'''
run:

* <re.Match object; span=(0, 1), match='p'>
** 1
*** [0]
**** {'p': [0]}
* <re.Match object; span=(1, 2), match='y'>
** 1
*** [1]
**** {'p': [0], 'y': [1]}
* <re.Match object; span=(2, 3), match='t'>
** 1
*** [2]
**** {'p': [0], 'y': [1], 't': [2]}
* <re.Match object; span=(3, 4), match='h'>
** 1
*** [3]
**** {'p': [0], 'y': [1], 't': [2], 'h': [3]}
* <re.Match object; span=(4, 5), match='o'>
** 1
*** [4]
**** {'p': [0], 'y': [1], 't': [2], 'h': [3], 'o': [4]}
* <re.Match object; span=(5, 6), match='n'>
** 1
*** [5]
**** {'p': [0], 'y': [1], 't': [2], 'h': [3], 'o': [4], 'n': [5]}
* <re.Match object; span=(7, 8), match='j'>
** 1
*** [7]
**** {'p': [0], 'y': [1], 't': [2], 'h': [3], 'o': [4], 'n': [5], 'j': [7]}
* <re.Match object; span=(8, 9), match='a'>
** 1
*** [8]
**** {'p': [0], 'y': [1], 't': [2], 'h': [3], 'o': [4], 'n': [5], 'j': [7], 'a': [8]}
* <re.Match object; span=(9, 10), match='v'>
** 1
*** [9]
**** {'p': [0], 'y': [1], 't': [2], 'h': [3], 'o': [4], 'n': [5], 'j': [7], 'a': [8], 'v': [9]}
* <re.Match object; span=(10, 11), match='a'>
** 2
*** [8, 10]
**** {'p': [0], 'y': [1], 't': [2], 'h': [3], 'o': [4], 'n': [5], 'j': [7], 'a': [8, 10], 'v': [9]}
* <re.Match object; span=(12, 13), match='p'>
** 2
*** [0, 12]
**** {'p': [0, 12], 'y': [1], 't': [2], 'h': [3], 'o': [4], 'n': [5], 'j': [7], 'a': [8, 10], 'v': [9]}
* <re.Match object; span=(13, 14), match='h'>
** 2
*** [3, 13]
**** {'p': [0, 12], 'y': [1], 't': [2], 'h': [3, 13], 'o': [4], 'n': [5], 'j': [7], 'a': [8, 10], 'v': [9]}
* <re.Match object; span=(14, 15), match='p'>
** 3
*** [0, 12, 14]
**** {'p': [0, 12, 14], 'y': [1], 't': [2], 'h': [3, 13], 'o': [4], 'n': [5], 'j': [7], 'a': [8, 10], 'v': [9]}
* <re.Match object; span=(16, 17), match='c'>
** 1
*** [16]
**** {'p': [0, 12, 14], 'y': [1], 't': [2], 'h': [3, 13], 'o': [4], 'n': [5], 'j': [7], 'a': [8, 10], 'v': [9], 'c': [16]}
p  frequency :  3  indexes :  [0, 12, 14]
y  frequency :  1  indexes :  [1]
t  frequency :  1  indexes :  [2]
h  frequency :  2  indexes :  [3, 13]
o  frequency :  1  indexes :  [4]
n  frequency :  1  indexes :  [5]
j  frequency :  1  indexes :  [7]
a  frequency :  2  indexes :  [8, 10]
v  frequency :  1  indexes :  [9]
c  frequency :  1  indexes :  [16]

'''

 



answered Jan 26, 2020 by avibootz
edited Jan 26, 2020 by avibootz
...