Extract a running ELF from a memory dump - elf

Our teacher gave us as homework a memory dump from a VBox (Ubuntu 16.04.9) and said that the message we need to get is printed by an ELF currently running in the VM.
This is what I get after running pslist on the image.
Offset Name Pid PPid Uid Gid DTB Start Time
------------------ -------------------- --------------- --------------- --------------- ------ ------------------ ----------
0xffff88007c998000 systemd 1 0 0 0 0x000000003552e000 -
0xffff88007c998e00 kthreadd 2 0 0 0 ------------------ -
0xffff88007c999c00 ksoftirqd/0 3 2 0 0 ------------------ -
0xffff88007c99aa00 kworker/0:0 4 2 0 0 ------------------ -
0xffff88007c99b800 kworker/0:0H 5 2 0 0 ------------------ -
0xffff88007c99c600 kworker/u4:0 6 2 0 0 ------------------ -
0xffff88007c99d400 rcu_sched 7 2 0 0 ------------------ -
0xffff88007c99e200 rcu_bh 8 2 0 0 ------------------ -
0xffff88007c99f000 migration/0 9 2 0 0 ------------------ -
0xffff88007c9f0000 watchdog/0 10 2 0 0 ------------------ -
0xffff88007c9f1c00 watchdog/1 11 2 0 0 ------------------ -
0xffff88007c9f2a00 migration/1 12 2 0 0 ------------------ -
0xffff88007c9f3800 ksoftirqd/1 13 2 0 0 ------------------ -
0xffff88007c9f4600 kworker/1:0 14 2 0 0 ------------------ -
0xffff88007c9f5400 kworker/1:0H 15 2 0 0 ------------------ -
0xffff88007c9f6200 kdevtmpfs 16 2 0 0 ------------------ -
0xffff88007c9f7000 netns 17 2 0 0 ------------------ -
0xffff88007ca90000 perf 18 2 0 0 ------------------ -
0xffff88007ca90e00 khungtaskd 19 2 0 0 ------------------ -
0xffff88007ca91c00 writeback 20 2 0 0 ------------------ -
0xffff88007ca92a00 ksmd 21 2 0 0 ------------------ -
0xffff88007ca93800 khugepaged 22 2 0 0 ------------------ -
0xffff88007ca94600 crypto 23 2 0 0 ------------------ -
0xffff88007ca95400 kintegrityd 24 2 0 0 ------------------ -
0xffff88007ca96200 bioset 25 2 0 0 ------------------ -
0xffff88007ca97000 kblockd 26 2 0 0 ------------------ -
0xffff88007cb80000 ata_sff 27 2 0 0 ------------------ -
0xffff88007cb80e00 md 28 2 0 0 ------------------ -
0xffff88007cb81c00 devfreq_wq 29 2 0 0 ------------------ -
0xffff88007cb82a00 kworker/u4:1 30 2 0 0 ------------------ -
0xffff88007cb83800 kworker/0:1 31 2 0 0 ------------------ -
0xffff88007cb84600 kworker/1:1 32 2 0 0 ------------------ -
0xffff88007cb86200 kswapd0 34 2 0 0 ------------------ -
0xffff88007cb87000 vmstat 35 2 0 0 ------------------ -
0xffff880075ec0000 fsnotify_mark 36 2 0 0 ------------------ -
0xffff880075ec0e00 ecryptfs-kthrea 37 2 0 0 ------------------ -
0xffff880075f27000 kthrotld 53 2 0 0 ------------------ -
0xffff88007cb85400 acpi_thermal_pm 54 2 0 0 ------------------ -
0xffff880075fc8000 bioset 55 2 0 0 ------------------ -
0xffff880075fc8e00 bioset 56 2 0 0 ------------------ -
0xffff880075fc9c00 bioset 57 2 0 0 ------------------ -
0xffff880075fcaa00 bioset 58 2 0 0 ------------------ -
0xffff880075fcb800 bioset 59 2 0 0 ------------------ -
0xffff880075fcc600 bioset 60 2 0 0 ------------------ -
0xffff880075fcd400 bioset 61 2 0 0 ------------------ -
0xffff880075fce200 bioset 62 2 0 0 ------------------ -
0xffff880075fcf000 scsi_eh_0 63 2 0 0 ------------------ -
0xffff880075f26200 scsi_tmf_0 64 2 0 0 ------------------ -
0xffff880075f24600 scsi_eh_1 65 2 0 0 ------------------ -
0xffff880075f22a00 scsi_tmf_1 66 2 0 0 ------------------ -
0xffff880075f20e00 kworker/u4:2 67 2 0 0 ------------------ -
0xffff880075f25400 kworker/u4:3 68 2 0 0 ------------------ -
0xffff880075ec6200 ipv6_addrconf 72 2 0 0 ------------------ -
0xffff880035595400 deferwq 85 2 0 0 ------------------ -
0xffff880035596200 charger_manager 86 2 0 0 ------------------ -
0xffff880035593800 bioset 87 2 0 0 ------------------ -
0xffff880034c49c00 kworker/0:2 126 2 0 0 ------------------ -
0xffff8800355e5400 kpsmoused 139 2 0 0 ------------------ -
0xffff880034ee8e00 kworker/0:3 156 2 0 0 ------------------ -
0xffff880075ec2a00 kworker/1:1H 166 2 0 0 ------------------ -
0xffff880034eef000 scsi_eh_2 167 2 0 0 ------------------ -
0xffff880034eee200 scsi_tmf_2 168 2 0 0 ------------------ -
0xffff880034eed400 bioset 169 2 0 0 ------------------ -
0xffff880075f23800 raid5wq 241 2 0 0 ------------------ -
0xffff880035590000 bioset 272 2 0 0 ------------------ -
0xffff880035594600 kworker/0:1H 295 2 0 0 ------------------ -
0xffff880035597000 jbd2/sda1-8 297 2 0 0 ------------------ -
0xffff880035590e00 ext4-rsv-conver 298 2 0 0 ------------------ -
0xffff880034c4aa00 systemd-journal 354 1 0 0 0x0000000079614000 -
0xffff880035592a00 iscsi_eh 356 2 0 0 ------------------ -
0xffff880079103800 kworker/1:2 370 2 0 0 ------------------ -
0xffff880034eeaa00 kauditd 372 2 0 0 ------------------ -
0xffff88007a478e00 ib_addr 382 2 0 0 ------------------ -
0xffff88007a479c00 ib_mcast 385 2 0 0 ------------------ -
0xffff88007a47aa00 ib_nl_sa_wq 386 2 0 0 ------------------ -
0xffff88007a47b800 ib_cm 387 2 0 0 ------------------ -
0xffff88007a47c600 iw_cm_wq 389 2 0 0 ------------------ -
0xffff88007a47d400 rdma_cm 391 2 0 0 ------------------ -
0xffff880075ec4600 lvmetad 394 1 0 0 0x000000007c36c000 -
0xffff88007a478000 kworker/1:3 399 2 0 0 ------------------ -
0xffff880079100000 systemd-udevd 408 1 0 0 0x000000007c2c8000 -
0xffff880079100e00 iprt-VBoxWQueue 493 2 0 0 ------------------ -
0xffff880034ebf000 ttm_swap 649 2 0 0 ------------------ -
0xffff88007a076200 atd 730 1 0 0 0x000000007c3f8000 -
0xffff88007a070000 lxcfs 738 1 0 0 0x0000000079fe0000 -
0xffff88007b68b800 accounts-daemon 739 1 0 0 0x0000000079fe2000 -
0xffff880034eb8e00 rsyslogd 745 1 104 108 0x0000000079530000 -
0xffff880034c4e200 cron 754 1 0 0 0x000000007a08c000 -
0xffff88007942c600 systemd-logind 758 1 0 0 0x000000007a6d6000 -
0xffff880079429c00 acpid 777 1 0 0 0x000000007917c000 -
0xffff880079428000 snapd 783 1 0 0 0x0000000079768000 -
0xffff880079428e00 dbus-daemon 785 1 107 111 0x0000000079470000 -
0xffff88007b17b800 dhclient 846 1 0 0 0x000000007a430000 -
0xffff88007942aa00 polkitd 898 1 0 0 0x0000000079b92000 -
0xffff880034ebd400 mdadm 907 1 0 0 0x000000007c3fc000 -
0xffff88007b17f000 VBoxService 941 1 0 0 0x000000007862e000 -
0xffff880034ebc600 named 1018 1 110 115 0x0000000079aa4000 -
0xffff88007a32c600 sshd 1023 1 0 0 0x0000000034dbc000 -
0xffff88007b179c00 iscsid 1036 1 0 0 0x000000007afdc000 -
0xffff88007b178e00 iscsid 1037 1 0 0 0x0000000079bd0000 -
0xffff88007b68f000 irqbalance 1079 1 0 0 0x000000007a462000 -
0xffff88007b688000 login 1084 1 0 1000 0x0000000079dc0000 -
0xffff88007a074600 systemd 1157 1 1000 1000 0x0000000034c16000 -
0xffff88007a073800 (sd-pam) 1160 1157 1000 1000 0x0000000079a92000 -
0xffff88007a075400 bash 1166 1084 1000 1000 0x0000000035720000 -
0xffff8800355e3800 ht0p 1192 1166 1000 1000 0x000000007b982000 -
0xffff8800355e6200 htop 1193 1166 1000 1000 0x000000007b9a2000 -
I have tried running procdump on a lot of processes there and then running strings on them but nothing seemed like the 'message'. I really have no idea what to do next, do I need to extract somehow the ELF that's running from memory? Also do you have any idea what process it might be or what else should I do?

If you are sure it is an ELF file, and if the file has a .ELF extension, then you could use the Volatility dumpfiles module and search for a regex that matches .ELF like so...
vol.py --profile=$PROFILE -f $MEMPATH dumpfiles -n -i -r \\.elf --dump-dir=$OUTDIR
-n : Use original file name in output
-r : Use regex
-i : Case insensitive

Related

How to replace variables across multiple columns using awk?

I have a file that looks like this with 2060 lines with a header (column names) at the top:
FID IID late_telangiectasia_G1 late_atrophy_G1 late_atrophy_G2 late_nipple_retraction_G1 late_nipple_retraction_G2 late_oedema_G1 late_oedema_G2 late_induration_tumour_G1 late_induration_outside_G1 late_induration_G2 late_arm_lympho_G1 late_hyper_G1
1 470502 1 0 0 0 0 0 0 0 0 0 0 0
2 470514 0 0 0 0 0 0 0 0 0 0 0 0
3 470422 0 0 0 0 0 0 0 0 0 0 0 1
4 470510 0 0 0 0 0 1 0 1 1 1 0 1
5 470506 0 0 0 0 0 0 0 0 0 0 0 0
6 471948 0 0 0 0 0 0 0 1 0 0 0 0
7 469922 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
8 471220 0 1 1 -9 -9 0 0 1 1 1 0 0
9 470498 0 1 0 0 0 0 0 0 0 0 0 0
10 471993 0 1 1 0 0 0 0 0 0 0 0 0
11 470414 0 1 0 0 0 0 0 0 1 0 0 0
12 470522 0 0 0 0 0 0 0 0 0 0 0 0
13 470345 0 0 0 0 0 0 0 0 0 0 0 0
14 471275 0 1 0 -9 0 0 0 1 0 0 0 0
15 471283 0 1 0 0 0 0 0 1 1 0 0 0
16 472577 0 1 0 0 0 0 0 1 0 0 0 0
17 470492 0 1 0 0 0 0 0 0 0 0 0 0
18 472889 0 0 0 -9 0 0 0 0 0 0 0 0
19 470500 0 1 0 1 0 0 0 0 1 0 0 0
20 470493 0 0 0 0 0 0 0 1 1 0 0 0
I want to replace all the 0 -> 1 and the 1 -> 2 from column 3 to 12. I don't want to replace the -9.
I know for a single column the command will be:
awk '
{
if($3==1)$3=2
if($3==0)$3=1
}
1' file
Therefore, for multiple columns is there an easier way to specify a range rather than manually type every column number?
awk '
{
if($3,$4,$5,$6,$7,$8,$9,$10,$11,$12==1)$3,$4,$5,$6,$7,$8,$9,$10,$11,$12=2
if($3,$4,$5,$6,$7,$8,$9,$10,$11,$12==0)$3,$4,$5,$6,$7,$8,$9,$10,$11,$12=1
}
1' file
Thanks in advance
You could use a loop and change the field values accessing the field value using $i
awk '
{
for(i=3; i<=12; i++) {
if ($i==1 || $i==0) $i++
}
}1
' file | column -t
One possibility if you want to change almost all of your fields (as in your case) is to just save the ones you don't want to change and then change everything else:
$ awk 'NR>1{hd=$1 FS $2; tl=$13 FS $14; $1=$2=$13=$14=""; gsub(1,2); gsub(0,1); $0=hd $0 tl} 1' file
FID IID late_telangiectasia_G1 late_atrophy_G1 late_atrophy_G2 late_nipple_retraction_G1 late_nipple_retraction_G2 late_oedema_G1 late_oedema_G2 late_induration_tumour_G1 late_induration_outside_G1 late_induration_G2 late_arm_lympho_G1 late_hyper_G1
1 470502 2 1 1 1 1 1 1 1 1 1 0 0
2 470514 1 1 1 1 1 1 1 1 1 1 0 0
3 470422 1 1 1 1 1 1 1 1 1 1 0 1
4 470510 1 1 1 1 1 2 1 2 2 2 0 1
5 470506 1 1 1 1 1 1 1 1 1 1 0 0
6 471948 1 1 1 1 1 1 1 2 1 1 0 0
7 469922 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
8 471220 1 2 2 -9 -9 1 1 2 2 2 0 0
9 470498 1 2 1 1 1 1 1 1 1 1 0 0
10 471993 1 2 2 1 1 1 1 1 1 1 0 0
11 470414 1 2 1 1 1 1 1 1 2 1 0 0
12 470522 1 1 1 1 1 1 1 1 1 1 0 0
13 470345 1 1 1 1 1 1 1 1 1 1 0 0
14 471275 1 2 1 -9 1 1 1 2 1 1 0 0
15 471283 1 2 1 1 1 1 1 2 2 1 0 0
16 472577 1 2 1 1 1 1 1 2 1 1 0 0
17 470492 1 2 1 1 1 1 1 1 1 1 0 0
18 472889 1 1 1 -9 1 1 1 1 1 1 0 0
19 470500 1 2 1 2 1 1 1 1 2 1 0 0
20 470493 1 1 1 1 1 1 1 2 2 1 0 0
pipe it to column -t for alignment if you like.
Or using GNU awk for the 3rd arg to match() and retaining white space:
$ awk 'NR>1{ match($0,/((\S+\s+){2})((\S+\s+){9}\S+)(.*)/,a); gsub(1,2,a[3]); gsub(0,1,a[3]); $0=a[1] a[3] a[5] } 1' file
FID IID late_telangiectasia_G1 late_atrophy_G1 late_atrophy_G2 late_nipple_retraction_G1 late_nipple_retraction_G2 late_oedema_G1 late_oedema_G2 late_induration_tumour_G1 late_induration_outside_G1 late_induration_G2 late_arm_lympho_G1 late_hyper_G1
1 470502 2 1 1 1 1 1 1 1 1 1 0 0
2 470514 1 1 1 1 1 1 1 1 1 1 0 0
3 470422 1 1 1 1 1 1 1 1 1 1 0 1
4 470510 1 1 1 1 1 2 1 2 2 2 0 1
5 470506 1 1 1 1 1 1 1 1 1 1 0 0
6 471948 1 1 1 1 1 1 1 2 1 1 0 0
7 469922 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
8 471220 1 2 2 -9 -9 1 1 2 2 2 0 0
9 470498 1 2 1 1 1 1 1 1 1 1 0 0
10 471993 1 2 2 1 1 1 1 1 1 1 0 0
11 470414 1 2 1 1 1 1 1 1 2 1 0 0
12 470522 1 1 1 1 1 1 1 1 1 1 0 0
13 470345 1 1 1 1 1 1 1 1 1 1 0 0
14 471275 1 2 1 -9 1 1 1 2 1 1 0 0
15 471283 1 2 1 1 1 1 1 2 2 1 0 0
16 472577 1 2 1 1 1 1 1 2 1 1 0 0
17 470492 1 2 1 1 1 1 1 1 1 1 0 0
18 472889 1 1 1 -9 1 1 1 1 1 1 0 0
19 470500 1 2 1 2 1 1 1 1 2 1 0 0
20 470493 1 1 1 1 1 1 1 2 2 1 0 0
It is hard to tell if that is space delimited or tab delimited?
Here is a ruby that will deal with either space or tab delimited fields and will convert the result to tab delimited.
Note: Ruby arrays are zero based, so fields 1,2 are [0..1] and fields 3-12 are [2..11]
ruby -r csv -e 'options={:col_sep=>"\t", :converters=>:all, :headers=>true}
data=CSV.parse($<.read.gsub(/[[:blank:]]+/,"\t"), **options)
data.each_with_index{
|r,i| data[i]=r[0..1]+r[2..11].map{|e| (e==1 || e==0) ? e+1 : e}+r[12..]}
puts data.to_csv(**options)
' file
Prints:
FID IID late_telangiectasia_G1 late_atrophy_G1 late_atrophy_G2 late_nipple_retraction_G1 late_nipple_retraction_G2 late_oedema_G1 late_oedema_G2 late_induration_tumour_G1 late_induration_outside_G1 late_induration_G2 late_arm_lympho_G1 late_hyper_G1
1 470502 2 1 1 1 1 1 1 1 1 1 0 0
2 470514 1 1 1 1 1 1 1 1 1 1 0 0
3 470422 1 1 1 1 1 1 1 1 1 1 0 1
4 470510 1 1 1 1 1 2 1 2 2 2 0 1
5 470506 1 1 1 1 1 1 1 1 1 1 0 0
6 471948 1 1 1 1 1 1 1 2 1 1 0 0
7 469922 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
8 471220 1 2 2 -9 -9 1 1 2 2 2 0 0
9 470498 1 2 1 1 1 1 1 1 1 1 0 0
10 471993 1 2 2 1 1 1 1 1 1 1 0 0
11 470414 1 2 1 1 1 1 1 1 2 1 0 0
12 470522 1 1 1 1 1 1 1 1 1 1 0 0
13 470345 1 1 1 1 1 1 1 1 1 1 0 0
14 471275 1 2 1 -9 1 1 1 2 1 1 0 0
15 471283 1 2 1 1 1 1 1 2 2 1 0 0
16 472577 1 2 1 1 1 1 1 2 1 1 0 0
17 470492 1 2 1 1 1 1 1 1 1 1 0 0
18 472889 1 1 1 -9 1 1 1 1 1 1 0 0
19 470500 1 2 1 2 1 1 1 1 2 1 0 0
20 470493 1 1 1 1 1 1 1 2 2 1 0 0
With awk you can do:
awk -v OFS="\t" 'FNR>1{for(i=3;i<=12;i++)if ($i~"^[10]$")$i=$i+1} $1=$1' file
# same output
gawk -v RS='[[:space:]]+' '++c > 2 && /^(0|1)$/ { ++$0 }
{ printf "%s", $0 RT } RT ~ /\n/ { c = 0 }' file

Gather several column using value as number of new rows

I want to convert a somewhat dirty table into a normalized one. The structure of the table is as follow:
CREATE TABLE dirty_table(
date DATE NOT NULL
,name VARCHAR(24) NOT NULL
,co BIT NOT NULL
,en BIT NOT NULL
,re BIT NOT NULL
,po BIT NOT NULL
,ga BIT NOT NULL
,pr BIT NOT NULL
,bi INTEGER NOT NULL
);
Somewhat similar to this question but with a caveat, I have a bit/integer instead for values in a true/false fashion, bit columns can contain values 0 and 1, and the bi column any positive number and 0. I want to create a new row keeping name and date column and the name of the non zero column. Something like this:
date |name |proc |
-----------|----------|-----|
2017-07-04 |Jonny doe |bi |
2017-07-04 |Jonny doe |bi |
2017-07-07 |Jonny doe |ga |
2017-07-04 |Jonny doe |po |
2017-07-04 |Jonda doe |en |
2017-07-04 |Jonda doe |co |
2017-07-07 |Jonda doe |re |
2017-07-07 |Jonda doe |re |
2017-08-03 |Jonda doe |re |
2017-08-08 |Josep doe |en |
2017-08-09 |Josep doe |bi |
2017-08-11 |Josep doe |ga |
As can be seen, the bi column can appear several times if the value is >1. Others, unless there's another row, are likely to have only one combination of date, name and proc column, as seen in this excerpt of dirty_table:
date name co en re po ga pr bi
2017-07-03 DPSUW 1 1 0 0 0 0 2
2017-07-03 XDUPT 1 0 0 0 0 0 0
2017-07-03 XIYUD 0 1 0 0 0 0 1
2017-07-03 HBJRL 1 1 0 0 0 0 2
2017-07-03 DIHMP 1 1 0 0 0 0 1
2017-07-04 MTHDT 1 1 0 0 0 0 2
2017-07-04 MFPLI 0 1 0 0 0 0 1
2017-07-04 GKHFG 1 0 0 0 0 0 1
2017-07-04 QKDNE 1 1 0 0 0 0 2
2017-07-04 GSXLN 1 1 0 0 0 0 2
2017-07-05 ICKUT 0 1 0 0 0 0 1
2017-07-05 NHVLT 0 1 0 0 0 0 1
2017-07-05 KTSFX 1 1 0 0 0 0 1
2017-07-05 AINSA 1 1 0 0 0 0 2
2017-07-07 YUCAU 0 1 0 0 0 0 1
2017-07-07 YLLVX 1 0 0 0 0 0 1
2017-07-10 CSIMK 1 1 0 0 0 0 2
2017-07-10 PWNCV 0 1 0 0 0 0 1
2017-07-10 AMMVX 0 1 0 0 0 0 1
2017-07-11 BLELT 0 1 0 0 0 0 1
2017-07-11 ONAKD 0 1 0 0 0 0 1
2017-07-11 IGJDK 1 0 0 0 0 0 1
2017-07-11 TOQLH 1 1 0 0 0 0 2
2017-07-11 DUQWM 1 0 0 0 0 0 0
2017-07-11 SFWVP 1 1 0 0 0 0 2
2017-07-12 MQVHW 0 1 0 0 0 0 1
2017-07-12 OFHWQ 0 1 0 0 0 0 1
2017-07-12 MPOAK 1 1 0 0 0 0 1
2017-07-12 YPFEH 1 1 0 0 0 0 1
2017-07-12 XUENE 1 0 0 0 0 0 1
I was trying to use case statements but that only creates a single row. How can I create multiple rows from one record using the value as number of new rows to create? I prefer using generic SQL, but I'm using MariaDB.
The simplest method is probably union all:
select date, name, 'co' as proc from t where co >= 1 union all
select date, name, 'en' as proc from t where en >= 1 union all
. . .
select date, name, 'bi' as proc from t where bi >= 1 union all
select date, name, 'bi' as proc from t where bi >= 2;
That produces the multiple rows for bi.

Filter table for abundance in at least 20 % of the samples

I have a huge table tab separated like the one below:
the first row is the subject list while the other rows are my counts.
KEGGAnnotation a b c d e f g h i l m n o p q r s t u v z w ee wr ty yu im
K01824 0 0 1 5 0 0 0 0 0 0 0 0 0 0 14 6 0 0 0 0 0 0 0 0 0 0 0
K03924 17302 15372 19601 18732 17180 18094 23560 20516 14280 24187 19642 20521 20330 20843 22948 17124 19557 18319 16608 19463 18334 21022 14325 10819 13342 16876 16979
K13730 0 0 1 5 0 0 0 0 0 0 0 0 0 0 14 6 0 0 0 0 0 0 0 0 0 0 0
K13735 5360 463 12516 7235 5051 2022 2499 2778 5392 1220 6460 9490 1169 6556 14862 9657 7360 6837 7810 4368 2186 12474 7810 9755 1401 12867 4431
K07279 0 0 1 5 0 0 0 0 0 0 0 0 0 0 14 6 0 0 0 0 0 0 0 0 0 0 0
K14194 4499 2216 2322 2031 2763 2219 704 1647 2536 876 2692 4196 687 2958 3207 2153 2266 1974 370 2867 1110 5372 3637 9828 2038 2812 3472
K11494 0 0 1 10 0 0 0 0 11 0 0 0 0 0 14 6 0 0 0 0 0 0 0 0 0 0 0
K03332 0 0 1 5 0 0 0 0 0 0 0 0 0 0 14 6 0 0 0 0 0 0 0 0 0 0 0
K01317 3 1 6 0 1 3 0 14 11 0 21 8 0 20 0 263 0 0 6 3 5 0 0 41 0 0 2
I would like to grep only the lines in which the counts >100 are present in at least 20% of the samples (= in at least 6 samples).
EX. sample K03924 will be grepped but not K03332.
Increment the counter for values greater than the threshold. Print the lines if the counter is at least 20% of the fields checked. This will also print the header line.
awk '{c=0; for(i=2;i<=NF;i++) c+=($i>100); if(c>=0.2*(NF-1)) print $0}' input

How to find matched rows in 2 files based on column3 and create extra file with rank value

I have 2 files, I need to merge based on column3 (pos). Then find matched position and create an desirable output as follows using awk. I would like to have output with 4 columns. The 4th columns indicate common position across 2 files with rank number.
File1.txt
SNP-ID Chr Pos
rs62637813 1 52058
rs150021059 1 52238
rs4477212 1 52356
kgp15717912 1 53424
rs140052487 1 54353
rs9701779 1 56537
kgp7727307 1 56962
kgp15297216 1 72391
rs3094315 1 75256
rs3131972 1 75272
kgp6703048 1 75406
kgp22792200 1 75665
kgp15557302 1 75769
File2.txt:
SNP-ID Chr Pos Chip1
rs58108140 1 10583 1
rs189107123 1 10611 2
rs180734498 1 13302 3
rs144762171 1 13327 4
rs201747181 1 13957 5
rs151276478 1 13980 6
rs140337953 1 30923 7
rs199681827 1 46402 8
rs200430748 1 47190 9
rs187298206 1 51476 10
rs116400033 1 51479 11
rs190452223 1 51914 12
rs181754315 1 51935 13
rs185832753 1 51954 14
rs62637813 1 52058 15
rs190291950 1 52144 16
rs201374420 1 52185 17
rs150021059 1 52238 18
rs199502715 1 53234 19
rs140052487 1 54353 20
Desirable-output:
SNP-ID Chr Pos Chip1 Chip2
rs58108140 1 10583 1 0
rs189107123 1 10611 2 0
rs180734498 1 13302 3 0
rs144762171 1 13327 4 0
rs201747181 1 13957 5 0
rs151276478 1 13980 6 0
rs140337953 1 30923 7 0
rs199681827 1 46402 8 0
rs200430748 1 47190 9 0
rs187298206 1 51476 10 0
rs116400033 1 51479 11 0
rs190452223 1 51914 12 0
rs181754315 1 51935 13 0
rs185832753 1 51954 14 0
rs62637813 1 52058 15 1
rs190291950 1 52144 16 0
rs201374420 1 52185 17 0
rs150021059 1 52238 18 2
rs199502715 1 53234 19 0
rs140052487 1 54353 20 3
I don't quite understand what you mean by "rank"
awk '
NR==FNR {pos[$3]=1; next}
FNR==1 {print $0, "Chip2"; next}
{print $0, ($3 in pos ? ++rank : 0)}
' File1.txt File2.txt | column -t
SNP-ID Chr Pos Chip1 Chip2
rs58108140 1 10583 1 0
rs189107123 1 10611 2 0
rs180734498 1 13302 3 0
rs144762171 1 13327 4 0
rs201747181 1 13957 5 0
rs151276478 1 13980 6 0
rs140337953 1 30923 7 0
rs199681827 1 46402 8 0
rs200430748 1 47190 9 0
rs187298206 1 51476 10 0
rs116400033 1 51479 11 0
rs190452223 1 51914 12 0
rs181754315 1 51935 13 0
rs185832753 1 51954 14 0
rs62637813 1 52058 15 1
rs190291950 1 52144 16 0
rs201374420 1 52185 17 0
rs150021059 1 52238 18 2
rs199502715 1 53234 19 0
rs140052487 1 54353 20 3

How would you split this given NSString into an NSDictionary?

I have some data I acquire from some linux box and want to put it into an NSDictionary for later processing.
How would you get this NSString into an NSDictionary like the following?
data
(
bytes
(
60 ( 1370515694 )
48 ( 812 )
49 ( 300 )
...
)
pkt
(
60 ( 380698 )
59 ( 8 )
58 ( 412 )
...
)
block
(
60 ( 5 )
48 ( 4 )
49 ( 7 )
...
)
drop
(
60 ( 706 )
48 ( 2 )
49 ( 4 )
...
)
session
(
60 ( 3 )
48 ( 1 )
49 ( 2 )
...
)
)
The data string looks like:
//time bytes pkt block drop session
60 1370515694 380698 5 706 3
48 812 8 4 2 1
49 300 412 7 4 2
50 0 0 0 0 0
51 87 2 0 0 0
52 87 2 0 0 0
53 0 0 0 0 0
54 0 0 0 0 0
55 0 0 0 0 0
56 0 0 0 0 0
57 812 8 0 0 0
58 812 8 0 0 0
59 0 0 0 0 0
0 0 0 0 0 0
1 2239 12 2 0 0
2 0 0 0 0 0
3 0 0 0 0 0
4 0 0 0 0 0
5 0 0 0 0 0
6 0 0 0 0 0
7 2882 19 2 0 0
8 4906 29 4 0 0
9 1844 15 11 0 0
10 4210 29 17 0 0
11 3370 18 4 0 0
12 3370 18 4 0 0
13 1184 7 3 0 0
14 0 0 0 0 0
15 4046 19 3 0 0
16 4956 23 3 0 0
17 2960 18 2 0 0
18 2960 18 2 0 0
19 1088 6 2 0 0
20 0 0 0 0 0
21 3261 17 3 0 0
22 3261 17 3 0 0
23 1228 6 2 0 0
24 1228 6 2 0 0
25 2628 17 2 0 0
26 4688 26 3 0 0
27 1752 13 5 0 0
28 3062 21 5 0 0
29 174 2 2 0 0
30 96 1 1 0 0
31 4351 23 5 0 0
32 0 0 0 0 0
33 4930 23 7 0 0
34 6750 31 7 0 0
35 1241 6 2 0 0
36 1241 6 2 0 0
37 3571 29 2 0 0
38 0 0 0 0 0
39 1010 5 1 0 0
40 1010 5 1 0 0
41 88859 72 3 0 1
42 90783 81 4 0 1
43 2914 19 3 0 0
44 0 0 0 0 0
45 2157 17 1 0 0
46 2157 17 1 0 0
47 78 1 1 0 0
.
Time (first column) should be the key for the sub-sub-dictionaries.
So the idea behind all that is that I can later randomly access the PKT value at a given TIME x, as well as the BLOCK amount at TIME y, and SESSION value at TIME z .. and so on..
Thanks in advance
You probably don't want a dictionary but an array containing dictionaries of all the data entries. The simplest way to parse something like this in Objective-C is to use the componentsSeparatedByString method in NSString
NSString* dataString = <Your Data String> // Assumes the items are separated by newlines
NSArray* items = [dataString componentsSeparatedByString:#"\n"];
NSMutableArray* dataDictionaries = [NSMutableArray array];
for (NSString* item in items) {
NSArray* elements = [item componentsSeparatedByString:#" "];
NSDictionary* entry = #{
#"time": [elements objectAtIndex:0],
#"bytes": [elements objectAtIndex:1],
#"pkt": [elements objectAtIndex:2],
#"block": [elements objectAtIndex:3], #"drop": [elements objectAtIndex:4],
#"session": [elements objectAtIndex:5],
};
[dataDictionaries addObject: entry];
}